{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "T4" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU" }, "cells": [ { "cell_type": "code", "source": [ "!pip install gtts\n", "!pip install gradio\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ufQmdSNceI4n", "outputId": "4d9d43d1-df1f-4e31-d299-06f77b952329" }, "execution_count": 1, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting gtts\n", " Downloading gTTS-2.5.3-py3-none-any.whl.metadata (4.1 kB)\n", "Requirement already satisfied: requests<3,>=2.27 in /usr/local/lib/python3.10/dist-packages (from gtts) (2.32.3)\n", "Requirement already satisfied: click<8.2,>=7.1 in /usr/local/lib/python3.10/dist-packages (from gtts) (8.1.7)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27->gtts) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27->gtts) (3.10)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27->gtts) (2.2.3)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27->gtts) (2024.8.30)\n", "Downloading gTTS-2.5.3-py3-none-any.whl (29 kB)\n", "Installing collected packages: gtts\n", "Successfully installed gtts-2.5.3\n", "Collecting gradio\n", " Downloading gradio-4.44.1-py3-none-any.whl.metadata (15 kB)\n", "Collecting aiofiles<24.0,>=22.0 (from gradio)\n", " Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)\n", "Requirement already satisfied: anyio<5.0,>=3.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.7.1)\n", "Collecting fastapi<1.0 (from gradio)\n", " Downloading fastapi-0.115.0-py3-none-any.whl.metadata (27 kB)\n", 
"Collecting ffmpy (from gradio)\n", " Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)\n", "Collecting gradio-client==1.3.0 (from gradio)\n", " Downloading gradio_client-1.3.0-py3-none-any.whl.metadata (7.1 kB)\n", "Collecting httpx>=0.24.1 (from gradio)\n", " Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)\n", "Requirement already satisfied: huggingface-hub>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from gradio) (0.24.7)\n", "Requirement already satisfied: importlib-resources<7.0,>=1.3 in /usr/local/lib/python3.10/dist-packages (from gradio) (6.4.5)\n", "Requirement already satisfied: jinja2<4.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.1.4)\n", "Requirement already satisfied: markupsafe~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.1.5)\n", "Requirement already satisfied: matplotlib~=3.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.7.1)\n", "Requirement already satisfied: numpy<3.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (1.26.4)\n", "Collecting orjson~=3.0 (from gradio)\n", " Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.4/50.4 kB\u001b[0m \u001b[31m784.9 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from gradio) (24.1)\n", "Requirement already satisfied: pandas<3.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.2.2)\n", "Requirement already satisfied: pillow<11.0,>=8.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (10.4.0)\n", "Requirement already satisfied: pydantic>=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.9.2)\n", "Collecting pydub (from gradio)\n", " Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)\n", "Collecting python-multipart>=0.0.9 (from 
gradio)\n", " Downloading python_multipart-0.0.12-py3-none-any.whl.metadata (1.9 kB)\n", "Requirement already satisfied: pyyaml<7.0,>=5.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (6.0.2)\n", "Collecting ruff>=0.2.2 (from gradio)\n", " Downloading ruff-0.6.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)\n", "Collecting semantic-version~=2.0 (from gradio)\n", " Downloading semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 kB)\n", "Collecting tomlkit==0.12.0 (from gradio)\n", " Downloading tomlkit-0.12.0-py3-none-any.whl.metadata (2.7 kB)\n", "Requirement already satisfied: typer<1.0,>=0.12 in /usr/local/lib/python3.10/dist-packages (from gradio) (0.12.5)\n", "Requirement already satisfied: typing-extensions~=4.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (4.12.2)\n", "Requirement already satisfied: urllib3~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.2.3)\n", "Collecting uvicorn>=0.14.0 (from gradio)\n", " Downloading uvicorn-0.31.0-py3-none-any.whl.metadata (6.6 kB)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from gradio-client==1.3.0->gradio) (2024.6.1)\n", "Collecting websockets<13.0,>=10.0 (from gradio-client==1.3.0->gradio)\n", " Downloading websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n", "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/dist-packages (from anyio<5.0,>=3.0->gradio) (3.10)\n", "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<5.0,>=3.0->gradio) (1.3.1)\n", "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5.0,>=3.0->gradio) (1.2.2)\n", "Collecting starlette<0.39.0,>=0.37.2 (from fastapi<1.0->gradio)\n", " Downloading starlette-0.38.6-py3-none-any.whl.metadata (6.0 kB)\n", "Requirement already satisfied: certifi 
in /usr/local/lib/python3.10/dist-packages (from httpx>=0.24.1->gradio) (2024.8.30)\n", "Collecting httpcore==1.* (from httpx>=0.24.1->gradio)\n", " Downloading httpcore-1.0.6-py3-none-any.whl.metadata (21 kB)\n", "Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx>=0.24.1->gradio)\n", " Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.3->gradio) (3.16.1)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.3->gradio) (2.32.3)\n", "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.3->gradio) (4.66.5)\n", "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (1.3.0)\n", "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (0.12.1)\n", "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (4.54.1)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (1.4.7)\n", "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (3.1.4)\n", "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas<3.0,>=1.0->gradio) (2024.2)\n", "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas<3.0,>=1.0->gradio) (2024.2)\n", "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=2.0->gradio) (0.7.0)\n", "Requirement 
already satisfied: pydantic-core==2.23.4 in /usr/local/lib/python3.10/dist-packages (from pydantic>=2.0->gradio) (2.23.4)\n", "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0,>=0.12->gradio) (8.1.7)\n", "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0,>=0.12->gradio) (1.5.4)\n", "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0,>=0.12->gradio) (13.8.1)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib~=3.0->gradio) (1.16.0)\n", "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (3.0.0)\n", "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (2.18.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.19.3->gradio) (3.3.2)\n", "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.12->gradio) (0.1.2)\n", "Downloading gradio-4.44.1-py3-none-any.whl (18.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m18.1/18.1 MB\u001b[0m \u001b[31m73.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading gradio_client-1.3.0-py3-none-any.whl (318 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m318.7/318.7 kB\u001b[0m \u001b[31m26.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading tomlkit-0.12.0-py3-none-any.whl (37 kB)\n", "Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)\n", "Downloading fastapi-0.115.0-py3-none-any.whl (94 kB)\n", "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m94.6/94.6 kB\u001b[0m \u001b[31m10.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading httpx-0.27.2-py3-none-any.whl (76 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.4/76.4 kB\u001b[0m \u001b[31m7.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading httpcore-1.0.6-py3-none-any.whl (78 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.0/78.0 kB\u001b[0m \u001b[31m6.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (141 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m141.9/141.9 kB\u001b[0m \u001b[31m12.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading python_multipart-0.0.12-py3-none-any.whl (23 kB)\n", "Downloading ruff-0.6.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.9 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.9/10.9 MB\u001b[0m \u001b[31m62.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n", "Downloading uvicorn-0.31.0-py3-none-any.whl (63 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m63.7/63.7 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading ffmpy-0.4.0-py3-none-any.whl (5.8 kB)\n", "Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n", "Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading starlette-0.38.6-py3-none-any.whl (71 kB)\n", "\u001b[2K 
# Import necessary libraries and modules
import tempfile
from functools import lru_cache

from transformers import BlipProcessor, BlipForConditionalGeneration, MBartForConditionalGeneration, MBart50Tokenizer
from gtts import gTTS
from PIL import Image
import gradio as gr

# Configuration: model checkpoints and language codes used by the pipeline
CAPTION_CHECKPOINT = "Salesforce/blip-image-captioning-base"
TRANSLATION_CHECKPOINT = "facebook/mbart-large-50-many-to-many-mmt"
SOURCE_LANG_CODE = "en_XX"  # mBART-50 code for the English captions produced by BLIP
TARGET_LANG_CODE = "ar_AR"  # mBART-50 code for the Arabic output


# Pipeline Component 1: Image Captioning Model
class ImageToText:
    def __init__(self):
        """Initializes the BLIP processor and model for image captioning."""
        self.processor = BlipProcessor.from_pretrained(CAPTION_CHECKPOINT)
        self.model = BlipForConditionalGeneration.from_pretrained(CAPTION_CHECKPOINT)
        print("BLIP Image Captioning Model Loaded")

    def generate_caption(self, img):
        """Generates a caption for the given image.

        Args:
            img: The image to describe (the pipeline passes a PIL image).

        Returns:
            The decoded caption string, with special tokens removed.
        """
        inputs = self.processor(images=img, return_tensors="pt")
        generated_ids = self.model.generate(**inputs)
        caption = self.processor.decode(generated_ids[0], skip_special_tokens=True)
        return caption


# Pipeline Component 2: Arabic Translation Model (mBART)
class ArabicTranslator:
    def __init__(self):
        """Initializes the mBART-50 model for English to Arabic translation."""
        self.tokenizer = MBart50Tokenizer.from_pretrained(TRANSLATION_CHECKPOINT)
        # The source language is a property of the tokenizer, not an argument of
        # the tokenizer call, so it is configured once here.
        self.tokenizer.src_lang = SOURCE_LANG_CODE
        self.model = MBartForConditionalGeneration.from_pretrained(TRANSLATION_CHECKPOINT)
        print("mBART Arabic Translation Model Loaded")

    def translate(self, text):
        """Translates the given English text to Arabic.

        Args:
            text: English text to translate.

        Returns:
            The translated text, with special tokens removed.
        """
        inputs = self.tokenizer(text, return_tensors="pt")
        # Forward the whole encoding (input ids and attention mask) and force the
        # first generated token to the Arabic language code so the decoder
        # produces Arabic.
        translated = self.model.generate(
            **inputs,
            forced_bos_token_id=self.tokenizer.lang_code_to_id[TARGET_LANG_CODE],
        )
        translated_text = self.tokenizer.batch_decode(translated, skip_special_tokens=True)[0]
        return translated_text


# Pipeline Component 3: Text-to-Speech Model (gTTS)
class TextToSpeech:
    def __init__(self, lang='ar'):
        """Initializes the Text-to-Speech system (Arabic by default).

        Args:
            lang: Language code handed to gTTS.
        """
        self.lang = lang

    def generate_audio(self, text):
        """Generates an MP3 file from the given text.

        Every call writes to its own temporary file, so overlapping requests
        cannot overwrite each other's audio.

        Args:
            text: Text to synthesize; must be non-empty (gTTS rejects empty text).

        Returns:
            Path of the generated MP3 file. The file is not deleted here.
        """
        tts = gTTS(text=text, lang=self.lang, slow=False)
        # Reserve a unique path, then let gTTS write to it after the handle is closed.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
            audio_file_path = audio_file.name
        tts.save(audio_file_path)
        return audio_file_path


# Main Pipeline Integration
class ImageToArabicSpeechPipeline:
    def __init__(self):
        """Initializes all pipeline components (loads both models)."""
        self.caption_model = ImageToText()
        self.translation_model = ArabicTranslator()
        self.tts_model = TextToSpeech()

    def process_image(self, img):
        """Captions the image, translates the caption to Arabic, and converts it to speech.

        Args:
            img: The image to process.

        Returns:
            A tuple ``(caption, translated_text, audio_file)``. ``audio_file`` is
            the path of the generated MP3, or ``None`` when the translation is
            empty and there is nothing to speak.
        """
        caption = self.caption_model.generate_caption(img)
        translated_text = self.translation_model.translate(caption)
        # gTTS refuses empty text, so skip synthesis instead of failing the request.
        if translated_text.strip():
            audio_file = self.tts_model.generate_audio(translated_text)
        else:
            audio_file = None
        return caption, translated_text, audio_file


@lru_cache(maxsize=1)
def _get_pipeline():
    """Builds the pipeline on first use and returns the same instance afterwards.

    Loading BLIP and mBART is expensive, so it must not be repeated per request.
    """
    return ImageToArabicSpeechPipeline()


# Gradio Interface Setup
def demo(image):
    """Gradio callback: processes an image and returns caption, translation, and audio.

    Args:
        image: Path of the uploaded image (the input component uses ``type="filepath"``).

    Returns:
        A tuple ``(caption, translated_text, audio_file)`` for the three output components.

    Raises:
        gr.Error: If no image was provided.
    """
    if not image:
        raise gr.Error("Please upload an image first.")
    # convert() loads the pixel data, so the file can be closed right after it.
    with Image.open(image) as opened_image:
        img = opened_image.convert("RGB")
    caption, translated_text, audio_file = _get_pipeline().process_image(img)
    return caption, translated_text, audio_file


# Define Gradio Interface
iface = gr.Interface(
    fn=demo,
    inputs=gr.Image(type="filepath"),
    outputs=[gr.Textbox(label="Caption"), gr.Textbox(label="Translated Text"), gr.Audio(label="Generated Speech")]
)

# Launch the Gradio Interface
iface.launch()
"execution_count": null, "outputs": [] } ] }