Spaces:

25b3nk
/

GeminiVision-Test

Sleeping

App Files Files Community

bhaskarEEN committed on May 26, 2024

Commit

25852f1

0 Parent(s):

- gemini pro testing

Browse files

Files changed (3) hide show

.gitignore +1 -0
app.py +82 -0
requirements.txt +2 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ .env

app.py ADDED Viewed

	@@ -0,0 +1,82 @@

+"""
+App to take in image and output a list of objects in the image
+"""
+import os
+from pathlib import Path
+import google.generativeai as genai
+import gradio as gr
+from dotenv import load_dotenv
# Read variables from a local .env file into the environment, then hand the
# API key to the Gemini client. A missing GOOGLE_API_KEY raises KeyError here.
load_dotenv()
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

# Instruction text sent to the model together with each uploaded image.
input_prompt = """
Extract the objects in the provided image and output them in a list in alphabetical order
"""

# Sampling parameters passed to the model.
generation_config = dict(
    temperature=0,
    top_p=1,
    top_k=32,
    max_output_tokens=4096,
)

# Every harm category uses the same blocking threshold.
safety_settings = [
    {"category": category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
    for category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

# Shared model handle used for every request.
model = genai.GenerativeModel(
    model_name="gemini-pro-vision",
    generation_config=generation_config,
    safety_settings=safety_settings,
)
def input_image_setup(file_loc):
    """Read an image file and wrap it in the part format sent to the model.

    Args:
        file_loc: Location of the image on disk (anything ``Path`` accepts).

    Returns:
        A one-element list containing a dict with two keys: ``"mime_type"``
        (guessed from the file extension) and ``"data"`` (the raw file bytes).

    Raises:
        FileNotFoundError: If ``file_loc`` does not exist.
    """
    import mimetypes  # local import: only this helper needs it

    img = Path(file_loc)
    if not img.exists():
        raise FileNotFoundError(f"Could not find image: {img}")

    # The upload widget accepts any image type, so derive the MIME type from
    # the extension instead of labelling everything as JPEG. Fall back to the
    # previous default when the extension is unknown or not an image type.
    mime_type, _ = mimetypes.guess_type(img.name)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    return [{"mime_type": mime_type, "data": img.read_bytes()}]
def generate_gemini_response(input_prompt, image_loc):
    """Send the prompt and one image to the model and return its answer as text.

    Args:
        input_prompt: Instruction text placed before the image in the request.
        image_loc: Location of the image file on disk.

    Returns:
        The model's text prefixed with "The objects in the image are: ", or an
        explanatory message when the response carries no text.

    Raises:
        FileNotFoundError: Propagated from ``input_image_setup`` when the image
            file does not exist.
    """
    image_part = input_image_setup(image_loc)[0]
    response = model.generate_content([input_prompt, image_part])

    try:
        answer = response.text
    except ValueError:
        # The google-generativeai `text` accessor raises ValueError when the
        # response holds no usable text part (for example when it was blocked
        # by the safety settings). Report that instead of crashing the handler.
        return (
            "The model did not return any text for this image; "
            "the response may have been blocked by the safety settings."
        )

    return "The objects in the image are: \n" + answer
def upload_file(file_path):
    """Handle an uploaded image: describe it and echo the path back.

    Args:
        file_path: Location of the uploaded image file.

    Returns:
        A ``(file_path, text)`` tuple, where ``text`` is the result of
        ``generate_gemini_response`` for the module-level ``input_prompt``.
    """
    description = generate_gemini_response(input_prompt, file_path)
    return (file_path, description)
# Build the page: a title label, an image preview, an upload button, and a
# text box for the model's answer.
with gr.Blocks() as demo:
    header = gr.Label("Gemini Pro Vision testing")
    image_output = gr.Image()
    submit = gr.UploadButton(
        label="Click to upload the image to be studied",
        file_count="single",
        file_types=["image"],
    )
    output = gr.Textbox(label="Output")
    # upload_file returns (file_path, text); route them to the preview and the
    # text box in that order.
    combined_output = [image_output, output]
    submit.upload(upload_file, submit, combined_output)

demo.launch(debug=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ gradio==4.31.5
2	+ google-generativeai==0.5.4