PHI35VISION

Runtime error

App Files Community

aiqtech committed 14 days ago

Commit

36163a8

•

1 Parent(s): e228d7c

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -52

app.py CHANGED Viewed

@@ -5,34 +5,16 @@ import torch
 import gradio as gr
 from threading import Thread
 from PIL import Image
-# Install required packages
-import subprocess
-subprocess.run('pip install --upgrade transformers', shell=True)
-subprocess.run('pip install accelerate', shell=True)
-from transformers import AutoConfig, PreTrainedModel, AutoTokenizer
-# Model and tokenizer initialization
-model_name = "Qwen/QVQ-72B-Preview"
-config = AutoConfig.from_pretrained(
-    model_name,
-    trust_remote_code=True
-)
-tokenizer = AutoTokenizer.from_pretrained(
-    model_name,
-    trust_remote_code=True
-)
-model = PreTrainedModel.from_pretrained(
-    model_name,
-    config=config,
-    trust_remote_code=True,
-    device_map="auto",
-    torch_dtype=torch.float16
 )
 # Footer
 footer = """
@@ -45,35 +27,58 @@ footer = """
 @spaces.GPU()
 def process_image(image, text_input=None):
     try:
-        # Convert image to PIL format
-        image = Image.fromarray(image).convert("RGB")
-        # Prepare inputs
-        if text_input:
-            messages = [
-                {
-                    "role": "user",
-                    "content": [
-                        {"image": image},
-                        {"text": text_input}
-                    ]
-                }
-            ]
-        else:
-            messages = [
-                {
-                    "role": "user",
-                    "content": [
-                        {"image": image},
-                        {"text": "Please describe this image in detail."}
-                    ]
-                }
-            ]
         # Generate response
-        response = model.chat(tokenizer, messages=messages)
-        return response
     except Exception as e:
         return f"Error processing image: {str(e)}"

 import gradio as gr
 from threading import Thread
 from PIL import Image
+from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
+from qwen_vl_utils import process_vision_info
+# Model and processor initialization
+model = Qwen2VLForConditionalGeneration.from_pretrained(
+    "Qwen/QVQ-72B-Preview",
+    torch_dtype="auto",
+    device_map="auto"
 )
+processor = AutoProcessor.from_pretrained("Qwen/QVQ-72B-Preview")
 # Footer
 footer = """
 @spaces.GPU()
 def process_image(image, text_input=None):
     try:
+        # Convert image to PIL format if needed
+        if not isinstance(image, Image.Image):
+            image = Image.fromarray(image).convert("RGB")
+        # Prepare messages
+        if not text_input:
+            text_input = "Please describe this image in detail."
+        messages = [
+            {
+                "role": "system",
+                "content": [
+                    {"type": "text", "text": "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step."}
+                ],
+            },
+            {
+                "role": "user",
+                "content": [
+                    {"type": "image", "image": image},
+                    {"type": "text", "text": text_input}
+                ],
+            }
+        ]
+        # Process inputs
+        text = processor.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True
+        )
+        image_inputs, video_inputs = process_vision_info(messages)
+        inputs = processor(
+            text=[text],
+            images=image_inputs,
+            videos=video_inputs,
+            padding=True,
+            return_tensors="pt",
+        )
+        inputs = inputs.to("cuda")
         # Generate response
+        generated_ids = model.generate(**inputs, max_new_tokens=8192)
+        generated_ids_trimmed = [
+            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+        ]
+        output_text = processor.batch_decode(
+            generated_ids_trimmed,
+            skip_special_tokens=True,
+            clean_up_tokenization_spaces=False
+        )[0]
+        return output_text
     except Exception as e:
         return f"Error processing image: {str(e)}"