Qwen
/

Qwen2-VL-7B-Instruct

Image-Text-to-Text

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

nbroad HF staff committed on Nov 22, 2024

Commit

a4b7c25

·

verified ·

1 Parent(s): c3a944b

Update handler.py

Files changed (1) hide show

handler.py +4 -4

handler.py CHANGED Viewed

@@ -5,22 +5,22 @@ from qwen_vl_utils import process_vision_info
 class EndpointHandler():
-    def __init__(self):
         # default: Load the model on the available device(s)
         self.model = Qwen2VLForConditionalGeneration.from_pretrained(
-            "Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto",
         )
         # We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
         # model = Qwen2VLForConditionalGeneration.from_pretrained(
-        #     "Qwen/Qwen2-VL-7B-Instruct",
         #     torch_dtype=torch.bfloat16,
         #     attn_implementation="flash_attention_2",
         #     device_map="auto",
         # )
         # default processer
-        self.processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
         # The default range for the number of visual tokens per image in the model is 4-16384. You can set min_pixels and max_pixels according to your needs, such as a token count range of 256-1280, to balance speed and memory usage.
         # min_pixels = 256*28*28

 class EndpointHandler():
+    def __init__(self, path):
         # default: Load the model on the available device(s)
         self.model = Qwen2VLForConditionalGeneration.from_pretrained(
+           path, torch_dtype="auto", device_map="auto",
         )
         # We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
         # model = Qwen2VLForConditionalGeneration.from_pretrained(
+        #    path,
         #     torch_dtype=torch.bfloat16,
         #     attn_implementation="flash_attention_2",
         #     device_map="auto",
         # )
         # default processer
+        self.processor = AutoProcessor.from_pretrained(path)
         # The default range for the number of visual tokens per image in the model is 4-16384. You can set min_pixels and max_pixels according to your needs, such as a token count range of 256-1280, to balance speed and memory usage.
         # min_pixels = 256*28*28