nbroad HF staff committed on
Commit
a4b7c25
·
verified ·
1 Parent(s): c3a944b

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +4 -4
handler.py CHANGED
@@ -5,22 +5,22 @@ from qwen_vl_utils import process_vision_info
5
 
6
  class EndpointHandler():
7
 
8
- def __init__(self):
9
  # default: Load the model on the available device(s)
10
  self.model = Qwen2VLForConditionalGeneration.from_pretrained(
11
- "Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto",
12
  )
13
 
14
  # We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
15
  # model = Qwen2VLForConditionalGeneration.from_pretrained(
16
- # "Qwen/Qwen2-VL-7B-Instruct",
17
  # torch_dtype=torch.bfloat16,
18
  # attn_implementation="flash_attention_2",
19
  # device_map="auto",
20
  # )
21
 
22
  # default processor
23
- self.processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
24
 
25
  # The default range for the number of visual tokens per image in the model is 4-16384. You can set min_pixels and max_pixels according to your needs, such as a token count range of 256-1280, to balance speed and memory usage.
26
  # min_pixels = 256*28*28
 
5
 
6
  class EndpointHandler():
7
 
8
+ def __init__(self, path):
9
  # default: Load the model on the available device(s)
10
  self.model = Qwen2VLForConditionalGeneration.from_pretrained(
11
+ path, torch_dtype="auto", device_map="auto",
12
  )
13
 
14
  # We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
15
  # model = Qwen2VLForConditionalGeneration.from_pretrained(
16
+ # path,
17
  # torch_dtype=torch.bfloat16,
18
  # attn_implementation="flash_attention_2",
19
  # device_map="auto",
20
  # )
21
 
22
  # default processor
23
+ self.processor = AutoProcessor.from_pretrained(path)
24
 
25
  # The default range for the number of visual tokens per image in the model is 4-16384. You can set min_pixels and max_pixels according to your needs, such as a token count range of 256-1280, to balance speed and memory usage.
26
  # min_pixels = 256*28*28