dacorvo HF staff committed on
Commit
a8df9db
·
verified ·
1 Parent(s): 20fca04

Create qwen-2.5-large.json

Browse files
inference-cache-config/qwen-2.5-large.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Qwen/Qwen2.5-32B-Instruct": [
3
+ {
4
+ "batch_size": 1,
5
+ "sequence_length": 4096,
6
+ "num_cores": 8,
7
+ "auto_cast_type": "bf16"
8
+ },
9
+ {
10
+ "batch_size": 8,
11
+ "sequence_length": 4096,
12
+ "num_cores": 8,
13
+ "auto_cast_type": "bf16"
14
+ }
15
+ ],
16
+ "Qwen/Qwen2.5-72B-Instruct": [
17
+ {
18
+ "batch_size": 1,
19
+ "sequence_length": 4096,
20
+ "num_cores": 24,
21
+ "auto_cast_type": "bf16"
22
+ },
23
+ {
24
+ "batch_size": 4,
25
+ "sequence_length": 4096,
26
+ "num_cores": 24,
27
+ "auto_cast_type": "bf16"
28
+ }
29
+ ]
30
+ }