huseinzol05 committed on
Commit
8b2096c
·
1 Parent(s): 58e4143
Files changed (1) hide show
  1. app.py +90 -99
app.py CHANGED
@@ -21,8 +21,19 @@ INTRODUCTION_TEXT = """
21
  - This test is for programming language understanding.
22
  """
23
 
 
 
 
 
 
 
 
 
 
 
 
24
  close_source = [
25
- {
26
  'model': 'gpt-4-1106-preview',
27
  'BM-PT3 0-shot': 51.85185185185185,
28
  'BM-PT3 1-shot': 66.66666666666666,
@@ -40,105 +51,85 @@ close_source = [
40
  'Tatabahasa 1-shot': 60.80691642651297,
41
  'Tatabahasa 3-shots': 63.03724928366762,
42
  },
43
- {
44
- 'model': 'Antrophic Claude 2',
45
- 'Tatabahasa 0-shot': 61,
46
- 'Tatabahasa 3-shots': 57.8,
47
- },
48
- {
49
- 'model': 'Antrophic Claude 1',
50
- 'Tatabahasa 3-shots': 67,
51
- },
52
  ]
53
 
54
  open_source = [
55
- {
56
- 'model': '[llama2-7b](https://huggingface.co/meta-llama/Llama-2-7b-hf)',
57
- 'Tatabahasa 0-shot': 24.355300859598856,
58
- 'Tatabahasa 1-shot': 28.08022922636103,
59
- 'Tatabahasa 3-shots': 24.641833810888254,
60
- },
61
- {
62
- 'model': '[malaysian-llama2-7b-32k](https://huggingface.co/mesolitica/llama-7b-hf-32768-fpf)',
63
- 'BM-PT3 0-shot': 20.37037037037037,
64
- 'BM-PT3 1-shot': 20.37037037037037,
65
- 'BM-PT3 3-shots': 29.629629629629626,
66
- 'Tatabahasa 0-shot': 17.765042979942695,
67
- 'Tatabahasa 1-shot': 24.068767908309454,
68
- 'Tatabahasa 3-shots': 27.507163323782237,
69
- },
70
- {
71
- 'model': '[malaysian-llama2-7b-32k-instructions](https://huggingface.co/mesolitica/malaysian-llama2-7b-32k-instructions)',
72
- 'BM-PT3 0-shot': 35.294117647058826,
73
- 'BM-PT3 1-shot': 21.153846153846153,
74
- 'BM-PT3 3-shots': 28.30188679245283,
75
- },
76
- {
77
- 'model': '[malaysian-llama2-13b-32k](https://huggingface.co/mesolitica/llama-13b-hf-32768-fpf)',
78
- 'BM-PT3 0-shot': 33.33333333333333,
79
- 'BM-PT3 1-shot': 20.37037037037037,
80
- 'BM-PT3 3-shots': 31.48148148148148,
81
- 'Tatabahasa 0-shot': 26.07449856733524,
82
- 'Tatabahasa 1-shot': 25.214899713467048,
83
- 'Tatabahasa 3-shots': 24.355300859598856,
84
- },
85
- {
86
- 'model': '[malaysian-llama2-13b-32k-instructions](https://huggingface.co/mesolitica/malaysian-llama2-13b-32k-instructions)',
87
- 'BM-PT3 0-shot': 28.57142857142857,
88
- 'BM-PT3 1-shot': 12.244897959183673,
89
- 'BM-PT3 3-shots': 17.307692307692307,
90
- },
91
- {
92
- 'model': '[mistral-7b](https://huggingface.co/mistralai/Mistral-7B-v0.1)',
93
- 'Tatabahasa 0-shot': 28.939828080229223,
94
- 'Tatabahasa 1-shot': 34.38395415472779,
95
- 'Tatabahasa 3-shots': 32.95128939828081,
96
- },
97
- {
98
- 'model': '[malaysian-mistral-7b-4k](https://huggingface.co/mesolitica/mistral-7b-4096-fpf)',
99
- 'BM-PT3 0-shot': 20.37037037037037,
100
- 'BM-PT3 1-shot': 22.22222222222222,
101
- 'BM-PT3 3-shots': 33.33333333333333,
102
- 'Tatabahasa 0-shot': 21.48997134670487,
103
- 'Tatabahasa 1-shot': 28.939828080229223,
104
- 'Tatabahasa 3-shots': 24.641833810888254,
105
- },
106
- {
107
- 'model': '[malaysian-mistral-7b-32k](https://huggingface.co/mesolitica/mistral-7b-32768-fpf)',
108
- 'BM-PT3 0-shot': 16.666666666666664,
109
- 'BM-PT3 1-shot': 16.666666666666664,
110
- 'BM-PT3 3-shots': 25.925925925925924,
111
- 'Tatabahasa 0-shot': 18.624641833810887,
112
- 'Tatabahasa 1-shot': 24.355300859598856,
113
- 'Tatabahasa 3-shots': 28.653295128939828,
114
- },
115
- {
116
- 'model': '[malaysian-mistral-7b-32k-instructions](https://huggingface.co/mesolitica/malaysian-mistral-7b-32k-instructions)',
117
- 'BM-PT3 0-shot': 35.18518518518518,
118
- 'BM-PT3 1-shot': 33.33333333333333,
119
- 'BM-PT3 3-shots': 37.03703703703704,
120
- 'Tatabahasa 0-shot': 55.014326647564474,
121
- 'Tatabahasa 1-shot': 42.693409742120345,
122
- 'Tatabahasa 3-shots': 33.33333333333333,
123
- },
124
- {
125
- 'model': '[aisingapore/sealion3b](https://huggingface.co/aisingapore/sealion3b)',
126
- 'BM-PT3 0-shot': 20.37037037037037,
127
- 'BM-PT3 1-shot': 25.925925925925924,
128
- 'BM-PT3 3-shots': 31.48148148148148,
129
- 'Tatabahasa 0-shot': 21.776504297994272,
130
- 'Tatabahasa 1-shot': 21.776504297994272,
131
- 'Tatabahasa 3-shots': 24.641833810888254,
132
- },
133
- {
134
- 'model': '[aisingapore/sealion7b](https://huggingface.co/aisingapore/sealion7b)',
135
- 'BM-PT3 0-shot': 20.37037037037037,
136
- 'BM-PT3 1-shot': 24.074074074074073,
137
- 'BM-PT3 3-shots': 33.33333333333333,
138
- 'Tatabahasa 0-shot': 25.787965616045845,
139
- 'Tatabahasa 1-shot': 27.507163323782237,
140
- 'Tatabahasa 3-shots': 26.07449856733524,
141
- }
142
  ]
143
 
144
  data = pd.DataFrame(close_source + open_source)
@@ -147,6 +138,6 @@ demo = gr.Blocks(css=custom_css)
147
  with demo:
148
  gr.HTML(TITLE)
149
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
150
- gr.DataFrame(data, datatype = 'markdown')
151
 
152
- demo.launch()
 
21
  - This test is for programming language understanding.
22
  """
23
 
24
+ not_verified = [
25
+ {
26
+ 'model': 'Antrophic Claude 2',
27
+ 'Tatabahasa 0-shot': 61,
28
+ 'Tatabahasa 3-shots': 57.8,
29
+ },
30
+ {
31
+ 'model': 'Antrophic Claude 1',
32
+ 'Tatabahasa 3-shots': 67,
33
+ },
34
+ ]
35
  close_source = [
36
+ {
37
  'model': 'gpt-4-1106-preview',
38
  'BM-PT3 0-shot': 51.85185185185185,
39
  'BM-PT3 1-shot': 66.66666666666666,
 
51
  'Tatabahasa 1-shot': 60.80691642651297,
52
  'Tatabahasa 3-shots': 63.03724928366762,
53
  },
 
 
 
 
 
 
 
 
 
54
  ]
55
 
56
  open_source = [
57
+ {'model': '[llama2-7b](https://huggingface.co/meta-llama/Llama-2-7b-hf)',
58
+ 'Tatabahasa 0-shot': 24.355300859598856,
59
+ 'Tatabahasa 1-shot': 28.08022922636103,
60
+ 'Tatabahasa 3-shots': 24.641833810888254,
61
+ },
62
+ {'model': '[malaysian-llama2-7b-32k](https://huggingface.co/mesolitica/llama-7b-hf-32768-fpf)',
63
+ 'BM-PT3 0-shot': 20.37037037037037,
64
+ 'BM-PT3 1-shot': 20.37037037037037,
65
+ 'BM-PT3 3-shots': 29.629629629629626,
66
+ 'Tatabahasa 0-shot': 17.765042979942695,
67
+ 'Tatabahasa 1-shot': 24.068767908309454,
68
+ 'Tatabahasa 3-shots': 27.507163323782237,
69
+ },
70
+ {'model': '[malaysian-llama2-7b-32k-instructions](https://huggingface.co/mesolitica/malaysian-llama2-7b-32k-instructions)',
71
+ 'BM-PT3 0-shot': 35.294117647058826,
72
+ 'BM-PT3 1-shot': 21.153846153846153,
73
+ 'BM-PT3 3-shots': 28.30188679245283,
74
+ },
75
+ {'model': '[malaysian-llama2-13b-32k](https://huggingface.co/mesolitica/llama-13b-hf-32768-fpf)',
76
+ 'BM-PT3 0-shot': 33.33333333333333,
77
+ 'BM-PT3 1-shot': 20.37037037037037,
78
+ 'BM-PT3 3-shots': 31.48148148148148,
79
+ 'Tatabahasa 0-shot': 26.07449856733524,
80
+ 'Tatabahasa 1-shot': 25.214899713467048,
81
+ 'Tatabahasa 3-shots': 24.355300859598856,
82
+ },
83
+ {'model': '[malaysian-llama2-13b-32k-instructions](https://huggingface.co/mesolitica/malaysian-llama2-13b-32k-instructions)',
84
+ 'BM-PT3 0-shot': 28.57142857142857,
85
+ 'BM-PT3 1-shot': 12.244897959183673,
86
+ 'BM-PT3 3-shots': 17.307692307692307,
87
+ },
88
+ {'model': '[mistral-7b](https://huggingface.co/mistralai/Mistral-7B-v0.1)',
89
+ 'Tatabahasa 0-shot': 28.939828080229223,
90
+ 'Tatabahasa 1-shot': 34.38395415472779,
91
+ 'Tatabahasa 3-shots': 32.95128939828081,
92
+ },
93
+ {'model': '[malaysian-mistral-7b-4k](https://huggingface.co/mesolitica/mistral-7b-4096-fpf)',
94
+ 'BM-PT3 0-shot': 20.37037037037037,
95
+ 'BM-PT3 1-shot': 22.22222222222222,
96
+ 'BM-PT3 3-shots': 33.33333333333333,
97
+ 'Tatabahasa 0-shot': 21.48997134670487,
98
+ 'Tatabahasa 1-shot': 28.939828080229223,
99
+ 'Tatabahasa 3-shots': 24.641833810888254,
100
+ },
101
+ {'model': '[malaysian-mistral-7b-32k](https://huggingface.co/mesolitica/mistral-7b-32768-fpf)',
102
+ 'BM-PT3 0-shot': 16.666666666666664,
103
+ 'BM-PT3 1-shot': 16.666666666666664,
104
+ 'BM-PT3 3-shots': 25.925925925925924,
105
+ 'Tatabahasa 0-shot': 18.624641833810887,
106
+ 'Tatabahasa 1-shot': 24.355300859598856,
107
+ 'Tatabahasa 3-shots': 28.653295128939828,
108
+ },
109
+ {'model': '[malaysian-mistral-7b-32k-instructions](https://huggingface.co/mesolitica/malaysian-mistral-7b-32k-instructions)',
110
+ 'BM-PT3 0-shot': 35.18518518518518,
111
+ 'BM-PT3 1-shot': 33.33333333333333,
112
+ 'BM-PT3 3-shots': 37.03703703703704,
113
+ 'Tatabahasa 0-shot': 55.014326647564474,
114
+ 'Tatabahasa 1-shot': 42.693409742120345,
115
+ 'Tatabahasa 3-shots': 33.33333333333333,
116
+ },
117
+ {'model': '[aisingapore/sealion3b](https://huggingface.co/aisingapore/sealion3b)',
118
+ 'BM-PT3 0-shot': 20.37037037037037,
119
+ 'BM-PT3 1-shot': 25.925925925925924,
120
+ 'BM-PT3 3-shots': 31.48148148148148,
121
+ 'Tatabahasa 0-shot': 21.776504297994272,
122
+ 'Tatabahasa 1-shot': 21.776504297994272,
123
+ 'Tatabahasa 3-shots': 24.641833810888254,
124
+ },
125
+ {'model': '[aisingapore/sealion7b](https://huggingface.co/aisingapore/sealion7b)',
126
+ 'BM-PT3 0-shot': 20.37037037037037,
127
+ 'BM-PT3 1-shot': 24.074074074074073,
128
+ 'BM-PT3 3-shots': 33.33333333333333,
129
+ 'Tatabahasa 0-shot': 25.787965616045845,
130
+ 'Tatabahasa 1-shot': 27.507163323782237,
131
+ 'Tatabahasa 3-shots': 26.07449856733524,
132
+ }
 
 
 
 
 
 
 
 
 
 
 
133
  ]
134
 
135
  data = pd.DataFrame(close_source + open_source)
 
138
  with demo:
139
  gr.HTML(TITLE)
140
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
141
+ gr.DataFrame(data, datatype='markdown')
142
 
143
+ demo.launch()