Update afrobench lite

#7
by JessicaOjo - opened
Files changed (1) hide show
  1. leaderboard_json/afrobench_lite.json +176 -0
leaderboard_json/afrobench_lite.json ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "NLI": {
3
+ "afrixnli": {
4
+ "Aya-101 13B": 52.4,
5
+ "Gemma1.1 7b": 34.6,
6
+ "LLaMa2 7b": 33.7,
7
+ "LLaMa3 8B": 35.6,
8
+ "LLaMa3.1 8B": 36.4,
9
+ "LLaMAX3 8B": 41.4,
10
+ "Gemma2 9b": 40.9,
11
+ "Gemma2 27b": 43.7,
12
+ "LLaMa3.1 70B": 38.4,
13
+ "AfroLLaMa 8B": 36.3,
14
+ "Gemini 1.5 pro": 63.6,
15
+ "GPT-4o (Aug)": 66.3,
16
+ "Gemma 3 27b": 51.2,
17
+ "Gemini-2.0 Flash": 66.5,
18
+ "GPT-4.1 (April)": 67.5,
19
+ "LLaMa 4 405B": 45.5,
20
+ "Lugha-Llama 8B": 36.7,
21
+ "Gemini-2.5 Flash": 69.9,
22
+ "Claude 4.0 Sonnet": 68.1,
23
+ "Claude 3.7 Sonnet": 59.8,
24
+ "Claude 4.5 Sonnet": 71.7
25
+ }
26
+ },
27
+ "Intent": {
28
+ "injongointent": {
29
+ "AfroLLaMa 8B": 0.9,
30
+ "LLaMAX3 8B": 6.3,
31
+ "LLaMa2 7b": 1.4,
32
+ "LLaMa3.1 70B": 36.9,
33
+ "LLaMa3.1 8B": 6.3,
34
+ "LLaMa3 8B": 2.3,
35
+ "Gemma1.1 7b": 10.2,
36
+ "Gemma2 27b": 37.8,
37
+ "Gemma2 9b": 34.1,
38
+ "Aya-101 13B": 48.2,
39
+ "Gemini 1.5 pro": 76.3,
40
+ "GPT-4o (Aug)": 78.3,
41
+ "Gemma 3 27b": 55.2,
42
+ "Gemini-2.0 Flash": 73.9,
43
+ "GPT-4.1 (April)": 84.4,
44
+ "LLaMa 4 405B": 73.9,
45
+ "Lugha-Llama 8B": 4.1,
46
+ "Gemini-2.5 Flash": 87.9,
47
+ "Claude 4.0 Sonnet": 80.4,
48
+ "Claude 3.7 Sonnet": 73.4,
49
+ "Claude 4.5 Sonnet": 81.6
50
+ }
51
+ },
52
+ "MT(en/fr-xx)": {
53
+ "flores - en_xx": {
54
+ "AfroLLaMa 8B": 10.7,
55
+ "LLaMAX3 8B": 27.7,
56
+ "LLaMa2 7b": 9.2,
57
+ "LLaMa3.1 70B": 27.4,
58
+ "LLaMa3.1 8B": 14.5,
59
+ "LLaMa3 8B": 11.8,
60
+ "Aya-101 13B": 31.1,
61
+ "Gemma1.1 7b": 11.7,
62
+ "Gemma2 27b": 38.4,
63
+ "Gemma2 9b": 36.5,
64
+ "Gemini 1.5 pro": 43.0,
65
+ "GPT-4o (Aug)": 42.4,
66
+ "Gemma 3 27b": 33.1,
67
+ "Gemini-2.0 Flash": 59.1,
68
+ "GPT-4.1 (April)": 47.3,
69
+ "LLaMa 4 405B": 42.8,
70
+ "Lugha-Llama 8B": 22.1,
71
+ "Gemini-2.5 Flash": 46.5,
72
+ "Claude 4.0 Sonnet": 46.0,
73
+ "Claude 3.7 Sonnet": 44.0
74
+ }
75
+ },
76
+ "MMLU": {
77
+ "afrimmlu": {
78
+ "AfroLLaMa 8B": 25.8,
79
+ "Aya-101 13B": 30.2,
80
+ "Gemma1.1 7b": 27.3,
81
+ "LLaMa2 7b": 25.9,
82
+ "LLaMa3 8B": 28.4,
83
+ "LLaMAX3 8B": 29.6,
84
+ "Gemma2 9b": 36.1,
85
+ "Gemma2 27b": 40.8,
86
+ "LLaMa3.1 8B": 31.6,
87
+ "LLaMa3.1 70B": 40.6,
88
+ "GPT-4o (Aug)": 63.1,
89
+ "Gemini 1.5 pro": 62.6,
90
+ "Gemma 3 27b": 44.4,
91
+ "Gemini-2.0 Flash": 57.8,
92
+ "GPT-4.1 (April)": 60.2,
93
+ "LLaMa 4 405B": 15.8,
94
+ "Lugha-Llama 8B": 25.2,
95
+ "Gemini-2.5 Flash": 67.7,
96
+ "Claude 4.0 Sonnet": 75.5,
97
+ "Claude 3.7 Sonnet": 66.7,
98
+ "Claude 4.5 Sonnet": 58.6
99
+ }
100
+ },
101
+ "Math": {
102
+ "afrimgsm": {
103
+ "AfroLLaMa 8B": 0.3,
104
+ "Aya-101 13B": 4.9,
105
+ "Gemma1.1 7b": 5.0,
106
+ "LLaMa2 7b": 1.9,
107
+ "LLaMa3 8B": 5.7,
108
+ "LLaMa3.1 8B": 7.8,
109
+ "LLaMAX3 8B": 5.2,
110
+ "Gemma2 9b": 21.7,
111
+ "Gemma2 27b": 31.1,
112
+ "LLaMa3.1 70B": 26.5,
113
+ "GPT-4o (Aug)": 57.3,
114
+ "Gemini 1.5 pro": 57.7,
115
+ "Gemma 3 27b": 47.5,
116
+ "Gemini-2.0 Flash": 67.5,
117
+ "GPT-4.1 (April)": 59.5,
118
+ "LLaMa 4 405B": 65.0,
119
+ "Lugha-Llama 8B": 1.8,
120
+ "Gemini-2.5 Flash": 70.6,
121
+ "Claude 4.0 Sonnet": 66.9,
122
+ "Claude 3.7 Sonnet": 35.2,
123
+ "Claude 4.5 Sonnet": 73.1
124
+ }
125
+ },
126
+ "Topic": {
127
+ "sib": {
128
+ "AfroLLaMa 8B": 13.9,
129
+ "LLaMAX3 8B": 62.6,
130
+ "LLaMa2 7b": 18.0,
131
+ "LLaMa3.1 70B": 64.3,
132
+ "LLaMa3.1 8B": 48.0,
133
+ "LLaMa3 8B": 40.1,
134
+ "Aya-101 13B": 76.6,
135
+ "Gemma1.1 7b": 42.7,
136
+ "Gemma2 27b": 68.8,
137
+ "Gemma2 9b": 65.7,
138
+ "Gemini 1.5 pro": 81.3,
139
+ "GPT-4o (Aug)": 82.8,
140
+ "Gemma 3 27b": 74.2,
141
+ "Gemini-2.0 Flash": 84.9,
142
+ "GPT-4.1 (April)": 84.8,
143
+ "LLaMa 4 405B": 80.6,
144
+ "Lugha-Llama 8B": 34.1,
145
+ "Gemini-2.5 Flash": 87.2,
146
+ "Claude 4.0 Sonnet": 83.2,
147
+ "Claude 3.7 Sonnet": 84.9,
148
+ "Claude 4.5 Sonnet": 84.2
149
+ }
150
+ },
151
+ "RC": {
152
+ "belebele": {
153
+ "AfroLLaMa 8B": 24.7,
154
+ "LLaMAX3 8B": 28.9,
155
+ "LLaMa2 7b": 23.5,
156
+ "LLaMa3.1 8B": 36.2,
157
+ "LLaMa3.1 70B": 45.9,
158
+ "LLaMa3 8B": 26.2,
159
+ "Aya-101 13B": 60.3,
160
+ "Gemma1.1 7b": 34.3,
161
+ "Gemma2 27b": 53.5,
162
+ "Gemma2 9b": 50.5,
163
+ "Gemini 1.5 pro": 55.0,
164
+ "GPT-4o (Aug)": 70.4,
165
+ "Gemma 3 27b": 62.4,
166
+ "Gemini-2.0 Flash": 56.8,
167
+ "GPT-4.1 (April)": 64.8,
168
+ "LLaMa 4 405B": 24.6,
169
+ "Lugha-Llama 8B": 23.0,
170
+ "Gemini-2.5 Flash": 42.2,
171
+ "Claude 4.0 Sonnet": 76.2,
172
+ "Claude 3.7 Sonnet": 65.1,
173
+ "Claude 4.5 Sonnet": 74.8
174
+ }
175
+ }
176
+ }