Update README.md
Browse files
README.md
CHANGED
|
@@ -159,6 +159,16 @@ This version of the lm-evaluation-harness includes versions of MMLU, ARC-Challen
|
|
| 159 |
<td><strong>Recovery</strong>
|
| 160 |
</td>
|
| 161 |
</tr>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
<tr>
|
| 163 |
<td>MMLU (CoT, 0-shot)
|
| 164 |
</td>
|
|
@@ -229,6 +239,16 @@ This version of the lm-evaluation-harness includes versions of MMLU, ARC-Challen
|
|
| 229 |
<td>101.2%
|
| 230 |
</td>
|
| 231 |
</tr>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
</table>
|
| 233 |
|
| 234 |
### Reproduction
|
|
|
|
| 159 |
<td><strong>Recovery</strong>
|
| 160 |
</td>
|
| 161 |
</tr>
|
| 162 |
+
<tr>
|
| 163 |
+
<td>MMLU (5-shot)
|
| 164 |
+
</td>
|
| 165 |
+
<td>87.41
|
| 166 |
+
</td>
|
| 167 |
+
<td>87.47
|
| 168 |
+
</td>
|
| 169 |
+
<td>100.1%
|
| 170 |
+
</td>
|
| 171 |
+
</tr>
|
| 172 |
<tr>
|
| 173 |
<td>MMLU (CoT, 0-shot)
|
| 174 |
</td>
|
|
|
|
| 239 |
<td>101.2%
|
| 240 |
</td>
|
| 241 |
</tr>
|
| 242 |
+
<tr>
|
| 243 |
+
<td><strong>Average</strong>
|
| 244 |
+
</td>
|
| 245 |
+
<td><strong>86.73</strong>
|
| 246 |
+
</td>
|
| 247 |
+
<td><strong>86.89</strong>
|
| 248 |
+
</td>
|
| 249 |
+
<td><strong>100.2%</strong>
|
| 250 |
+
</td>
|
| 251 |
+
</tr>
|
| 252 |
</table>
|
| 253 |
|
| 254 |
### Reproduction
|