zyq committed on
Commit ·
5aa594b
1
Parent(s): dec8751
fix rope
Browse files
- README.md +1 -1
- README_ZH.md +1 -1
- config.json +1 -1
README.md
CHANGED
|
@@ -55,7 +55,7 @@ InnoMegrez2-Preview is a device native large language model. Megrez2 takes advan
|
|
| 55 |
| **Number of Shared Experts** | 4 |
|
| 56 |
| **Vocabulary Size** | 128,880 |
|
| 57 |
| **Context Length** | 32K |
|
| 58 |
-
| **Base Frequency of RoPE** |
|
| 59 |
| **Attention Mechanism** | GQA |
|
| 60 |
| **Activation Function** | SwiGLU |
|
| 61 |
</div>
|
|
|
|
| 55 |
| **Number of Shared Experts** | 4 |
|
| 56 |
| **Vocabulary Size** | 128,880 |
|
| 57 |
| **Context Length** | 32K |
|
| 58 |
+
| **Base Frequency of RoPE** | 5,000,000 |
|
| 59 |
| **Attention Mechanism** | GQA |
|
| 60 |
| **Activation Function** | SwiGLU |
|
| 61 |
</div>
|
README_ZH.md
CHANGED
|
@@ -42,7 +42,7 @@ InnoMegrez2-Preview 是专为终端设备设计的大模型,兼顾MoE的精度
|
|
| 42 |
| **Number of Shared Experts** | 4 |
|
| 43 |
| **Vocabulary Size** | 128,880 |
|
| 44 |
| **Context Length** | 32K |
|
| 45 |
-
| **Base Frequency of RoPE** |
|
| 46 |
| **Attention Mechanism** | GQA |
|
| 47 |
| **Activation Function** | SwiGLU |
|
| 48 |
</div>
|
|
|
|
| 42 |
| **Number of Shared Experts** | 4 |
|
| 43 |
| **Vocabulary Size** | 128,880 |
|
| 44 |
| **Context Length** | 32K |
|
| 45 |
+
| **Base Frequency of RoPE** | 5,000,000 |
|
| 46 |
| **Attention Mechanism** | GQA |
|
| 47 |
| **Activation Function** | SwiGLU |
|
| 48 |
</div>
|
config.json
CHANGED
|
@@ -36,7 +36,7 @@
|
|
| 36 |
"pretraining_tp": 1,
|
| 37 |
"rms_norm_eps": 1e-06,
|
| 38 |
"rope_scaling": null,
|
| 39 |
-
"rope_theta":
|
| 40 |
"routed_scaling_factor": 1.0,
|
| 41 |
"scoring_func": "softmax",
|
| 42 |
"seq_aux": true,
|
|
|
|
| 36 |
"pretraining_tp": 1,
|
| 37 |
"rms_norm_eps": 1e-06,
|
| 38 |
"rope_scaling": null,
|
| 39 |
+
"rope_theta": 5000000,
|
| 40 |
"routed_scaling_factor": 1.0,
|
| 41 |
"scoring_func": "softmax",
|
| 42 |
"seq_aux": true,
|