zyq committed on
Commit
5aa594b
·
1 Parent(s): dec8751
Files changed (3) hide show
  1. README.md +1 -1
  2. README_ZH.md +1 -1
  3. config.json +1 -1
README.md CHANGED
@@ -55,7 +55,7 @@ InnoMegrez2-Preview is a device native large language model. Megrez2 takes advan
55
  | **Number of Shared Experts** | 4 |
56
  | **Vocabulary Size** | 128,880 |
57
  | **Context Length** | 32K |
58
- | **Base Frequency of RoPE** | 1,000,000 |
59
  | **Attention Mechanism** | GQA |
60
  | **Activation Function** | SwiGLU |
61
  </div>
 
55
  | **Number of Shared Experts** | 4 |
56
  | **Vocabulary Size** | 128,880 |
57
  | **Context Length** | 32K |
58
+ | **Base Frequency of RoPE** | 5,000,000 |
59
  | **Attention Mechanism** | GQA |
60
  | **Activation Function** | SwiGLU |
61
  </div>
README_ZH.md CHANGED
@@ -42,7 +42,7 @@ InnoMegrez2-Preview 是专为终端设备设计的大模型,兼顾MoE的精度
42
  | **Number of Shared Experts** | 4 |
43
  | **Vocabulary Size** | 128,880 |
44
  | **Context Length** | 32K |
45
- | **Base Frequency of RoPE** | 1,000,000 |
46
  | **Attention Mechanism** | GQA |
47
  | **Activation Function** | SwiGLU |
48
  </div>
 
42
  | **Number of Shared Experts** | 4 |
43
  | **Vocabulary Size** | 128,880 |
44
  | **Context Length** | 32K |
45
+ | **Base Frequency of RoPE** | 5,000,000 |
46
  | **Attention Mechanism** | GQA |
47
  | **Activation Function** | SwiGLU |
48
  </div>
config.json CHANGED
@@ -36,7 +36,7 @@
36
  "pretraining_tp": 1,
37
  "rms_norm_eps": 1e-06,
38
  "rope_scaling": null,
39
- "rope_theta": 1000000,
40
  "routed_scaling_factor": 1.0,
41
  "scoring_func": "softmax",
42
  "seq_aux": true,
 
36
  "pretraining_tp": 1,
37
  "rms_norm_eps": 1e-06,
38
  "rope_scaling": null,
39
+ "rope_theta": 5000000,
40
  "routed_scaling_factor": 1.0,
41
  "scoring_func": "softmax",
42
  "seq_aux": true,