Update README.md
Browse files
README.md
CHANGED
|
@@ -16,7 +16,7 @@ license: llama3.1
|
|
| 16 |
base_model: meta-llama/Meta-Llama-3.1-405B-Instruct
|
| 17 |
---
|
| 18 |
|
| 19 |
-
# Meta-Llama-3.1-405B-Instruct-quantized.
|
| 20 |
|
| 21 |
## Model Overview
|
| 22 |
- **Model Architecture:** Meta-Llama-3
|
|
@@ -271,7 +271,7 @@ lm_eval \
|
|
| 271 |
```
|
| 272 |
lm_eval \
|
| 273 |
--model vllm \
|
| 274 |
-
--model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-quantized.
|
| 275 |
--tasks mmlu_cot_0shot_llama_3.1_instruct \
|
| 276 |
--apply_chat_template \
|
| 277 |
--num_fewshot 0 \
|
|
@@ -282,7 +282,7 @@ lm_eval \
|
|
| 282 |
```
|
| 283 |
lm_eval \
|
| 284 |
--model vllm \
|
| 285 |
-
--model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-quantized.
|
| 286 |
--tasks arc_challenge_llama_3.1_instruct \
|
| 287 |
--apply_chat_template \
|
| 288 |
--num_fewshot 0 \
|
|
@@ -293,7 +293,7 @@ lm_eval \
|
|
| 293 |
```
|
| 294 |
lm_eval \
|
| 295 |
--model vllm \
|
| 296 |
-
--model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-quantized.
|
| 297 |
--tasks gsm8k_cot_llama_3.1_instruct \
|
| 298 |
--fewshot_as_multiturn \
|
| 299 |
--apply_chat_template \
|
|
@@ -305,7 +305,7 @@ lm_eval \
|
|
| 305 |
```
|
| 306 |
lm_eval \
|
| 307 |
--model vllm \
|
| 308 |
-
--model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-quantized.
|
| 309 |
--tasks hellaswag \
|
| 310 |
--num_fewshot 10 \
|
| 311 |
--batch_size auto
|
|
@@ -315,7 +315,7 @@ lm_eval \
|
|
| 315 |
```
|
| 316 |
lm_eval \
|
| 317 |
--model vllm \
|
| 318 |
-
--model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-quantized.
|
| 319 |
--tasks winogrande \
|
| 320 |
--num_fewshot 5 \
|
| 321 |
--batch_size auto
|
|
@@ -325,7 +325,7 @@ lm_eval \
|
|
| 325 |
```
|
| 326 |
lm_eval \
|
| 327 |
--model vllm \
|
| 328 |
-
--model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-quantized.
|
| 329 |
--tasks truthfulqa \
|
| 330 |
--num_fewshot 0 \
|
| 331 |
--batch_size auto
|
|
|
|
| 16 |
base_model: meta-llama/Meta-Llama-3.1-405B-Instruct
|
| 17 |
---
|
| 18 |
|
| 19 |
+
# Meta-Llama-3.1-405B-Instruct-quantized.w8a16
|
| 20 |
|
| 21 |
## Model Overview
|
| 22 |
- **Model Architecture:** Meta-Llama-3
|
|
|
|
| 271 |
```
|
| 272 |
lm_eval \
|
| 273 |
--model vllm \
|
| 274 |
+
--model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-quantized.w8a16",dtype=auto,add_bos_token=True,max_model_len=4064,max_gen_toks=1024,enable_chunked_prefill=True,tensor_parallel_size=8 \
|
| 275 |
--tasks mmlu_cot_0shot_llama_3.1_instruct \
|
| 276 |
--apply_chat_template \
|
| 277 |
--num_fewshot 0 \
|
|
|
|
| 282 |
```
|
| 283 |
lm_eval \
|
| 284 |
--model vllm \
|
| 285 |
+
--model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-quantized.w8a16",dtype=auto,add_bos_token=True,max_model_len=3940,max_gen_toks=100,enable_chunked_prefill=True,tensor_parallel_size=8 \
|
| 286 |
--tasks arc_challenge_llama_3.1_instruct \
|
| 287 |
--apply_chat_template \
|
| 288 |
--num_fewshot 0 \
|
|
|
|
| 293 |
```
|
| 294 |
lm_eval \
|
| 295 |
--model vllm \
|
| 296 |
+
--model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-quantized.w8a16",dtype=auto,add_bos_token=True,max_model_len=4096,max_gen_toks=1024,enable_chunked_prefill=True,tensor_parallel_size=8 \
|
| 297 |
--tasks gsm8k_cot_llama_3.1_instruct \
|
| 298 |
--fewshot_as_multiturn \
|
| 299 |
--apply_chat_template \
|
|
|
|
| 305 |
```
|
| 306 |
lm_eval \
|
| 307 |
--model vllm \
|
| 308 |
+
--model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-quantized.w8a16",dtype=auto,add_bos_token=True,max_model_len=4096,enable_chunked_prefill=True,tensor_parallel_size=8 \
|
| 309 |
--tasks hellaswag \
|
| 310 |
--num_fewshot 10 \
|
| 311 |
--batch_size auto
|
|
|
|
| 315 |
```
|
| 316 |
lm_eval \
|
| 317 |
--model vllm \
|
| 318 |
+
--model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-quantized.w8a16",dtype=auto,add_bos_token=True,max_model_len=4096,enable_chunked_prefill=True,tensor_parallel_size=8 \
|
| 319 |
--tasks winogrande \
|
| 320 |
--num_fewshot 5 \
|
| 321 |
--batch_size auto
|
|
|
|
| 325 |
```
|
| 326 |
lm_eval \
|
| 327 |
--model vllm \
|
| 328 |
+
--model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-quantized.w8a16",dtype=auto,add_bos_token=True,max_model_len=4096,enable_chunked_prefill=True,tensor_parallel_size=8 \
|
| 329 |
--tasks truthfulqa \
|
| 330 |
--num_fewshot 0 \
|
| 331 |
--batch_size auto
|