alexmarques committed on
Commit
70795f2
·
verified ·
1 Parent(s): 88efeae

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +7 -7
README.md CHANGED
@@ -16,7 +16,7 @@ license: llama3.1
16
  base_model: meta-llama/Meta-Llama-3.1-405B-Instruct
17
  ---
18
 
19
- # Meta-Llama-3.1-405B-Instruct-quantized.w8a8
20
 
21
  ## Model Overview
22
  - **Model Architecture:** Meta-Llama-3
@@ -271,7 +271,7 @@ lm_eval \
271
  ```
272
  lm_eval \
273
  --model vllm \
274
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-quantized.w8a8",dtype=auto,add_bos_token=True,max_model_len=4064,max_gen_toks=1024,enable_chunked_prefill=True,tensor_parallel_size=8 \
275
  --tasks mmlu_cot_0shot_llama_3.1_instruct \
276
  --apply_chat_template \
277
  --num_fewshot 0 \
@@ -282,7 +282,7 @@ lm_eval \
282
  ```
283
  lm_eval \
284
  --model vllm \
285
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-quantized.w8a8",dtype=auto,add_bos_token=True,max_model_len=3940,max_gen_toks=100,enable_chunked_prefill=True,tensor_parallel_size=8 \
286
  --tasks arc_challenge_llama_3.1_instruct \
287
  --apply_chat_template \
288
  --num_fewshot 0 \
@@ -293,7 +293,7 @@ lm_eval \
293
  ```
294
  lm_eval \
295
  --model vllm \
296
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-quantized.w8a8",dtype=auto,add_bos_token=True,max_model_len=4096,max_gen_toks=1024,enable_chunked_prefill=True,tensor_parallel_size=8 \
297
  --tasks gsm8k_cot_llama_3.1_instruct \
298
  --fewshot_as_multiturn \
299
  --apply_chat_template \
@@ -305,7 +305,7 @@ lm_eval \
305
  ```
306
  lm_eval \
307
  --model vllm \
308
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-quantized.w8a8",dtype=auto,add_bos_token=True,max_model_len=4096,enable_chunked_prefill=True,tensor_parallel_size=8 \
309
  --tasks hellaswag \
310
  --num_fewshot 10 \
311
  --batch_size auto
@@ -315,7 +315,7 @@ lm_eval \
315
  ```
316
  lm_eval \
317
  --model vllm \
318
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-quantized.w8a8",dtype=auto,add_bos_token=True,max_model_len=4096,enable_chunked_prefill=True,tensor_parallel_size=8 \
319
  --tasks winogrande \
320
  --num_fewshot 5 \
321
  --batch_size auto
@@ -325,7 +325,7 @@ lm_eval \
325
  ```
326
  lm_eval \
327
  --model vllm \
328
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-quantized.w8a8",dtype=auto,add_bos_token=True,max_model_len=4096,enable_chunked_prefill=True,tensor_parallel_size=8 \
329
  --tasks truthfulqa \
330
  --num_fewshot 0 \
331
  --batch_size auto
 
16
  base_model: meta-llama/Meta-Llama-3.1-405B-Instruct
17
  ---
18
 
19
+ # Meta-Llama-3.1-405B-Instruct-quantized.w8a16
20
 
21
  ## Model Overview
22
  - **Model Architecture:** Meta-Llama-3
 
271
  ```
272
  lm_eval \
273
  --model vllm \
274
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-quantized.w8a16",dtype=auto,add_bos_token=True,max_model_len=4064,max_gen_toks=1024,enable_chunked_prefill=True,tensor_parallel_size=8 \
275
  --tasks mmlu_cot_0shot_llama_3.1_instruct \
276
  --apply_chat_template \
277
  --num_fewshot 0 \
 
282
  ```
283
  lm_eval \
284
  --model vllm \
285
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-quantized.w8a16",dtype=auto,add_bos_token=True,max_model_len=3940,max_gen_toks=100,enable_chunked_prefill=True,tensor_parallel_size=8 \
286
  --tasks arc_challenge_llama_3.1_instruct \
287
  --apply_chat_template \
288
  --num_fewshot 0 \
 
293
  ```
294
  lm_eval \
295
  --model vllm \
296
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-quantized.w8a16",dtype=auto,add_bos_token=True,max_model_len=4096,max_gen_toks=1024,enable_chunked_prefill=True,tensor_parallel_size=8 \
297
  --tasks gsm8k_cot_llama_3.1_instruct \
298
  --fewshot_as_multiturn \
299
  --apply_chat_template \
 
305
  ```
306
  lm_eval \
307
  --model vllm \
308
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-quantized.w8a16",dtype=auto,add_bos_token=True,max_model_len=4096,enable_chunked_prefill=True,tensor_parallel_size=8 \
309
  --tasks hellaswag \
310
  --num_fewshot 10 \
311
  --batch_size auto
 
315
  ```
316
  lm_eval \
317
  --model vllm \
318
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-quantized.w8a16",dtype=auto,add_bos_token=True,max_model_len=4096,enable_chunked_prefill=True,tensor_parallel_size=8 \
319
  --tasks winogrande \
320
  --num_fewshot 5 \
321
  --batch_size auto
 
325
  ```
326
  lm_eval \
327
  --model vllm \
328
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-405B-Instruct-quantized.w8a16",dtype=auto,add_bos_token=True,max_model_len=4096,enable_chunked_prefill=True,tensor_parallel_size=8 \
329
  --tasks truthfulqa \
330
  --num_fewshot 0 \
331
  --batch_size auto