++ head -n 1 ++ scontrol show hostnames g064 + export MASTER_ADDR=g064 + MASTER_ADDR=g064 +++ tail -c 4 +++ echo -n 16715025 ++ expr 10000 + 5025 + export MASTER_PORT=15025 + MASTER_PORT=15025 + export RANK=0 + RANK=0 + export WORLD_SIZE=1 + WORLD_SIZE=1 + echo 'SLURM Job ID: 16715025' + echo 'SLURM Node List: g064' + echo 'SLURM Number of Nodes: 1' + echo 'SLURM Number of Tasks: 1' + echo 'SLURM Tasks per Node: 1' + echo 'SLURM Local ID: 0' + echo 'SLURM Procedure ID: 0' + echo 'SLURM Node ID: 0' + echo 'MASTER_ADDR: g064' + echo 'MASTER_PORT: 15025' + echo 'RANK: 0' + echo 'WORLD_SIZE: 1' + echo 'CUDA_VISIBLE_DEVICES: 0' + cd /home/henrycastillo/modded-nanogpt + ./eval.sh 2025-10-03 16:54:19.220 | INFO | __main__::20 - importing transformers... 2025-10-03 16:54:19.243 | INFO | __main__::25 - importing train_gpt_medium... 2025-10-03 16:55:51.604 | INFO | __main__:main:243 - Evaluating logs in logs 2025-10-03 16:55:51.692 | INFO | __main__:main:255 - Finding latest model in logs/000_c2e7a920-6eca-4f21-8a3c-6022d81a4f29 2025-10-03 16:55:51.753 | INFO | __main__:main:259 - Loading model from logs/000_c2e7a920-6eca-4f21-8a3c-6022d81a4f29/latest_model.pt 2025-10-03 16:55:51.754 | INFO | __main__:__init__:201 - Initializing model with hyperparameters: Hyperparameters() 2025-10-03 16:56:00.528 | INFO | __main__:__init__:210 - Loading model state dict from logs/000_c2e7a920-6eca-4f21-8a3c-6022d81a4f29/latest_model.pt 2025-10-03 16:56:03.691 | INFO | __main__:__init__:213 - Renaming keys in model state dict 2025-10-03 16:56:04.236 | INFO | __main__:__init__:224 - Initializing tokenizer 2025-10-03 16:56:11.050 | INFO | __main__:main:262 - Wrapping model in HFLM `pretrained` model kwarg is not of type `str`. Many other model arguments may be ignored. Please do not launch via accelerate or use `parallelize=True` if passing an existing model this way. HF model type is neither marked as CausalLM or Seq2SeqLM. This is expected if your model requires `trust_remote_code=True` but may be an error otherwise.Setting backend to causal Passed an already-initialized model through `pretrained`, assuming single-process call to evaluate() or custom distributed integration 2025-10-03 16:56:11.339 | INFO | __main__:main:268 - Evaluating model 2025-10-03:16:56:11 INFO [evaluator:202] Setting random seed to 0 | Setting numpy seed to 1234 | Setting torch manual seed to 1234 | Setting fewshot manual seed to 1234 2025-10-03:16:56:11 INFO [evaluator:258] Using pre-initialized model Using the latest cached version of the dataset since Rowan/hellaswag couldn't be found on the Hugging Face Hub 2025-10-03:17:07:57 WARNING [datasets.load:817] Using the latest cached version of the dataset since Rowan/hellaswag couldn't be found on the Hugging Face Hub Found the latest cached dataset configuration 'default' at /scratch/user/henrycastillo/.cache/huggingface/datasets/Rowan___hellaswag/default/0.0.0/218ec52e09a7e7462a5400043bb9a69a41d06b76 (last modified on Tue Sep 23 13:09:29 2025). 2025-10-03:17:07:57 WARNING [datasets.packaged_modules.cache.cache:94] Found the latest cached dataset configuration 'default' at /scratch/user/henrycastillo/.cache/huggingface/datasets/Rowan___hellaswag/default/0.0.0/218ec52e09a7e7462a5400043bb9a69a41d06b76 (last modified on Tue Sep 23 13:09:29 2025). 2025-10-03:17:10:47 DEBUG [api.task:884] No custom filters defined. Using default 'take_first' filter for handling repeats. 2025-10-03:17:11:39 INFO [api.task:434] Building contexts for hellaswag on rank 0... 0%| | 0/10042 [00:00: 'CustomModel( (model): EvaluationGPT( (embed): Embedding(50257, 1024) (value_embeds): ModuleList( (0-2): 3 x Embedding(50257, 1024) ) (blocks): ModuleList( (0-6): 7 x Block( (attn): CausalSelfAttention( (rotary): Rotary() ) (mlp): MLP() ) (7): Block( (mlp): MLP() ) (8-15): 8 x Block( (attn): CausalSelfAttention( (rotary): Rotary() ) (mlp): MLP() ) ) ) )'. 2025-10-03 18:16:50.520 | INFO | __main__:main:273 - {'alias': 'hellaswag', 'acc,none': 0.25403306114319857, 'acc_stderr,none': 0.004344266179634878, 'acc_norm,none': 0.25473013343955386, 'acc_norm_stderr,none': 0.0043481894593367845} 2025-10-03 18:16:50.520 | INFO | __main__:main:274 - Saving results to logs/000_c2e7a920-6eca-4f21-8a3c-6022d81a4f29/hellaswag.json 2025-10-03 18:16:50.582 | INFO | __main__:main:279 - Total evaluation time: 4858.98s 2025-10-03 18:16:50.582 | SUCCESS | __main__:main:280 - Final accuracy: 0.25403306114319857