"""
Test script for Rax 3.5 Chat model
"""

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM


def test_rax_chat():
    print("Loading Rax 3.5 Chat model...")

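    # Load the tokenizer and weights from the current directory; this assumes
    # the checkpoint files live alongside this script.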
    tokenizer = AutoTokenizer.from_pretrained(".")
    model = AutoModelForCausalLM.from_pretrained(
        ".",
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )

    print("Model loaded successfully!")

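    # A minimal conversation: one system prompt plus one user turn.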
    messages = [
        {"role": "system", "content": "You are Rax, a helpful AI assistant."},
        {"role": "user", "content": "Hello! Can you tell me about yourself?"},
    ]

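    # Render the conversation through the model's chat template and append the
    # generation prompt so the model continues as the assistant.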
    input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    print(f"Input: {input_text}")

    # Move the tokenized inputs onto the model's device; with device_map="auto"
    # the weights may sit on a GPU while fresh tensors default to CPU.
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

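    # Sample up to 128 new tokens; pad_token_id falls back to EOS because many
    # chat checkpoints ship without a dedicated pad token.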
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=128,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

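    # Decode only the newly generated tokens, skipping the echoed prompt.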
    response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
    print(f"Rax: {response}")


if __name__ == "__main__":
    test_rax_chat()