"""Gradio demo Space for the LYRICAL Russian-to-English lyric translation model.

The script loads a causal language model and its tokenizer, formats the chat
history into a plain-text prompt, and streams generated text into a
``gr.ChatInterface``.
"""

# NOTE: the import order of the original script is preserved on purpose;
# ``spaces`` is imported before ``torch`` as in the original.
import os
import time
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig
from huggingface_hub import hf_hub_download, HfFileSystem, ModelCard
from huggingface_hub import login

try:
    import charactertokenizer
except ImportError:
    # charactertokenizer incompatible with transformers 5.x (uses removed download_url)
    # Preserved for future iteration with older model versions if needed
    charactertokenizer = None

import gradio as gr
import tempfile
import requests
import re
from threading import Thread

# MODEL = "AlekseyCalvin/LYRICAL_MT_ru2en_28_charllama3bsophist_2.6b"
MODEL = "AlekseyCalvin/LYRICAL_MT_ru2en_21_SystemGemma2_9b_4epochs"

HF_TOKEN = os.environ.get("HF_TOKEN")
# Only authenticate when a token is actually configured; calling login() with
# token=None would fall back to an interactive prompt, which cannot work in a
# headless deployment.
if HF_TOKEN:
    login(token=HF_TOKEN)

TITLE = """

LYRICAL Machine Translation Russian2English Model Testing Hall

The model is licensed under apache 2.0

"""

PLACEHOLDER = """

Prototyping an LLM for song/poem translation fluently adaptive to complexly dimensioned fidelity constraints.

"""

css = """ .duplicate-button { margin: auto !important; color: white !important; background: black !important; border-radius: 100vh !important; } h3 { text-align: center; } """

# Default value of the "System Prompt" textbox. Split into adjacent literals
# (one per "\n"-terminated segment) purely for readability; the concatenated
# text is unchanged.
# NOTE(review): the collapsed source showed a physical line break between
# "Переведи на Английский. " and "Translate the following ..."; a plain
# double-quoted literal cannot contain one, so the two halves are joined
# directly here - confirm against the deployed prompt.
DEFAULT_SYSTEM_PROMPT = (
    "You are a virtuosic multilingual poet, versatile literary translator, computational linguist, philologist, and singer-songwriter. "
    "Translate the below Russian poem, song lyric, or fragment by rigorously discerning its most holistically representative English adaptation possible, faithfully reproducing every source line's lyrical, semantic, formal, poetic, symbolic, cultural, rhythmic, idiomatic, & phonetic features. "
    "Fully conveying source meanings, results must also exhibit full fidelity to its meter, imagery, rhyme scheme, tone, and nuances of style and wordplay. \n"
    "Crucially, to translate Russian syllabotonic verse, start by analyzing source scansion, rhythm, rhyme, and phonetic patterns to map the adaptation's formal constraints.\n"
    "Thus, before translating, silently discern:\n"
    "1. Each source line's SYLLABLE COUNT\n"
    "2. Each line's likeliest syllable STRESS PATTERN (For ex.: if (/) = stressed, (x) = unstressed, then '/ x x / x x / x x / x x' is the stress pattern for the line 'Двигаю ручками, двигаю ножками' and its adaptation 'Moving my handy-arms, leg-footies motioning')\n"
    "3.Its METER (dactylic (/ x x) in our example lines, other meters include amphibrachic (x / x), anapestic (x x /), trochaic (/ x), iambic (x /), etc)\n"
    "4. Its FOOT (which may be tetrameter (our examples) or dimeter, pentameter, hexameter, etc)\n"
    "5. RHYME SCHEME of entire verse (if 1st/3rd lines & 2nd/4th lines rhyme = ABAB scheme, etc)\n"
    "6.Other FEATURES, formal or idiomatic or literary (tone, speaker, theme, imagery, meaning, allusions).\n"
    "7. Phonetic/musical character of source lines.\n"
    "Finally, devise a natural-sounding English adaptation faithful to source meanings and other features alike. Do not add anything besides the translation. Do not explain process. Present only the translation. Переведи на Английский. "
    "Translate the following Russian source text to English.\n"
    "Russian: "
)

# ZeroGPU compatible device handling - don't initialize CUDA at module level.
# This name is a placeholder for clarity only: stream_chat() moves tensors to
# model.device and never reads this variable.
device = "cuda"

# nf4_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_use_double_quant=True
# )

# Load tokenizer and model.
# use_kernels=True enables optimized kernels from HF Kernel Hub; the attention
# implementation is the Hub kernel named in attn_implementation below.
tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)

# Model loading happens at module level; generation itself only runs inside
# the @spaces.GPU-decorated stream_chat().
model = AutoModelForCausalLM.from_pretrained(
    MODEL,
    dtype="auto",
    # quantization_config=nf4_config,
    device_map="auto",
    trust_remote_code=True,
    ignore_mismatched_sizes=True,
    use_kernels=True,  # Enable HF Kernel Hub optimizations
    # Kernel Hub attention kernel (the id names flash-attn3, not Flash Attention 2).
    attn_implementation="kernels-community/vllm-flash-attn3",
)


def _content_to_text(content):
    """Flatten a chat message ``content`` value into plain text.

    Accepts a plain string, ``None``, a single content part (a dict carrying
    a ``"text"`` entry), or a list/tuple of such values. Parts without text
    (for example file attachments) contribute nothing.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, dict):
        text = content.get("text")
        return text if isinstance(text, str) else ""
    if isinstance(content, (list, tuple)):
        pieces = (_content_to_text(part) for part in content)
        return "\n".join(piece for piece in pieces if piece)
    return str(content)


def format_chat(system_prompt, history, message):
    """Build the plain-text prompt sent to the model.

    Args:
        system_prompt: Text placed in the leading ``system`` section.
        history: Previous turns. Each turn is either a messages-style dict
            with ``"role"`` and ``"content"`` keys, or a legacy
            ``(prompt, answer)`` pair. ``None`` is treated as no history.
        message: The new user message.

    Returns:
        The prompt string, ending with an open ``assistant`` section for the
        model to complete.
    """
    parts = [f"system\n{system_prompt}\n"]
    for turn in history or []:
        if isinstance(turn, dict):
            # Messages format: {"role": "user"/"assistant", "content": ...}.
            # Content may be a plain string or a list of typed parts, so it
            # is flattened before being interpolated into the prompt.
            role = turn.get("role")
            text = _content_to_text(turn.get("content", ""))
            if role == "user":
                parts.append(f"user\n{text}\n")
            elif role == "assistant":
                parts.append(f"assistant\nEnglish:{text}\n")
            # Any other role is deliberately left out of the prompt.
        else:
            # Fallback for tuple format (legacy compatibility).
            prompt, answer = turn
            parts.append(f"user\n{prompt}\nassistant\nEnglish:{answer}\n")
    parts.append(f"user\n{message}\nassistant\n")
    return "".join(parts)


@spaces.GPU  # No explicit duration is configured; the decorator's default applies.
def stream_chat(
    message: str,
    history: list,  # Gradio messages format (list of dicts) or legacy tuples
    system_prompt: str,
    temperature: float = 0.2,
    max_new_tokens: int = 512,
    top_p: float = 1.0,
    top_k: int = 0,
    repetition_penalty: float = 1.3,
):
    """Stream a translation for ``message`` as it is generated.

    Args:
        message: The new user message.
        history: Earlier chat turns, passed through to ``format_chat``.
        system_prompt: System instructions placed at the top of the prompt.
        temperature: Sampling temperature; ``0`` selects greedy decoding.
        max_new_tokens: Upper bound on the number of generated tokens.
        top_p: Nucleus-sampling threshold (used only when sampling).
        top_k: Top-k cutoff (used only when sampling).
        repetition_penalty: Repetition penalty passed to ``generate``.

    Yields:
        The text generated so far (cumulative, not incremental), cut at
        ``<|endoftext|>`` if that marker shows up in the stream.
    """
    print(f"message: {message}")
    print(f"history: {history}")

    formatted_prompt = format_chat(system_prompt, history, message)

    # Tokenize, then move every tensor to wherever device_map placed the model.
    inputs = tokenizer(formatted_prompt, return_tensors="pt")
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    streamer = TextIteratorStreamer(
        tokenizer,
        timeout=5000.0,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    do_sample = temperature != 0

    generate_kwargs = dict(
        input_ids=inputs["input_ids"],
        max_new_tokens=int(max_new_tokens),
        do_sample=do_sample,
        repetition_penalty=repetition_penalty,
        streamer=streamer,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
        cache_implementation="static",  # static KV cache
        use_cache=True,
    )
    # Forward the tokenizer's attention mask instead of silently dropping it.
    if "attention_mask" in inputs:
        generate_kwargs["attention_mask"] = inputs["attention_mask"]
    # Sampling knobs are meaningless for greedy decoding, so they are passed
    # only when sampling is enabled.
    if do_sample:
        generate_kwargs.update(
            temperature=temperature,
            top_p=top_p,
            top_k=int(top_k),
        )

    # generate() runs in a worker thread so this generator can consume the
    # streamer concurrently. No torch.no_grad() wrapper is used here: grad
    # mode is thread-local, so a context entered in this thread would not
    # reach the worker, and generate() disables gradient tracking itself.
    thread = Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()

    buffer = ""
    for new_text in streamer:
        buffer += new_text
        if "<|endoftext|>" in buffer:
            yield buffer.split("<|endoftext|>")[0]
            break
        yield buffer


# Chatbot component handed to the ChatInterface below.
chatbot = gr.Chatbot(
    height=600,
    placeholder=PLACEHOLDER,
)

with gr.Blocks() as demo:
    gr.HTML(TITLE)
    gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
    gr.ChatInterface(
        fn=stream_chat,
        chatbot=chatbot,
        fill_height=True,
        additional_inputs_accordion=gr.Accordion(label="⚙️ Dials", open=True, render=False),
        # Order must match stream_chat's parameters after (message, history).
        additional_inputs=[
            gr.Textbox(
                value=DEFAULT_SYSTEM_PROMPT,
                label="System Prompt",
                render=False,
            ),
            gr.Slider(
                minimum=0,
                maximum=2,
                step=0.01,
                value=0.1,
                label="Temperature",
                render=False,
            ),
            gr.Slider(
                minimum=128,
                maximum=8192,
                step=1,
                value=512,
                label="Max new tokens",
                render=False,
            ),
            gr.Slider(
                minimum=0.0,
                maximum=1.0,
                step=0.1,
                value=0.8,
                label="top_p",
                render=False,
            ),
            gr.Slider(
                minimum=0,
                maximum=50,
                step=1,
                value=20,
                label="top_k",
                render=False,
            ),
            gr.Slider(
                minimum=0.0,
                maximum=4.0,
                step=0.1,
                value=1.8,
                label="Repetition penalty",
                render=False,
            ),
        ],
        # Each example supplies only the user message.
        examples=[
            ["Translate the following song, adapting all formal and poetic characteristics (like meter, tone, mood, rhyme-scheme, and syllable patterns): // На карте кружком обозначено солнце, Пунктирные линии, ветер и соль, Под веками плавают сны на оконце, Спадает волнами зеленая боль, На наших глазах исчезают потери; Душа выпускает скопившийся страх; Я слышу шаги, открываются двери И смерть исчезает на наших глазах; На наших глазах; На наших глазах"],
            ["Translate these song lyrics into English: Иду я на верёвочке вздыхаю на ходу / Доска моя кончается сейчас я упаду / Под ноги под колёса под тяжёлый молоток / Всё с молотка / О, продана смерть моя…"],
            ["Translate: Мы вышли за рамки людских представлений / И даже представить себе не могли / Что выше всех горестей, бед и мучений / Мы будем под слоем промёрзшей земли."],
        ],
        cache_examples=False,
    )

if __name__ == "__main__":
    demo.queue(default_concurrency_limit=None)
    demo.launch(share=True, theme=gr.themes.Soft(), css=css, mcp_server=True)