Spaces:
Sleeping
Sleeping
| import os | |
| import spaces | |
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig | |
| from huggingface_hub import login | |
| import gradio as gr | |
| from threading import Thread | |
# Optional dependency: currently unused, but kept importable for later iterations.
try:
    import charactertokenizer
except ImportError:
    charactertokenizer = None

# Hub repository id of the checkpoint served by this app.
MODEL = "AlekseyCalvin/Lyrical_Llama31_8B_ru2en_SFT"

# Log in to the Hugging Face Hub only when a token is present in the environment.
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)
# HTML banner rendered at the top of the page via gr.HTML.
TITLE = """
<h1><center>LYRICAL Machine Translation Russian2English Model Testing Hall </center></h1>
<center>
<p>The model is licensed under apache 2.0</p>
</center>
"""
# HTML passed to gr.Chatbot as its placeholder.
PLACEHOLDER = """
<center>
<p>Prototyping an LLM for song/poem translation fluently adaptive to complexly dimensioned fidelity constraints.</p>
</center>
"""
# Custom stylesheet handed to demo.launch(); styles the duplicate button and h3 headings.
css = """
.duplicate-button {
margin: auto !important;
color: white !important;
background: black !important;
border-radius: 100vh !important;
}
h3 {
text-align: center;
}
"""
# Load the tokenizer that ships with the checkpoint.
# NOTE(review): trust_remote_code=True executes code from the model repo; keep
# it only if the checkpoint actually requires custom code.
tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)

# bitsandbytes 4-bit quantization: NF4 weight format, double quantization,
# bfloat16 compute dtype.
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load the model with NF4 quantization and the FlashAttention-3 hub kernel.
# (Earlier comments/logs said "FP4" and "Sage Attention", which did not match
# the configuration below.)
print("Loading model with NF4 4-bit quantization and FlashAttention-3 kernels...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL,
    quantization_config=nf4_config,
    device_map="cuda",
    trust_remote_code=True,
    use_kernels=True,
    attn_implementation="kernels-community/flash-attn3",  # FlashAttention-3 kernel
)
print(f"Model loaded on {model.device}, dtype: {model.dtype}")
# ============================================================
# CHAT LOGIC
# ============================================================
def _content_to_text(content) -> str:
    """Flatten a chat message's ``content`` field into plain text.

    Accepts a plain string, a dict carrying a ``"text"`` entry, or a list/tuple
    of such parts (structured content); parts without text (e.g. file
    references) contribute nothing. ``None`` becomes an empty string.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, dict):
        text = content.get("text")
        return text if isinstance(text, str) else ""
    if isinstance(content, (list, tuple)):
        parts = (_content_to_text(part) for part in content)
        return "\n".join(part for part in parts if part)
    return str(content)


def stream_chat(
    message: str,
    history: list,
    system_prompt: str,
    temperature: float = 0.1,
    max_new_tokens: int = 512,
    top_p: float = 0.8,
    top_k: int = 20,
    repetition_penalty: float = 1.8,
):
    """Stream the model's reply to ``message`` as progressively longer strings.

    Args:
        message: The current user message.
        history: Earlier turns as dicts with ``role`` and ``content`` keys.
        system_prompt: Text placed in the leading system message.
        temperature: Sampling temperature; values <= 0 select greedy decoding.
        max_new_tokens: Upper bound on the number of generated tokens.
        top_p: Nucleus sampling threshold (only used when sampling).
        top_k: Top-k cutoff (only used when sampling); 0 disables top-k.
        repetition_penalty: Repetition penalty; non-positive values are
            ignored because the generation backend rejects them.

    Yields:
        The accumulated response text after each streamed chunk.

    Raises:
        Exception: Whatever ``model.generate`` raised in the worker thread.
    """
    print(f"Message: {message}")

    # System prompt first, then prior turns reduced to role + plain text so the
    # chat template never sees structured content or extra UI-only keys.
    conversation = [{"role": "system", "content": system_prompt}]
    for turn in history or []:
        conversation.append(
            {"role": turn["role"], "content": _content_to_text(turn.get("content"))}
        )
    conversation.append({"role": "user", "content": _content_to_text(message)})

    # Let the tokenizer's chat template tokenize directly. Rendering to text and
    # re-tokenizing with default settings can prepend a second BOS token when
    # the template already emits one.
    inputs = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)

    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    do_sample = float(temperature) > 0

    # Fall back to EOS when the tokenizer defines no pad token.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    generate_kwargs = {
        "input_ids": inputs["input_ids"],
        "attention_mask": inputs["attention_mask"],
        "max_new_tokens": int(max_new_tokens),
        "do_sample": do_sample,
        "streamer": streamer,
        "pad_token_id": pad_token_id,
        "eos_token_id": tokenizer.eos_token_id,
        "use_cache": True,
    }
    if do_sample:
        # UI sliders may deliver ints (e.g. 1 instead of 1.0); the logits
        # processors type-check their arguments, so cast explicitly. Sampling
        # knobs are only sent when sampling to avoid "unused flag" warnings.
        generate_kwargs["temperature"] = float(temperature)
        generate_kwargs["top_p"] = float(top_p)
        # top_k=0 is passed through so that it disables top-k filtering rather
        # than silently falling back to the model's default.
        generate_kwargs["top_k"] = max(int(top_k), 0)
    if repetition_penalty is not None and float(repetition_penalty) > 0:
        generate_kwargs["repetition_penalty"] = float(repetition_penalty)
    # Drop unset values (e.g. a missing eos_token_id) so defaults apply.
    generate_kwargs = {k: v for k, v in generate_kwargs.items() if v is not None}

    errors = []

    def _run_generation():
        # If generate() fails, close the stream so the consumer loop below does
        # not block forever, and keep the exception for re-raising.
        try:
            model.generate(**generate_kwargs)
        except Exception as exc:
            errors.append(exc)
            streamer.end()

    thread = Thread(target=_run_generation)
    thread.start()

    buffer = ""
    for new_text in streamer:
        buffer += new_text
        # skip_special_tokens handles most markers; cut at a literal end-of-text
        # marker if one slips through as plain text.
        visible, marker, _ = buffer.partition("<|endoftext|>")
        if marker:
            # NOTE: the worker thread keeps running until generate() returns.
            yield visible.strip()
            return
        yield buffer

    thread.join()
    if errors:
        raise errors[0]
    # Guarantees at least one yielded value even when nothing was streamed.
    yield buffer
# ============================================================
# GRADIO INTERFACE
# ============================================================
chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)

# css and theme are not passed to gr.Blocks(); they are supplied to
# demo.launch() at the bottom of the file.
with gr.Blocks() as demo:
    gr.HTML(TITLE)
    gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
    gr.ChatInterface(
        fn=stream_chat,
        chatbot=chatbot,
        fill_height=True,
        additional_inputs_accordion=gr.Accordion(label="⚙️ Dials", open=True),
        # Order must match stream_chat's parameters after (message, history).
        additional_inputs=[
            gr.Textbox(
                value="You are a virtuosic multilingual poet, versatile literary translator, computational linguist, philologist, and singer-songwriter. Translate the below Russian poem, song lyric, or fragment by rigorously discerning its most holistically representative English adaptation possible, faithfully reproducing every source line's lyrical, semantic, formal, poetic, symbolic, cultural, rhythmic, idiomatic, & phonetic features. Fully conveying source meanings, results must also exhibit full fidelity to its meter, imagery, rhyme scheme, tone, and nuances of style and wordplay. \nCrucially, to translate Russian syllabotonic verse, start by analyzing source scansion, rhythm, rhyme, and phonetic patterns to map the adaptation's formal constraints.\nThus, before translating, silently discern:\n1. Each source line's SYLLABLE COUNT\n2. Each line's likeliest syllable STRESS PATTERN (For ex.: if (/) = stressed, (x) = unstressed, then '/ x x / x x / x x / x x' is the stress pattern for the line 'Двигаю ручками, двигаю ножками' and its adaptation 'Moving my handy-arms, leg-footies motioning')\n3.Its METER (dactylic (/ x x) in our example lines, other meters include amphibrachic (x / x), anapestic (x x /), trochaic (/ x), iambic (x /), etc)\n4. Its FOOT (which may be tetrameter (our examples) or dimeter, pentameter, hexameter, etc)\n5. RHYME SCHEME of entire verse (if 1st/3rd lines & 2nd/4th lines rhyme = ABAB scheme, etc)\n6.Other FEATURES, formal or idiomatic or literary (tone, speaker, theme, imagery, meaning, allusions).\n7. Phonetic/musical character of source lines.\nFinally, devise a natural-sounding English adaptation faithful to source meanings and other features alike. Do not add anything besides the translation. Do not explain process. Present only the translation. Переведи на Английский. Translate the following Russian source text to English.\nRussian: ",
                label="System Prompt",
            ),
            gr.Slider(minimum=0, maximum=2, step=0.01, value=0.1, label="Temperature"),
            gr.Slider(minimum=128, maximum=8192, step=1, value=512, label="Max new tokens"),
            gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=0.8, label="top_p"),
            gr.Slider(minimum=0, maximum=50, step=1, value=20, label="top_k"),
            # The repetition penalty must be strictly positive, so the slider
            # starts at 0.1 instead of 0.0 (0.0 is rejected at generation time).
            gr.Slider(minimum=0.1, maximum=4.0, step=0.1, value=1.8, label="Repetition penalty"),
        ],
        examples=[
            ["Translate the following song, adapting all formal and poetic characteristics (like meter, tone, mood, rhyme-scheme, and syllable patterns): // На карте кружком обозначено солнце, Пунктирные линии, ветер и соль, Под веками плавают сны на оконце, Спадает волнами зеленая боль, На наших глазах исчезают потери; Душа выпускает скопившийся страх; Я слышу шаги, открываются двери И смерть исчезает на наших глазах; На наших глазах; На наших глазах"],
            ["Translate these song lyrics into English: Иду я на верёвочке вздыхаю на ходу / Доска моя кончается сейчас я упаду / Под ноги под колёса под тяжёлый молоток / Всё с молотка / О, продана смерть моя…"],
            ["Translate: Мы вышли за рамки людских представлений / И даже представить себе не могли / Что выше всех горестей, бед и мучений / Мы будем под слоем промёрзшей земли."],
        ],
        cache_examples=False,
    )
if __name__ == "__main__":
    # Enable the request queue; default_concurrency_limit=None is passed so no
    # explicit per-event concurrency cap is configured here.
    demo.queue(default_concurrency_limit=None)
    # The stylesheet and theme are supplied at launch time rather than to
    # gr.Blocks(); share and mcp_server are both switched on.
    demo.launch(share=True, css=css, theme=gr.themes.Glass(), mcp_server=True)