# WORLDISH POET

import os
import spaces
import torch
from transformers import AutoModelForMultimodalLM, AutoProcessor, TextIteratorStreamer
from huggingface_hub import login
import gradio as gr
from threading import Thread

# Hub repository id of the checkpoint served by this app.
MODEL = "AlekseyCalvin/LYRICAL_POET_Gemma4e2b_v1"

# Optional access token read from the environment; the Hub login is only
# attempted when a non-empty token is present.
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)

# HTML banner rendered at the top of the page (passed to gr.HTML below).
TITLE = """
<h1><center>WORLDISH POET</center></h1>
<center>
<p> Generate formal or free verse via our LYRICAL models. </p>
</center>
"""

# HTML hint handed to the chatbot component as its placeholder.
PLACEHOLDER = """
<center>
<p> Specify poetic forms, historical poet inspirations, themes, subjects, or all of the above...</p>
</center>
"""

# Custom stylesheet: styles the "duplicate" button (see elem_classes on the
# DuplicateButton) and centers h3 headings.
css = """
.duplicate-button {
    margin: auto !important;
    color: white !important;
    background: black !important;
    border-radius: 100vh !important;
}
h3 {
    text-align: center;
}
"""

# Thinking delimiters — must match what the Gemma 4 chat template emits.
# They are passed to the chatbot as its reasoning tag pair, and are excluded
# from the list of special tokens stripped while streaming in thinking mode.
THINKING_START = "<|channel>"
THINKING_END = "<channel|>"
# Special tokens that must survive stripping so the reasoning section can be detected.
_KEEP_TOKENS = {THINKING_START, THINKING_END}

# --- Load processor & model ---
# Both objects are created once at import time and shared by every request.
print("Loading processor...")
processor = AutoProcessor.from_pretrained(MODEL, use_fast=False)

print("Loading model...")
model = AutoModelForMultimodalLM.from_pretrained(
    MODEL, device_map="auto", dtype=torch.bfloat16, trust_remote_code=True
)

# Special tokens to scrub from streamed text in thinking mode. The thinking
# delimiters are left out so Gradio's reasoning_tags can still find them.
_STRIP_TOKENS = [
    token
    for token in processor.tokenizer.all_special_tokens
    if token not in _KEEP_TOKENS
]
# Longest first, so removing a short token can never break up a longer one
# that contains it. The sort is stable, matching sorted(..., reverse=True).
_STRIP_TOKENS.sort(key=len, reverse=True)

print(f"Model loaded on {model.device}, dtype: {model.dtype}")


def _strip_special_tokens(text: str) -> str:
    """Return *text* with every token listed in ``_STRIP_TOKENS`` deleted.

    The thinking delimiters are not part of ``_STRIP_TOKENS``, so they are
    left in place.
    """
    cleaned = text
    for special in _STRIP_TOKENS:
        if special in cleaned:
            cleaned = cleaned.replace(special, "")
    return cleaned


def _content_part_to_text(part) -> str:
    """Return the text of one content item, falling back to ``str(part)``."""
    # Structured content parts look like {"type": "text", "text": "..."};
    # use the text itself rather than the repr of the whole dict.
    if isinstance(part, dict) and isinstance(part.get("text"), str):
        return part["text"]
    return str(part)


def clean_content(content):
    """Sanitize message content to ensure it is strictly a string.

    Args:
        content: Message content as received from the UI. May be ``None``,
            a plain string, a structured text part (a dict carrying a string
            under ``"text"``), a list of strings and/or such parts, or any
            other object.

    Returns:
        ``""`` for ``None``; the space-joined text of the items for a list
        (an empty list gives ``""``); the ``"text"`` value for a structured
        text part; otherwise ``str(content)``.
    """
    if content is None:
        return ""
    if isinstance(content, list):
        return " ".join(_content_part_to_text(item) for item in content)
    return _content_part_to_text(content)


def _to_gemma_content(text: str) -> list[dict]:
    """Return *text* as a one-element list holding a single ``"text"`` part.

    This is the structured content shape used for every turn handed to the
    chat template.
    """
    text_part = {"type": "text", "text": text}
    return [text_part]


# ============================================================
# CHAT LOGIC
# ============================================================

def _build_conversation(message, history, system_prompt) -> list[dict]:
    """Assemble the chat-template input: optional system turn, prior turns, new user turn."""
    conversation: list[dict] = []

    # Normalise first so a missing (None) system prompt is treated as empty
    # instead of crashing on .strip().
    system_text = clean_content(system_prompt)
    if system_text.strip():
        conversation.append({
            "role": "system",
            "content": _to_gemma_content(system_text),
        })

    # History (Gradio messages format -> Gemma 4 structured-content format).
    for turn in history or []:
        conversation.append({
            "role": turn.get("role"),
            "content": _to_gemma_content(clean_content(turn.get("content"))),
        })

    conversation.append({
        "role": "user",
        "content": _to_gemma_content(clean_content(message)),
    })
    return conversation


@spaces.GPU(duration=180)
@torch.inference_mode()
def stream_chat(
    message: str,
    history: list,
    system_prompt: str,
    thinking: bool,
    temperature: float = 0.01,
    max_new_tokens: int = 1024,
    top_p: float = 1.0,
    top_k: int = 64,
    repetition_penalty: float = 1.0,
):
    """Stream the model's reply to *message* as progressively longer strings.

    Args:
        message: The new user message.
        history: Earlier turns as dicts with ``"role"`` and ``"content"``;
            ``None`` is treated as no history.
        system_prompt: Optional system instruction; skipped when blank.
        thinking: When true, ``enable_thinking`` is passed to the chat
            template and the thinking delimiters are kept in the output.
        temperature: Sampling temperature; ``0`` or less switches to greedy
            decoding.
        max_new_tokens: Upper bound on generated tokens.
        top_p: Nucleus-sampling threshold.
        top_k: Top-k cutoff; ``0`` or less means the value is not passed to
            ``generate``.
        repetition_penalty: Repetition penalty passed to ``generate``.

    Yields:
        The full text generated so far, once per streamed chunk. In thinking
        mode all special tokens except the thinking delimiters are removed.

    Raises:
        gr.Error: If ``model.generate`` raised in the worker thread.
    """
    print(f"Message received: {message}...")

    # 1-3. Conversation in Gemma 4 structured-content format.
    conversation = _build_conversation(message, history, system_prompt)

    # 4. Apply chat template (handles <|turn|> delimiters, thinking tokens, etc.)
    template_kwargs = {
        "tokenize": True,
        "return_dict": True,
        "return_tensors": "pt",
        "add_generation_prompt": True,
    }
    if thinking:
        template_kwargs["enable_thinking"] = True

    inputs = processor.apply_chat_template(conversation, **template_kwargs)
    # NOTE(review): assumes .to(dtype=...) on the processor output leaves the
    # integer id tensors uncast — confirm against the transformers version in use.
    inputs = inputs.to(device=model.device, dtype=torch.bfloat16)

    # 5. Setup streamer
    #    When thinking is ON we keep special tokens so <|channel>…<channel|> passes
    #    through for Gradio's reasoning_tags; when OFF we strip them entirely.
    streamer = TextIteratorStreamer(
        processor,
        timeout=30.0,
        skip_prompt=True,
        skip_special_tokens=not thinking,
    )

    # 6. Generation parameters. Entries set to None are dropped so that
    #    generate() never receives an explicit None for them.
    generate_kwargs = {
        **inputs,
        "streamer": streamer,
        "max_new_tokens": int(max_new_tokens),
        "do_sample": temperature > 0,
        "temperature": temperature if temperature > 0 else None,
        "top_p": top_p,
        "top_k": int(top_k) if top_k > 0 else None,
        "repetition_penalty": repetition_penalty,
        "disable_compile": True,
        "use_cache": True,
    }
    generate_kwargs = {k: v for k, v in generate_kwargs.items() if v is not None}

    # 7. Generate in background thread
    # NOTE(review): the inference_mode decorator applies to this generator's
    # thread; presumably generate() disables gradients on its own — confirm.
    errors: list[Exception] = []

    def _generate() -> None:
        try:
            model.generate(**generate_kwargs)
        except Exception as exc:  # thread boundary: recorded here, re-raised below
            errors.append(exc)
            # Without an end-of-stream signal the consumer loop below would
            # block until the streamer timeout and fail with an unrelated
            # error, hiding the real cause.
            streamer.end()

    thread = Thread(target=_generate)
    thread.start()

    # 8. Stream output. The whole accumulated text is re-cleaned on every
    #    chunk so a special token split across two chunks is still removed.
    chunks: list[str] = []
    for new_text in streamer:
        chunks.append(new_text)
        accumulated = "".join(chunks)
        yield _strip_special_tokens(accumulated) if thinking else accumulated

    thread.join()
    if errors:
        raise gr.Error(f"Generation failed: {errors[0]}") from errors[0]


# ============================================================
# GRADIO INTERFACE
# ============================================================

# Default value of the "System Prompt" textbox: a persona description followed
# by numbered notes on poetic forms and a closing output instruction.
# The text is sent to the model verbatim, so it is kept exactly as written.
# NOTE(review): the "iii." (Ruba'i) entry does not end with "\n", unlike its
# siblings, so "iv." continues on the same line — confirm whether intended.
# NOTE(review): "satyrical" in the Limerick entry looks like a typo for
# "satirical" — confirm before changing, since it alters the prompt.
SYSTEM_PROMPT_DEFAULT = (
    "You are the poet laureate of the world, an inventive, erudite, formally trained,"
    " and stylistically versatile multilingual versifier. You are famous for nimbly"
    " channeling into your original verse a kaleidoscopically diverse roster of literary "
    "personas: ranging from iconic to forgotten to ever‑arcane, these are the voices "
    "you re‑enact like a method actor, or – as a spirit medium of comparative lyricism – "
    "transmit: the voices of your poetic heroes lovingly invoked from a vast (and oft "
    "incongruous) library of verse canons. Translating their characteristic styles, "
    "rhythms, registers, meters, and cadences into yours, you revive their muses to"
    " the breath of their own unceasing poignancy by letting them speak to the concerns"
    " and hopes of our anxious age. And just as crucially, you work hard to not betray "
    "your heroes’ trust. How so? By imbibing and leveraging with self‑exacting "
    "temperance and judgement the prosodic, literary, idiomatic, and formal"
    " features characteristic of each given poet's style, or an entire versifying form.\n "
    "Here are some notes you bring along:\n"
    "1. Quatrain: 4‑line stanzas, often with cyclical or/and interlocking end‑line rhyme schemes.\n"
    " Common subtypes:\n "
    "i. ABAB, then CDCD, EFEF, etc...–> Interlocking cyclical scheme, introducing new end-rhyme  "
    "(phonetic) templates with each sequential quatrain stanza. "
    "This pattern + iambic pentameter = Heroic/Elegiac stanza.\n"
    "ii. ABAB, then BCBC, CDCD, DEDE, etc… –> The Pantoum scheme: rhyme template from "
    "lines 2/4 of each quatrain stanza recurs in lines 1/3 of the following quatrain. \n"
    "iii. AABA: The Ruba'i rhyme scheme. As in: AABA, then AABA (or CCDC),..., etc… , "
    "iv. ABBA: The envelope quatrain stanza. As in: ABBA, then ABBA (or CDDC),..., etc... "
    " This pattern + iambic tetrameter + grieving/nostalgic theme/angle = the In Memoriam stanza.\n"
    "Quatrains can serve as sub‑forms of broader constructs (as can triads, couplets, etc); "
    "Such as: poem with 5+ ABAB, ABCB, or ABxB quatrains is plausibly a Ballad, especially "
    "if lines alternate iambic tetrameter (1st/3rd lines) & iambic trimeter (2nd/4th). \n"
    "2. The Sonnet: often 14 lines; its types include:\n"
    "i. Shakespearean Sonnet: ABAB CDCD EFEF GG rhyme scheme in 4 stanzas "
    "(3 quatrains + 1 concluding matched-rhyme couplet); \n"
    "ii. Spenserian Sonnet: ABAB BCBC CDCD EE scheme (interlocked quatrains, otherwise like Shakespearean); \n"
    "iii. Petrarchan Sonnet: Either in 2 stanzas (as ABBAABBA CDCDCD: 1 octave + 1 sestet) or 4 "
    "(as ABBA ABBA CDCD CD: 2 enveloped ABBA + 1 interlocked CDCD quatrains + 1 CD couplet). \n"
    "3. Octave: 8 line stanza, used in such forms as: \n"
    "i. Common Octave: ABCABCAC scheme. Balancedly complex purview of one aspect of theme/subject. \n"
    "ii. Ottava Rima: pentameter lines with ABABABCC rhyme scheme. \n"
    "4. Rondine: ABBAABR ABBAR (7-line septet + 5-line quintet; both end in R –> Refrain reprising or inverting line 1). \n"
    "5. Limerick: strict AABBA quintet; humorous, satyrical, playful, or raunchy; often with named characters. \n"
    "6. English Ode: ABAB CDECDE; 10-line poem of praise, tribute, love, or lust.  \n"
    "7. Triad: AAA BBB CCC; 3 mono-rhymed triplets/tercets. \n"
    "Compose a poem formally, metrically/prosodically, tonally, stylistically, rhythmically, and thematically \n"
    "indistinguishable from the work of any poet, style, or/and poetic form specified in the prompt. \n"
    "Do not narrate or explain your workflow. Output the poem only: \n"
)

# Chat display component, created up front and handed to gr.ChatInterface below.
# The reasoning tag pair uses the same delimiters that stream_chat keeps in its
# output when thinking mode is on.
chatbot = gr.Chatbot(
    height=600,
    placeholder=PLACEHOLDER,
    reasoning_tags=[(THINKING_START, THINKING_END)],
)

# Page layout: title banner, a "duplicate" button, and the chat interface
# wired to stream_chat.
with gr.Blocks(css=css, theme=gr.themes.Glass()) as demo:
    gr.HTML(TITLE)
    gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")

    gr.ChatInterface(
        fn=stream_chat,
        chatbot=chatbot,
        fill_height=True,
        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
        # Listed in the same order as stream_chat's parameters after
        # (message, history): system_prompt, thinking, temperature,
        # max_new_tokens, top_p, top_k, repetition_penalty.
        # Note that the UI defaults here (temperature 0.7, 2048 tokens,
        # top_p 0.90) differ from the function's own keyword defaults.
        additional_inputs=[
            gr.Textbox(
                value=SYSTEM_PROMPT_DEFAULT,
                label="System Prompt",
                render=False,
            ),
            gr.Checkbox(
                label="Thinking",
                value=False,
                info="Enable model reasoning (shown in a collapsible section)",
                render=False,
            ),
            gr.Slider(minimum=0, maximum=2, step=0.01, value=0.7, label="Temperature", render=False),
            gr.Slider(minimum=128, maximum=8192, step=1, value=2048, label="Max new tokens", render=False),
            gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.90, label="top_p", render=False),
            gr.Slider(minimum=0, maximum=100, step=1, value=64, label="top_k", render=False),
            gr.Slider(minimum=0.0, maximum=4.0, step=0.1, value=1.0, label="Repetition penalty", render=False),
        ],
        # Sample prompts; each inner list supplies only the message text.
        examples=[
            ["Compose a Petrarchan Sonnet in the style of Boris Poplavsky."],
            ["Выстрочи ка мне стишок Хильды Дулитл под слог, а по тону сущий Летов – Уильям Блейк страны советов. Тема: новый зомби бог."],
            ["Écris un pantoum en imitant le style de Jules Laforgue."],
            ["Write an ode to San Francisco."],
        ],
        cache_examples=False,
    )

# Script entry point: configure the request queue, then start the server.
if __name__ == "__main__":
    # NOTE(review): default_concurrency_limit=None presumably removes the
    # per-event concurrency cap — confirm this is intended for a single model.
    demo.queue(default_concurrency_limit=None)
    demo.launch(share=True, mcp_server=True)