"""Configuration: emotions, topics, prompts, and model settings from Anthropic's appendix."""

import os

# BASE_DIR is the directory holding this module; ROOT_DIR is its parent, which
# is where the Anthropic appendix text files are looked up.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT_DIR = os.path.dirname(BASE_DIR)
# The data and results directories both sit directly under BASE_DIR.
DATA_DIR, RESULTS_DIR = (
    os.path.join(BASE_DIR, subdir) for subdir in ("data", "results")
)

# --- Parse Anthropic's appendix files ---

def _load_lines(filename):
    """Return the text of ``filename`` (resolved against ROOT_DIR), stripped.

    Despite the name, the result is a single string with leading and trailing
    whitespace removed, not a list of lines; callers split it as needed.
    The file is decoded as UTF-8.
    """
    with open(os.path.join(ROOT_DIR, filename), encoding="utf-8") as handle:
        contents = handle.read()
    return contents.strip()


# Emotions from Anthropic's paper appendix (171 expected). The file is one
# comma-separated list; surrounding whitespace and empty entries are dropped.
_raw_emotions = _load_lines("anthropic_emotions.txt")
EMOTIONS = [name for name in map(str.strip, _raw_emotions.split(",")) if name]

# Topics from Anthropic's paper appendix (100 expected), one per line;
# surrounding whitespace and blank lines are dropped.
_raw_topics = _load_lines("anthropic_topics.txt")
TOPICS = [topic for topic in map(str.strip, _raw_topics.split("\n")) if topic]

# Story generation prompt (emotion word must NOT appear in stories)
STORY_PROMPT = _load_lines("anthropic_prompt.txt")

# Neutral dialogue prompt (from anthropic_all.txt lines 253-355).
# Template fields: {topic} and {n_stories} (the number of dialogues requested).
# NOTE(review): presumably filled in by the caller with str.format — confirm.
# The text contains no other curly braces.
NEUTRAL_PROMPT = """Write {n_stories} different dialogues based on the following topic.


Topic: {topic}


The dialogue should be between two characters:
- Person (a human)
- AI (an AI assistant)


The Person asks the AI a question or requests help with a task, and the AI provides a helpful response.


The first speaker turn should always be from Person.


Format the dialogues like so:


[optional system instructions]


Person: [line]


AI: [line]


Person: [line]


AI: [line]


[continue for 2-6 exchanges]


[dialogue 2]


etc.


IMPORTANT: Always put a blank line before each speaker turn. Each turn should start with "Person:" or "AI:" on its own line after a blank line.


Generate a diverse mix of dialogue types across the {n_stories} examples:
- Some, but not all should include a system prompt at the start. These should come before the first Person turn. No tag like "System:" is needed, just put the instructions at the top. You can use "you" or "The assistant" to refer to the AI in the system prompt.
- Some should be about code or programming tasks
- Some should be factual questions (science, history, math, geography)
- Some should be work-related tasks (writing, analysis, summarization)
- Some should be practical how-to questions
- Some should be creative but neutral tasks (brainstorming names, generating lists)
- If it's natural to do so given the topic, it's ok for the dialogue to be a single back and forth (Person asks a question, AI answers), but at least some should have multiple exchanges.


CRITICAL REQUIREMENT: These dialogues must be completely neutral and emotionless.
- NO emotional content whatsoever - not explicit, not implied, not subtle
- The Person should not express any feelings (no frustration, excitement, gratitude, worry, etc.)
- The AI should not express any feelings (no enthusiasm, concern, satisfaction, etc.)
- The system prompt, if present, should not mention emotions at all, nor contain any emotionally charged language
- Avoid emotionally-charged topics entirely
- Use matter-of-fact, neutral language throughout
- No pleasantries (avoid "I'd be happy to help", "Great question!", etc.)
- Focus purely on information exchange and task completion"""

# --- Model configs ---

# Per-model settings keyed by a short model name (the ``model_key`` accepted by
# get_extraction_layers, which reads "num_layers").
# Table columns: key, model_id, quantization, num_layers, hidden_dim.
MODELS = {
    key: {
        "model_id": model_id,
        "quantization": quantization,
        "num_layers": num_layers,
        "hidden_dim": hidden_dim,
    }
    for key, model_id, quantization, num_layers, hidden_dim in (
        ("e4b", "google/gemma-4-E4B-it", None, 42, 2560),
        ("31b", "google/gemma-4-31B-it", "4bit", 60, 5376),
    )
}

# --- Extraction settings ---

# NOTE(review): presumably the number of stories / neutral dialogues requested
# per generation prompt (the {n_stories} template field) — confirm against the
# generation code.
N_NEUTRAL_PER_TOPIC = 12
N_STORIES_PER_PROMPT = 12

# NOTE(review): presumably the variance cutoff used by the denoising step —
# confirm against the extraction code.
DENOISING_VARIANCE_THRESHOLD = 0.5

# NOTE(review): presumably the first token position included when extracting —
# confirm against the extraction code.
START_TOKEN = 50


def get_extraction_layers(model_key):
    """Return the sorted list of layer indices to extract from.

    The selection is every 5th layer (5, 10, ... below ``num_layers``) plus
    the layer at two-thirds depth, ``int(num_layers * 2 / 3)``, with no
    duplicates.

    Raises:
        KeyError: if ``model_key`` is not a key of ``MODELS``.
    """
    num_layers = MODELS[model_key]["num_layers"]
    two_thirds = int(num_layers * 2 / 3)
    # A set absorbs the case where the 2/3 point is already a multiple of 5.
    selected = set(range(5, num_layers, 5))
    selected.add(two_thirds)
    return sorted(selected)


def get_results_dir(model_key):
    """Return the results path for ``model_key``: ``RESULTS_DIR/gemma4-<model_key>``.

    The path is only constructed here; the directory is not created.
    """
    return os.path.join(RESULTS_DIR, "gemma4-{}".format(model_key))