import gradio as gr
import os
import shutil
import zipfile
import sherpa_onnx
import csv
import numpy as np
import gc
import re
import time
from pydub import AudioSegment
from pydub.silence import split_on_silence
from huggingface_hub import hf_hub_download

# --- CONFIGURATION ---
# Hugging Face repository the model files are downloaded from
# (load_asr_model fetches them with repo_type="space").
MY_REPO_ID = "hoanglinhn0/CUTDATA"
# ONNX files of the transducer model: encoder, decoder and joiner.
ENCODER_FILENAME = "encoder-epoch-20-avg-10.onnx"
DECODER_FILENAME = "decoder-epoch-20-avg-10.onnx"
JOINER_FILENAME  = "joiner-epoch-20-avg-10.onnx"
# File that is copied locally and handed to the recognizer as its `tokens` file.
# NOTE(review): the name suggests JSON, but it is used as a token table — confirm
# the content of this file in the repository.
TOKENS_FILENAME  = "config.json"

# Sample rate (Hz) passed to the recognizer and used when resampling chunks.
ASR_SAMPLE_RATE = 16000

# --- GLOBAL STATE ---
# Recognizer instance; stays None until load_asr_model() succeeds.
recognizer = None
# "OK" after a successful load, otherwise the text of the load error
# (empty string before the first load attempt).
model_status = ""

def load_asr_model():
    """Download the ASR model files and build the global recognizer.

    The encoder, decoder, joiner and tokens files are fetched from
    ``MY_REPO_ID``; the tokens file is re-written as ``tokens_fixed.txt`` in
    the current working directory and that copy is given to the recognizer.

    Side effects:
        Sets the module-level ``recognizer`` (on success) and ``model_status``.

    Returns:
        ``"OK"`` on success, otherwise the text of the exception that was
        raised. Exceptions are never propagated to the caller.
    """
    global recognizer, model_status

    def _fetch(name):
        # Every model file lives in the same repository.
        return hf_hub_download(repo_id=MY_REPO_ID, filename=name, repo_type="space")

    try:
        print("⏳ Đang tải ASR model lần đầu...")
        encoder_path = _fetch(ENCODER_FILENAME)
        decoder_path = _fetch(DECODER_FILENAME)
        joiner_path = _fetch(JOINER_FILENAME)
        raw_tokens_path = _fetch(TOKENS_FILENAME)

        # Re-write the tokens file as UTF-8 text under a local name.
        local_tokens_path = "tokens_fixed.txt"
        with open(raw_tokens_path, 'r', encoding='utf-8') as src:
            token_text = src.read()
        with open(local_tokens_path, 'w', encoding='utf-8') as dst:
            dst.write(token_text)

        recognizer = sherpa_onnx.OfflineRecognizer.from_transducer(
            encoder=encoder_path,
            decoder=decoder_path,
            joiner=joiner_path,
            tokens=local_tokens_path,
            num_threads=4,
            sample_rate=ASR_SAMPLE_RATE,
            decoding_method="greedy_search",
        )
    except Exception as exc:
        # Report the failure through the status string instead of raising.
        model_status = str(exc)
    else:
        model_status = "OK"
    return model_status

def _transcribe_chunk(segment):
    """Run the global recognizer on one mono pydub segment and return its text."""
    # Resample to the model rate and force 16-bit samples: the division by
    # 32768 below only maps PCM values into [-1.0, 1.0) for 16-bit audio, so
    # 8/24/32-bit sources would otherwise reach the recognizer mis-scaled.
    pcm = segment.set_frame_rate(ASR_SAMPLE_RATE).set_sample_width(2)
    samples = np.array(pcm.get_array_of_samples()).astype(np.float32) / 32768.0

    stream = recognizer.create_stream()
    stream.accept_waveform(ASR_SAMPLE_RATE, samples)
    recognizer.decode_stream(stream)
    return stream.result.text.strip()


def process_audio(audio_files, silence_thresh, min_silence_len):
    """Split audio files on silence, transcribe each chunk and build a dataset zip.

    Args:
        audio_files: Paths of the audio files to process.
        silence_thresh: Silence threshold in dB passed to ``split_on_silence``.
        min_silence_len: Minimum silence length in ms that separates two chunks.

    Returns:
        A ``(zip_path, log_text)`` tuple. ``zip_path`` is ``None`` when the
        model cannot be loaded, no file was given, or processing failed;
        ``log_text`` then carries the error message (preceded by the log lines
        collected so far when the failure happened during processing).

    Side effects:
        Recreates the ``piper_dataset_final`` directory and overwrites
        ``dataset_piper_silence.zip`` in the current working directory.
    """
    # Lazy-load the model on first use.
    if recognizer is None:
        status = load_asr_model()
        if status != "OK":
            return None, f"❌ Lỗi tải ASR Model: {status}"

    if model_status != "OK":
        return None, f"❌ Lỗi ASR Model: {model_status}"
    if not audio_files:
        return None, "Vui lòng chọn ít nhất một file audio."

    # pydub uses this value in range() arithmetic, which rejects floats;
    # UI sliders may deliver the number as a float.
    min_silence_len = int(min_silence_len)

    # Start from an empty output directory on every run.
    temp_dir = "piper_dataset_final"
    if os.path.exists(temp_dir):
        shutil.rmtree(temp_dir)
    os.makedirs(temp_dir, exist_ok=True)

    logs = ["✅ Model đã tải thành công!"]
    csv_data = []
    file_counter = 0  # global across input files, so output names never collide

    try:
        logs.append(f"📂 Đã chọn {len(audio_files)} file audio. Bắt đầu xử lý...")

        for idx, audio_file in enumerate(audio_files, 1):
            # Keep only filesystem-safe characters of the original base name.
            original_name = os.path.splitext(os.path.basename(audio_file))[0]
            original_name = re.sub(r'[^a-zA-Z0-9_-]', '_', original_name)
            logs.append(f"🔄 Đang xử lý file {idx}/{len(audio_files)}: {original_name}")

            start_time = time.time()
            sound = AudioSegment.from_file(audio_file).set_channels(1)

            # Split the whole file in a single pass (no pre-chunking).
            chunks = split_on_silence(
                sound,
                min_silence_len=min_silence_len,
                silence_thresh=silence_thresh,
                keep_silence=200,  # keep 200 ms of silence at both ends of a chunk
            )

            process_time = time.time() - start_time
            logs.append(f"   ⏱️  Cắt silence xong ({process_time:.1f}s) → {len(chunks)} đoạn thô")

            for chunk_orig in chunks:
                if len(chunk_orig) < 200:  # skip chunks shorter than 200 ms
                    continue

                text = _transcribe_chunk(chunk_orig)
                if len(text) <= 2:  # drop empty / near-empty transcriptions
                    continue

                # Export the chunk at its original quality, not the ASR copy.
                filename = f"{original_name}_{file_counter:05d}.wav"
                chunk_orig.export(os.path.join(temp_dir, filename), format="wav")
                csv_data.append([filename, text])
                file_counter += 1

        # Write the metadata file, then zip the whole output directory.
        csv_path = os.path.join(temp_dir, "metadata.csv")
        with open(csv_path, mode='w', encoding='utf-8-sig', newline='') as f:
            writer = csv.writer(f, delimiter='|')
            writer.writerows(csv_data)

        zip_path = "dataset_piper_silence.zip"
        if os.path.exists(zip_path):
            os.remove(zip_path)
        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for root, _, files in os.walk(temp_dir):
                for file in files:
                    # Flat archive: entries are stored without directory prefix.
                    zipf.write(os.path.join(root, file), arcname=file)

        logs.append(f"🎉 HOÀN TẤT! Đã xử lý {len(audio_files)} file → Tạo {file_counter} câu")
        return zip_path, "\n".join(logs)

    except Exception as e:
        # Keep the progress log so the user can see which file failed.
        logs.append(f"❌ Lỗi: {str(e)}")
        return None, "\n".join(logs)
    finally:
        gc.collect()

# --- UI ---
# Gradio layout: inputs (file picker, two sliders, run button) in the left
# column, outputs (log textbox, downloadable zip) in the right column.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="green")) as demo:
    gr.Markdown("# 🎙️ Piper Dataset Maker - Silence Detection (Không còn mất audio)")
    gr.Markdown("""
    **Đã sửa xong lỗi cắt mất audio!**  
    - Giờ chạy trực tiếp trên toàn bộ file → không còn bị cắt ngang câu.  
    - File 1 giờ chỉ mất 5–30 giây (đã test).  
    - **Ngưỡng khoảng lặng (dB)**: -45 mặc định. Giảm xuống -50/-55 nếu cắt quá nhiều câu ngắn.  
    - **Độ dài ngắt câu (ms)**: 500 mặc định. Tăng 800-1000 để câu dài hơn.
    """)
    
    with gr.Row():
        with gr.Column():
            # Multiple files are allowed; the handler receives their paths.
            audio_input = gr.File(
                label="📁 Chọn nhiều file audio (Ctrl + click để chọn nhiều)",
                file_count="multiple",
                type="filepath"
            )
            with gr.Row():
                # Silence threshold in dB and minimum silence length in ms,
                # both forwarded unchanged to process_audio.
                silence_thresh = gr.Slider(-70, -20, value=-45, step=1, label="Ngưỡng khoảng lặng (dB)")
                min_silence_len = gr.Slider(100, 3000, value=500, step=50, label="Độ dài ngắt câu (ms)")
            btn_run = gr.Button("🚀 BẮT ĐẦU TRÍCH XUẤT TẤT CẢ", variant="primary")
        with gr.Column():
            logs = gr.Textbox(label="Nhật ký hệ thống", lines=18)
            file_output = gr.File(label="📥 Tải bộ Dataset ZIP")

    # process_audio returns (zip path or None, log text), mapped in that
    # order onto the download component and the log textbox.
    btn_run.click(
        process_audio,
        inputs=[audio_input, silence_thresh, min_silence_len],
        outputs=[file_output, logs]
    )

# Start the web server only when run as a script: all interfaces, port 7860.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)