# DFK {model_suffix} — 4 Kelas

Fine-tuned dari {model_base} untuk deteksi konten berbahaya Bahasa Indonesia. Output: Label + Reasoning + Confidence Score.

## Kelas

| Label | Deskripsi |
|---|---|
| Fakta | Konten yang sesuai fakta |
| Disinformasi | Informasi yang menyesatkan |
| Fitnah | Tuduhan tanpa bukti |
| Ujaran Kebencian | Konten menyerang kelompok tertentu |

## Cara Panggil via HuggingFace Inference API

Tidak perlu load model — tinggal panggil API dengan HF API key kamu.

import re
from collections import Counter

import requests

HF_API_KEY = "hf_xxxx"  # replace with your own Hugging Face API key
# "{hf_repo}" is an unfilled template placeholder: substitute the model's
# Hub repository id. API_URL is derived from it so the id lives in one place.
MODEL_ID = "{hf_repo}"
API_URL = f"https://api-inference.huggingface.co/models/{MODEL_ID}"
HEADERS = {
    "Authorization": f"Bearer {HF_API_KEY}",
    "Content-Type": "application/json",
}

# Adjacent literals are concatenated into one prompt string; the trailing
# space inside each piece separates the sentences.
# NOTE(review): punctuation was reconstructed from a garbled copy — confirm it
# matches the system prompt used during fine-tuning.
SYSTEM_PROMPT = (
    "Anda adalah sistem deteksi konten DFK (Disinformasi, Fitnah, Kebencian). "
    "Klasifikasikan teks ke dalam: Fakta, Disinformasi, Fitnah, atau Ujaran Kebencian. "
    "Berikan label dalam format: Label: **NamaLabel.** penjelasan: alasan lengkap."
)

def build_prompt(teks):
    """Return the ChatML prompt for classifying *teks*.

    The prompt has a system turn carrying ``SYSTEM_PROMPT``, a user turn
    carrying the text to classify, and an opened assistant turn for the model
    to complete.
    """
    return (
        f"<|im_start|>system\n{SYSTEM_PROMPT}<|im_end|>\n"
        f"<|im_start|>user\nKlasifikasikan teks berikut:\n{teks}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )

def extract_label(text):
    """Return the class label named first in the model output.

    Only the first 80 characters of the lower-cased, stripped text are
    inspected. When several class names occur in that window, the one that
    appears earliest wins, so a label followed by an explanation that mentions
    another class (e.g. "Fakta ... bukan disinformasi") is not misread.

    Returns one of ``"FAKTA"``, ``"DISINFORMASI"``, ``"FITNAH"``,
    ``"UJARAN_KEBENCIAN"``, or ``"UNKNOWN"`` when no class name is found.
    """
    head = text.lower().strip()[:80]
    keywords = {
        "ujaran kebencian": "UJARAN_KEBENCIAN",
        "disinformasi": "DISINFORMASI",
        "fitnah": "FITNAH",
        "fakta": "FAKTA",
    }
    hits = [(head.find(kw), label) for kw, label in keywords.items() if kw in head]
    if not hits:
        return "UNKNOWN"
    return min(hits, key=lambda hit: hit[0])[1]

def extract_reasoning(text):
    """Return the explanation that follows ``penjelasan:`` in the model output.

    The marker is matched case-insensitively and everything after it, across
    newlines, is returned with surrounding whitespace removed. If the marker
    is absent, the whole stripped text is returned.
    """
    import re

    m = re.search(r"penjelasan\s*:\s*(.*)", text, re.DOTALL | re.IGNORECASE)
    return m.group(1).strip() if m else text.strip()

def extract_confidence(text):
    """Estimate a confidence score in [0.0, 1.0] from the wording of *text*.

    Starts at 0.65, subtracts 0.05 for each hedging phrase present and adds
    0.05 for each assertive keyword present, then clamps to [0.0, 1.0] and
    rounds to 4 decimals. Matching is by substring on the lower-cased text.
    """
    tegas = ["terbukti", "jelas", "nyata", "pasti", "faktual", "hoaks", "bohong"]
    ragu = ["mungkin", "kemungkinan", "diduga", "belum pasti", "tidak jelas"]
    remaining = text.lower()
    score = 0.65
    # Hedges are handled first, longest first, and removed once counted:
    # otherwise "belum pasti" / "tidak jelas" would also score as the assertive
    # "pasti" / "jelas", and "kemungkinan" would be penalised twice through
    # its substring "mungkin".
    for phrase in sorted(ragu, key=len, reverse=True):
        if phrase in remaining:
            score -= 0.05
            remaining = remaining.replace(phrase, " ")
    for word in tegas:
        if word in remaining:
            score += 0.05
    return round(min(max(score, 0.0), 1.0), 4)

def classify_via_api(teks, num_trials=3):
    """Classify *teks* by sampling the hosted model several times and voting.

    Sends the same prompt ``num_trials`` times with sampling enabled, parses
    each completion into label / reasoning / confidence, and returns the
    majority label. A failed trial is reported with ``print`` and skipped.

    Returns:
        ``{"error": "Semua trial gagal"}`` when no trial succeeded; otherwise
        a dict with ``label`` (most frequent label), ``reasoning`` (from the
        highest-confidence trial that voted for it), ``confidence`` (mean over
        those trials, as a percentage string), ``consistency``
        (``"votes/num_trials"``) and ``ambiguous`` (True when the label got a
        single vote or the mean confidence is below 0.45).
    """
    payload = {
        "inputs": build_prompt(teks),
        "parameters": {
            "max_new_tokens": 256,
            "temperature": 0.7,
            "do_sample": True,
            "return_full_text": False,
        },
    }
    trials = []
    for i in range(num_trials):
        try:
            resp = requests.post(API_URL, headers=HEADERS, json=payload, timeout=60)
            resp.raise_for_status()
            output = resp.json()
            # A list response carries the completion in its first item; any
            # other shape is stringified and parsed as-is.
            gen_text = output[0]["generated_text"] if isinstance(output, list) else str(output)
            trials.append({
                "label": extract_label(gen_text),
                "reasoning": extract_reasoning(gen_text),
                "confidence": extract_confidence(gen_text),
            })
        except (requests.RequestException, ValueError, LookupError, TypeError, AttributeError) as e:
            # Network/HTTP failures, invalid JSON, or an unexpected response
            # shape: report and move on to the next trial.
            print(f"Trial {i+1} error: {e}")
    if not trials:
        return {"error": "Semua trial gagal"}

    votes = Counter(t["label"] for t in trials)
    best_label, cnt = votes.most_common(1)[0]
    winners = [t for t in trials if t["label"] == best_label]
    avg_conf = round(sum(t["confidence"] for t in winners) / len(winners), 4)
    best_reason = max(winners, key=lambda x: x["confidence"])["reasoning"]
    return {
        "label": best_label,
        "reasoning": best_reason,
        "confidence": f"{avg_conf*100:.1f}%",
        "consistency": f"{cnt}/{num_trials}",
        "ambiguous": cnt == 1 or avg_conf < 0.45,
    }

# Usage example
contoh = [
    "Air rebusan bawang putih bisa menyembuhkan virus COVID dalam 24 jam.",
    "BPOM mengkonfirmasi vaksin COVID-19 sudah melalui uji klinis tiga fase.",
    "Gubernur X terbukti korupsi dana bansos tanpa bukti yang jelas.",
]
for teks in contoh:
    hasil = classify_via_api(teks)
    if "error" in hasil:
        # All trials failed: the result carries only an "error" key, so the
        # label/reasoning lookups below would raise KeyError.
        print(f"Error      : {hasil['error']}")
        print()
        continue
    print(f"Label      : {hasil['label']}")
    print(f"Reasoning  : {hasil['reasoning'][:100]}")
    print(f"Confidence : {hasil['confidence']}")
    print(f"Consistency: {hasil['consistency']}")
    print()

## Deploy Streamlit

# app.py — run with: streamlit run app.py
import re
from collections import Counter

import requests
import streamlit as st

HF_API_KEY = st.secrets["HF_API_KEY"]  # stored in .streamlit/secrets.toml
# "{hf_repo}" is an unfilled template placeholder: substitute the model's
# Hub repository id. API_URL is derived from it so the id lives in one place.
MODEL_ID = "{hf_repo}"
API_URL = f"https://api-inference.huggingface.co/models/{MODEL_ID}"
HEADERS = {
    "Authorization": f"Bearer {HF_API_KEY}",
    "Content-Type": "application/json",
}

# Adjacent literals are concatenated into one prompt string.
# NOTE(review): punctuation was reconstructed from a garbled copy — confirm it
# matches the system prompt used during fine-tuning.
SYSTEM_PROMPT = (
    "Anda adalah sistem deteksi konten DFK. "
    "Klasifikasikan: Fakta, Disinformasi, Fitnah, atau Ujaran Kebencian. "
    "Format: Label: **NamaLabel.** penjelasan: alasan."
)

def build_prompt(teks):
    """Return the ChatML prompt for classifying *teks*.

    The prompt has a system turn carrying ``SYSTEM_PROMPT``, a user turn
    carrying the text to classify, and an opened assistant turn for the model
    to complete.
    """
    return (
        f"<|im_start|>system\n{SYSTEM_PROMPT}<|im_end|>\n"
        f"<|im_start|>user\nKlasifikasikan:\n{teks}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )

def extract_label(text):
    """Return the class label named first in the model output.

    Only the first 80 characters of the lower-cased, stripped text are
    inspected. When several class names occur in that window, the one that
    appears earliest wins, so a label followed by an explanation that mentions
    another class (e.g. "Fakta ... bukan disinformasi") is not misread.

    Returns one of ``"FAKTA"``, ``"DISINFORMASI"``, ``"FITNAH"``,
    ``"UJARAN_KEBENCIAN"``, or ``"UNKNOWN"`` when no class name is found.
    """
    # Strip first so leading whitespace cannot push the label out of the window.
    head = text.lower().strip()[:80]
    keywords = {
        "ujaran kebencian": "UJARAN_KEBENCIAN",
        "disinformasi": "DISINFORMASI",
        "fitnah": "FITNAH",
        "fakta": "FAKTA",
    }
    hits = [(head.find(kw), label) for kw, label in keywords.items() if kw in head]
    if not hits:
        return "UNKNOWN"
    return min(hits, key=lambda hit: hit[0])[1]

def extract_reasoning(text):
    """Return the explanation that follows ``penjelasan:`` in the model output.

    The marker is matched case-insensitively and everything after it, across
    newlines, is returned with surrounding whitespace removed. If the marker
    is absent, the whole stripped text is returned.
    """
    import re

    m = re.search(r"penjelasan\s*:\s*(.*)", text, re.DOTALL | re.IGNORECASE)
    return m.group(1).strip() if m else text.strip()

def classify(teks, num_trials=3):
    """Classify *teks* by sampling the hosted model several times and voting.

    Sends the same prompt ``num_trials`` times with sampling enabled and
    parses each completion into a label and its reasoning. A failed trial is
    reported with ``print`` and skipped.

    Returns:
        ``None`` when no trial succeeded; otherwise a dict with ``label``
        (most frequent label), ``reasoning`` (from the first trial that voted
        for it), ``consistency`` (``"votes/num_trials"``) and ``ambiguous``
        (True when the winning label got a single vote).
    """
    payload = {
        "inputs": build_prompt(teks),
        "parameters": {
            "max_new_tokens": 256,
            "temperature": 0.7,
            "do_sample": True,
            "return_full_text": False,
        },
    }
    trials = []
    for i in range(num_trials):
        try:
            r = requests.post(API_URL, headers=HEADERS, json=payload, timeout=60)
            # Without this check an HTTP error body would be stringified and
            # classified as if it were model output.
            r.raise_for_status()
            out = r.json()
            txt = out[0]["generated_text"] if isinstance(out, list) else str(out)
            trials.append({"label": extract_label(txt), "reasoning": extract_reasoning(txt)})
        except (requests.RequestException, ValueError, LookupError, TypeError, AttributeError) as e:
            # Best effort: report the failed trial and keep sampling. Unlike a
            # bare ``except``, this lets KeyboardInterrupt/SystemExit through.
            print(f"Trial {i+1} error: {e}")
    if not trials:
        return None

    votes = Counter(t["label"] for t in trials)
    best, cnt = votes.most_common(1)[0]
    best_rsn = next(t["reasoning"] for t in trials if t["label"] == best)
    return {
        "label": best,
        "reasoning": best_rsn,
        "consistency": f"{cnt}/{num_trials}",
        "ambiguous": cnt == 1,
    }

# Page layout: a text box plus a button that runs the classifier and renders
# the voted label, vote consistency, and the model's reasoning.
st.set_page_config(page_title="DFK Detector", page_icon="🔍")
st.title("🔍 Deteksi Konten DFK")
st.caption("Disinformasi · Fitnah · Kebencian · Fakta")
teks = st.text_area("Masukkan teks:", height=150)
if st.button("Klasifikasikan", type="primary"):
    if teks.strip():
        with st.spinner("Menganalisis..."):
            hasil = classify(teks)
        if hasil:
            # Colour name used in Streamlit's ":color[text]" markdown syntax.
            warna = {
                "FAKTA": "green",
                "DISINFORMASI": "red",
                "FITNAH": "orange",
                "UJARAN_KEBENCIAN": "red",
                "UNKNOWN": "gray",
            }
            st.markdown(f"### Label: :{warna.get(hasil['label'], 'gray')}[{hasil['label']}]")
            st.markdown(f"**Konsistensi:** {hasil['consistency']}")
            if hasil["ambiguous"]:
                st.warning("Prediksi ambigu — pertimbangkan review manual")
            st.info(hasil["reasoning"])
        else:
            st.error("Gagal mendapat respons")
    else:
        st.warning("Masukkan teks dulu")

## Training Info

- Method: QLoRA 4-bit + LoRA (r={lora_r_val}, alpha={lora_a_val})
- Dataset: 4 kelas DFK · 24.513 baris · Bahasa Indonesia
- Masking: System+User di-mask, hanya Assistant yang dihitung loss
- Chat Template: ChatML
Downloads last month
131
Safetensors
Model size
6B params
Tensor type
BF16
·
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 1 Ask for provider support

Space using ggapar/Ministral-3-8B-Base-2512-DFK 1