| import gradio_client.utils as gc_utils |
|
|
# Keep a handle on gradio_client's own converter so the patched wrapper
# defined in this file can delegate to it for every non-boolean schema.
_original_json_schema_to_python_type = gc_utils._json_schema_to_python_type
|
|
def patched_json_schema_to_python_type(schema, defs=None):
    """Wrap gradio_client's schema converter so boolean schemas are tolerated.

    Args:
        schema: A JSON-schema fragment. When it is a bare ``bool`` the
            wrapped converter is not called at all.
        defs: Schema definitions, forwarded unchanged to the wrapped
            converter.

    Returns:
        An empty dict for boolean schemas; otherwise whatever the wrapped
        converter returns for ``(schema, defs)``.
    """
    # Boolean schemas are short-circuited; everything else is delegated.
    return (
        {}
        if isinstance(schema, bool)
        else _original_json_schema_to_python_type(schema, defs)
    )
|
|
# Install the wrapper in place of the library function. Code inside
# gradio_client.utils that looks the name up at call time will now reach
# the boolean-tolerant version.
gc_utils._json_schema_to_python_type = patched_json_schema_to_python_type
|
|
|
|
| import logging |
| import os |
| os.makedirs("tmp", exist_ok=True) |
| os.environ['TMP_DIR'] = "tmp" |
| import subprocess |
| import shutil |
| import glob |
| import gradio as gr |
| import numpy as np |
| from apscheduler.schedulers.background import BackgroundScheduler |
| import json |
| from io import BytesIO |
|
|
| from src.radial.radial import create_plot |
| from gradio_leaderboard import Leaderboard, SelectColumns |
| from gradio_space_ci import enable_space_ci |
| from src.display.about import INTRODUCTION_TEXT, TITLE, LLM_BENCHMARKS_TEXT |
| from src.display.css_html_js import custom_css |
| from src.display.utils import AutoEvalColumn, fields |
| from src.envs import API, H4_TOKEN, HF_HOME, REPO_ID, RESET_JUDGEMENT_ENV |
| from src.leaderboard.build_leaderboard import build_leadearboard_df, download_openbench, download_dataset |
| import huggingface_hub |
|
|
|
|
# Set the environment switch for Gradio analytics to "false".
os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"
# Root logger: INFO and above, each record prefixed with timestamp and level.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
# Hook from the gradio_space_ci package, called once at import time.
# NOTE(review): presumably enables CI for this Space — confirm against the package docs.
enable_space_ci()
|
|
|
|
def handle_file_upload(file_bytes):
    """Parse the uploaded file's raw bytes as JSON.

    The bytes are parsed in memory, so no temporary file path is involved.

    Args:
        file_bytes: Raw content of the uploaded file, or ``None`` when
            there is no file.

    Returns:
        The decoded JSON value, or ``None`` when ``file_bytes`` is ``None``.

    Raises:
        UnicodeDecodeError: If the bytes are not valid UTF-8.
        json.JSONDecodeError: If the text is not valid JSON.
    """
    if file_bytes is None:
        # No file (e.g. the component was cleared): there is nothing to parse.
        logging.info("File upload handler called without a file")
        return None

    logging.info("File uploaded (bytes). Size: %d bytes", len(file_bytes))
    # "utf-8-sig" accepts plain UTF-8 and also strips a leading BOM, which
    # json.loads would otherwise reject once the text has been decoded.
    return json.loads(file_bytes.decode("utf-8-sig"))
|
|
def submit_file(v, mn):
    """Validate an uploaded evaluation result and upload it as a submission.

    The uploaded object is never modified, so the same upload can be
    submitted again (for example after correcting the model name).

    Args:
        v: Parsed JSON of the uploaded file, or ``None`` when nothing has
            been uploaded. It must hold a ``'results'`` dict in which every
            benchmark column maps to a dict containing ``'acc,none'``, and
            a ``'config'`` dict containing ``'model_dtype'``.
        mn: Model name entered by the user. Surrounding whitespace is
            dropped and ``'/'`` becomes ``'__'`` in the uploaded file name.

    Returns:
        ``"Success!"`` after the upload, otherwise a message describing
        the first validation problem found.

    Raises:
        Whatever ``API.upload_file`` raises; upload errors are not caught.
    """
    logging.info('START SUBMITTING!!!')

    # Submit can be clicked before any file was uploaded; the state is None then.
    if v is None:
        return "Please upload a JSON file before submitting"
    if not isinstance(v, dict) or 'results' not in v:
        return "Invalid JSON: missing 'results' key"

    results = v['results']
    if not isinstance(results, dict):
        return "Invalid JSON: 'results' must be an object"

    columns = (
        'mmlu_translated_kk', 'kk_constitution_mc', 'kk_dastur_mc',
        'kazakh_and_literature_unt_mc', 'kk_geography_unt_mc',
        'kk_world_history_unt_mc', 'kk_history_of_kazakhstan_unt_mc',
        'kk_english_unt_mc', 'kk_biology_unt_mc', 'kk_human_society_rights_unt_mc',
    )

    # Collect the flattened accuracies separately instead of writing them
    # back into ``results``: that dict is the shared upload state.
    scores = {}
    for column in columns:
        column_result = results.get(column)
        if not isinstance(column_result, dict):
            return f"Missing or invalid column: {column}"
        if 'acc,none' not in column_result:
            return f"Missing 'acc,none' key in column: {column}"
        scores[column] = column_result['acc,none']

    config = v.get('config')
    if not isinstance(config, dict) or 'model_dtype' not in config:
        return "Missing 'config' or 'model_dtype' in JSON"

    model_name = mn.strip() if isinstance(mn, str) else ""
    if not model_name:
        # A blank name would otherwise be uploaded as "model_data/external/.json".
        return "Model name is required"

    # Same key layout as before: the original 'results' entries with the
    # benchmark columns replaced by their accuracy, plus the metadata fields.
    new_file = {
        **results,
        **scores,
        'model': model_name,
        'model_dtype': config['model_dtype'],
        'ppl': 0,
    }
    logging.info('WE READ FILE: %s', new_file)

    buf = BytesIO(json.dumps(new_file).encode('utf-8'))
    API.upload_file(
        path_or_fileobj=buf,
        path_in_repo="model_data/external/" + model_name.replace('/', '__') + ".json",
        repo_id="kz-transformers/s-openbench-eval",
        repo_type="dataset",
    )

    # Raise the flag read by the scheduled update job.
    os.environ[RESET_JUDGEMENT_ENV] = "1"
    return "Success!"
|
|
|
|
def restart_space():
    """Ask the Hub API to restart this Space, then call download_openbench().

    The Space is identified by the project-level ``REPO_ID`` constant.
    """
    space_id = REPO_ID
    API.restart_space(repo_id=space_id)
    download_openbench()
|
|
def update_plot(selected_models):
    """Return the plot produced by ``create_plot`` for the selected models.

    Args:
        selected_models: Current selection of the "Models" dropdown,
            passed through unchanged.
    """
    figure = create_plot(selected_models)
    return figure
|
|
|
|
def build_demo():
    """Assemble the Gradio Blocks app: leaderboard, submission and analytics tabs.

    Calls ``download_openbench()`` and ``build_leadearboard_df()`` first, then
    lays out the UI and wires the upload/submit/plot callbacks.

    NOTE(review): the nesting of the ``with`` blocks below was reconstructed
    because the original indentation was not available — confirm the layout.

    Returns:
        The configured ``gr.Blocks`` instance (not yet launched).
    """
    download_openbench()
    demo = gr.Blocks(title="Kaz LLM LB", css=custom_css)
    leaderboard_df = build_leadearboard_df()
    with demo:
        gr.HTML(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

        # NOTE(review): all three tabs below share the same elem_id.
        with gr.Tabs(elem_classes="tab-buttons"):
            # Tab 0: the leaderboard table itself.
            with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
                Leaderboard(
                    value=leaderboard_df,
                    datatype=[c.type for c in fields(AutoEvalColumn)],
                    select_columns=SelectColumns(
                        default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
                        cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.dummy],
                        label="Select Columns to Display:",
                    ),
                    search_columns=[AutoEvalColumn.model.name],
                )

            # Tab 3: submission form (model name + JSON result file).
            with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=3):
                with gr.Row():
                    gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
                with gr.Row():
                    gr.Markdown("# ✨ Submit your model here!", elem_classes="markdown-text")

                with gr.Column():
                    model_name_textbox = gr.Textbox(label="Model name")

                    # type="binary": the upload callback receives the file content as bytes.
                    file_output = gr.File(
                        label="Drag and drop JSON file judgment here",
                        type="binary"
                    )

                    # Holds the parsed JSON between the upload event and the submit click.
                    uploaded_file = gr.State()

                    with gr.Row():
                        with gr.Column():
                            # NOTE(review): the string is passed positionally; in Gradio the
                            # first Textbox argument is presumably the initial value, not
                            # the label — confirm this is intended.
                            out = gr.Textbox("Submission Status")

                            submit_button = gr.Button("Submit File", variant='primary')

                    # Parse the file as soon as it is uploaded and stash the result in state.
                    file_output.upload(
                        fn=handle_file_upload,
                        inputs=file_output,
                        outputs=uploaded_file
                    )

                    # Validate and upload the stashed JSON; the status text goes to `out`.
                    submit_button.click(
                        fn=submit_file,
                        inputs=[uploaded_file, model_name_textbox],
                        outputs=[out]
                    )

            # Tab 4: plot over a user-selected subset of models.
            with gr.TabItem("📊 Analytics", elem_id="llm-benchmark-tab-table", id=4):
                with gr.Column():
                    # Every model in the leaderboard is offered and pre-selected.
                    model_dropdown = gr.Dropdown(
                        choices=leaderboard_df["model"].tolist(),
                        label="Models",
                        value=leaderboard_df["model"].tolist(),
                        multiselect=True,
                        info="Select models"
                    )
                with gr.Column():
                    # Initial plot is rendered once at build time from the default selection.
                    plot = gr.Plot(update_plot(model_dropdown.value))
                    model_dropdown.change(
                        fn=update_plot,
                        inputs=[model_dropdown],
                        outputs=[plot]
                    )
    return demo
|
|
def aggregate_leaderboard_data():
    """Merge the built-in baselines with external submissions and publish them.

    Steps, in order:
      1. Call ``download_dataset("kz-transformers/s-openbench-eval", "m_data")``.
      2. Start from the hard-coded baseline rows below.
      3. Append every ``./m_data/model_data/external/*.json`` file that parses
         to a dict and carries the required keys; other files are logged and
         skipped.
      4. Write the combined list to ``genned.json`` and upload it as
         ``leaderboard.json`` to the ``kz-transformers/kaz-llm-lb-metainfo``
         dataset repo.
    """
    download_dataset("kz-transformers/s-openbench-eval", "m_data")

    # Baseline rows that are always part of the leaderboard.
    records = [
        {
            "model_dtype": "torch.float16",
            "model": "dummy-random-baseline",
            "ppl": 0,
            "mmlu_translated_kk": 0.22991508817766165,
            "kk_constitution_mc": 0.25120772946859904,
            "kk_dastur_mc": 0.24477611940298508,
            "kazakh_and_literature_unt_mc": 0.2090443686006826,
            "kk_geography_unt_mc": 0.2019790454016298,
            "kk_world_history_unt_mc": 0.1986970684039088,
            "kk_history_of_kazakhstan_unt_mc": 0.19417177914110428,
            "kk_english_unt_mc": 0.189804278561675,
            "kk_biology_unt_mc": 0.22330729166666666,
            "kk_human_society_rights_unt_mc": 0.242152466367713,
        },
        {
            "model_dtype": "torch.float16",
            "model": "gpt-4o-mini",
            "ppl": 0,
            "mmlu_translated_kk": 0.5623775310254735,
            "kk_constitution_mc": 0.79,
            "kk_dastur_mc": 0.755,
            "kazakh_and_literature_unt_mc": 0.4953071672354949,
            "kk_geography_unt_mc": 0.5675203725261933,
            "kk_world_history_unt_mc": 0.6091205211726385,
            "kk_history_of_kazakhstan_unt_mc": 0.47883435582822087,
            "kk_english_unt_mc": 0.6763768775603095,
            "kk_biology_unt_mc": 0.607421875,
            "kk_human_society_rights_unt_mc": 0.7309417040358744,
        },
        {
            "model_dtype": "api",
            "model": "gpt-4o",
            "ppl": 0,
            "mmlu_translated_kk": 0.7419986936642717,
            "kk_constitution_mc": 0.841,
            "kk_dastur_mc": 0.798,
            "kazakh_and_literature_unt_mc": 0.6785409556313993,
            "kk_geography_unt_mc": 0.629802095459837,
            "kk_world_history_unt_mc": 0.6783387622149837,
            "kk_history_of_kazakhstan_unt_mc": 0.6785276073619632,
            "kk_english_unt_mc": 0.7410104688211198,
            "kk_biology_unt_mc": 0.6979166666666666,
            "kk_human_society_rights_unt_mc": 0.7937219730941704,
        },
        {
            "model_dtype": "torch.float16",
            "model": "nova-pro-v1",
            "ppl": 0,
            "mmlu_translated_kk": 0.6792945787067276,
            "kk_constitution_mc": 0.7753623188405797,
            "kk_dastur_mc": 0.718407960199005,
            "kazakh_and_literature_unt_mc": 0.4656569965870307,
            "kk_geography_unt_mc": 0.5541327124563445,
            "kk_world_history_unt_mc": 0.6425081433224755,
            "kk_history_of_kazakhstan_unt_mc": 0.5,
            "kk_english_unt_mc": 0.6845698680018206,
            "kk_biology_unt_mc": 0.6197916666666666,
            "kk_human_society_rights_unt_mc": 0.7713004484304933,
        },
        {
            "model_dtype": "torch.float16",
            "model": "gemini-1.5-pro",
            "ppl": 0,
            "mmlu_translated_kk": 0.7380796864794252,
            "kk_constitution_mc": 0.8164251207729468,
            "kk_dastur_mc": 0.7383084577114428,
            "kazakh_and_literature_unt_mc": 0.5565273037542662,
            "kk_geography_unt_mc": 0.6065192083818394,
            "kk_world_history_unt_mc": 0.6669381107491856,
            "kk_history_of_kazakhstan_unt_mc": 0.5791411042944785,
            "kk_english_unt_mc": 0.7114246700045517,
            "kk_biology_unt_mc": 0.6673177083333334,
            "kk_human_society_rights_unt_mc": 0.7623318385650224,
        },
        {
            "model_dtype": "torch.float16",
            "model": "gemini-1.5-flash",
            "ppl": 0,
            "mmlu_translated_kk": 0.6335728282168517,
            "kk_constitution_mc": 0.748792270531401,
            "kk_dastur_mc": 0.7054726368159204,
            "kazakh_and_literature_unt_mc": 0.4761092150170648,
            "kk_geography_unt_mc": 0.5640279394644936,
            "kk_world_history_unt_mc": 0.5838762214983714,
            "kk_history_of_kazakhstan_unt_mc": 0.43374233128834355,
            "kk_english_unt_mc": 0.6681838871187984,
            "kk_biology_unt_mc": 0.6217447916666666,
            "kk_human_society_rights_unt_mc": 0.7040358744394619,
        },
        {
            "model_dtype": "torch.float16",
            "model": "claude-3-5-sonnet",
            "ppl": 0,
            "mmlu_translated_kk": 0.7335075114304376,
            "kk_constitution_mc": 0.8623188405797102,
            "kk_dastur_mc": 0.7950248756218905,
            "kazakh_and_literature_unt_mc": 0.6548634812286689,
            "kk_geography_unt_mc": 0.6431897555296857,
            "kk_world_history_unt_mc": 0.6669381107491856,
            "kk_history_of_kazakhstan_unt_mc": 0.6251533742331289,
            "kk_english_unt_mc": 0.7291761492944925,
            "kk_biology_unt_mc": 0.6686197916666666,
            "kk_human_society_rights_unt_mc": 0.8026905829596412,
        },
        {
            "model_dtype": "torch.float16",
            "model": "yandex-gpt",
            "ppl": 0,
            "mmlu_translated_kk": 0.39777922926192033,
            "kk_constitution_mc": 0.7028985507246377,
            "kk_dastur_mc": 0.6159203980099502,
            "kazakh_and_literature_unt_mc": 0.3914249146757679,
            "kk_geography_unt_mc": 0.4912689173457509,
            "kk_world_history_unt_mc": 0.5244299674267101,
            "kk_history_of_kazakhstan_unt_mc": 0.4030674846625767,
            "kk_english_unt_mc": 0.5844333181611289,
            "kk_biology_unt_mc": 0.4368489583333333,
            "kk_human_society_rights_unt_mc": 0.6995515695067265,
        },
    ]

    submission_paths = glob.glob("./m_data/model_data/external/*.json")
    logging.info(f'FILES LIST: {submission_paths}')

    # Keys a submission must carry to be accepted into the board.
    mandatory = {'model_dtype', 'model', 'ppl', 'mmlu_translated_kk'}

    for path in submission_paths:
        logging.info(f'Trying to read external submit file: {path}')
        try:
            with open(path) as handle:
                payload = json.load(handle)
        except Exception as err:
            # One unreadable submission must not abort the whole aggregation.
            logging.error(f"Error reading file {path}: {err}")
            continue

        if not isinstance(payload, dict):
            logging.warning(f"File {path} is not a dict, skipping")
            continue
        if mandatory.difference(payload):
            logging.warning(f"File {path} missing required keys, skipping")
            continue

        logging.info(f'Successfully read: {path}, got {len(payload)} keys')
        records.append(payload)

    logging.info("Combined data_list length: %d", len(records))

    with open("genned.json", "w") as out_file:
        json.dump(records, out_file)

    API.upload_file(
        path_or_fileobj="genned.json",
        path_in_repo="leaderboard.json",
        repo_id="kz-transformers/kaz-llm-lb-metainfo",
        repo_type="dataset",
    )
|
|
def update_board():
    """Scheduled job: rebuild the leaderboard only when a refresh is pending.

    Reads the flag stored in the ``RESET_JUDGEMENT_ENV`` environment
    variable. When it is ``"1"``, the flag is cleared, the leaderboard data
    is re-aggregated and uploaded, and the Space is restarted. For any other
    value the job returns without doing any work.
    """
    need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
    logging.info("Updating the judgement (scheduled update): %s", need_reset)
    if need_reset != "1":
        # No refresh was requested since the last run. This branch used to
        # be a bare `pass`, so every tick rebuilt the data and restarted the
        # Space regardless of the flag.
        return
    # Clear the flag before the slow work starts.
    os.environ[RESET_JUDGEMENT_ENV] = "0"
    aggregate_leaderboard_data()
    restart_space()
|
|
def update_board_():
    """Startup variant of the board update.

    Logs the startup message and runs ``aggregate_leaderboard_data()`` once.
    It neither reads the reset flag nor restarts the Space.
    """
    startup_message = "Updating the judgement at startup"
    logging.info(startup_message)
    aggregate_leaderboard_data()
|
|
|
|
# Script entry point.
# NOTE(review): the original indentation was not available; the build/launch
# statements are assumed to belong inside this guard — confirm.
if __name__ == "__main__":
    # Set the reset flag that update_board reads on its scheduled runs.
    os.environ[RESET_JUDGEMENT_ENV] = "1"
    # Same name is already imported at the top of the file; this re-import is redundant.
    from apscheduler.schedulers.background import BackgroundScheduler
    scheduler = BackgroundScheduler()
    # Aggregate and upload the leaderboard data once, synchronously, before the UI is built.
    update_board_()
    # Run update_board every 10 minutes on the background scheduler.
    scheduler.add_job(update_board, "interval", minutes=10)
    scheduler.start()

    demo_app = build_demo()
    demo_app.launch(debug=True, share=False, show_api=False)
|
|