| import os |
| import gradio as gr |
|
|
| from apscheduler.schedulers.background import BackgroundScheduler |
| from dabstep_benchmark.content import TITLE, INTRODUCTION_TEXT, SUBMISSION_TEXT, CITATION_BUTTON_TEXT, CITATION_BUTTON_LABEL, VALIDATION_GUIDELINES |
| from dabstep_benchmark.leaderboard import * |
|
|
|
|
def restart_space():
    """Restart the Space identified by ``HF_LEADERBOARD`` through ``HF_API``.

    Takes no arguments and returns nothing; the result of the API call is
    discarded. Both ``HF_API`` and ``HF_LEADERBOARD`` come from the
    ``dabstep_benchmark.leaderboard`` star import.
    """
    target_repo = HF_LEADERBOARD
    HF_API.restart_space(repo_id=target_repo)
| |
|
|
def download_leaderboard(type):
    """Write one leaderboard to a CSV file and return the file's path.

    Args:
        type: Which leaderboard to export: ``"verified"`` or
            ``"unverified"``. The name shadows the builtin, but it is kept
            so that callers passing it by keyword continue to work.

    Returns:
        The relative path ``data/{type}_leaderboard.csv`` of the written file.

    Raises:
        ValueError: If ``type`` is not one of the two supported names.
    """
    # Validate before doing any work. Previously an unknown name slipped past
    # both ``if`` checks and only failed later with an UnboundLocalError; the
    # check also guarantees that ``type`` is safe to embed in the file path.
    if type not in ("verified", "unverified"):
        raise ValueError(
            f"Unknown leaderboard type {type!r}; expected 'verified' or 'unverified'"
        )

    verified_lb, unverified_lb = generate_leaderboard_df()
    df_to_download = verified_lb if type == "verified" else unverified_lb

    path = f"data/{type}_leaderboard.csv"
    # Do not rely on another code path having created the output directory.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    # Drop any previous export so the returned path always holds a fresh file.
    if os.path.exists(path):
        os.remove(path)
    df_to_download.to_csv(path, index=False)
    return path
|
|
|
|
def _leaderboard_table(frame):
    """Build the read-only, wrapped table component that displays ``frame``.

    Must be called inside the layout context where the table should appear,
    because the component is created at call time.
    """
    return gr.Dataframe(
        value=frame,
        datatype=["markdown", "str", "str", "str", "markdown", "str", "str", "str"],
        interactive=False,
        column_widths=["20%"],
        wrap=True,
    )


# Script entry point: prepare local data, build the Gradio UI, schedule the
# periodic restart job and launch the app.
if __name__ == "__main__":
    os.makedirs("data/task_scores", exist_ok=True)
    # NOTE(review): `refresh` comes from the star import; presumably this
    # performs a full data refresh before the UI is built -- confirm.
    refresh(only_leaderboard=False)

    with gr.Blocks() as demo:
        gr.Markdown(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

        # Initial contents for the two tables.
        validated_lb, unvalidated_lb = generate_leaderboard_df()

        with gr.Tab("Validated"):
            verified_table = _leaderboard_table(validated_lb)
            verified_download = gr.DownloadButton(
                label="Download Leaderboard",
                elem_id="download-verified-lb",
            )

        with gr.Tab("Unvalidated"):
            unverified_table = _leaderboard_table(unvalidated_lb)
            unverified_download = gr.DownloadButton(
                label="Download Leaderboard",
                elem_id="download-unverified-lb",
            )

        # Regenerate both tables every time the page is loaded.
        demo.load(
            generate_leaderboard_df,
            inputs=None,
            outputs=[verified_table, unverified_table],
        )

        # Hidden textboxes carry the constant leaderboard name into the
        # download callback.
        verified_kind = gr.Textbox(value="verified", visible=False)
        verified_download.click(
            download_leaderboard,
            inputs=[verified_kind],
            outputs=[verified_download],
        )
        unverified_kind = gr.Textbox(value="unverified", visible=False)
        unverified_download.click(
            download_leaderboard,
            inputs=[unverified_kind],
            outputs=[unverified_download],
        )

        refresh_button = gr.Button("Refresh")
        # The hidden checkbox feeds a constant True as the single argument
        # of `refresh`.
        leaderboard_only = gr.Checkbox(value=True, visible=False)
        refresh_button.click(
            refresh,
            inputs=[leaderboard_only],
            outputs=[verified_table, unverified_table],
        )
        gr.Markdown(VALIDATION_GUIDELINES, elem_classes="markdown-text")

        with gr.Row():
            # NOTE(review): the leading "π" looks like a mis-encoded emoji;
            # confirm the intended label against the upstream file.
            with gr.Accordion("π Citation", open=False):
                citation_button = gr.Textbox(
                    value=CITATION_BUTTON_TEXT,
                    label=CITATION_BUTTON_LABEL,
                    # One visible row per line of the citation text.
                    lines=CITATION_BUTTON_TEXT.count("\n") + 1,
                    elem_id="citation-button",
                )

        with gr.Accordion("Submit new agent answers for evaluation"):
            with gr.Row():
                gr.Markdown(SUBMISSION_TEXT, elem_classes="markdown-text")
            with gr.Row():
                with gr.Column():
                    split = gr.Radio(["all"], value="all", label="Split", visible=False)
                    agent_name_textbox = gr.Textbox(label="Agent name")
                    model_family_textbox = gr.Textbox(label="Model family")
                    # NOTE(review): this field is rendered but is not among
                    # the inputs passed to `process_submission` below --
                    # confirm whether that is intentional.
                    system_prompt_textbox = gr.Textbox(label="System prompt example")
                    repo_url_textbox = gr.Textbox(label="Repo URL with agent code")
                with gr.Column():
                    organisation = gr.Textbox(label="Organisation")
                    mail = gr.Textbox(
                        label="Contact email (will be stored privately, & used if there is an issue with your submission)"
                    )
                    file_output = gr.File()

            with gr.Row():
                gr.LoginButton()
                submit_button = gr.Button("Submit answers")
            submission_result = gr.Markdown()

            submission_inputs = [
                split,
                agent_name_textbox,
                model_family_textbox,
                repo_url_textbox,
                file_output,
                organisation,
                mail,
            ]
            submit_button.click(
                process_submission,
                inputs=submission_inputs,
                outputs=submission_result,
            )

    # Run `restart_space` once every 24 hours in a background scheduler.
    scheduler = BackgroundScheduler()
    scheduler.add_job(restart_space, "interval", seconds=24 * 60 * 60)
    scheduler.start()
    demo.launch(debug=True)