"""Gradio front end for the ChinaTravel benchmark evaluation.

The page renders the static intro/guide content, one placeholder leaderboard
tab per evaluation split, and a submission form: the user picks a split,
uploads a ``.zip`` result archive and presses "Run evaluation".  The click is
routed to ``handle_submission``, whose outputs populate the status line, the
three progress boxes (schema / commonsense / logic) and the downloadable
report.

Running this module builds the UI and launches the server immediately.
"""

import gradio as gr

from chinatravel.ui import content
from chinatravel.ui.eval_runner import DEFAULT_SPLIT, SPLITS_LIST, handle_submission
from chinatravel.ui.leaderboard import build_placeholder_frames

# Preselect the configured default split; otherwise fall back to the first
# available split, or to no selection at all when no splits are defined.
default_split_value = DEFAULT_SPLIT or (SPLITS_LIST[0] if SPLITS_LIST else None)

# Looked up per split below via ``.get(split)``.
leaderboard_frames = build_placeholder_frames(SPLITS_LIST)

with gr.Blocks(title="ChinaTravel Benchmark Evaluation") as demo:
    # --- Static page content -------------------------------------------------
    gr.HTML(content.TITLE_HTML)
    gr.Markdown(content.INTRO_MARKDOWN)
    gr.Markdown(content.SUBMISSION_GUIDE)
    gr.Markdown(content.LEADERBOARD_INTRO)

    # --- Leaderboard: one tab per split ---------------------------------------
    # Skip the tab container entirely when there are no splits to show.
    if SPLITS_LIST:
        with gr.Tabs():
            for split in SPLITS_LIST:
                with gr.Tab(split):
                    # The "human1000" split is linked under the "test" viewer
                    # path; every other split is linked under "default".
                    hf_url = (
                        f"https://huggingface.co/datasets/LAMDA-NeSy/ChinaTravel/viewer/test/{split}"
                        if split == "human1000"
                        else f"https://huggingface.co/datasets/LAMDA-NeSy/ChinaTravel/viewer/default/{split}"
                    )
                    gr.Markdown(f"🗂️ [Hugging Face Dataset Viewer]({hf_url})")
                    gr.Dataframe(
                        value=leaderboard_frames.get(split),
                        interactive=False,
                        wrap=True,
                    )

    # --- Submission form ------------------------------------------------------
    dataset_choice = gr.Radio(
        SPLITS_LIST,
        label="Select evaluation split",
        value=default_split_value,
        interactive=True,
    )
    zip_input = gr.File(label="Upload result archive (.zip)", file_types=[".zip"])
    submit_btn = gr.Button("Run evaluation", variant="primary")

    # --- Evaluation feedback (read-only, filled by handle_submission) ---------
    status = gr.Markdown("Ready to evaluate.")
    schema_progress = gr.Textbox(
        label="Schema progress",
        value="0%",
        interactive=False,
    )
    commonsense_progress = gr.Textbox(
        label="Commonsense progress",
        value="0%",
        interactive=False,
    )
    logic_progress = gr.Textbox(
        label="Logic progress",
        value="0%",
        interactive=False,
    )
    result_file = gr.File(label="Download evaluation report", interactive=False)

    # Contact section is disabled in the original source; kept for reference.
    # gr.Markdown(content.CONTACT)

    # The event listener is registered while the Blocks context is still open.
    # The order of ``outputs`` must match the order of the values that
    # handle_submission produces.
    submit_btn.click(
        handle_submission,
        inputs=[zip_input, dataset_choice],
        outputs=[
            status,
            schema_progress,
            commonsense_progress,
            logic_progress,
            result_file,
        ],
    )

# Launch after the Blocks context has closed so the full layout is registered.
demo.launch(debug=True)