Merged
Improve UI
merrymercy committed Sep 8, 2023
commit 3f867b83a05e8454fb127f26d010e9809ad51709
6 changes: 3 additions & 3 deletions fastchat/serve/gradio_block_arena_anony.py
@@ -196,7 +196,7 @@ def share_click(state0, state1, model_selector0, model_selector1, request: gr.Re
     "chatglm-6b": 0.5,
 }
 
-SAMPLING_BOOST_MODELS = ["llama-2-70b-chat", "codellama-34b-instruct"]
+SAMPLING_BOOST_MODELS = ["wizardlm-70b"]
 
 model_pairs = []
 model_pairs_weights = []
@@ -420,12 +420,12 @@ def build_side_by_side_ui_anony(models):
         with gr.Column(scale=20):
             textbox = gr.Textbox(
                 show_label=False,
-                placeholder="Enter text and press ENTER",
+                placeholder="Enter your prompt here and press ENTER",
                 visible=False,
                 container=False,
             )
         with gr.Column(scale=1, min_width=50):
-            send_btn = gr.Button(value="Send", visible=False)
+            send_btn = gr.Button(value="Battle", visible=False, variant="primary")
 
     with gr.Row() as button_row2:
         regenerate_btn = gr.Button(value="🔄 Regenerate", interactive=False)
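Note on the hunk above: the commit only swaps the entries in the data tables; the pairing and boosting logic that consumes SAMPLING_WEIGHTS, SAMPLING_BOOST_MODELS, model_pairs, and model_pairs_weights is not part of this diff. A minimal sketch of how these tables could feed a weighted random draw over battle pairs — the BOOST_FACTOR constant and the exact pairing loop are assumptions for illustration, not code from this PR:

```python
import random

# Assumed sketch of weighted battle-pair sampling. Per-model weights
# multiply, and pairs containing a boosted model are drawn more often.
SAMPLING_WEIGHTS = {"chatglm-6b": 0.5}    # per-model down-weight, from the hunk above
SAMPLING_BOOST_MODELS = ["wizardlm-70b"]  # new value in this commit
BOOST_FACTOR = 5                          # assumed constant, not shown in the diff

def build_pairs(models):
    model_pairs, model_pairs_weights = [], []
    for i, a in enumerate(models):
        for b in models[i + 1:]:
            w = SAMPLING_WEIGHTS.get(a, 1.0) * SAMPLING_WEIGHTS.get(b, 1.0)
            if a in SAMPLING_BOOST_MODELS or b in SAMPLING_BOOST_MODELS:
                w *= BOOST_FACTOR
            model_pairs.append((a, b))
            model_pairs_weights.append(w)
    return model_pairs, model_pairs_weights

pairs, weights = build_pairs(["wizardlm-70b", "chatglm-6b", "vicuna-13b"])
model_a, model_b = random.choices(pairs, weights=weights, k=1)[0]
```

Boosting presumably lets a newly added model (here wizardlm-70b) accumulate votes quickly without starving the established pairs.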
4 changes: 2 additions & 2 deletions fastchat/serve/gradio_block_arena_named.py
@@ -352,12 +352,12 @@ def build_side_by_side_ui_named(models):
         with gr.Column(scale=20):
             textbox = gr.Textbox(
                 show_label=False,
-                placeholder="Enter text and press ENTER",
+                placeholder="Enter your prompt here and press ENTER",
                 visible=False,
                 container=False,
             )
         with gr.Column(scale=1, min_width=50):
-            send_btn = gr.Button(value="Send", visible=False)
+            send_btn = gr.Button(value="Battle", visible=False, variant="primary")
 
     with gr.Row() as button_row2:
         regenerate_btn = gr.Button(value="🔄 Regenerate", interactive=False)
4 changes: 2 additions & 2 deletions fastchat/serve/gradio_web_server.py
@@ -591,12 +591,12 @@ def build_single_model_ui(models, add_promotion_links=False):
         with gr.Column(scale=20):
             textbox = gr.Textbox(
                 show_label=False,
-                placeholder="Enter text and press ENTER",
+                placeholder="Enter your prompt here and press ENTER",
                 visible=False,
                 container=False,
             )
         with gr.Column(scale=1, min_width=50):
-            send_btn = gr.Button(value="Send", visible=False)
+            send_btn = gr.Button(value="Battle", visible=False, variant="primary")
 
     with gr.Row(visible=False) as button_row:
         upvote_btn = gr.Button(value="👍 Upvote", interactive=False)
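All three files above share the same input-row pattern: a wide textbox column next to a narrow column holding the send button, which this commit relabels to "Battle" and styles with variant="primary". A minimal standalone sketch of that layout (a hypothetical demo app, not part of this PR; visible=False is dropped here since there is no later reveal step):

```python
import gradio as gr

# Reproduces the input row from the three UIs: a 20:1 column split
# between the prompt textbox and a compact primary-styled button.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=20):
            textbox = gr.Textbox(
                show_label=False,
                placeholder="Enter your prompt here and press ENTER",
                container=False,
            )
        with gr.Column(scale=1, min_width=50):
            send_btn = gr.Button(value="Battle", variant="primary")

if __name__ == "__main__":
    demo.launch()
```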
8 changes: 6 additions & 2 deletions fastchat/serve/monitor/monitor.py
@@ -30,11 +30,11 @@ def make_leaderboard_md(elo_results):
 | [Blog](https://lmsys.org/blog/2023-05-03-arena/) | [GitHub](https://github.com/lm-sys/FastChat) | [Paper](https://arxiv.org/abs/2306.05685) | [Dataset](https://huggingface.co/datasets/lmsys/chatbot_arena_conversations) | [Twitter](https://twitter.com/lmsysorg) | [Discord](https://discord.gg/HSWAKCrnFx) |
 
 🏆 This leaderboard is based on the following three benchmarks.
-- [Chatbot Arena](https://lmsys.org/blog/2023-05-03-arena/) - a crowdsourced, randomized battle platform. We use 50K+ user votes to compute Elo ratings.
+- [Chatbot Arena](https://lmsys.org/blog/2023-05-03-arena/) - a crowdsourced, randomized battle platform. We use 70K+ user votes to compute Elo ratings.
 - [MT-Bench](https://arxiv.org/abs/2306.05685) - a set of challenging multi-turn questions. We use GPT-4 to grade the model responses.
 - [MMLU](https://arxiv.org/abs/2009.03300) (5-shot) - a test to measure a model's multitask accuracy on 57 tasks.
 
-💻 Code: The Arena Elo ratings are computed by this [notebook]({notebook_url}). The MT-bench scores (single-answer grading on a scale of 10) are computed by [fastchat.llm_judge](https://github.com/lm-sys/FastChat/tree/main/fastchat/llm_judge). The MMLU scores are computed by [InstructEval](https://github.com/declare-lab/instruct-eval) and [Chain-of-Thought Hub](https://github.com/FranxYao/chain-of-thought-hub). Higher values are better for all benchmarks. Empty cells mean not available.
+💻 Code: The Arena Elo ratings are computed by this [notebook]({notebook_url}). The MT-bench scores (single-answer grading on a scale of 10) are computed by [fastchat.llm_judge](https://github.com/lm-sys/FastChat/tree/main/fastchat/llm_judge). The MMLU scores are computed by [InstructEval](https://github.com/declare-lab/instruct-eval) and [Chain-of-Thought Hub](https://github.com/FranxYao/chain-of-thought-hub). Higher values are better for all benchmarks. Empty cells mean not available. Last updated: Sept, 2023.
 """
     return leaderboard_md

@@ -241,6 +241,10 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file):
"#### Figure 4: Average Win Rate Against All Other Models (Assuming Uniform Sampling and No Ties)"
)
plot_4 = gr.Plot(p4, show_label=False)

from fastchat.serve.gradio_web_server import acknowledgment_md
gr.Markdown(acknowledgment_md)

return [md_1, plot_1, plot_2, plot_3, plot_4]


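For context on the leaderboard text updated above: it states that Elo ratings are computed from 70K+ pairwise user votes in the linked notebook. As a rough, self-contained illustration only — the notebook's actual methodology may differ (for instance, it can fit ratings by maximum likelihood rather than sequential updates), and the K and BASE constants here are assumptions — a classic online Elo update over battle records looks like this:

```python
from collections import defaultdict

K = 32       # update step size (assumed; the notebook chooses its own)
BASE = 1000  # initial rating for an unseen model (assumed)

def expected_score(r_a, r_b):
    # Probability that model A beats model B under the Elo model.
    return 1 / (1 + 10 ** ((r_b - r_a) / 400))

def compute_elo(battles):
    # battles: iterable of (model_a, model_b, winner), winner in {"a", "b", "tie"}.
    ratings = defaultdict(lambda: BASE)
    for a, b, winner in battles:
        e_a = expected_score(ratings[a], ratings[b])
        s_a = {"a": 1.0, "b": 0.0, "tie": 0.5}[winner]
        ratings[a] += K * (s_a - e_a)
        ratings[b] += K * ((1 - s_a) - (1 - e_a))
    return dict(ratings)

print(compute_elo([("gpt-4", "vicuna-13b", "a"), ("vicuna-13b", "gpt-4", "tie")]))
```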