Merged
Improve UI
merrymercy committed Sep 8, 2023
commit 3f867b83a05e8454fb127f26d010e9809ad51709
6 changes: 3 additions & 3 deletions fastchat/serve/gradio_block_arena_anony.py
@@ -196,7 +196,7 @@ def share_click(state0, state1, model_selector0, model_selector1, request: gr.Re
     "chatglm-6b": 0.5,
 }
 
-SAMPLING_BOOST_MODELS = ["llama-2-70b-chat", "codellama-34b-instruct"]
+SAMPLING_BOOST_MODELS = ["wizardlm-70b"]
 
 model_pairs = []
 model_pairs_weights = []
@@ -420,12 +420,12 @@ def build_side_by_side_ui_anony(models):
         with gr.Column(scale=20):
             textbox = gr.Textbox(
                 show_label=False,
-                placeholder="Enter text and press ENTER",
+                placeholder="Enter your prompt here and press ENTER",
                 visible=False,
                 container=False,
             )
         with gr.Column(scale=1, min_width=50):
-            send_btn = gr.Button(value="Send", visible=False)
+            send_btn = gr.Button(value="Battle", visible=False, variant="primary")
 
     with gr.Row() as button_row2:
         regenerate_btn = gr.Button(value="🔄 Regenerate", interactive=False)
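Note on the hunk above: the commit only swaps the entries in the data tables; the pairing and boosting logic that consumes SAMPLING_WEIGHTS, SAMPLING_BOOST_MODELS, model_pairs, and model_pairs_weights is not part of this diff. A minimal sketch of how these tables could feed a weighted random draw over battle pairs — the BOOST_FACTOR constant and the exact pairing loop are assumptions for illustration, not code from this PR:

```python
import random

# Assumed sketch of weighted battle-pair sampling. Per-model weights
# multiply, and pairs containing a boosted model are drawn more often.
SAMPLING_WEIGHTS = {"chatglm-6b": 0.5}    # per-model down-weight, from the hunk above
SAMPLING_BOOST_MODELS = ["wizardlm-70b"]  # new value in this commit
BOOST_FACTOR = 5                          # assumed constant, not shown in the diff

def build_pairs(models):
    model_pairs, model_pairs_weights = [], []
    for i, a in enumerate(models):
        for b in models[i + 1:]:
            w = SAMPLING_WEIGHTS.get(a, 1.0) * SAMPLING_WEIGHTS.get(b, 1.0)
            if a in SAMPLING_BOOST_MODELS or b in SAMPLING_BOOST_MODELS:
                w *= BOOST_FACTOR
            model_pairs.append((a, b))
            model_pairs_weights.append(w)
    return model_pairs, model_pairs_weights

pairs, weights = build_pairs(["wizardlm-70b", "chatglm-6b", "vicuna-13b"])
model_a, model_b = random.choices(pairs, weights=weights, k=1)[0]
```

Boosting presumably lets a newly added model (here wizardlm-70b) accumulate votes quickly without starving the established pairs.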
4 changes: 2 additions & 2 deletions fastchat/serve/gradio_block_arena_named.py
@@ -352,12 +352,12 @@ def build_side_by_side_ui_named(models):
         with gr.Column(scale=20):
             textbox = gr.Textbox(
                 show_label=False,
-                placeholder="Enter text and press ENTER",
+                placeholder="Enter your prompt here and press ENTER",
                 visible=False,
                 container=False,
             )
         with gr.Column(scale=1, min_width=50):
-            send_btn = gr.Button(value="Send", visible=False)
+            send_btn = gr.Button(value="Battle", visible=False, variant="primary")
 
     with gr.Row() as button_row2:
         regenerate_btn = gr.Button(value="🔄 Regenerate", interactive=False)
4 changes: 2 additions & 2 deletions fastchat/serve/gradio_web_server.py
@@ -591,12 +591,12 @@ def build_single_model_ui(models, add_promotion_links=False):
         with gr.Column(scale=20):
             textbox = gr.Textbox(
                 show_label=False,
-                placeholder="Enter text and press ENTER",
+                placeholder="Enter your prompt here and press ENTER",
                 visible=False,
                 container=False,
             )
         with gr.Column(scale=1, min_width=50):
-            send_btn = gr.Button(value="Send", visible=False)
+            send_btn = gr.Button(value="Battle", visible=False, variant="primary")
 
     with gr.Row(visible=False) as button_row:
         upvote_btn = gr.Button(value="👍 Upvote", interactive=False)
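All three files above share the same input-row pattern: a wide textbox column next to a narrow column holding the send button, which this commit relabels to "Battle" and styles with variant="primary". A minimal standalone sketch of that layout (a hypothetical demo app, not part of this PR; visible=False is dropped here since there is no later reveal step):

```python
import gradio as gr

# Reproduces the input row from the three UIs: a 20:1 column split
# between the prompt textbox and a compact primary-styled button.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=20):
            textbox = gr.Textbox(
                show_label=False,
                placeholder="Enter your prompt here and press ENTER",
                container=False,
            )
        with gr.Column(scale=1, min_width=50):
            send_btn = gr.Button(value="Battle", variant="primary")

if __name__ == "__main__":
    demo.launch()
```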
8 changes: 6 additions & 2 deletions fastchat/serve/monitor/monitor.py
@@ -30,11 +30,11 @@ def make_leaderboard_md(elo_results):
 | [Blog](https://lmsys.org/blog/2023-05-03-arena/) | [GitHub](https://github.com/lm-sys/FastChat) | [Paper](https://arxiv.org/abs/2306.05685) | [Dataset](https://huggingface.co/datasets/lmsys/chatbot_arena_conversations) | [Twitter](https://twitter.com/lmsysorg) | [Discord](https://discord.gg/HSWAKCrnFx) |
 
 🏆 This leaderboard is based on the following three benchmarks.
-- [Chatbot Arena](https://lmsys.org/blog/2023-05-03-arena/) - a crowdsourced, randomized battle platform. We use 50K+ user votes to compute Elo ratings.
+- [Chatbot Arena](https://lmsys.org/blog/2023-05-03-arena/) - a crowdsourced, randomized battle platform. We use 70K+ user votes to compute Elo ratings.
 - [MT-Bench](https://arxiv.org/abs/2306.05685) - a set of challenging multi-turn questions. We use GPT-4 to grade the model responses.
 - [MMLU](https://arxiv.org/abs/2009.03300) (5-shot) - a test to measure a model's multitask accuracy on 57 tasks.
 
-💻 Code: The Arena Elo ratings are computed by this [notebook]({notebook_url}). The MT-bench scores (single-answer grading on a scale of 10) are computed by [fastchat.llm_judge](https://github.com/lm-sys/FastChat/tree/main/fastchat/llm_judge). The MMLU scores are computed by [InstructEval](https://github.com/declare-lab/instruct-eval) and [Chain-of-Thought Hub](https://github.com/FranxYao/chain-of-thought-hub). Higher values are better for all benchmarks. Empty cells mean not available.
+💻 Code: The Arena Elo ratings are computed by this [notebook]({notebook_url}). The MT-bench scores (single-answer grading on a scale of 10) are computed by [fastchat.llm_judge](https://github.com/lm-sys/FastChat/tree/main/fastchat/llm_judge). The MMLU scores are computed by [InstructEval](https://github.com/declare-lab/instruct-eval) and [Chain-of-Thought Hub](https://github.com/FranxYao/chain-of-thought-hub). Higher values are better for all benchmarks. Empty cells mean not available. Last updated: Sept, 2023.
 """
     return leaderboard_md

@@ -241,6 +241,10 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file):
"#### Figure 4: Average Win Rate Against All Other Models (Assuming Uniform Sampling and No Ties)"
)
plot_4 = gr.Plot(p4, show_label=False)

from fastchat.serve.gradio_web_server import acknowledgment_md
gr.Markdown(acknowledgment_md)

return [md_1, plot_1, plot_2, plot_3, plot_4]


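For context on the leaderboard text updated above: it states that Elo ratings are computed from 70K+ pairwise user votes in the linked notebook. As a rough, self-contained illustration only — the notebook's actual methodology may differ (for instance, it can fit ratings by maximum likelihood rather than sequential updates), and the K and BASE constants here are assumptions — a classic online Elo update over battle records looks like this:

```python
from collections import defaultdict

K = 32       # update step size (assumed; the notebook chooses its own)
BASE = 1000  # initial rating for an unseen model (assumed)

def expected_score(r_a, r_b):
    # Probability that model A beats model B under the Elo model.
    return 1 / (1 + 10 ** ((r_b - r_a) / 400))

def compute_elo(battles):
    # battles: iterable of (model_a, model_b, winner), winner in {"a", "b", "tie"}.
    ratings = defaultdict(lambda: BASE)
    for a, b, winner in battles:
        e_a = expected_score(ratings[a], ratings[b])
        s_a = {"a": 1.0, "b": 0.0, "tie": 0.5}[winner]
        ratings[a] += K * (s_a - e_a)
        ratings[b] += K * ((1 - s_a) - (1 - e_a))
    return dict(ratings)

print(compute_elo([("gpt-4", "vicuna-13b", "a"), ("vicuna-13b", "gpt-4", "tie")]))
```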