-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathllm-leaderboard
More file actions
executable file
·78 lines (66 loc) · 2.99 KB
/
llm-leaderboard
File metadata and controls
executable file
·78 lines (66 loc) · 2.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/usr/bin/env python
import json, requests, argparse, sys, re
from bs4 import BeautifulSoup
from collections import OrderedDict
# Command-line interface. Parsing happens at module level, so the resulting
# ``args`` namespace is a global read by the rest of the script.
parser = argparse.ArgumentParser(
    description="llm-leaderboards - CLI for popular leaderboard with large language models.",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
    "board",
    type=str,
    choices=["hf", "arena"],
    help="Leaderboard to query.",
)
parser.add_argument(
    "-n",
    "--number",
    type=int,
    default=10,
    help="Top n to show.",
)
parser.add_argument(
    "-k",
    "--sort-key",
    type=str,
    default="average_score",
    help="Key that determines sort order.",
)
# BooleanOptionalAction also generates the matching ``--no-headers`` flag.
parser.add_argument(
    "--headers",
    action=argparse.BooleanOptionalAction,
    default=True,
    help="Show table headers or not.",
)
# With action="append", values given on the command line are appended to the
# default list, so "sha" stays excluded even when -f is supplied.
parser.add_argument(
    "-f",
    "--filter",
    action="append",
    default=["sha"],
    help="Keys to exclude from table.",
)
args = parser.parse_args()
def get_hf(timeout=30):
    """Download the Open LLM Leaderboard data and return the decoded JSON.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The JSON payload of the ``/api/leaderboard/formatted`` endpoint,
        decoded into Python objects.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
    """
    url = "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
    response = requests.get(url, timeout=timeout)
    # Fail with the real HTTP error instead of a confusing JSON decode error
    # when the endpoint returns an error page.
    response.raise_for_status()
    return response.json()
def get_arena(timeout=30):
    """Download the Chatbot Arena page and return its embedded Gradio config.

    The page embeds its configuration in a ``<script>`` tag of the form
    ``window.gradio_config = {...};``. The script is located by that prefix
    rather than by a fixed position in the document.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The Gradio configuration decoded from JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
        ValueError: If no config script is found, if the payload is
            suspiciously short, or if it is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    url = "https://lmarena-ai-chatbot-arena-leaderboard.hf.space/"
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    prefix = "window.gradio_config = "
    for script in soup.find_all('script'):
        # ``script.string`` is None for tags without a single text child
        # (e.g. external scripts), so fall back to an empty string.
        text = (script.string or "").strip()
        if text.startswith(prefix):
            break
    else:
        raise ValueError("gradio_config script not found")

    # Drop the JavaScript assignment and the trailing semicolon to leave JSON.
    json_str = re.sub(r';$', '', text.removeprefix(prefix))
    # NOTE(review): presumably a sanity check that the full config was
    # embedded rather than a stub - confirm the 10000-character threshold.
    if len(json_str) <= 10000:
        raise ValueError("JSON string is too short")
    return json.loads(json_str)
def normalize(w):
    """Return *w* lower-cased with every space replaced by an underscore."""
    space_to_underscore = str.maketrans(" ", "_")
    return w.translate(space_to_underscore).lower()
def process_arena(json_data):
    """Flatten the dataframe components of a Gradio config into rows.

    Every component whose ``type`` is ``"dataframe"`` contributes its rows.
    Header names are passed through ``normalize`` and the ``model`` cell is
    reduced to its text content (any HTML markup is removed).

    Rows from different dataframes are merged by their position: row ``i`` of
    a later table updates row ``i`` collected from earlier tables.
    NOTE(review): this presumes all tables list the same models in the same
    order - confirm, otherwise merging by model name would be safer.

    Args:
        json_data: Decoded Gradio config containing a ``"components"`` list.

    Returns:
        A list of ``OrderedDict`` rows, ordered by first appearance of each
        row position.

    Raises:
        KeyError: If a dataframe has no ``model`` column or lacks the
            expected ``props``/``value`` structure.
    """
    merged = {}
    tables = (
        component["props"]["value"]
        for component in json_data["components"]
        if component.get("type", "") == "dataframe"
    )
    for table in tables:
        columns = [normalize(title) for title in table["headers"]]
        for position, cells in enumerate(table["data"]):
            record = OrderedDict(zip(columns, cells))
            record["model"] = BeautifulSoup(record["model"], "html.parser").get_text()
            merged.setdefault(position, OrderedDict()).update(record)
    return list(merged.values())
def print_table(rows, sort_key, name_key, excluded=(), show_headers=True):
    """Print *rows* as tab-separated lines, optionally preceded by a header.

    Each row is reordered in place so that ``name_key`` is the first column
    and ``sort_key`` the second. Every cell is followed by a tab character.

    Args:
        rows: Iterable of ``OrderedDict`` rows to print.
        sort_key: Key moved to the second column of every row.
        name_key: Key moved to the first column of every row.
        excluded: Keys that are left out of the output.
        show_headers: When true, a line with the first row's column names is
            printed before the first row.

    Raises:
        KeyError: If a row lacks ``sort_key`` or ``name_key``.
    """
    for index, row in enumerate(rows):
        row.move_to_end(sort_key, last=False)
        row.move_to_end(name_key, last=False)
        visible = [(key, value) for key, value in row.items() if key not in excluded]
        if index == 0 and show_headers:
            # The header is an extra line; the first row's values must still
            # be printed below it (previously they were skipped).
            print("".join(str(key) + "\t" for key, _ in visible))
        print("".join(str(value) + "\t" for _, value in visible))


def main():
    """Fetch the leaderboard selected on the command line and print its top rows."""
    if args.board == "hf":
        name_key = "name"
        flat_data = [
            OrderedDict(entry["model"] | entry["evaluations"] | entry["metadata"] | entry["features"])
            for entry in get_hf()
        ]
    elif args.board == "arena":
        name_key = "model"
        # The arena tables have no "average_score"; map the default sort key
        # to the arena equivalent.
        if args.sort_key == "average_score":
            args.sort_key = "arena_score"
        flat_data = process_arena(get_arena())

    # Report an unknown sort key as a usage error instead of a KeyError
    # traceback from sorting / column reordering.
    if flat_data and args.sort_key not in flat_data[0]:
        parser.error(
            f"unknown sort key {args.sort_key!r}; available keys: {', '.join(flat_data[0])}"
        )

    if args.board == "hf":
        flat_data.sort(key=lambda row: row[args.sort_key], reverse=True)
    # NOTE(review): arena rows are printed in the order process_arena returns
    # them; there the sort key only decides which column comes second.

    # main event
    # max(..., 0) keeps a negative --number printing nothing rather than
    # slicing from the end of the list.
    top_rows = flat_data[:max(args.number, 0)]
    print_table(top_rows, args.sort_key, name_key, excluded=args.filter, show_headers=args.headers)


if __name__ == "__main__":
    main()