-
Notifications
You must be signed in to change notification settings - Fork 14
73 lines (65 loc) · 3.05 KB
/
update-benchmarks.yml
File metadata and controls
73 lines (65 loc) · 3.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# Periodically pulls LLM benchmark data from the Artificial Analysis API,
# reduces it to a flat JSON array in data/benchmarks.json, commits the file
# when it changed, and then asks jsDelivr to purge its cached copy.
name: Update Benchmark Data

on:
  schedule:
    - cron: '*/30 * * * *'  # Every 30 minutes
  workflow_dispatch: {}

# The job pushes a commit back to the repository, so it needs write access
# to repository contents.
permissions:
  contents: write

env:
  # Quoted so the value is unambiguously the string "true".
  # NOTE(review): presumably opts JavaScript-based actions into the Node 24
  # runtime - confirm against the runner documentation.
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: 'true'

jobs:
  update-benchmarks:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Fetch benchmark data from Artificial Analysis API
        # Naming the shell explicitly makes GitHub run the script with
        # `-eo pipefail`. Without it, a failing `curl` on the left side of the
        # pipe is ignored, `jq` succeeds on empty input, and an empty file
        # would be committed by the next step.
        shell: bash
        env:
          AA_API_KEY: ${{ secrets.AA_API_KEY }}
        run: |
          # Build the new file outside the working tree so a failed or partial
          # download never clobbers the tracked data/benchmarks.json.
          tmp="${RUNNER_TEMP}/benchmarks.json"

          # Keep one flat object per model. Missing nested fields become null,
          # and a speed/latency value of exactly 0 is also stored as null.
          curl -sf -H "X-API-Key: $AA_API_KEY" \
            https://artificialanalysis.ai/api/v2/data/llms/models \
            | jq '[.data[] | {
                id: .id,
                name: .name,
                slug: .slug,
                creator: (.model_creator?.slug // null),
                creator_id: (.model_creator?.id // null),
                creator_name: (.model_creator?.name // null),
                release_date: .release_date,
                intelligence_index: (.evaluations?.artificial_analysis_intelligence_index // null),
                coding_index: (.evaluations?.artificial_analysis_coding_index // null),
                math_index: (.evaluations?.artificial_analysis_math_index // null),
                mmlu_pro: (.evaluations?.mmlu_pro // null),
                gpqa: (.evaluations?.gpqa // null),
                hle: (.evaluations?.hle // null),
                livecodebench: (.evaluations?.livecodebench // null),
                scicode: (.evaluations?.scicode // null),
                ifbench: (.evaluations?.ifbench // null),
                lcr: (.evaluations?.lcr // null),
                terminalbench_hard: (.evaluations?.terminalbench_hard // null),
                tau2: (.evaluations?.tau2 // null),
                math_500: (.evaluations?.math_500 // null),
                aime: (.evaluations?.aime // null),
                aime_25: (.evaluations?.aime_25 // null),
                output_tps: (.median_output_tokens_per_second | if . == 0 then null else . end),
                ttft: (.median_time_to_first_token_seconds | if . == 0 then null else . end),
                ttfat: (.median_time_to_first_answer_token | if . == 0 then null else . end),
                price_input: (.pricing?.price_1m_input_tokens // null),
                price_output: (.pricing?.price_1m_output_tokens // null),
                price_blended: (.pricing?.price_1m_blended_3_to_1 // null)
              }]' > "$tmp"

          # Refuse to publish anything that is not a non-empty JSON array.
          # `jq -e` exits non-zero when the result is false/null or when no
          # output was produced at all (e.g. an empty file).
          jq -e 'type == "array" and length > 0' "$tmp" > /dev/null

          mkdir -p data
          mv "$tmp" data/benchmarks.json
          echo "Fetched $(jq length data/benchmarks.json) entries"

      - name: Commit and push if changed
        id: commit
        run: |
          # Stage first and compare the index against HEAD: a plain
          # `git diff` ignores untracked files, so a brand-new
          # data/benchmarks.json would otherwise be reported as unchanged.
          git add data/benchmarks.json
          if git diff --cached --quiet -- data/benchmarks.json; then
            echo "changed=false" >> "$GITHUB_OUTPUT"
            exit 0
          fi
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git commit -m "chore: update benchmark data from Artificial Analysis"
          git push
          echo "changed=true" >> "$GITHUB_OUTPUT"

      - name: Purge jsDelivr cache
        # Only runs when the previous step actually pushed a new commit.
        if: steps.commit.outputs.changed == 'true'
        run: |
          # Best effort: a failed purge request must not fail the workflow.
          curl -sf "https://purge.jsdelivr.net/gh/arimxyer/models@main/data/benchmarks.json" || true