Skip to content

Commit 8d4aab9

Browse files
authored
fix: cost callbacks fire only on final chunk with usage (#340)
1 parent 3d874c5 commit 8d4aab9

7 files changed

+952
-3
lines changed

packages/lmi/src/lmi/cost_tracker.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,13 @@ def add_callback(self, callback: Callable[[LLMResponse], Awaitable]) -> None:
2626
self._callbacks.append(callback)
2727

2828
async def record(self, response: LLMResponse) -> None:
29+
# Only record on responses with usage information (final chunk in streaming)
30+
# We check for usage presence rather than cost > 0 because:
31+
# - Free models, unknown models, or custom pricing can have cost = 0
32+
# - We still want to fire callbacks for these to maintain visibility
33+
if not getattr(response, "usage", None):
34+
return
35+
2936
self.lifetime_cost_usd += litellm.cost_calculator.completion_cost(
3037
completion_response=response
3138
)

packages/lmi/tests/cassettes/TestCostTrackerCallback.test_cost_tracking_embeddings[text-embedding-3-small].yaml

Lines changed: 323 additions & 0 deletions
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
interactions:
2+
- request:
3+
body:
4+
'{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"Say
5+
hello"}]}],"temperature":1.0,"max_tokens":4096}'
6+
headers:
7+
accept:
8+
- application/json
9+
accept-encoding:
10+
- gzip, deflate
11+
anthropic-version:
12+
- "2023-06-01"
13+
connection:
14+
- keep-alive
15+
content-length:
16+
- "149"
17+
content-type:
18+
- application/json
19+
host:
20+
- api.anthropic.com
21+
user-agent:
22+
- litellm/1.74.15.post2
23+
method: POST
24+
uri: https://api.anthropic.com/v1/messages
25+
response:
26+
body:
27+
string: !!binary |
28+
H4sIAAAAAAAAA3SQS2vDMBCE/4o7Zxlsp4FGl55CQ6GFQG+lCGEttoi8cvRoa4L/e3Fo6IueFuab
29+
GZY5wRpIDLFTVf2YJt7ap33c7UPHD/ebFd/dHCGQppEWF8WoO4JA8G4RdIw2Js0JAoM35CDROp0N
30+
latyXfbaHnLZVM11XTUNBFrPiThBPp8unYnel/T5SOzIOX9V7PxboQMVk8+F8Za7Inmjp1vMLwIx
31+
+VEF0tEzJIiNSjkwPkGkYyZuCZKzcwL5/LE8wfKYk0r+QBwhNwKtbntSbSCdrGf1k1cXHkib/9gl
32+
u9TT2NNAQTu1Hv76v2jd/6azgM/pu1TXApHCq21JJUsBEsvKRgeDef4AAAD//wMAOCJWH7MBAAA=
33+
headers:
34+
CF-RAY:
35+
- 9880ea5a9d9a234f-SJC
36+
Connection:
37+
- keep-alive
38+
Content-Encoding:
39+
- gzip
40+
Content-Type:
41+
- application/json
42+
Date:
43+
- Thu, 02 Oct 2025 02:54:30 GMT
44+
Server:
45+
- cloudflare
46+
Transfer-Encoding:
47+
- chunked
48+
Via:
49+
- 1.1 google
50+
X-Robots-Tag:
51+
- none
52+
anthropic-organization-id:
53+
- f2c99ed9-038a-406f-9cb5-1f840b758a20
54+
anthropic-ratelimit-input-tokens-limit:
55+
- "5000000"
56+
anthropic-ratelimit-input-tokens-remaining:
57+
- "5000000"
58+
anthropic-ratelimit-input-tokens-reset:
59+
- "2025-10-02T02:54:30Z"
60+
anthropic-ratelimit-output-tokens-limit:
61+
- "1000000"
62+
anthropic-ratelimit-output-tokens-remaining:
63+
- "1000000"
64+
anthropic-ratelimit-output-tokens-reset:
65+
- "2025-10-02T02:54:30Z"
66+
anthropic-ratelimit-requests-limit:
67+
- "5000"
68+
anthropic-ratelimit-requests-remaining:
69+
- "4999"
70+
anthropic-ratelimit-requests-reset:
71+
- "2025-10-02T02:54:29Z"
72+
anthropic-ratelimit-tokens-limit:
73+
- "6000000"
74+
anthropic-ratelimit-tokens-remaining:
75+
- "6000000"
76+
anthropic-ratelimit-tokens-reset:
77+
- "2025-10-02T02:54:30Z"
78+
cf-cache-status:
79+
- DYNAMIC
80+
request-id:
81+
- req_011CThckaYWpShvVqgm7rube
82+
strict-transport-security:
83+
- max-age=31536000; includeSubDomains; preload
84+
x-envoy-upstream-service-time:
85+
- "820"
86+
status:
87+
code: 200
88+
message: OK
89+
version: 1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
interactions:
2+
- request:
3+
body:
4+
'{"model": "claude-3-5-haiku-20241022", "messages": [{"role": "user", "content":
5+
[{"type": "text", "text": "Say hello"}]}], "temperature": 1.0, "max_tokens":
6+
4096, "stream": true}'
7+
headers:
8+
accept:
9+
- application/json
10+
accept-encoding:
11+
- gzip, deflate
12+
anthropic-version:
13+
- "2023-06-01"
14+
connection:
15+
- keep-alive
16+
content-length:
17+
- "178"
18+
content-type:
19+
- application/json
20+
host:
21+
- api.anthropic.com
22+
user-agent:
23+
- litellm/1.74.15.post2
24+
method: POST
25+
uri: https://api.anthropic.com/v1/messages
26+
response:
27+
body:
28+
string: 'event: message_start
29+
30+
data: {"type":"message_start","message":{"id":"msg_01MMB3iASdJHVbzzEFeJNoQk","type":"message","role":"assistant","model":"claude-3-5-haiku-20241022","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":9,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":2,"service_tier":"standard"}} }
31+
32+
33+
event: content_block_start
34+
35+
data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} }
36+
37+
38+
event: content_block_delta
39+
40+
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello!"} }
41+
42+
43+
event: content_block_delta
44+
45+
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
46+
How are you doing today"} }
47+
48+
49+
event: ping
50+
51+
data: {"type": "ping"}
52+
53+
54+
event: content_block_delta
55+
56+
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"?"} }
57+
58+
59+
event: ping
60+
61+
data: {"type": "ping"}
62+
63+
64+
event: content_block_stop
65+
66+
data: {"type":"content_block_stop","index":0 }
67+
68+
69+
event: ping
70+
71+
data: {"type": "ping"}
72+
73+
74+
event: ping
75+
76+
data: {"type": "ping"}
77+
78+
79+
event: message_delta
80+
81+
data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":9,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":11} }
82+
83+
84+
event: message_stop
85+
86+
data: {"type":"message_stop" }
87+
88+
89+
'
90+
headers:
91+
CF-RAY:
92+
- 9880ea60ff599429-SJC
93+
Cache-Control:
94+
- no-cache
95+
Connection:
96+
- keep-alive
97+
Content-Type:
98+
- text/event-stream; charset=utf-8
99+
Date:
100+
- Thu, 02 Oct 2025 02:54:31 GMT
101+
Server:
102+
- cloudflare
103+
Transfer-Encoding:
104+
- chunked
105+
Via:
106+
- 1.1 google
107+
X-Robots-Tag:
108+
- none
109+
anthropic-organization-id:
110+
- f2c99ed9-038a-406f-9cb5-1f840b758a20
111+
anthropic-ratelimit-input-tokens-limit:
112+
- "5000000"
113+
anthropic-ratelimit-input-tokens-remaining:
114+
- "5000000"
115+
anthropic-ratelimit-input-tokens-reset:
116+
- "2025-10-02T02:54:30Z"
117+
anthropic-ratelimit-output-tokens-limit:
118+
- "1000000"
119+
anthropic-ratelimit-output-tokens-remaining:
120+
- "1000000"
121+
anthropic-ratelimit-output-tokens-reset:
122+
- "2025-10-02T02:54:30Z"
123+
anthropic-ratelimit-requests-limit:
124+
- "5000"
125+
anthropic-ratelimit-requests-remaining:
126+
- "4999"
127+
anthropic-ratelimit-requests-reset:
128+
- "2025-10-02T02:54:30Z"
129+
anthropic-ratelimit-tokens-limit:
130+
- "6000000"
131+
anthropic-ratelimit-tokens-remaining:
132+
- "6000000"
133+
anthropic-ratelimit-tokens-reset:
134+
- "2025-10-02T02:54:30Z"
135+
cf-cache-status:
136+
- DYNAMIC
137+
request-id:
138+
- req_011CThckevhG3VG3gFbnrP5A
139+
strict-transport-security:
140+
- max-age=31536000; includeSubDomains; preload
141+
x-envoy-upstream-service-time:
142+
- "416"
143+
status:
144+
code: 200
145+
message: OK
146+
version: 1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
interactions:
2+
- request:
3+
body: '{"messages":[{"role":"user","content":"Say hello"}],"model":"gpt-4o-mini-2024-07-18","n":1,"temperature":1.0}'
4+
headers:
5+
accept:
6+
- application/json
7+
accept-encoding:
8+
- gzip, deflate
9+
connection:
10+
- keep-alive
11+
content-length:
12+
- "109"
13+
content-type:
14+
- application/json
15+
host:
16+
- api.openai.com
17+
user-agent:
18+
- AsyncOpenAI/Python 1.109.0
19+
x-stainless-arch:
20+
- arm64
21+
x-stainless-async:
22+
- async:asyncio
23+
x-stainless-lang:
24+
- python
25+
x-stainless-os:
26+
- MacOS
27+
x-stainless-package-version:
28+
- 1.109.0
29+
x-stainless-raw-response:
30+
- "true"
31+
x-stainless-read-timeout:
32+
- "60.0"
33+
x-stainless-retry-count:
34+
- "0"
35+
x-stainless-runtime:
36+
- CPython
37+
x-stainless-runtime-version:
38+
- 3.12.8
39+
method: POST
40+
uri: https://api.openai.com/v1/chat/completions
41+
response:
42+
body:
43+
string: !!binary |
44+
H4sIAAAAAAAAAwAAAP//jFJNb9swDL37V3A6x4PT1M7HZRiGBd1hKLrtVhSGKtG2FlkUJHpdUOS/
45+
D7bb2F07YBcd+Pie3iP5mAAIo8UOhGokq9bb9NPX1UHuO/3tcF3w9f7m++fq48M255ts3/wQi55B
46+
9z9R8TPrvaLWW2RDboRVQMnYqy7X+Xa1XhXFZgBa0mh7Wu05vaS0Nc6kF9nFZZqt0+Xmid2QURjF
47+
Dm4TAIDH4e19Oo2/xQ6yxXOlxRhljWJ3bgIQgWxfETJGE1k6FosJVOQY3WD9Cq2ld3BFD6Ckgy8w
48+
EuBIHTBpefwwJwasuih7866zdgZI54hlH36wfPeEnM4mLdU+0H38iyoq40xsyoAykusNRSYvBvSU
49+
ANwNw+he5BM+UOu5ZDrg8N12VBPTBl5jTCztVF5uFm9olRpZGhtnoxRKqgb1xJzmLjttaAYks8Sv
50+
vbylPaY2rv4f+QlQCj2jLn1AbdTLvFNbwP48/9V2nvBgWEQMv4zCkg2GfgsaK9nZ8WhEPEbGtqyM
51+
qzH4YMbLqXyZF5msCszzrUhOyR8AAAD//wMALFy9NEcDAAA=
52+
headers:
53+
Access-Control-Expose-Headers:
54+
- X-Request-ID
55+
CF-RAY:
56+
- 9880ea52bdd9d039-SJC
57+
Connection:
58+
- keep-alive
59+
Content-Encoding:
60+
- gzip
61+
Content-Type:
62+
- application/json
63+
Date:
64+
- Thu, 02 Oct 2025 02:54:28 GMT
65+
Server:
66+
- cloudflare
67+
Set-Cookie:
68+
- __cf_bm=m5ja.WypfEkklowzJ1CXqBpx9Ru94J9qdry5eIMO2XU-1759373668-1.0.1.1-gEhQhk8xtpXCd79m9x0nYnRYvoTiZ.sGROPHtm4HWp_CWCOoWX7Lbon4sVvbBLxt1efVC5ZpRHaa_2GjYdHU7bb09j8Ik.5iIZlMvFt71Tk;
69+
path=/; expires=Thu, 02-Oct-25 03:24:28 GMT; domain=.api.openai.com; HttpOnly;
70+
Secure; SameSite=None
71+
- _cfuvid=p5vPP6jHAzeupritAxf78gztpSXI_WsdGQYFSHVnocg-1759373668768-0.0.1.1-604800000;
72+
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
73+
Strict-Transport-Security:
74+
- max-age=31536000; includeSubDomains; preload
75+
Transfer-Encoding:
76+
- chunked
77+
X-Content-Type-Options:
78+
- nosniff
79+
alt-svc:
80+
- h3=":443"; ma=86400
81+
cf-cache-status:
82+
- DYNAMIC
83+
openai-organization:
84+
- future-house-xr4tdh
85+
openai-processing-ms:
86+
- "381"
87+
openai-project:
88+
- proj_gjPFWJzxsj6depZLFdKd2Yk4
89+
openai-version:
90+
- "2020-10-01"
91+
x-envoy-upstream-service-time:
92+
- "400"
93+
x-openai-proxy-wasm:
94+
- v0.1
95+
x-ratelimit-limit-requests:
96+
- "30000"
97+
x-ratelimit-limit-tokens:
98+
- "150000000"
99+
x-ratelimit-remaining-requests:
100+
- "29999"
101+
x-ratelimit-remaining-tokens:
102+
- "149999995"
103+
x-ratelimit-reset-requests:
104+
- 2ms
105+
x-ratelimit-reset-tokens:
106+
- 0s
107+
x-request-id:
108+
- req_2f3a7e075aed47b0acff02560e6a6013
109+
status:
110+
code: 200
111+
message: OK
112+
version: 1

0 commit comments

Comments
 (0)