diff --git a/packages/lmi/src/lmi/cost_tracker.py b/packages/lmi/src/lmi/cost_tracker.py index 90d9fadc..e7337b56 100644 --- a/packages/lmi/src/lmi/cost_tracker.py +++ b/packages/lmi/src/lmi/cost_tracker.py @@ -11,13 +11,23 @@ logger = logging.getLogger(__name__) +# Module-level context variable to track the currently active cost tracker +_active_tracker: contextvars.ContextVar["CostTracker | None"] = contextvars.ContextVar( + "active_cost_tracker", default=None +) + + +def _get_active_tracker() -> "CostTracker": + """Get the currently active cost tracker, defaulting to GLOBAL_COST_TRACKER.""" + return _active_tracker.get() or GLOBAL_COST_TRACKER + class CostTracker: - def __init__(self): + def __init__(self, enabled: bool = True): self.lifetime_cost_usd = 0.0 self.last_report = 0.0 # A contextvar so that different coroutines don't affect each other's cost tracking - self.enabled = contextvars.ContextVar[bool]("track_costs", default=False) + self.enabled = contextvars.ContextVar[bool]("track_costs", default=enabled) # Not a contextvar because I can't imagine a scenario where you'd want more fine-grained control self.report_every_usd = 1.0 self._callbacks: list[Callable[[LLMResponse], Awaitable]] = [] @@ -25,6 +35,23 @@ def __init__(self): def add_callback(self, callback: Callable[[LLMResponse], Awaitable]) -> None: self._callbacks.append(callback) + def set_reporting_threshold(self, threshold_usd: float) -> None: + """Set the threshold for cost reporting.""" + self.report_every_usd = threshold_usd + + def enable_cost_tracking(self, enabled: bool = True) -> None: + """Enable or disable cost tracking for this tracker.""" + self.enabled.set(enabled) + + def __enter__(self): + """Enter the context manager, making this the active tracker.""" + self._token = _active_tracker.set(self) + return self + + def __exit__(self, *args): + """Exit the context manager, restoring the previous active tracker.""" + _active_tracker.reset(self._token) + async def record(self, response: LLMResponse) -> None: # Only record on responses with usage information (final chunk in streaming) # We check for usage presence rather than cost > 0 because: @@ -50,15 +77,17 @@ async def record(self, response: LLMResponse) -> None: ) -GLOBAL_COST_TRACKER = CostTracker() +GLOBAL_COST_TRACKER = CostTracker(enabled=False) def set_reporting_threshold(threshold_usd: float) -> None: - GLOBAL_COST_TRACKER.report_every_usd = threshold_usd + """Set the reporting threshold for the global cost tracker.""" + GLOBAL_COST_TRACKER.set_reporting_threshold(threshold_usd) def enable_cost_tracking(enabled: bool = True) -> None: - GLOBAL_COST_TRACKER.enabled.set(enabled) + """Enable or disable cost tracking for the global cost tracker.""" + GLOBAL_COST_TRACKER.enable_cost_tracking(enabled) @contextmanager @@ -85,7 +114,8 @@ def track_costs( """Automatically track API costs of a coroutine call. Note that the costs will only be recorded if `enable_cost_tracking()` is called, - or if in a `cost_tracking_ctx()` context. + or if in a `cost_tracking_ctx()` context, or if using a custom CostTracker + as a context manager. Usage: ``` @@ -103,8 +133,9 @@ async def api_call(...) -> litellm.ModelResponse: async def wrapped_func(*args, **kwargs): response = await func(*args, **kwargs) - if GLOBAL_COST_TRACKER.enabled.get(): - await GLOBAL_COST_TRACKER.record(response) + tracker = _get_active_tracker() + if tracker.enabled.get(): + await tracker.record(response) return response return wrapped_func @@ -146,8 +177,9 @@ def __aiter__(self): async def __anext__(self): response = await self.stream.__anext__() - if GLOBAL_COST_TRACKER.enabled.get(): - await GLOBAL_COST_TRACKER.record(response) + tracker = _get_active_tracker() + if tracker.enabled.get(): + await tracker.record(response) return response @@ -161,7 +193,8 @@ def track_costs_iter( `TrackedStreamWrapper.stream`. Note that the costs will only be recorded if `enable_cost_tracking()` is called, - or if in a `cost_tracking_ctx()` context. + or if in a `cost_tracking_ctx()` context, or if using a custom CostTracker + as a context manager. Usage: ``` diff --git a/packages/lmi/tests/cassettes/TestCustomCostTracker.test_custom_tracker_with_llm_calls[claude-3-5-haiku-20241022-False].yaml b/packages/lmi/tests/cassettes/TestCustomCostTracker.test_custom_tracker_with_llm_calls[claude-3-5-haiku-20241022-False].yaml new file mode 100644 index 00000000..05544ba7 --- /dev/null +++ b/packages/lmi/tests/cassettes/TestCustomCostTracker.test_custom_tracker_with_llm_calls[claude-3-5-haiku-20241022-False].yaml @@ -0,0 +1,90 @@ +interactions: + - request: + body: + '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"Say + hello"}]}],"temperature":1.0,"max_tokens":4096}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + anthropic-version: + - "2023-06-01" + connection: + - keep-alive + content-length: + - "149" + content-type: + - application/json + host: + - api.anthropic.com + user-agent: + - litellm/1.74.15.post2 + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: !!binary | + H4sIAAAAAAAAA3TQTUvDQBAG4L8S3/MGmtSC7kV6EHoRROmlIsuyOyRrNztxP9RS8t8lYvELTwPz + vDMMc8TAljwkjNfFUr2sV3Wv3b7U7aI9bxZtCwFnITGkTi2amzXFO3u7vr5Y7reGd0/3u+2mg0A+ + jDSnKCXdEQQi+7mhU3Ip65AhYDhkChny4XjKZ3qb5aNIbMh7Pqs2/FrpSNWBS2XZha7KbPXhCtOj + QMo8qkg6cYAEBatyiQGfkOi5UDAEGYr3AuXjGnmEC2PJKvOeQoK8FDDa9KRMJJ0dB/XTFyePpO1/ + dpqd19PY00BRe7Ua/ua/tOl/6yTAJX9vNY1AovjiDKnsKEJi/qDV0WKa3gEAAP//AwBCBnFdswEA + AA== + headers: + CF-RAY: + - 992d7190ad48159c-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 23 Oct 2025 01:25:59 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + Via: + - 1.1 google + X-Robots-Tag: + - none + anthropic-organization-id: + - f2c99ed9-038a-406f-9cb5-1f840b758a20 + anthropic-ratelimit-input-tokens-limit: + - "5000000" + anthropic-ratelimit-input-tokens-remaining: + - "5000000" + anthropic-ratelimit-input-tokens-reset: + - "2025-10-23T01:25:59Z" + anthropic-ratelimit-output-tokens-limit: + - "1000000" + anthropic-ratelimit-output-tokens-remaining: + - "1000000" + anthropic-ratelimit-output-tokens-reset: + - "2025-10-23T01:25:59Z" + anthropic-ratelimit-requests-limit: + - "5000" + anthropic-ratelimit-requests-remaining: + - "4999" + anthropic-ratelimit-requests-reset: + - "2025-10-23T01:25:58Z" + anthropic-ratelimit-tokens-limit: + - "6000000" + anthropic-ratelimit-tokens-remaining: + - "6000000" + anthropic-ratelimit-tokens-reset: + - "2025-10-23T01:25:59Z" + cf-cache-status: + - DYNAMIC + request-id: + - req_011CUPFzKDcuEAx41uaTFRDg + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-envoy-upstream-service-time: + - "684" + status: + code: 200 + message: OK +version: 1 diff --git a/packages/lmi/tests/cassettes/TestCustomCostTracker.test_custom_tracker_with_llm_calls[claude-3-5-haiku-20241022-True].yaml b/packages/lmi/tests/cassettes/TestCustomCostTracker.test_custom_tracker_with_llm_calls[claude-3-5-haiku-20241022-True].yaml new file mode 100644 index 00000000..0f7ac5be --- /dev/null +++ b/packages/lmi/tests/cassettes/TestCustomCostTracker.test_custom_tracker_with_llm_calls[claude-3-5-haiku-20241022-True].yaml @@ -0,0 +1,133 @@ +interactions: + - request: + body: + '{"model": "claude-3-5-haiku-20241022", "messages": [{"role": "user", "content": + [{"type": "text", "text": "Say hello"}]}], "temperature": 1.0, "max_tokens": + 4096, "stream": true}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + anthropic-version: + - "2023-06-01" + connection: + - keep-alive + content-length: + - "178" + content-type: + - application/json + host: + - api.anthropic.com + user-agent: + - litellm/1.74.15.post2 + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: 'event: message_start + + data: {"type":"message_start","message":{"model":"claude-3-5-haiku-20241022","id":"msg_015Ec5tfQ5jkVrkJs1Vekcxk","type":"message","role":"assistant","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":9,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":2,"service_tier":"standard"}} } + + + event: content_block_start + + data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello!"} } + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + How"} } + + + event: ping + + data: {"type": "ping"} + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":" + are you doing today?"} } + + + event: content_block_stop + + data: {"type":"content_block_stop","index":0} + + + event: message_delta + + data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":9,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":11} + } + + + event: message_stop + + data: {"type":"message_stop" } + + + ' + headers: + CF-RAY: + - 992d71964d171698-SJC + Cache-Control: + - no-cache + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Thu, 23 Oct 2025 01:25:59 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + Via: + - 1.1 google + X-Robots-Tag: + - none + anthropic-organization-id: + - f2c99ed9-038a-406f-9cb5-1f840b758a20 + anthropic-ratelimit-input-tokens-limit: + - "5000000" + anthropic-ratelimit-input-tokens-remaining: + - "5000000" + anthropic-ratelimit-input-tokens-reset: + - "2025-10-23T01:25:59Z" + anthropic-ratelimit-output-tokens-limit: + - "1000000" + anthropic-ratelimit-output-tokens-remaining: + - "1000000" + anthropic-ratelimit-output-tokens-reset: + - "2025-10-23T01:25:59Z" + anthropic-ratelimit-requests-limit: + - "5000" + anthropic-ratelimit-requests-remaining: + - "4999" + anthropic-ratelimit-requests-reset: + - "2025-10-23T01:25:59Z" + anthropic-ratelimit-tokens-limit: + - "6000000" + anthropic-ratelimit-tokens-remaining: + - "6000000" + anthropic-ratelimit-tokens-reset: + - "2025-10-23T01:25:59Z" + cf-cache-status: + - DYNAMIC + request-id: + - req_011CUPFzNhUdpP2dMfVfrVBD + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-envoy-upstream-service-time: + - "355" + status: + code: 200 + message: OK +version: 1 diff --git a/packages/lmi/tests/cassettes/TestCustomCostTracker.test_custom_tracker_with_llm_calls[gpt-4o-mini-2024-07-18-False].yaml b/packages/lmi/tests/cassettes/TestCustomCostTracker.test_custom_tracker_with_llm_calls[gpt-4o-mini-2024-07-18-False].yaml new file mode 100644 index 00000000..3db7f1eb --- /dev/null +++ b/packages/lmi/tests/cassettes/TestCustomCostTracker.test_custom_tracker_with_llm_calls[gpt-4o-mini-2024-07-18-False].yaml @@ -0,0 +1,112 @@ +interactions: + - request: + body: '{"messages":[{"role":"user","content":"Say hello"}],"model":"gpt-4o-mini-2024-07-18","n":1,"temperature":1.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "109" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.99.9 + x-stainless-arch: + - x64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.99.9 + x-stainless-raw-response: + - "true" + x-stainless-read-timeout: + - "60.0" + x-stainless-retry-count: + - "0" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4xSTW/UMBC951cMPm/QZtX9vHDgQwUtraDcUBW59iQxOB7LnlBW1f535GS7SWmR + uPgwb97zezPzkAEIo8UOhGokq9bb/O03vPrq91v5+fpm+8m9Z//l3VV3v983H66dmCUG3f1AxY+s + 14pab5ENnWAVUDIm1WK9KorNYr1c90BLGm2i1Z7zC8pb40y+mC8u8vk6LzYndkNGYRQ7+J4BADz0 + b/LpNP4WO5jPHistxihrFLtzE4AIZFNFyBhNZOlYzEZQkWN0vfVLtJZewSXdg5IOPsJAgAN1wKTl + 4c2UGLDqokzmXWftBJDOEcsUvrd8e0KOZ5OWah/oLv5FFZVxJjZlQBnJJUORyYsePWYAt/0wuif5 + hA/Uei6ZfmL/3XZQE+MGnmNMLO1YLjazF7RKjSyNjZNRCiVVg3pkjnOXnTY0AbJJ4udeXtIeUhtX + /4/8CCiFnlGXPqA26mnesS1gOs9/tZ0n3BsWEcMvo7BkgyFtQWMlOzscjYiHyNiWlXE1Bh/McDmV + L5eruaxWuFxuRXbM/gAAAP//AwCfzQXLRwMAAA== + headers: + Access-Control-Expose-Headers: + - X-Request-ID + CF-RAY: + - 992d7185a96715d8-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 23 Oct 2025 01:25:57 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=t6SuQJDpmaqPz3gqv60_9gKwJoAcaYHCmfBq228U69Y-1761182757-1.0.1.1-Ewirrcge.J6PdzRqkfcSPysgM0eM0VMhARcYGMI_CPOGrL8TgFlmU80B984xI7xaPerzHGfEnDHNIYX30vI_MSEdL52a8zAI98LzWjI8Q9g; + path=/; expires=Thu, 23-Oct-25 01:55:57 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=ZvnVJyBxVhjg8InR3AcfWn1176WJKvSgizBWnBR5sX0-1761182757675-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "460" + openai-project: + - proj_Jt6Hc8GI0Cv9yaRVy35owwje + openai-version: + - "2020-10-01" + x-envoy-upstream-service-time: + - "477" + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - "30000" + x-ratelimit-limit-tokens: + - "150000000" + x-ratelimit-remaining-requests: + - "29999" + x-ratelimit-remaining-tokens: + - "149999995" + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_8fe24aed8ac14af3900c8271065ae031 + status: + code: 200 + message: OK +version: 1 diff --git a/packages/lmi/tests/cassettes/TestCustomCostTracker.test_custom_tracker_with_llm_calls[gpt-4o-mini-2024-07-18-True].yaml b/packages/lmi/tests/cassettes/TestCustomCostTracker.test_custom_tracker_with_llm_calls[gpt-4o-mini-2024-07-18-True].yaml new file mode 100644 index 00000000..3cf818e4 --- /dev/null +++ b/packages/lmi/tests/cassettes/TestCustomCostTracker.test_custom_tracker_with_llm_calls[gpt-4o-mini-2024-07-18-True].yaml @@ -0,0 +1,148 @@ +interactions: + - request: + body: '{"messages":[{"role":"user","content":"Say hello"}],"model":"gpt-4o-mini-2024-07-18","n":1,"stream":true,"stream_options":{"include_usage":true},"temperature":1.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - "163" + content-type: + - application/json + host: + - api.openai.com + user-agent: + - AsyncOpenAI/Python 1.99.9 + x-stainless-arch: + - x64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.99.9 + x-stainless-raw-response: + - "true" + x-stainless-read-timeout: + - "60.0" + x-stainless-retry-count: + - "0" + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: + 'data: {"id":"chatcmpl-CTeNSuSz9KX5UByNEqB5hPOqJGvig","object":"chat.completion.chunk","created":1761182758,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"JBV4RpKki"} + + + data: {"id":"chatcmpl-CTeNSuSz9KX5UByNEqB5hPOqJGvig","object":"chat.completion.chunk","created":1761182758,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"x1BXE1"} + + + data: {"id":"chatcmpl-CTeNSuSz9KX5UByNEqB5hPOqJGvig","object":"chat.completion.chunk","created":1761182758,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"hmeYfEU1US"} + + + data: {"id":"chatcmpl-CTeNSuSz9KX5UByNEqB5hPOqJGvig","object":"chat.completion.chunk","created":1761182758,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Uq6pelL"} + + + data: {"id":"chatcmpl-CTeNSuSz9KX5UByNEqB5hPOqJGvig","object":"chat.completion.chunk","created":1761182758,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"AE3fy3j"} + + + data: {"id":"chatcmpl-CTeNSuSz9KX5UByNEqB5hPOqJGvig","object":"chat.completion.chunk","created":1761182758,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"0ACpiLwgA"} + + + data: {"id":"chatcmpl-CTeNSuSz9KX5UByNEqB5hPOqJGvig","object":"chat.completion.chunk","created":1761182758,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":" + assist"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"YUVE"} + + + data: {"id":"chatcmpl-CTeNSuSz9KX5UByNEqB5hPOqJGvig","object":"chat.completion.chunk","created":1761182758,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"JeSr30H"} + + + data: {"id":"chatcmpl-CTeNSuSz9KX5UByNEqB5hPOqJGvig","object":"chat.completion.chunk","created":1761182758,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"nX4iQ"} + + + data: {"id":"chatcmpl-CTeNSuSz9KX5UByNEqB5hPOqJGvig","object":"chat.completion.chunk","created":1761182758,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"h8ieO9a2qe"} + + + data: {"id":"chatcmpl-CTeNSuSz9KX5UByNEqB5hPOqJGvig","object":"chat.completion.chunk","created":1761182758,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"HxnVw"} + + + data: {"id":"chatcmpl-CTeNSuSz9KX5UByNEqB5hPOqJGvig","object":"chat.completion.chunk","created":1761182758,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[],"usage":{"prompt_tokens":9,"completion_tokens":9,"total_tokens":18,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"ohvuGBXylWix"} + + + data: [DONE] + + + ' + headers: + Access-Control-Expose-Headers: + - X-Request-ID + CF-RAY: + - 992d718bb9dfeb25-SJC + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Thu, 23 Oct 2025 01:25:58 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=E2LS.ggZcXfSgDLb3p0AjD_tOW50ZegtvJebwXwRYD8-1761182758-1.0.1.1-fhn6FfJv.cv8qXkw9WSVUYTVE3BTwmm7KrowpNXyJ9N3ok0A6QWcM6EWc6Xau4.l54Z8q1h0J7Ad8KpPKqBwzqjdJ.wDKy1tMWc6M49HQQE; + path=/; expires=Thu, 23-Oct-25 01:55:58 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=LPp8_I1vZfYvARt7De3LOhUTYLMQle.ckowdQe.WZDc-1761182758233-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - future-house-xr4tdh + openai-processing-ms: + - "148" + openai-project: + - proj_Jt6Hc8GI0Cv9yaRVy35owwje + openai-version: + - "2020-10-01" + x-envoy-upstream-service-time: + - "276" + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - "30000" + x-ratelimit-limit-tokens: + - "150000000" + x-ratelimit-remaining-requests: + - "29999" + x-ratelimit-remaining-tokens: + - "149999997" + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_264b637c92944bdfa9163188c65e17b1 + status: + code: 200 + message: OK +version: 1 diff --git a/packages/lmi/tests/test_cost_tracking.py b/packages/lmi/tests/test_cost_tracking.py index f07fb9ad..e78fc201 100644 --- a/packages/lmi/tests/test_cost_tracking.py +++ b/packages/lmi/tests/test_cost_tracking.py @@ -1,3 +1,4 @@ +import asyncio from contextlib import contextmanager from typing import Any from unittest.mock import AsyncMock, MagicMock, patch @@ -7,7 +8,7 @@ from aviary.core import Message from lmi import cost_tracking_ctx -from lmi.cost_tracker import GLOBAL_COST_TRACKER, TrackedStreamWrapper +from lmi.cost_tracker import GLOBAL_COST_TRACKER, CostTracker, TrackedStreamWrapper from lmi.embeddings import LiteLLMEmbeddingModel from lmi.llms import CommonLLMNames, LiteLLMModel from lmi.utils import VCR_DEFAULT_MATCH_ON @@ -369,3 +370,174 @@ async def track_callback(response): # noqa: RUF029 # Callback should have been called exactly once assert len(callback_calls) == 1 + + +class TestCustomCostTracker: + """Test custom cost tracker functionality.""" + + def test_custom_tracker_defaults(self): + """Test that custom trackers are enabled by default and GLOBAL is disabled.""" + # Custom tracker should be enabled by default + custom_tracker = CostTracker() + assert custom_tracker.enabled.get() is True + + # Global tracker should be disabled by default + assert GLOBAL_COST_TRACKER.enabled.get() is False + + # Test instance methods + custom_tracker.set_reporting_threshold(5.0) + assert custom_tracker.report_every_usd == 5.0 + + custom_tracker.enable_cost_tracking(enabled=False) + assert custom_tracker.enabled.get() is False + + custom_tracker.enable_cost_tracking(enabled=True) + assert custom_tracker.enabled.get() is True + + @pytest.mark.asyncio + async def test_custom_tracker_basic_usage(self): + """Test basic usage of custom tracker as context manager.""" + custom_tracker = CostTracker() + mock_response = MagicMock( + model="gpt-4o-mini", + usage=MagicMock(prompt_tokens=10, completion_tokens=20), + ) + + # Record initial costs + global_initial_cost = GLOBAL_COST_TRACKER.lifetime_cost_usd + custom_initial_cost = custom_tracker.lifetime_cost_usd + + with ( + custom_tracker, + patch("litellm.cost_calculator.completion_cost", return_value=0.05), + ): + await custom_tracker.record(mock_response) + + # Custom tracker should have accumulated cost + assert custom_tracker.lifetime_cost_usd > custom_initial_cost + assert custom_tracker.lifetime_cost_usd == 0.05 + + # Global tracker should remain unchanged + assert GLOBAL_COST_TRACKER.lifetime_cost_usd == global_initial_cost + + @pytest.mark.vcr(match_on=VCR_DEFAULT_MATCH_ON) + @pytest.mark.asyncio + @pytest.mark.parametrize( + ("model_name", "stream"), + [ + (CommonLLMNames.OPENAI_TEST, False), + (CommonLLMNames.OPENAI_TEST, True), + (CommonLLMNames.ANTHROPIC_TEST, False), + (CommonLLMNames.ANTHROPIC_TEST, True), + ], + ) + async def test_custom_tracker_with_llm_calls(self, model_name, stream): + """Test custom tracker with real LLM calls.""" + custom_tracker = CostTracker() + model = LiteLLMModel(name=model_name) + + # Record initial costs + global_initial_cost = GLOBAL_COST_TRACKER.lifetime_cost_usd + custom_initial_cost = custom_tracker.lifetime_cost_usd + + with custom_tracker: + if stream: + # Test streaming via callbacks + chunks: list[str] = [] + await model.call_single( + messages=[Message(content="Say hello")], + callbacks=[chunks.append], + ) + assert chunks # Should have received streaming chunks + else: + # Test non-streaming + result = await model.call_single( + messages=[Message(content="Say hello")], + ) + assert result.text + + # Custom tracker should have accumulated cost + assert custom_tracker.lifetime_cost_usd > custom_initial_cost + + # Global tracker should remain unchanged + assert GLOBAL_COST_TRACKER.lifetime_cost_usd == global_initial_cost + + @pytest.mark.asyncio + async def test_multiple_custom_trackers(self): + """Test sequential and nested custom trackers.""" + tracker1 = CostTracker() + tracker2 = CostTracker() + mock_response = MagicMock( + model=CommonLLMNames.OPENAI_TEST.value, + usage=MagicMock(prompt_tokens=10, completion_tokens=20), + ) + + # Test sequential use + with ( + tracker1, + patch("litellm.cost_calculator.completion_cost", return_value=0.01), + ): + await tracker1.record(mock_response) + assert tracker1.lifetime_cost_usd == 0.01 + assert tracker2.lifetime_cost_usd == 0.0 + + with ( + tracker2, + patch("litellm.cost_calculator.completion_cost", return_value=0.02), + ): + await tracker2.record(mock_response) + assert tracker1.lifetime_cost_usd == 0.01 # Unchanged + assert tracker2.lifetime_cost_usd == 0.02 + + # Test nested trackers + tracker3 = CostTracker() + tracker4 = CostTracker() + + with ( + tracker3, + patch("litellm.cost_calculator.completion_cost", return_value=0.03), + ): + await tracker3.record(mock_response) + assert tracker3.lifetime_cost_usd == 0.03 + + # Nested context should override the active tracker + with tracker4: + await tracker4.record(mock_response) + assert tracker4.lifetime_cost_usd == 0.03 + + # Back to outer context + await tracker3.record(mock_response) + assert tracker3.lifetime_cost_usd == 0.06 + assert tracker4.lifetime_cost_usd == 0.03 # Unchanged after exiting + + @pytest.mark.asyncio + async def test_custom_trackers_parallel_tasks(self): + """Test that contextvars properly isolate tracker state across async tasks.""" + tracker1 = CostTracker() + tracker2 = CostTracker() + + async def task_with_tracker(tracker: CostTracker): + """Task that uses a specific tracker and directly manipulates cost.""" + with tracker: + # Simulate recording costs by directly manipulating the tracker + # This avoids the complexity of mocking litellm.cost_calculator in parallel tasks + await asyncio.sleep(0.01) + tracker.lifetime_cost_usd += 0.01 + await asyncio.sleep(0.01) + tracker.lifetime_cost_usd += 0.01 + return tracker.lifetime_cost_usd + + # Run tasks in parallel with different trackers + result1, result2 = await asyncio.gather( + task_with_tracker(tracker1), + task_with_tracker(tracker2), + ) + + # Each tracker should have accumulated its own costs independently + assert result1 == 0.02 + assert result2 == 0.02 + assert tracker1.lifetime_cost_usd == 0.02 + assert tracker2.lifetime_cost_usd == 0.02 + + # Global tracker should remain unchanged + assert GLOBAL_COST_TRACKER.lifetime_cost_usd == 0.0