diff --git a/packages/lmi/src/lmi/cost_tracker.py b/packages/lmi/src/lmi/cost_tracker.py
index 90d9fadc..e7337b56 100644
--- a/packages/lmi/src/lmi/cost_tracker.py
+++ b/packages/lmi/src/lmi/cost_tracker.py
@@ -11,13 +11,23 @@
 
 logger = logging.getLogger(__name__)
 
+# Module-level context variable to track the currently active cost tracker
+_active_tracker: contextvars.ContextVar["CostTracker | None"] = contextvars.ContextVar(
+    "active_cost_tracker", default=None
+)
+
+
+def _get_active_tracker() -> "CostTracker":
+    """Get the currently active cost tracker, defaulting to GLOBAL_COST_TRACKER."""
+    return _active_tracker.get() or GLOBAL_COST_TRACKER
+
 
 class CostTracker:
-    def __init__(self):
+    def __init__(self, enabled: bool = True):
         self.lifetime_cost_usd = 0.0
         self.last_report = 0.0
         # A contextvar so that different coroutines don't affect each other's cost tracking
-        self.enabled = contextvars.ContextVar[bool]("track_costs", default=False)
+        self.enabled = contextvars.ContextVar[bool]("track_costs", default=enabled)
         # Not a contextvar because I can't imagine a scenario where you'd want more fine-grained control
         self.report_every_usd = 1.0
         self._callbacks: list[Callable[[LLMResponse], Awaitable]] = []
@@ -25,6 +35,33 @@ def __init__(self):
     def add_callback(self, callback: Callable[[LLMResponse], Awaitable]) -> None:
         self._callbacks.append(callback)
 
+    def set_reporting_threshold(self, threshold_usd: float) -> None:
+        """Set the threshold for cost reporting."""
+        self.report_every_usd = threshold_usd
+
+    def enable_cost_tracking(self, enabled: bool = True) -> None:
+        """Enable or disable cost tracking for this tracker."""
+        self.enabled.set(enabled)
+
+    # Reset tokens for `_active_tracker`, stacked per context. A token kept on the
+    # instance is overwritten when the tracker is re-entered or shared by concurrent
+    # tasks, and `ContextVar.reset()` rejects a reused or foreign-context token.
+    _entry_tokens = contextvars.ContextVar[tuple[contextvars.Token, ...]](
+        "cost_tracker_tokens", default=()
+    )
+
+    def __enter__(self):
+        """Enter the context manager, making this the active tracker."""
+        token = _active_tracker.set(self)
+        self._entry_tokens.set((*self._entry_tokens.get(), token))
+        return self
+
+    def __exit__(self, *args):
+        """Exit the context manager, restoring the previous active tracker."""
+        *outer_tokens, token = self._entry_tokens.get()
+        self._entry_tokens.set(tuple(outer_tokens))
+        _active_tracker.reset(token)
+
     async def record(self, response: LLMResponse) -> None:
         # Only record on responses with usage information (final chunk in streaming)
         # We check for usage presence rather than cost > 0 because:
@@ -50,15 +87,17 @@ async def record(self, response: LLMResponse) -> None:
                 )
 
 
-GLOBAL_COST_TRACKER = CostTracker()
+GLOBAL_COST_TRACKER = CostTracker(enabled=False)
 
 
 def set_reporting_threshold(threshold_usd: float) -> None:
-    GLOBAL_COST_TRACKER.report_every_usd = threshold_usd
+    """Set the reporting threshold for the global cost tracker."""
+    GLOBAL_COST_TRACKER.set_reporting_threshold(threshold_usd)
 
 
 def enable_cost_tracking(enabled: bool = True) -> None:
-    GLOBAL_COST_TRACKER.enabled.set(enabled)
+    """Enable or disable cost tracking for the global cost tracker."""
+    GLOBAL_COST_TRACKER.enable_cost_tracking(enabled)
 
 
 @contextmanager
@@ -85,7 +124,8 @@ def track_costs(
     """Automatically track API costs of a coroutine call.
 
     Note that the costs will only be recorded if `enable_cost_tracking()` is called,
-    or if in a `cost_tracking_ctx()` context.
+    or if in a `cost_tracking_ctx()` context, or if using a custom CostTracker
+    as a context manager.
 
     Usage:
     ```
@@ -103,8 +143,9 @@ async def api_call(...) -> litellm.ModelResponse:
 
     async def wrapped_func(*args, **kwargs):
         response = await func(*args, **kwargs)
-        if GLOBAL_COST_TRACKER.enabled.get():
-            await GLOBAL_COST_TRACKER.record(response)
+        tracker = _get_active_tracker()
+        if tracker.enabled.get():
+            await tracker.record(response)
         return response
 
     return wrapped_func
@@ -146,8 +187,9 @@ def __aiter__(self):
 
     async def __anext__(self):
         response = await self.stream.__anext__()
-        if GLOBAL_COST_TRACKER.enabled.get():
-            await GLOBAL_COST_TRACKER.record(response)
+        tracker = _get_active_tracker()
+        if tracker.enabled.get():
+            await tracker.record(response)
         return response
 
 
@@ -161,7 +203,8 @@ def track_costs_iter(
     `TrackedStreamWrapper.stream`.
 
     Note that the costs will only be recorded if `enable_cost_tracking()` is called,
-    or if in a `cost_tracking_ctx()` context.
+    or if in a `cost_tracking_ctx()` context, or if using a custom CostTracker
+    as a context manager.
 
     Usage:
     ```
diff --git a/packages/lmi/tests/cassettes/TestCustomCostTracker.test_custom_tracker_with_llm_calls[claude-3-5-haiku-20241022-False].yaml b/packages/lmi/tests/cassettes/TestCustomCostTracker.test_custom_tracker_with_llm_calls[claude-3-5-haiku-20241022-False].yaml
new file mode 100644
index 00000000..05544ba7
--- /dev/null
+++ b/packages/lmi/tests/cassettes/TestCustomCostTracker.test_custom_tracker_with_llm_calls[claude-3-5-haiku-20241022-False].yaml
@@ -0,0 +1,90 @@
+interactions:
+  - request:
+      body:
+        '{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":[{"type":"text","text":"Say
+        hello"}]}],"temperature":1.0,"max_tokens":4096}'
+      headers:
+        accept:
+          - application/json
+        accept-encoding:
+          - gzip, deflate
+        anthropic-version:
+          - "2023-06-01"
+        connection:
+          - keep-alive
+        content-length:
+          - "149"
+        content-type:
+          - application/json
+        host:
+          - api.anthropic.com
+        user-agent:
+          - litellm/1.74.15.post2
+      method: POST
+      uri: https://api.anthropic.com/v1/messages
+    response:
+      body:
+        string: !!binary |
+          H4sIAAAAAAAAA3TQTUvDQBAG4L8S3/MGmtSC7kV6EHoRROmlIsuyOyRrNztxP9RS8t8lYvELTwPz
+          vDMMc8TAljwkjNfFUr2sV3Wv3b7U7aI9bxZtCwFnITGkTi2amzXFO3u7vr5Y7reGd0/3u+2mg0A+
+          jDSnKCXdEQQi+7mhU3Ip65AhYDhkChny4XjKZ3qb5aNIbMh7Pqs2/FrpSNWBS2XZha7KbPXhCtOj
+          QMo8qkg6cYAEBatyiQGfkOi5UDAEGYr3AuXjGnmEC2PJKvOeQoK8FDDa9KRMJJ0dB/XTFyePpO1/
+          dpqd19PY00BRe7Ua/ua/tOl/6yTAJX9vNY1AovjiDKnsKEJi/qDV0WKa3gEAAP//AwBCBnFdswEA
+          AA==
+      headers:
+        CF-RAY:
+          - 992d7190ad48159c-SJC
+        Connection:
+          - keep-alive
+        Content-Encoding:
+          - gzip
+        Content-Type:
+          - application/json
+        Date:
+          - Thu, 23 Oct 2025 01:25:59 GMT
+        Server:
+          - cloudflare
+        Transfer-Encoding:
+          - chunked
+        Via:
+          - 1.1 google
+        X-Robots-Tag:
+          - none
+        anthropic-organization-id:
+          - f2c99ed9-038a-406f-9cb5-1f840b758a20
+        anthropic-ratelimit-input-tokens-limit:
+          - "5000000"
+        anthropic-ratelimit-input-tokens-remaining:
+          - "5000000"
+        anthropic-ratelimit-input-tokens-reset:
+          - "2025-10-23T01:25:59Z"
+        anthropic-ratelimit-output-tokens-limit:
+          - "1000000"
+        anthropic-ratelimit-output-tokens-remaining:
+          - "1000000"
+        anthropic-ratelimit-output-tokens-reset:
+          - "2025-10-23T01:25:59Z"
+        anthropic-ratelimit-requests-limit:
+          - "5000"
+        anthropic-ratelimit-requests-remaining:
+          - "4999"
+        anthropic-ratelimit-requests-reset:
+          - "2025-10-23T01:25:58Z"
+        anthropic-ratelimit-tokens-limit:
+          - "6000000"
+        anthropic-ratelimit-tokens-remaining:
+          - "6000000"
+        anthropic-ratelimit-tokens-reset:
+          - "2025-10-23T01:25:59Z"
+        cf-cache-status:
+          - DYNAMIC
+        request-id:
+          - req_011CUPFzKDcuEAx41uaTFRDg
+        strict-transport-security:
+          - max-age=31536000; includeSubDomains; preload
+        x-envoy-upstream-service-time:
+          - "684"
+      status:
+        code: 200
+        message: OK
+version: 1
diff --git a/packages/lmi/tests/cassettes/TestCustomCostTracker.test_custom_tracker_with_llm_calls[claude-3-5-haiku-20241022-True].yaml b/packages/lmi/tests/cassettes/TestCustomCostTracker.test_custom_tracker_with_llm_calls[claude-3-5-haiku-20241022-True].yaml
new file mode 100644
index 00000000..0f7ac5be
--- /dev/null
+++ b/packages/lmi/tests/cassettes/TestCustomCostTracker.test_custom_tracker_with_llm_calls[claude-3-5-haiku-20241022-True].yaml
@@ -0,0 +1,133 @@
+interactions:
+  - request:
+      body:
+        '{"model": "claude-3-5-haiku-20241022", "messages": [{"role": "user", "content":
+        [{"type": "text", "text": "Say hello"}]}], "temperature": 1.0, "max_tokens":
+        4096, "stream": true}'
+      headers:
+        accept:
+          - application/json
+        accept-encoding:
+          - gzip, deflate
+        anthropic-version:
+          - "2023-06-01"
+        connection:
+          - keep-alive
+        content-length:
+          - "178"
+        content-type:
+          - application/json
+        host:
+          - api.anthropic.com
+        user-agent:
+          - litellm/1.74.15.post2
+      method: POST
+      uri: https://api.anthropic.com/v1/messages
+    response:
+      body:
+        string: 'event: message_start
+
+          data: {"type":"message_start","message":{"model":"claude-3-5-haiku-20241022","id":"msg_015Ec5tfQ5jkVrkJs1Vekcxk","type":"message","role":"assistant","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":9,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":2,"service_tier":"standard"}}           }
+
+
+          event: content_block_start
+
+          data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}        }
+
+
+          event: content_block_delta
+
+          data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello!"}             }
+
+
+          event: content_block_delta
+
+          data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
+          How"} }
+
+
+          event: ping
+
+          data: {"type": "ping"}
+
+
+          event: content_block_delta
+
+          data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
+          are you doing today?"}     }
+
+
+          event: content_block_stop
+
+          data: {"type":"content_block_stop","index":0}
+
+
+          event: message_delta
+
+          data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":9,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":11}
+          }
+
+
+          event: message_stop
+
+          data: {"type":"message_stop"              }
+
+
+          '
+      headers:
+        CF-RAY:
+          - 992d71964d171698-SJC
+        Cache-Control:
+          - no-cache
+        Connection:
+          - keep-alive
+        Content-Type:
+          - text/event-stream; charset=utf-8
+        Date:
+          - Thu, 23 Oct 2025 01:25:59 GMT
+        Server:
+          - cloudflare
+        Transfer-Encoding:
+          - chunked
+        Via:
+          - 1.1 google
+        X-Robots-Tag:
+          - none
+        anthropic-organization-id:
+          - f2c99ed9-038a-406f-9cb5-1f840b758a20
+        anthropic-ratelimit-input-tokens-limit:
+          - "5000000"
+        anthropic-ratelimit-input-tokens-remaining:
+          - "5000000"
+        anthropic-ratelimit-input-tokens-reset:
+          - "2025-10-23T01:25:59Z"
+        anthropic-ratelimit-output-tokens-limit:
+          - "1000000"
+        anthropic-ratelimit-output-tokens-remaining:
+          - "1000000"
+        anthropic-ratelimit-output-tokens-reset:
+          - "2025-10-23T01:25:59Z"
+        anthropic-ratelimit-requests-limit:
+          - "5000"
+        anthropic-ratelimit-requests-remaining:
+          - "4999"
+        anthropic-ratelimit-requests-reset:
+          - "2025-10-23T01:25:59Z"
+        anthropic-ratelimit-tokens-limit:
+          - "6000000"
+        anthropic-ratelimit-tokens-remaining:
+          - "6000000"
+        anthropic-ratelimit-tokens-reset:
+          - "2025-10-23T01:25:59Z"
+        cf-cache-status:
+          - DYNAMIC
+        request-id:
+          - req_011CUPFzNhUdpP2dMfVfrVBD
+        strict-transport-security:
+          - max-age=31536000; includeSubDomains; preload
+        x-envoy-upstream-service-time:
+          - "355"
+      status:
+        code: 200
+        message: OK
+version: 1
diff --git a/packages/lmi/tests/cassettes/TestCustomCostTracker.test_custom_tracker_with_llm_calls[gpt-4o-mini-2024-07-18-False].yaml b/packages/lmi/tests/cassettes/TestCustomCostTracker.test_custom_tracker_with_llm_calls[gpt-4o-mini-2024-07-18-False].yaml
new file mode 100644
index 00000000..3db7f1eb
--- /dev/null
+++ b/packages/lmi/tests/cassettes/TestCustomCostTracker.test_custom_tracker_with_llm_calls[gpt-4o-mini-2024-07-18-False].yaml
@@ -0,0 +1,112 @@
+interactions:
+  - request:
+      body: '{"messages":[{"role":"user","content":"Say hello"}],"model":"gpt-4o-mini-2024-07-18","n":1,"temperature":1.0}'
+      headers:
+        accept:
+          - application/json
+        accept-encoding:
+          - gzip, deflate
+        connection:
+          - keep-alive
+        content-length:
+          - "109"
+        content-type:
+          - application/json
+        host:
+          - api.openai.com
+        user-agent:
+          - AsyncOpenAI/Python 1.99.9
+        x-stainless-arch:
+          - x64
+        x-stainless-async:
+          - async:asyncio
+        x-stainless-lang:
+          - python
+        x-stainless-os:
+          - Linux
+        x-stainless-package-version:
+          - 1.99.9
+        x-stainless-raw-response:
+          - "true"
+        x-stainless-read-timeout:
+          - "60.0"
+        x-stainless-retry-count:
+          - "0"
+        x-stainless-runtime:
+          - CPython
+        x-stainless-runtime-version:
+          - 3.13.3
+      method: POST
+      uri: https://api.openai.com/v1/chat/completions
+    response:
+      body:
+        string: !!binary |
+          H4sIAAAAAAAAA4xSTW/UMBC951cMPm/QZtX9vHDgQwUtraDcUBW59iQxOB7LnlBW1f535GS7SWmR
+          uPgwb97zezPzkAEIo8UOhGokq9bb/O03vPrq91v5+fpm+8m9Z//l3VV3v983H66dmCUG3f1AxY+s
+          14pab5ENnWAVUDIm1WK9KorNYr1c90BLGm2i1Z7zC8pb40y+mC8u8vk6LzYndkNGYRQ7+J4BADz0
+          b/LpNP4WO5jPHistxihrFLtzE4AIZFNFyBhNZOlYzEZQkWN0vfVLtJZewSXdg5IOPsJAgAN1wKTl
+          4c2UGLDqokzmXWftBJDOEcsUvrd8e0KOZ5OWah/oLv5FFZVxJjZlQBnJJUORyYsePWYAt/0wuif5
+          hA/Uei6ZfmL/3XZQE+MGnmNMLO1YLjazF7RKjSyNjZNRCiVVg3pkjnOXnTY0AbJJ4udeXtIeUhtX
+          /4/8CCiFnlGXPqA26mnesS1gOs9/tZ0n3BsWEcMvo7BkgyFtQWMlOzscjYiHyNiWlXE1Bh/McDmV
+          L5eruaxWuFxuRXbM/gAAAP//AwCfzQXLRwMAAA==
+      headers:
+        Access-Control-Expose-Headers:
+          - X-Request-ID
+        CF-RAY:
+          - 992d7185a96715d8-SJC
+        Connection:
+          - keep-alive
+        Content-Encoding:
+          - gzip
+        Content-Type:
+          - application/json
+        Date:
+          - Thu, 23 Oct 2025 01:25:57 GMT
+        Server:
+          - cloudflare
+        Set-Cookie:
+          - __cf_bm=t6SuQJDpmaqPz3gqv60_9gKwJoAcaYHCmfBq228U69Y-1761182757-1.0.1.1-Ewirrcge.J6PdzRqkfcSPysgM0eM0VMhARcYGMI_CPOGrL8TgFlmU80B984xI7xaPerzHGfEnDHNIYX30vI_MSEdL52a8zAI98LzWjI8Q9g;
+            path=/; expires=Thu, 23-Oct-25 01:55:57 GMT; domain=.api.openai.com; HttpOnly;
+            Secure; SameSite=None
+          - _cfuvid=ZvnVJyBxVhjg8InR3AcfWn1176WJKvSgizBWnBR5sX0-1761182757675-0.0.1.1-604800000;
+            path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+        Strict-Transport-Security:
+          - max-age=31536000; includeSubDomains; preload
+        Transfer-Encoding:
+          - chunked
+        X-Content-Type-Options:
+          - nosniff
+        alt-svc:
+          - h3=":443"; ma=86400
+        cf-cache-status:
+          - DYNAMIC
+        openai-organization:
+          - future-house-xr4tdh
+        openai-processing-ms:
+          - "460"
+        openai-project:
+          - proj_Jt6Hc8GI0Cv9yaRVy35owwje
+        openai-version:
+          - "2020-10-01"
+        x-envoy-upstream-service-time:
+          - "477"
+        x-openai-proxy-wasm:
+          - v0.1
+        x-ratelimit-limit-requests:
+          - "30000"
+        x-ratelimit-limit-tokens:
+          - "150000000"
+        x-ratelimit-remaining-requests:
+          - "29999"
+        x-ratelimit-remaining-tokens:
+          - "149999995"
+        x-ratelimit-reset-requests:
+          - 2ms
+        x-ratelimit-reset-tokens:
+          - 0s
+        x-request-id:
+          - req_8fe24aed8ac14af3900c8271065ae031
+      status:
+        code: 200
+        message: OK
+version: 1
diff --git a/packages/lmi/tests/cassettes/TestCustomCostTracker.test_custom_tracker_with_llm_calls[gpt-4o-mini-2024-07-18-True].yaml b/packages/lmi/tests/cassettes/TestCustomCostTracker.test_custom_tracker_with_llm_calls[gpt-4o-mini-2024-07-18-True].yaml
new file mode 100644
index 00000000..3cf818e4
--- /dev/null
+++ b/packages/lmi/tests/cassettes/TestCustomCostTracker.test_custom_tracker_with_llm_calls[gpt-4o-mini-2024-07-18-True].yaml
@@ -0,0 +1,148 @@
+interactions:
+  - request:
+      body: '{"messages":[{"role":"user","content":"Say hello"}],"model":"gpt-4o-mini-2024-07-18","n":1,"stream":true,"stream_options":{"include_usage":true},"temperature":1.0}'
+      headers:
+        accept:
+          - application/json
+        accept-encoding:
+          - gzip, deflate
+        connection:
+          - keep-alive
+        content-length:
+          - "163"
+        content-type:
+          - application/json
+        host:
+          - api.openai.com
+        user-agent:
+          - AsyncOpenAI/Python 1.99.9
+        x-stainless-arch:
+          - x64
+        x-stainless-async:
+          - async:asyncio
+        x-stainless-lang:
+          - python
+        x-stainless-os:
+          - Linux
+        x-stainless-package-version:
+          - 1.99.9
+        x-stainless-raw-response:
+          - "true"
+        x-stainless-read-timeout:
+          - "60.0"
+        x-stainless-retry-count:
+          - "0"
+        x-stainless-runtime:
+          - CPython
+        x-stainless-runtime-version:
+          - 3.13.3
+      method: POST
+      uri: https://api.openai.com/v1/chat/completions
+    response:
+      body:
+        string:
+          'data: {"id":"chatcmpl-CTeNSuSz9KX5UByNEqB5hPOqJGvig","object":"chat.completion.chunk","created":1761182758,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"JBV4RpKki"}
+
+
+          data: {"id":"chatcmpl-CTeNSuSz9KX5UByNEqB5hPOqJGvig","object":"chat.completion.chunk","created":1761182758,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"x1BXE1"}
+
+
+          data: {"id":"chatcmpl-CTeNSuSz9KX5UByNEqB5hPOqJGvig","object":"chat.completion.chunk","created":1761182758,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"hmeYfEU1US"}
+
+
+          data: {"id":"chatcmpl-CTeNSuSz9KX5UByNEqB5hPOqJGvig","object":"chat.completion.chunk","created":1761182758,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"
+          How"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Uq6pelL"}
+
+
+          data: {"id":"chatcmpl-CTeNSuSz9KX5UByNEqB5hPOqJGvig","object":"chat.completion.chunk","created":1761182758,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"
+          can"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"AE3fy3j"}
+
+
+          data: {"id":"chatcmpl-CTeNSuSz9KX5UByNEqB5hPOqJGvig","object":"chat.completion.chunk","created":1761182758,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"
+          I"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"0ACpiLwgA"}
+
+
+          data: {"id":"chatcmpl-CTeNSuSz9KX5UByNEqB5hPOqJGvig","object":"chat.completion.chunk","created":1761182758,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"
+          assist"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"YUVE"}
+
+
+          data: {"id":"chatcmpl-CTeNSuSz9KX5UByNEqB5hPOqJGvig","object":"chat.completion.chunk","created":1761182758,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"
+          you"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"JeSr30H"}
+
+
+          data: {"id":"chatcmpl-CTeNSuSz9KX5UByNEqB5hPOqJGvig","object":"chat.completion.chunk","created":1761182758,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"
+          today"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"nX4iQ"}
+
+
+          data: {"id":"chatcmpl-CTeNSuSz9KX5UByNEqB5hPOqJGvig","object":"chat.completion.chunk","created":1761182758,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"h8ieO9a2qe"}
+
+
+          data: {"id":"chatcmpl-CTeNSuSz9KX5UByNEqB5hPOqJGvig","object":"chat.completion.chunk","created":1761182758,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"HxnVw"}
+
+
+          data: {"id":"chatcmpl-CTeNSuSz9KX5UByNEqB5hPOqJGvig","object":"chat.completion.chunk","created":1761182758,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[],"usage":{"prompt_tokens":9,"completion_tokens":9,"total_tokens":18,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"ohvuGBXylWix"}
+
+
+          data: [DONE]
+
+
+          '
+      headers:
+        Access-Control-Expose-Headers:
+          - X-Request-ID
+        CF-RAY:
+          - 992d718bb9dfeb25-SJC
+        Connection:
+          - keep-alive
+        Content-Type:
+          - text/event-stream; charset=utf-8
+        Date:
+          - Thu, 23 Oct 2025 01:25:58 GMT
+        Server:
+          - cloudflare
+        Set-Cookie:
+          - __cf_bm=E2LS.ggZcXfSgDLb3p0AjD_tOW50ZegtvJebwXwRYD8-1761182758-1.0.1.1-fhn6FfJv.cv8qXkw9WSVUYTVE3BTwmm7KrowpNXyJ9N3ok0A6QWcM6EWc6Xau4.l54Z8q1h0J7Ad8KpPKqBwzqjdJ.wDKy1tMWc6M49HQQE;
+            path=/; expires=Thu, 23-Oct-25 01:55:58 GMT; domain=.api.openai.com; HttpOnly;
+            Secure; SameSite=None
+          - _cfuvid=LPp8_I1vZfYvARt7De3LOhUTYLMQle.ckowdQe.WZDc-1761182758233-0.0.1.1-604800000;
+            path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+        Strict-Transport-Security:
+          - max-age=31536000; includeSubDomains; preload
+        Transfer-Encoding:
+          - chunked
+        X-Content-Type-Options:
+          - nosniff
+        alt-svc:
+          - h3=":443"; ma=86400
+        cf-cache-status:
+          - DYNAMIC
+        openai-organization:
+          - future-house-xr4tdh
+        openai-processing-ms:
+          - "148"
+        openai-project:
+          - proj_Jt6Hc8GI0Cv9yaRVy35owwje
+        openai-version:
+          - "2020-10-01"
+        x-envoy-upstream-service-time:
+          - "276"
+        x-openai-proxy-wasm:
+          - v0.1
+        x-ratelimit-limit-requests:
+          - "30000"
+        x-ratelimit-limit-tokens:
+          - "150000000"
+        x-ratelimit-remaining-requests:
+          - "29999"
+        x-ratelimit-remaining-tokens:
+          - "149999997"
+        x-ratelimit-reset-requests:
+          - 2ms
+        x-ratelimit-reset-tokens:
+          - 0s
+        x-request-id:
+          - req_264b637c92944bdfa9163188c65e17b1
+      status:
+        code: 200
+        message: OK
+version: 1
diff --git a/packages/lmi/tests/test_cost_tracking.py b/packages/lmi/tests/test_cost_tracking.py
index f07fb9ad..e78fc201 100644
--- a/packages/lmi/tests/test_cost_tracking.py
+++ b/packages/lmi/tests/test_cost_tracking.py
@@ -1,3 +1,4 @@
+import asyncio
 from contextlib import contextmanager
 from typing import Any
 from unittest.mock import AsyncMock, MagicMock, patch
@@ -7,7 +8,7 @@
 from aviary.core import Message
 
 from lmi import cost_tracking_ctx
-from lmi.cost_tracker import GLOBAL_COST_TRACKER, TrackedStreamWrapper
+from lmi.cost_tracker import GLOBAL_COST_TRACKER, CostTracker, TrackedStreamWrapper
 from lmi.embeddings import LiteLLMEmbeddingModel
 from lmi.llms import CommonLLMNames, LiteLLMModel
 from lmi.utils import VCR_DEFAULT_MATCH_ON
@@ -369,3 +370,174 @@ async def track_callback(response):  # noqa: RUF029
 
             # Callback should have been called exactly once
             assert len(callback_calls) == 1
+
+
+class TestCustomCostTracker:
+    """Test custom cost tracker functionality."""
+
+    def test_custom_tracker_defaults(self):
+        """Test that custom trackers are enabled by default and GLOBAL is disabled."""
+        # Custom tracker should be enabled by default
+        custom_tracker = CostTracker()
+        assert custom_tracker.enabled.get() is True
+
+        # Global tracker should be disabled by default
+        assert GLOBAL_COST_TRACKER.enabled.get() is False
+
+        # Test instance methods
+        custom_tracker.set_reporting_threshold(5.0)
+        assert custom_tracker.report_every_usd == 5.0
+
+        custom_tracker.enable_cost_tracking(enabled=False)
+        assert custom_tracker.enabled.get() is False
+
+        custom_tracker.enable_cost_tracking(enabled=True)
+        assert custom_tracker.enabled.get() is True
+
+    @pytest.mark.asyncio
+    async def test_custom_tracker_basic_usage(self):
+        """Test basic usage of custom tracker as context manager."""
+        custom_tracker = CostTracker()
+        mock_response = MagicMock(
+            model="gpt-4o-mini",
+            usage=MagicMock(prompt_tokens=10, completion_tokens=20),
+        )
+
+        # Record initial costs
+        global_initial_cost = GLOBAL_COST_TRACKER.lifetime_cost_usd
+        custom_initial_cost = custom_tracker.lifetime_cost_usd
+
+        with (
+            custom_tracker,
+            patch("litellm.cost_calculator.completion_cost", return_value=0.05),
+        ):
+            await custom_tracker.record(mock_response)
+
+            # Custom tracker should have accumulated cost
+            assert custom_tracker.lifetime_cost_usd > custom_initial_cost
+            assert custom_tracker.lifetime_cost_usd == 0.05
+
+            # Global tracker should remain unchanged
+            assert GLOBAL_COST_TRACKER.lifetime_cost_usd == global_initial_cost
+
+    @pytest.mark.vcr(match_on=VCR_DEFAULT_MATCH_ON)
+    @pytest.mark.asyncio
+    @pytest.mark.parametrize(
+        ("model_name", "stream"),
+        [
+            (CommonLLMNames.OPENAI_TEST, False),
+            (CommonLLMNames.OPENAI_TEST, True),
+            (CommonLLMNames.ANTHROPIC_TEST, False),
+            (CommonLLMNames.ANTHROPIC_TEST, True),
+        ],
+    )
+    async def test_custom_tracker_with_llm_calls(self, model_name, stream):
+        """Test custom tracker with real LLM calls."""
+        custom_tracker = CostTracker()
+        model = LiteLLMModel(name=model_name)
+
+        # Record initial costs
+        global_initial_cost = GLOBAL_COST_TRACKER.lifetime_cost_usd
+        custom_initial_cost = custom_tracker.lifetime_cost_usd
+
+        with custom_tracker:
+            if stream:
+                # Test streaming via callbacks
+                chunks: list[str] = []
+                await model.call_single(
+                    messages=[Message(content="Say hello")],
+                    callbacks=[chunks.append],
+                )
+                assert chunks  # Should have received streaming chunks
+            else:
+                # Test non-streaming
+                result = await model.call_single(
+                    messages=[Message(content="Say hello")],
+                )
+                assert result.text
+
+            # Custom tracker should have accumulated cost
+            assert custom_tracker.lifetime_cost_usd > custom_initial_cost
+
+        # Global tracker should remain unchanged
+        assert GLOBAL_COST_TRACKER.lifetime_cost_usd == global_initial_cost
+
+    @pytest.mark.asyncio
+    async def test_multiple_custom_trackers(self):
+        """Test sequential and nested custom trackers."""
+        tracker1 = CostTracker()
+        tracker2 = CostTracker()
+        mock_response = MagicMock(
+            model=CommonLLMNames.OPENAI_TEST.value,
+            usage=MagicMock(prompt_tokens=10, completion_tokens=20),
+        )
+
+        # Test sequential use
+        with (
+            tracker1,
+            patch("litellm.cost_calculator.completion_cost", return_value=0.01),
+        ):
+            await tracker1.record(mock_response)
+            assert tracker1.lifetime_cost_usd == 0.01
+            assert tracker2.lifetime_cost_usd == 0.0
+
+        with (
+            tracker2,
+            patch("litellm.cost_calculator.completion_cost", return_value=0.02),
+        ):
+            await tracker2.record(mock_response)
+            assert tracker1.lifetime_cost_usd == 0.01  # Unchanged
+            assert tracker2.lifetime_cost_usd == 0.02
+
+        # Test nested trackers
+        tracker3 = CostTracker()
+        tracker4 = CostTracker()
+
+        with (
+            tracker3,
+            patch("litellm.cost_calculator.completion_cost", return_value=0.03),
+        ):
+            await tracker3.record(mock_response)
+            assert tracker3.lifetime_cost_usd == 0.03
+
+            # Nested context should override the active tracker
+            with tracker4:
+                await tracker4.record(mock_response)
+                assert tracker4.lifetime_cost_usd == 0.03
+
+            # Back to outer context
+            await tracker3.record(mock_response)
+            assert tracker3.lifetime_cost_usd == 0.06
+            assert tracker4.lifetime_cost_usd == 0.03  # Unchanged after exiting
+
+    @pytest.mark.asyncio
+    async def test_custom_trackers_parallel_tasks(self):
+        """Test that trackers used by parallel tasks accumulate costs independently."""
+        tracker1, tracker2 = CostTracker(), CostTracker()
+        # Earlier tests may have charged the global tracker, so keep a baseline
+        global_initial_cost = GLOBAL_COST_TRACKER.lifetime_cost_usd
+
+        async def task_with_tracker(tracker: CostTracker):
+            """Task that uses a specific tracker and directly manipulates cost."""
+            with tracker:
+                # Bump the cost directly rather than mocking litellm.cost_calculator
+                await asyncio.sleep(0.01)
+                tracker.lifetime_cost_usd += 0.01
+                await asyncio.sleep(0.01)
+                tracker.lifetime_cost_usd += 0.01
+                return tracker.lifetime_cost_usd
+
+        # Run tasks in parallel with different trackers
+        result1, result2 = await asyncio.gather(
+            task_with_tracker(tracker1),
+            task_with_tracker(tracker2),
+        )
+
+        # Each tracker should have accumulated its own costs independently
+        assert result1 == 0.02
+        assert result2 == 0.02
+        assert tracker1.lifetime_cost_usd == 0.02
+        assert tracker2.lifetime_cost_usd == 0.02
+
+        # Global tracker should remain unchanged
+        assert GLOBAL_COST_TRACKER.lifetime_cost_usd == global_initial_cost