Skip to content

Commit 605611c

Browse files
authored
feat: Support Anthropic extended thinking and interleaved thinking (openai#1744)
1 parent 456d284 commit 605611c

File tree

4 files changed

+219
-18
lines changed

4 files changed

+219
-18
lines changed

src/agents/extensions/models/litellm_model.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,15 @@ async def _fetch_response(
257257
stream: bool = False,
258258
prompt: Any | None = None,
259259
) -> litellm.types.utils.ModelResponse | tuple[Response, AsyncStream[ChatCompletionChunk]]:
260-
converted_messages = Converter.items_to_messages(input)
260+
# Preserve reasoning messages for tool calls when reasoning is on
261+
# This is needed for models like Claude 4 Sonnet/Opus which support interleaved thinking
262+
preserve_thinking_blocks = (
263+
model_settings.reasoning is not None and model_settings.reasoning.effort is not None
264+
)
265+
266+
converted_messages = Converter.items_to_messages(
267+
input, preserve_thinking_blocks=preserve_thinking_blocks
268+
)
261269

262270
if system_instructions:
263271
converted_messages.insert(

src/agents/models/chatcmpl_converter.py

Lines changed: 65 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
ResponseReasoningItemParam,
4040
)
4141
from openai.types.responses.response_input_param import FunctionCallOutput, ItemReference, Message
42-
from openai.types.responses.response_reasoning_item import Summary
42+
from openai.types.responses.response_reasoning_item import Content, Summary
4343

4444
from ..agent_output import AgentOutputSchemaBase
4545
from ..exceptions import AgentsException, UserError
@@ -93,24 +93,38 @@ def convert_response_format(
9393
def message_to_output_items(cls, message: ChatCompletionMessage) -> list[TResponseOutputItem]:
9494
items: list[TResponseOutputItem] = []
9595

96-
# Handle reasoning content if available
96+
# Check if message is agents.extensions.models.litellm_model.InternalChatCompletionMessage
97+
# We can't actually import it here because litellm is an optional dependency
98+
# So we use hasattr to check for reasoning_content and thinking_blocks
9799
if hasattr(message, "reasoning_content") and message.reasoning_content:
98100
reasoning_item = ResponseReasoningItem(
99101
id=FAKE_RESPONSES_ID,
100102
summary=[Summary(text=message.reasoning_content, type="summary_text")],
101103
type="reasoning",
102104
)
103105

104-
# Store full thinking blocks for Anthropic compatibility
106+
# Store thinking blocks for Anthropic compatibility
105107
if hasattr(message, "thinking_blocks") and message.thinking_blocks:
106-
# Store thinking blocks in the reasoning item's content
107-
# Convert thinking blocks to Content objects
108-
from openai.types.responses.response_reasoning_item import Content
109-
110-
reasoning_item.content = [
111-
Content(text=str(block.get("thinking", "")), type="reasoning_text")
112-
for block in message.thinking_blocks
113-
]
108+
# Store thinking text in content and signature in encrypted_content
109+
reasoning_item.content = []
110+
signature = None
111+
for block in message.thinking_blocks:
112+
if isinstance(block, dict):
113+
thinking_text = block.get("thinking", "")
114+
if thinking_text:
115+
reasoning_item.content.append(
116+
Content(text=thinking_text, type="reasoning_text")
117+
)
118+
# Store the signature if present
119+
if block.get("signature"):
120+
signature = block.get("signature")
121+
122+
# Store only the last signature in encrypted_content
123+
# If there are multiple thinking blocks, this could be a problem.
124+
# In practice, there should only be one signature for the entire reasoning step.
125+
# Tested with: claude-sonnet-4-20250514
126+
if signature:
127+
reasoning_item.encrypted_content = signature
114128

115129
items.append(reasoning_item)
116130

@@ -301,10 +315,18 @@ def extract_all_content(
301315
def items_to_messages(
302316
cls,
303317
items: str | Iterable[TResponseInputItem],
318+
preserve_thinking_blocks: bool = False,
304319
) -> list[ChatCompletionMessageParam]:
305320
"""
306321
Convert a sequence of 'Item' objects into a list of ChatCompletionMessageParam.
307322
323+
Args:
324+
items: A string or iterable of response input items to convert
325+
preserve_thinking_blocks: Whether to preserve thinking blocks in tool calls
326+
for reasoning models like Claude 4 Sonnet/Opus which support interleaved
327+
thinking. When True, thinking blocks are reconstructed and included in
328+
assistant messages with tool calls.
329+
308330
Rules:
309331
- EasyInputMessage or InputMessage (role=user) => ChatCompletionUserMessageParam
310332
- EasyInputMessage or InputMessage (role=system) => ChatCompletionSystemMessageParam
@@ -325,6 +347,7 @@ def items_to_messages(
325347

326348
result: list[ChatCompletionMessageParam] = []
327349
current_assistant_msg: ChatCompletionAssistantMessageParam | None = None
350+
pending_thinking_blocks: list[dict[str, str]] | None = None
328351

329352
def flush_assistant_message() -> None:
330353
nonlocal current_assistant_msg
@@ -336,10 +359,11 @@ def flush_assistant_message() -> None:
336359
current_assistant_msg = None
337360

338361
def ensure_assistant_message() -> ChatCompletionAssistantMessageParam:
339-
nonlocal current_assistant_msg
362+
nonlocal current_assistant_msg, pending_thinking_blocks
340363
if current_assistant_msg is None:
341364
current_assistant_msg = ChatCompletionAssistantMessageParam(role="assistant")
342365
current_assistant_msg["tool_calls"] = []
366+
343367
return current_assistant_msg
344368

345369
for item in items:
@@ -455,6 +479,13 @@ def ensure_assistant_message() -> ChatCompletionAssistantMessageParam:
455479

456480
elif func_call := cls.maybe_function_tool_call(item):
457481
asst = ensure_assistant_message()
482+
483+
# If we have pending thinking blocks, use them as the content
484+
# This is required for Anthropic API tool calls with interleaved thinking
485+
if pending_thinking_blocks:
486+
asst["content"] = pending_thinking_blocks # type: ignore
487+
pending_thinking_blocks = None # Clear after using
488+
458489
tool_calls = list(asst.get("tool_calls", []))
459490
arguments = func_call["arguments"] if func_call["arguments"] else "{}"
460491
new_tool_call = ChatCompletionMessageFunctionToolCallParam(
@@ -483,9 +514,28 @@ def ensure_assistant_message() -> ChatCompletionAssistantMessageParam:
483514
f"Encountered an item_reference, which is not supported: {item_ref}"
484515
)
485516

486-
# 7) reasoning message => not handled
487-
elif cls.maybe_reasoning_message(item):
488-
pass
517+
# 7) reasoning message => extract thinking blocks if present
518+
elif reasoning_item := cls.maybe_reasoning_message(item):
519+
# Reconstruct thinking blocks from content (text) and encrypted_content (signature)
520+
content_items = reasoning_item.get("content", [])
521+
signature = reasoning_item.get("encrypted_content")
522+
523+
if content_items and preserve_thinking_blocks:
524+
# Reconstruct thinking blocks from content and signature
525+
pending_thinking_blocks = []
526+
for content_item in content_items:
527+
if (
528+
isinstance(content_item, dict)
529+
and content_item.get("type") == "reasoning_text"
530+
):
531+
thinking_block = {
532+
"type": "thinking",
533+
"thinking": content_item.get("text", ""),
534+
}
535+
# Add signature if available
536+
if signature:
537+
thinking_block["signature"] = signature
538+
pending_thinking_blocks.append(thinking_block)
489539

490540
# 8) If we haven't recognized it => fail or ignore
491541
else:

src/agents/models/chatcmpl_stream_handler.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@ class StreamingState:
6262
# Fields for real-time function call streaming
6363
function_call_streaming: dict[int, bool] = field(default_factory=dict)
6464
function_call_output_idx: dict[int, int] = field(default_factory=dict)
65+
# Store accumulated thinking text and signature for Anthropic compatibility
66+
thinking_text: str = ""
67+
thinking_signature: str | None = None
6568

6669

6770
class SequenceNumber:
@@ -101,6 +104,19 @@ async def handle_stream(
101104

102105
delta = chunk.choices[0].delta
103106

107+
# Handle thinking blocks from Anthropic (for preserving signatures)
108+
if hasattr(delta, "thinking_blocks") and delta.thinking_blocks:
109+
for block in delta.thinking_blocks:
110+
if isinstance(block, dict):
111+
# Accumulate thinking text
112+
thinking_text = block.get("thinking", "")
113+
if thinking_text:
114+
state.thinking_text += thinking_text
115+
# Store signature if present
116+
signature = block.get("signature")
117+
if signature:
118+
state.thinking_signature = signature
119+
104120
# Handle reasoning content for reasoning summaries
105121
if hasattr(delta, "reasoning_content"):
106122
reasoning_content = delta.reasoning_content
@@ -527,7 +543,19 @@ async def handle_stream(
527543

528544
# include Reasoning item if it exists
529545
if state.reasoning_content_index_and_output:
530-
outputs.append(state.reasoning_content_index_and_output[1])
546+
reasoning_item = state.reasoning_content_index_and_output[1]
547+
# Store thinking text in content and signature in encrypted_content
548+
if state.thinking_text:
549+
# Add thinking text as a Content object
550+
if not reasoning_item.content:
551+
reasoning_item.content = []
552+
reasoning_item.content.append(
553+
Content(text=state.thinking_text, type="reasoning_text")
554+
)
555+
# Store signature in encrypted_content
556+
if state.thinking_signature:
557+
reasoning_item.encrypted_content = state.thinking_signature
558+
outputs.append(reasoning_item)
531559

532560
# include text or refusal content if they exist
533561
if state.text_content_index_and_output or state.refusal_content_index_and_output:

tests/test_anthropic_thinking_blocks.py

Lines changed: 116 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@
1010

1111
from __future__ import annotations
1212

13-
from typing import Any
13+
from typing import Any, cast
14+
15+
from openai.types.chat import ChatCompletionMessageToolCall
16+
from openai.types.chat.chat_completion_message_tool_call import Function
1417

1518
from agents.extensions.models.litellm_model import InternalChatCompletionMessage
1619
from agents.models.chatcmpl_converter import Converter
@@ -99,3 +102,115 @@ def test_reasoning_items_preserved_in_message_conversion():
99102
thinking_block = reasoning_item.content[0]
100103
assert thinking_block.type == "reasoning_text"
101104
assert thinking_block.text == "I need to call the weather function for Paris"
105+
106+
107+
def test_anthropic_thinking_blocks_with_tool_calls():
108+
"""
109+
Test for models with extended thinking and interleaved thinking with tool calls.
110+
111+
This test verifies the Anthropic API's requirement for thinking blocks
112+
to be the first content in assistant messages when reasoning is enabled and tool
113+
calls are present.
114+
"""
115+
# Create a message with reasoning, thinking blocks and tool calls
116+
message = InternalChatCompletionMessage(
117+
role="assistant",
118+
content="I'll check the weather for you.",
119+
reasoning_content="The user wants weather information, I need to call the weather function",
120+
thinking_blocks=[
121+
{
122+
"type": "thinking",
123+
"thinking": (
124+
"The user is asking about weather. "
125+
"Let me use the weather tool to get this information."
126+
),
127+
"signature": "TestSignature123",
128+
}
129+
],
130+
tool_calls=[
131+
ChatCompletionMessageToolCall(
132+
id="call_123",
133+
type="function",
134+
function=Function(name="get_weather", arguments='{"city": "Tokyo"}'),
135+
)
136+
],
137+
)
138+
139+
# Step 1: Convert message to output items
140+
output_items = Converter.message_to_output_items(message)
141+
142+
# Verify reasoning item exists and contains thinking blocks
143+
reasoning_items = [
144+
item for item in output_items if hasattr(item, "type") and item.type == "reasoning"
145+
]
146+
assert len(reasoning_items) == 1, "Should have exactly one reasoning item"
147+
148+
reasoning_item = reasoning_items[0]
149+
150+
# Verify thinking text is stored in content
151+
assert hasattr(reasoning_item, "content") and reasoning_item.content, (
152+
"Reasoning item should have content"
153+
)
154+
assert reasoning_item.content[0].type == "reasoning_text", (
155+
"Content should be reasoning_text type"
156+
)
157+
158+
# Verify signature is stored in encrypted_content
159+
assert hasattr(reasoning_item, "encrypted_content"), (
160+
"Reasoning item should have encrypted_content"
161+
)
162+
assert reasoning_item.encrypted_content == "TestSignature123", "Signature should be preserved"
163+
164+
# Verify tool calls are present
165+
tool_call_items = [
166+
item for item in output_items if hasattr(item, "type") and item.type == "function_call"
167+
]
168+
assert len(tool_call_items) == 1, "Should have exactly one tool call"
169+
170+
# Step 2: Convert output items back to messages
171+
# Convert items to dicts for the converter (simulating serialization/deserialization)
172+
items_as_dicts: list[dict[str, Any]] = []
173+
for item in output_items:
174+
if hasattr(item, "model_dump"):
175+
items_as_dicts.append(item.model_dump())
176+
else:
177+
items_as_dicts.append(cast(dict[str, Any], item))
178+
179+
messages = Converter.items_to_messages(items_as_dicts, preserve_thinking_blocks=True) # type: ignore[arg-type]
180+
181+
# Find the assistant message with tool calls
182+
assistant_messages = [
183+
msg for msg in messages if msg.get("role") == "assistant" and msg.get("tool_calls")
184+
]
185+
assert len(assistant_messages) == 1, "Should have exactly one assistant message with tool calls"
186+
187+
assistant_msg = assistant_messages[0]
188+
189+
# Content must start with thinking blocks, not text
190+
content = assistant_msg.get("content")
191+
assert content is not None, "Assistant message should have content"
192+
193+
assert isinstance(content, list) and len(content) > 0, (
194+
"Assistant message content should be a non-empty list"
195+
)
196+
197+
first_content = content[0]
198+
assert first_content.get("type") == "thinking", (
199+
f"First content must be 'thinking' type for Anthropic compatibility, "
200+
f"but got '{first_content.get('type')}'"
201+
)
202+
expected_thinking = (
203+
"The user is asking about weather. Let me use the weather tool to get this information."
204+
)
205+
assert first_content.get("thinking") == expected_thinking, (
206+
"Thinking content should be preserved"
207+
)
208+
# Signature should also be preserved
209+
assert first_content.get("signature") == "TestSignature123", (
210+
"Signature should be preserved in thinking block"
211+
)
212+
213+
# Verify tool calls are preserved
214+
tool_calls = assistant_msg.get("tool_calls", [])
215+
assert len(cast(list[Any], tool_calls)) == 1, "Tool calls should be preserved"
216+
assert cast(list[Any], tool_calls)[0]["function"]["name"] == "get_weather"

0 commit comments

Comments
 (0)