From 18c8aba190a131a03cf1b911ddde3fe9299e305d Mon Sep 17 00:00:00 2001 From: Chesars Date: Wed, 10 Dec 2025 21:52:23 -0300 Subject: [PATCH] fix(completion): transform image content in tool results for Responses API When using litellm.completion() with model="openai/responses/...", images in tool message content were not being transformed from Chat Completion format to Responses API format. Chat Completion format: {"type": "image_url", "image_url": {"url": "..."}} Responses API format: {"type": "input_image", "image_url": "..."} This caused OpenAI to reject the request with error 400 since "image_url" is not a valid type for function_call_output content. --- .../transformation.py | 9 +- ...responses_transformation_transformation.py | 84 +++++++++++++++++++ 2 files changed, 92 insertions(+), 1 deletion(-) diff --git a/litellm/completion_extras/litellm_responses_transformation/transformation.py b/litellm/completion_extras/litellm_responses_transformation/transformation.py index 37170c6010dd..24a66547aaa4 100644 --- a/litellm/completion_extras/litellm_responses_transformation/transformation.py +++ b/litellm/completion_extras/litellm_responses_transformation/transformation.py @@ -165,11 +165,18 @@ def convert_chat_completion_messages_to_responses_api( ) elif role == "tool": # Convert tool message to function call output format + # Transform content if it's multimodal (list with images, etc.) + if isinstance(content, list): + transformed_output = self._convert_content_to_responses_format( + content, "tool" + ) + else: + transformed_output = content input_items.append( { "type": "function_call_output", "call_id": tool_call_id, - "output": content, + "output": transformed_output, } ) elif role == "assistant" and tool_calls and isinstance(tool_calls, list): diff --git a/tests/test_litellm/completion_extras/litellm_responses_transformation/test_completion_extras_litellm_responses_transformation_transformation.py b/tests/test_litellm/completion_extras/litellm_responses_transformation/test_completion_extras_litellm_responses_transformation_transformation.py index b6869525e6d7..421fe635c846 100644 --- a/tests/test_litellm/completion_extras/litellm_responses_transformation/test_completion_extras_litellm_responses_transformation_transformation.py +++ b/tests/test_litellm/completion_extras/litellm_responses_transformation/test_completion_extras_litellm_responses_transformation_transformation.py @@ -52,6 +52,90 @@ def test_convert_chat_completion_messages_to_responses_api_image_input(): assert response[0]["content"][1]["image_url"] == user_image +def test_convert_chat_completion_messages_to_responses_api_tool_result_with_image(): + """ + Test that tool messages with image content are correctly transformed to Responses API format. + + This is a regression test for issue #17762 where images in tool results were not + correctly transformed from Chat Completion format (image_url with nested object) + to Responses API format (input_image with flat string). + + Chat Completion format: + {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}} + + Responses API format: + {"type": "input_image", "image_url": "data:image/png;base64,..."} + """ + from litellm.completion_extras.litellm_responses_transformation.transformation import ( + LiteLLMResponsesTransformationHandler, + ) + + handler = LiteLLMResponsesTransformationHandler() + + test_image_base64 = "" + + # Chat Completion format with image in tool result + messages = [ + { + "role": "user", + "content": "Fetch the image from this URL", + }, + { + "role": "assistant", + "content": None, + "tool_calls": [ + { + "id": "call_abc123", + "type": "function", + "function": { + "name": "fetch_image", + "arguments": '{"url": "https://example.com/image.png"}', + }, + } + ], + }, + { + "role": "tool", + "tool_call_id": "call_abc123", + "content": [ + { + "type": "image_url", + "image_url": {"url": test_image_base64}, + } + ], + }, + { + "role": "user", + "content": "What color is the image?", + }, + ] + + response, _ = handler.convert_chat_completion_messages_to_responses_api(messages) + + # Find the function_call_output item + function_call_output = None + for item in response: + if item.get("type") == "function_call_output": + function_call_output = item + break + + assert function_call_output is not None, "function_call_output not found in response" + assert function_call_output["call_id"] == "call_abc123" + + # Check that the output is correctly transformed + output = function_call_output["output"] + assert isinstance(output, list), "output should be a list" + assert len(output) == 1, "output should have one item" + + image_item = output[0] + # Should be transformed to Responses API format + assert image_item["type"] == "input_image", f"Expected type 'input_image', got '{image_item.get('type')}'" + assert image_item["image_url"] == test_image_base64, "image_url should be a flat string, not a nested object" + assert "detail" in image_item, "detail field should be present" + + print("✓ Tool result with image correctly transformed to Responses API format") + + def test_openai_responses_chunk_parser_reasoning_summary(): from litellm.completion_extras.litellm_responses_transformation.transformation import ( OpenAiResponsesToChatCompletionStreamIterator,