-
Notifications
You must be signed in to change notification settings - Fork 2.4k
Fix content input conversion for OpenAI Responses API #8993
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
a1d14a5
bb6ab0f
ac2709b
71bd42a
43c8fc9
6da81b3
0055def
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -468,7 +468,9 @@ def _convert_chat_request_to_responses_request(request: dict[str, Any]): | |
| if isinstance(c, str): | ||
| content_blocks.append({"type": "input_text", "text": c}) | ||
| elif isinstance(c, list): | ||
| content_blocks.extend(c) | ||
| # Convert each content item from Chat API format to Responses API format | ||
| for item in c: | ||
| content_blocks.append(_convert_content_item_to_responses_format(item)) | ||
| request["input"] = [{"role": msg.get("role", "user"), "content": content_blocks}] | ||
|
|
||
| # Convert `response_format` to `text.format` for Responses API | ||
|
|
@@ -480,6 +482,61 @@ def _convert_chat_request_to_responses_request(request: dict[str, Any]): | |
| return request | ||
|
|
||
|
|
||
| def _convert_content_item_to_responses_format(item: dict[str, Any]) -> dict[str, Any]: | ||
| """ | ||
| Convert a content item from Chat API format to Responses API format. | ||
|
|
||
| For images, converts from: | ||
| {"type": "image_url", "image_url": {"url": "..."}} | ||
| To: | ||
| {"type": "input_image", "source": {"type": "url", "url": "..."}} | ||
| or: | ||
| {"type": "input_image", "source": {"type": "base64", "media_type": "...", "data": "..."}} | ||
|
|
||
| For text and other types, passes through as-is (already in correct format). | ||
| """ | ||
| if item.get("type") == "image_url": | ||
| image_url = item.get("image_url", {}).get("url", "") | ||
|
|
||
| # Check if it's a base64 data URI | ||
| if image_url.startswith("data:"): | ||
| # Extract media type and base64 data | ||
| # Format: data:image/png;base64,iVBORw0KG... | ||
| parts = image_url.split(",", 1) | ||
| if len(parts) == 2: | ||
| header, data = parts | ||
| # Extract media type from header (e.g., "data:image/png;base64" -> "image/png") | ||
| # Handle both "data:image/png;base64" and "data:image/png" formats | ||
| media_type_parts = header.split(";")[0].replace("data:", "") | ||
| if media_type_parts: | ||
| media_type = media_type_parts | ||
| else: | ||
| # Fallback to a default media type if extraction fails | ||
| media_type = "image/png" | ||
|
|
||
| return { | ||
| "type": "input_image", | ||
| "source": { | ||
| "type": "base64", | ||
| "media_type": media_type, | ||
| "data": data, | ||
| } | ||
| } | ||
| # If data URI is malformed (doesn't have comma separator), fall through to URL handling | ||
|
|
||
| # Otherwise treat as URL | ||
| return { | ||
| "type": "input_image", | ||
| "source": { | ||
| "type": "url", | ||
| "url": image_url, | ||
| } | ||
| } | ||
|
|
||
| # For non-image items, return as-is | ||
|
||
| return item | ||
|
|
||
|
|
||
| def _get_headers(headers: dict[str, Any] | None = None): | ||
| headers = headers or {} | ||
| return { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -343,8 +343,8 @@ def test_reasoning_model_requirements(model_name): | |
| lm = dspy.LM( | ||
| model=model_name, | ||
| ) | ||
| assert lm.kwargs["temperature"] == None | ||
| assert lm.kwargs["max_completion_tokens"] == None | ||
| assert lm.kwargs["temperature"] is None | ||
| assert lm.kwargs["max_completion_tokens"] is None | ||
|
|
||
|
|
||
| def test_dump_state(): | ||
|
|
@@ -633,3 +633,133 @@ def test_api_key_not_saved_in_json(): | |
| assert saved_state["lm"]["model"] == "openai/gpt-4o-mini" | ||
| assert saved_state["lm"]["temperature"] == 1.0 | ||
| assert saved_state["lm"]["max_tokens"] == 100 | ||
|
|
||
|
|
||
| def test_responses_api_converts_images_correctly(): | ||
| """Test that image_url format is converted to input_image format for Responses API.""" | ||
|
||
| from dspy.clients.lm import _convert_chat_request_to_responses_request | ||
|
|
||
| # Test with base64 image | ||
| request_with_base64_image = { | ||
| "model": "openai/gpt-5-mini", | ||
| "messages": [ | ||
| { | ||
| "role": "user", | ||
| "content": [ | ||
| {"type": "text", "text": "What's in this image?"}, | ||
| { | ||
| "type": "image_url", | ||
| "image_url": { | ||
| "url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==" | ||
| } | ||
| } | ||
| ] | ||
| } | ||
| ] | ||
| } | ||
|
|
||
| result = _convert_chat_request_to_responses_request(request_with_base64_image) | ||
|
|
||
| assert "input" in result | ||
| assert len(result["input"]) == 1 | ||
| assert result["input"][0]["role"] == "user" | ||
|
|
||
| content = result["input"][0]["content"] | ||
| assert len(content) == 2 | ||
|
|
||
| # First item should be text (passed through as-is since it's already in correct format) | ||
| assert content[0]["type"] == "text" | ||
| assert content[0]["text"] == "What's in this image?" | ||
|
|
||
| # Second item should be converted to input_image format | ||
| assert content[1]["type"] == "input_image" | ||
| assert content[1]["source"]["type"] == "base64" | ||
| assert content[1]["source"]["media_type"] == "image/png" | ||
| assert content[1]["source"]["data"] == "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==" | ||
|
|
||
| # Test with URL image | ||
| request_with_url_image = { | ||
| "model": "openai/gpt-5-mini", | ||
| "messages": [ | ||
| { | ||
| "role": "user", | ||
| "content": [ | ||
| { | ||
| "type": "image_url", | ||
| "image_url": { | ||
| "url": "https://example.com/image.jpg" | ||
| } | ||
| } | ||
| ] | ||
| } | ||
| ] | ||
| } | ||
|
|
||
| result = _convert_chat_request_to_responses_request(request_with_url_image) | ||
|
|
||
| content = result["input"][0]["content"] | ||
| assert len(content) == 1 | ||
| assert content[0]["type"] == "input_image" | ||
| assert content[0]["source"]["type"] == "url" | ||
| assert content[0]["source"]["url"] == "https://example.com/image.jpg" | ||
|
|
||
|
|
||
| def test_responses_api_with_image_input(): | ||
| """Test that LM with model_type='responses' handles Image inputs correctly.""" | ||
| api_response = make_response( | ||
| output_blocks=[ | ||
| ResponseOutputMessage( | ||
| **{ | ||
| "id": "msg_1", | ||
| "type": "message", | ||
| "role": "assistant", | ||
| "status": "completed", | ||
| "content": [ | ||
| {"type": "output_text", "text": "This is a test answer with image input.", "annotations": []} | ||
| ], | ||
| }, | ||
| ), | ||
| ] | ||
| ) | ||
|
|
||
| with mock.patch("litellm.responses", autospec=True, return_value=api_response) as dspy_responses: | ||
| lm = dspy.LM( | ||
| model="openai/gpt-5-mini", | ||
| model_type="responses", | ||
| cache=False, | ||
| temperature=1.0, | ||
| max_tokens=16000, | ||
| ) | ||
|
|
||
| # Test with messages containing an image | ||
| messages = [ | ||
| { | ||
| "role": "user", | ||
| "content": [ | ||
| {"type": "text", "text": "Describe this image"}, | ||
| { | ||
| "type": "image_url", | ||
| "image_url": { | ||
| "url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==" | ||
| } | ||
| } | ||
| ] | ||
| } | ||
| ] | ||
|
|
||
| lm_result = lm(messages=messages) | ||
|
|
||
| assert lm_result == [{"text": "This is a test answer with image input."}] | ||
|
|
||
| dspy_responses.assert_called_once() | ||
| call_args = dspy_responses.call_args.kwargs | ||
|
|
||
| # Verify the request was converted correctly | ||
| assert "input" in call_args | ||
| content = call_args["input"][0]["content"] | ||
|
|
||
| # Check that image was converted to input_image format | ||
| image_content = [c for c in content if c.get("type") == "input_image"] | ||
| assert len(image_content) == 1 | ||
| assert image_content[0]["source"]["type"] == "base64" | ||
| assert image_content[0]["source"]["media_type"] == "image/png" | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@copilot Does this match the documented interface?

https://platform.openai.com/docs/api-reference/responses/create?lang=curl
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You're absolutely right - I was using the wrong format. Fixed in 6da81b3 to match the OpenAI documentation. The Responses API expects
{"type": "input_image", "image_url": "..."} with image_url as a direct property, not a nested source object.