Merged
Changes from 4 commits
59 changes: 58 additions & 1 deletion dspy/clients/lm.py
@@ -468,7 +468,9 @@ def _convert_chat_request_to_responses_request(request: dict[str, Any]):
            if isinstance(c, str):
                content_blocks.append({"type": "input_text", "text": c})
            elif isinstance(c, list):
-               content_blocks.extend(c)
+               # Convert each content item from Chat API format to Responses API format
+               for item in c:
+                   content_blocks.append(_convert_content_item_to_responses_format(item))
        request["input"] = [{"role": msg.get("role", "user"), "content": content_blocks}]

    # Convert `response_format` to `text.format` for Responses API
@@ -480,6 +482,61 @@ def _convert_chat_request_to_responses_request(request: dict[str, Any]):
    return request
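For reference, the hunk above maps Chat-style messages onto the Responses API "input" payload. A rough sketch of the shapes involved for a plain string content block, illustrative only and not part of the diff:

# A Chat-style message such as this one:
chat_message = {"role": "user", "content": "Hello"}
# ends up carried in the converted request roughly as:
expected_input = [{"role": "user", "content": [{"type": "input_text", "text": "Hello"}]}]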


def _convert_content_item_to_responses_format(item: dict[str, Any]) -> dict[str, Any]:
    """
    Convert a content item from Chat API format to Responses API format.

    For images, converts from:
        {"type": "image_url", "image_url": {"url": "..."}}
    To:
        {"type": "input_image", "source": {"type": "url", "url": "..."}}
Contributor Author:
You're absolutely right - I was using the wrong format. Fixed in 6da81b3 to match the OpenAI documentation. The Responses API expects {"type": "input_image", "image_url": "..."} with image_url as a direct property, not a nested source object.

    or:
        {"type": "input_image", "source": {"type": "base64", "media_type": "...", "data": "..."}}

    For text and other types, passes through as-is (already in correct format).
    """
    if item.get("type") == "image_url":
        image_url = item.get("image_url", {}).get("url", "")

        # Check if it's a base64 data URI
        if image_url.startswith("data:"):
            # Extract media type and base64 data
            # Format: data:image/png;base64,iVBORw0KG...
            parts = image_url.split(",", 1)
            if len(parts) == 2:
                header, data = parts
                # Extract media type from header (e.g., "data:image/png;base64" -> "image/png")
                # Handle both "data:image/png;base64" and "data:image/png" formats
                media_type_parts = header.split(";")[0].replace("data:", "")
                if media_type_parts:
                    media_type = media_type_parts
                else:
                    # Fallback to a default media type if extraction fails
                    media_type = "image/png"

                return {
                    "type": "input_image",
                    "source": {
                        "type": "base64",
                        "media_type": media_type,
                        "data": data,
                    }
                }
            # If data URI is malformed (doesn't have comma separator), fall through to URL handling

        # Otherwise treat as URL
        return {
            "type": "input_image",
            "source": {
                "type": "url",
                "url": image_url,
            }
        }

    # For non-image items, return as-is
Collaborator:
@copilot I think we also need to convert text items from type: text to type: input_text. See https://platform.openai.com/docs/api-reference/responses

Contributor Author:
Good catch! Fixed in 0055def - now converting text items from type: "text" to type: "input_text" to match the OpenAI Responses API format.

    return item
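The two replies above describe follow-up fixes (6da81b3 and 0055def): image items should carry image_url as a direct string property, and text items should become input_text. A minimal sketch of where the converter is headed after those comments, based only on this discussion and not on the later commits themselves (the helper name is hypothetical):

def _convert_content_item_sketch(item: dict) -> dict:
    # Hedged sketch reflecting the review replies, not the committed code.
    if item.get("type") == "image_url":
        # The Responses API takes the URL (or data: URI) directly as a string.
        return {"type": "input_image", "image_url": item.get("image_url", {}).get("url", "")}
    if item.get("type") == "text":
        # Chat API "text" blocks become "input_text" blocks.
        return {"type": "input_text", "text": item.get("text", "")}
    return item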


def _get_headers(headers: dict[str, Any] | None = None):
    headers = headers or {}
    return {
134 changes: 132 additions & 2 deletions tests/clients/test_lm.py
@@ -343,8 +343,8 @@ def test_reasoning_model_requirements(model_name):
    lm = dspy.LM(
        model=model_name,
    )
-   assert lm.kwargs["temperature"] == None
-   assert lm.kwargs["max_completion_tokens"] == None
+   assert lm.kwargs["temperature"] is None
+   assert lm.kwargs["max_completion_tokens"] is None


def test_dump_state():
@@ -633,3 +633,133 @@ def test_api_key_not_saved_in_json():
    assert saved_state["lm"]["model"] == "openai/gpt-4o-mini"
    assert saved_state["lm"]["temperature"] == 1.0
    assert saved_state["lm"]["max_tokens"] == 100


def test_responses_api_converts_images_correctly():
"""Test that image_url format is converted to input_image format for Responses API."""
Collaborator:

@copilot can we remove the docstrings for the test cases?

Contributor Author:

Done in 43c8fc9

    from dspy.clients.lm import _convert_chat_request_to_responses_request

    # Test with base64 image
    request_with_base64_image = {
        "model": "openai/gpt-5-mini",
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What's in this image?"},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
                        }
                    }
                ]
            }
        ]
    }

    result = _convert_chat_request_to_responses_request(request_with_base64_image)

    assert "input" in result
    assert len(result["input"]) == 1
    assert result["input"][0]["role"] == "user"

    content = result["input"][0]["content"]
    assert len(content) == 2

    # First item should be text (passed through as-is since it's already in correct format)
    assert content[0]["type"] == "text"
    assert content[0]["text"] == "What's in this image?"

    # Second item should be converted to input_image format
    assert content[1]["type"] == "input_image"
    assert content[1]["source"]["type"] == "base64"
    assert content[1]["source"]["media_type"] == "image/png"
    assert content[1]["source"]["data"] == "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="

    # Test with URL image
    request_with_url_image = {
        "model": "openai/gpt-5-mini",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://example.com/image.jpg"
                        }
                    }
                ]
            }
        ]
    }

    result = _convert_chat_request_to_responses_request(request_with_url_image)

    content = result["input"][0]["content"]
    assert len(content) == 1
    assert content[0]["type"] == "input_image"
    assert content[0]["source"]["type"] == "url"
    assert content[0]["source"]["url"] == "https://example.com/image.jpg"
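As an aside, the hard-coded image in these tests is a data URI of the form data:<media type>;base64,<payload>, which is exactly what the converter parses. A small hedged helper for producing one from a local file with only the standard library (the function name and file path are illustrative):

import base64

def to_data_uri(path: str, media_type: str = "image/png") -> str:
    # Build a "data:<media type>;base64,<payload>" URI like the one hard-coded above.
    with open(path, "rb") as f:
        payload = base64.b64encode(f.read()).decode("ascii")
    return f"data:{media_type};base64,{payload}"

# Example: to_data_uri("pixel.png") would yield "data:image/png;base64,iVBOR..."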


def test_responses_api_with_image_input():
"""Test that LM with model_type='responses' handles Image inputs correctly."""
api_response = make_response(
output_blocks=[
ResponseOutputMessage(
**{
"id": "msg_1",
"type": "message",
"role": "assistant",
"status": "completed",
"content": [
{"type": "output_text", "text": "This is a test answer with image input.", "annotations": []}
],
},
),
]
)

with mock.patch("litellm.responses", autospec=True, return_value=api_response) as dspy_responses:
lm = dspy.LM(
model="openai/gpt-5-mini",
model_type="responses",
cache=False,
temperature=1.0,
max_tokens=16000,
)

# Test with messages containing an image
messages = [
{
"role": "user",
"content": [
{"type": "text", "text": "Describe this image"},
{
"type": "image_url",
"image_url": {
"url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
}
}
]
}
]

lm_result = lm(messages=messages)

assert lm_result == [{"text": "This is a test answer with image input."}]

dspy_responses.assert_called_once()
call_args = dspy_responses.call_args.kwargs

# Verify the request was converted correctly
assert "input" in call_args
content = call_args["input"][0]["content"]

# Check that image was converted to input_image format
image_content = [c for c in content if c.get("type") == "input_image"]
assert len(image_content) == 1
assert image_content[0]["source"]["type"] == "base64"
assert image_content[0]["source"]["media_type"] == "image/png"
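For completeness, the request shape exercised by the mocked test above could be run against a real backend roughly as follows; a hedged sketch that assumes a valid OpenAI API key, network access, availability of the model named in the tests, and a reachable image URL (the one below is a placeholder):

import dspy

lm = dspy.LM(model="openai/gpt-5-mini", model_type="responses", cache=False)
messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image"},
            {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}},
        ],
    }
]
print(lm(messages=messages))  # a list of outputs, e.g. [{"text": "..."}] per the mocked test above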