openai · dedrisian-oai · Aug 28, 2025 · Aug 26, 2025 · Aug 27, 2025
diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
@@ -516,6 +516,7 @@ impl Session {
                 include_apply_patch_tool: config.include_apply_patch_tool,
                 include_web_search_request: config.tools_web_search_request,
                 use_streamable_shell_tool: config.use_experimental_streamable_shell_tool,
+                include_view_image_tool: config.include_view_image_tool,
             }),
             user_instructions,
             base_instructions,
@@ -1103,6 +1104,7 @@ async fn submission_loop(
                     include_apply_patch_tool: config.include_apply_patch_tool,
                     include_web_search_request: config.tools_web_search_request,
                     use_streamable_shell_tool: config.use_experimental_streamable_shell_tool,
+                    include_view_image_tool: config.include_view_image_tool,
                 });
 
                 let new_turn_context = TurnContext {
@@ -1185,6 +1187,7 @@ async fn submission_loop(
                             include_web_search_request: config.tools_web_search_request,
                             use_streamable_shell_tool: config
                                 .use_experimental_streamable_shell_tool,
+                            include_view_image_tool: config.include_view_image_tool,
                         }),
                         user_instructions: turn_context.user_instructions.clone(),
                         base_instructions: turn_context.base_instructions.clone(),
@@ -2069,6 +2072,36 @@ async fn handle_function_call(
             )
             .await
         }
+        "view_image" => {
+            // Parse the `path` argument, resolve it, and inject the image as session input.
+            #[derive(serde::Deserialize)]
+            struct ViewImageArgs {
+                path: String,
+            }
+            let args = match serde_json::from_str::<ViewImageArgs>(&arguments) {
+                Ok(parsed) => parsed,
+                Err(err) => {
+                    let output = FunctionCallOutputPayload {
+                        content: format!("failed to parse function arguments: {err}"),
+                        success: Some(false),
+                    };
+                    return ResponseInputItem::FunctionCallOutput { call_id, output };
+                }
+            };
+            let abs = turn_context.resolve_path(Some(args.path));
+            let image = InputItem::LocalImage { path: abs };
+            let attached = sess.inject_input(vec![image]).is_ok();
+            let content = if attached {
+                "attached local image path"
+            } else {
+                "unable to attach image (no active task)"
+            };
+            let output = FunctionCallOutputPayload {
+                content: content.to_string(),
+                success: Some(attached),
+            };
+            ResponseInputItem::FunctionCallOutput { call_id, output }
+        }
         "apply_patch" => {
             let args = match serde_json::from_str::<ApplyPatchToolArgs>(&arguments) {
                 Ok(a) => a,

diff --git a/codex-rs/core/src/config.rs b/codex-rs/core/src/config.rs
@@ -178,6 +178,9 @@ pub struct Config {
     pub preferred_auth_method: AuthMode,
 
     pub use_experimental_streamable_shell_tool: bool,
+
+    /// Include the `view_image` tool that lets the agent attach a local image path to context.
+    pub include_view_image_tool: bool,
 }
 
 impl Config {
@@ -497,6 +500,10 @@ pub struct ToolsToml {
     // Renamed from `web_search_request`; keep alias for backwards compatibility.
     #[serde(default, alias = "web_search_request")]
     pub web_search: Option<bool>,
+
+    /// Enable the `view_image` tool that lets the agent attach local images.
+    #[serde(default)]
+    pub view_image: Option<bool>,
 }
 
 impl ConfigToml {
@@ -586,6 +593,7 @@ pub struct ConfigOverrides {
     pub base_instructions: Option<String>,
     pub include_plan_tool: Option<bool>,
     pub include_apply_patch_tool: Option<bool>,
+    pub include_view_image_tool: Option<bool>,
     pub disable_response_storage: Option<bool>,
     pub show_raw_agent_reasoning: Option<bool>,
     pub tools_web_search_request: Option<bool>,
@@ -613,6 +621,7 @@ impl Config {
             base_instructions,
             include_plan_tool,
             include_apply_patch_tool,
+            include_view_image_tool,
             disable_response_storage,
             show_raw_agent_reasoning,
             tools_web_search_request: override_tools_web_search_request,
@@ -681,6 +690,10 @@ impl Config {
             .or(cfg.tools.as_ref().and_then(|t| t.web_search))
             .unwrap_or(false);
 
+        let include_view_image_tool = include_view_image_tool
+            .or(cfg.tools.as_ref().and_then(|t| t.view_image))
+            .unwrap_or(true);
+
         let model = model
             .or(config_profile.model)
             .or(cfg.model)
@@ -784,6 +797,7 @@ impl Config {
             use_experimental_streamable_shell_tool: cfg
                 .experimental_use_exec_command_tool
                 .unwrap_or(false),
+            include_view_image_tool,
         };
         Ok(config)
     }
@@ -1152,6 +1166,7 @@ disable_response_storage = true
                 responses_originator_header: "codex_cli_rs".to_string(),
                 preferred_auth_method: AuthMode::ChatGPT,
                 use_experimental_streamable_shell_tool: false,
+                include_view_image_tool: true,
             },
             o3_profile_config
         );
@@ -1208,6 +1223,7 @@ disable_response_storage = true
             responses_originator_header: "codex_cli_rs".to_string(),
             preferred_auth_method: AuthMode::ChatGPT,
             use_experimental_streamable_shell_tool: false,
+            include_view_image_tool: true,
         };
 
         assert_eq!(expected_gpt3_profile_config, gpt3_profile_config);
@@ -1279,6 +1295,7 @@ disable_response_storage = true
             responses_originator_header: "codex_cli_rs".to_string(),
             preferred_auth_method: AuthMode::ChatGPT,
             use_experimental_streamable_shell_tool: false,
+            include_view_image_tool: true,
         };
 
         assert_eq!(expected_zdr_profile_config, zdr_profile_config);

diff --git a/codex-rs/core/src/openai_tools.rs b/codex-rs/core/src/openai_tools.rs
@@ -67,6 +67,7 @@ pub(crate) struct ToolsConfig {
     pub plan_tool: bool,
     pub apply_patch_tool_type: Option<ApplyPatchToolType>,
     pub web_search_request: bool,
+    pub include_view_image_tool: bool,
 }
 
 pub(crate) struct ToolsConfigParams<'a> {
@@ -77,6 +78,7 @@ pub(crate) struct ToolsConfigParams<'a> {
     pub(crate) include_apply_patch_tool: bool,
     pub(crate) include_web_search_request: bool,
     pub(crate) use_streamable_shell_tool: bool,
+    pub(crate) include_view_image_tool: bool,
 }
 
 impl ToolsConfig {
@@ -89,6 +91,7 @@ impl ToolsConfig {
             include_apply_patch_tool,
             include_web_search_request,
             use_streamable_shell_tool,
+            include_view_image_tool,
         } = params;
         let mut shell_type = if *use_streamable_shell_tool {
             ConfigShellToolType::StreamableShell
@@ -120,6 +123,7 @@ impl ToolsConfig {
             plan_tool: *include_plan_tool,
             apply_patch_tool_type,
             web_search_request: *include_web_search_request,
+            include_view_image_tool: *include_view_image_tool,
         }
     }
 }
@@ -292,6 +296,30 @@ The shell tool is used to execute shell commands.
         },
     })
 }
+
+/// Builds the `view_image` function tool definition.
+///
+/// Its schema has one required string property, `path`; other properties are disallowed.
+fn create_view_image_tool() -> OpenAiTool {
+    // Only a local filesystem path is accepted.
+    let path_schema = JsonSchema::String {
+        description: Some("Local filesystem path to an image file".to_string()),
+    };
+    let properties = BTreeMap::from([("path".to_string(), path_schema)]);
+    let parameters = JsonSchema::Object {
+        properties,
+        required: Some(vec!["path".to_string()]),
+        additional_properties: Some(false),
+    };
+    OpenAiTool::Function(ResponsesApiTool {
+        name: "view_image".to_string(),
+        description:
+            "Attach a local image (by filesystem path) to the conversation context for this turn."
+                .to_string(),
+        strict: false,
+        parameters,
+    })
+}
 /// TODO(dylan): deprecate once we get rid of json tool
 #[derive(Serialize, Deserialize)]
 pub(crate) struct ApplyPatchToolArgs {
@@ -541,6 +569,11 @@ pub(crate) fn get_openai_tools(
         tools.push(OpenAiTool::WebSearch {});
     }
 
+    // Include the view_image tool so the agent can attach images to context.
+    if config.include_view_image_tool {
+        tools.push(create_view_image_tool());
+    }
+
     if let Some(mcp_tools) = mcp_tools {
         // Ensure deterministic ordering to maximize prompt cache hits.
         // HashMap iteration order is non-deterministic, so sort by fully-qualified tool name.
@@ -604,10 +637,14 @@ mod tests {
             include_apply_patch_tool: false,
             include_web_search_request: true,
             use_streamable_shell_tool: false,
+            include_view_image_tool: true,
         });
         let tools = get_openai_tools(&config, Some(HashMap::new()));
 
-        assert_eq_tool_names(&tools, &["local_shell", "update_plan", "web_search"]);
+        assert_eq_tool_names(
+            &tools,
+            &["local_shell", "update_plan", "web_search", "view_image"],
+        );
     }
 
     #[test]
@@ -621,10 +658,14 @@ mod tests {
             include_apply_patch_tool: false,
             include_web_search_request: true,
             use_streamable_shell_tool: false,
+            include_view_image_tool: true,
         });
         let tools = get_openai_tools(&config, Some(HashMap::new()));
 
-        assert_eq_tool_names(&tools, &["shell", "update_plan", "web_search"]);
+        assert_eq_tool_names(
+            &tools,
+            &["shell", "update_plan", "web_search", "view_image"],
+        );
     }
 
     #[test]
@@ -638,6 +679,7 @@ mod tests {
             include_apply_patch_tool: false,
             include_web_search_request: true,
             use_streamable_shell_tool: false,
+            include_view_image_tool: true,
         });
         let tools = get_openai_tools(
             &config,
@@ -679,11 +721,16 @@ mod tests {
 
         assert_eq_tool_names(
             &tools,
-            &["shell", "web_search", "test_server/do_something_cool"],
+            &[
+                "shell",
+                "web_search",
+                "view_image",
+                "test_server/do_something_cool",
+            ],
         );
 
         assert_eq!(
-            tools[2],
+            tools[3],
             OpenAiTool::Function(ResponsesApiTool {
                 name: "test_server/do_something_cool".to_string(),
                 parameters: JsonSchema::Object {
@@ -737,6 +784,7 @@ mod tests {
             include_apply_patch_tool: false,
             include_web_search_request: false,
             use_streamable_shell_tool: false,
+            include_view_image_tool: true,
         });
 
         // Intentionally construct a map with keys that would sort alphabetically.
@@ -794,6 +842,7 @@ mod tests {
             &tools,
             &[
                 "shell",
+                "view_image",
                 "test_server/cool",
                 "test_server/do",
                 "test_server/something",
@@ -812,6 +861,7 @@ mod tests {
             include_apply_patch_tool: false,
             include_web_search_request: true,
             use_streamable_shell_tool: false,
+            include_view_image_tool: true,
         });
 
         let tools = get_openai_tools(
@@ -837,10 +887,13 @@ mod tests {
             )])),
         );
 
-        assert_eq_tool_names(&tools, &["shell", "web_search", "dash/search"]);
+        assert_eq_tool_names(
+            &tools,
+            &["shell", "web_search", "view_image", "dash/search"],
+        );
 
         assert_eq!(
-            tools[2],
+            tools[3],
             OpenAiTool::Function(ResponsesApiTool {
                 name: "dash/search".to_string(),
                 parameters: JsonSchema::Object {
@@ -870,6 +923,7 @@ mod tests {
             include_apply_patch_tool: false,
             include_web_search_request: true,
             use_streamable_shell_tool: false,
+            include_view_image_tool: true,
         });
 
         let tools = get_openai_tools(
@@ -893,9 +947,12 @@ mod tests {
             )])),
         );
 
-        assert_eq_tool_names(&tools, &["shell", "web_search", "dash/paginate"]);
+        assert_eq_tool_names(
+            &tools,
+            &["shell", "web_search", "view_image", "dash/paginate"],
+        );
         assert_eq!(
-            tools[2],
+            tools[3],
             OpenAiTool::Function(ResponsesApiTool {
                 name: "dash/paginate".to_string(),
                 parameters: JsonSchema::Object {
@@ -923,6 +980,7 @@ mod tests {
             include_apply_patch_tool: false,
             include_web_search_request: true,
             use_streamable_shell_tool: false,
+            include_view_image_tool: true,
         });
 
         let tools = get_openai_tools(
@@ -946,9 +1004,9 @@ mod tests {
             )])),
         );
 
-        assert_eq_tool_names(&tools, &["shell", "web_search", "dash/tags"]);
+        assert_eq_tool_names(&tools, &["shell", "web_search", "view_image", "dash/tags"]);
         assert_eq!(
-            tools[2],
+            tools[3],
             OpenAiTool::Function(ResponsesApiTool {
                 name: "dash/tags".to_string(),
                 parameters: JsonSchema::Object {
@@ -979,6 +1037,7 @@ mod tests {
             include_apply_patch_tool: false,
             include_web_search_request: true,
             use_streamable_shell_tool: false,
+            include_view_image_tool: true,
         });
 
         let tools = get_openai_tools(
@@ -1002,9 +1061,9 @@ mod tests {
             )])),
         );
 
-        assert_eq_tool_names(&tools, &["shell", "web_search", "dash/value"]);
+        assert_eq_tool_names(&tools, &["shell", "web_search", "view_image", "dash/value"]);
         assert_eq!(
-            tools[2],
+            tools[3],
             OpenAiTool::Function(ResponsesApiTool {
                 name: "dash/value".to_string(),
                 parameters: JsonSchema::Object {

diff --git a/codex-rs/core/tests/suite/prompt_caching.rs b/codex-rs/core/tests/suite/prompt_caching.rs
@@ -191,7 +191,7 @@ async fn prompt_tools_are_consistent_across_requests() {
     let expected_instructions: &str = include_str!("../../prompt.md");
     // our internal implementation is responsible for keeping tools in sync
     // with the OpenAI schema, so we just verify the tool presence here
-    let expected_tools_names: &[&str] = &["shell", "update_plan", "apply_patch"];
+    let expected_tools_names: &[&str] = &["shell", "update_plan", "apply_patch", "view_image"];
     let body0 = requests[0].body_json::<serde_json::Value>().unwrap();
     assert_eq!(
         body0["instructions"],

diff --git a/codex-rs/exec/src/lib.rs b/codex-rs/exec/src/lib.rs
@@ -148,6 +148,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
         base_instructions: None,
         include_plan_tool: None,
         include_apply_patch_tool: None,
+        include_view_image_tool: None,
         disable_response_storage: oss.then_some(true),
         show_raw_agent_reasoning: oss.then_some(true),
         tools_web_search_request: None,