Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions codex-rs/core/src/codex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,7 @@ impl Session {
include_apply_patch_tool: config.include_apply_patch_tool,
include_web_search_request: config.tools_web_search_request,
use_streamable_shell_tool: config.use_experimental_streamable_shell_tool,
include_view_image_tool: config.include_view_image_tool,
}),
user_instructions,
base_instructions,
Expand Down Expand Up @@ -1103,6 +1104,7 @@ async fn submission_loop(
include_apply_patch_tool: config.include_apply_patch_tool,
include_web_search_request: config.tools_web_search_request,
use_streamable_shell_tool: config.use_experimental_streamable_shell_tool,
include_view_image_tool: config.include_view_image_tool,
});

let new_turn_context = TurnContext {
Expand Down Expand Up @@ -1185,6 +1187,7 @@ async fn submission_loop(
include_web_search_request: config.tools_web_search_request,
use_streamable_shell_tool: config
.use_experimental_streamable_shell_tool,
include_view_image_tool: config.include_view_image_tool,
}),
user_instructions: turn_context.user_instructions.clone(),
base_instructions: turn_context.base_instructions.clone(),
Expand Down Expand Up @@ -2069,6 +2072,36 @@ async fn handle_function_call(
)
.await
}
// `view_image` tool call: deserialize the `path` argument and inject a
// `LocalImage` input item into the session.
"view_image" => {
// Shape of the JSON arguments accepted by this tool: a single `path` string.
#[derive(serde::Deserialize)]
struct SeeImageArgs {
path: String,
}
// Arguments that fail to deserialize are reported back as an unsuccessful
// function-call output (early return) carrying the parse error text.
let args = match serde_json::from_str::<SeeImageArgs>(&arguments) {
Ok(a) => a,
Err(e) => {
return ResponseInputItem::FunctionCallOutput {
call_id,
output: FunctionCallOutputPayload {
content: format!("failed to parse function arguments: {e}"),
success: Some(false),
},
};
}
};
// NOTE(review): `resolve_path` presumably turns a relative path into an
// absolute one based on the turn's working directory — confirm in `TurnContext`.
let abs = turn_context.resolve_path(Some(args.path));
// The error value from `inject_input` is discarded; any failure is reported
// with the fixed "no active task" message below.
let output = match sess.inject_input(vec![InputItem::LocalImage { path: abs }]) {
Ok(()) => FunctionCallOutputPayload {
content: "attached local image path".to_string(),
success: Some(true),
},
Err(_) => FunctionCallOutputPayload {
content: "unable to attach image (no active task)".to_string(),
success: Some(false),
},
};
// Both outcomes are returned as a function-call output tied to `call_id`.
ResponseInputItem::FunctionCallOutput { call_id, output }
}
"apply_patch" => {
let args = match serde_json::from_str::<ApplyPatchToolArgs>(&arguments) {
Ok(a) => a,
Expand Down
17 changes: 17 additions & 0 deletions codex-rs/core/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,9 @@ pub struct Config {
pub preferred_auth_method: AuthMode,

pub use_experimental_streamable_shell_tool: bool,

/// Include the `view_image` tool that lets the agent attach a local image path to context.
pub include_view_image_tool: bool,
}

impl Config {
Expand Down Expand Up @@ -497,6 +500,10 @@ pub struct ToolsToml {
// Renamed from `web_search_request`; keep alias for backwards compatibility.
#[serde(default, alias = "web_search_request")]
pub web_search: Option<bool>,

/// Enable the `view_image` tool that lets the agent attach local images.
#[serde(default)]
pub view_image: Option<bool>,
}

impl ConfigToml {
Expand Down Expand Up @@ -586,6 +593,7 @@ pub struct ConfigOverrides {
pub base_instructions: Option<String>,
pub include_plan_tool: Option<bool>,
pub include_apply_patch_tool: Option<bool>,
pub include_view_image_tool: Option<bool>,
pub disable_response_storage: Option<bool>,
pub show_raw_agent_reasoning: Option<bool>,
pub tools_web_search_request: Option<bool>,
Expand Down Expand Up @@ -613,6 +621,7 @@ impl Config {
base_instructions,
include_plan_tool,
include_apply_patch_tool,
include_view_image_tool,
disable_response_storage,
show_raw_agent_reasoning,
tools_web_search_request: override_tools_web_search_request,
Expand Down Expand Up @@ -681,6 +690,10 @@ impl Config {
.or(cfg.tools.as_ref().and_then(|t| t.web_search))
.unwrap_or(false);

let include_view_image_tool = include_view_image_tool
.or(cfg.tools.as_ref().and_then(|t| t.view_image))
.unwrap_or(true);

let model = model
.or(config_profile.model)
.or(cfg.model)
Expand Down Expand Up @@ -784,6 +797,7 @@ impl Config {
use_experimental_streamable_shell_tool: cfg
.experimental_use_exec_command_tool
.unwrap_or(false),
include_view_image_tool,
};
Ok(config)
}
Expand Down Expand Up @@ -1152,6 +1166,7 @@ disable_response_storage = true
responses_originator_header: "codex_cli_rs".to_string(),
preferred_auth_method: AuthMode::ChatGPT,
use_experimental_streamable_shell_tool: false,
include_view_image_tool: true,
},
o3_profile_config
);
Expand Down Expand Up @@ -1208,6 +1223,7 @@ disable_response_storage = true
responses_originator_header: "codex_cli_rs".to_string(),
preferred_auth_method: AuthMode::ChatGPT,
use_experimental_streamable_shell_tool: false,
include_view_image_tool: true,
};

assert_eq!(expected_gpt3_profile_config, gpt3_profile_config);
Expand Down Expand Up @@ -1279,6 +1295,7 @@ disable_response_storage = true
responses_originator_header: "codex_cli_rs".to_string(),
preferred_auth_method: AuthMode::ChatGPT,
use_experimental_streamable_shell_tool: false,
include_view_image_tool: true,
};

assert_eq!(expected_zdr_profile_config, zdr_profile_config);
Expand Down
83 changes: 71 additions & 12 deletions codex-rs/core/src/openai_tools.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ pub(crate) struct ToolsConfig {
pub plan_tool: bool,
pub apply_patch_tool_type: Option<ApplyPatchToolType>,
pub web_search_request: bool,
pub include_view_image_tool: bool,
}

pub(crate) struct ToolsConfigParams<'a> {
Expand All @@ -77,6 +78,7 @@ pub(crate) struct ToolsConfigParams<'a> {
pub(crate) include_apply_patch_tool: bool,
pub(crate) include_web_search_request: bool,
pub(crate) use_streamable_shell_tool: bool,
pub(crate) include_view_image_tool: bool,
}

impl ToolsConfig {
Expand All @@ -89,6 +91,7 @@ impl ToolsConfig {
include_apply_patch_tool,
include_web_search_request,
use_streamable_shell_tool,
include_view_image_tool,
} = params;
let mut shell_type = if *use_streamable_shell_tool {
ConfigShellToolType::StreamableShell
Expand Down Expand Up @@ -120,6 +123,7 @@ impl ToolsConfig {
plan_tool: *include_plan_tool,
apply_patch_tool_type,
web_search_request: *include_web_search_request,
include_view_image_tool: *include_view_image_tool,
}
}
}
Expand Down Expand Up @@ -292,6 +296,30 @@ The shell tool is used to execute shell commands.
},
})
}

/// Builds the `view_image` function tool definition.
///
/// The tool declares exactly one required string parameter, `path`, and
/// disallows additional properties in its argument object.
fn create_view_image_tool() -> OpenAiTool {
    // Only a local filesystem path is supported as input.
    let path_schema = JsonSchema::String {
        description: Some(String::from("Local filesystem path to an image file")),
    };

    let parameters = JsonSchema::Object {
        properties: BTreeMap::from([(String::from("path"), path_schema)]),
        required: Some(vec![String::from("path")]),
        additional_properties: Some(false),
    };

    OpenAiTool::Function(ResponsesApiTool {
        name: String::from("view_image"),
        description: String::from(
            "Attach a local image (by filesystem path) to the conversation context for this turn.",
        ),
        strict: false,
        parameters,
    })
}
/// TODO(dylan): deprecate once we get rid of json tool
#[derive(Serialize, Deserialize)]
pub(crate) struct ApplyPatchToolArgs {
Expand Down Expand Up @@ -541,6 +569,11 @@ pub(crate) fn get_openai_tools(
tools.push(OpenAiTool::WebSearch {});
}

// Include the view_image tool so the agent can attach images to context.
if config.include_view_image_tool {
tools.push(create_view_image_tool());
}

if let Some(mcp_tools) = mcp_tools {
// Ensure deterministic ordering to maximize prompt cache hits.
// HashMap iteration order is non-deterministic, so sort by fully-qualified tool name.
Expand Down Expand Up @@ -604,10 +637,14 @@ mod tests {
include_apply_patch_tool: false,
include_web_search_request: true,
use_streamable_shell_tool: false,
include_view_image_tool: true,
});
let tools = get_openai_tools(&config, Some(HashMap::new()));

assert_eq_tool_names(&tools, &["local_shell", "update_plan", "web_search"]);
assert_eq_tool_names(
&tools,
&["local_shell", "update_plan", "web_search", "view_image"],
);
}

#[test]
Expand All @@ -621,10 +658,14 @@ mod tests {
include_apply_patch_tool: false,
include_web_search_request: true,
use_streamable_shell_tool: false,
include_view_image_tool: true,
});
let tools = get_openai_tools(&config, Some(HashMap::new()));

assert_eq_tool_names(&tools, &["shell", "update_plan", "web_search"]);
assert_eq_tool_names(
&tools,
&["shell", "update_plan", "web_search", "view_image"],
);
}

#[test]
Expand All @@ -638,6 +679,7 @@ mod tests {
include_apply_patch_tool: false,
include_web_search_request: true,
use_streamable_shell_tool: false,
include_view_image_tool: true,
});
let tools = get_openai_tools(
&config,
Expand Down Expand Up @@ -679,11 +721,16 @@ mod tests {

assert_eq_tool_names(
&tools,
&["shell", "web_search", "test_server/do_something_cool"],
&[
"shell",
"web_search",
"view_image",
"test_server/do_something_cool",
],
);

assert_eq!(
tools[2],
tools[3],
OpenAiTool::Function(ResponsesApiTool {
name: "test_server/do_something_cool".to_string(),
parameters: JsonSchema::Object {
Expand Down Expand Up @@ -737,6 +784,7 @@ mod tests {
include_apply_patch_tool: false,
include_web_search_request: false,
use_streamable_shell_tool: false,
include_view_image_tool: true,
});

// Intentionally construct a map with keys that would sort alphabetically.
Expand Down Expand Up @@ -794,6 +842,7 @@ mod tests {
&tools,
&[
"shell",
"view_image",
"test_server/cool",
"test_server/do",
"test_server/something",
Expand All @@ -812,6 +861,7 @@ mod tests {
include_apply_patch_tool: false,
include_web_search_request: true,
use_streamable_shell_tool: false,
include_view_image_tool: true,
});

let tools = get_openai_tools(
Expand All @@ -837,10 +887,13 @@ mod tests {
)])),
);

assert_eq_tool_names(&tools, &["shell", "web_search", "dash/search"]);
assert_eq_tool_names(
&tools,
&["shell", "web_search", "view_image", "dash/search"],
);

assert_eq!(
tools[2],
tools[3],
OpenAiTool::Function(ResponsesApiTool {
name: "dash/search".to_string(),
parameters: JsonSchema::Object {
Expand Down Expand Up @@ -870,6 +923,7 @@ mod tests {
include_apply_patch_tool: false,
include_web_search_request: true,
use_streamable_shell_tool: false,
include_view_image_tool: true,
});

let tools = get_openai_tools(
Expand All @@ -893,9 +947,12 @@ mod tests {
)])),
);

assert_eq_tool_names(&tools, &["shell", "web_search", "dash/paginate"]);
assert_eq_tool_names(
&tools,
&["shell", "web_search", "view_image", "dash/paginate"],
);
assert_eq!(
tools[2],
tools[3],
OpenAiTool::Function(ResponsesApiTool {
name: "dash/paginate".to_string(),
parameters: JsonSchema::Object {
Expand Down Expand Up @@ -923,6 +980,7 @@ mod tests {
include_apply_patch_tool: false,
include_web_search_request: true,
use_streamable_shell_tool: false,
include_view_image_tool: true,
});

let tools = get_openai_tools(
Expand All @@ -946,9 +1004,9 @@ mod tests {
)])),
);

assert_eq_tool_names(&tools, &["shell", "web_search", "dash/tags"]);
assert_eq_tool_names(&tools, &["shell", "web_search", "view_image", "dash/tags"]);
assert_eq!(
tools[2],
tools[3],
OpenAiTool::Function(ResponsesApiTool {
name: "dash/tags".to_string(),
parameters: JsonSchema::Object {
Expand Down Expand Up @@ -979,6 +1037,7 @@ mod tests {
include_apply_patch_tool: false,
include_web_search_request: true,
use_streamable_shell_tool: false,
include_view_image_tool: true,
});

let tools = get_openai_tools(
Expand All @@ -1002,9 +1061,9 @@ mod tests {
)])),
);

assert_eq_tool_names(&tools, &["shell", "web_search", "dash/value"]);
assert_eq_tool_names(&tools, &["shell", "web_search", "view_image", "dash/value"]);
assert_eq!(
tools[2],
tools[3],
OpenAiTool::Function(ResponsesApiTool {
name: "dash/value".to_string(),
parameters: JsonSchema::Object {
Expand Down
2 changes: 1 addition & 1 deletion codex-rs/core/tests/suite/prompt_caching.rs
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ async fn prompt_tools_are_consistent_across_requests() {
let expected_instructions: &str = include_str!("../../prompt.md");
// our internal implementation is responsible for keeping tools in sync
// with the OpenAI schema, so we just verify the tool presence here
let expected_tools_names: &[&str] = &["shell", "update_plan", "apply_patch"];
let expected_tools_names: &[&str] = &["shell", "update_plan", "apply_patch", "view_image"];
let body0 = requests[0].body_json::<serde_json::Value>().unwrap();
assert_eq!(
body0["instructions"],
Expand Down
1 change: 1 addition & 0 deletions codex-rs/exec/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
base_instructions: None,
include_plan_tool: None,
include_apply_patch_tool: None,
include_view_image_tool: None,
disable_response_storage: oss.then_some(true),
show_raw_agent_reasoning: oss.then_some(true),
tools_web_search_request: None,
Expand Down
Loading