Skip to content

Commit 8da770c

Browse files
committed
refactor: route zsh-fork through unified exec
1 parent 0d6d479 commit 8da770c

8 files changed

Lines changed: 846 additions & 70 deletions

File tree

codex-rs/app-server/tests/suite/v2/turn_start_zsh_fork.rs

Lines changed: 206 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ use app_test_support::McpProcess;
1111
use app_test_support::create_final_assistant_message_sse_response;
1212
use app_test_support::create_mock_responses_server_sequence;
1313
use app_test_support::create_mock_responses_server_sequence_unchecked;
14-
use app_test_support::create_shell_command_sse_response;
1514
use app_test_support::to_response;
1615
use codex_app_server_protocol::CommandAction;
1716
use codex_app_server_protocol::CommandExecutionApprovalDecision;
@@ -35,9 +34,11 @@ use codex_core::features::Feature;
3534
use core_test_support::responses;
3635
use core_test_support::skip_if_no_network;
3736
use pretty_assertions::assert_eq;
37+
use serde_json::json;
3838
use std::collections::BTreeMap;
3939
use std::path::Path;
4040
use tempfile::TempDir;
41+
use tokio::time::sleep;
4142
use tokio::time::timeout;
4243

4344
#[cfg(windows)]
@@ -61,10 +62,8 @@ async fn turn_start_shell_zsh_fork_executes_command_v2() -> Result<()> {
6162
};
6263
eprintln!("using zsh path for zsh-fork test: {}", zsh_path.display());
6364

64-
let responses = vec![create_shell_command_sse_response(
65-
vec!["echo".to_string(), "hi".to_string()],
66-
None,
67-
Some(5000),
65+
let responses = vec![create_zsh_fork_exec_command_sse_response(
66+
"echo hi",
6867
"call-zsh-fork",
6968
)?];
7069
let server = create_mock_responses_server_sequence(responses).await;
@@ -74,7 +73,7 @@ async fn turn_start_shell_zsh_fork_executes_command_v2() -> Result<()> {
7473
"never",
7574
&BTreeMap::from([
7675
(Feature::ShellZshFork, true),
77-
(Feature::UnifiedExec, false),
76+
(Feature::UnifiedExec, true),
7877
(Feature::ShellSnapshot, false),
7978
]),
8079
&zsh_path,
@@ -172,14 +171,8 @@ async fn turn_start_shell_zsh_fork_exec_approval_decline_v2() -> Result<()> {
172171
eprintln!("using zsh path for zsh-fork test: {}", zsh_path.display());
173172

174173
let responses = vec![
175-
create_shell_command_sse_response(
176-
vec![
177-
"python3".to_string(),
178-
"-c".to_string(),
179-
"print(42)".to_string(),
180-
],
181-
None,
182-
Some(5000),
174+
create_zsh_fork_exec_command_sse_response(
175+
"python3 -c 'print(42)'",
183176
"call-zsh-fork-decline",
184177
)?,
185178
create_final_assistant_message_sse_response("done")?,
@@ -191,7 +184,7 @@ async fn turn_start_shell_zsh_fork_exec_approval_decline_v2() -> Result<()> {
191184
"untrusted",
192185
&BTreeMap::from([
193186
(Feature::ShellZshFork, true),
194-
(Feature::UnifiedExec, false),
187+
(Feature::UnifiedExec, true),
195188
(Feature::ShellSnapshot, false),
196189
]),
197190
&zsh_path,
@@ -307,14 +300,8 @@ async fn turn_start_shell_zsh_fork_exec_approval_cancel_v2() -> Result<()> {
307300
};
308301
eprintln!("using zsh path for zsh-fork test: {}", zsh_path.display());
309302

310-
let responses = vec![create_shell_command_sse_response(
311-
vec![
312-
"python3".to_string(),
313-
"-c".to_string(),
314-
"print(42)".to_string(),
315-
],
316-
None,
317-
Some(5000),
303+
let responses = vec![create_zsh_fork_exec_command_sse_response(
304+
"python3 -c 'print(42)'",
318305
"call-zsh-fork-cancel",
319306
)?];
320307
let server = create_mock_responses_server_sequence(responses).await;
@@ -324,7 +311,7 @@ async fn turn_start_shell_zsh_fork_exec_approval_cancel_v2() -> Result<()> {
324311
"untrusted",
325312
&BTreeMap::from([
326313
(Feature::ShellZshFork, true),
327-
(Feature::UnifiedExec, false),
314+
(Feature::UnifiedExec, true),
328315
(Feature::ShellSnapshot, false),
329316
]),
330317
&zsh_path,
@@ -422,6 +409,181 @@ async fn turn_start_shell_zsh_fork_exec_approval_cancel_v2() -> Result<()> {
422409
Ok(())
423410
}
424411

412+
#[tokio::test]
413+
async fn turn_start_shell_zsh_fork_interrupt_kills_approved_subcommand_v2() -> Result<()> {
414+
skip_if_no_network!(Ok(()));
415+
416+
let tmp = TempDir::new()?;
417+
let codex_home = tmp.path().join("codex_home");
418+
std::fs::create_dir(&codex_home)?;
419+
let workspace = tmp.path().join("workspace");
420+
std::fs::create_dir(&workspace)?;
421+
let pid_file = workspace.join("approved-subcommand.pid");
422+
let pid_file_display = pid_file.display().to_string();
423+
assert!(
424+
!pid_file_display.contains('\''),
425+
"test workspace path should not contain single quotes: {pid_file_display}"
426+
);
427+
428+
let Some(zsh_path) = find_test_zsh_path()? else {
429+
eprintln!("skipping zsh fork interrupt cleanup test: no zsh executable found");
430+
return Ok(());
431+
};
432+
if !supports_exec_wrapper_intercept(&zsh_path) {
433+
eprintln!(
434+
"skipping zsh fork interrupt cleanup test: zsh does not support EXEC_WRAPPER intercepts ({})",
435+
zsh_path.display()
436+
);
437+
return Ok(());
438+
}
439+
let zsh_path_display = zsh_path.display().to_string();
440+
eprintln!("using zsh path for zsh-fork test: {zsh_path_display}");
441+
442+
let shell_command =
443+
format!("/bin/sh -c 'echo $$ > \"{pid_file_display}\" && exec /bin/sleep 100'");
444+
let tool_call_arguments = serde_json::to_string(&json!({
445+
"cmd": shell_command,
446+
"yield_time_ms": 30_000,
447+
}))?;
448+
let response = responses::sse(vec![
449+
responses::ev_response_created("resp-1"),
450+
responses::ev_function_call(
451+
"call-zsh-fork-interrupt-cleanup",
452+
"exec_command",
453+
&tool_call_arguments,
454+
),
455+
responses::ev_completed("resp-1"),
456+
]);
457+
let no_op_response = responses::sse(vec![
458+
responses::ev_response_created("resp-2"),
459+
responses::ev_completed("resp-2"),
460+
]);
461+
let server =
462+
create_mock_responses_server_sequence_unchecked(vec![response, no_op_response]).await;
463+
create_config_toml(
464+
&codex_home,
465+
&server.uri(),
466+
"untrusted",
467+
&BTreeMap::from([
468+
(Feature::ShellZshFork, true),
469+
(Feature::UnifiedExec, true),
470+
(Feature::ShellSnapshot, false),
471+
]),
472+
&zsh_path,
473+
)?;
474+
475+
let mut mcp = create_zsh_test_mcp_process(&codex_home, &workspace).await?;
476+
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
477+
478+
let start_id = mcp
479+
.send_thread_start_request(ThreadStartParams {
480+
model: Some("mock-model".to_string()),
481+
cwd: Some(workspace.to_string_lossy().into_owned()),
482+
..Default::default()
483+
})
484+
.await?;
485+
let start_resp: JSONRPCResponse = timeout(
486+
DEFAULT_READ_TIMEOUT,
487+
mcp.read_stream_until_response_message(RequestId::Integer(start_id)),
488+
)
489+
.await??;
490+
let ThreadStartResponse { thread, .. } = to_response::<ThreadStartResponse>(start_resp)?;
491+
492+
let turn_id = mcp
493+
.send_turn_start_request(TurnStartParams {
494+
thread_id: thread.id.clone(),
495+
input: vec![V2UserInput::Text {
496+
text: "run the long-lived command".to_string(),
497+
text_elements: Vec::new(),
498+
}],
499+
cwd: Some(workspace.clone()),
500+
approval_policy: Some(codex_app_server_protocol::AskForApproval::UnlessTrusted),
501+
sandbox_policy: Some(codex_app_server_protocol::SandboxPolicy::WorkspaceWrite {
502+
writable_roots: vec![workspace.clone().try_into()?],
503+
read_only_access: codex_app_server_protocol::ReadOnlyAccess::FullAccess,
504+
network_access: false,
505+
exclude_tmpdir_env_var: false,
506+
exclude_slash_tmp: false,
507+
}),
508+
model: Some("mock-model".to_string()),
509+
effort: Some(codex_protocol::openai_models::ReasoningEffort::Medium),
510+
summary: Some(codex_protocol::config_types::ReasoningSummary::Auto),
511+
..Default::default()
512+
})
513+
.await?;
514+
let turn_resp: JSONRPCResponse = timeout(
515+
DEFAULT_READ_TIMEOUT,
516+
mcp.read_stream_until_response_message(RequestId::Integer(turn_id)),
517+
)
518+
.await??;
519+
let TurnStartResponse { turn } = to_response::<TurnStartResponse>(turn_resp)?;
520+
521+
let mut saw_target_approval = false;
522+
while !saw_target_approval {
523+
let server_req = timeout(
524+
DEFAULT_READ_TIMEOUT,
525+
mcp.read_stream_until_request_message(),
526+
)
527+
.await??;
528+
let ServerRequest::CommandExecutionRequestApproval { request_id, params } = server_req
529+
else {
530+
panic!("expected CommandExecutionRequestApproval request");
531+
};
532+
let approval_command = params.command.clone().unwrap_or_default();
533+
saw_target_approval = approval_command.contains("/bin/sh")
534+
&& approval_command.contains(&pid_file_display)
535+
&& !approval_command.contains(&zsh_path_display);
536+
mcp.send_response(
537+
request_id,
538+
serde_json::to_value(CommandExecutionRequestApprovalResponse {
539+
decision: CommandExecutionApprovalDecision::Accept,
540+
})?,
541+
)
542+
.await?;
543+
}
544+
545+
let pid = timeout(DEFAULT_READ_TIMEOUT, async {
546+
loop {
547+
if let Ok(contents) = std::fs::read_to_string(&pid_file) {
548+
return Ok::<i32, anyhow::Error>(contents.trim().parse()?);
549+
}
550+
sleep(std::time::Duration::from_millis(20)).await;
551+
}
552+
})
553+
.await??;
554+
let still_running = std::process::Command::new("/bin/kill")
555+
.args(["-0", &pid.to_string()])
556+
.status()?
557+
.success();
558+
assert!(
559+
still_running,
560+
"expected approved intercepted subprocess pid {pid} to be running before interrupt"
561+
);
562+
563+
mcp.interrupt_turn_and_wait_for_aborted(
564+
thread.id.clone(),
565+
turn.id.clone(),
566+
DEFAULT_READ_TIMEOUT,
567+
)
568+
.await?;
569+
570+
timeout(DEFAULT_READ_TIMEOUT, async {
571+
loop {
572+
let still_running = std::process::Command::new("/bin/kill")
573+
.args(["-0", &pid.to_string()])
574+
.status()?
575+
.success();
576+
if !still_running {
577+
return Ok::<(), anyhow::Error>(());
578+
}
579+
sleep(std::time::Duration::from_millis(20)).await;
580+
}
581+
})
582+
.await??;
583+
584+
Ok(())
585+
}
586+
425587
#[tokio::test]
426588
async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2() -> Result<()> {
427589
skip_if_no_network!(Ok(()));
@@ -453,16 +615,15 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
453615
first_file.display(),
454616
second_file.display()
455617
);
456-
let tool_call_arguments = serde_json::to_string(&serde_json::json!({
457-
"command": shell_command,
458-
"workdir": serde_json::Value::Null,
459-
"timeout_ms": 5000
618+
let tool_call_arguments = serde_json::to_string(&json!({
619+
"cmd": shell_command,
620+
"yield_time_ms": 5000,
460621
}))?;
461622
let response = responses::sse(vec![
462623
responses::ev_response_created("resp-1"),
463624
responses::ev_function_call(
464625
"call-zsh-fork-subcommand-decline",
465-
"shell_command",
626+
"exec_command",
466627
&tool_call_arguments,
467628
),
468629
responses::ev_completed("resp-1"),
@@ -483,7 +644,7 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
483644
"untrusted",
484645
&BTreeMap::from([
485646
(Feature::ShellZshFork, true),
486-
(Feature::UnifiedExec, false),
647+
(Feature::UnifiedExec, true),
487648
(Feature::ShellSnapshot, false),
488649
]),
489650
&zsh_path,
@@ -694,6 +855,21 @@ async fn create_zsh_test_mcp_process(codex_home: &Path, zdotdir: &Path) -> Resul
694855
McpProcess::new_with_env(codex_home, &[("ZDOTDIR", Some(zdotdir.as_str()))]).await
695856
}
696857

858+
fn create_zsh_fork_exec_command_sse_response(
859+
command: &str,
860+
call_id: &str,
861+
) -> anyhow::Result<String> {
862+
let tool_call_arguments = serde_json::to_string(&json!({
863+
"cmd": command,
864+
"yield_time_ms": 5000,
865+
}))?;
866+
Ok(responses::sse(vec![
867+
responses::ev_response_created("resp-1"),
868+
responses::ev_function_call(call_id, "exec_command", &tool_call_arguments),
869+
responses::ev_completed("resp-1"),
870+
]))
871+
}
872+
697873
fn create_config_toml(
698874
codex_home: &Path,
699875
server_uri: &str,

0 commit comments

Comments
 (0)