From 6a501fbcb2ec56dd816ba2b4a9b384dee6d5e220 Mon Sep 17 00:00:00 2001 From: jif-oai Date: Fri, 26 Jun 2026 15:56:48 +0100 Subject: [PATCH] Cache remote Bash environment exports --- .../core/src/unified_exec/process_manager.rs | 18 +- .../src/unified_exec/process_manager_tests.rs | 2 + codex-rs/exec-server-protocol/src/protocol.rs | 31 ++- codex-rs/exec-server/src/bash_env_cache.rs | 242 ++++++++++++++++++ codex-rs/exec-server/src/lib.rs | 1 + codex-rs/exec-server/src/local_process.rs | 12 +- codex-rs/exec-server/tests/exec_process.rs | 76 ++++++ .../rmcp-client/src/stdio_server_launcher.rs | 1 + 8 files changed, 375 insertions(+), 8 deletions(-) create mode 100644 codex-rs/exec-server/src/bash_env_cache.rs diff --git a/codex-rs/core/src/unified_exec/process_manager.rs b/codex-rs/core/src/unified_exec/process_manager.rs index ef22918cd299..34d60b716153 100644 --- a/codex-rs/core/src/unified_exec/process_manager.rs +++ b/codex-rs/core/src/unified_exec/process_manager.rs @@ -21,6 +21,7 @@ use crate::exec_policy::ExecApprovalRequest; use crate::sandboxing::ExecOptions; use crate::sandboxing::ExecRequest; use crate::sandboxing::ExecServerEnvConfig; +use crate::shell::ShellType; use crate::tools::context::ExecCommandToolOutput; use crate::tools::events::ToolEmitter; use crate::tools::events::ToolEventCtx; @@ -56,6 +57,7 @@ use crate::unified_exec::process::OutputBuffer; use crate::unified_exec::process::OutputHandles; use crate::unified_exec::process::SpawnLifecycleHandle; use crate::unified_exec::process::UnifiedExecProcess; +use codex_features::Feature; use codex_network_proxy::NetworkProxy; use codex_protocol::config_types::ShellEnvironmentPolicy; use codex_protocol::error::CodexErr; @@ -129,6 +131,7 @@ fn exec_env_policy_from_shell_policy( .iter() .map(std::string::ToString::to_string) .collect(), + bash_env_cache_scope: None, } } @@ -1122,10 +1125,19 @@ impl UnifiedExecProcessManager { let active_permission_profile = context.turn.config.permissions.active_permission_profile(); inject_permission_profile_env(&mut env, active_permission_profile.as_ref()); let env = apply_unified_exec_env(env); + let mut exec_server_policy = exec_env_policy_from_shell_policy( + &context.turn.config.permissions.shell_environment_policy, + ); + if context.turn.config.features.enabled(Feature::ShellSnapshot) + && request.turn_environment.environment.is_remote() + && request.shell_type == ShellType::Bash + && !request.tty + && matches!(request.command.as_slice(), [_, option, _] if option == "-c") + { + exec_server_policy.bash_env_cache_scope = Some(request.turn_environment.cwd().clone()); + } let exec_server_env_config = ExecServerEnvConfig { - policy: exec_env_policy_from_shell_policy( - &context.turn.config.permissions.shell_environment_policy, - ), + policy: exec_server_policy, local_policy_env, }; let mut orchestrator = ToolOrchestrator::new(); diff --git a/codex-rs/core/src/unified_exec/process_manager_tests.rs b/codex-rs/core/src/unified_exec/process_manager_tests.rs index 096cbe0a1359..abea0d3daa71 100644 --- a/codex-rs/core/src/unified_exec/process_manager_tests.rs +++ b/codex-rs/core/src/unified_exec/process_manager_tests.rs @@ -100,6 +100,7 @@ fn exec_env_policy_excludes_runtime_permission_profile() { exclude: vec![CODEX_PERMISSION_PROFILE_ENV_VAR.to_string()], r#set: HashMap::from([("KEEP".to_string(), "value".to_string())]), include_only: Vec::new(), + bash_env_cache_scope: None, } ); } @@ -142,6 +143,7 @@ fn exec_server_params_use_path_uri_and_env_policy_overlay_contract() { exclude: Vec::new(), r#set: HashMap::new(), include_only: Vec::new(), + bash_env_cache_scope: None, }, local_policy_env: HashMap::from([ ("HOME".to_string(), "/client-home".to_string()), diff --git a/codex-rs/exec-server-protocol/src/protocol.rs b/codex-rs/exec-server-protocol/src/protocol.rs index a265aaed0c3a..7c9676a4b772 100644 --- a/codex-rs/exec-server-protocol/src/protocol.rs +++ b/codex-rs/exec-server-protocol/src/protocol.rs @@ -154,6 +154,10 @@ pub struct ExecEnvPolicy { pub exclude: Vec, pub r#set: HashMap, pub include_only: Vec, + /// Optional workspace scope for caching exports produced by Bash's `BASH_ENV`. + /// Older exec-server peers ignore this field, so reuse is opportunistic. + #[serde(default)] + pub bash_env_cache_scope: Option, } #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] @@ -566,6 +570,7 @@ mod base64_bytes { #[cfg(test)] mod tests { use super::EnvironmentInfo; + use super::ExecEnvPolicy; use super::ExecExitedNotification; use super::ExecParams; use super::FsReadFileParams; @@ -574,21 +579,29 @@ mod tests { use super::ShellInfo; use codex_file_system::FileSystemSandboxContext; use codex_network_proxy::ManagedNetworkSandboxContext; + use codex_protocol::config_types::ShellEnvironmentPolicyInherit; use codex_protocol::models::PermissionProfile; use codex_utils_path_uri::PathUri; use pretty_assertions::assert_eq; use std::collections::HashMap; #[test] - fn exec_params_managed_network_context_round_trips_and_defaults_for_legacy_peers() { + fn exec_params_optional_context_round_trips_and_defaults_for_legacy_peers() { let cwd = PathUri::from_host_native_path(std::env::current_dir().expect("current directory")) .expect("cwd URI"); let params = ExecParams { process_id: ProcessId::from("managed-network"), argv: vec!["true".to_string()], - cwd, - env_policy: None, + cwd: cwd.clone(), + env_policy: Some(ExecEnvPolicy { + inherit: ShellEnvironmentPolicyInherit::Core, + ignore_default_excludes: false, + exclude: Vec::new(), + r#set: HashMap::new(), + include_only: Vec::new(), + bash_env_cache_scope: Some(cwd.clone()), + }), env: HashMap::new(), tty: false, pipe_stdin: false, @@ -609,6 +622,10 @@ mod tests { "allowLocalBinding": false, }) ); + assert_eq!( + serialized["envPolicy"]["bashEnvCacheScope"], + serde_json::to_value(cwd).expect("serialize cwd") + ); let round_trip: ExecParams = serde_json::from_value(serialized.clone()).expect("deserialize exec params"); assert_eq!(round_trip, params); @@ -617,10 +634,18 @@ mod tests { .as_object_mut() .expect("exec params object") .remove("managedNetwork"); + serialized["envPolicy"] + .as_object_mut() + .expect("env policy object") + .remove("bashEnvCacheScope"); let legacy: ExecParams = serde_json::from_value(serialized).expect("deserialize legacy exec params"); assert!(legacy.enforce_managed_network); assert_eq!(legacy.managed_network, None); + assert_eq!( + legacy.env_policy.expect("env policy").bash_env_cache_scope, + None + ); } #[test] diff --git a/codex-rs/exec-server/src/bash_env_cache.rs b/codex-rs/exec-server/src/bash_env_cache.rs new file mode 100644 index 000000000000..cc235ac653ae --- /dev/null +++ b/codex-rs/exec-server/src/bash_env_cache.rs @@ -0,0 +1,242 @@ +use std::collections::HashMap; + +use crate::ExecServerRuntimePaths; +use crate::protocol::ExecParams; + +#[cfg(unix)] +mod imp { + use std::path::Path; + use std::sync::Arc; + use std::time::Duration; + + use codex_file_system::FileSystemSandboxContext; + use codex_network_proxy::ManagedNetworkSandboxContext; + use codex_utils_path_uri::PathUri; + use tokio::sync::Mutex; + use tokio::sync::OnceCell; + use tokio::time::timeout; + use uuid::Uuid; + + use super::*; + use crate::process_sandbox::prepare_exec_request; + + const CAPTURE_TIMEOUT: Duration = Duration::from_secs(10); + const MAX_CAPTURE_BYTES: usize = 1024 * 1024; + + #[derive(Default)] + pub(crate) struct BashEnvCache { + entry: Mutex>, + } + + struct CacheEntry { + key: CacheKey, + environment: Arc>>>, + } + + #[derive(PartialEq, Eq)] + struct CacheKey { + scope: PathUri, + shell: String, + cwd: PathUri, + environment: HashMap, + sandbox: Option, + enforce_managed_network: bool, + managed_network: Option, + } + + impl BashEnvCache { + pub(crate) async fn environment_for_launch( + &self, + params: &ExecParams, + environment: HashMap, + runtime_paths: Option<&ExecServerRuntimePaths>, + ) -> HashMap { + let Some(key) = CacheKey::new(params, &environment) else { + return environment; + }; + + let cached_environment = { + let mut entry = self.entry.lock().await; + if let Some(cached) = entry.as_ref() + && cached.key == key + { + Arc::clone(&cached.environment) + } else { + let environment = Arc::new(OnceCell::new()); + *entry = Some(CacheEntry { + key, + environment: Arc::clone(&environment), + }); + environment + } + }; + cached_environment + .get_or_init(|| capture_environment(params, &environment, runtime_paths)) + .await + .clone() + .unwrap_or(environment) + } + } + + impl CacheKey { + fn new(params: &ExecParams, environment: &HashMap) -> Option { + let scope = params.env_policy.as_ref()?.bash_env_cache_scope.as_ref()?; + let [shell, option, _script] = params.argv.as_slice() else { + return None; + }; + if params.tty + || params.pipe_stdin + || params.arg0.is_some() + || option != "-c" + || Path::new(shell).file_name()?.to_str()? != "bash" + || !params.cwd.starts_with(scope) + || environment.get("BASH_ENV").is_none_or(String::is_empty) + { + return None; + } + + Some(Self { + scope: scope.clone(), + shell: shell.clone(), + cwd: params.cwd.clone(), + environment: environment.clone(), + sandbox: params.sandbox.clone(), + enforce_managed_network: params.enforce_managed_network, + managed_network: params.managed_network.clone(), + }) + } + } + + async fn capture_environment( + params: &ExecParams, + environment: &HashMap, + runtime_paths: Option<&ExecServerRuntimePaths>, + ) -> Option> { + let nonce = Uuid::new_v4().simple().to_string(); + let start_marker = format!("__CODEX_BASH_ENV_START_{nonce}__"); + let end_marker = format!("__CODEX_BASH_ENV_END_{nonce}__"); + let mut capture = params.clone(); + capture.argv = vec![ + params.argv.first()?.clone(), + "-c".to_string(), + format!( + "builtin printf '%s' '{start_marker}'; if builtin command env -0; then builtin printf '%s' '{end_marker}'; else builtin exit 125; fi" + ), + ]; + let prepared = prepare_exec_request(&capture, environment.clone(), runtime_paths).ok()?; + let (program, args) = prepared.command.split_first()?; + let spawned = codex_utils_pty::spawn_pipe_process_no_stdin( + program, + args, + prepared.cwd.as_path(), + &prepared.env, + &prepared.arg0, + ) + .await + .ok()?; + + let captured = timeout(CAPTURE_TIMEOUT, async move { + let _session = spawned.session; + let (stdout, stderr, exit_code) = tokio::join!( + collect_output(spawned.stdout_rx), + collect_output(spawned.stderr_rx), + spawned.exit_rx, + ); + (exit_code.ok()? == 0 && stderr?.is_empty()).then_some(parse_capture( + stdout?, + &start_marker, + &end_marker, + )?) + }) + .await + .ok() + .flatten()?; + + Some(sanitize_environment(captured, environment)) + } + + async fn collect_output(mut receiver: tokio::sync::mpsc::Receiver>) -> Option> { + let mut output = Vec::new(); + while let Some(chunk) = receiver.recv().await { + if output.len().checked_add(chunk.len())? > MAX_CAPTURE_BYTES { + return None; + } + output.extend_from_slice(&chunk); + } + Some(output) + } + + fn parse_capture( + output: Vec, + start_marker: &str, + end_marker: &str, + ) -> Option> { + let payload = output.strip_prefix(start_marker.as_bytes())?; + let payload = payload.strip_suffix(end_marker.as_bytes())?; + if !payload.is_empty() && !payload.ends_with(&[0]) { + return None; + } + + payload + .split(|byte| *byte == 0) + .filter(|entry| !entry.is_empty()) + .map(|entry| { + let separator = entry.iter().position(|byte| *byte == b'=')?; + let key = std::str::from_utf8(&entry[..separator]).ok()?.to_string(); + let value = std::str::from_utf8(&entry[separator + 1..]) + .ok()? + .to_string(); + Some((key, value)) + }) + .collect() + } + + fn sanitize_environment( + mut captured: HashMap, + original: &HashMap, + ) -> HashMap { + captured.remove("BASH_ENV"); + captured.retain(|key, _| !shell_managed(key)); + captured.extend( + original + .iter() + .filter(|(key, _)| shell_managed(key)) + .map(|(key, value)| (key.clone(), value.clone())), + ); + captured + } + + fn shell_managed(key: &str) -> bool { + key.starts_with("BASH_FUNC_") + || matches!( + key, + "BASHOPTS" + | "BASH_ARGV0" + | "BASH_EXECUTION_STRING" + | "OLDPWD" + | "PWD" + | "SHELLOPTS" + | "SHLVL" + | "_" + ) + } +} + +#[cfg(unix)] +pub(crate) use imp::BashEnvCache; + +#[cfg(not(unix))] +#[derive(Default)] +pub(crate) struct BashEnvCache; + +#[cfg(not(unix))] +impl BashEnvCache { + pub(crate) async fn environment_for_launch( + &self, + _params: &ExecParams, + environment: HashMap, + _runtime_paths: Option<&ExecServerRuntimePaths>, + ) -> HashMap { + environment + } +} diff --git a/codex-rs/exec-server/src/lib.rs b/codex-rs/exec-server/src/lib.rs index 0068c2cc1ac3..0bca866efd65 100644 --- a/codex-rs/exec-server/src/lib.rs +++ b/codex-rs/exec-server/src/lib.rs @@ -1,3 +1,4 @@ +mod bash_env_cache; mod client; mod client_api; mod client_transport; diff --git a/codex-rs/exec-server/src/local_process.rs b/codex-rs/exec-server/src/local_process.rs index 3c488f0ad7c3..a193e89e5f5f 100644 --- a/codex-rs/exec-server/src/local_process.rs +++ b/codex-rs/exec-server/src/local_process.rs @@ -33,6 +33,7 @@ use crate::ExecServerError; use crate::ExecServerRuntimePaths; use crate::ProcessId; use crate::StartedExecProcess; +use crate::bash_env_cache::BashEnvCache; use crate::process::ExecProcessEventLog; use crate::process_sandbox::prepare_exec_request; use crate::protocol::EXEC_CLOSED_METHOD; @@ -140,6 +141,7 @@ enum ProcessEntry { struct Inner { notifications: std::sync::RwLock>, processes: Mutex>, + bash_env_cache: BashEnvCache, telemetry: ExecServerTelemetry, } @@ -195,6 +197,7 @@ impl LocalProcess { inner: Arc::new(Inner { notifications: std::sync::RwLock::new(Some(notifications)), processes: Mutex::new(HashMap::new()), + bash_env_cache: BashEnvCache::default(), telemetry, }), runtime_paths, @@ -234,8 +237,12 @@ impl LocalProcess { params: ExecParams, ) -> Result<(ExecResponse, watch::Sender, ExecProcessEventLog), JSONRPCErrorError> { let process_id = params.process_id.clone(); - let prepared = - prepare_exec_request(¶ms, child_env(¶ms), self.runtime_paths.as_ref())?; + let environment = self + .inner + .bash_env_cache + .environment_for_launch(¶ms, child_env(¶ms), self.runtime_paths.as_ref()) + .await; + let prepared = prepare_exec_request(¶ms, environment, self.runtime_paths.as_ref())?; let (program, args) = prepared .command .split_first() @@ -1107,6 +1114,7 @@ mod tests { exclude: Vec::new(), r#set: HashMap::from([("POLICY_SET".to_string(), "policy".to_string())]), include_only: Vec::new(), + bash_env_cache_scope: None, }); let mut expected = HashMap::from([ diff --git a/codex-rs/exec-server/tests/exec_process.rs b/codex-rs/exec-server/tests/exec_process.rs index 73377da60b12..50ffa981fa9a 100644 --- a/codex-rs/exec-server/tests/exec_process.rs +++ b/codex-rs/exec-server/tests/exec_process.rs @@ -7,6 +7,8 @@ use anyhow::Context; use anyhow::Result; use codex_exec_server::Environment; use codex_exec_server::ExecBackend; +#[cfg(unix)] +use codex_exec_server::ExecEnvPolicy; use codex_exec_server::ExecOutputStream; use codex_exec_server::ExecParams; use codex_exec_server::ExecProcess; @@ -16,6 +18,8 @@ use codex_exec_server::ProcessSignal; use codex_exec_server::ReadResponse; use codex_exec_server::StartedExecProcess; use codex_exec_server::WriteStatus; +#[cfg(unix)] +use codex_protocol::config_types::ShellEnvironmentPolicyInherit; use codex_utils_path_uri::PathUri; use pretty_assertions::assert_eq; use tempfile::TempDir; @@ -96,6 +100,68 @@ async fn assert_exec_process_starts_and_exits(use_remote: bool) -> Result<()> { Ok(()) } +#[cfg(unix)] +async fn assert_bash_env_exports_are_cached(use_remote: bool) -> Result<()> { + let context = create_process_context(use_remote).await?; + let temp = tempfile::tempdir()?; + let bash_env = temp.path().join("bash_env.sh"); + let counter = temp.path().join("counter"); + std::fs::write( + &bash_env, + "printf x >> \"$COUNTER_FILE\"\nexport CACHED_VALUE=ready\nexport PATH=\"$PATH:/from-bash-env\"\n", + )?; + let cwd = PathUri::from_host_native_path(temp.path())?; + + for index in 0..2 { + let process = context + .backend + .start(ExecParams { + process_id: format!("bash-env-cache-{index}").into(), + argv: vec![ + "/bin/bash".to_string(), + "-c".to_string(), + "printf '%s|%s' \"$CACHED_VALUE\" \"$PATH\"".to_string(), + ], + cwd: cwd.clone(), + env_policy: Some(ExecEnvPolicy { + inherit: ShellEnvironmentPolicyInherit::None, + ignore_default_excludes: true, + exclude: Vec::new(), + r#set: HashMap::new(), + include_only: Vec::new(), + bash_env_cache_scope: Some(cwd.clone()), + }), + env: HashMap::from([ + ( + "BASH_ENV".to_string(), + bash_env.to_string_lossy().into_owned(), + ), + ( + "COUNTER_FILE".to_string(), + counter.to_string_lossy().into_owned(), + ), + ("PATH".to_string(), "/usr/bin:/bin".to_string()), + ]), + tty: false, + pipe_stdin: false, + arg0: None, + sandbox: None, + enforce_managed_network: false, + managed_network: None, + }) + .await?; + let wake = process.process.subscribe_wake(); + let (output, exit_code, closed) = + collect_process_output_from_reads(process.process, wake).await?; + assert_eq!(output, "ready|/usr/bin:/bin:/from-bash-env"); + assert_eq!(exit_code, Some(0)); + assert!(closed); + } + + assert_eq!(std::fs::read_to_string(counter)?, "x"); + Ok(()) +} + async fn read_process_until_change( session: Arc, wake_rx: &mut watch::Receiver, @@ -928,3 +994,13 @@ async fn exec_process_signal_reports_unsupported_on_windows(use_remote: bool) -> async fn exec_process_preserves_queued_events_before_subscribe(use_remote: bool) -> Result<()> { assert_exec_process_preserves_queued_events_before_subscribe(use_remote).await } + +#[test_case(false ; "local")] +#[test_case(true ; "remote")] +#[cfg(unix)] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +// Serialize tests that launch a real exec-server process through the full CLI. +#[serial_test::serial(remote_exec_server)] +async fn bash_env_exports_are_cached(use_remote: bool) -> Result<()> { + assert_bash_env_exports_are_cached(use_remote).await +} diff --git a/codex-rs/rmcp-client/src/stdio_server_launcher.rs b/codex-rs/rmcp-client/src/stdio_server_launcher.rs index 7d03e1437e0a..7c89ace5e980 100644 --- a/codex-rs/rmcp-client/src/stdio_server_launcher.rs +++ b/codex-rs/rmcp-client/src/stdio_server_launcher.rs @@ -581,6 +581,7 @@ impl ExecutorStdioServerLauncher { exclude: Vec::new(), r#set: HashMap::new(), include_only, + bash_env_cache_scope: None, } } }