diff --git a/CHANGELOG.md b/CHANGELOG.md index 120829d..eab8ccb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,64 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [4.0.0] - 2026-06-21 + +Milestone release: **filesystem-first agents** ("eve parity"). A single directory +now defines a durable agent by convention — `instructions.md` (role slot), +`agent.acl` (config), `skills/`, `schedules/` (cron), and `tools/` — served by a +`serve` daemon that runs each schedule as a full harness turn. No breaking changes +to existing APIs; the new surface is additive and gated behind the `serve` feature. + +### Added + +- **Filesystem-first agent directories (`AgentDir`) + the `serve` daemon.** A + directory with a required `instructions.md` (injected as a prompt *slot*, so the + harness keeps `BOUNDARIES`/response-format/verification authoritative) plus + optional `agent.acl`, `skills/`, `schedules/`, and `tools/` loads via + `AgentDir::load` into existing config objects — no new runtime, no new prompt + system. `serve_agent_dir` runs each enabled cron schedule on its own durable + `schedule:<name>` session; every fire is a FULL `AgentSession::send` turn + (context, tool visibility, safety gate, verification), never a raw model call. + Cron accepts 5- and 6-field expressions (UTC). Exposed in the Node and Python + SDKs (`serveAgentDir` / `serve_agent_dir`) returning a `ServeHandle`. Gated + behind the `serve` Cargo feature. +- **`tools/` declarative tools — `kind: mcp`.** A `tools/<name>.md` with + `kind: mcp` registers an MCP server into each schedule session through the + normal `add_mcp_server` path (namespaced `mcp__<server>__<tool>`, gated by the + session permission policy). Duplicate names and unknown kinds fail closed at load. 
+- **Rehydrate-on-boot for the serve daemon.** When a `SessionStore` is configured + (e.g. via `SessionOptions::with_file_session_store`), `serve_agent_dir` now + *resumes* any schedule whose `schedule:<name>` session already exists in the + store instead of starting it fresh, so a daemon restart keeps the accumulated + conversation context. Resume restores history only — the current + `instructions.md` / `skills/` / `tools/` are re-applied each boot, so editing + the agent dir still takes effect. With no store configured, every boot starts + fresh (unchanged). Reuses the existing `Agent::resume_session` path; no new + persistence machinery. +- **Sandboxed `script` tools for filesystem-first agents (`tools/ kind: script`).** + A `tools/<name>.md` with `kind: script` now becomes a model-visible tool backed + by the existing sandboxed QuickJS `program` path — no new sandbox. The spec pins + the workspace-relative `.js`/`.mjs` `path`, the `allowed_tools` allow-list, and + the `limits` (timeout / tool-calls / output); the model supplies only `inputs`. + - New `AgentDirScriptTool` registers through the same non-shadowing + `register_dynamic_tool` path as builtins/MCP, so a `tools/` entry can add a + name but never replace a builtin. The model's call to the script tool is + permission-gated like any tool; the script's *inner* `ctx.tool` calls are + bounded by the pinned `allowed_tools` list + the QuickJS sandbox (no + fs/net/proc/env), but are NOT re-checked against the session permission policy. + The complement to `kind: mcp` (both now ship). + - The `allowed_tools` list is the security boundary for a directory script, so + the loader **fails it closed**: an omitted list grants NO tools (not all of + them); list only the minimum, and avoid high-authority tools unless the + directory is fully trusted. 
+ - Fails closed at load (not at first call): a non-`.js`/`.mjs` `path`, a path + that escapes the workspace (absolute / `..`), an out-of-range sandbox limit + (zero, or an effectively-unbounded `timeoutMs`), an unknown `kind`, or a + duplicate tool name is a directory-load error. A `tools/` file is semi-trusted, + so limits are bounded (≤10 min / ≤1000 calls / ≤16 MiB). + - The serve daemon installs the agent dir's `tools/` into every schedule + session, so scheduled turns can call them. + ## [3.6.2] - 2026-06-14 Release-engineering fix for 3.6.0/3.6.1 (no library code changes). Both prior diff --git a/Cargo.lock b/Cargo.lock index cc21a4a..a0d96b3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -37,7 +37,7 @@ dependencies = [ [[package]] name = "a3s-code-core" -version = "3.6.2" +version = "4.0.0" dependencies = [ "a3s-acl 0.2.0", "a3s-ahp", diff --git a/core/Cargo.toml b/core/Cargo.toml index 785aa3b..78c1dda 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "a3s-code-core" -version = "3.6.2" +version = "4.0.0" edition = "2021" authors = ["A3S Lab Team"] license = "MIT" @@ -139,8 +139,8 @@ s3 = [ "dep:aws-smithy-types", "dep:aws-smithy-runtime-api", ] -# Enable the durable serve layer: cron schedules + (later) channels + serve daemon -# for filesystem-first agents. Library-only embedders pay nothing without it. +# Enable the durable serve layer: cron schedules + serve daemon for +# filesystem-first agents. Library-only embedders pay nothing without it. serve = ["dep:cron"] [dev-dependencies] diff --git a/core/src/agent_api.rs b/core/src/agent_api.rs index cd9fc6c..df87471 100644 --- a/core/src/agent_api.rs +++ b/core/src/agent_api.rs @@ -414,6 +414,13 @@ impl Agent { /// /// The `options` must include a `session_store` (or `with_file_session_store`) /// that contains the saved session. + /// + /// The resumed session uses the **workspace stored in the snapshot**, not a + /// workspace from `options`. 
The store is therefore a trust boundary: its + /// contents drive the resumed workspace and the persisted runtime policies. + /// + /// Runtime: this loads the snapshot via `block_in_place`, so it must be called + /// on a multi-threaded Tokio runtime (it panics on a current-thread runtime). pub fn resume_session( &self, session_id: &str, @@ -1311,6 +1318,15 @@ impl AgentSession { .await } + /// The session's tool executor, for installing agent-dir `tools/` entries + /// (e.g. a `kind = "script"` tool) into the live registry. Internal seam used + /// by [`serve::install_agent_dir_tools`](crate::serve::install_agent_dir_tools) + /// (the only caller, hence the `serve` gate). + #[cfg(feature = "serve")] + pub(crate) fn tool_executor(&self) -> &Arc { + &self.tool_executor + } + /// Remove an MCP server from this session. /// /// Disconnects the server and unregisters all its tools from the executor. diff --git a/core/src/config/agent_dir.rs b/core/src/config/agent_dir.rs index 765a8c5..9e2af15 100644 --- a/core/src/config/agent_dir.rs +++ b/core/src/config/agent_dir.rs @@ -11,10 +11,9 @@ //! ├── agent.acl (optional) model/providers/queue (CodeConfig). Default if absent. //! ├── skills/ (optional) *.md skills, appended to CodeConfig.skill_dirs. //! ├── schedules/ (optional) *.md cron jobs (YAML frontmatter `cron:` + body=prompt). -//! ├── channels/ (optional) *.{md,acl} inbound adapters — parsed here, served later. -//! └── tools/ (optional) *.md tool specs (`kind: mcp`) → MCP servers -//! │ registered into the session (sandboxed-script -//! │ `kind` is the next increment). +//! └── tools/ (optional) *.md tool specs: `kind: mcp` → MCP server, +//! │ `kind: script` → sandboxed QuickJS tool. Both +//! │ register into the session as ordinary tools. //! ``` //! //! 
[`AgentDir::load`] SYNTHESIZES existing config objects rather than adding a new @@ -42,21 +41,6 @@ pub struct ScheduleSpec { pub enabled: bool, } -/// An inbound channel adapter spec, parsed from `channels/.{md,acl}`. -/// -/// Parsed so the directory convention is complete; the serve layer does not yet -/// implement adapters (channels are design-only for now). `frontmatter` carries -/// the raw adapter options for whichever adapter eventually handles `kind`. -#[derive(Debug, Clone, PartialEq)] -pub struct ChannelSpec { - /// Channel name (frontmatter `name`, else the file stem). - pub name: String, - /// Adapter kind: `http`, `slack`, `discord`, … - pub kind: String, - /// Raw frontmatter (YAML) for the adapter to interpret. - pub frontmatter: String, -} - /// A tool definition parsed from `tools/.md`, dispatched by `kind`. /// /// Tool *definition* may come from the directory, but visibility and safety stay @@ -69,6 +53,10 @@ pub enum ToolSpec { /// `kind = "mcp"` → an MCP server connected into the session, contributing its /// `list_tools()` as `mcp____*` tools. Mcp(McpServerConfig), + /// `kind = "script"` → a sandboxed QuickJS tool over the existing `program` + /// path. The model sees a named tool; the script `path`, allow-list, and + /// limits are pinned by the spec. + Script(ScriptToolSpec), } impl ToolSpec { @@ -76,31 +64,71 @@ impl ToolSpec { pub fn name(&self) -> &str { match self { ToolSpec::Mcp(cfg) => &cfg.name, + ToolSpec::Script(spec) => &spec.name, } } - /// The spec kind discriminant (currently only `mcp`). + /// The spec kind discriminant (`mcp` or `script`). pub fn kind(&self) -> &str { match self { ToolSpec::Mcp(_) => "mcp", + ToolSpec::Script(_) => "script", } } } +/// A sandboxed QuickJS tool parsed from a `kind = "script"` file. Names a +/// workspace-relative `.js`/`.mjs` source and pins the sandbox allow-list + +/// limits; the model supplies only `inputs`. Executed via the existing `program` +/// tool path — no new sandbox. 
The model's call to it is permission-gated like any +/// tool; the script's inner `ctx.tool` calls are bounded by `allowed_tools` + the +/// sandbox (NOT the session permission policy), so the allow-list is the boundary. +#[derive(Debug, Clone)] +pub struct ScriptToolSpec { + /// Model-visible tool name (registry key; unique within `tools/`). + pub name: String, + /// Model-facing description (frontmatter `description`, else the file body). + pub description: String, + /// Workspace-relative path to the `.js`/`.mjs` source. + pub path: PathBuf, + /// Tools the script may call through `ctx`. The agent-dir loader fails closed: + /// an omitted list becomes `Some(vec![])` (the script may call NO tools), so a + /// directory author must opt each tool in explicitly. `program` is always + /// excluded (no script-launches-script). This allow-list — not the session + /// permission policy — is what bounds a script's inner `ctx.tool` calls, so it + /// is the security boundary for directory-authored scripts. + pub allowed_tools: Option>, + /// Sandbox limits (timeout / tool-call / output caps); defaults apply when unset. + pub limits: ScriptToolLimits, +} + +/// Sandbox limits for a `kind = "script"` tool. Mirrors the three numeric fields +/// the `program` tool's `ScriptLimits` accepts and is serialized to it verbatim +/// (camelCase keys), so no new limit machinery is introduced. +#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ScriptToolLimits { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub timeout_ms: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub max_tool_calls: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub max_output_bytes: Option, +} + /// A loaded agent directory: synthesized [`CodeConfig`] + prompt slots + parsed -/// schedule/channel specs. Build a session from `config` + `prompt_slots`. +/// schedule + tool specs. 
Build a session from `config` + `prompt_slots`. /// /// Distinct from [`CodeConfig::agent_dirs`](crate::config::CodeConfig) / /// `register_agent_dir`, which scan a directory for **worker/subagent** /// definitions. An `AgentDir` is the eve-style *primary* agent — the directory -/// that defines this agent's prompt, skills, schedules, and channels. +/// that defines this agent's prompt, skills, schedules, and tools. #[derive(Debug, Clone)] pub struct AgentDir { pub dir: PathBuf, pub config: CodeConfig, pub prompt_slots: SystemPromptSlots, pub schedules: Vec, - pub channels: Vec, pub tools: Vec, } @@ -143,7 +171,6 @@ impl AgentDir { } let schedules = load_schedules(&dir.join("schedules"))?; - let channels = load_channels(&dir.join("channels"))?; let tools = load_tools(&dir.join("tools"))?; Ok(Self { @@ -151,7 +178,6 @@ impl AgentDir { config, prompt_slots, schedules, - channels, tools, }) } @@ -202,28 +228,47 @@ fn load_schedules(dir: &Path) -> Result> { Ok(out) } -fn load_channels(dir: &Path) -> Result> { - let mut out = Vec::new(); - for path in md_files(dir, &["md", "acl"])? { - let content = std::fs::read_to_string(&path) - .map_err(|e| CodeError::Context(format!("read {}: {e}", path.display())))?; - let (front, _body) = split_frontmatter(&content); - let front = front.ok_or_else(|| { - CodeError::Context(format!( - "channel {} has no frontmatter (need `kind:`)", - path.display() - )) - })?; - let meta: ChannelFront = serde_yaml::from_str(&front).map_err(|e| { - CodeError::Context(format!("channel {} frontmatter: {e}", path.display())) - })?; - out.push(ChannelSpec { - name: meta.name.unwrap_or_else(|| file_stem(&path)), - kind: meta.kind, - frontmatter: front, - }); +/// Upper bounds for a `kind = "script"` tool's sandbox limits. A `tools/` file is +/// semi-trusted (the whole point of the guardrail), so an author cannot set an +/// effectively-unbounded `timeoutMs` that hangs the harness, nor a zero that makes +/// the tool silently non-functional. 
Generous ceilings; the program tool's own +/// defaults (30s / 20 calls / 64 KiB) apply when a field is unset. +const SCRIPT_MAX_TIMEOUT_MS: u64 = 600_000; // 10 minutes +const SCRIPT_MAX_TOOL_CALLS: usize = 1_000; +const SCRIPT_MAX_OUTPUT_BYTES: usize = 16 * 1024 * 1024; // 16 MiB + +/// Reject zero or above-ceiling limits at load (fail closed). Unset fields keep +/// the program tool's defaults. +fn validate_script_limits( + limits: ScriptToolLimits, +) -> std::result::Result { + fn check( + v: Option, + max: T, + one: T, + field: &str, + ) -> std::result::Result<(), String> { + if let Some(v) = v { + if v < one || v > max { + return Err(format!("limit {field}={v} is out of range [1, {max}]")); + } + } + Ok(()) } - Ok(out) + check(limits.timeout_ms, SCRIPT_MAX_TIMEOUT_MS, 1, "timeoutMs")?; + check( + limits.max_tool_calls, + SCRIPT_MAX_TOOL_CALLS, + 1, + "maxToolCalls", + )?; + check( + limits.max_output_bytes, + SCRIPT_MAX_OUTPUT_BYTES, + 1, + "maxOutputBytes", + )?; + Ok(limits) } fn load_tools(dir: &Path) -> Result> { @@ -232,7 +277,7 @@ fn load_tools(dir: &Path) -> Result> { for path in md_files(dir, &["md"])? 
{ let content = std::fs::read_to_string(&path) .map_err(|e| CodeError::Context(format!("read {}: {e}", path.display())))?; - let (front, _body) = split_frontmatter(&content); + let (front, body) = split_frontmatter(&content); let front = front.ok_or_else(|| { CodeError::Context(format!( "tool {} has no YAML frontmatter (need `kind:`)", @@ -254,9 +299,52 @@ fn load_tools(dir: &Path) -> Result> { })?; ToolSpec::Mcp(cfg) } + "script" => { + let meta: ScriptFront = serde_yaml::from_str(&front).map_err(|e| { + CodeError::Context(format!( + "tool {} (kind=script) frontmatter: {e}", + path.display() + )) + })?; + // Fail closed at load (not at first call), consistent with the + // runtime guards the script runs under: a non-JS source, a path + // that escapes the workspace, or an out-of-range sandbox limit are + // all directory-load errors rather than first-call surprises. + let p = meta.path.to_string_lossy(); + if !(p.ends_with(".js") || p.ends_with(".mjs")) { + return Err(CodeError::Context(format!( + "tool {} (kind=script) path `{p}` must point to a .js or .mjs file", + path.display() + ))); + } + crate::workspace::validate_relative_pattern(&p, "script path").map_err(|e| { + CodeError::Context(format!("tool {} (kind=script): {e}", path.display())) + })?; + let limits = + validate_script_limits(meta.limits.unwrap_or_default()).map_err(|e| { + CodeError::Context(format!("tool {} (kind=script): {e}", path.display())) + })?; + let description = meta + .description + .map(|d| d.trim().to_string()) + .filter(|d| !d.is_empty()) + .unwrap_or_else(|| body.trim().to_string()); + ToolSpec::Script(ScriptToolSpec { + name: meta.name.unwrap_or_else(|| file_stem(&path)), + description, + path: meta.path, + // Fail closed: a directory-authored script is semi-trusted and + // its inner `ctx.tool` calls are NOT re-checked by the session + // permission policy (only by this allow-list + the sandbox), so + // an omitted list grants NO tools rather than all of them. 
The + // author must opt each tool in explicitly. + allowed_tools: Some(meta.allowed_tools.unwrap_or_default()), + limits, + }) + } other => { return Err(CodeError::Context(format!( - "tool {} has unsupported kind `{other}` (supported: `mcp`)", + "tool {} has unsupported kind `{other}` (supported: `mcp`, `script`)", path.display() ))); } @@ -310,15 +398,23 @@ struct ScheduleFront { } #[derive(serde::Deserialize)] -struct ChannelFront { +struct ToolFront { kind: String, - #[serde(default)] - name: Option, } +/// Frontmatter for a `kind = "script"` tool. The `kind` key is ignored here +/// (already matched); unknown keys are tolerated like the other loaders. #[derive(serde::Deserialize)] -struct ToolFront { - kind: String, +struct ScriptFront { + #[serde(default)] + name: Option, + path: PathBuf, + #[serde(default)] + description: Option, + #[serde(default)] + allowed_tools: Option>, + #[serde(default)] + limits: Option, } #[cfg(test)] @@ -331,7 +427,6 @@ mod tests { let _ = std::fs::remove_dir_all(&base); std::fs::create_dir_all(base.join("skills")).unwrap(); std::fs::create_dir_all(base.join("schedules")).unwrap(); - std::fs::create_dir_all(base.join("channels")).unwrap(); std::fs::create_dir_all(base.join("tools")).unwrap(); std::fs::write( base.join("instructions.md"), @@ -349,13 +444,13 @@ mod tests { ) .unwrap(); std::fs::write( - base.join("channels/web.md"), - "---\nkind: http\nport: 8787\n---\nInbound HTTP channel.\n", + base.join("tools/github.md"), + "---\nkind: mcp\nname: github\ntransport: stdio\ncommand: echo\nargs: [\"hi\"]\n---\nGitHub MCP tools.\n", ) .unwrap(); std::fs::write( - base.join("tools/github.md"), - "---\nkind: mcp\nname: github\ntransport: stdio\ncommand: echo\nargs: [\"hi\"]\n---\nGitHub MCP tools.\n", + base.join("tools/search.md"), + "---\nkind: script\nname: search-auth\npath: scripts/search.js\nallowed_tools: [grep, read]\nlimits:\n timeoutMs: 30000\n maxToolCalls: 10\n---\nFind auth-related files.\n", ) .unwrap(); base @@ -387,18 
+482,84 @@ mod tests { assert_eq!(s.prompt, "Generate the daily report and post it."); assert!(s.enabled); - // channels/*.md → parsed kind (adapters not yet implemented). - assert_eq!(agent.channels.len(), 1); - assert_eq!(agent.channels[0].kind, "http"); - - // tools/*.md (kind=mcp) → parsed MCP server spec. - assert_eq!(agent.tools.len(), 1); + // tools/*.md → parsed by kind (sorted by path: github.md, then search.md). + assert_eq!(agent.tools.len(), 2); assert_eq!(agent.tools[0].kind(), "mcp"); assert_eq!(agent.tools[0].name(), "github"); + // kind=script → ScriptToolSpec with pinned path, allow-list, limits; the + // body becomes the model-facing description. + assert_eq!(agent.tools[1].kind(), "script"); + assert_eq!(agent.tools[1].name(), "search-auth"); + let ToolSpec::Script(s) = &agent.tools[1] else { + panic!("expected a script tool"); + }; + assert_eq!(s.path, PathBuf::from("scripts/search.js")); + assert_eq!(s.description, "Find auth-related files."); + assert_eq!( + s.allowed_tools.as_deref(), + Some(["grep".to_string(), "read".to_string()].as_slice()) + ); + assert_eq!(s.limits.timeout_ms, Some(30000)); + assert_eq!(s.limits.max_tool_calls, Some(10)); + let _ = std::fs::remove_dir_all(&dir); } + /// One script tool per file, written under a unique temp dir, must fail to load. + fn assert_script_tool_load_err(tag: &str, frontmatter: &str) { + let base = std::env::temp_dir().join(format!("a3s-agentdir-{tag}-{}", std::process::id())); + let _ = std::fs::remove_dir_all(&base); + std::fs::create_dir_all(base.join("tools")).unwrap(); + std::fs::write(base.join("instructions.md"), "role").unwrap(); + std::fs::write(base.join("tools/x.md"), frontmatter).unwrap(); + assert!( + AgentDir::load(&base).is_err(), + "expected load error for: {frontmatter}" + ); + let _ = std::fs::remove_dir_all(&base); + } + + #[test] + fn script_tool_non_js_path_is_an_error() { + // path must end .js/.mjs — fail closed at load, not at first call. 
+ assert_script_tool_load_err( + "py", + "---\nkind: script\nname: x\npath: scripts/run.py\n---\n", + ); + } + + #[test] + fn script_tool_escaping_path_is_an_error() { + // Absolute and parent-traversal paths are rejected at load (fail closed), + // matching the runtime workspace boundary. + assert_script_tool_load_err( + "abs", + "---\nkind: script\nname: x\npath: /etc/evil.js\n---\n", + ); + assert_script_tool_load_err( + "dotdot", + "---\nkind: script\nname: x\npath: ../../escape.js\n---\n", + ); + } + + #[test] + fn script_tool_out_of_range_limits_are_an_error() { + // Zero disables the tool; u64::MAX disables the sandbox timeout. Both rejected. + assert_script_tool_load_err( + "zero", + "---\nkind: script\nname: x\npath: a.js\nlimits:\n timeoutMs: 0\n---\n", + ); + assert_script_tool_load_err( + "huge", + "---\nkind: script\nname: x\npath: a.js\nlimits:\n timeoutMs: 18446744073709551615\n---\n", + ); + assert_script_tool_load_err( + "calls", + "---\nkind: script\nname: x\npath: a.js\nlimits:\n maxToolCalls: 0\n---\n", + ); + } + #[test] fn unknown_tool_kind_is_an_error() { let base = @@ -425,6 +586,59 @@ mod tests { let _ = std::fs::remove_dir_all(&base); } + #[test] + fn script_tool_accepts_mjs_and_frontmatter_description_wins_over_body() { + let base = std::env::temp_dir().join(format!("a3s-agentdir-mjs-{}", std::process::id())); + let _ = std::fs::remove_dir_all(&base); + std::fs::create_dir_all(base.join("tools")).unwrap(); + std::fs::write(base.join("instructions.md"), "role").unwrap(); + std::fs::write( + base.join("tools/x.md"), + "---\nkind: script\nname: x\npath: a.mjs\ndescription: from frontmatter\n---\nbody description\n", + ) + .unwrap(); + + let agent = AgentDir::load(&base).unwrap(); + let ToolSpec::Script(s) = &agent.tools[0] else { + panic!("expected script tool"); + }; + assert_eq!(s.path, PathBuf::from("a.mjs"), ".mjs is accepted"); + assert_eq!( + s.description, "from frontmatter", + "frontmatter description takes precedence over the body" 
+ ); + let _ = std::fs::remove_dir_all(&base); + } + + #[test] + fn script_tool_omitted_allow_list_fails_closed_to_empty() { + // A directory script with no `allowed_tools` must default to an EMPTY + // allow-list (no tools), not "all tools" — its inner ctx.tool calls are + // not re-checked by the session permission policy, so the allow-list is + // the boundary and an omission must grant nothing. + let base = + std::env::temp_dir().join(format!("a3s-agentdir-noallow-{}", std::process::id())); + let _ = std::fs::remove_dir_all(&base); + std::fs::create_dir_all(base.join("tools")).unwrap(); + std::fs::write(base.join("instructions.md"), "role").unwrap(); + std::fs::write( + base.join("tools/x.md"), + "---\nkind: script\nname: x\npath: a.js\n---\n", + ) + .unwrap(); + + let agent = AgentDir::load(&base).unwrap(); + let ToolSpec::Script(s) = &agent.tools[0] else { + panic!("expected script tool"); + }; + assert_eq!( + s.allowed_tools.as_deref(), + Some([].as_slice()), + "omitted allowed_tools must fail closed to an empty list, not None/all" + ); + let _ = std::fs::remove_dir_all(&base); + } + #[test] fn missing_instructions_is_an_error() { let base = std::env::temp_dir().join(format!("a3s-agentdir-empty-{}", std::process::id())); diff --git a/core/src/config/mod.rs b/core/src/config/mod.rs index a5e681c..1043870 100644 --- a/core/src/config/mod.rs +++ b/core/src/config/mod.rs @@ -17,7 +17,7 @@ mod search; #[cfg(test)] mod tests; -pub use agent_dir::{AgentDir, ChannelSpec, ScheduleSpec, ToolSpec}; +pub use agent_dir::{AgentDir, ScheduleSpec, ScriptToolLimits, ScriptToolSpec, ToolSpec}; pub use provider::{ModelConfig, ModelCost, ModelLimit, ModelModalities, ProviderConfig}; pub use search::{ BrowserBackend, DocumentCacheConfig, DocumentOcrConfig, DocumentParserConfig, HeadlessConfig, diff --git a/core/src/serve/daemon.rs b/core/src/serve/daemon.rs index 5c812bb..39f176a 100644 --- a/core/src/serve/daemon.rs +++ b/core/src/serve/daemon.rs @@ -3,12 +3,25 @@ //! //! 
Each schedule fires on its OWN session (stable id `schedule:<name>`), so a //! schedule's repeated fires accumulate context/memory while distinct schedules -//! stay isolated. The agent dir's `instructions.md` (prompt slots) and `skills/` -//! (`skill_dirs`) are injected into every schedule session via [`SessionOptions`]. +//! stay isolated. The agent dir's `instructions.md` (prompt slots), `skills/` +//! (`skill_dirs`), and `tools/` (MCP servers and sandboxed scripts) are injected +//! into every schedule session via [`SessionOptions`] / `install_agent_dir_tools`. //! -//! Channels and full multi-session rehydration attach here next; the design keeps -//! every triggered run a FULL harness turn (`AgentSession::send`), never a raw -//! model call. +//! Rehydrate-on-boot: when a [`SessionStore`](crate::store::SessionStore) is +//! configured, a schedule whose `schedule:<name>` session already exists in the +//! store is RESUMED (its conversation history is restored), so a daemon restart +//! keeps the accumulated context instead of starting cold. The current +//! `instructions.md`/`skills/`/`tools/` still win on resume (resume restores +//! history, not the prompt), so editing the agent dir takes effect on the next +//! boot. With no store configured, every boot starts a fresh session — same as +//! before. Two caveats: rehydrate restores conversation context, NOT missed +//! fires — a schedule that would have fired while the daemon was down is not +//! caught up; and resume loads the stored session via `block_in_place`, so the +//! daemon must run on a multi-threaded Tokio runtime (the default `#[tokio::main]` +//! and both bundled SDK runtimes are multi-threaded). +//! +//! Every triggered run is a FULL harness turn (`AgentSession::send`) — context, +//! tool visibility, safety gate, verification — never a raw model call. 
use std::collections::HashMap; use std::sync::Arc; @@ -44,11 +57,11 @@ impl ScheduleSink for SessionScheduleSink { /// Serve an agent directory's schedules until `cancel` fires. /// -/// Builds one session per enabled schedule (stable id `schedule:`), -/// injecting the agent dir's `prompt_slots` and `skill_dirs`. `extra` merges into -/// every schedule session's [`SessionOptions`] (model, `llm_client`, -/// `session_store`, …) — `prompt_slots`/`session_id` set there are NOT overridden, -/// so a host can pin them per schedule if it wants. +/// Builds one durable session per enabled schedule (`schedule:`), injecting +/// the agent dir's `prompt_slots`, `skill_dirs`, and `tools/`. `extra` merges into +/// each schedule session's [`SessionOptions`] (model, `llm_client`, +/// `session_store`, …); a host-set `prompt_slots` is honored, but `session_id` is +/// always derived per schedule. pub async fn serve_agent_dir( agent: &Agent, agent_dir: &AgentDir, @@ -57,23 +70,17 @@ pub async fn serve_agent_dir( cancel: CancellationToken, ) -> Result<()> { let extra = extra.unwrap_or_default(); - let mut sessions = HashMap::new(); + let mut sessions = HashMap::new(); for spec in agent_dir.schedules.iter().filter(|s| s.enabled) { - let mut opts = extra.clone(); - if opts.prompt_slots.is_none() { - opts.prompt_slots = Some(agent_dir.prompt_slots.clone()); - } - opts.skill_dirs - .extend(agent_dir.config.skill_dirs.iter().cloned()); - if opts.session_id.is_none() { - opts.session_id = Some(format!("schedule:{}", spec.name)); - } - let session = agent.session(workspace.clone(), Some(opts))?; - // Install the agent dir's tools/ (e.g. MCP servers) into each schedule - // session, so a scheduled turn can call them. Connection is fallible and - // surfaces here (fail at startup, not at first call). 
- super::tools::install_agent_dir_tools(&session, &agent_dir.tools).await?; + let session = build_session( + agent, + agent_dir, + format!("schedule:{}", spec.name), + workspace.clone(), + &extra, + ) + .await?; sessions.insert(spec.name.clone(), Arc::new(session)); } @@ -83,6 +90,54 @@ pub async fn serve_agent_dir( Ok(()) } +/// Build one durable serve session under the explicit `session_id` +/// (`schedule:`), injecting the agent dir's prompt slots, skills, and +/// `tools/`. +/// +/// Rehydrate-on-boot: if a `SessionStore` is configured (via `extra`) and already +/// holds `session_id`, RESUME it so prior context is restored; otherwise create a +/// fresh session. Resume re-applies the freshly loaded prompt slots / skills / +/// tools (it restores history, not the prompt) and uses the stored workspace; a +/// fresh session uses `workspace`. +/// +/// The caller owns the id; a host-set `extra.session_id` is intentionally ignored +/// (it would collide every schedule onto one shared, store-clobbering id). +async fn build_session( + agent: &Agent, + agent_dir: &AgentDir, + session_id: String, + workspace: impl Into, + extra: &SessionOptions, +) -> Result { + let mut opts = extra.clone(); + if opts.prompt_slots.is_none() { + opts.prompt_slots = Some(agent_dir.prompt_slots.clone()); + } + opts.skill_dirs + .extend(agent_dir.config.skill_dirs.iter().cloned()); + opts.session_id = Some(session_id.clone()); + + // Resume only when the store already has this session; the borrow of + // `opts.session_store` is released before `opts` is moved into the chosen + // builder. `exists` returns the crate Result, so `?` propagates cleanly. + let resume = match &opts.session_store { + Some(store) => store.exists(&session_id).await?, + None => false, + }; + let session = if resume { + agent.resume_session(&session_id, opts)? + } else { + agent.session(workspace, Some(opts))? 
+ }; + + // Install the agent dir's tools/ (MCP servers + sandboxed scripts) into the + // session, so a triggered turn can call them. Connection is fallible and + // surfaces here (fail at startup, not at first call). Done for both fresh and + // resumed sessions — tools are not persisted, they are re-installed each boot. + super::tools::install_agent_dir_tools(&session, &agent_dir.tools).await?; + Ok(session) +} + #[cfg(test)] mod tests { use super::*; @@ -109,7 +164,6 @@ providers "anthropic" { ..Default::default() }, schedules, - channels: vec![], tools: vec![], } } @@ -143,4 +197,103 @@ providers "anthropic" { .await .unwrap(); } + + fn tick_dir() -> AgentDir { + agent_dir_with(vec![ScheduleSpec { + name: "tick".to_string(), + cron: "0 9 * * *".to_string(), + prompt: "do the scheduled work".to_string(), + enabled: true, + }]) + } + + /// Rehydrate-on-boot: a schedule whose session already exists in the store is + /// RESUMED, so a daemon restart keeps the accumulated conversation history. + /// (multi_thread: `resume_session` loads via `block_in_place`.) + #[tokio::test(flavor = "multi_thread")] + async fn rehydrates_existing_schedule_session_from_store() { + use crate::llm::Message; + let store: Arc = + Arc::new(crate::store::MemorySessionStore::new()); + let agent = Agent::from_config(test_agent_config()).await.unwrap(); + + // Simulate a prior daemon run: persist the schedule's session, then inject + // a message into the stored snapshot (seeds via public API only — the + // history field is private to agent_api). + let seed_opts = SessionOptions::new() + .with_session_store(store.clone()) + .with_session_id("schedule:tick"); + agent + .session("/tmp/ws", Some(seed_opts)) + .unwrap() + .save() + .await + .unwrap(); + let mut data = store.load("schedule:tick").await.unwrap().unwrap(); + data.messages = vec![Message::user("prior turn")]; + store.save(&data).await.unwrap(); + + // A fresh daemon boot must RESUME this schedule, not start empty. 
+ let dir = tick_dir(); + let extra = SessionOptions::new().with_session_store(store.clone()); + let session = build_session( + &agent, + &dir, + "schedule:tick".to_string(), + "/tmp/ws".to_string(), + &extra, + ) + .await + .unwrap(); + + assert_eq!(session.session_id(), "schedule:tick"); + let history = session.history(); + assert_eq!(history.len(), 1, "prior context must rehydrate on boot"); + assert_eq!(history[0].text(), "prior turn"); + } + + /// No prior session in the store → a fresh, empty session (the resume branch + /// is not taken). Also covers the no-store case implicitly (fresh either way). + #[tokio::test(flavor = "multi_thread")] + async fn fresh_schedule_session_when_store_has_no_prior() { + let store: Arc = + Arc::new(crate::store::MemorySessionStore::new()); + let agent = Agent::from_config(test_agent_config()).await.unwrap(); + let dir = tick_dir(); + let extra = SessionOptions::new().with_session_store(store.clone()); + let session = build_session( + &agent, + &dir, + "schedule:tick".to_string(), + "/tmp/ws".to_string(), + &extra, + ) + .await + .unwrap(); + assert_eq!(session.session_id(), "schedule:tick"); + assert!( + session.history().is_empty(), + "no prior session → fresh empty history" + ); + } + + /// No store configured at all → fresh session, and the resume path (the only + /// `block_in_place` caller) is never taken, so it works on a current-thread + /// runtime (plain `#[tokio::test]`). 
+ #[tokio::test] + async fn fresh_schedule_session_when_no_store_configured() { + let agent = Agent::from_config(test_agent_config()).await.unwrap(); + let dir = tick_dir(); + let session = build_session( + &agent, + &dir, + "schedule:tick".to_string(), + "/tmp/ws".to_string(), + &SessionOptions::new(), + ) + .await + .unwrap(); + assert_eq!(session.session_id(), "schedule:tick"); + assert!(session.history().is_empty()); + } } diff --git a/core/src/serve/mod.rs b/core/src/serve/mod.rs index 0530bcf..db79ae7 100644 --- a/core/src/serve/mod.rs +++ b/core/src/serve/mod.rs @@ -1,14 +1,16 @@ //! Durable serve layer for filesystem-first agents. //! //! Builds strictly on top of a3s-code's existing primitives — no new execution -//! machinery. Today it provides cron [`schedule`]s; the serve daemon (a session -//! registry persisted via `SessionStore`, graceful shutdown, rehydrate-on-boot) -//! and inbound channels attach here next. Gated behind the `serve` feature so -//! library-only embedders pay nothing. +//! machinery. It provides cron [`schedule`]s and the serve [`daemon`], which +//! installs the agent dir's `tools/` (MCP + sandboxed scripts) into each schedule +//! session. When a [`SessionStore`](crate::store::SessionStore) is configured, +//! schedule sessions rehydrate from it on boot, so a daemon restart resumes the +//! accumulated context rather than starting cold. Gated behind the `serve` +//! feature so library-only embedders pay nothing. //! -//! Invariant: every schedule/channel-triggered run is a FULL harness turn -//! (context, tool visibility, safety gate, verification) via `AgentSession::send`, -//! never a raw model call. +//! Invariant: every schedule-triggered run is a FULL harness turn (context, tool +//! visibility, safety gate, verification) via `AgentSession::send`, never a raw +//! model call. 
pub mod daemon; pub mod schedule; diff --git a/core/src/serve/schedule.rs b/core/src/serve/schedule.rs index e6c1e94..293bf29 100644 --- a/core/src/serve/schedule.rs +++ b/core/src/serve/schedule.rs @@ -150,6 +150,20 @@ mod tests { assert!(ScheduledJob::parse(spec("bad", "not a cron", true)).is_err()); } + #[test] + fn parses_6_field_cron_with_seconds() { + // 6-field form is passed through verbatim (the 5-field form gets a leading + // `0` seconds field). "30 0 9 * * *" → 09:00:30 daily. + let job = ScheduledJob::parse(spec("sec", "30 0 9 * * *", true)).unwrap(); + let before = DateTime::parse_from_rfc3339("2026-01-01T08:00:00Z") + .unwrap() + .with_timezone(&Utc); + assert_eq!( + job.next_fire_after(before).unwrap().to_rfc3339(), + "2026-01-01T09:00:30+00:00" + ); + } + #[test] fn scheduler_skips_disabled_and_rejects_bad_cron() { let s = Scheduler::new([ diff --git a/core/src/serve/tools.rs b/core/src/serve/tools.rs index babe89f..a9b5406 100644 --- a/core/src/serve/tools.rs +++ b/core/src/serve/tools.rs @@ -1,15 +1,18 @@ //! Install an agent directory's `tools/` specs into a live session. //! //! Parsing happens in [`AgentDir::load`](crate::config::AgentDir) (always, like -//! `schedules`/`channels`); *installation* is a session-time operation here so it +//! `schedules`); *installation* is a session-time operation here so it //! reuses the same fallible, harness-owned registration the SDK's //! [`add_mcp_server`](crate::AgentSession::add_mcp_server) already exposes — tool //! definition comes from the directory, but visibility and the safety gate stay //! with the harness. +use std::sync::Arc; + use crate::agent_api::AgentSession; use crate::config::ToolSpec; use crate::error::Result; +use crate::tools::AgentDirScriptTool; /// Install each parsed [`ToolSpec`] into `session`. /// @@ -18,12 +21,24 @@ use crate::error::Result; /// session's permission policy like any other tool. Connection is fallible and /// surfaces here (e.g. 
a missing `command` binary), so a misconfigured tool fails /// at serve startup rather than silently at first call. +/// +/// `script` specs register a sandboxed QuickJS tool ([`AgentDirScriptTool`]) into +/// the session registry via the same non-shadowing `register_dynamic_tool` path +/// builtins/MCP use — it cannot replace a builtin, and the model's call to it is +/// permission-gated like any tool. The script's *inner* `ctx.tool` calls are +/// bounded by the spec's pinned (fail-closed) allow-list and the QuickJS sandbox +/// rather than the session permission policy — see [`AgentDirScriptTool`]. pub async fn install_agent_dir_tools(session: &AgentSession, specs: &[ToolSpec]) -> Result<()> { for spec in specs { match spec { ToolSpec::Mcp(config) => { session.add_mcp_server(config.clone()).await?; } + ToolSpec::Script(script) => { + let registry = Arc::clone(session.tool_executor().registry()); + let tool = Arc::new(AgentDirScriptTool::new(script.clone(), registry)); + session.tool_executor().register_dynamic_tool(tool); + } } } Ok(()) @@ -53,4 +68,71 @@ providers "anthropic" { // Empty specs → no MCP connect attempted, returns Ok without a live server. 
install_agent_dir_tools(&session, &[]).await.unwrap(); } + + fn script_spec(name: &str, description: &str) -> ToolSpec { + ToolSpec::Script(crate::config::ScriptToolSpec { + name: name.to_string(), + description: description.to_string(), + path: std::path::PathBuf::from("scripts/x.js"), + allowed_tools: None, + limits: crate::config::ScriptToolLimits::default(), + }) + } + + #[tokio::test] + async fn install_registers_script_tool_visibly() { + let agent = Agent::from_config(test_config()).await.unwrap(); + let session = agent.session("/tmp/ws", None).unwrap(); + + install_agent_dir_tools(&session, &[script_spec("repo-search", "search the repo")]) + .await + .unwrap(); + + let registry = session.tool_executor().registry(); + assert!( + registry.contains("repo-search"), + "script tool is registered" + ); + let tool = registry.get("repo-search").unwrap(); + assert_eq!(tool.description(), "search the repo", "it's our tool"); + } + + #[tokio::test] + async fn install_mcp_with_bad_command_fails_at_startup() { + let agent = Agent::from_config(test_config()).await.unwrap(); + let session = agent.session("/tmp/ws", None).unwrap(); + + // A stdio MCP server whose command does not exist must surface at install + // time (fail at startup, not silently at first call). Built via the same + // YAML path the agent-dir loader uses. 
+ let cfg: crate::mcp::McpServerConfig = serde_yaml::from_str( + "name: ghost\ntransport: stdio\ncommand: a3s-nonexistent-mcp-binary-xyz\n", + ) + .unwrap(); + + let result = install_agent_dir_tools(&session, &[ToolSpec::Mcp(cfg)]).await; + assert!( + result.is_err(), + "a missing MCP command must fail install_agent_dir_tools, not be swallowed" + ); + } + + #[tokio::test] + async fn install_script_cannot_shadow_a_builtin() { + let agent = Agent::from_config(test_config()).await.unwrap(); + let session = agent.session("/tmp/ws", None).unwrap(); + let registry = session.tool_executor().registry(); + let builtin_bash_desc = registry.get("bash").unwrap().description().to_string(); + + // A script that tries to take the `bash` name must be rejected, not shadow it. + install_agent_dir_tools(&session, &[script_spec("bash", "HIJACKED")]) + .await + .unwrap(); + + assert_eq!( + registry.get("bash").unwrap().description(), + builtin_bash_desc, + "builtin bash must be unchanged (script registration rejected)" + ); + } } diff --git a/core/src/tools/agent_dir_script_tool.rs b/core/src/tools/agent_dir_script_tool.rs new file mode 100644 index 0000000..9f4436d --- /dev/null +++ b/core/src/tools/agent_dir_script_tool.rs @@ -0,0 +1,270 @@ +//! A `tools/` `kind = "script"` entry exposed as a model-visible tool. +//! +//! Thin facade over the existing [`ProgramTool`] QuickJS path: the script `path`, +//! `allowed_tools`, and sandbox `limits` are pinned by the [`ScriptToolSpec`]; the +//! model only supplies `inputs`. It adds NO new sandbox — execution, the frozen +//! `ctx`, the memory/stack/timeout caps, and the per-call tool-call/output limits +//! are all the program tool's. +//! +//! Safety boundary: the model's call to THIS tool is permission-gated like any +//! tool (the harness owns visibility and the gate). But the script's inner +//! `ctx.tool` calls go through `ToolRegistry::execute_with_context` directly — +//! 
they are bounded by the pinned `allowed_tools` list, the `maxToolCalls` +//! counter, and the QuickJS sandbox (no fs/net/proc/env), and they run against the +//! session workspace, but they are NOT re-evaluated against the session permission +//! policy / HITL. The allow-list is therefore the boundary for what a script may +//! reach, which is why the agent-dir loader fails it closed (empty by default). + +use std::sync::Arc; + +use anyhow::Result; +use async_trait::async_trait; +use serde_json::json; + +use crate::config::ScriptToolSpec; +use crate::tools::types::{Tool, ToolContext, ToolOutput}; +use crate::tools::{ProgramTool, ToolRegistry}; + +/// A named, pre-parameterized `program` script call. +pub struct AgentDirScriptTool { + spec: ScriptToolSpec, + program: ProgramTool, +} + +impl AgentDirScriptTool { + /// `registry` must be the session's registry so the script's `ctx.tool` + /// calls resolve against the session's actual tools (and the allow-list). + pub fn new(spec: ScriptToolSpec, registry: Arc<ToolRegistry>) -> Self { + Self { + spec, + program: ProgramTool::new(registry), + } + } +} + +#[async_trait] +impl Tool for AgentDirScriptTool { + fn name(&self) -> &str { + &self.spec.name + } + + fn description(&self) -> &str { + &self.spec.description + } + + fn parameters(&self) -> serde_json::Value { + // The model controls only `inputs`; path/allow-list/limits are pinned. + json!({ + "type": "object", + "additionalProperties": false, + "properties": { + "inputs": { + "type": "object", + "description": "JSON inputs passed to the script's async run(ctx, inputs)." + } + }, + "required": [] + }) + } + + async fn execute(&self, args: &serde_json::Value, ctx: &ToolContext) -> Result<ToolOutput> { + let inputs = args.get("inputs").cloned().unwrap_or_else(|| json!({})); + + // Build exactly the args the `program` tool already accepts, with the + // spec's path/allow-list/limits pinned. `limits` serializes to the + // camelCase keys (timeoutMs/…) the program tool reads. 
+ let mut program_args = json!({ + "type": "script", + "language": "javascript", + "path": self.spec.path.to_string_lossy(), + "limits": self.spec.limits, + "inputs": inputs, + }); + // Fail closed: always pin the allow-list, so a spec built with `None` + // grants no tools instead of inheriting the program tool's default. + program_args["allowed_tools"] = json!(self.spec.allowed_tools.as_deref().unwrap_or_default()); + + self.program.execute(&program_args, ctx).await + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::config::ScriptToolLimits; + use crate::tools::types::ToolOutput; + use std::path::PathBuf; + + /// Minimal host tool the script calls through `ctx`. + struct EchoTool; + + #[async_trait] + impl Tool for EchoTool { + fn name(&self) -> &str { + "echo" + } + fn description(&self) -> &str { + "echo" + } + fn parameters(&self) -> serde_json::Value { + json!({ "type": "object" }) + } + async fn execute( + &self, + args: &serde_json::Value, + _ctx: &ToolContext, + ) -> Result<ToolOutput> { + let msg = args.get("message").and_then(|v| v.as_str()).unwrap_or(""); + Ok(ToolOutput::success(format!("echo:{msg}"))) + } + } + + fn spec(path: &str, allowed: Option<Vec<String>>) -> ScriptToolSpec { + ScriptToolSpec { + name: "echo-runner".to_string(), + description: "runs echo".to_string(), + path: PathBuf::from(path), + allowed_tools: allowed, + limits: ScriptToolLimits::default(), + } + } + + /// The spec's pinned `limits` actually reach the sandbox: a script that calls + /// a tool twice under `maxToolCalls: 1` has its second call rejected (proving + /// the wrapper forwards limits rather than silently dropping them). 
+ #[tokio::test] + async fn script_tool_pinned_limits_are_enforced() { + let dir = tempfile::tempdir().unwrap(); + std::fs::write( + dir.path().join("twice.js"), + r#"async function run(ctx) { + await ctx.tool("echo", { message: "1" }); + await ctx.tool("echo", { message: "2" }); + return { ok: true }; + }"#, + ) + .unwrap(); + + let registry = Arc::new(ToolRegistry::new(dir.path().to_path_buf())); + registry.register(Arc::new(EchoTool)); + + let mut s = spec("twice.js", Some(vec!["echo".to_string()])); + s.limits.max_tool_calls = Some(1); + let tool = AgentDirScriptTool::new(s, registry); + + let out = tool + .execute(&json!({}), &ToolContext::new(dir.path().to_path_buf())) + .await + .unwrap(); + + assert!( + out.content.contains("maxToolCalls") || !out.success, + "second tool call must be blocked by the pinned limit: {}", + out.content + ); + } + + /// `inputs` from the caller reach the script's `run(ctx, inputs)` second arg; + /// a missing `inputs` defaults to `{}` (no panic). + #[tokio::test] + async fn script_tool_passes_inputs_and_defaults_when_missing() { + let dir = tempfile::tempdir().unwrap(); + std::fs::write( + dir.path().join("echoback.js"), + r#"async function run(ctx, inputs) { + return { got: inputs && inputs.name ? 
inputs.name : "DEFAULT" }; + }"#, + ) + .unwrap(); + let registry = Arc::new(ToolRegistry::new(dir.path().to_path_buf())); + let tool = AgentDirScriptTool::new(spec("echoback.js", Some(vec![])), registry); + + let with = tool + .execute( + &json!({ "inputs": { "name": "ada" } }), + &ToolContext::new(dir.path().to_path_buf()), + ) + .await + .unwrap(); + assert!( + with.content.contains("ada"), + "inputs reach the script: {}", + with.content + ); + + let without = tool + .execute(&json!({}), &ToolContext::new(dir.path().to_path_buf())) + .await + .unwrap(); + assert!( + without.content.contains("DEFAULT"), + "missing inputs defaults to {{}}: {}", + without.content + ); + } + + /// The wrapper runs the pinned script through the QuickJS path and the script + /// can call an allowed host tool via `ctx`. + #[tokio::test] + async fn script_tool_runs_pinned_script_and_calls_allowed_tool() { + let dir = tempfile::tempdir().unwrap(); + std::fs::write( + dir.path().join("echo.js"), + r#"async function run(ctx, inputs) { + const r = await ctx.tool("echo", { message: inputs.message }); + return { echoed: r.output }; + }"#, + ) + .unwrap(); + + let registry = Arc::new(ToolRegistry::new(dir.path().to_path_buf())); + registry.register(Arc::new(EchoTool)); + + let tool = + AgentDirScriptTool::new(spec("echo.js", Some(vec!["echo".to_string()])), registry); + assert_eq!(tool.name(), "echo-runner"); + + let out = tool + .execute( + &json!({ "inputs": { "message": "hi" } }), + &ToolContext::new(dir.path().to_path_buf()), + ) + .await + .unwrap(); + + assert!(out.success, "script should succeed: {}", out.content); + assert!(out.content.contains("echo:hi"), "got: {}", out.content); + } + + /// The pinned allow-list is enforced: a tool not in `allowed_tools` is blocked + /// even though it is registered. 
+ #[tokio::test] + async fn script_tool_allow_list_blocks_unlisted_tool() { + let dir = tempfile::tempdir().unwrap(); + std::fs::write( + dir.path().join("echo.js"), + r#"async function run(ctx, inputs) { + const r = await ctx.tool("echo", { message: "x" }); + return { echoed: r.output }; + }"#, + ) + .unwrap(); + + let registry = Arc::new(ToolRegistry::new(dir.path().to_path_buf())); + registry.register(Arc::new(EchoTool)); + + // allowed_tools = [] → echo is NOT permitted. + let tool = AgentDirScriptTool::new(spec("echo.js", Some(vec![])), registry); + let out = tool + .execute(&json!({}), &ToolContext::new(dir.path().to_path_buf())) + .await + .unwrap(); + + // The ctx.tool("echo") call is rejected inside the sandbox; the script + // throws and the program run surfaces a failure (not echo:x). + assert!( + !out.content.contains("echo:x"), + "allow-list must block echo" + ); + } +} diff --git a/core/src/tools/mod.rs b/core/src/tools/mod.rs index 645bdfa..21682d9 100644 --- a/core/src/tools/mod.rs +++ b/core/src/tools/mod.rs @@ -9,6 +9,7 @@ //! └── builtin tools (bash, read, write, edit, grep, glob, ls, patch, web_fetch, web_search) //! 
``` +mod agent_dir_script_tool; mod artifacts; pub(crate) mod builtin; pub(crate) mod process; @@ -19,6 +20,7 @@ pub mod skill; pub mod task; mod types; +pub use agent_dir_script_tool::AgentDirScriptTool; pub use artifacts::{ArtifactStore, ArtifactStoreLimits, ToolArtifact}; pub(crate) use builtin::register_skill; pub use builtin::{ diff --git a/core/src/workspace/mod.rs b/core/src/workspace/mod.rs index 5213884..b95d350 100644 --- a/core/src/workspace/mod.rs +++ b/core/src/workspace/mod.rs @@ -926,7 +926,7 @@ fn has_windows_path_prefix(input: &str) -> bool { input.starts_with("\\\\") || input.starts_with("//") } -fn validate_relative_pattern(pattern: &str, label: &str) -> Result<()> { +pub(crate) fn validate_relative_pattern(pattern: &str, label: &str) -> Result<()> { let pattern = pattern.trim(); if pattern.is_empty() { bail!("{label} cannot be empty"); diff --git a/core/tests/test_agent_dir_eve.rs b/core/tests/test_agent_dir_eve.rs new file mode 100644 index 0000000..e6a3d81 --- /dev/null +++ b/core/tests/test_agent_dir_eve.rs @@ -0,0 +1,171 @@ +//! End-to-end integration test for the eve-style filesystem-first agent directory +//! convention: a single on-disk directory with EVERY supported sub-convention +//! (instructions, agent.acl, skills/, schedules/, tools/) loads into a +//! fully-populated [`AgentDir`]. Hermetic — no provider, no network. + +use std::fs; +use std::path::{Path, PathBuf}; + +use a3s_code_core::config::{AgentDir, ToolSpec}; + +/// Write `content` to `dir/rel`, creating parent dirs. +fn write(dir: &Path, rel: &str, content: &str) { + let path = dir.join(rel); + fs::create_dir_all(path.parent().unwrap()).unwrap(); + fs::write(path, content).unwrap(); +} + +/// A realistic agent dir exercising every sub-convention at once. +fn build_full_agent_dir() -> tempfile::TempDir { + let dir = tempfile::tempdir().unwrap(); + let p = dir.path(); + + write(p, "instructions.md", "You are a release agent. 
Be terse.\n"); + + // agent.acl → CodeConfig override (observable via default_model). + write( + p, + "agent.acl", + r#" +default_model = "anthropic/claude-sonnet-4-20250514" +providers "anthropic" { + api_key = "test-key" + models "claude-sonnet-4-20250514" { name = "Claude Sonnet 4" } +} +"#, + ); + + // skills/ → appended to skill_dirs. + write( + p, + "skills/summarize.md", + "---\nname: summarize\ndescription: summarize text\n---\n# Summarize\n", + ); + + // schedules/ → one enabled (named), one explicitly disabled. + write( + p, + "schedules/daily.md", + "---\ncron: \"0 9 * * *\"\nname: daily-report\n---\nGenerate the daily report.\n", + ); + write( + p, + "schedules/paused.md", + "---\ncron: \"*/5 * * * *\"\nenabled: false\n---\nThis one is paused.\n", + ); + + // tools/ → one mcp, one sandboxed script. + write( + p, + "tools/github.md", + "---\nkind: mcp\nname: github\ntransport: stdio\ncommand: echo\nargs: [\"hi\"]\n---\nGitHub MCP tools.\n", + ); + write( + p, + "tools/search.md", + "---\nkind: script\nname: search-auth\npath: scripts/search.js\nallowed_tools: [grep, read]\nlimits:\n timeoutMs: 20000\n maxToolCalls: 8\n---\nFind auth-related files.\n", + ); + + dir +} + +#[test] +fn full_agent_dir_loads_every_convention() { + let dir = build_full_agent_dir(); + let agent = AgentDir::load(dir.path()).expect("agent dir loads"); + + // instructions.md → role slot (trimmed), NOT a system-prompt override. + assert_eq!( + agent.prompt_slots.role.as_deref(), + Some("You are a release agent. Be terse.") + ); + + // agent.acl → CodeConfig override is applied. + assert_eq!( + agent.config.default_model.as_deref(), + Some("anthropic/claude-sonnet-4-20250514"), + "agent.acl default_model must override the default config" + ); + + // skills/ → appended to skill_dirs. 
+ assert!( + agent + .config + .skill_dirs + .iter() + .any(|d| d.ends_with("skills")), + "skills/ dir must be appended to skill_dirs" + ); + + // schedules/ → both parsed (sorted by path: daily, then paused), enabled flags + // preserved; the serve layer is what skips disabled ones. + assert_eq!(agent.schedules.len(), 2); + let daily = agent + .schedules + .iter() + .find(|s| s.name == "daily-report") + .expect("named schedule"); + assert_eq!(daily.cron, "0 9 * * *"); + assert_eq!(daily.prompt, "Generate the daily report."); + assert!(daily.enabled); + let paused = agent + .schedules + .iter() + .find(|s| s.name == "paused") + .expect("file-stem-named schedule"); + assert!(!paused.enabled, "enabled: false must be honored"); + + // tools/ → one mcp + one script (sorted by path: github, then search). + assert_eq!(agent.tools.len(), 2); + let gh = agent.tools.iter().find(|t| t.name() == "github").unwrap(); + assert_eq!(gh.kind(), "mcp"); + + let search = agent + .tools + .iter() + .find(|t| t.name() == "search-auth") + .expect("script tool"); + assert_eq!(search.kind(), "script"); + let ToolSpec::Script(spec) = search else { + panic!("expected a script tool spec"); + }; + assert_eq!(spec.path, PathBuf::from("scripts/search.js")); + assert_eq!(spec.description, "Find auth-related files."); + assert_eq!( + spec.allowed_tools.as_deref(), + Some(["grep".to_string(), "read".to_string()].as_slice()) + ); + assert_eq!(spec.limits.timeout_ms, Some(20000)); + assert_eq!(spec.limits.max_tool_calls, Some(8)); +} + +/// A minimal dir — only the required `instructions.md`, no optional subdirs — loads +/// with empty spec lists and the default config. 
+#[test] +fn minimal_agent_dir_loads_with_empty_specs() { + let dir = tempfile::tempdir().unwrap(); + write(dir.path(), "instructions.md", "Minimal agent.\n"); + + let agent = AgentDir::load(dir.path()).expect("minimal dir loads"); + assert_eq!(agent.prompt_slots.role.as_deref(), Some("Minimal agent.")); + assert!(agent.schedules.is_empty()); + assert!(agent.tools.is_empty()); + // No agent.acl → default config (no default_model pinned). + assert!(agent.config.default_model.is_none()); +} + +/// Loading a path that is not a directory, or a dir missing the required +/// instructions.md, is an error (fail closed). +#[test] +fn missing_dir_or_instructions_is_an_error() { + // Not a directory. + assert!(AgentDir::load("/nonexistent/a3s/agent/dir").is_err()); + + // Directory exists but has no instructions.md. + let dir = tempfile::tempdir().unwrap(); + fs::create_dir_all(dir.path().join("skills")).unwrap(); + assert!( + AgentDir::load(dir.path()).is_err(), + "instructions.md is required" + ); +} diff --git a/manual/AGENT_DIR_TOOLS_DESIGN.md b/manual/AGENT_DIR_TOOLS_DESIGN.md index 5b9d960..57ea414 100644 --- a/manual/AGENT_DIR_TOOLS_DESIGN.md +++ b/manual/AGENT_DIR_TOOLS_DESIGN.md @@ -1,6 +1,11 @@ # Agent-Dir `tools/` Mapping — Design Doc -Status: DESIGN ONLY (no implementation). Scope: how the optional `tools/` +Status: IMPLEMENTED. Both backends ship: `kind = "mcp"` and `kind = "script"`. +The loader (`load_tools` in `core/src/config/agent_dir.rs`) parses both into +`ToolSpec::{Mcp, Script}`; `serve::install_agent_dir_tools` registers them at +session build — MCP via `add_mcp_server`, script via the new +`AgentDirScriptTool` (`core/src/tools/agent_dir_script_tool.rs`), a thin facade +over the existing `program` QuickJS path. Scope: how the optional `tools/` subdirectory of an eve-style agent directory becomes *executable* tools in A3S Code without ever running arbitrary host JavaScript or arbitrary host processes. @@ -10,9 +15,9 @@ processes. 
> NEVER turned into a free-running host JS/native process, and it NEVER gets to > define its own tool-visibility or safety policy. Tool *definition* is allowed > from the directory; tool *visibility* and *safety* remain harness-owned. This -> is the deliberate divergence from eve's user-defined-tools model already -> documented in `core/src/config/agent_dir.rs` (the module header) and is the -> reason `tools/` is currently only a reserved placeholder (`agent_dir.rs:15`). +> is the deliberate divergence from eve's user-defined-tools model, documented in +> the `core/src/config/agent_dir.rs` module header, and is why the directory +> selects between two harness-owned backends rather than running arbitrary code. --- @@ -28,13 +33,12 @@ agent/ ├── agent.acl (optional) → CodeConfig ├── skills/ (optional) → CodeConfig.skill_dirs ├── schedules/ (optional) → Vec<ScheduleSpec> (serve layer) -├── channels/ (optional) → Vec<ChannelSpec> (design-only) -└── tools/ (optional) → reserved (THIS DOC) +└── tools/ (optional) → Vec<ToolSpec> (load_tools, THIS DOC) ``` -`tools/` is parsed by NOTHING today — line 15 of `agent_dir.rs` is a comment -only, and `AgentDir::load` does not walk the subdirectory. Two runtime seams -already exist that this design reuses verbatim; we are wiring, not inventing: +`tools/` is parsed by `load_tools` into `AgentDir::tools` and installed at session +build time. Two runtime seams already exist that this design reuses verbatim; we +wired, we did not invent: 1. MCP registration — `AgentSession::add_mcp_server(McpServerConfig)` (`core/src/agent_api.rs:1305`) → `SessionExtensionRuntime::add_mcp_server` @@ -61,20 +65,15 @@ backends to instantiate and with what bounded parameters. Each entry is a single declarative file. We do NOT use a `.js` file as the manifest (a bare `.js` is ambiguous — is it the program source or a config?), -and we do NOT execute the manifest. 
Two accepted forms, chosen to match the rest -of the convention and the existing config loaders: +and we do NOT execute the manifest. The accepted form (as shipped) is: -- `tools/.acl` — HCL/ACL (preferred per repo Code Style: "Prefer HCL over - TOML"). Parsed with the same path `CodeConfig::from_file` already uses for - `agent.acl`. - `tools/.md` — YAML frontmatter + body, identical mechanics to - `schedules/*.md` and `channels/*.{md,acl}`. Reuses `split_frontmatter` - (`agent_dir.rs:202`) and `md_files` (`agent_dir.rs:126`). The body, when - present, is treated as the tool *description* surfaced to the model. + `schedules/*.md`. Reuses `split_frontmatter` and `md_files` (so only `*.md` + files are read). The body, when present, is treated as the tool *description* + surfaced to the model. -A `kind:` discriminant selects the backend. This mirrors `ChannelFront.kind` -exactly (`agent_dir.rs:230`), so there is one established pattern for "a spec -file that names which adapter handles it." +A `kind:` discriminant selects the backend — the same "a spec file names which +adapter handles it" pattern the schedule loader uses for its frontmatter. ```text kind = "mcp" → MCP server connection (backend 1) @@ -92,19 +91,23 @@ The spec is a 1:1 surface over the *already-deserializable* hand-written `Deserialize` accepting the flat ACL form (`transport = "stdio" | "http" | "streamable-http"`, plus `command`/`args` or `url`/`headers`), `env`, `oauth`, and `tool_timeout_secs`. So the loader does -*no new parsing*: it deserializes the file body into `McpServerConfig`. - -```hcl -# tools/github.acl -kind = "mcp" -name = "github" # used for the mcp__github__* prefix -transport = "stdio" -command = "npx" -args = ["-y", "@modelcontextprotocol/server-github"] -enabled = true +*no new parsing*: it deserializes the file's YAML frontmatter into +`McpServerConfig`. 
+ +```md +--- +# tools/github.md +kind: mcp +name: github # used for the mcp__github__* prefix +transport: stdio +command: npx +args: ["-y", "@modelcontextprotocol/server-github"] +enabled: true # secrets come from the process env, not the file (see Security) -env = { GITHUB_TOKEN = "${env:GITHUB_TOKEN}" } -tool_timeout_secs = 60 +env: { GITHUB_TOKEN: "${env:GITHUB_TOKEN}" } +tool_timeout_secs: 60 +--- +GitHub issues and PR tools. (← optional body = model-facing description) ``` ### 2.3 `kind = "script"` spec → bounded `program` invocation @@ -133,8 +136,8 @@ The source must define `async function run(ctx, inputs)` — enforced by ### 2.4 Loader output (parse, don't wire, in `AgentDir::load`) -Following the `schedules`/`channels` precedent, `AgentDir::load` only *parses* -`tools/` into typed specs and stores them on `AgentDir`. Actual registration is +Following the `schedules` precedent, `AgentDir::load` only *parses* `tools/` into +typed specs and stores them on `AgentDir`. Actual registration is done at session build time (§3), exactly as `add_mcp_server` is a session-level operation today. Proposed shape (names illustrative, not prescriptive): @@ -154,7 +157,7 @@ pub struct ScriptToolSpec { ``` Parsing rules (fail closed, mirroring existing loaders): -- No frontmatter / missing `kind` → error, like `schedules`/`channels`. +- No frontmatter / missing `kind` → error, like `schedules`. - `kind = "mcp"` with a malformed `McpServerConfig` → propagate the serde error. - `kind = "script"` whose `path` does not end `.js`/`.mjs` → error at load (do not defer to first call). @@ -236,20 +239,25 @@ chokepoint is why "tool definition from the dir" cannot smuggle past 5. Backend-internal guards remain: - `script`: the QuickJS VM has no fs/net/proc/env; the *only* outbound capability is `ctx.tool(...)`, and `execute_host_tool_json` - (`program_tool.rs:436`) re-checks the per-script `allowed_tools` set and - the `maxToolCalls` counter on every hop. 
Crucially, those inner `ctx` calls - re-enter `registry.execute_with_context`, so a script calling `bash` is - still a `bash` execution subject to the same boundary checks. + (`program_tool.rs:436`) enforces the per-script `allowed_tools` set and the + `maxToolCalls` counter on every hop. Note the boundary precisely: those inner + `ctx` calls go through `ToolRegistry::execute_with_context` directly — they + are NOT re-evaluated against the session `PermissionChecker`/HITL (that gate + runs in the agent loop for the model-selected `program`/script call, not for + the script's internal hops). So the **allow-list is the boundary** for what a + directory script may reach, and the loader fails it closed (empty by default). - `mcp`: A3S never runs the server's code; it exchanges JSON-RPC. The server is a separate process/endpoint owned by the transport layer. -Net: a `tools/` file can *add a callable name*, but every actual invocation is -double-gated (permission check + backend sandbox), and the name itself is -non-shadowing and harness-namespaced (`mcp__…` for MCP). There is no path by -which a directory file executes arbitrary host JS or an arbitrary host process. +Net: a `tools/` file can *add a callable name*. The model-selected call to it is +permission-gated like any tool, and the name is non-shadowing and +harness-namespaced (`mcp__…` for MCP). A `script`'s inner tool calls are bounded +by its pinned allow-list + the QuickJS sandbox rather than the permission policy. +There is no path by which a directory file executes arbitrary host JS or an +arbitrary host process. 
```text -tools/.{acl,md} +tools/.md │ AgentDir::load (parse only) ▼ ToolSpec ──┬─ Mcp(McpServerConfig) ─► session.add_mcp_server ─► McpToolWrapper (mcp__name__*) @@ -262,12 +270,13 @@ ToolSpec ──┬─ Mcp(McpServerConfig) ─► session.add_mcp_server ─► │ MCP: JSON-RPC to server │ script: QuickJS VM, │ frozen ctx, allow-list, - ▼ limits — ctx.tool re-checks + ▼ limits — ctx.tool allow-list + (NOT the permission policy) ``` --- -## 5. (d) Minimal trait/seam to add later +## 5. (d) The minimal trait/seam (as built) Goal: the smallest possible new surface, reusing both existing backends. Two trait-free functions plus one tiny `Tool` impl. No new manager, no new sandbox, @@ -276,9 +285,9 @@ no new permission path (Rule 2: this is an *extension*, the core is untouched). ### Seam 1 — parse (in `config/agent_dir.rs`, alongside `load_schedules`) ```rust -fn load_tools(dir: &Path) -> Result>; // mirrors load_schedules/load_channels +fn load_tools(dir: &Path) -> Result>; // mirrors load_schedules ``` -Called from `AgentDir::load` after `channels`; stores `tools: Vec` on +Called from `AgentDir::load` after `schedules`; stores `tools: Vec` on `AgentDir`. Pure parsing — fail closed, no I/O beyond reading the files. ### Seam 2 — register (session build time, NOT inside `AgentDir::load`) diff --git a/manual/CHANNELS_DESIGN.md b/manual/CHANNELS_DESIGN.md deleted file mode 100644 index 30ae4b3..0000000 --- a/manual/CHANNELS_DESIGN.md +++ /dev/null @@ -1,444 +0,0 @@ -# Inbound Channels — Design Doc (eve parity) - -Status: DESIGN ONLY. No implementation. This document specifies the seam for -turning external inbound messages (HTTP, Slack, Discord) into full harness turns -on a filesystem-first agent directory. It is the channel companion to the cron -`schedule` layer that already ships under the `serve` feature. 
- -Scope anchors (already in tree): - -- `core/src/config/agent_dir.rs` — parses `channels/*.{md,acl}` into - `ChannelSpec { name, kind, frontmatter }` and stores them on - `AgentDir { channels: Vec, .. }`. This is done; this doc does NOT - change the parser. -- `core/src/serve/schedule.rs` — `ScheduleSink` trait + `Scheduler` (cron loop). -- `core/src/serve/daemon.rs` — `SessionScheduleSink` + `serve_agent_dir(..)`. -- `core/src/serve/mod.rs` — `serve` feature module, re-exports. - -The channel layer MIRRORS the schedule layer. Where a schedule is a *time* -source that fires a fixed prompt, a channel is a *message* source that fires an -arriving message. Both converge on the SAME invariant and the SAME execution -primitive (`AgentSession::send`). - ---- - -## 0. The core invariant (read this first) - -> Every inbound channel message becomes a FULL harness turn via -> `AgentSession::send`, never a raw model call. - -This is the entire reason the layer exists, and it is non-negotiable. An adapter -NEVER touches `LlmClient`, never assembles a prompt string and calls a provider, -never short-circuits tool/visibility/safety. The ONLY thing an adapter is -allowed to do with an inbound message is: - -```rust -let result: AgentResult = session.send(&message.text, None).await?; -``` - -`AgentSession::send(&self, prompt: &str, history: Option<&[Message]>) -> Result` -is what carries: - -- **Context**: the agent dir's `instructions.md` (injected as a prompt SLOT via - `SessionOptions::prompt_slots`, not a system-prompt override), the session's - accumulated history (`history = None` => use+update the session's own history), - and any `skill_dirs`/context providers configured on the session. -- **Tool visibility**: tools are harness-owned. The adapter does not declare, - hide, or inject tools — this is the deliberate divergence from eve's - user-defined-tools model already documented in `agent_dir.rs`. 
-- **Safety gate**: permission checker, security provider (taint/sanitization), - HITL confirmation, and budget guard all run inside `send`. -- **Verification**: `AgentResult.verification_reports` is populated by the - harness's completion-evidence path; the adapter may surface it but never - replaces it. - -A reviewer can enforce the invariant with one grep: an adapter module that -imports `crate::llm::LlmClient` (other than to *forward* a host-supplied one into -`SessionOptions`) is wrong. The only execution call in any adapter is -`session.send(..)`. - -Inbound text is UNTRUSTED. It enters as the `prompt` argument of a turn, which -means the harness's existing prompt-injection / taint defenses (security -provider, BOUNDARIES) are exactly the defenses that apply. Adapters add transport -authentication on top (signature verification), they do not add or weaken -model-layer safety. - ---- - -## 1. The `ChannelAdapter` trait seam - -Lives in a new file `core/src/serve/channel.rs`, parallel to `schedule.rs`. - -Two responsibilities, split into two traits to keep transport concerns out of -the session-routing core (single responsibility, mirrors how `ScheduleSink` is -separate from `Scheduler`): - -### 1.1 `ChannelSink` — the harness-binding seam (core, stable) - -The exact analogue of `ScheduleSink`. The serve daemon implements it; it is the -ONLY place a turn is fired. Adapters depend on this trait, never on -`AgentSession` directly — so the "must go through `send`" rule is structurally -enforced (an adapter literally has no other method to call). - -```rust -/// An inbound message normalized from any transport. The adapter produces it; -/// the sink turns it into a harness turn. -#[derive(Debug, Clone)] -pub struct InboundMessage { - /// Logical channel name (the `ChannelSpec.name`), e.g. "support-web". - pub channel: String, - /// Transport-stable conversation key used to derive the session id. 
- /// HTTP: caller-supplied conversation id (or a per-connection uuid). - /// Slack: channel_id + thread_ts. Discord: channel_id (+ thread id). - pub conversation: String, - /// The user's text — fired verbatim as the turn prompt. - pub text: String, - /// Opaque principal (Slack user id, Discord author id, HTTP auth subject). - /// Forwarded into SessionOptions.principal; never interpreted by core. - pub principal: Option, -} - -/// What to do when a channel message arrives. The serve daemon implements this -/// to drive the message into `AgentSession::send` — a FULL harness turn -/// (context, tool visibility, safety gate, verification), never a raw model call. -#[async_trait::async_trait] -pub trait ChannelSink: Send + Sync { - /// Fire one inbound message as a harness turn and return the reply. - /// Returns the reply so adapters can route it back (see §4). - async fn deliver(&self, msg: InboundMessage) -> crate::error::Result; -} - -/// The harness's answer, shaped for routing back to the transport. -#[derive(Debug, Clone)] -pub struct ChannelReply { - /// AgentResult.text — the text to send back to the user. - pub text: String, - /// Echo of the conversation key, so the adapter knows where to route. - pub conversation: String, - /// True when verification is NeedsReview (adapter may flag the reply). - pub needs_review: bool, -} -``` - -Note `deliver` RETURNS the reply rather than the sink pushing it. This keeps the -sink transport-agnostic (it knows nothing about Slack `chat.postMessage` or HTTP -response bodies) and lets each adapter route replies in its native idiom (§4). - -### 1.2 `ChannelAdapter` — the transport seam (extension, per-protocol) - -One impl per protocol. Owns the socket/listener, authenticates the transport, -normalizes wire payloads into `InboundMessage`, calls `sink.deliver(..)`, and -routes `ChannelReply` back. It runs until cancelled — exactly like a -`Scheduler::run` job loop. 
- -```rust -#[async_trait::async_trait] -pub trait ChannelAdapter: Send + Sync { - /// Stable kind string matched against `ChannelSpec.kind` ("http"/"slack"/"discord"). - fn kind(&self) -> &'static str; - - /// Bind the transport and serve inbound messages until `cancel` fires. - /// Every accepted message is normalized and handed to `sink.deliver(..)`; - /// the returned `ChannelReply` is routed back over the transport. - async fn serve( - &self, - spec: &crate::config::ChannelSpec, - sink: std::sync::Arc, - cancel: tokio_util::sync::CancellationToken, - ) -> crate::error::Result<()>; -} -``` - -Adapter construction parses `ChannelSpec.frontmatter` (raw YAML) into a -per-adapter typed options struct (`HttpOptions`, `SlackOptions`, `DiscordOptions`) -with `serde`. The frontmatter is already captured by the parser; adapters own its -interpretation, consistent with the `agent_dir.rs` doc comment ("`frontmatter` -carries the raw adapter options for whichever adapter eventually handles `kind`"). - -Why two traits and not one: `ChannelSink` is **core** (per Rule 2 — it is the -harness-binding contract, non-replaceable). `ChannelAdapter` is an **extension** — -each transport is replaceable/addable without touching the sink. A host could -ship an SQS or WhatsApp adapter by implementing `ChannelAdapter` alone. - ---- - -## 2. The three inbound adapters - -All three are feature-gated sub-modules under `core/src/serve/channels/` and -share the §1 traits. Each is one file (one protocol per file). They differ ONLY -in transport + auth + reply routing; the body of each handler is the same three -steps: normalize -> `sink.deliver` -> route reply. - -### 2.1 HTTP (`channels/http.rs`, kind = `"http"`) - -- **Transport**: a small HTTP listener (server side via a gated `axum` dep — see - §6; `reqwest`/`tokio` are already deps). One `POST /message` endpoint. 
-- **Frontmatter** (`HttpOptions`): `port: u16`, optional `path: String` - (default `/message`), optional `auth_token: String` (bearer) or - `auth_token_env: String`. -- **Auth**: constant-time bearer-token compare; 401 on mismatch. No token => - bind loopback only and log a warning (never expose an unauthenticated agent to - `0.0.0.0`). -- **Request body**: `{ "conversation": "", "text": "..." }`. Missing - `conversation` => generate a per-request uuid (stateless one-shot turn). -- **Reply routing**: synchronous — the `ChannelReply.text` is the HTTP 200 JSON - body `{ "text": "...", "needs_review": bool }`. This is the simplest adapter - and the reference impl for the trait. - -### 2.2 Slack (`channels/slack.rs`, kind = `"slack"`) - -- **Transport**: Slack Events API over HTTP (recommended for parity with eve and - to avoid a socket-mode websocket dependency). The adapter exposes one webhook - endpoint Slack POSTs events to. -- **Frontmatter** (`SlackOptions`): `signing_secret_env`, `bot_token_env`, - optional `port`/`path`, optional `event_types` filter (default - `app_mention` + `message.im`). -- **Auth**: verify the `X-Slack-Signature` HMAC-SHA256 over - `v0:{X-Slack-Request-Timestamp}:{body}` using the signing secret; reject stale - timestamps (replay guard). Respond to Slack's `url_verification` challenge. -- **Normalize**: `conversation = format!("{channel_id}:{thread_ts_or_ts}")` so a - thread maps to one session; strip the bot mention from `text`; `principal = - event.user`. ACK Slack within 3s (return 200 immediately) and run `deliver` on - a spawned task — Slack retries on slow ACK, so the turn must not block the ACK. -- **Reply routing**: asynchronous — post `ChannelReply.text` back via - `chat.postMessage` to the same channel/thread using the bot token. This is the - first adapter where reply routing is decoupled from the inbound request, which - is exactly why `deliver` returns the reply rather than writing a response body. 
- -### 2.3 Discord (`channels/discord.rs`, kind = `"discord"`) - -- **Transport**: Discord Gateway websocket (Discord has no inbound-webhook model - for receiving messages; a bot must hold a gateway connection). The adapter - maintains the gateway heartbeat and subscribes to `MESSAGE_CREATE`. -- **Frontmatter** (`DiscordOptions`): `bot_token_env`, optional - `application_id`, optional `allowed_channels: Vec`, intent flags. -- **Auth**: the bot token authenticates the gateway connection itself; inbound - messages are trusted as gateway-delivered. Ignore messages authored by the bot - (loop guard) and messages outside `allowed_channels`. -- **Normalize**: `conversation = channel_id` (or thread id when present); - `text = message.content`; `principal = author.id`. -- **Reply routing**: asynchronous — `POST /channels/{id}/messages` with - `ChannelReply.text`. - -Adapter parity summary: - -| kind | transport | inbound auth | reply routing | session key | -|---------|----------------------|----------------------|--------------------------|-------------| -| http | HTTP listener | bearer token | sync HTTP 200 body | conversation id / per-req uuid | -| slack | Events API webhook | HMAC signature | async `chat.postMessage` | channel:thread_ts | -| discord | Gateway websocket | bot-token connection | async REST `messages` | channel/thread id | - ---- - -## 3. Session id mapping (one session per conversation) - -The schedule layer keys sessions by schedule name (`schedule:`). Channels -key sessions by **conversation**, because that is what should accumulate history. - -```text -session_id = format!("channel:{}:{}", msg.channel, msg.conversation) -``` - -- `msg.channel` is the `ChannelSpec.name` — isolates two different channels that - happen to share a conversation id. -- `msg.conversation` is the transport-stable thread key from §2. 
- -Properties this gives us, by mirroring the schedule design: - -- **Continuity**: repeated messages in the same thread reuse the same - `AgentSession`, so `send(text, None)` accumulates context across turns (the - harness owns history; the adapter passes `None`). -- **Isolation**: distinct threads / channels get distinct sessions; no - cross-talk. -- **Durability (later)**: because the id is stable and derived (not random), the - same `SessionStore`-backed rehydrate-on-boot path the daemon doc mentions for - schedules applies unchanged — a restarted daemon resumes a conversation's - session by recomputing its id. - -Session lifecycle: unlike schedules (fixed, known at boot), channel -conversations are dynamic and unbounded. The `SessionChannelSink` therefore -holds a `tokio::sync::Mutex>>` and lazily -creates a session on first message for a conversation (get-or-create under the -lock), reusing the agent dir's `prompt_slots` + `skill_dirs` exactly as -`serve_agent_dir` does for schedules. A retention/idle policy (LRU cap, idle -eviction via `session.close()`) is REQUIRED to avoid unbounded session growth — -this is the one place channels need machinery schedules do not. Recommend a -configurable `max_live_sessions` with LRU close; default conservative. - ---- - -## 4. Reply routing - -The harness produces `AgentResult.text`. The sink wraps it as -`ChannelReply { text, conversation, needs_review }` and returns it from -`deliver`. The ADAPTER routes it, because only the adapter knows the transport: - -- **HTTP**: serialize the reply as the synchronous HTTP response body. The - caller's `conversation` is echoed so a stateless client can correlate. -- **Slack/Discord**: the inbound request was already ACKed (Slack) or is a - fire-and-forget gateway event (Discord); the adapter routes the reply - out-of-band to `conversation` via the platform's send API, using credentials - from the adapter's frontmatter (`bot_token_env`). 
- -`needs_review` is derived from `AgentResult.verification_reports` (true when the -summary status is `NeedsReview`). Adapters MAY use it (e.g. prefix the Slack -reply with a warning, or set an HTTP header) but MUST NOT drop the reply — the -harness already decided the turn completed; surfacing review state is a -presentation concern. - -Errors: when `deliver` returns `Err` (closed session, send failure), HTTP -returns 5xx; Slack/Discord log and post a terse "couldn't process that" to the -conversation. The error is never the raw `CodeError` (don't leak internals to an -untrusted channel). - ---- - -## 5. Where adapters plug into `serve_agent_dir` - -Today `serve_agent_dir` builds per-schedule sessions, then runs one `Scheduler`. -Channels attach in the SAME function as a second concurrent driver, joined under -the same `cancel` token. Sketch (matches existing signatures — -`Agent::session(workspace, Some(SessionOptions))`, `AgentSession::send`, -`async_trait`): - -```rust -// in core/src/serve/daemon.rs, inside serve_agent_dir(..), after the schedule wiring: - -// 1. One sink shared by every adapter; it owns lazy per-conversation sessions -// and is the ONLY caller of AgentSession::send for channels. -let channel_sink: Arc = Arc::new(SessionChannelSink::new( - agent, // to build sessions on demand (Agent::session takes &self) - agent_dir.clone(), // prompt_slots + skill_dirs source - workspace.clone().into(), - extra.clone(), // merged SessionOptions, same rules as schedules -)); - -// 2. One adapter task per enabled channel, selected by kind. 
-let mut adapter_handles = Vec::new(); -for spec in &agent_dir.channels { - let adapter: Arc = match spec.kind.as_str() { - "http" => Arc::new(HttpChannelAdapter::default()), - "slack" => Arc::new(SlackChannelAdapter::default()), - "discord" => Arc::new(DiscordChannelAdapter::default()), - other => { tracing::warn!(kind = %other, "unknown channel kind; skipping"); continue; } - }; - let (spec, sink, cancel) = (spec.clone(), Arc::clone(&channel_sink), cancel.clone()); - adapter_handles.push(tokio::spawn(async move { - if let Err(e) = adapter.serve(&spec, sink, cancel).await { - tracing::warn!(channel = %spec.name, error = %e, "channel adapter stopped"); - } - })); -} - -// 3. Run schedules and channels concurrently; both stop on `cancel`. -// (Today the fn ends `scheduler.run(sink, cancel).await; Ok(())`. The new form -// joins the scheduler future with the adapter handles under the same token.) -``` - -Key points: - -- `SessionChannelSink` is the `ChannelSink` impl and lives in `daemon.rs` next to - `SessionScheduleSink`, for the same reason: it is the one place that holds - `AgentSession`s and fires `send`. The dispatch-by-`kind` match is the channel - analogue of `Scheduler::new(specs)`. -- Per-channel `SessionOptions` follow the EXACT merge rules already in - `serve_agent_dir`: `prompt_slots` defaults to `agent_dir.prompt_slots` if the - host didn't pin one; `skill_dirs` extends with `agent_dir.config.skill_dirs`; - `session_id` is set by the sink per conversation (§3); `principal` is filled - from `InboundMessage.principal`. -- No new execution machinery: the daemon stays a thin wiring layer over - `Agent`/`AgentSession`, consistent with `serve/mod.rs`'s "builds strictly on - top of existing primitives" promise. - ---- - -## 6. Feature gating - -Channels stay under the existing `serve` feature, but transport deps are split so -a host can take HTTP without dragging in Slack/Discord clients. 
Proposed -`Cargo.toml` additions (mirrors the existing `serve = ["dep:cron"]` shape): - -```toml -[features] -# unchanged: cron schedules -serve = ["dep:cron"] - -# Channel transports — each additive, each pulls only what it needs. -serve-channels = ["serve"] # the ChannelAdapter/ChannelSink seam -serve-http = ["serve-channels", "dep:axum"] # HTTP inbound adapter -serve-slack = ["serve-channels", "dep:axum", "dep:hmac", "dep:sha2"] # Events API + signature -serve-discord = ["serve-channels", "dep:tokio-tungstenite"] # gateway websocket -``` - -- `core/src/serve/channel.rs` (the traits) is gated on `serve-channels`. -- `core/src/serve/channels/http.rs` on `serve-http`, `slack.rs` on - `serve-slack`, `discord.rs` on `serve-discord`. -- The dispatch `match` in `serve_agent_dir` `#[cfg(..)]`s each arm so an - unbuilt transport is a clean "unknown kind; skipping" rather than a build - break. -- `serve/mod.rs` re-exports `ChannelSink`, `ChannelAdapter`, `InboundMessage`, - `ChannelReply` under `#[cfg(feature = "serve-channels")]`, alongside the - existing `ScheduleSink`/`Scheduler` exports. -- Library-only embedders with no `serve*` feature pay nothing — the parser - (`ChannelSpec`) is always present (it is just data), but no transport, - no server deps, no adapter code compiles. - ---- - -## 7. Test plan (TDD, no live network) - -Following the schedule layer's test style (sink counters, pre-cancelled tokens): - -1. **Sink invariant** — test `SessionChannelSink` with a record/replay - `LlmClient` injected via `SessionOptions.llm_client`, assert `deliver(msg)` - produces a session whose id is `channel::` and that the - reply text equals the stubbed model text. Proves the message went through - `send` (history grew, verification ran), not a raw call. -2. **Session reuse/isolation** — two `deliver`s with the same conversation reuse - one session (history length grows); different conversations get distinct - sessions. -3. 
**Adapter normalization (offline)** — feed each adapter a canned wire payload - (a captured Slack event JSON, a Discord `MESSAGE_CREATE`, an HTTP body) and - assert the produced `InboundMessage` fields (conversation key, stripped text, - principal). No socket — the normalize function is pure and unit-testable. -4. **Auth** — Slack signature verify accepts a correctly-signed fixture and - rejects a tampered body / stale timestamp; HTTP bearer compare rejects a bad - token. -5. **Cancellation** — `adapter.serve(spec, sink, pre_cancelled_token)` returns - `Ok(())` promptly without binding (mirrors - `serve_builds_per_schedule_session_and_stops_on_cancel`). -6. **Unknown kind** — `serve_agent_dir` with a `ChannelSpec { kind: "telegram" }` - logs and skips, does not error. - -No test leaves a socket bound or a temp file behind (CLAUDE.md TDD rule). - ---- - -## 8. What this design deliberately does NOT do - -- No user-defined tools per channel (harness owns tools — the documented eve - divergence). -- No bypass path: there is no API on `ChannelSink` other than `deliver`, and - `deliver` only calls `send`. A raw-model fast path is structurally impossible. -- No new persistence model: durable rehydration rides the same stable-session-id - + `SessionStore` mechanism the daemon already plans for schedules. -- No outbound-only "notification" channels — out of scope; this is the *inbound* - message seam. (A schedule that posts to Slack is the outbound story and already - works via a tool inside a scheduled turn.) -- No per-message model override — model is the session/agent-dir concern, not a - transport concern. - ---- - -## 9. Open design questions (flagged for review, not blocking the seam) - -1. Reply for multi-turn tool-using sessions can be slow; Slack/Discord want a - fast ACK + async reply (handled), but HTTP's synchronous reply could hit - client timeouts. Option: HTTP gains an optional async mode (`202 Accepted` + - callback URL). 
Deferred — sync is the correct reference default. -2. Session eviction policy numbers (`max_live_sessions`, idle TTL) — needs a - real-workload default; the mechanism (LRU + `close()`) is settled, the - constants are not. -3. Whether `InboundMessage` should carry attachments (Slack files, Discord - embeds) to use `send_with_attachments` instead of `send`. The seam allows it - (add a field + branch in the sink) but v1 is text-only for parity. diff --git a/sdk/node/Cargo.lock b/sdk/node/Cargo.lock index a6e9f9b..3ad220d 100644 --- a/sdk/node/Cargo.lock +++ b/sdk/node/Cargo.lock @@ -37,7 +37,7 @@ dependencies = [ [[package]] name = "a3s-code-core" -version = "3.6.2" +version = "4.0.0" dependencies = [ "a3s-acl 0.2.0", "a3s-ahp", @@ -92,7 +92,7 @@ dependencies = [ [[package]] name = "a3s-code-node" -version = "3.6.2" +version = "4.0.0" dependencies = [ "a3s-code-core", "anyhow", diff --git a/sdk/node/Cargo.toml b/sdk/node/Cargo.toml index 77eccad..8f3555f 100644 --- a/sdk/node/Cargo.toml +++ b/sdk/node/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "a3s-code-node" -version = "3.6.2" +version = "4.0.0" edition = "2021" authors = ["A3S Lab Team"] license = "MIT" @@ -11,7 +11,7 @@ description = "A3S Code Node.js bindings - Native addon via napi-rs" crate-type = ["cdylib"] [dependencies] -a3s-code-core = { version = "3.6.2", path = "../../core", features = ["ahp", "s3", "serve"] } +a3s-code-core = { version = "4.0.0", path = "../../core", features = ["ahp", "s3", "serve"] } napi = { version = "2", features = ["async", "napi6", "serde-json"] } napi-derive = "2" tokio = { version = "1.35", features = ["full"] } diff --git a/sdk/node/examples/package-lock.json b/sdk/node/examples/package-lock.json index 0ff4826..b940d50 100644 --- a/sdk/node/examples/package-lock.json +++ b/sdk/node/examples/package-lock.json @@ -18,7 +18,7 @@ }, "..": { "name": "@a3s-lab/code", - "version": "3.6.2", + "version": "4.0.0", "license": "MIT", "devDependencies": { "@napi-rs/cli": "^2", @@ -27,12 
+27,12 @@ "typescript": "^5.9.3" }, "optionalDependencies": { - "@a3s-lab/code-darwin-arm64": "3.6.2", - "@a3s-lab/code-linux-arm64-gnu": "3.6.2", - "@a3s-lab/code-linux-arm64-musl": "3.6.2", - "@a3s-lab/code-linux-x64-gnu": "3.6.2", - "@a3s-lab/code-linux-x64-musl": "3.6.2", - "@a3s-lab/code-win32-x64-msvc": "3.6.2" + "@a3s-lab/code-darwin-arm64": "4.0.0", + "@a3s-lab/code-linux-arm64-gnu": "4.0.0", + "@a3s-lab/code-linux-arm64-musl": "4.0.0", + "@a3s-lab/code-linux-x64-gnu": "4.0.0", + "@a3s-lab/code-linux-x64-musl": "4.0.0", + "@a3s-lab/code-win32-x64-msvc": "4.0.0" } }, "node_modules/@a3s-lab/code": { diff --git a/sdk/node/generated.d.ts b/sdk/node/generated.d.ts index d3fd504..30d1e4a 100644 --- a/sdk/node/generated.d.ts +++ b/sdk/node/generated.d.ts @@ -1237,11 +1237,12 @@ export declare class Agent { /** * Serve a filesystem-first agent directory's cron schedules until stopped. * - * Loads the directory by convention (`instructions.md` required, optional - * `agent.acl`, `skills/`, `schedules/*.md`) and starts one durable session - * per enabled schedule (stable id `schedule:`). Each schedule fires as - * a FULL harness turn (context, tool visibility, safety gate, verification), - * never a raw model call. + * Loads the directory by convention: `instructions.md` (required), optional + * `agent.acl`, `skills/`, `schedules/*.md` (cron jobs), and `tools/*.md` + * (`kind: mcp` servers or `kind: script` sandboxed QuickJS tools). It starts + * one durable session per enabled schedule (stable id `schedule:`) with + * the agent dir's tools installed; each schedule fires as a FULL harness turn + * (context, tool visibility, safety gate, verification), never a raw model call. * * Returns immediately with a {@link ServeHandle}; the daemon runs in the * background until `handle.stop()` is called. 
The handle MUST be kept and @@ -1253,7 +1254,7 @@ export declare class Agent { * await handle.stop(); * ``` * - * @param dir - Path to the agent directory to serve (defines schedules/skills/prompt) + * @param dir - Path to the agent directory (prompt/skills/schedules/tools) * @param workspace - Workspace directory each scheduled turn operates in * @param options - Optional session overrides merged into every schedule session * (model, llmClient, sessionStore, …); `promptSlots`/`sessionId` set here are diff --git a/sdk/node/package-lock.json b/sdk/node/package-lock.json index 114cdbd..026fcb4 100644 --- a/sdk/node/package-lock.json +++ b/sdk/node/package-lock.json @@ -1,12 +1,12 @@ { "name": "@a3s-lab/code", - "version": "3.6.2", + "version": "4.0.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@a3s-lab/code", - "version": "3.6.2", + "version": "4.0.0", "license": "MIT", "devDependencies": { "@napi-rs/cli": "^2", @@ -15,12 +15,12 @@ "typescript": "^5.9.3" }, "optionalDependencies": { - "@a3s-lab/code-darwin-arm64": "3.6.2", - "@a3s-lab/code-linux-arm64-gnu": "3.6.2", - "@a3s-lab/code-linux-arm64-musl": "3.6.2", - "@a3s-lab/code-linux-x64-gnu": "3.6.2", - "@a3s-lab/code-linux-x64-musl": "3.6.2", - "@a3s-lab/code-win32-x64-msvc": "3.6.2" + "@a3s-lab/code-darwin-arm64": "4.0.0", + "@a3s-lab/code-linux-arm64-gnu": "4.0.0", + "@a3s-lab/code-linux-arm64-musl": "4.0.0", + "@a3s-lab/code-linux-x64-gnu": "4.0.0", + "@a3s-lab/code-linux-x64-musl": "4.0.0", + "@a3s-lab/code-win32-x64-msvc": "4.0.0" } }, "node_modules/@a3s-lab/code-darwin-arm64": { diff --git a/sdk/node/package.json b/sdk/node/package.json index 6866568..7a73d97 100644 --- a/sdk/node/package.json +++ b/sdk/node/package.json @@ -1,6 +1,6 @@ { "name": "@a3s-lab/code", - "version": "3.6.2", + "version": "4.0.0", "description": "A3S Code - Native Node.js bindings for the coding-agent runtime", "main": "index.js", "types": "index.d.ts", @@ -43,11 +43,11 @@ "test:helpers": "node 
test-helpers.mjs" }, "optionalDependencies": { - "@a3s-lab/code-darwin-arm64": "3.6.2", - "@a3s-lab/code-linux-x64-gnu": "3.6.2", - "@a3s-lab/code-linux-x64-musl": "3.6.2", - "@a3s-lab/code-linux-arm64-gnu": "3.6.2", - "@a3s-lab/code-linux-arm64-musl": "3.6.2", - "@a3s-lab/code-win32-x64-msvc": "3.6.2" + "@a3s-lab/code-darwin-arm64": "4.0.0", + "@a3s-lab/code-linux-x64-gnu": "4.0.0", + "@a3s-lab/code-linux-x64-musl": "4.0.0", + "@a3s-lab/code-linux-arm64-gnu": "4.0.0", + "@a3s-lab/code-linux-arm64-musl": "4.0.0", + "@a3s-lab/code-win32-x64-msvc": "4.0.0" } } diff --git a/sdk/node/src/lib.rs b/sdk/node/src/lib.rs index 437fb14..bfbff0f 100644 --- a/sdk/node/src/lib.rs +++ b/sdk/node/src/lib.rs @@ -3005,11 +3005,12 @@ impl Agent { /// Serve a filesystem-first agent directory's cron schedules until stopped. /// - /// Loads the directory by convention (`instructions.md` required, optional - /// `agent.acl`, `skills/`, `schedules/*.md`) and starts one durable session - /// per enabled schedule (stable id `schedule:`). Each schedule fires as - /// a FULL harness turn (context, tool visibility, safety gate, verification), - /// never a raw model call. + /// Loads the directory by convention: `instructions.md` (required), optional + /// `agent.acl`, `skills/`, `schedules/*.md` (cron jobs), and `tools/*.md` + /// (`kind: mcp` servers or `kind: script` sandboxed QuickJS tools). It starts + /// one durable session per enabled schedule (stable id `schedule:`) with + /// the agent dir's tools installed; each schedule fires as a FULL harness turn + /// (context, tool visibility, safety gate, verification), never a raw model call. /// /// Returns immediately with a {@link ServeHandle}; the daemon runs in the /// background until `handle.stop()` is called. 
The handle MUST be kept and @@ -3021,7 +3022,7 @@ impl Agent { /// await handle.stop(); /// ``` /// - /// @param dir - Path to the agent directory to serve (defines schedules/skills/prompt) + /// @param dir - Path to the agent directory (prompt/skills/schedules/tools) /// @param workspace - Workspace directory each scheduled turn operates in /// @param options - Optional session overrides merged into every schedule session /// (model, llmClient, sessionStore, …); `promptSlots`/`sessionId` set here are diff --git a/sdk/node/test_serve.mjs b/sdk/node/test_serve.mjs index 93af364..74dd228 100644 --- a/sdk/node/test_serve.mjs +++ b/sdk/node/test_serve.mjs @@ -36,7 +36,7 @@ function mkConfigFile() { return p } -function writeAgentDir({ withSchedule }) { +function writeAgentDir({ withSchedule, withTools }) { const base = fs.mkdtempSync(path.join(os.tmpdir(), 'a3s-node-serve-')) fs.writeFileSync(path.join(base, 'instructions.md'), 'You are a terse test agent. Answer in one word.') if (withSchedule) { @@ -46,6 +46,13 @@ function writeAgentDir({ withSchedule }) { '---\ncron: "* * * * * *"\nname: tick\n---\nReply with exactly one word: PONG', ) } + if (withTools) { + fs.mkdirSync(path.join(base, 'tools')) + fs.writeFileSync( + path.join(base, 'tools', 'echo.md'), + '---\nkind: script\nname: echo-tool\npath: scripts/echo.js\n---\nEcho tool.\n', + ) + } return base } @@ -79,6 +86,21 @@ assert.equal(typeof Agent.prototype.serveAgentDir, 'function', 'Agent.serveAgent console.log('node sdk serve handle lifecycle ok') } +// ── Unit (hermetic): an agent dir with a kind:script tool loads + serves ───── +{ + const agent = await Agent.create(mkConfigFile()) + const dir = writeAgentDir({ withSchedule: false, withTools: true }) + const ws = fs.mkdtempSync(path.join(os.tmpdir(), 'a3s-node-serve-tools-ws-')) + // serveAgentDir loads the dir synchronously (AgentDir::load parses tools/ — + // here a `kind: script` spec); a malformed tool spec would throw here. 
A valid + // one yields a healthy handle that stops cleanly. + const handle = await agent.serveAgentDir(dir, ws) + assert.equal(handle.isStopped(), false, 'a tools/ kind:script agent dir should serve') + await handle.stop() + assert.equal(handle.isStopped(), true) + console.log('node sdk serve with kind:script tool ok') +} + // ── Integration (real provider, skipped without config) ───────────────────── { const config = repoConfig() diff --git a/sdk/python-bootstrap/pyproject.toml b/sdk/python-bootstrap/pyproject.toml index d976a8e..0bf50b3 100644 --- a/sdk/python-bootstrap/pyproject.toml +++ b/sdk/python-bootstrap/pyproject.toml @@ -7,7 +7,7 @@ name = "a3s-code" # Keep in sync with crates/code core release. The bootstrap loader fetches # the matching native wheel from `https://github.com/AI45Lab/Code/releases/tag/v` # at import time. -version = "3.6.2" +version = "4.0.0" description = "A3S Code Python SDK — pure-Python bootstrap that fetches the native wheel from GitHub Releases" readme = "README.md" license = {text = "MIT"} diff --git a/sdk/python-bootstrap/src/a3s_code/_bootstrap.py b/sdk/python-bootstrap/src/a3s_code/_bootstrap.py index fc1ecc3..62e0ba3 100644 --- a/sdk/python-bootstrap/src/a3s_code/_bootstrap.py +++ b/sdk/python-bootstrap/src/a3s_code/_bootstrap.py @@ -31,7 +31,7 @@ # Version is the bootstrap's own version, which equals the matching native # wheel version on GH Releases. Bumped by the release workflow. 
-__version__ = "3.6.2" +__version__ = "4.0.0" _DEFAULT_BASE_URL = "https://github.com/AI45Lab/Code/releases/download" _REQUEST_TIMEOUT_S = 120 diff --git a/sdk/python/Cargo.lock b/sdk/python/Cargo.lock index 4cb2a3d..825482e 100644 --- a/sdk/python/Cargo.lock +++ b/sdk/python/Cargo.lock @@ -37,7 +37,7 @@ dependencies = [ [[package]] name = "a3s-code-core" -version = "3.6.2" +version = "4.0.0" dependencies = [ "a3s-acl 0.2.0", "a3s-ahp", @@ -92,7 +92,7 @@ dependencies = [ [[package]] name = "a3s-code-py" -version = "3.6.2" +version = "4.0.0" dependencies = [ "a3s-code-core", "anyhow", diff --git a/sdk/python/Cargo.toml b/sdk/python/Cargo.toml index 53e6d01..34d8a26 100644 --- a/sdk/python/Cargo.toml +++ b/sdk/python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "a3s-code-py" -version = "3.6.2" +version = "4.0.0" edition = "2021" authors = ["A3S Lab Team"] license = "MIT" @@ -12,7 +12,7 @@ name = "a3s_code" crate-type = ["cdylib"] [dependencies] -a3s-code-core = { version = "3.6.2", path = "../../core", features = ["ahp", "s3", "serve"] } +a3s-code-core = { version = "4.0.0", path = "../../core", features = ["ahp", "s3", "serve"] } pyo3 = "0.23" tokio = { version = "1.35", features = ["full"] } serde_json = "1.0" diff --git a/sdk/python/pyproject.toml b/sdk/python/pyproject.toml index 5fe64f0..329eeda 100644 --- a/sdk/python/pyproject.toml +++ b/sdk/python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "a3s-code" -version = "3.6.2" +version = "4.0.0" description = "A3S Code - Native Python bindings for the coding-agent runtime" readme = "README.md" license = {text = "MIT"} diff --git a/sdk/python/src/lib.rs b/sdk/python/src/lib.rs index dd34ba0..a879f35 100644 --- a/sdk/python/src/lib.rs +++ b/sdk/python/src/lib.rs @@ -1071,20 +1071,22 @@ impl PyAgent { /// Serve a filesystem-first agent directory's cron schedules until stopped. 
/// - /// Loads the directory by convention (`instructions.md` required, optional - /// `agent.acl`, `skills/`, `schedules/*.md`) and starts one durable session - /// per enabled schedule (stable id `schedule:`). Each schedule fires as - /// a FULL harness turn (context, tool visibility, safety gate, verification), - /// never a raw model call. + /// Loads the directory by convention: `instructions.md` (required), optional + /// `agent.acl`, `skills/`, `schedules/*.md` (cron jobs), and `tools/*.md` + /// (`kind: mcp` servers or `kind: script` sandboxed QuickJS tools). It starts + /// one durable session per enabled schedule (stable id `schedule:`) with + /// the agent dir's tools installed; each schedule fires as a FULL harness turn + /// (context, tool visibility, safety gate, verification), never a raw model call. /// /// Returns immediately with a `ServeHandle`; the daemon runs in the /// background until `handle.stop()` is called. Dropping the handle does NOT /// cancel the daemon. /// /// Args: - /// dir: Path to the agent directory to serve (schedules/skills/prompt) + /// dir: Path to the agent directory (prompt/skills/schedules/tools) /// workspace: Workspace directory each scheduled turn operates in /// options: Optional SessionOptions merged into every schedule session + /// (model, llm_client, session_store, …) #[pyo3(signature = (dir, workspace, options=None))] fn serve_agent_dir( &self, diff --git a/sdk/python/tests/test_serve.py b/sdk/python/tests/test_serve.py index b93dbe7..207870b 100644 --- a/sdk/python/tests/test_serve.py +++ b/sdk/python/tests/test_serve.py @@ -34,7 +34,7 @@ """.strip() -def _write_agent_dir(*, with_schedule: bool) -> str: +def _write_agent_dir(*, with_schedule: bool, with_tools: bool = False) -> str: base = tempfile.mkdtemp(prefix="a3s-code-serve-") pathlib.Path(base, "instructions.md").write_text( "You are a terse test agent. Answer in one word." 
@@ -45,9 +45,31 @@ def _write_agent_dir(*, with_schedule: bool) -> str: (sched / "tick.md").write_text( '---\ncron: "* * * * * *"\nname: tick\n---\nReply with exactly one word: PONG' ) + if with_tools: + tools = pathlib.Path(base, "tools") + tools.mkdir() + (tools / "echo.md").write_text( + "---\nkind: script\nname: echo-tool\npath: scripts/echo.js\n---\nEcho tool.\n" + ) return base +def test_serve_with_script_tool() -> None: + """Unit (hermetic): serving a dir that contains a `tools/` `kind: script` + spec succeeds. serve_agent_dir runs AgentDir::load synchronously (parsing + tools/), so a malformed tool spec would raise here; a valid one yields a + healthy handle that stops cleanly. No provider call is made.""" + agent = Agent.create(INLINE_CONFIG) + agent_dir = _write_agent_dir(with_schedule=False, with_tools=True) + workspace = tempfile.mkdtemp(prefix="a3s-code-serve-tools-ws-") + + handle = agent.serve_agent_dir(agent_dir, workspace) + assert handle.is_stopped() is False, "a tools/ kind:script agent dir should serve" + handle.stop() + assert handle.is_stopped() is True + print("python sdk serve with kind:script tool ok") + + def test_serve_handle_lifecycle() -> None: """Unit (hermetic): serving a dir with no schedules returns a ServeHandle that reports not-stopped, and stop() is idempotent and reflected by @@ -105,6 +127,7 @@ def test_serve_real_schedule() -> None: def main() -> None: test_serve_handle_lifecycle() + test_serve_with_script_tool() test_serve_real_schedule()