import { describe, expect, it } from "vitest";
import {
  harnessProviders,
  harnessReasoningEfforts,
  harnessSupportsModel,
  modelProvider,
  preferredHarnessForModel,
  reasoningEffortsFor,
  snapHarnessToModel,
  snapModelToHarness,
} from "./harness-capabilities.js";

/** Canonical "provider/model" ids used as the candidate pool for the snapping cases below. */
const CATALOG = [
  "anthropic/claude-opus-4-6",
  "anthropic/claude-sonnet-4-6",
  "openai/gpt-5",
  "openai/gpt-5-mini",
  "moonshot/kimi-k2",
  "zai/glm-4.7",
];

describe("modelProvider", () => {
  it("extracts the provider prefix, or null for a bare id", () => {
    // [model id, expected provider]
    const expectations = [
      ["anthropic/claude-opus-4-6", "anthropic"],
      ["openrouter/openai/gpt-5", "openrouter"],
      ["gemini-2.5-flash-lite", null],
      ["", null],
    ] as const;
    for (const [id, provider] of expectations) {
      expect(modelProvider(id)).toBe(provider);
    }
  });
});

describe("harness ↔ model compatibility", () => {
  it("vendor-locked harnesses only accept their provider; router harnesses accept any", () => {
    // [harness, model id, whether the harness is expected to run it]
    const matrix = [
      ["claude-code", "anthropic/claude-sonnet-4-6", true],
      ["claude-code", "openai/gpt-5", false],
      ["codex", "openai/gpt-5", true],
      ["codex", "anthropic/claude-sonnet-4-6", false],
      ["kimi-code", "moonshot/kimi-k2", true],
      ["opencode", "openai/gpt-5", true],
    ] as const;
    for (const [harness, model, runnable] of matrix) {
      expect(harnessSupportsModel(harness, model)).toBe(runnable);
    }
  });

  it("aliases resolve to their base runner's lock", () => {
    const claudeAliasRunsOpenAi = harnessSupportsModel("claude", "openai/gpt-5");
    const kimiAliasRunsMoonshot = harnessSupportsModel("kimi", "moonshot/kimi-k2");
    expect(claudeAliasRunsOpenAi).toBe(false);
    expect(kimiAliasRunsMoonshot).toBe(true);
  });

  it("nanoclaw is router-backed — it runs any provider", () => {
    expect(harnessProviders("nanoclaw")).toBeNull();
    for (const model of ["openai/gpt-5", "anthropic/claude-sonnet-4-6"]) {
      expect(harnessSupportsModel("nanoclaw", model)).toBe(true);
    }
  });

  it("provider-less / sentinel ids are compatible everywhere", () => {
    const bareIds = [
      ["claude-code", "default"],
      ["codex", "gemini-2.5-flash-lite"],
    ] as const;
    for (const [harness, id] of bareIds) {
      expect(harnessSupportsModel(harness, id)).toBe(true);
    }
  });

  it("preferredHarnessForModel maps a vendor provider to its native harness", () => {
    // [model id, expected native harness — null when the provider has none]
    const nativeHarness = [
      ["anthropic/claude-opus-4-6", "claude-code"],
      ["openai/gpt-5", "codex"],
      ["moonshot/kimi-k2", "kimi-code"],
      ["zai/glm-4.7", null],
      ["default", null],
    ] as const;
    for (const [model, harness] of nativeHarness) {
      expect(preferredHarnessForModel(model)).toBe(harness);
    }
  });
});

describe("snapModelToHarness", () => {
  it("snaps an incompatible model to the harness's best catalog id (opus before sonnet)", () => {
    // [harness, requested model, expected snapped model]
    const snaps = [
      ["claude-code", "openai/gpt-5", "anthropic/claude-opus-4-6"],
      ["codex", "anthropic/claude-sonnet-4-6", "openai/gpt-5"],
      ["kimi-code", "openai/gpt-5", "moonshot/kimi-k2"],
    ] as const;
    for (const [harness, requested, snapped] of snaps) {
      expect(snapModelToHarness(harness, requested, CATALOG)).toBe(snapped);
    }
  });

  it("prefers the standard-frontier gpt over a mini variant despite lexical order", () => {
    const miniFirst = ["openai/gpt-5-mini", "openai/gpt-5"];
    const snapped = snapModelToHarness("codex", "anthropic/claude-opus-4-6", miniFirst);
    expect(snapped).toBe("openai/gpt-5");
  });

  it("leaves an already-compatible model unchanged", () => {
    const alreadyCompatible = [
      ["claude-code", "anthropic/claude-sonnet-4-6"],
      ["opencode", "openai/gpt-5"],
      ["nanoclaw", "openai/gpt-5"],
    ] as const;
    for (const [harness, model] of alreadyCompatible) {
      expect(snapModelToHarness(harness, model, CATALOG)).toBe(model);
    }
  });

  it("returns the original id when the catalog holds nothing compatible", () => {
    const noAnthropic = ["openai/gpt-5", "zai/glm-4.7"];
    expect(snapModelToHarness("claude-code", "openai/gpt-5", noAnthropic)).toBe("openai/gpt-5");
  });
});

describe("snapHarnessToModel", () => {
  it("adopts the model's native harness when the current one can't run it", () => {
    // [current harness, model id, expected harness after snapping]
    const adoptions = [
      ["claude-code", "openai/gpt-5", "codex"],
      ["codex", "anthropic/claude-opus-4-6", "claude-code"],
      ["claude-code", "moonshot/kimi-k2", "kimi-code"],
    ] as const;
    for (const [current, model, adopted] of adoptions) {
      expect(snapHarnessToModel(current, model)).toBe(adopted);
    }
  });

  it("keeps the harness when it already runs the model", () => {
    const kept = [
      ["claude-code", "anthropic/claude-opus-4-6"],
      ["nanoclaw", "openai/gpt-5"],
    ] as const;
    for (const [harness, model] of kept) {
      expect(snapHarnessToModel(harness, model)).toBe(harness);
    }
  });

  it("falls back to opencode for a provider with no native harness", () => {
    const fallback = snapHarnessToModel("codex", "zai/glm-4.7");
    expect(fallback).toBe("opencode");
  });
});

describe("reasoning effort support", () => {
  it("clamps each harness to its native ceiling", () => {
    const upToHigh = ["none", "minimal", "low", "medium", "high"];
    expect(harnessReasoningEfforts("cli-base")).toEqual(["none"]);
    expect(harnessReasoningEfforts("codex")).toEqual(upToHigh);
    expect(harnessReasoningEfforts("kimi-code")).toEqual(upToHigh);
    for (const fullRange of ["claude-code", "opencode"] as const) {
      expect(harnessReasoningEfforts(fullRange)).toContain("ultracode");
    }
  });

  it("nanoclaw expresses only `none` (its runner sends no thinking flag)", () => {
    const efforts = harnessReasoningEfforts("nanoclaw");
    expect(efforts).toEqual(["none"]);
  });

  it("narrows by the model's own capability", () => {
    const withoutReasoning = reasoningEffortsFor("claude-code", { supportsReasoning: false });
    expect(withoutReasoning).toEqual(["none"]);

    const cappedAtMedium = reasoningEffortsFor("claude-code", { maxEffort: "medium" });
    expect(cappedAtMedium).toEqual(["none", "minimal", "low", "medium"]);

    // A model ceiling above the harness ceiling must not widen the harness's own range.
    const codexWithUltracodeModel = reasoningEffortsFor("codex", { maxEffort: "ultracode" });
    expect(codexWithUltracodeModel).toEqual(harnessReasoningEfforts("codex"));
  });
});
// ── Harness ↔ model snapping (catalog-aware) ─────────────────────────────────

/**
 * Per-harness ranking patterns for {@link snapModelToHarness}, best first. Within one pattern the
 * highest-collating id wins (numeric-aware, so `-10` outranks `-9`). Only vendor-locked harnesses
 * need an entry — a harness without one contributes no patterns. Keyed by the BASE runner: the
 * lookup goes through `canonicalizeHarness`, so aliases must not appear as keys.
 */
const harnessPreferredModelPatterns: Partial<Record<HarnessType, readonly RegExp[]>> = {
  "claude-code": [
    /^anthropic\/claude-opus-[\d.-]+$/,
    /^anthropic\/claude-sonnet-[\d.-]+$/,
    /^anthropic\//,
  ],
  codex: [/^openai\/gpt-\d+(\.\d+)?$/, /^openai\/gpt/, /^openai\//],
  "kimi-code": [/^moonshot\//],
};

/**
 * Numeric-aware, case-insensitive collator used to rank model ids by version. The collator itself
 * orders ascending; callers pick the greatest id. The locale is pinned to `"en"` so the ranking
 * does not vary with the host's default locale.
 *
 * NOTE(review): numeric collation compares each digit run as a whole number, so a date-stamped id
 * (e.g. `…-opus-4-20250514`) would outrank `…-opus-4-6` — confirm catalog ids never carry date
 * suffixes, or rank on a parsed version instead.
 */
const versionAwareCollator = new Intl.Collator("en", {
  numeric: true,
  sensitivity: "base",
});

/**
 * Keep `modelId` when the harness can run it; otherwise pick the harness's best compatible id from
 * `candidateIds`.
 *
 * Selection walks the harness's preferred patterns in order and returns the highest-collating
 * candidate of the first pattern that matches anything (ties keep the earliest candidate). If no
 * pattern matches, the first candidate the harness supports is used. If nothing fits at all, the
 * original id is returned unchanged so the caller sees the incompatibility instead of a silent
 * wrong substitution.
 *
 * @param harness - Harness (or alias) the model must run on.
 * @param modelId - The currently selected model id.
 * @param candidateIds - Canonical ("provider/model") ids to choose from; the caller maps its own
 *   catalog shape down to ids, keeping this layer catalog-agnostic. Not mutated.
 * @returns `modelId` itself, or a replacement id taken from `candidateIds`.
 */
export function snapModelToHarness(
  harness: HarnessType,
  modelId: string,
  candidateIds: readonly string[],
): string {
  if (harnessSupportsModel(harness, modelId)) return modelId;

  const patterns = harnessPreferredModelPatterns[canonicalizeHarness(harness)] ?? [];
  for (const pattern of patterns) {
    // Single pass for the maximum; strict `>` keeps the first of any equally-ranked ids.
    let best: string | undefined;
    for (const id of candidateIds) {
      if (!pattern.test(id)) continue;
      if (best === undefined || versionAwareCollator.compare(id, best) > 0) best = id;
    }
    if (best !== undefined) return best;
  }

  // No ranked pattern matched: settle for any candidate the harness accepts.
  return candidateIds.find((id) => harnessSupportsModel(harness, id)) ?? modelId;
}

/**
 * Keep the harness when it can run `modelId`; otherwise return the model's native harness
 * (anthropic → claude-code, openai → codex, moonshot → kimi-code), falling back to the router-backed
 * `opencode` when the model has no native harness.
 *
 * @param harness - The currently selected harness.
 * @param modelId - The model id the harness must be able to run.
 * @returns `harness` itself when compatible, else the harness to switch to.
 */
export function snapHarnessToModel(
  harness: HarnessType,
  modelId: string,
): HarnessType {
  if (harnessSupportsModel(harness, modelId)) return harness;
  return preferredHarnessForModel(modelId) ?? "opencode";
}
*/ diff --git a/packages/agent-interface/tsconfig.json b/packages/agent-interface/tsconfig.json index bc0dad3..9e28990 100644 --- a/packages/agent-interface/tsconfig.json +++ b/packages/agent-interface/tsconfig.json @@ -11,5 +11,6 @@ "skipLibCheck": true, "types": ["node"] }, - "include": ["src"] + "include": ["src"], + "exclude": ["src/**/*.test.ts"] } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 837755b..9034a96 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -61,6 +61,9 @@ importers: typescript: specifier: ^6.0.3 version: 6.0.3 + vitest: + specifier: 'catalog:' + version: 4.1.9(@opentelemetry/api@1.9.1)(@types/node@25.6.0)(vite@8.1.0(@types/node@25.6.0)(yaml@2.9.0)) packages/agent-provider-cli-bridge: dependencies: