Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .changeset/nanoclaw-and-harness-snapping.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
"@tangle-network/agent-interface": minor
---

Make agent-interface the single source of truth for harness↔model snapping, and correct nanoclaw's capabilities.

- `nanoclaw` is now treated as router-backed (runs any model via the Tangle router) instead of Anthropic-locked, and its reasoning ceiling is `none` (its runner sends no thinking flag) instead of `ultracode`.
- Add `snapModelToHarness(harness, modelId, candidateIds)` and `snapHarnessToModel(harness, modelId)` so consumers (sandbox-ui, agent-app) import the catalog-aware snap logic instead of hand-rolling divergent copies.
5 changes: 4 additions & 1 deletion packages/agent-interface/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,16 @@
"build": "tsc -p tsconfig.json",
"check-types": "tsc --noEmit",
"clean": "rm -rf dist",
"test": "vitest run",
"test:watch": "vitest",
"prepare": "pnpm run build"
},
"dependencies": {
"zod": "catalog:"
},
"devDependencies": {
"@types/node": "catalog:",
"typescript": "^6.0.3"
"typescript": "^6.0.3",
"vitest": "catalog:"
}
}
137 changes: 137 additions & 0 deletions packages/agent-interface/src/harness-capabilities.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import { describe, expect, it } from "vitest";
import {
harnessProviders,
harnessReasoningEfforts,
harnessSupportsModel,
modelProvider,
preferredHarnessForModel,
reasoningEffortsFor,
snapHarnessToModel,
snapModelToHarness,
} from "./harness-capabilities.js";

/**
 * Fixture catalog of canonical `provider/model` ids, passed as `candidateIds` to
 * `snapModelToHarness` in the tests below.
 */
const CATALOG: readonly string[] = [
  // anthropic
  "anthropic/claude-opus-4-6",
  "anthropic/claude-sonnet-4-6",
  // openai
  "openai/gpt-5",
  "openai/gpt-5-mini",
  // moonshot
  "moonshot/kimi-k2",
  // zai
  "zai/glm-4.7",
];

// `modelProvider` returns the segment before the first "/" of a canonical id, or null when the id
// carries no provider prefix.
describe("modelProvider", () => {
  it("extracts the provider prefix, or null for a bare id", () => {
    // [model id, expected provider prefix]
    const prefixed = [
      ["anthropic/claude-opus-4-6", "anthropic"],
      ["openrouter/openai/gpt-5", "openrouter"],
    ] as const;
    for (const [modelId, provider] of prefixed) {
      expect(modelProvider(modelId)).toBe(provider);
    }

    // Ids without a provider segment (including the empty string) yield null.
    for (const bareId of ["gemini-2.5-flash-lite", ""]) {
      expect(modelProvider(bareId)).toBeNull();
    }
  });
});

// Which harness accepts which model id, and which harness is a model's native home.
describe("harness ↔ model compatibility", () => {
  it("vendor-locked harnesses only accept their provider; router harnesses accept any", () => {
    // [harness, model id, whether the harness is expected to accept it]
    const matrix = [
      ["claude-code", "anthropic/claude-sonnet-4-6", true],
      ["claude-code", "openai/gpt-5", false],
      ["codex", "openai/gpt-5", true],
      ["codex", "anthropic/claude-sonnet-4-6", false],
      ["kimi-code", "moonshot/kimi-k2", true],
      ["opencode", "openai/gpt-5", true],
    ] as const;
    for (const [harness, modelId, accepted] of matrix) {
      expect(harnessSupportsModel(harness, modelId)).toBe(accepted);
    }
  });

  it("aliases resolve to their base runner's lock", () => {
    const claudeAliasRunsOpenAi = harnessSupportsModel("claude", "openai/gpt-5");
    const kimiAliasRunsMoonshot = harnessSupportsModel("kimi", "moonshot/kimi-k2");

    expect(claudeAliasRunsOpenAi).toBe(false);
    expect(kimiAliasRunsMoonshot).toBe(true);
  });

  it("nanoclaw is router-backed — it runs any provider", () => {
    // No provider lock at all…
    expect(harnessProviders("nanoclaw")).toBeNull();
    // …so ids from different vendors are both accepted.
    for (const modelId of ["openai/gpt-5", "anthropic/claude-sonnet-4-6"]) {
      expect(harnessSupportsModel("nanoclaw", modelId)).toBe(true);
    }
  });

  it("provider-less / sentinel ids are compatible everywhere", () => {
    // [vendor-locked harness, id that carries no provider prefix]
    const providerLess = [
      ["claude-code", "default"],
      ["codex", "gemini-2.5-flash-lite"],
    ] as const;
    for (const [harness, modelId] of providerLess) {
      expect(harnessSupportsModel(harness, modelId)).toBe(true);
    }
  });

  it("preferredHarnessForModel maps a vendor provider to its native harness", () => {
    // [model id, native harness for its provider]
    const nativeHarnessByModel = [
      ["anthropic/claude-opus-4-6", "claude-code"],
      ["openai/gpt-5", "codex"],
      ["moonshot/kimi-k2", "kimi-code"],
    ] as const;
    for (const [modelId, nativeHarness] of nativeHarnessByModel) {
      expect(preferredHarnessForModel(modelId)).toBe(nativeHarness);
    }

    // Providers without a native harness, and provider-less ids, have no preference.
    for (const modelId of ["zai/glm-4.7", "default"]) {
      expect(preferredHarnessForModel(modelId)).toBeNull();
    }
  });
});

// Model-side snapping: keep a compatible model, otherwise pick the harness's best catalog id.
describe("snapModelToHarness", () => {
  it("snaps an incompatible model to the harness's best catalog id (opus before sonnet)", () => {
    // [harness, incompatible model id, id it should snap to within CATALOG]
    const snaps = [
      ["claude-code", "openai/gpt-5", "anthropic/claude-opus-4-6"],
      ["codex", "anthropic/claude-sonnet-4-6", "openai/gpt-5"],
      ["kimi-code", "openai/gpt-5", "moonshot/kimi-k2"],
    ] as const;
    for (const [harness, incompatibleId, snappedId] of snaps) {
      expect(snapModelToHarness(harness, incompatibleId, CATALOG)).toBe(snappedId);
    }
  });

  it("prefers the standard-frontier gpt over a mini variant despite lexical order", () => {
    // The mini variant is listed first on purpose.
    const miniFirst = ["openai/gpt-5-mini", "openai/gpt-5"];
    const snapped = snapModelToHarness("codex", "anthropic/claude-opus-4-6", miniFirst);

    expect(snapped).toBe("openai/gpt-5");
  });

  it("leaves an already-compatible model unchanged", () => {
    // [harness, model id the harness already runs]
    const compatible = [
      ["claude-code", "anthropic/claude-sonnet-4-6"],
      ["opencode", "openai/gpt-5"],
      ["nanoclaw", "openai/gpt-5"],
    ] as const;
    for (const [harness, modelId] of compatible) {
      expect(snapModelToHarness(harness, modelId, CATALOG)).toBe(modelId);
    }
  });

  it("returns the original id when the catalog holds nothing compatible", () => {
    const nothingForClaude = ["openai/gpt-5", "zai/glm-4.7"];
    const snapped = snapModelToHarness("claude-code", "openai/gpt-5", nothingForClaude);

    expect(snapped).toBe("openai/gpt-5");
  });
});

// Harness-side snapping: keep a harness that runs the model, otherwise move to the model's
// native harness or the `opencode` fallback.
describe("snapHarnessToModel", () => {
  it("adopts the model's native harness when the current one can't run it", () => {
    // [current harness, model id, harness it should snap to]
    const snaps = [
      ["claude-code", "openai/gpt-5", "codex"],
      ["codex", "anthropic/claude-opus-4-6", "claude-code"],
      ["claude-code", "moonshot/kimi-k2", "kimi-code"],
    ] as const;
    for (const [currentHarness, modelId, nativeHarness] of snaps) {
      expect(snapHarnessToModel(currentHarness, modelId)).toBe(nativeHarness);
    }
  });

  it("keeps the harness when it already runs the model", () => {
    // [harness, model id it already supports]
    const alreadyCompatible = [
      ["claude-code", "anthropic/claude-opus-4-6"],
      ["nanoclaw", "openai/gpt-5"],
    ] as const;
    for (const [harness, modelId] of alreadyCompatible) {
      expect(snapHarnessToModel(harness, modelId)).toBe(harness);
    }
  });

  it("falls back to opencode for a provider with no native harness", () => {
    const snapped = snapHarnessToModel("codex", "zai/glm-4.7");

    expect(snapped).toBe("opencode");
  });
});

// Reasoning-effort ladders: the per-harness ceiling, then narrowing by model capability.
describe("reasoning effort support", () => {
  it("clamps each harness to its native ceiling", () => {
    const upToHigh = ["none", "minimal", "low", "medium", "high"];

    expect(harnessReasoningEfforts("cli-base")).toEqual(["none"]);
    // Both of these stop at `high`.
    for (const harness of ["codex", "kimi-code"] as const) {
      expect(harnessReasoningEfforts(harness)).toEqual(upToHigh);
    }
    // Both of these reach the top rung.
    for (const harness of ["claude-code", "opencode"] as const) {
      expect(harnessReasoningEfforts(harness)).toContain("ultracode");
    }
  });

  it("nanoclaw expresses only `none` (its runner sends no thinking flag)", () => {
    const efforts = harnessReasoningEfforts("nanoclaw");

    expect(efforts).toEqual(["none"]);
  });

  it("narrows by the model's own capability", () => {
    const withoutReasoning = reasoningEffortsFor("claude-code", { supportsReasoning: false });
    expect(withoutReasoning).toEqual(["none"]);

    const cappedAtMedium = reasoningEffortsFor("claude-code", { maxEffort: "medium" });
    expect(cappedAtMedium).toEqual(["none", "minimal", "low", "medium"]);

    // model ceiling above the harness ceiling can't widen it
    const codexWithHigherModelCeiling = reasoningEffortsFor("codex", { maxEffort: "ultracode" });
    expect(codexWithHigherModelCeiling).toEqual(harnessReasoningEfforts("codex"));
  });
});
71 changes: 68 additions & 3 deletions packages/agent-interface/src/harness-capabilities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,13 @@ export const reasoningLadder: readonly ReasoningEffort[] = [
* Provider prefixes a harness is vendor-locked to (canonical-id prefix, e.g. `anthropic`, `openai`).
* A harness with no entry is router-backed: it runs any model. Keyed by the BASE runner — aliases
* (`claude`/`claudish`/`kimi`) resolve through `canonicalizeHarness` first.
*
* `nanoclaw` is deliberately absent despite the "claw" name: its runner routes every provider through
* the Tangle router (canonical model id straight to the gateway), so it is router-backed like
* `opencode` — not Anthropic-locked.
*/
const harnessProviderLock: Partial<Record<HarnessType, readonly string[]>> = {
  "claude-code": ["anthropic"],
  // No `nanoclaw` entry on purpose: a harness without an entry is router-backed and runs any
  // model. Listing it here would wrongly lock it to a single provider.
  codex: ["openai"],
  "kimi-code": ["moonshot"],
};
Expand Down Expand Up @@ -80,20 +83,82 @@ export function preferredHarnessForModel(modelId: string): HarnessType | null {
return null;
}

// ── Harness ↔ model snapping (catalog-aware) ─────────────────────────────────

/**
 * Ranked model-id patterns consulted by {@link snapModelToHarness}, keyed by the BASE runner (the
 * lookup canonicalizes aliases first). Earlier patterns outrank later ones; among ids matching the
 * same pattern the highest version is chosen (numeric-aware). Router-backed harnesses have no entry
 * because they never snap — they run the model as-is.
 */
const harnessPreferredModelPatterns: Partial<Record<HarnessType, readonly RegExp[]>> = {
  "claude-code": [
    // 1. versioned opus ids
    /^anthropic\/claude-opus-[\d.-]+$/,
    // 2. versioned sonnet ids
    /^anthropic\/claude-sonnet-[\d.-]+$/,
    // 3. any other anthropic id
    /^anthropic\//,
  ],
  codex: [
    // 1. plain `gpt-<major>[.<minor>]` — a suffixed id such as `gpt-5-mini` does not match
    /^openai\/gpt-\d+(\.\d+)?$/,
    // 2. any gpt id
    /^openai\/gpt/,
    // 3. any other openai id
    /^openai\//,
  ],
  "kimi-code": [/^moonshot\//],
};

/**
 * Numeric-aware collator used to rank model ids by version: digit runs compare as numbers
 * (`gpt-5` < `gpt-10`) and, with `sensitivity: "base"`, case and accent differences are ignored.
 *
 * The locale is pinned to `"en"` rather than the host default so the ranking is identical on every
 * machine — collation defaults vary by locale (for example, Thai ignores punctuation by default,
 * which changes how the `-` separators in versioned ids are weighed).
 *
 * Note: the collator itself orders ascending; callers obtain descending order by swapping the
 * arguments (`compare(b, a)`).
 */
const numericDesc = new Intl.Collator("en", {
  numeric: true,
  sensitivity: "base",
});

/**
 * Resolve which model id a harness should run.
 *
 * - If the harness already supports `modelId`, it is returned untouched.
 * - Otherwise the harness's preferred patterns are tried in rank order; the first pattern with any
 *   match in `candidateIds` wins, and among its matches the highest version is returned.
 * - If no pattern matches, the first candidate the harness supports is returned.
 * - If there is no such candidate either, `modelId` comes back unchanged so the caller can surface
 *   the incompatibility rather than run a silently substituted model.
 *
 * @param harness - Harness (or alias) that will run the model.
 * @param modelId - Currently selected model id.
 * @param candidateIds - Canonical ("provider/model") ids to choose from; callers reduce their own
 *   catalog shape to plain ids. The array is not mutated.
 * @returns The model id to use with `harness`.
 */
export function snapModelToHarness(
  harness: HarnessType,
  modelId: string,
  candidateIds: readonly string[],
): string {
  // Already runnable — nothing to snap.
  if (harnessSupportsModel(harness, modelId)) {
    return modelId;
  }

  const baseRunner = canonicalizeHarness(harness);
  const rankedPatterns = harnessPreferredModelPatterns[baseRunner] ?? [];

  for (const preferred of rankedPatterns) {
    // `filter` yields a fresh array, so the in-place sort never touches the caller's list.
    const ranked = candidateIds.filter((candidate) => preferred.test(candidate));
    ranked.sort((left, right) => numericDesc.compare(right, left));

    const [best] = ranked;
    if (best !== undefined) {
      return best;
    }
  }

  const firstSupported = candidateIds.find((candidate) =>
    harnessSupportsModel(harness, candidate),
  );
  return firstSupported ?? modelId;
}

/**
 * Resolve which harness should run `modelId`.
 *
 * The current harness is kept whenever it supports the model. Otherwise the model's native harness
 * is used (anthropic → claude-code, openai → codex, moonshot → kimi-code); a model with no native
 * harness falls back to the router-backed `opencode`.
 *
 * @param harness - Currently selected harness.
 * @param modelId - Model id the harness must be able to run.
 * @returns The harness to use for `modelId`.
 */
export function snapHarnessToModel(
  harness: HarnessType,
  modelId: string,
): HarnessType {
  const alreadyRunsModel = harnessSupportsModel(harness, modelId);
  if (alreadyRunsModel) {
    return harness;
  }

  const nativeHarness = preferredHarnessForModel(modelId);
  return nativeHarness ?? "opencode";
}

// ── Reasoning-effort support ──────────────────────────────────────────────────

/**
 * The highest reasoning effort a harness's runtime can express (its native clamp ceiling). Grounded
 * in cli-bridge: codex's `model_reasoning_effort` caps at `high` (xhigh/ultracode clamp down); kimi's
 * `--thinking` is binary, so `high` is its "on"; claude-code carries the full range; `cli-base` has
 * no agent and thus no thinking; `nanoclaw`'s runner sends no thinking flag, so it expresses only
 * `none`. Router/model-driven harnesses default to the full range.
 */
const harnessReasoningCeiling: Partial<Record<HarnessType, ReasoningEffort>> = {
  "cli-base": "none",
  codex: "high",
  "kimi-code": "high",
  "claude-code": "ultracode",
  // Exactly one `nanoclaw` entry: a repeated key is a compile error and would leave the effective
  // ceiling to whichever entry happens to come last.
  nanoclaw: "none",
};

/** The reasoning efforts a harness can express, independent of model — `none` up to its ceiling. */
Expand Down
3 changes: 2 additions & 1 deletion packages/agent-interface/tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,6 @@
"skipLibCheck": true,
"types": ["node"]
},
"include": ["src"]
"include": ["src"],
"exclude": ["src/**/*.test.ts"]
}
3 changes: 3 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading