+ {/* Text size — zooms the transcript. A panel-level control, so it
+ lives in the header action row rather than over the composer. */}
+
+
+
+
+
+
+
+
+
+
+
+ {/* Conversation — the full-panel history view, the host-swappable
+ renderer, or the built-in timeline. */}
+
+ {view === "history" ? (
+ {
+ chat.switchThread(id);
+ setView("chat");
+ }}
+ onDelete={(id) => void deleteThread(id)}
+ />
+ ) : (
+ // The text-size control zooms the transcript only — not the history
+ // view's search box and buttons. `zoom` scales every descendant
+ // uniformly regardless of which renderer draws the conversation; an
+ // inline `font-size` would not (the transcript's text utilities set
+ // absolute rem sizes), and `transform: scale` would break the scroll
+ // container by keeping the original layout box.
+
+ )}
+
+ {/* Composer: the model picker sits directly above the input, so the model
+ the next turn will use reads as part of the composer. */}
+
+ {/* Running indicator: while a turn streams, the composer's Send becomes a
+ Stop button — on its own an easy-to-miss signal. This animated row makes
+ "the assistant is working" unmistakable regardless of the transcript
+ renderer in use. */}
+ {streaming && (
+
+ Connect the items above, then confirm — your proposal stays here until
+ you do.
+
+
+ )}
+
+ {proposal.retryError && (
+
+ {proposal.retryError}
+
+ )}
+
+
+
+
+
+
+ );
+}
+
+/**
+ * Follow a requirement's connect target without ever leaving http(s) or being
+ * tricked off-site.
+ *
+ * - A target that carries its own http(s) scheme is an external link and opens
+ *   in a new tab (`noopener,noreferrer`).
+ * - A schemeless target is in-app navigation: it goes to the host router
+ *   (`navigate`) when one is supplied, else through `window.location.assign`.
+ * - Everything else is ignored: a protocol-relative target, a non-http(s)
+ *   scheme, an unparseable target, or a schemeless target that resolves to a
+ *   different origin than the page.
+ *
+ * @param target   Connect URL or in-app path.
+ * @param navigate Optional host-router callback used for in-app paths.
+ */
+function openConnect(target: string, navigate?: (path: string) => void) {
+  // Protocol-relative URLs (//host) inherit the page scheme and point off-site —
+  // never a legitimate connect target, so reject outright.
+  if (target.startsWith("//")) return;
+  // Canonicalize before the scheme check so it can't be smuggled past with
+  // leading whitespace or an embedded tab/newline that browsers strip. Only
+  // http(s) may EVER navigate — via window.open OR window.location.assign —
+  // which closes the `javascript:`/`data:` XSS vector.
+  let url: URL;
+  try {
+    url = new URL(target, window.location.origin);
+  } catch {
+    return;
+  }
+  if (url.protocol !== "http:" && url.protocol !== "https:") return;
+  // Let the URL parser — not a regex over the raw string — decide whether the
+  // target carries its own scheme: parsing WITHOUT a base only succeeds for an
+  // absolute URL, and it applies the same whitespace/tab stripping the browser
+  // will. A raw-string regex misclassifies e.g. " https://…" or "h\tttps://…".
+  let hasScheme = true;
+  try {
+    new URL(target);
+  } catch {
+    hasScheme = false;
+  }
+  if (hasScheme) {
+    // Absolute http(s) URL → external link → new tab.
+    window.open(url.href, "_blank", "noopener,noreferrer");
+    return;
+  }
+  // A schemeless target must stay on this origin. The startsWith("//") guard
+  // above only sees the raw string; " //host" (leading whitespace) and "/\host"
+  // (backslash, which the parser treats as a slash) also resolve off-site.
+  if (url.origin !== window.location.origin) return;
+  if (navigate) {
+    navigate(target);
+  } else {
+    window.location.assign(url.href);
+  }
+}
+
+/**
+ * One row describing a single connection requirement: the provider label
+ * (suffixed with "App" for a `github_app` requirement) and its status text
+ * ("installed"/"not installed" for an app, "connected"/"not connected"
+ * otherwise).
+ *
+ * NOTE(review): the JSX markup of the returned tree is not visible in this
+ * view; `navigate` and `target` are presumably consumed by the connect control
+ * rendered under `canConnect` — confirm against the full file.
+ */
+function RequirementRow({
+ req,
+ navigate,
+}: {
+ req: ConnectionRequirement;
+ navigate?: (path: string) => void;
+}) {
+ const label = providerLabel(req.provider);
+ const isApp = req.kind === "github_app";
+ const kindLabel = isApp ? `${label} App` : label;
+ const statusText = req.connected
+ ? isApp
+ ? "installed"
+ : "connected"
+ : isApp
+ ? "not installed"
+ : "not connected";
+ // connectUrl === null means "no connect target to offer" (e.g. a github_app
+ // requirement on a deploy with no app slug) — show the status without a link.
+ // Only an explicit null suppresses the control: an absent (undefined)
+ // connectUrl still counts as connectable and uses the fallback path below.
+ const canConnect = !req.connected && req.connectUrl !== null;
+ const target = req.connectUrl ?? "/app/integrations";
+
+ return (
+
+
+
+ {kindLabel}
+
+ {/* Filled vs outlined dot is a non-color (shape) cue for the
+ connected state, so it reads for color-blind users too — the
+ status text alone would lean on color. */}
+
+
+ {statusText}
+
+
+
+ {canConnect && (
+
+ )}
+
+ );
+}
diff --git a/src/assistant/ResizeHandle.test.tsx b/src/assistant/ResizeHandle.test.tsx
new file mode 100644
index 0000000..396ffd1
--- /dev/null
+++ b/src/assistant/ResizeHandle.test.tsx
@@ -0,0 +1,81 @@
+// @vitest-environment jsdom
+import { cleanup, fireEvent, render } from "@testing-library/react";
+import { afterEach, beforeAll, describe, expect, it, vi } from "vitest";
+import { ResizeHandle } from "./ResizeHandle";
+
+// jsdom's PointerEvent ignores `button`/`clientX` from the event init, so the
+// handle's primary-button guard would reject every synthetic pointerdown.
+// MouseEvent does honour them, so the stand-in below extends it and only adds
+// the `pointerId` the drag handlers read.
+class PointerEventPolyfill extends MouseEvent {
+  readonly pointerId: number;
+
+  constructor(type: string, params: PointerEventInit = {}) {
+    super(type, params);
+    const id = params.pointerId;
+    this.pointerId = id ?? 0;
+  }
+}
+
+// Install the stand-in once, before any test dispatches a pointer event.
+beforeAll(() => {
+  const replacement = PointerEventPolyfill as unknown as typeof PointerEvent;
+  window.PointerEvent = replacement;
+});
+
+afterEach(() => cleanup());
+
+/**
+ * Render a `ResizeHandle` with spy callbacks and return the separator element
+ * together with the spies.
+ *
+ * @param width    Current panel width passed to the handle.
+ * @param maxWidth Upper bound passed to the handle.
+ */
+function setup(width = 500, maxWidth = 1000) {
+  const onPreview = vi.fn();
+  const onCommit = vi.fn();
+  const onNudge = vi.fn();
+  // The element under test: the handle takes exactly these five props.
+  const { getByRole } = render(
+    <ResizeHandle
+      width={width}
+      maxWidth={maxWidth}
+      onPreview={onPreview}
+      onCommit={onCommit}
+      onNudge={onNudge}
+    />,
+  );
+  const el = getByRole("separator") as HTMLElement;
+  // jsdom's pointer-capture support is incomplete; stub so the handlers run.
+  el.setPointerCapture = vi.fn();
+  el.releasePointerCapture = vi.fn();
+  el.hasPointerCapture = () => true;
+  return { el, onPreview, onCommit, onNudge };
+}
+
+describe("ResizeHandle", () => {
+  it("exposes the current/min/max width via ARIA", () => {
+    const handle = setup(620, 980).el;
+    const expected = [
+      ["aria-valuenow", "620"],
+      ["aria-valuemin", "360"],
+      ["aria-valuemax", "980"],
+    ] as const;
+    for (const [attribute, value] of expected) {
+      expect(handle.getAttribute(attribute)).toBe(value);
+    }
+  });
+
+  it("previews during drag (right-anchored: dragging left widens) and commits on release", () => {
+    const harness = setup(500);
+    const handle = harness.el;
+    const pointerId = 1;
+
+    fireEvent.pointerDown(handle, { clientX: 800, button: 0, pointerId });
+
+    // First move: 50px to the left of the press point → 500 + (800 - 750).
+    fireEvent.pointerMove(handle, { clientX: 750, pointerId });
+    expect(harness.onPreview).toHaveBeenLastCalledWith(550);
+    expect(harness.onCommit).not.toHaveBeenCalled();
+
+    // Second move: 100px to the left of the press point.
+    fireEvent.pointerMove(handle, { clientX: 700, pointerId });
+    expect(harness.onPreview).toHaveBeenLastCalledWith(600);
+
+    // Releasing persists exactly once, with the width previewed last.
+    fireEvent.pointerUp(handle, { pointerId });
+    expect(harness.onCommit).toHaveBeenCalledTimes(1);
+    expect(harness.onCommit).toHaveBeenCalledWith(600);
+  });
+
+  it("does not preview pointer moves before a drag starts", () => {
+    const harness = setup();
+    fireEvent.pointerMove(harness.el, { clientX: 700, pointerId: 1 });
+    expect(harness.onPreview).not.toHaveBeenCalled();
+  });
+
+  it("nudges by ±24 on arrow keys", () => {
+    const harness = setup();
+
+    fireEvent.keyDown(harness.el, { key: "ArrowLeft" });
+    expect(harness.onNudge).toHaveBeenLastCalledWith(24);
+
+    fireEvent.keyDown(harness.el, { key: "ArrowRight" });
+    expect(harness.onNudge).toHaveBeenLastCalledWith(-24);
+  });
+});
diff --git a/src/assistant/ResizeHandle.tsx b/src/assistant/ResizeHandle.tsx
new file mode 100644
index 0000000..7a6c8e6
--- /dev/null
+++ b/src/assistant/ResizeHandle.tsx
@@ -0,0 +1,104 @@
+import { type KeyboardEvent, type PointerEvent, useRef } from "react";
+import { MIN_PANEL_WIDTH } from "./usePanelPrefs";
+
+/**
+ * Drag-to-resize grip on the assistant drawer's left edge. Pointer capture keeps
+ * the drag alive while the cursor moves anywhere on screen; arrow keys resize in
+ * coarse steps for keyboard users. The drawer is right-anchored, so dragging
+ * left widens it. The in-memory width updates every move (`onPreview`); the
+ * final width is persisted once on release (`onCommit`).
+ *
+ * NOTE(review): the returned markup is not visible in this view, so which DOM
+ * events `endDrag` is bound to cannot be confirmed from here.
+ */
+export function ResizeHandle({
+ width,
+ maxWidth,
+ onPreview,
+ onCommit,
+ onNudge,
+}: {
+ width: number;
+ maxWidth: number;
+ /** Live (non-persisted) width update during a drag. The value is the raw
+ * pointer-derived width and is NOT clamped — the consumer must clamp it to its
+ * own min/max (the bundled `usePanelWidth.previewWidth` does). */
+ onPreview: (next: number) => void;
+ /** Persist the final width (drag end). */
+ onCommit: (next: number) => void;
+ /** Keyboard resize delta (clamped + persisted). */
+ onNudge: (deltaPx: number) => void;
+}) {
+ // State of the drag in progress, or null when idle. Kept in a ref so pointer
+ // moves can update `lastWidth` without triggering a render of their own.
+ const dragRef = useRef<{
+ startX: number;
+ startWidth: number;
+ lastWidth: number;
+ } | null>(null);
+
+ // Start a drag: capture the pointer and snapshot the press position and the
+ // width at that moment, which every later move is measured against.
+ const onPointerDown = (e: PointerEvent) => {
+ if (e.button !== 0) return; // primary button / touch / pen only
+ e.preventDefault();
+ e.currentTarget.setPointerCapture(e.pointerId);
+ dragRef.current = {
+ startX: e.clientX,
+ startWidth: width,
+ lastWidth: width,
+ };
+ };
+
+ const onPointerMove = (e: PointerEvent) => {
+ const drag = dragRef.current;
+ if (!drag) return;
+ // Update the in-memory width every tick (smooth), but don't persist — that
+ // would hit localStorage on every pointermove.
+ // Moving left of the press point (smaller clientX) yields a larger width.
+ const next = drag.startWidth + (drag.startX - e.clientX);
+ // Skip sub-pixel jitter so a touch drag doesn't re-render the panel subtree
+ // on every noise event.
+ if (Math.abs(next - drag.lastWidth) < 1) return;
+ drag.lastWidth = next;
+ onPreview(next);
+ };
+
+ // Finish the drag: clear the state first so it can only run once per drag,
+ // release the capture if still held, then hand the last previewed width over.
+ const endDrag = (e: PointerEvent) => {
+ const drag = dragRef.current;
+ if (!drag) return;
+ dragRef.current = null;
+ if (e.currentTarget.hasPointerCapture(e.pointerId)) {
+ e.currentTarget.releasePointerCapture(e.pointerId);
+ }
+ // Persist once, on release.
+ onCommit(drag.lastWidth);
+ };
+
+ // Keyboard resize: ArrowLeft sends +STEP and ArrowRight sends -STEP, matching
+ // the drag direction (left = wider). Other keys are left untouched.
+ const onKeyDown = (e: KeyboardEvent) => {
+ const STEP = 24;
+ if (e.key === "ArrowLeft") {
+ e.preventDefault();
+ onNudge(STEP);
+ } else if (e.key === "ArrowRight") {
+ e.preventDefault();
+ onNudge(-STEP);
+ }
+ };
+
+ return (
+ // biome-ignore lint/a11y/useSemanticElements: a focusable drag handle is an ARIA window-splitter (role=separator); no native HTML element provides this.
+
+
+
+ );
+}
diff --git a/src/assistant/client-context.tsx b/src/assistant/client-context.tsx
new file mode 100644
index 0000000..8ffe0b1
--- /dev/null
+++ b/src/assistant/client-context.tsx
@@ -0,0 +1,42 @@
+/**
+ * Provides the configured {@link AssistantClient} to the assistant hooks. The
+ * host (a same-origin app, or a cross-origin embedder) builds one client with
+ * its transport config and supplies it here; the hooks read it rather than
+ * importing a hard-wired same-origin transport, which is what makes the panel
+ * portable across hosts.
+ */
+
+import { createContext, type ReactNode, useContext } from "react";
+import type { AssistantClient } from "./client";
+
+/**
+ * Carries the host-supplied client down the tree. The default is `null` so a
+ * missing provider is detectable by consumers instead of silently resolving to
+ * some default transport.
+ */
+const AssistantClientContext = createContext<AssistantClient | null>(null);
+
+/**
+ * Makes `client` available to every assistant hook rendered inside `children`.
+ *
+ * @param client   The client built by the host with its transport config.
+ * @param children Subtree that may read the client from context.
+ */
+export function AssistantClientProvider({
+  client,
+  children,
+}: {
+  client: AssistantClient;
+  children: ReactNode;
+}) {
+  return (
+    <AssistantClientContext.Provider value={client}>
+      {children}
+    </AssistantClientContext.Provider>
+  );
+}
+
+/**
+ * The assistant client for the current host. Throws when no provider is mounted
+ * rather than silently falling back to a default transport — a missing provider
+ * is a wiring bug, and a hidden default would mask it (and could target the
+ * wrong origin).
+ *
+ * @returns The client supplied by the nearest `AssistantClientProvider`.
+ * @throws Error when called outside an `AssistantClientProvider`.
+ */
+export function useAssistantClient(): AssistantClient {
+  const client = useContext(AssistantClientContext);
+  if (!client) {
+    // Name the provider so the message tells the reader what to mount.
+    throw new Error(
+      "useAssistantClient must be used within an <AssistantClientProvider>",
+    );
+  }
+  return client;
+}
diff --git a/src/assistant/client.test.ts b/src/assistant/client.test.ts
new file mode 100644
index 0000000..3806de6
--- /dev/null
+++ b/src/assistant/client.test.ts
@@ -0,0 +1,498 @@
+import { afterEach, describe, expect, it, vi } from "vitest";
+import { createAssistantClient } from "./client";
+import type { AssistantStreamEvent } from "./types";
+
+// One client for every case: the transport config is irrelevant to these tests
+// (they stub `fetch`), so a same-origin base is fine. The point under test is the
+// request shaping and the defensive wire parsing, which are config-independent.
+const client = createAssistantClient({ baseUrl: "/api/v1/assistant" });
+
+/**
+ * Build a response body that yields each frame as its own UTF-8 chunk, in
+ * order, and then closes.
+ */
+function sseBody(frames: string[]): ReadableStream {
+  const encoder = new TextEncoder();
+  const chunks = frames.map((frame) => encoder.encode(frame));
+  return new ReadableStream({
+    start(controller) {
+      chunks.forEach((chunk) => controller.enqueue(chunk));
+      controller.close();
+    },
+  });
+}
+
+afterEach(() => {
+ vi.unstubAllGlobals();
+});
+
+describe("streamChat", () => {
+  type ChatArgs = Parameters<typeof client.streamChat>[0];
+
+  // The terminal frame most cases end their stream with.
+  const DONE_FRAME =
+    'event: done\ndata: {"turnId":"R","status":"completed"}\n\n';
+
+  /** Replace global `fetch` with a mock resolving to `response`; returns the mock. */
+  function stubFetch(response: unknown) {
+    const fetchMock = vi.fn().mockResolvedValue(response);
+    vi.stubGlobal("fetch", fetchMock);
+    return fetchMock;
+  }
+
+  /** Stub an OK response whose body streams `frames`. */
+  function stubStream(frames: string[]) {
+    return stubFetch({ ok: true, body: sseBody(frames) });
+  }
+
+  /** Run one turn against the stubbed `fetch`; returns every emitted event in order. */
+  async function collect(
+    req: ChatArgs = { message: "hi" },
+  ): Promise<AssistantStreamEvent[]> {
+    const seen: AssistantStreamEvent[] = [];
+    await client.streamChat(
+      req,
+      (event) => seen.push(event),
+      new AbortController().signal,
+    );
+    return seen;
+  }
+
+  const typesOf = (events: AssistantStreamEvent[]) => events.map((e) => e.type);
+
+  it("emits each SSE event as a typed event in wire order, dropping pings", async () => {
+    stubStream([
+      'event: thread\ndata: {"threadId":"T1","turnId":"R1"}\n\n',
+      'event: delta\ndata: {"text":"Hel"}\n\n',
+      "event: ping\ndata: {}\n\n",
+      'event: delta\ndata: {"text":"lo"}\n\n',
+      'event: usage\ndata: {"promptTokens":1,"completionTokens":2,"costUsd":0.001,"balanceUsd":3.2}\n\n',
+      'event: done\ndata: {"turnId":"R1","status":"completed"}\n\n',
+    ]);
+
+    const events = await collect();
+
+    expect(typesOf(events)).toEqual([
+      "thread",
+      "delta",
+      "delta",
+      "usage",
+      "done",
+    ]);
+    expect(events[1]).toEqual({ type: "delta", data: { text: "Hel" } });
+    expect(events[3]).toEqual({
+      type: "usage",
+      data: {
+        promptTokens: 1,
+        completionTokens: 2,
+        costUsd: 0.001,
+        balanceUsd: 3.2,
+        replayed: false,
+      },
+    });
+  });
+
+  it("surfaces a pre-stream HTTP error body as a single error event", async () => {
+    const failure = {
+      code: "INSUFFICIENT_BALANCE",
+      message: "Credit balance is exhausted",
+    };
+    stubFetch({
+      ok: false,
+      status: 402,
+      body: null,
+      json: async () => ({ success: false, error: failure }),
+    });
+
+    expect(await collect()).toEqual([{ type: "error", data: failure }]);
+  });
+
+  it("emits STREAM_CLOSED when the body ends without a done or error frame", async () => {
+    stubStream([
+      'event: thread\ndata: {"threadId":"T","turnId":"R"}\n\n',
+      'event: delta\ndata: {"text":"partial"}\n\n',
+    ]);
+
+    const events = await collect();
+
+    expect(events.at(-1)).toEqual({
+      type: "error",
+      data: {
+        code: "STREAM_CLOSED",
+        message: "The assistant stream ended unexpectedly",
+      },
+    });
+  });
+
+  it("does not append STREAM_CLOSED when the turn ends with done", async () => {
+    stubStream([DONE_FRAME]);
+    expect(typesOf(await collect())).toEqual(["done"]);
+  });
+
+  it("emits NO_BODY for an ok response with a null body", async () => {
+    stubFetch({ ok: true, body: null });
+
+    expect(await collect()).toEqual([
+      {
+        type: "error",
+        data: {
+          code: "NO_BODY",
+          message: "The assistant stream is unavailable",
+        },
+      },
+    ]);
+  });
+
+  it("drops malformed frames missing required fields rather than coercing them", async () => {
+    stubStream([
+      // Two broken frames: a thread without turnId and a tool_proposal without
+      // callId. Neither may surface.
+      'event: thread\ndata: {"threadId":"T"}\n\n',
+      'event: tool_proposal\ndata: {"proposalId":"p","name":"create_workflow","args":{}}\n\n',
+      // Well-formed frames after them are unaffected.
+      'event: delta\ndata: {"text":"ok"}\n\n',
+      DONE_FRAME,
+    ]);
+
+    const events = await collect();
+
+    expect(typesOf(events)).toEqual(["delta", "done"]);
+    // Nothing was coerced into a thread/proposal event with "undefined" ids.
+    for (const dropped of ["thread", "tool_proposal"]) {
+      expect(events.some((e) => e.type === dropped)).toBe(false);
+    }
+  });
+
+  it("preserves each requirement's kind and connectUrl on a live tool_proposal", async () => {
+    // A GitHub PR-review workflow names github twice: once as the GitHub App
+    // installation feeding events (kind: github_app, NOT installed here) and
+    // once as the OAuth connection used for actions (kind: integration,
+    // connected here). Only `kind`/`connectUrl` tell the two rows apart; when
+    // the live path dropped them, both collapsed into one "GitHub / not
+    // connected" integration row and the provider showed as connected AND not
+    // connected at once.
+    const requirements = [
+      {
+        provider: "github",
+        kind: "github_app",
+        connected: false,
+        connectUrl: "https://github.com/apps/tangle/installations/new",
+      },
+      {
+        provider: "github",
+        kind: "integration",
+        connected: true,
+        connectUrl: "/app/integrations",
+      },
+    ];
+    const proposalFrame = `event: tool_proposal\ndata: ${JSON.stringify({
+      proposalId: "p1",
+      callId: "c1",
+      name: "create_workflow",
+      args: { yaml: "name: pr-review-opencode" },
+      requirements,
+    })}\n\n`;
+    stubStream([proposalFrame, DONE_FRAME]);
+
+    const events = await collect({ message: "review my PRs" });
+
+    const proposal = events.find((e) => e.type === "tool_proposal");
+    expect(proposal).toBeTruthy();
+    if (proposal?.type !== "tool_proposal") return;
+    expect(proposal.data.requirements).toEqual(requirements);
+  });
+
+  it("parses CRLF-framed events end to end", async () => {
+    stubStream([
+      'event: delta\r\ndata: {"text":"hi"}\r\n\r\n',
+      'event: done\r\ndata: {"turnId":"R","status":"completed"}\r\n\r\n',
+    ]);
+
+    const events = await collect();
+
+    expect(typesOf(events)).toEqual(["delta", "done"]);
+    expect(events[0]).toEqual({ type: "delta", data: { text: "hi" } });
+  });
+
+  it("forwards threadId and turnKey in the request body", async () => {
+    const fetchMock = stubStream([DONE_FRAME]);
+    const request = { message: "continue", threadId: "T9", turnKey: "k1" };
+
+    await client.streamChat(request, () => {}, new AbortController().signal);
+
+    const [, init] = fetchMock.mock.calls[0]!;
+    expect(JSON.parse(init.body)).toEqual({
+      message: "continue",
+      threadId: "T9",
+      turnKey: "k1",
+    });
+  });
+});
+
+describe("confirmProposal", () => {
+  /** Make the next `fetch` resolve to `response`. */
+  const respondWith = (response: unknown) => {
+    vi.stubGlobal("fetch", vi.fn().mockResolvedValue(response));
+  };
+
+  it("returns the tool output on a successful confirmation", async () => {
+    respondWith({
+      ok: true,
+      json: async () => ({ success: true, output: { created: true } }),
+    });
+
+    const outcome = await client.confirmProposal("prop_1");
+
+    expect(outcome).toEqual({ ok: true, output: { created: true } });
+  });
+
+  it("returns the server's error message when the proposal can't be run", async () => {
+    const serverBody = {
+      success: false,
+      error: {
+        code: "PROPOSAL_EXPIRED",
+        message: "This proposal has expired",
+      },
+    };
+    respondWith({ ok: false, status: 409, json: async () => serverBody });
+
+    const outcome = await client.confirmProposal("prop_1");
+
+    expect(outcome).toEqual({ ok: false, error: "This proposal has expired" });
+  });
+});
+
+describe("fetchModels", () => {
+  /** Make the next `fetch` resolve to `response`. */
+  const respondWith = (response: unknown) => {
+    vi.stubGlobal("fetch", vi.fn().mockResolvedValue(response));
+  };
+
+  it("parses the catalog (prompt price + context) and reports a list", async () => {
+    // One fully-described model and one carrying only the required fields.
+    const priced = {
+      slug: "anthropic/claude-sonnet-4-5",
+      label: "Claude Sonnet 4.5",
+      promptUsdPerMillion: 3,
+      contextTokens: 200000,
+    };
+    const bare = { slug: "openai/gpt-4o", label: "GPT 4o" };
+    respondWith({
+      ok: true,
+      json: async () => ({
+        default: "anthropic/claude-sonnet-4-5",
+        models: [priced, bare],
+      }),
+    });
+
+    const res = await client.fetchModels();
+
+    expect(res.ok).toBe(true);
+    expect(res.data.default).toBe("anthropic/claude-sonnet-4-5");
+    expect(res.data.models[0]).toEqual({
+      slug: "anthropic/claude-sonnet-4-5",
+      label: "Claude Sonnet 4.5",
+      promptUsdPerMillion: 3,
+      contextTokens: 200000,
+    });
+    expect(res.data.models[1]).toEqual({
+      slug: "openai/gpt-4o",
+      label: "GPT 4o",
+    });
+  });
+
+  it("reports an empty model list as not-ok so the caller retries", async () => {
+    respondWith({
+      ok: true,
+      json: async () => ({ default: "x", models: [] }),
+    });
+
+    const res = await client.fetchModels();
+
+    expect(res.ok).toBe(false);
+  });
+
+  it("reports a transport failure as not-ok with an empty list", async () => {
+    respondWith({ ok: false });
+
+    const res = await client.fetchModels();
+
+    expect(res.ok).toBe(false);
+    expect(res.data.models).toEqual([]);
+  });
+});
+
+describe("fetchThreads", () => {
+  /** Make the next `fetch` resolve to `response`. */
+  const respondWith = (response: unknown) => {
+    vi.stubGlobal("fetch", vi.fn().mockResolvedValue(response));
+  };
+
+  it("parses the thread list, tolerating a null title", async () => {
+    respondWith({
+      ok: true,
+      json: async () => ({
+        threads: [
+          {
+            id: "t1",
+            title: "Hello",
+            createdAt: "2026-06-15T00:00:00Z",
+            updatedAt: "2026-06-15T01:00:00Z",
+          },
+          { id: "t2", title: null, createdAt: "x", updatedAt: "y" },
+        ],
+      }),
+    });
+
+    const threads = await client.fetchThreads();
+
+    // Both rows come back unchanged, including the untitled one.
+    expect(threads).toEqual([
+      {
+        id: "t1",
+        title: "Hello",
+        createdAt: "2026-06-15T00:00:00Z",
+        updatedAt: "2026-06-15T01:00:00Z",
+      },
+      { id: "t2", title: null, createdAt: "x", updatedAt: "y" },
+    ]);
+  });
+
+  it("returns null on a failed request", async () => {
+    respondWith({ ok: false });
+
+    const threads = await client.fetchThreads();
+
+    expect(threads).toBeNull();
+  });
+});
+
+describe("fetchThreadHistory", () => {
+  /** Make the next `fetch` resolve to `response`. */
+  const respondWith = (response: unknown) => {
+    vi.stubGlobal("fetch", vi.fn().mockResolvedValue(response));
+  };
+
+  /** An OK (200) response whose JSON body is `payload`. */
+  const okJson = (payload: unknown) => ({
+    ok: true,
+    status: 200,
+    json: async () => payload,
+  });
+
+  it("restores messages and proposals, dropping malformed requirement elements", async () => {
+    const wellFormed = {
+      provider: "github",
+      kind: "github_app",
+      connected: false,
+      connectUrl: null,
+    };
+    respondWith(
+      okJson({
+        messages: [{ id: "t1:u", role: "user", text: "review my PR" }],
+        proposals: [
+          {
+            proposalId: "p1",
+            callId: "c1",
+            name: "create_workflow",
+            args: { yaml: "name: pr-review" },
+            requirements: [
+              wellFormed,
+              // Provider is not a string: has to be dropped instead of reaching
+              // the card, where providerLabel would call .toLowerCase() on it.
+              { provider: 123, connected: false },
+              // No boolean `connected`: dropped as well.
+              { provider: "slack" },
+            ],
+          },
+        ],
+      }),
+    );
+
+    const result = await client.fetchThreadHistory("t1");
+
+    expect(result.status).toBe("ok");
+    if (result.status !== "ok") return;
+    expect(result.messages).toEqual([
+      { id: "t1:u", role: "user", text: "review my PR" },
+    ]);
+    expect(result.proposals).toHaveLength(1);
+    // Of the three requirement entries, only the valid one is left.
+    expect(result.proposals[0]!.requirements).toEqual([
+      {
+        provider: "github",
+        kind: "github_app",
+        connected: false,
+        connectUrl: null,
+      },
+    ]);
+  });
+
+  it("tolerates a response with no proposals field (older server)", async () => {
+    respondWith(
+      okJson({ messages: [{ id: "t1:u", role: "user", text: "hi" }] }),
+    );
+
+    const result = await client.fetchThreadHistory("t1");
+
+    expect(result.status).toBe("ok");
+    if (result.status !== "ok") return;
+    expect(result.proposals).toEqual([]);
+  });
+
+  it("reports a deleted thread as gone (404)", async () => {
+    respondWith({ ok: false, status: 404 });
+
+    const result = await client.fetchThreadHistory("t1");
+
+    expect(result.status).toBe("gone");
+  });
+});
diff --git a/src/assistant/client.ts b/src/assistant/client.ts
new file mode 100644
index 0000000..7480ca0
--- /dev/null
+++ b/src/assistant/client.ts
@@ -0,0 +1,616 @@
+/**
+ * Configurable network client for the assistant panel: the chat SSE stream, the
+ * model/thread/history reads, and the proposal-confirmation call.
+ *
+ * The transport is injected via {@link AssistantClientConfig} so the same UI can
+ * run in different hosts: a same-origin app authenticates with the session
+ * cookie (`credentials: "include"`) and an `X-Requested-With` marker, while a
+ * cross-origin host points `baseUrl` at the API and supplies a bearer token via
+ * `headers`. The request shapes and the defensive wire parsing are identical
+ * across hosts — only the base URL and the auth headers vary.
+ */
+
+import { readSSEEvents } from "./sse";
+import type {
+ AssistantStreamEvent,
+ ChatMessage,
+ ChatRequest,
+ ConnectionRequirement,
+ PendingProposal,
+} from "./types";
+
+/** Host-supplied transport configuration for {@link createAssistantClient}. */
+export interface AssistantClientConfig {
+  /**
+   * Base URL the five assistant endpoints hang off, with no trailing slash —
+   * e.g. `"/api/v1/assistant"` for a same-origin SSR edge, or
+   * `"https://id.tangle.tools/api/v1/assistant"` cross-origin. Each method
+   * appends its own path (`/chat`, `/models`, `/threads`, …).
+   */
+  baseUrl: string;
+  /**
+   * `fetch` credentials mode. Defaults to `"include"` so a same-origin cookie
+   * session authenticates; a token-based cross-origin host may pass `"omit"`
+   * and carry the credential in {@link AssistantClientConfig.headers}.
+   */
+  credentials?: RequestCredentials;
+  /**
+   * Headers applied to every request — the auth token and/or the CSRF marker.
+   * Called per request so a rotating token is read fresh, never captured once.
+   * Returns a plain header-name → header-value map.
+   */
+  headers?: () => Record<string, string>;
+}
+
+/** One entry of {@link AssistantModels.models}: a model the user can pick. */
+export interface AssistantModelOption {
+ /** Model identifier, e.g. `"anthropic/claude-sonnet-4-5"`. */
+ slug: string;
+ /** Human-readable name, e.g. `"Claude Sonnet 4.5"`. */
+ label: string;
+ /** USD per million prompt tokens, when the catalog carries pricing. */
+ promptUsdPerMillion?: number;
+ /** Context window in tokens, when known. */
+ contextTokens?: number;
+}
+
+/** The model catalog: the server's default slug plus the selectable models. */
+export interface AssistantModels {
+ /** The slug the server uses when a turn selects no model. */
+ default: string | null;
+ /** Selectable models; empty when the catalog could not be loaded. */
+ models: AssistantModelOption[];
+}
+
+/** One past conversation in the history switcher. */
+export interface AssistantThreadSummary {
+ /** Thread identifier. */
+ id: string;
+ /** Truncated first user message; may be null for an untitled thread. */
+ title: string | null;
+ /** Creation time as sent by the server — presumably an ISO-8601 string; confirm against the API. */
+ createdAt: string;
+ /** Last-update time, in the same format as `createdAt`. */
+ updatedAt: string;
+}
+
+/**
+ * Outcome of a model-list fetch. `ok` drives caching: the caller caches on `ok`
+ * and retries on `!ok`. An EMPTY list is reported as `!ok` — the server always
+ * offers at least the default model when the router is reachable, so an empty
+ * menu means the catalog couldn't be loaded and should be retried, not cached
+ * for the whole session.
+ */
+export interface AssistantModelsResult {
+ /** True only when a non-empty catalog was loaded. */
+ ok: boolean;
+ /** The parsed catalog (an empty model list on failure). */
+ data: AssistantModels;
+}
+
+/**
+ * Outcome of a thread-history restore. The three cases drive different recovery:
+ * `ok` rehydrates the transcript; `gone` (the thread 404s — deleted or from a
+ * reset DB) tells the caller to drop the dead thread id so the next turn starts
+ * fresh; `error` (transient/network/aborted) keeps the thread id and simply
+ * doesn't restore, so a later attempt or send still targets the live thread.
+ */
+export type ThreadHistoryResult =
+ // Restored: the transcript plus any proposals recorded for the thread.
+ | { status: "ok"; messages: ChatMessage[]; proposals: PendingProposal[] }
+ // The thread no longer exists on the server (404).
+ | { status: "gone" }
+ // Any other failure; the thread id stays valid.
+ | { status: "error" };
+
+/**
+ * Outcome of confirming a proposal: on success the tool's `output`, on failure
+ * a displayable `error` message.
+ * NOTE(review): the meaning of `retryable` is not established in this part of
+ * the file — confirm where it is set before relying on it.
+ */
+export type ConfirmResult =
+ | { ok: true; output: unknown; retryable?: boolean }
+ | { ok: false; error: string };
+
+/** The assistant network surface, bound to one host's transport config. */
+export interface AssistantClient {
+  /** Load the model catalog; `ok` is false when it could not be loaded. */
+  fetchModels(signal?: AbortSignal): Promise<AssistantModelsResult>;
+  /** List past conversations, or `null` when the request fails. */
+  fetchThreads(signal?: AbortSignal): Promise<AssistantThreadSummary[] | null>;
+  /** Restore one thread's transcript and proposals. */
+  fetchThreadHistory(
+    threadId: string,
+    signal?: AbortSignal,
+  ): Promise<ThreadHistoryResult>;
+  /**
+   * Run one chat turn, reporting every stream event through `onEvent` in wire
+   * order. Failures are reported as `error` events; the promise settles once
+   * the stream has ended.
+   */
+  streamChat(
+    req: ChatRequest,
+    onEvent: (event: AssistantStreamEvent) => void,
+    signal: AbortSignal,
+  ): Promise<void>;
+  /** Confirm a pending proposal so the server runs it. */
+  confirmProposal(proposalId: string): Promise<ConfirmResult>;
+  /** Delete a thread and its server-side turns/proposals. Resolves `{ ok }`; a
+   * 404 (already gone) is treated as success so a double-delete is harmless.
+   * Optional so a host with no delete endpoint stays a valid client — the panel
+   * hides the delete affordance when it's absent (see `useAssistantThreads`). */
+  deleteThread?(threadId: string): Promise<{ ok: boolean }>;
+}
+
+const EMPTY_MODELS: AssistantModels = { default: null, models: [] };
+
+/**
+ * Narrow a parsed event payload to a plain (non-array) object, or null. The
+ * shared parser JSON-parses each `data:` payload; a non-object (e.g. a
+ * malformed frame it left as a raw string) is dropped.
+ *
+ * @param v Any parsed value.
+ * @returns `v` viewed as a string-keyed record of unknown values, or null when
+ *          `v` is falsy, a primitive, or an array.
+ */
+function asObject(v: unknown): Record<string, unknown> | null {
+  return v && typeof v === "object" && !Array.isArray(v)
+    ? (v as Record<string, unknown>)
+    : null;
+}
+
+/**
+ * Read a required string off the wire. Anything that is not a non-empty string
+ * becomes null, so the caller drops the frame instead of stringifying a
+ * missing field into "undefined".
+ */
+function reqStr(v: unknown): string | null {
+  if (typeof v !== "string") return null;
+  return v === "" ? null : v;
+}
+
+/** A finite number as-is; anything else (NaN, ±Infinity, non-numbers) → null. */
+function numOrNull(v: unknown): number | null {
+  if (typeof v !== "number") return null;
+  if (!Number.isFinite(v)) return null;
+  return v;
+}
+
+/**
+ * Validate one connection requirement from the wire, or return null to drop a
+ * malformed element. The card reads `provider`/`connected`/`kind`/`connectUrl`
+ * directly (e.g. `providerLabel(r.provider)` calls `.toLowerCase()`), so an
+ * element with a non-string provider would throw at render — validate the
+ * shape rather than trusting the wire blindly.
+ *
+ * @param raw One element of a proposal's `requirements` array.
+ * @returns The requirement with only validated fields, or null when `provider`
+ *          is not a non-empty string or `connected` is not a boolean. `kind`
+ *          and `connectUrl` are optional: an unrecognised value is omitted
+ *          rather than rejecting the whole element.
+ */
+function parseRequirement(raw: unknown): ConnectionRequirement | null {
+  if (!raw || typeof raw !== "object") return null;
+  const r = raw as Record<string, unknown>;
+  if (typeof r.provider !== "string" || r.provider === "") return null;
+  if (typeof r.connected !== "boolean") return null;
+  const kind =
+    r.kind === "integration" || r.kind === "github_app" ? r.kind : undefined;
+  // connectUrl is `string | null` on the wire; anything else is dropped to
+  // undefined so the card falls back to its kind-based default.
+  const connectUrl =
+    typeof r.connectUrl === "string" || r.connectUrl === null
+      ? r.connectUrl
+      : undefined;
+  return {
+    provider: r.provider,
+    connected: r.connected,
+    ...(kind ? { kind } : {}),
+    ...(connectUrl !== undefined ? { connectUrl } : {}),
+  };
+}
+
+/**
+ * Parse the `requirements` array of a proposal, keeping only the entries that
+ * `parseRequirement` accepts.
+ *
+ * A non-array input yields undefined, so a proposal that carries no
+ * requirements keeps the field absent instead of gaining an empty list. Both
+ * the live `tool_proposal` path and the restore-from-history path go through
+ * `parseRequirement`, so they keep the SAME fields — in particular `kind` and
+ * `connectUrl`, which the card uses to tell a missing GitHub App installation
+ * ("GitHub App / not installed", Install link) from a missing OAuth connection
+ * ("GitHub / not connected", Connect link).
+ */
+function parseRequirements(v: unknown): ConnectionRequirement[] | undefined {
+  if (!Array.isArray(v)) return undefined;
+  return v
+    .map((entry) => parseRequirement(entry))
+    .filter((entry): entry is ConnectionRequirement => entry !== null);
+}
+
+/**
+ * Turn one parsed SSE frame — its event name and already JSON-parsed payload —
+ * into a typed stream event.
+ *
+ * Returns null, so the frame is dropped, when the payload is not a plain
+ * object, when the event name is not one handled here (e.g. the `ping`
+ * keepalive), or when a field that event requires is missing or malformed.
+ */
+function toStreamEvent(
+  event: string | null,
+  data: unknown,
+): AssistantStreamEvent | null {
+  const payload = asObject(data);
+  if (payload === null) return null;
+
+  // tool_call, tool_result and tool_proposal all need a callId and a name.
+  const toolIdentity = (): { callId: string; name: string } | null => {
+    const callId = reqStr(payload.callId);
+    const name = reqStr(payload.name);
+    return callId !== null && name !== null ? { callId, name } : null;
+  };
+
+  // delta and reasoning carry a `text` string; "" is valid, a non-string is not.
+  const textBody = (): { text: string } | null =>
+    typeof payload.text === "string" ? { text: payload.text } : null;
+
+  switch (event) {
+    case "thread": {
+      const threadId = reqStr(payload.threadId);
+      const turnId = reqStr(payload.turnId);
+      if (threadId === null || turnId === null) return null;
+      const model = reqStr(payload.model);
+      return { type: "thread", data: { threadId, turnId, model } };
+    }
+
+    case "delta": {
+      const body = textBody();
+      return body === null ? null : { type: "delta", data: body };
+    }
+
+    case "reasoning": {
+      const body = textBody();
+      return body === null ? null : { type: "reasoning", data: body };
+    }
+
+    case "tool_call": {
+      const tool = toolIdentity();
+      return tool === null ? null : { type: "tool_call", data: tool };
+    }
+
+    case "tool_result": {
+      const tool = toolIdentity();
+      if (tool === null) return null;
+      return {
+        type: "tool_result",
+        data: {
+          ...tool,
+          ok: Boolean(payload.ok),
+          output: payload.output,
+          error: payload.error as
+            | { code: string; message: string }
+            | undefined,
+        },
+      };
+    }
+
+    case "tool_proposal": {
+      const tool = toolIdentity();
+      if (tool === null) return null;
+      return {
+        type: "tool_proposal",
+        data: {
+          // reqStr already maps null/undefined (and any non-string) to null.
+          proposalId: reqStr(payload.proposalId),
+          ...tool,
+          args: payload.args,
+          requirements: parseRequirements(payload.requirements),
+        },
+      };
+    }
+
+    case "usage": {
+      // Never dropped: each counter degrades to null on its own.
+      const promptTokens = numOrNull(payload.promptTokens);
+      const completionTokens = numOrNull(payload.completionTokens);
+      const costUsd = numOrNull(payload.costUsd);
+      const balanceUsd = numOrNull(payload.balanceUsd);
+      const replayed = Boolean(payload.replayed);
+      return {
+        type: "usage",
+        data: { promptTokens, completionTokens, costUsd, balanceUsd, replayed },
+      };
+    }
+
+    case "done": {
+      const turnId = reqStr(payload.turnId);
+      const status = reqStr(payload.status);
+      if (turnId === null || status === null) return null;
+      return {
+        type: "done",
+        data: {
+          turnId,
+          status,
+          proposed: Boolean(payload.proposed),
+          capped: Boolean(payload.capped),
+        },
+      };
+    }
+
+    case "error": {
+      // Never dropped: missing fields fall back to a generic failure.
+      const code = reqStr(payload.code) ?? "STREAM_FAILED";
+      const message = reqStr(payload.message) ?? "The assistant stream failed";
+      return { type: "error", data: { code, message } };
+    }
+
+    default:
+      return null;
+  }
+}
+
+/**
+ * Turn a non-2xx chat response into an `error` stream event. Pre-stream failures
+ * (auth, validation, insufficient balance, busy thread) arrive as a JSON body
+ * `{ success: false, error: { code, message } }` instead of an SSE stream. A
+ * missing field — or a body that is not JSON at all — falls back to a code and
+ * message derived from the HTTP status. Never rejects.
+ *
+ * @param res - The failed response; its body is consumed.
+ * @returns An `error` event describing the failure.
+ */
+async function readErrorEvent(res: Response): Promise<AssistantStreamEvent> {
+  // Status-derived defaults, used for any field the body does not supply.
+  let code = `HTTP_${res.status}`;
+  let message = `Request failed (${res.status})`;
+  try {
+    const body = (await res.json()) as {
+      error?: { code?: unknown; message?: unknown } | null;
+    } | null;
+    const err = body?.error;
+    // The body is untrusted JSON: only accept fields that really are strings.
+    if (typeof err?.code === "string") code = err.code;
+    if (typeof err?.message === "string") message = err.message;
+  } catch {
+    // Body was empty or not JSON (e.g. a proxy's HTML error page): keep defaults.
+  }
+  return { type: "error", data: { code, message } };
+}
+
+/**
+ * Parse one restored proposal from the history payload into a `PendingProposal`.
+ * Mirrors the live `tool_proposal` event shape: a server-minted id, the tool
+ * call id + name, the stored args, and — for an authoring proposal — the
+ * connection requirements the card renders.
+ *
+ * @returns The proposal, or null when `raw` is not an object or its
+ *   `proposalId`, `callId` or `name` is not a non-empty string. */
+function parseRestoredProposal(raw: unknown): PendingProposal | null {
+  if (!raw || typeof raw !== "object") return null;
+  const r = raw as Record<string, unknown>;
+  const { proposalId, callId, name } = r;
+  if (typeof proposalId !== "string" || proposalId === "") return null;
+  if (typeof callId !== "string" || callId === "") return null;
+  if (typeof name !== "string" || name === "") return null;
+  // Reuse the live path's validator so both paths drop malformed entries the
+  // same way; it returns undefined when `requirements` is not an array.
+  const requirements = parseRequirements(r.requirements);
+  return {
+    proposalId,
+    callId,
+    name,
+    args: r.args,
+    ...(requirements ? { requirements } : {}),
+  };
+}
+
+/**
+ * Build an assistant client bound to one host's transport. Trailing slashes on
+ * `config.baseUrl` are trimmed, `credentials` defaults to "include", and
+ * `config.headers` is re-evaluated on every request. The returned methods carry
+ * no module state, so a host may create one client per config. */
+export function createAssistantClient(
+  config: AssistantClientConfig,
+): AssistantClient {
+  const base = config.baseUrl.replace(/\/+$/, "");
+  const credentials: RequestCredentials = config.credentials ?? "include";
+  const authHeaders = (): Record<string, string> => config.headers?.() ?? {};
+  const url = (path: string): string => `${base}${path}`;
+
+  /**
+   * POST JSON and flatten the response to `{ success, data, error }`. A non-2xx
+   * yields the server's `error.message`, or `HTTP <status>` when the body has
+   * none or is not JSON. On a 2xx the body's `data` envelope is unwrapped when
+   * present. Never rejects: transport failures come back as `success: false`. */
+  async function postJson<T>(
+    path: string,
+    body: unknown,
+  ): Promise<{ success: boolean; data?: T; error?: string }> {
+    try {
+      const res = await fetch(url(path), {
+        method: "POST",
+        headers: { ...authHeaders(), "Content-Type": "application/json" },
+        credentials,
+        body: JSON.stringify(body),
+      });
+      if (!res.ok) {
+        // Parse defensively: an unparseable error body must not replace the
+        // HTTP status with a JSON syntax-error message.
+        const failure = (await res.json().catch(() => null)) as {
+          error?: { message?: string };
+        } | null;
+        const error = failure?.error?.message || `HTTP ${res.status}`;
+        return { success: false, error };
+      }
+      const json = (await res.json()) as { data?: T } | null;
+      return { success: true, data: (json?.data ?? json) as T };
+    } catch (err) {
+      return {
+        success: false,
+        error: err instanceof Error ? err.message : "Request failed",
+      };
+    }
+  }
+
+  return {
+    async fetchModels(signal) {
+      try {
+        const res = await fetch(url("/models"), {
+          method: "GET",
+          headers: authHeaders(),
+          credentials,
+          signal,
+        });
+        if (!res.ok) return { ok: false, data: EMPTY_MODELS };
+        const body = (await res.json()) as {
+          default?: unknown;
+          models?: Array<{
+            slug?: unknown;
+            label?: unknown;
+            promptUsdPerMillion?: unknown;
+            contextTokens?: unknown;
+          } | null>;
+        };
+        // A well-formed response must carry a models array; anything else is
+        // treated as a failure so the caller retries rather than caching garbage.
+        if (!Array.isArray(body.models))
+          return { ok: false, data: EMPTY_MODELS };
+        const models: AssistantModelOption[] = [];
+        for (const m of body.models) {
+          // Skip null / slug-less entries; one bad row must not void the catalog.
+          if (!m || typeof m.slug !== "string" || m.slug === "") continue;
+          const label = typeof m.label === "string" ? m.label : m.slug;
+          const option: AssistantModelOption = { slug: m.slug, label };
+          if (typeof m.promptUsdPerMillion === "number") {
+            option.promptUsdPerMillion = m.promptUsdPerMillion;
+          }
+          if (typeof m.contextTokens === "number") {
+            option.contextTokens = m.contextTokens;
+          }
+          models.push(option);
+        }
+        return {
+          // Empty ⇒ catalog unavailable: report not-ok so the caller retries next
+          // mount instead of caching an empty picker for the session.
+          ok: models.length > 0,
+          data: {
+            default: typeof body.default === "string" ? body.default : null,
+            models,
+          },
+        };
+      } catch {
+        return { ok: false, data: EMPTY_MODELS };
+      }
+    },
+
+    async fetchThreads(signal) {
+      try {
+        const res = await fetch(url("/threads"), {
+          method: "GET",
+          headers: authHeaders(),
+          credentials,
+          signal,
+        });
+        if (!res.ok) return null;
+        const body = (await res.json()) as {
+          threads?: Array<{
+            id?: unknown;
+            title?: unknown;
+            createdAt?: unknown;
+            updatedAt?: unknown;
+          } | null>;
+        };
+        if (!Array.isArray(body.threads)) return null;
+        const out: AssistantThreadSummary[] = [];
+        for (const t of body.threads) {
+          // Skip null / id-less rows; one bad row must not discard the whole list.
+          if (!t || typeof t.id !== "string" || t.id === "") continue;
+          out.push({
+            id: t.id,
+            title: typeof t.title === "string" ? t.title : null,
+            createdAt: typeof t.createdAt === "string" ? t.createdAt : "",
+            updatedAt: typeof t.updatedAt === "string" ? t.updatedAt : "",
+          });
+        }
+        return out;
+      } catch {
+        return null;
+      }
+    },
+
+    async fetchThreadHistory(threadId, signal) {
+      try {
+        const res = await fetch(
+          url(`/threads/${encodeURIComponent(threadId)}/messages`),
+          {
+            method: "GET",
+            headers: authHeaders(),
+            credentials,
+            signal,
+          },
+        );
+        // A 404 means the thread no longer exists — distinct from a transient
+        // failure: the caller must drop the dead id rather than keep retrying it.
+        if (res.status === 404) return { status: "gone" };
+        if (!res.ok) return { status: "error" };
+        const body = (await res.json()) as {
+          messages?: Array<{ id?: unknown; role?: unknown; text?: unknown } | null>;
+          proposals?: unknown[];
+        };
+        if (!Array.isArray(body.messages)) return { status: "error" };
+        const out: ChatMessage[] = [];
+        for (const m of body.messages) {
+          // Skip malformed rows (null included): never throw or draw a blank bubble.
+          if (!m || typeof m !== "object") continue;
+          const id = typeof m.id === "string" ? m.id : null;
+          const role = m.role === "user" || m.role === "assistant" ? m.role : null;
+          const text = typeof m.text === "string" ? m.text : null;
+          if (id && role && text != null) out.push({ id, role, text });
+        }
+        // Restore unconfirmed proposals so the card survives reload. Absent on an
+        // older server (or a non-tool deployment) → an empty list, no cards.
+        const proposals: PendingProposal[] = [];
+        if (Array.isArray(body.proposals)) {
+          for (const p of body.proposals) {
+            const parsed = parseRestoredProposal(p);
+            if (parsed) proposals.push(parsed);
+          }
+        }
+        return { status: "ok", messages: out, proposals };
+      } catch {
+        // Network failure, abort, or bad JSON: all surface as a transient error.
+        return { status: "error" };
+      }
+    },
+
+    async streamChat(req, onEvent, signal) {
+      // Raw fetch, not `postJson`: that helper reads `res.json()`, which would
+      // consume the body and defeat streaming. CSRF is covered the same way as
+      // every other authenticated POST in the app — a SameSite cookie plus the
+      // configured `X-Requested-With` marker (in `authHeaders`) and Origin
+      // validation server-side; this only adds the SSE `Accept`.
+      const res = await fetch(url("/chat"), {
+        method: "POST",
+        headers: {
+          ...authHeaders(),
+          "Content-Type": "application/json",
+          Accept: "text/event-stream",
+        },
+        credentials,
+        body: JSON.stringify(req),
+        signal,
+      });
+
+      if (!res.ok) {
+        onEvent(await readErrorEvent(res));
+        return;
+      }
+      if (!res.body) {
+        onEvent({
+          type: "error",
+          data: {
+            code: "NO_BODY",
+            message: "The assistant stream is unavailable",
+          },
+        });
+        return;
+      }
+
+      // A well-formed turn ends with a `done` (or, on failure, an `error`) frame.
+      // If the body closes without one, settle anyway so the UI doesn't hang in
+      // the streaming state forever.
+      let settled = false;
+      await readSSEEvents(res.body, (frame) => {
+        const ev = toStreamEvent(frame.eventType ?? null, frame.data);
+        if (!ev) return;
+        if (ev.type === "done" || ev.type === "error") settled = true;
+        onEvent(ev);
+      });
+      if (!settled) {
+        onEvent({
+          type: "error",
+          data: {
+            code: "STREAM_CLOSED",
+            message: "The assistant stream ended unexpectedly",
+          },
+        });
+      }
+    },
+
+    async confirmProposal(proposalId) {
+      const res = await postJson<{
+        success?: boolean;
+        output?: unknown;
+        retryable?: boolean;
+        error?: { code: string; message: string };
+      }>("/tools/execute", { proposalId });
+
+      // postJson reports transport/HTTP success in `success`; on a non-2xx it has
+      // already flattened the server's error to a message string.
+      if (!res.success) {
+        return {
+          ok: false,
+          error: res.error ?? "The action could not be completed",
+        };
+      }
+      const body = res.data;
+      if (body?.success) {
+        return { ok: true, output: body.output, retryable: body.retryable };
+      }
+      return {
+        ok: false,
+        error: body?.error?.message ?? "The action could not be completed",
+      };
+    },
+
+    async deleteThread(threadId) {
+      try {
+        const res = await fetch(url(`/threads/${encodeURIComponent(threadId)}`), {
+          method: "DELETE",
+          headers: authHeaders(),
+          credentials,
+        });
+        // 404 ⇒ already gone; treat as success so a retry/double-delete is a no-op.
+        return { ok: res.ok || res.status === 404 };
+      } catch {
+        return { ok: false };
+      }
+    },
+  };
+}
diff --git a/src/assistant/index.ts b/src/assistant/index.ts
new file mode 100644
index 0000000..1557026
--- /dev/null
+++ b/src/assistant/index.ts
@@ -0,0 +1,35 @@
+/**
+ * `@tangle-network/agent-app/assistant` — the in-app assistant/copilot surface,
+ * portable across hosts. A host supplies a transport via {@link createAssistantClient}
+ * and {@link AssistantClientProvider}; the dock, panel, hooks, and proposal card
+ * consume it and render on web-react's chat components (`ChatComposer`,
+ * `ChatMessages`, `ModelPicker`). The markdown renderer, per-tool detail
+ * renderers, and workflow-graph renderer are injected so this subpath carries no
+ * product-specific dependency.
+ */
+
+export * from "./types";
+export * from "./client";
+export * from "./client-context";
+export {
+ useAssistantChat,
+ type AssistantChat,
+ type UseAssistantChatOptions,
+} from "./useAssistantChat";
+export { useAssistantModels } from "./useAssistantModels";
+export { useAssistantThreads, type AssistantThreads } from "./useAssistantThreads";
+
+export { AssistantDock, type AssistantDockProps } from "./AssistantDock";
+export { AssistantPanel, type AssistantPanelProps } from "./AssistantPanel";
+export {
+ AssistantTranscript,
+ type AssistantTranscriptProps,
+ adaptTranscript,
+ assistantIsThinking,
+} from "./transcript";
+export { ProposalCard, type ProposalCardProps } from "./ProposalCard";
+export {
+ AssistantLauncherProvider,
+ useAssistantLauncher,
+ type AssistantLauncher,
+} from "./launcher";
diff --git a/src/assistant/launcher.tsx b/src/assistant/launcher.tsx
new file mode 100644
index 0000000..0ee6085
--- /dev/null
+++ b/src/assistant/launcher.tsx
@@ -0,0 +1,69 @@
+/**
+ * Shared launcher state for the assistant dock. The dock is mounted once in the
+ * app shell, but other surfaces (e.g. the Workflows page) need to open it and
+ * prefill the composer with a starter prompt. This context owns the dock's
+ * open state plus a one-shot composer seed, so any `/app/*` surface can call
+ * `openAssistant("Create a workflow that …")` without reaching into the dock.
+ */
+
+import {
+ createContext,
+ type ReactNode,
+ useCallback,
+ useContext,
+ useMemo,
+ useState,
+} from "react";
+
+export interface AssistantLauncher {
+ /** Whether the assistant drawer is currently open. */
+ open: boolean;
+ /** A one-shot starter prompt to prefill the composer with, or null when none is pending. */
+ seed: string | null;
+ /** Open the drawer. A supplied `seed` replaces the pending one; omitting it keeps it. */
+ openAssistant: (seed?: string) => void;
+ closeAssistant: () => void; // Close the drawer; a pending seed is left untouched.
+ /** Clear the pending seed once the composer has applied it (consume-once). */
+ clearSeed: () => void;
+}
+
+/** Launcher context; null when no `AssistantLauncherProvider` is mounted above. */
+const AssistantLauncherContext = createContext<AssistantLauncher | null>(null);
+
+/**
+ * Owns the dock's open flag and one-shot composer seed, and publishes them —
+ * with the callbacks that change them — to its descendants through context.
+ */
+export function AssistantLauncherProvider({ children }: { children: ReactNode }) {
+  const [open, setOpen] = useState(false);
+  const [seed, setSeed] = useState<string | null>(null);
+  const openAssistant = useCallback((next?: string) => {
+    // Only replace the seed when one is supplied, so opening the drawer with no
+    // argument never clobbers a starter another caller just set.
+    if (next != null) setSeed(next);
+    setOpen(true);
+  }, []);
+  const closeAssistant = useCallback(() => setOpen(false), []);
+  const clearSeed = useCallback(() => setSeed(null), []);
+
+  // Memoized so consumers only see a new object when state actually changes.
+  const value = useMemo<AssistantLauncher>(
+    () => ({ open, seed, openAssistant, closeAssistant, clearSeed }),
+    [open, seed, openAssistant, closeAssistant, clearSeed],
+  );
+  return (
+    <AssistantLauncherContext.Provider value={value}>
+      {children}
+    </AssistantLauncherContext.Provider>
+  );
+}
+
+/** Read the launcher from context; throws when no provider is mounted above. */
+export function useAssistantLauncher(): AssistantLauncher {
+  const launcher = useContext(AssistantLauncherContext);
+  if (launcher) return launcher;
+  // A null context value means the caller sits outside the provider tree.
+  const reason =
+    "useAssistantLauncher must be used within an AssistantLauncherProvider";
+  throw new Error(reason);
+}
diff --git a/src/assistant/persistence.test.ts b/src/assistant/persistence.test.ts
new file mode 100644
index 0000000..680e19b
--- /dev/null
+++ b/src/assistant/persistence.test.ts
@@ -0,0 +1,85 @@
+import { afterEach, beforeEach, describe, expect, it } from "vitest";
+import { loadThread, saveThread } from "./persistence";
+
+// platform-web's vitest runs in the node environment, which has no Web Storage.
+// Install a minimal in-memory `localStorage` so the persistence functions use
+// their real code path; the backing Map is returned so tests can inspect it.
+function installMemoryStorage(): Map<string, string> {
+  const store = new Map<string, string>();
+  const storage: Storage = {
+    get length() {
+      return store.size;
+    },
+    clear: () => store.clear(),
+    getItem: (k) => store.get(k) ?? null,
+    key: (i) => Array.from(store.keys())[i] ?? null,
+    removeItem: (k) => {
+      store.delete(k);
+    },
+    setItem: (k, v) => {
+      store.set(k, String(v));
+    },
+  };
+  Object.defineProperty(globalThis, "localStorage", {
+    configurable: true,
+    value: storage,
+  });
+  return store;
+}
+
+let store: Map<string, string>; // Raw entries behind the fake `localStorage`.
+beforeEach(() => {
+  store = installMemoryStorage(); // Fresh, empty storage for every test.
+});
+afterEach(() => {
+  store.clear();
+});
+
+describe("per-user thread-id persistence", () => {
+  it("round-trips the thread id and model for a signed-in user", () => {
+    const thread = { threadId: "T_A", model: "anthropic/x" };
+    saveThread("userA", thread);
+    expect(loadThread("userA")).toEqual({ threadId: "T_A", model: "anthropic/x" });
+  });
+
+  it("persists only the thread id + model, never message text", () => {
+    saveThread("userA", { threadId: "T_A", model: null });
+    // Read the raw entry straight from the backing map, bypassing loadThread.
+    const serialized = store.get("assistant:v1:userA") ?? "{}";
+    expect(JSON.parse(serialized)).toEqual({ threadId: "T_A", model: null });
+    expect(serialized).not.toMatch(/messages/);
+  });
+
+  it("isolates thread ids and models between users", () => {
+    saveThread("userA", { threadId: "T_A", model: "m_a" });
+    saveThread("userB", { threadId: "T_B", model: "m_b" });
+    const [forA, forB] = [loadThread("userA"), loadThread("userB")];
+    expect(forA).toEqual({ threadId: "T_A", model: "m_a" });
+    expect(forB).toEqual({ threadId: "T_B", model: "m_b" });
+  });
+
+  it("returns null thread id and model when nothing is stored", () => {
+    expect(loadThread("userA")).toEqual({ threadId: null, model: null });
+  });
+});
+
+describe("anonymous sessions are not persisted", () => {
+  // What a load for a null user must always produce.
+  const empty = { threadId: null, model: null };
+  it("never writes under a null user", () => {
+    saveThread(null, { threadId: "T", model: null });
+    expect(store.size).toBe(0);
+  });
+
+  it("always loads empty for a null user", () => {
+    expect(loadThread(null)).toEqual(empty);
+  });
+
+  it("does not let one anonymous session read another's data", () => {
+    // A leftover entry under the legacy "anon" key must stay invisible to a
+    // null-user load — anonymous users share nothing.
+    const legacy = JSON.stringify({ threadId: "leak", model: null });
+    store.set("assistant:v1:anon", legacy);
+    expect(loadThread(null)).toEqual(empty);
+  });
+});
diff --git a/src/assistant/persistence.ts b/src/assistant/persistence.ts
new file mode 100644
index 0000000..bf9086e
--- /dev/null
+++ b/src/assistant/persistence.ts
@@ -0,0 +1,64 @@
+/**
+ * Thread-id persistence for the assistant panel. Only the opaque server thread
+ * id is kept in localStorage so a reload continues the same server-side
+ * conversation. The message transcript is deliberately NOT cached: it can
+ * contain workflow definitions, integration data, or pasted secrets, and
+ * localStorage survives logout and is readable by any script on the origin —
+ * caching it would be a privacy regression on shared devices.
+ *
+ * The visible transcript is instead rehydrated from the server on load (the
+ * `GET /assistant/threads/:id/messages` endpoint, called by `useAssistantChat`),
+ * keyed by this persisted thread id — so a reload restores the prior
+ * conversation without ever caching its contents locally.
+ *
+ * Keyed by user id so two accounts on one browser never share a thread.
+ * Anonymous (null-user) sessions are NOT persisted — otherwise every
+ * unauthenticated visitor on a shared device would read the same "anon" thread.
+ */
+
+const VERSION = "v1"; // Embedded in every storage key built by keyFor.
+
+export interface PersistedThread {
+ threadId: string | null; // Opaque server thread id; null when nothing is stored.
+ /** The user's last-selected model slug — a non-sensitive UI preference, so
+ * unlike the transcript it is safe to cache. null → use the server default. */
+ model: string | null;
+}
+
+/** Build the localStorage key for `userId`. A null (or empty) id means an
+ * anonymous session, which gets no key because it is never persisted. */
+function keyFor(userId: string | null): string | null {
+  return userId ? ["assistant", VERSION, userId].join(":") : null;
+}
+
+/** Load `userId`'s stored thread. Never throws: any failure yields nulls. */
+export function loadThread(userId: string | null): PersistedThread {
+  const key = keyFor(userId);
+  if (!key) return { threadId: null, model: null };
+  try {
+    const raw = localStorage.getItem(key) || "null"; // missing entry → JSON null
+    const parsed = (JSON.parse(raw) ?? {}) as Partial<PersistedThread>;
+    return {
+      threadId: typeof parsed.threadId === "string" ? parsed.threadId : null,
+      model: typeof parsed.model === "string" ? parsed.model : null,
+    };
+  } catch {
+    return { threadId: null, model: null };
+  }
+}
+
+/** Persist `thread` for `userId`; a no-op for anonymous (null) users. Only the
+ * thread id and model are written, whatever else `thread` may carry. */
+export function saveThread(
+  userId: string | null,
+  thread: PersistedThread,
+): void {
+  const storageKey = keyFor(userId);
+  if (!storageKey) return;
+  try {
+    const { threadId, model } = thread;
+    localStorage.setItem(storageKey, JSON.stringify({ threadId, model }));
+  } catch {
+    // Best-effort only: storage can be unavailable (private mode, quota).
+  }
+}
diff --git a/src/assistant/presentation.test.ts b/src/assistant/presentation.test.ts
new file mode 100644
index 0000000..7fafc8a
--- /dev/null
+++ b/src/assistant/presentation.test.ts
@@ -0,0 +1,363 @@
+import { describe, expect, it } from "vitest";
+import {
+ describeFailure,
+ describeOutcome,
+ describeProposal,
+ isLowBalance,
+ presentError,
+ resolveConfirmation,
+} from "./presentation";
+import type { PendingProposal } from "./types";
+
+function proposal(name: string, args: unknown): PendingProposal {
+  return { args, name, callId: "c", proposalId: "p" }; // placeholder ids
+}
+
+describe("presentError", () => {
+  it("maps INSUFFICIENT_BALANCE to an Add credits CTA pointing at billing", () => {
+    const serverMessage = "Credit balance is exhausted";
+    const { cta, message } = presentError("INSUFFICIENT_BALANCE", serverMessage);
+    // The remedy is a top-up, so the CTA must lead to the billing page and the
+    // copy must mention being out of credits.
+    expect(cta).toEqual({ label: "Add credits", to: "/app/billing" });
+    expect(message).toMatch(/out of credits/i);
+  });
+
+  it("maps INTEGRATION_DISCONNECTED to a Connect CTA pointing at integrations", () => {
+    const { cta } = presentError(
+      "INTEGRATION_DISCONNECTED",
+      "GitHub is not connected.",
+    );
+    expect(cta).toEqual({
+      to: "/app/integrations",
+      label: "Connect an integration",
+    });
+  });
+
+  it("shows model-misconfiguration without a CTA", () => {
+    const { cta, message } = presentError("MODEL_ACCESS_UNCONFIGURED", "x");
+    expect(cta).toBeNull();
+    expect(message).toMatch(/model access/i);
+  });
+
+  it("falls back to the server message for unknown codes", () => {
+    const { cta, message } = presentError("WEIRD_CODE", "Something specific happened");
+    expect(cta).toBeNull();
+    expect(message).toBe("Something specific happened");
+  });
+});
+
+describe("describeProposal", () => {
+  it("surfaces the YAML for create_workflow", () => {
+    const view = describeProposal(
+      proposal("create_workflow", { yaml: "name: x" }),
+    );
+    expect(view.title).toBe("Create workflow");
+    expect(view.preview).toEqual({
+      label: "Workflow definition",
+      content: "name: x",
+      kind: "workflow",
+    });
+    expect(view.fields).toEqual([]);
+  });
+
+  it("surfaces the workflow YAML plus named skills for author_workflow", () => {
+    const view = describeProposal(
+      proposal("author_workflow", {
+        yaml: "name: pr",
+        skills: [
+          {
+            name: "pr-reviewer",
+            description: "Reviews PRs",
+            systemPrompt: "x",
+          },
+        ],
+      }),
+    );
+    expect(view.title).toBe("Create workflow");
+    expect(view.preview).toEqual({
+      label: "Workflow definition",
+      content: "name: pr",
+      kind: "workflow",
+    });
+    expect(view.skills).toEqual([
+      { name: "pr-reviewer", description: "Reviews PRs" },
+    ]);
+  });
+
+  it("surfaces the YAML plus the id for update_workflow", () => {
+    const view = describeProposal(
+      proposal("update_workflow", { id: "wf_1", yaml: "name: y" }),
+    );
+    expect(view.preview).toEqual({
+      label: "Workflow definition",
+      content: "name: y",
+      kind: "workflow",
+    });
+    expect(view.fields).toEqual([{ label: "Workflow id", value: "wf_1" }]);
+  });
+
+  it("previews a skill's instructions for create_skill", () => {
+    const view = describeProposal(
+      proposal("create_skill", {
+        name: "pr-reviewer",
+        description: "Reviews PRs",
+        systemPrompt: "Review the diff.",
+      }),
+    );
+    expect(view.title).toBe("Create skill");
+    expect(view.preview).toEqual({
+      label: "Instructions",
+      content: "Review the diff.",
+      kind: "text",
+    });
+    expect(view.fields).toEqual([
+      { label: "Name", value: "pr-reviewer" },
+      { label: "Description", value: "Reviews PRs" },
+    ]);
+  });
+
+  it("titles set_workflow_enabled by the target state", () => {
+    expect(
+      describeProposal(
+        proposal("set_workflow_enabled", { id: "w", enabled: true }),
+      ).title,
+    ).toBe("Enable workflow");
+    expect(
+      describeProposal(
+        proposal("set_workflow_enabled", { id: "w", enabled: false }),
+      ).title,
+    ).toBe("Disable workflow");
+  });
+
+  it("lists only the provided fields for create_api_key", () => {
+    const view = describeProposal(
+      proposal("create_api_key", { name: "ci", budgetUsd: 5 }),
+    );
+    expect(view.preview).toBeNull();
+    expect(view.fields).toEqual([
+      { label: "Name", value: "ci" },
+      { label: "Budget (USD)", value: "5" },
+    ]);
+  });
+
+  it("falls back to a humanized title + JSON fields for an unknown tool", () => {
+    const view = describeProposal(
+      proposal("do_something_new", { a: 1, b: "two" }),
+    );
+    expect(view.title).toBe("Do something new");
+    expect(view.fields).toEqual([
+      { label: "a", value: "1" },
+      { label: "b", value: "two" },
+    ]);
+  });
+
+  it("yields a null preview (no empty block) when the workflow yaml is missing", () => {
+    expect(
+      describeProposal(proposal("create_workflow", null)).preview,
+    ).toBeNull();
+    expect(
+      describeProposal(proposal("create_workflow", {})).preview,
+    ).toBeNull();
+  });
+});
+
+describe("describeOutcome", () => {
+  it("names the created workflow when present", () => {
+    const output = {
+      created: true,
+      workflow: { name: "nightly" },
+    };
+    const note = describeOutcome("create_workflow", output);
+    expect(note).toBe('Created workflow "nightly".');
+  });
+
+  it("has a sensible default for unknown tools", () => {
+    expect(describeOutcome("mystery", {})).toBe("Action completed.");
+  });
+});
+
+describe("describeFailure", () => {
+  it("joins structured { message } errors", () => {
+    const errors = [{ message: "a" }, { message: "b" }];
+    // Messages are joined with "; " in their original order.
+    expect(describeFailure({ errors })).toBe("a; b");
+  });
+
+  it("surfaces bare string error elements (not just { message } objects)", () => {
+    // Tools may report plain strings; those must reach the user instead of
+    // being replaced by the generic fallback line.
+    const text = "something went wrong";
+    const described = describeFailure({ errors: [text] });
+    expect(described).toBe("something went wrong");
+  });
+
+  it("falls back to message, then not-found/conflict, then a generic line", () => {
+    const conflictLine = "It changed since it was loaded. Try again.";
+    const genericLine = "The action could not be completed.";
+    expect(describeFailure({ message: "boom" })).toBe("boom");
+    expect(describeFailure({ notFound: true })).toBe("That no longer exists.");
+    expect(describeFailure({ conflict: true })).toBe(conflictLine);
+    expect(describeFailure({})).toBe(genericLine);
+  });
+});
+
+describe("isLowBalance", () => {
+  it("is true below the threshold, false at/above it, false when unknown", () => {
+    // Inputs in order: below the threshold, exactly at it, above it, and an
+    // unknown (null) balance.
+    const verdicts = [0.5, 1, 5, null].map((usd) => isLowBalance(usd));
+    expect(verdicts).toEqual([true, false, false, false]);
+  });
+});
+
+describe("resolveConfirmation", () => {
+  it("notes the outcome and clears the error on a clean success", () => {
+    const resolved = resolveConfirmation("create_workflow", {
+      ok: true,
+      output: { created: true, workflow: { name: "nightly" } },
+    });
+    expect(resolved.statusText).toBe('Created workflow "nightly".');
+    expect(resolved.error).toBeNull();
+  });
+
+  it("flags a disconnected integration from the structured NOT_CONNECTED outcome", () => {
+    const resolved = resolveConfirmation("invoke_integration", {
+      ok: true,
+      output: {
+        ok: false,
+        code: "NOT_CONNECTED",
+        message:
+          'No active "github" connection. Connect github first, then retry.',
+      },
+    });
+    expect(resolved.statusText).toBeNull();
+    expect(resolved.error).toEqual({
+      code: "INTEGRATION_DISCONNECTED",
+      message:
+        'No active "github" connection. Connect github first, then retry.',
+    });
+  });
+
+  it("does NOT mislabel a success whose output merely contains 'not connected' text", () => {
+    // Regression: scanning the whole serialized output used to false-positive on
+    // an unrelated field. A successful create with such a name is a clean success.
+    const resolved = resolveConfirmation("create_workflow", {
+      ok: true,
+      output: { created: true, workflow: { name: "MyServiceNotConnected" } },
+    });
+    expect(resolved.error).toBeNull();
+    expect(resolved.statusText).toBe('Created workflow "MyServiceNotConnected".');
+  });
+
+  it("treats a non-NOT_CONNECTED integration outcome as a normal success note", () => {
+    // Only the NOT_CONNECTED signal maps to the connect CTA; a genuine success
+    // (ok: true) is just a completion note.
+    const resolved = resolveConfirmation("invoke_integration", {
+      ok: true,
+      output: { ok: true, output: { url: "https://example.com/issues/1" } },
+    });
+    expect(resolved.error).toBeNull();
+    expect(resolved.statusText).toBe("Integration action completed.");
+  });
+
+  it("maps a failed confirmation to TOOL_FAILED", () => {
+    const resolved = resolveConfirmation("create_workflow", {
+      ok: false,
+      error: "This proposal has expired",
+    });
+    expect(resolved.statusText).toBeNull();
+    expect(resolved.error).toEqual({
+      code: "TOOL_FAILED",
+      message: "This proposal has expired",
+    });
+  });
+
+  it("surfaces a rejected workflow create (created:false) as a failure, not a success note", () => {
+    // Regression: the create tools report failure by RETURNING { created:false,
+    // errors } inside an HTTP-200 { success:true } envelope. This used to fall
+    // through to describeOutcome → "Workflow created." with no error.
+    const resolved = resolveConfirmation("create_workflow", {
+      ok: true,
+      output: {
+        created: false,
+        errors: [
+          {
+            path: "do.0.agent.run",
+            message:
+              "action kind 'agent.run' is not available on this deployment",
+          },
+        ],
+      },
+    });
+    expect(resolved.statusText).toBeNull();
+    expect(resolved.error).toEqual({
+      code: "TOOL_FAILED",
+      message: "action kind 'agent.run' is not available on this deployment",
+    });
+  });
+
+  it("joins multiple workflow errors into the failure message", () => {
+    const resolved = resolveConfirmation("author_workflow", {
+      ok: true,
+      output: {
+        created: false,
+        errors: [
+          {
+            path: "connections (github)",
+            message: "no active github connection",
+          },
+          {
+            path: "on.provider_event.connection (github)",
+            message: "connect github first",
+          },
+        ],
+      },
+    });
+    expect(resolved.error?.code).toBe("TOOL_FAILED");
+    expect(resolved.error?.message).toBe(
+      "no active github connection; connect github first",
+    );
+  });
+
+  it("surfaces update_workflow notFound / conflict as failures", () => {
+    const notFound = resolveConfirmation("update_workflow", {
+      ok: true,
+      output: { updated: false, notFound: true },
+    });
+    expect(notFound.statusText).toBeNull();
+    expect(notFound.error?.code).toBe("TOOL_FAILED");
+
+    const conflict = resolveConfirmation("update_workflow", {
+      ok: true,
+      output: { updated: false, conflict: true },
+    });
+    expect(conflict.error?.code).toBe("TOOL_FAILED");
+  });
+
+  it("surfaces set_workflow_enabled not-found (ok:false) as a failure", () => {
+    const resolved = resolveConfirmation("set_workflow_enabled", {
+      ok: true,
+      output: { ok: false, notFound: true },
+    });
+    expect(resolved.statusText).toBeNull();
+    expect(resolved.error?.code).toBe("TOOL_FAILED");
+  });
+
+  it("surfaces delete_skill no-op (deleted:false) as a failure", () => {
+    const resolved = resolveConfirmation("delete_skill", {
+      ok: true,
+      output: { deleted: false, usedByWorkflowIds: [] },
+    });
+    expect(resolved.error?.code).toBe("TOOL_FAILED");
+  });
+
+  it("does not flag a successful toggle (ok:true) carrying other fields", () => {
+    const resolved = resolveConfirmation("set_workflow_enabled", {
+      ok: true,
+      output: { ok: true, workflow: { enabled: false } },
+    });
+    expect(resolved.error).toBeNull();
+    expect(resolved.statusText).toBe("Workflow disabled.");
+  });
+});
diff --git a/src/assistant/presentation.ts b/src/assistant/presentation.ts
new file mode 100644
index 0000000..2465173
--- /dev/null
+++ b/src/assistant/presentation.ts
@@ -0,0 +1,412 @@
+/**
+ * Pure view-model mappers for the assistant panel: how an error code becomes an
+ * inline message + actionable next step, and how a proposed tool call becomes a
+ * confirmation card. Kept free of React so the rendering decisions are unit
+ * testable in isolation.
+ */
+
+import type { PendingProposal } from "./types";
+
+/** USD balance below which the panel surfaces a low-balance warning. Mirrors
+ * the wallet warning threshold on the Billing page. */
+export const LOW_BALANCE_THRESHOLD = 1;
+
+export interface ErrorCta {
+ label: string;
+ to: string;
+}
+
+export interface ErrorView {
+ message: string;
+ cta: ErrorCta | null;
+}
+
+const ADD_CREDITS_CTA: ErrorCta = { label: "Add credits", to: "/app/billing" };
+const CONNECT_CTA: ErrorCta = {
+ label: "Connect an integration",
+ to: "/app/integrations",
+};
+
+/**
+ * Map a server error code + message to what the user sees and can do next.
+ * Codes with a clear remedy carry a CTA; the rest fall back to the server's own
+ * message, which is already written for the end user.
+ */
+export function presentError(code: string, message: string): ErrorView {
+ switch (code) {
+ case "INSUFFICIENT_BALANCE":
+ return {
+ message:
+ "You're out of credits. Add credits to keep using the assistant.",
+ cta: ADD_CREDITS_CTA,
+ };
+ case "MODEL_ACCESS_UNCONFIGURED":
+ return {
+ message:
+ "Model access isn't configured for your account yet. Please contact support.",
+ cta: null,
+ };
+ case "BILLING_UNAVAILABLE":
+ return {
+ message: "Billing is temporarily unavailable. Try again in a moment.",
+ cta: null,
+ };
+ case "TOO_MANY_STREAMS":
+ return {
+ message:
+ "You have too many assistant requests in flight. Wait a moment and retry.",
+ cta: null,
+ };
+ case "THREAD_BUSY":
+ case "TURN_IN_PROGRESS":
+ return {
+ message: "A previous request is still finishing. Try again shortly.",
+ cta: null,
+ };
+ case "INTEGRATION_DISCONNECTED":
+ return {
+ message: `${message} Connect the integration, then ask again.`,
+ cta: CONNECT_CTA,
+ };
+ case "TOOL_FAILED":
+ case "NETWORK":
+ return { message: message || "Something went wrong.", cta: null };
+ default:
+ return { message: message || "Something went wrong.", cta: null };
+ }
+}
+
+export interface ProposalField {
+ label: string;
+ value: string;
+}
+
+/** A new skill minted alongside a workflow, shown as a named line on the card
+ * so the user sees what's being created without the raw skills JSON. */
+export interface ProposalSkill {
+ name: string;
+ description: string | null;
+}
+
+export interface ProposalView {
+ /** Verb-first heading, e.g. "Create workflow". */
+ title: string;
+ /** A body preview with its own label — a workflow's YAML (`kind: "workflow"`,
+ * rendered as a node graph with a YAML toggle) or a skill's instructions
+ * (`kind: "text"`, shown verbatim). Null when the action has no body. */
+ preview: { label: string; content: string; kind: "workflow" | "text" } | null;
+ /** Scalar arguments to show as a key/value list. */
+ fields: ProposalField[];
+ /** New skills minted alongside a workflow (author_workflow); omitted otherwise. */
+ skills?: ProposalSkill[];
+}
+
+ /** View any value as a string-keyed record; null, primitives and arrays become `{}`. */
+ function asRecord(args: unknown): Record<string, unknown> {
+   const isPlainObject = args !== null && typeof args === "object" && !Array.isArray(args);
+   return isPlainObject ? (args as Record<string, unknown>) : {};
+ }
+
+ /** Display form of a value: "" for null/undefined, strings verbatim, else JSON — or `String(v)` where JSON yields nothing (functions, symbols). */
+ function str(v: unknown): string {
+   return v == null ? "" : typeof v === "string" ? v : (JSON.stringify(v) ?? String(v));
+ }
+
+ /** Pass `v` through untouched when it is a string with non-whitespace content;
+  * anything else is null, so callers can skip rendering an empty preview or field. */
+ function nonEmptyStr(v: unknown): string | null {
+   return typeof v !== "string" || v.trim() === "" ? null : v;
+ }
+
+ /** Turn the `skills` argument of author_workflow into the named lines shown on
+  * the card. Entries without a usable name are skipped; the result is undefined
+  * when the argument is not a non-empty array or no entry survives. */
+ function parseProposalSkills(v: unknown): ProposalSkill[] | undefined {
+   if (!Array.isArray(v) || v.length === 0) return undefined;
+   const lines = v.flatMap((entry: unknown): ProposalSkill[] => {
+     const rec = asRecord(entry);
+     const name = nonEmptyStr(rec.name);
+     // No usable name → nothing to show for this entry.
+     return name ? [{ name, description: nonEmptyStr(rec.description) }] : [];
+   });
+   return lines.length > 0 ? lines : undefined;
+ }
+
+ /** Humanize an unknown tool name (`set_workflow_enabled` → "Set workflow enabled"). */
+ export function humanizeToolName(name: string): string {
+   const spaced = name.replace(/_/g, " ").trim();
+   return spaced.charAt(0).toUpperCase() + spaced.slice(1);
+ }
+
+ /** Present-tense chip labels ("Validating workflow…") keyed by tool name. A Map,
+  * so a name such as "constructor" cannot resolve to an Object.prototype member. */
+ const TOOL_ACTIVITY_LABELS = new Map<string, string>([
+   ["get_workflow_schema", "Reading the workflow format"],
+   ["list_workflows", "Listing workflows"],
+   ["get_workflow", "Reading workflow"],
+   ["validate_workflow", "Validating workflow"],
+   ["list_skills", "Listing skills"],
+   ["get_skill", "Reading skill"],
+   ["list_integrations", "Checking integrations"],
+   ["get_credit_balance", "Checking balance"],
+   ["get_usage", "Checking usage"],
+   ["list_api_keys", "Listing API keys"],
+ ]);
+
+ /** Chip label for a tool; unmapped (e.g. newly added) tools fall back to the humanized name. */
+ export function describeToolActivity(name: string): string {
+   return TOOL_ACTIVITY_LABELS.get(name) ?? humanizeToolName(name);
+ }
+
+/**
+ * Describe a proposed mutating action for its confirmation card. Workflow
+ * create/update surface a YAML preview (the issue's required behavior); other
+ * actions surface their scalar arguments. Unknown tools fall back to a generic
+ * heading plus a JSON dump of the arguments so a newly added mutating tool is
+ * never silently un-renderable.
+ */
+export function describeProposal(proposal: PendingProposal): ProposalView {
+ const args = asRecord(proposal.args);
+ const workflowYaml = nonEmptyStr(args.yaml);
+ const workflowPreview = workflowYaml
+ ? {
+ label: "Workflow definition",
+ content: workflowYaml,
+ kind: "workflow" as const,
+ }
+ : null;
+ switch (proposal.name) {
+ case "create_workflow":
+ return { title: "Create workflow", preview: workflowPreview, fields: [] };
+ // author_workflow creates a workflow PLUS the new skills it needs in one
+ // unit; show the YAML and name each new skill rather than dumping the raw
+ // skills JSON (the card is the canonical, readable view of the proposal).
+ case "author_workflow":
+ return {
+ title: "Create workflow",
+ preview: workflowPreview,
+ fields: [],
+ skills: parseProposalSkills(args.skills),
+ };
+ case "update_workflow":
+ return {
+ title: "Update workflow",
+ preview: workflowPreview,
+ fields: [{ label: "Workflow id", value: str(args.id) }],
+ };
+ case "set_workflow_enabled":
+ return {
+ title: args.enabled ? "Enable workflow" : "Disable workflow",
+ preview: null,
+ fields: [{ label: "Workflow id", value: str(args.id) }],
+ };
+ case "create_skill": {
+ const prompt = nonEmptyStr(args.systemPrompt);
+ const fields: ProposalField[] = [
+ { label: "Name", value: str(args.name) },
+ ];
+ if (nonEmptyStr(args.description))
+ fields.push({ label: "Description", value: str(args.description) });
+ return {
+ title: "Create skill",
+ preview: prompt
+ ? { label: "Instructions", content: prompt, kind: "text" as const }
+ : null,
+ fields,
+ };
+ }
+ case "update_skill": {
+ const prompt = nonEmptyStr(args.systemPrompt);
+ const fields: ProposalField[] = [
+ { label: "Skill id", value: str(args.id) },
+ ];
+ if (nonEmptyStr(args.name))
+ fields.push({ label: "Name", value: str(args.name) });
+ if (nonEmptyStr(args.description))
+ fields.push({ label: "Description", value: str(args.description) });
+ return {
+ title: "Update skill",
+ preview: prompt
+ ? { label: "Instructions", content: prompt, kind: "text" as const }
+ : null,
+ fields,
+ };
+ }
+ case "delete_skill":
+ return {
+ title: "Delete skill",
+ preview: null,
+ fields: [{ label: "Skill id", value: str(args.id) }],
+ };
+ case "create_api_key": {
+ const fields: ProposalField[] = [
+ { label: "Name", value: str(args.name) },
+ ];
+ if (args.product != null)
+ fields.push({ label: "Product", value: str(args.product) });
+ if (args.budgetUsd != null)
+ fields.push({ label: "Budget (USD)", value: str(args.budgetUsd) });
+ return { title: "Create API key", preview: null, fields };
+ }
+ case "revoke_api_key":
+ return {
+ title: "Revoke API key",
+ preview: null,
+ fields: [{ label: "Key id", value: str(args.keyId) }],
+ };
+ case "invoke_integration": {
+ const fields: ProposalField[] = [
+ { label: "Action", value: str(args.path) },
+ ];
+ if (args.input != null)
+ fields.push({ label: "Input", value: str(args.input) });
+ return { title: "Run integration action", preview: null, fields };
+ }
+ default: {
+ const fields = Object.entries(args).map(([label, value]) => ({
+ label,
+ value: str(value),
+ }));
+ return { title: humanizeToolName(proposal.name), preview: null, fields };
+ }
+ }
+}
+
+/**
+ * Summarize a confirmed action's result for the transcript. Best-effort and
+ * defensive: the output shape is the tool's return value, which varies by tool.
+ */
+export function describeOutcome(name: string, output: unknown): string {
+ const o = asRecord(output);
+ switch (name) {
+ case "author_workflow":
+ case "create_workflow": {
+ const wf = asRecord(o.workflow);
+ const skillCount = Array.isArray(o.skills) ? o.skills.length : 0;
+ if (wf.name) {
+ return skillCount > 0
+ ? `Created workflow "${str(wf.name)}" and ${skillCount} skill${skillCount === 1 ? "" : "s"}.`
+ : `Created workflow "${str(wf.name)}".`;
+ }
+ return "Workflow created.";
+ }
+ case "update_workflow": {
+ const wf = asRecord(o.workflow);
+ return wf.name
+ ? `Updated workflow "${str(wf.name)}".`
+ : "Workflow updated.";
+ }
+ case "set_workflow_enabled": {
+ const wf = asRecord(o.workflow);
+ return wf.enabled ? "Workflow enabled." : "Workflow disabled.";
+ }
+ case "create_api_key":
+ return o.prefix
+ ? `Created API key (${str(o.prefix)}…). Copy it from the API Keys page.`
+ : "API key created.";
+ case "revoke_api_key":
+ return "API key revoked.";
+ case "invoke_integration":
+ return "Integration action completed.";
+ default:
+ return "Action completed.";
+ }
+}
+
+/**
+ * Summarize a confirmed action's FAILURE for the error banner. Mutating tools
+ * report a domain failure by returning a negative outcome (see
+ * `resolveConfirmation`); this turns that outcome into a human message,
+ * preferring the server's own `errors[]`/`message` (already end-user-written)
+ * and falling back to the not-found/conflict markers. Best-effort and
+ * defensive: the shape varies by tool.
+ */
+export function describeFailure(output: unknown): string {
+ const o = asRecord(output);
+ const errors = Array.isArray(o.errors) ? o.errors : [];
+ const joined = errors
+ // An element may be a structured `{ message }` (the workflow compiler shape)
+ // or a bare string — surface either rather than dropping a string-only error.
+ .map((e) => (typeof e === "string" ? e : str(asRecord(e).message)))
+ .filter((m) => m.length > 0)
+ .join("; ");
+ if (joined) return joined;
+ if (typeof o.message === "string" && o.message) return o.message;
+ if (o.notFound === true) return "That no longer exists.";
+ if (o.conflict === true) return "It changed since it was loaded. Try again.";
+ return "The action could not be completed.";
+}
+
+export function isLowBalance(balanceUsd: number | null): boolean {
+ return balanceUsd != null && balanceUsd < LOW_BALANCE_THRESHOLD;
+}
+
+/** The outcome of a confirmed tool call, mirroring `ConfirmResult` from the
+ * stream layer without coupling presentation to it. */
+export type ConfirmOutcome =
+ | { ok: true; output: unknown }
+ | { ok: false; error: string };
+
+export interface ConfirmResolution {
+ /** Transcript note to append, or null when there's nothing to say. */
+ statusText: string | null;
+ /** Error banner to surface, or null on a clean success. */
+ error: { code: string; message: string } | null;
+}
+
+/**
+ * Decide what a confirmed proposal's result means for the transcript and the
+ * error banner. The `invoke_integration` tool reports a not-connected provider
+ * as a structured `{ ok: false, code: "NOT_CONNECTED" }` outcome inside `output`
+ * (see the hub integration invoker); that exact signal maps to an
+ * `INTEGRATION_DISCONNECTED` error so the panel can offer a "Connect" step.
+ * Other failures surface as `TOOL_FAILED`. Pure so the classification is
+ * unit-testable without the hook.
+ */
+export function resolveConfirmation(
+ name: string,
+ result: ConfirmOutcome,
+): ConfirmResolution {
+ if (result.ok) {
+ const out = asRecord(result.output);
+ // Match the precise structured signal, not a substring of the whole output —
+ // scanning the serialized blob false-positives on unrelated text (e.g. a
+ // workflow named "…NotConnected").
+ if (out.ok === false && out.code === "NOT_CONNECTED") {
+ return {
+ statusText: null,
+ error: {
+ code: "INTEGRATION_DISCONNECTED",
+ message: str(out.message) || "That integration isn't connected.",
+ },
+ };
+ }
+ // A mutating tool reports a DOMAIN failure by RETURNING a negative outcome
+ // inside an HTTP-200 success envelope — `callTool` only sets `ok:false` for
+ // an unexpected throw, so a rejected create/update/delete arrives here as a
+ // "success" whose body says it failed. The negative markers, by tool:
+ // workflow/skill create → `created:false`; update → `updated:false`; skill
+ // delete → `deleted:false`; set-enabled / integration → `ok:false`. Surface
+ // the cause instead of reading it as a completion note (the bug where a
+ // rejected workflow create showed "Action completed.").
+ if (
+ out.created === false ||
+ out.updated === false ||
+ out.deleted === false ||
+ out.ok === false
+ ) {
+ return {
+ statusText: null,
+ error: { code: "TOOL_FAILED", message: describeFailure(result.output) },
+ };
+ }
+ return { statusText: describeOutcome(name, result.output), error: null };
+ }
+ // A failed confirmation (proposal expired, tool error, network). The
+ // not-connected case never arrives here — it is an HTTP-200 success whose
+ // output carries `ok: false` — so this path is always a genuine failure.
+ return {
+ statusText: null,
+ error: { code: "TOOL_FAILED", message: result.error },
+ };
+}
diff --git a/src/assistant/provider-label.ts b/src/assistant/provider-label.ts
new file mode 100644
index 0000000..65dbd64
--- /dev/null
+++ b/src/assistant/provider-label.ts
@@ -0,0 +1,23 @@
+/**
+ * Display label for a connector slug ("github" → "GitHub"). Kept in its own
+ * module — separate from the graph model — so consumers in the always-loaded app
+ * shell (e.g. ProposalIntegrations) can import the label without pulling the
+ * `yaml` parser (a `model.ts` dependency) into the main bundle.
+ */
+
+ const PROVIDER_LABELS = new Map<string, string>([
+   ["github", "GitHub"],
+   ["gitlab", "GitLab"],
+   ["slack", "Slack"],
+   ["stripe", "Stripe"],
+   ["notion", "Notion"],
+   ["linear", "Linear"],
+   ["discord", "Discord"],
+ ]);
+
+ /** Label for `provider`: a known brand casing, else the slug with its first letter upper-cased. */
+ export function providerLabel(provider: string): string {
+   // Map lookup: a slug like "constructor" must not resolve to an Object.prototype member.
+   const known = PROVIDER_LABELS.get(provider.toLowerCase());
+   return known ?? provider.charAt(0).toUpperCase() + provider.slice(1);
+ }
diff --git a/src/assistant/reducer.test.ts b/src/assistant/reducer.test.ts
new file mode 100644
index 0000000..ea9eb0f
--- /dev/null
+++ b/src/assistant/reducer.test.ts
@@ -0,0 +1,981 @@
+import { describe, expect, it } from "vitest";
+import { describeProposal, presentError } from "./presentation";
+import {
+ type AssistantState,
+ assistantReducer,
+ initialAssistantState,
+ selectVisibleState,
+} from "./reducer";
+import type { AssistantStreamEvent, ChatMessage } from "./types";
+
+function send(state: AssistantState, text: string): AssistantState {
+ return assistantReducer(state, {
+ type: "send",
+ messageId: "u1",
+ assistantId: "a1",
+ text,
+ });
+}
+
+function stream(
+ state: AssistantState,
+ event: AssistantStreamEvent,
+): AssistantState {
+ return assistantReducer(state, { type: "stream", event });
+}
+
+function assistantText(state: AssistantState, id = "a1"): string | undefined {
+ return state.messages.find((m) => m.id === id)?.text;
+}
+
+describe("send", () => {
+ it("appends the user message and an empty assistant bubble, entering streaming", () => {
+ const s = send(initialAssistantState(), "Hi there");
+ expect(s.status).toBe("streaming");
+ expect(s.streamingId).toBe("a1");
+ expect(s.messages).toEqual([
+ { id: "u1", role: "user", text: "Hi there" },
+ { id: "a1", role: "assistant", text: "" },
+ ]);
+ });
+
+ it("clears prior error and usage on a new turn", () => {
+ const dirty: AssistantState = {
+ ...initialAssistantState(),
+ error: { code: "X", message: "old" },
+ usage: { costUsd: 1, balanceUsd: 1, promptTokens: null, completionTokens: null, durationMs: null, replayed: false },
+ };
+ const s = send(dirty, "again");
+ expect(s.error).toBeNull();
+ expect(s.usage).toBeNull();
+ });
+
+ it("preserves an unresolved proposal rather than silently dropping it", () => {
+ const withProposal: AssistantState = {
+ ...initialAssistantState(),
+ pendingProposals: [
+ { proposalId: "p", callId: "c", name: "create_workflow", args: {} },
+ ],
+ };
+ // The hook/composer block sending while a proposal is pending; even if a
+ // send slips through, the reducer must not orphan the server-side proposal.
+ const s = send(withProposal, "again");
+ expect(s.pendingProposals).toEqual(withProposal.pendingProposals);
+ });
+});
+
+describe("streaming renders incrementally", () => {
+ it("appends each delta to the live assistant message in order", () => {
+ let s = send(initialAssistantState(), "Hi");
+ s = stream(s, { type: "thread", data: { threadId: "T1", turnId: "R1" } });
+ expect(s.threadId).toBe("T1");
+
+ s = stream(s, { type: "delta", data: { text: "Hello" } });
+ expect(assistantText(s)).toBe("Hello");
+
+ s = stream(s, { type: "delta", data: { text: " world" } });
+ expect(assistantText(s)).toBe("Hello world");
+
+ s = stream(s, {
+ type: "usage",
+ data: {
+ promptTokens: 10,
+ completionTokens: 5,
+ costUsd: 0.0002,
+ balanceUsd: 4.5,
+ },
+ });
+ s = stream(s, {
+ type: "done",
+ data: { turnId: "R1", status: "completed" },
+ });
+ expect(s.status).toBe("idle");
+ expect(s.streamingId).toBeNull();
+ expect(s.usage).toEqual({
+ costUsd: 0.0002,
+ balanceUsd: 4.5,
+ promptTokens: 10,
+ completionTokens: 5,
+ durationMs: null,
+ replayed: false,
+ });
+ // The fully streamed reply is preserved.
+ expect(assistantText(s)).toBe("Hello world");
+ });
+});
+
+describe("a capped turn is surfaced, never silent", () => {
+ it("appends a step-limit status note when done.capped is true", () => {
+ let s = send(initialAssistantState(), "build me a complex workflow");
+ s = stream(s, { type: "delta", data: { text: "Let me check…" } });
+ s = stream(s, {
+ type: "done",
+ data: { turnId: "R9", status: "completed", capped: true },
+ });
+ expect(s.status).toBe("idle");
+ // The partial reply is kept, plus a status note telling the user it stopped
+ // at the step limit (so a partial answer isn't read as a complete one).
+ expect(assistantText(s)).toBe("Let me check…");
+ const note = s.messages.at(-1);
+ expect(note?.role).toBe("status");
+ expect(note?.text).toContain("step limit");
+ });
+
+ it("adds no note on a normal (uncapped) completion", () => {
+ let s = send(initialAssistantState(), "hi");
+ s = stream(s, { type: "delta", data: { text: "Hello" } });
+ s = stream(s, {
+ type: "done",
+ data: { turnId: "R1", status: "completed" },
+ });
+ expect(s.messages.some((m) => m.role === "status")).toBe(false);
+ });
+});
+
+describe("a proposed workflow shows its YAML and requires confirmation", () => {
+ const yaml = "name: nightly\non:\n schedule: { cron: '0 3 * * *' }";
+
+ it("parks a workflow proposal awaiting confirmation, exposing the YAML, without running it", () => {
+ let s = send(initialAssistantState(), "make a nightly workflow");
+ s = stream(s, {
+ type: "tool_proposal",
+ data: {
+ proposalId: "prop_1",
+ callId: "call_1",
+ name: "create_workflow",
+ args: { yaml },
+ },
+ });
+ s = stream(s, {
+ type: "done",
+ data: { turnId: "R1", status: "completed", proposed: true },
+ });
+
+ expect(s.status).toBe("awaiting_confirm");
+ expect(s.pendingProposals).toHaveLength(1);
+
+ const view = describeProposal(s.pendingProposals[0]!);
+ expect(view.title).toBe("Create workflow");
+ expect(view.preview).toEqual({
+ label: "Workflow definition",
+ content: yaml,
+ kind: "workflow",
+ });
+
+ // No side effect ran: the only messages are the user's and the (empty,
+ // now-dropped) assistant bubble — no "created" status note.
+ expect(s.messages.some((m) => m.role === "status")).toBe(false);
+ // The empty assistant bubble for a pure-proposal turn is dropped.
+ expect(s.messages).toHaveLength(1);
+ });
+
+ it("removes the proposal and notes the outcome once confirmed", () => {
+ let s = send(initialAssistantState(), "make a workflow");
+ s = stream(s, {
+ type: "tool_proposal",
+ data: {
+ proposalId: "prop_1",
+ callId: "call_1",
+ name: "create_workflow",
+ args: { yaml },
+ },
+ });
+ s = stream(s, {
+ type: "done",
+ data: { turnId: "R1", status: "completed", proposed: true },
+ });
+
+ s = assistantReducer(s, {
+ type: "proposal_resolved",
+ callId: "call_1",
+ status: {
+ id: "st1",
+ role: "status",
+ text: 'Created workflow "nightly".',
+ },
+ error: null,
+ });
+ expect(s.pendingProposals).toHaveLength(0);
+ expect(s.status).toBe("idle");
+ expect(s.messages.at(-1)).toEqual({
+ id: "st1",
+ role: "status",
+ text: 'Created workflow "nightly".',
+ });
+ });
+
+ it("does not double-add a proposal with a repeated callId", () => {
+ let s = send(initialAssistantState(), "x");
+ const ev: AssistantStreamEvent = {
+ type: "tool_proposal",
+ data: {
+ proposalId: "prop_1",
+ callId: "call_1",
+ name: "create_workflow",
+ args: { yaml },
+ },
+ };
+ s = stream(s, ev);
+ s = stream(s, ev);
+ expect(s.pendingProposals).toHaveLength(1);
+ });
+
+ it("carries connection requirements through to the pending proposal", () => {
+ let s = send(initialAssistantState(), "review my PRs");
+ s = stream(s, {
+ type: "tool_proposal",
+ data: {
+ proposalId: "prop_1",
+ callId: "call_1",
+ name: "create_workflow",
+ args: { yaml },
+ requirements: [{ provider: "github", connected: false }],
+ },
+ });
+ expect(s.pendingProposals[0]?.requirements).toEqual([
+ { provider: "github", connected: false },
+ ]);
+ });
+
+ it("keeps the card and shows the reason on a retryable confirm failure", () => {
+ let s = send(initialAssistantState(), "review my PRs");
+ s = stream(s, {
+ type: "tool_proposal",
+ data: {
+ proposalId: "prop_1",
+ callId: "call_1",
+ name: "create_workflow",
+ args: { yaml },
+ requirements: [{ provider: "github", connected: false }],
+ },
+ });
+ s = stream(s, {
+ type: "done",
+ data: { turnId: "R1", status: "completed", proposed: true },
+ });
+
+ s = assistantReducer(s, {
+ type: "proposal_retry_failed",
+ callId: "call_1",
+ message: "connect github first",
+ });
+ // The card stays (re-confirmable) with the reason attached, and there's no
+ // top-level error banner — the message lives on the card next to Connect.
+ expect(s.pendingProposals).toHaveLength(1);
+ expect(s.pendingProposals[0]?.retryError).toBe("connect github first");
+ expect(s.status).toBe("awaiting_confirm");
+ expect(s.error).toBeNull();
+ });
+
+ it("does not resurrect awaiting_confirm when no matching proposal remains", () => {
+ // Defensive: if the card is already gone (no pending proposals) and the
+ // conversation has settled to idle, a stray proposal_retry_failed must NOT
+ // force the composer back into awaiting_confirm and wedge it.
+ const idle: AssistantState = { ...initialAssistantState(), status: "idle" };
+ const s = assistantReducer(idle, {
+ type: "proposal_retry_failed",
+ callId: "gone",
+ message: "connect github first",
+ });
+ expect(s.pendingProposals).toHaveLength(0);
+ expect(s.status).toBe("idle");
+ });
+});
+
+describe("credit-exhausted surfaces the add-credits CTA", () => {
+ it("stores the error and maps it to an Add credits action to billing", () => {
+ let s = send(initialAssistantState(), "do a thing");
+ s = stream(s, {
+ type: "error",
+ data: {
+ code: "INSUFFICIENT_BALANCE",
+ message: "Credit balance is exhausted",
+ },
+ });
+
+ expect(s.status).toBe("idle");
+ expect(s.streamingId).toBeNull();
+ // The empty assistant bubble is dropped on a pre-delta failure.
+ expect(s.messages).toHaveLength(1);
+
+ const error = s.error;
+ if (!error) throw new Error("expected an error to be set");
+ expect(error.code).toBe("INSUFFICIENT_BALANCE");
+
+ const view = presentError(error.code, error.message);
+ expect(view.cta).toEqual({ label: "Add credits", to: "/app/billing" });
+ });
+});
+
+describe("transcript edge cases", () => {
+ it("keeps a partially streamed reply when the user stops", () => {
+ let s = send(initialAssistantState(), "hi");
+ s = stream(s, { type: "delta", data: { text: "partial" } });
+ s = assistantReducer(s, { type: "stopped" });
+ expect(s.status).toBe("idle");
+ expect(s.streamingId).toBeNull();
+ expect(assistantText(s)).toBe("partial");
+ });
+
+ it("drops the empty assistant bubble when stopped before the first delta", () => {
+ let s = send(initialAssistantState(), "hi");
+ // No delta arrived; the assistant bubble is still empty.
+ s = assistantReducer(s, { type: "stopped" });
+ expect(s.status).toBe("idle");
+ expect(s.streamingId).toBeNull();
+ // Only the user's message remains — no permanent blank assistant bubble.
+ expect(s.messages).toEqual([{ id: "u1", role: "user", text: "hi" }]);
+ });
+
+ it("shows a running tool chip on tool_call and resolves it on tool_result", () => {
+ let s = send(initialAssistantState(), "list my workflows");
+ s = stream(s, {
+ type: "tool_call",
+ data: { callId: "c1", name: "list_workflows" },
+ });
+ const running = s.messages.find((m) => m.id === "tool-c1");
+ expect(running?.role).toBe("tool");
+ expect(running?.tool).toEqual({
+ name: "list_workflows",
+ status: "running",
+ });
+ // The empty assistant bubble is finalized so the chip isn't preceded by a
+ // blank bubble, and the next delta will open a fresh segment.
+ expect(s.streamingId).toBeNull();
+
+ s = stream(s, {
+ type: "tool_result",
+ data: { callId: "c1", name: "list_workflows", ok: true, output: {} },
+ });
+ // The SAME chip is updated in place — no duplicate, now marked ok.
+ expect(s.messages.filter((m) => m.id === "tool-c1")).toHaveLength(1);
+ expect(s.messages.find((m) => m.id === "tool-c1")?.tool?.status).toBe("ok");
+ expect(s.status).toBe("streaming");
+ });
+
+ it("carries tool args on the chip and preserves them across tool_result", () => {
+ let s = send(initialAssistantState(), "get workflow wf_1");
+ s = stream(s, {
+ type: "tool_call",
+ data: { callId: "c1", name: "get_workflow", args: { id: "wf_1" } },
+ });
+ expect(s.messages.find((m) => m.id === "tool-c1")?.tool?.args).toEqual({
+ id: "wf_1",
+ });
+ s = stream(s, {
+ type: "tool_result",
+ data: { callId: "c1", name: "get_workflow", ok: true, output: {} },
+ });
+ // tool_result carries no args of its own, so the call's args must survive.
+ expect(s.messages.find((m) => m.id === "tool-c1")?.tool?.args).toEqual({
+ id: "wf_1",
+ });
+ });
+
+ it("records cost, tokens, and duration from the usage event", () => {
+ let s = send(initialAssistantState(), "hi");
+ s = stream(s, {
+ type: "usage",
+ data: {
+ promptTokens: 12,
+ completionTokens: 34,
+ costUsd: 0.001,
+ balanceUsd: 9.5,
+ durationMs: 1500,
+ },
+ });
+ expect(s.usage).toEqual({
+ costUsd: 0.001,
+ balanceUsd: 9.5,
+ promptTokens: 12,
+ completionTokens: 34,
+ durationMs: 1500,
+ replayed: false,
+ });
+ });
+
+ it("marks the tool chip failed and carries the error text", () => {
+ let s = send(initialAssistantState(), "get workflow zzz");
+ s = stream(s, {
+ type: "tool_call",
+ data: { callId: "c1", name: "get_workflow" },
+ });
+ s = stream(s, {
+ type: "tool_result",
+ data: {
+ callId: "c1",
+ name: "get_workflow",
+ ok: false,
+ error: { code: "UNKNOWN", message: "not found" },
+ },
+ });
+ const chip = s.messages.find((m) => m.id === "tool-c1");
+ expect(chip?.tool?.status).toBe("failed");
+ expect(chip?.text).toBe("not found");
+ });
+
+ it("retains the tool result outcome on the chip so a renderer can show the body", () => {
+ let s = send(initialAssistantState(), "list my workflows");
+ s = stream(s, {
+ type: "tool_call",
+ data: { callId: "c1", name: "list_workflows" },
+ });
+ s = stream(s, {
+ type: "tool_result",
+ data: { callId: "c1", name: "list_workflows", ok: true, output: { count: 2 } },
+ });
+ expect(s.messages.find((m) => m.id === "tool-c1")?.tool?.outcome).toEqual({
+ ok: true,
+ result: { count: 2 },
+ });
+ });
+
+ it("retains the error outcome on a failed tool chip", () => {
+ let s = send(initialAssistantState(), "get workflow zzz");
+ s = stream(s, {
+ type: "tool_call",
+ data: { callId: "c1", name: "get_workflow" },
+ });
+ s = stream(s, {
+ type: "tool_result",
+ data: {
+ callId: "c1",
+ name: "get_workflow",
+ ok: false,
+ error: { code: "UNKNOWN", message: "not found" },
+ },
+ });
+ expect(s.messages.find((m) => m.id === "tool-c1")?.tool?.outcome).toEqual({
+ ok: false,
+ error: { code: "UNKNOWN", message: "not found" },
+ });
+ });
+
+ it("splits pre- and post-tool reasoning into distinct assistant bubbles", () => {
+ let s = send(initialAssistantState(), "build a workflow");
+ s = stream(s, {
+ type: "delta",
+ data: { text: "Let me check the format." },
+ });
+ s = stream(s, {
+ type: "tool_call",
+ data: { callId: "c1", name: "get_workflow_schema" },
+ });
+ s = stream(s, {
+ type: "tool_result",
+ data: { callId: "c1", name: "get_workflow_schema", ok: true, output: {} },
+ });
+ s = stream(s, { type: "delta", data: { text: "Here is your workflow:" } });
+ s = stream(s, {
+ type: "done",
+ data: { turnId: "R1", status: "completed" },
+ });
+
+ // Two separate assistant bubbles, not one concatenated blob, with the tool
+ // chip between them — in wire order.
+ const assistantTexts = s.messages
+ .filter((m) => m.role === "assistant")
+ .map((m) => m.text);
+ expect(assistantTexts).toEqual([
+ "Let me check the format.",
+ "Here is your workflow:",
+ ]);
+ const roles = s.messages.map((m) => m.role);
+ expect(roles).toEqual(["user", "assistant", "tool", "assistant"]);
+ });
+
+ it("reset returns to the initial empty state", () => {
+ let s = send(initialAssistantState(), "hi");
+ s = stream(s, { type: "thread", data: { threadId: "T", turnId: "R" } });
+ s = assistantReducer(s, { type: "reset" });
+ expect(s).toEqual(initialAssistantState());
+ });
+});
+
+describe("restore_history rehydrates a thread, but never clobbers live state", () => {
+ const restored: ChatMessage[] = [ // the server transcript carried by every restore action below
+ { id: "t1:u", role: "user", text: "earlier question" },
+ { id: "t1:a", role: "assistant", text: "earlier answer" },
+ ];
+
+ function idleThread(): AssistantState { // userA on thread T1: idle, no messages, no proposals
+ return {
+ ...initialAssistantState(),
+ ownerId: "userA",
+ threadId: "T1",
+ };
+ }
+
+ it("applies the server transcript when the thread is idle and empty", () => {
+ const s = assistantReducer(idleThread(), {
+ type: "restore_history",
+ ownerId: "userA",
+ threadId: "T1",
+ messages: restored,
+ proposals: [],
+ });
+ expect(s.messages).toEqual(restored);
+ });
+
+ it("ignores a restore once a turn has started", () => {
+ let s = idleThread();
+ s = send(s, "new question"); // status streaming, messages non-empty
+ const before = s.messages;
+ s = assistantReducer(s, {
+ type: "restore_history",
+ ownerId: "userA",
+ threadId: "T1",
+ messages: restored,
+ proposals: [],
+ });
+ expect(s.messages).toBe(before); // same array reference: the guard returned the state as-is
+ });
+
+ it("ignores a restore for a different owner or thread (late/cross response)", () => {
+ const base = idleThread();
+ expect(
+ assistantReducer(base, {
+ type: "restore_history",
+ ownerId: "userB",
+ threadId: "T1",
+ messages: restored,
+ proposals: [],
+ }).messages,
+ ).toEqual([]); // owner mismatch: the transcript stays empty
+ expect(
+ assistantReducer(base, {
+ type: "restore_history",
+ ownerId: "userA",
+ threadId: "T2",
+ messages: restored,
+ proposals: [],
+ }).messages,
+ ).toEqual([]); // thread mismatch: the transcript stays empty
+ });
+
+ it("does not clobber an already-restored idle transcript (idle + non-empty)", () => {
+ const existing: ChatMessage[] = [
+ { id: "old:u", role: "user", text: "already here" },
+ { id: "old:a", role: "assistant", text: "already answered" },
+ ];
+ const s: AssistantState = { ...idleThread(), messages: existing };
+ const next = assistantReducer(s, {
+ type: "restore_history",
+ ownerId: "userA",
+ threadId: "T1",
+ messages: restored,
+ proposals: [],
+ });
+ // A late/duplicate restore must not overwrite a transcript already shown.
+ expect(next.messages).toBe(existing);
+ });
+
+ it("restores unconfirmed proposals and re-enters awaiting_confirm", () => {
+ const s = assistantReducer(idleThread(), {
+ type: "restore_history",
+ ownerId: "userA",
+ threadId: "T1",
+ messages: restored,
+ proposals: [
+ {
+ proposalId: "p1",
+ callId: "c1",
+ name: "create_workflow",
+ args: { yaml: "name: pr-review" },
+ requirements: [
+ {
+ provider: "github",
+ kind: "github_app",
+ connected: false,
+ connectUrl: null,
+ },
+ ],
+ },
+ ],
+ });
+ // The card comes back AND the conversation is gated on confirming it again.
+ expect(s.messages).toEqual(restored);
+ expect(s.pendingProposals).toHaveLength(1);
+ expect(s.pendingProposals[0]?.proposalId).toBe("p1");
+ expect(s.status).toBe("awaiting_confirm");
+ });
+
+ it("never clobbers a live pending proposal already in state", () => {
+ const liveProposal = {
+ proposalId: "live",
+ callId: "c1",
+ name: "create_workflow",
+ args: { yaml: "name: live" },
+ };
+ // An idle+empty conversation that still carries a live, unconfirmed proposal:
+ // a late/duplicate restore must defer to it rather than overwrite it with a
+ // stale server snapshot (the exact proposal loss this PR prevents).
+ const s: AssistantState = {
+ ...idleThread(),
+ pendingProposals: [liveProposal],
+ };
+ const next = assistantReducer(s, {
+ type: "restore_history",
+ ownerId: "userA",
+ threadId: "T1",
+ messages: restored,
+ proposals: [
+ {
+ proposalId: "stale",
+ callId: "c2",
+ name: "create_workflow",
+ args: { yaml: "name: stale" },
+ },
+ ],
+ });
+ // The guard returns the prior state untouched — the live proposal survives.
+ expect(next).toBe(s);
+ expect(next.pendingProposals).toEqual([liveProposal]);
+ });
+});
+
+describe("thread_gone drops a dead thread id, guarded like restore", () => {
+ function idleThread(): AssistantState { // userA on thread T1: idle, nothing shown yet
+ return { ...initialAssistantState(), ownerId: "userA", threadId: "T1" };
+ }
+
+ it("clears the thread id when the thread is gone and the conversation is idle/empty", () => {
+ const s = assistantReducer(idleThread(), {
+ type: "thread_gone",
+ ownerId: "userA",
+ threadId: "T1",
+ });
+ expect(s.threadId).toBeNull();
+ });
+
+ it("leaves a started turn untouched", () => {
+ let s = idleThread();
+ s = send(s, "hi"); // streaming + non-empty
+ s = assistantReducer(s, {
+ type: "thread_gone",
+ ownerId: "userA",
+ threadId: "T1",
+ });
+ expect(s.threadId).toBe("T1");
+ });
+
+ it("leaves an idle thread with a restored transcript untouched (idle + non-empty)", () => {
+ // A restore succeeded (idle, messages present) and a late thread_gone arrives;
+ // the guard must keep the thread id AND the visible transcript intact.
+ const existing: ChatMessage[] = [
+ { id: "t:u", role: "user", text: "earlier" },
+ { id: "t:a", role: "assistant", text: "earlier reply" },
+ ];
+ const s: AssistantState = { ...idleThread(), messages: existing };
+ const next = assistantReducer(s, {
+ type: "thread_gone",
+ ownerId: "userA",
+ threadId: "T1",
+ });
+ expect(next.threadId).toBe("T1");
+ expect(next.messages).toBe(existing);
+ });
+
+ it("ignores a stale/cross thread_gone for a different owner or thread", () => {
+ const base = idleThread();
+ expect(
+ assistantReducer(base, {
+ type: "thread_gone",
+ ownerId: "userB",
+ threadId: "T1",
+ }).threadId,
+ ).toBe("T1"); // owner mismatch: the thread id is kept
+ expect(
+ assistantReducer(base, {
+ type: "thread_gone",
+ ownerId: "userA",
+ threadId: "T2",
+ }).threadId,
+ ).toBe("T1"); // thread mismatch: the thread id is kept
+ });
+});
+
+describe("hydrate replaces the conversation for a new user", () => {
+ it("loads the new thread and drops all transient state from the prior user", () => {
+ // Build up a dirty mid-conversation state for user A.
+ let s = send(initialAssistantState(), "secret from user A");
+ s = stream(s, { type: "thread", data: { threadId: "T_A", turnId: "R" } });
+ s = stream(s, { type: "delta", data: { text: "private reply" } });
+ s = stream(s, {
+ type: "tool_proposal",
+ data: {
+ proposalId: "p",
+ callId: "c",
+ name: "create_workflow",
+ args: { yaml: "name: x" },
+ },
+ });
+
+ // Switching users hydrates user B's (empty) thread; none of A's data remains.
+ const hydrated = assistantReducer(s, {
+ type: "hydrate",
+ ownerId: "userB",
+ threadId: null,
+ messages: [],
+ });
+ expect(hydrated).toEqual({ ...initialAssistantState(), ownerId: "userB" });
+ expect(hydrated.messages).toEqual([]);
+ expect(hydrated.pendingProposals).toEqual([]);
+ expect(hydrated.streamingId).toBeNull();
+ });
+
+ it("restores a persisted thread stamped with the incoming user", () => {
+ const prior: ChatMessage[] = [
+ { id: "u", role: "user", text: "earlier question" },
+ { id: "a", role: "assistant", text: "earlier answer" },
+ ];
+ const s = assistantReducer(initialAssistantState(), {
+ type: "hydrate",
+ ownerId: "userB",
+ threadId: "T_B",
+ messages: prior,
+ });
+ expect(s.ownerId).toBe("userB");
+ expect(s.threadId).toBe("T_B");
+ expect(s.messages).toEqual(prior);
+ expect(s.status).toBe("idle"); // hydrate rebuilds from the initial state, whose status is idle
+ });
+});
+
+describe("selectVisibleState masks a foreign owner", () => {
+ function ownedState(ownerId: string): AssistantState { // hydrated for ownerId: thread "T", one user message
+ return assistantReducer(initialAssistantState(), {
+ type: "hydrate",
+ ownerId,
+ threadId: "T",
+ messages: [{ id: "m", role: "user", text: "private" }],
+ });
+ }
+
+ it("returns the state unchanged when the owner matches the current user", () => {
+ const s = ownedState("userA");
+ expect(selectVisibleState(s, "userA")).toBe(s); // the same object, not a copy
+ });
+
+ it("returns a fresh empty state when the owner does not match", () => {
+ // The single commit after an auth switch (state still belongs to A, prop is
+ // now B) must never expose A's transcript.
+ const s = ownedState("userA");
+ const visible = selectVisibleState(s, "userB");
+ expect(visible).toEqual(initialAssistantState());
+ expect(visible.messages).toEqual([]);
+ expect(visible.threadId).toBeNull();
+ });
+
+ it("masks a signed-in user's state from an anonymous (null) viewer", () => {
+ const s = ownedState("userA");
+ expect(selectVisibleState(s, null).messages).toEqual([]);
+ });
+
+ it("keeps reset's cleared state visible (owner preserved)", () => {
+ const reset = assistantReducer(ownedState("userA"), { type: "reset" });
+ expect(selectVisibleState(reset, "userA")).toBe(reset);
+ expect(reset.ownerId).toBe("userA");
+ });
+});
+
+describe("proposal_resolved targets the card by callId", () => {
+ function withTwoProposals(): AssistantState { // awaiting_confirm with two pending cards: c1 and c2
+ return {
+ ...initialAssistantState(),
+ status: "awaiting_confirm",
+ pendingProposals: [
+ { proposalId: "p1", callId: "c1", name: "create_workflow", args: {} },
+ { proposalId: "p2", callId: "c2", name: "revoke_api_key", args: {} },
+ ],
+ };
+ }
+
+ it("removes exactly the card whose callId matches", () => {
+ const s = assistantReducer(withTwoProposals(), {
+ type: "proposal_resolved",
+ callId: "c1",
+ status: null,
+ error: null,
+ });
+ expect(s.pendingProposals.map((p) => p.callId)).toEqual(["c2"]);
+ expect(s.status).toBe("awaiting_confirm"); // c2 is still pending, so the gate stays up
+ });
+
+ it("resolves to idle once the last card is removed", () => {
+ let s = withTwoProposals();
+ s = assistantReducer(s, {
+ type: "proposal_resolved",
+ callId: "c1",
+ status: null,
+ error: null,
+ });
+ s = assistantReducer(s, {
+ type: "proposal_resolved",
+ callId: "c2",
+ status: null,
+ error: null,
+ });
+ expect(s.pendingProposals).toEqual([]);
+ expect(s.status).toBe("idle");
+ });
+});
+
+describe("in-memory messages are capped", () => {
+ it("bounds the transcript and keeps the most recent + streaming message", () => {
+ // Seed a state already over the cap (205 > 200) with identifiable messages.
+ const seeded: AssistantState = {
+ ...initialAssistantState(),
+ ownerId: "userA",
+ messages: Array.from({ length: 205 }, (_, i) => ({
+ id: `old-${i}`,
+ role: "assistant" as const,
+ text: `old ${i}`,
+ })),
+ };
+ const afterSend = assistantReducer(seeded, {
+ type: "send",
+ messageId: "u-new",
+ assistantId: "a-new",
+ text: "newest",
+ });
+ expect(afterSend.messages.length).toBeLessThanOrEqual(200);
+ // The just-sent user message and the streaming assistant bubble survive.
+ const ids = afterSend.messages.map((m) => m.id);
+ expect(ids).toContain("u-new");
+ expect(ids).toContain("a-new");
+ expect(afterSend.streamingId).toBe("a-new");
+ // The oldest messages were dropped (trimming happens from the front).
+ expect(ids).not.toContain("old-0");
+ });
+});
+
+describe("an incomplete turn leaves no confirmable proposal", () => {
+ function withBufferedProposal() { // a turn still in flight that has already buffered one proposal
+ let s = send(initialAssistantState(), "do a mutating thing");
+ s = stream(s, {
+ type: "tool_proposal",
+ data: {
+ proposalId: "p1",
+ callId: "c1",
+ name: "create_workflow",
+ args: { yaml: "name: x" },
+ },
+ });
+ expect(s.pendingProposals).toHaveLength(1);
+ return s;
+ }
+
+ it("clears the proposal when the stream errors", () => {
+ const s = stream(withBufferedProposal(), {
+ type: "error",
+ data: { code: "STREAM_FAILED", message: "boom" },
+ });
+ expect(s.pendingProposals).toEqual([]);
+ expect(s.status).toBe("idle");
+ });
+
+ it("clears the proposal when the user stops the turn", () => {
+ const s = assistantReducer(withBufferedProposal(), { type: "stopped" });
+ expect(s.pendingProposals).toEqual([]);
+ });
+
+ it("clears the proposal on a network stream failure", () => {
+ const s = assistantReducer(withBufferedProposal(), {
+ type: "stream_failed",
+ error: { code: "NETWORK", message: "lost" },
+ });
+ expect(s.pendingProposals).toEqual([]);
+ });
+
+ it("keeps the proposal when the turn completes cleanly", () => {
+ const s = stream(withBufferedProposal(), {
+ type: "done",
+ data: { turnId: "R1", status: "completed", proposed: true },
+ });
+ expect(s.pendingProposals).toHaveLength(1);
+ expect(s.status).toBe("awaiting_confirm"); // a clean finish with a card pending gates on confirm
+ });
+});
+
+describe("switch_thread opens a past conversation", () => {
+ it("pins the chosen thread and clears the conversation for a restore", () => {
+ // A live conversation on thread A with one message already shown.
+ let s: AssistantState = {
+ ...initialAssistantState(),
+ ownerId: "u1",
+ threadId: "thr_A",
+ messages: [{ id: "m1", role: "user", text: "hello" }],
+ };
+ s = assistantReducer(s, { type: "switch_thread", threadId: "thr_B" });
+ expect(s.threadId).toBe("thr_B");
+ expect(s.ownerId).toBe("u1");
+ expect(s.messages).toEqual([]);
+ expect(s.status).toBe("idle");
+ // The follow-up restore for the switched-to thread now applies.
+ const restored = assistantReducer(s, {
+ type: "restore_history",
+ ownerId: "u1",
+ threadId: "thr_B",
+ messages: [{ id: "b1", role: "user", text: "earlier" }],
+ proposals: [],
+ });
+ expect(restored.messages).toEqual([
+ { id: "b1", role: "user", text: "earlier" },
+ ]);
+ });
+
+ it("is a no-op when the chosen thread is already active", () => {
+ const s: AssistantState = {
+ ...initialAssistantState(),
+ ownerId: "u1",
+ threadId: "thr_A",
+ messages: [{ id: "m1", role: "user", text: "hello" }],
+ };
+ const next = assistantReducer(s, {
+ type: "switch_thread",
+ threadId: "thr_A",
+ });
+ expect(next).toBe(s); // same reference: the transcript on screen is not cleared
+ });
+});
+
+describe("history_failed drops the active thread and surfaces the error", () => {
+ it("resets to a fresh state (no thread) with the error, on the failed thread", () => {
+ const s: AssistantState = {
+ ...initialAssistantState(),
+ ownerId: "u1",
+ threadId: "T",
+ };
+ const next = assistantReducer(s, {
+ type: "history_failed",
+ ownerId: "u1",
+ threadId: "T",
+ error: { code: "HISTORY_LOAD_FAILED", message: "nope" },
+ });
+ // The thread is dropped so the next send can't run against hidden context.
+ expect(next.threadId).toBeNull();
+ expect(next.ownerId).toBe("u1");
+ expect(next.messages).toEqual([]);
+ expect(next.error).toEqual({
+ code: "HISTORY_LOAD_FAILED",
+ message: "nope",
+ });
+ });
+
+ it("ignores a late failure once the conversation moved on", () => {
+ const s: AssistantState = {
+ ...initialAssistantState(),
+ ownerId: "u1",
+ threadId: "OTHER",
+ messages: [{ id: "m", role: "user", text: "hi" }],
+ };
+ const next = assistantReducer(s, {
+ type: "history_failed",
+ ownerId: "u1",
+ threadId: "T",
+ error: { code: "x", message: "y" },
+ });
+ expect(next).toBe(s); // same reference: the failure was for thread "T", not the active one
+ });
+});
diff --git a/src/assistant/reducer.ts b/src/assistant/reducer.ts
new file mode 100644
index 0000000..db6867f
--- /dev/null
+++ b/src/assistant/reducer.ts
@@ -0,0 +1,564 @@
+/**
+ * Pure state machine driving the assistant panel. Every UI transition — a user
+ * message, each streamed SSE event, a stream failure, a confirmed/cancelled
+ * proposal, a manual stop — is modeled as an action here, so the panel's
+ * behavior can be verified without a DOM or a live stream.
+ */
+
+import type {
+ AssistantStreamEvent,
+ ChatMessage,
+ PendingProposal,
+ ToolOutcome,
+ UsageInfo,
+} from "./types";
+
+export type ChatStatus = "idle" | "streaming" | "awaiting_confirm"; // phase of the conversation; see `AssistantState.status`
+
+/** Upper bound on the in-memory transcript. A session can outlive many route
+ * changes and drawer open/close cycles, so only the most recent messages are
+ * kept. The streaming message is always appended last, which means trimming
+ * from the front never drops it. */
+const MAX_MESSAGES = 200;
+
+function capMessages(messages: ChatMessage[]): ChatMessage[] {
+  // Within the bound: hand the caller's array back untouched (same reference).
+  if (messages.length <= MAX_MESSAGES) return messages;
+  return messages.slice(-MAX_MESSAGES); // otherwise keep only the newest ones
+}
+
+export interface AssistantState {
+ /** The signed-in user this conversation belongs to. Carried in state so it
+ * moves atomically with the data it labels — persistence keys off it, and a
+ * render whose owner doesn't match the current user is masked (see
+ * `selectVisibleState`), preventing a one-frame cross-account leak. */
+ ownerId: string | null;
+ /** Persisted across reloads to continue the same server-side thread. */
+ threadId: string | null;
+ messages: ChatMessage[]; // the conversation transcript; new messages are appended at the end
+ status: ChatStatus;
+ /** Id of the assistant message currently accumulating deltas, if any. Set to
+ * null mid-turn when a tool runs, so the next text delta opens a fresh
+ * assistant bubble — keeping each reasoning segment visually distinct. */
+ streamingId: string | null;
+ /** Base id for the current turn's assistant bubbles (the `send` assistant id).
+ * Post-tool segments derive a unique id from it; null between turns. */
+ streamBaseId: string | null;
+ /** How many assistant bubbles the current turn has opened (0 = just the first).
+ * Drives the per-segment bubble id so segments never collide across turns. */
+ segmentSeq: number;
+ pendingProposals: PendingProposal[]; // proposed actions still awaiting the user's confirm/cancel
+ /** Cost, balance and token counts from the latest `usage` event; `send` clears it. */
+ usage: UsageInfo | null;
+ /** Model slug the current/most-recent turn ran against (from the thread
+ * event), or null before any turn this session. */
+ model: string | null;
+ /** The current turn's accumulated reasoning/thinking text (reasoning models
+ * stream this before the answer). Shown dim while the answer is still pending;
+ * reset at the start of each turn. Null when the model emits no reasoning. */
+ reasoning: string | null;
+ error: { code: string; message: string } | null; // most recent failure to show, if any
+}
+
+export type AssistantAction =
+ | { type: "send"; messageId: string; assistantId: string; text: string } // the user submitted a message
+ | { type: "stream"; event: AssistantStreamEvent } // one event from the turn's stream
+ | { type: "stream_failed"; error: { code: string; message: string } } // the stream itself failed
+ | {
+ type: "proposal_resolved";
+ /** The card's canonical identity (the model's tool-call id). */
+ callId: string;
+ status: ChatMessage | null;
+ /** Set on a failed confirmation; null clears any prior error. */
+ error: { code: string; message: string } | null;
+ }
+ | {
+ type: "proposal_retry_failed";
+ /** The card to KEEP (a retryable confirm failure — an unconnected
+ * integration). The card stays confirmable; the message is shown on it. */
+ callId: string;
+ message: string;
+ }
+ | { type: "stopped" } // the user stopped the in-flight turn
+ | {
+ type: "hydrate"; // replace the whole conversation, stamped with its owner
+ ownerId: string | null;
+ threadId: string | null;
+ messages: ChatMessage[];
+ }
+ | {
+ type: "restore_history"; // server transcript for a thread; applied only if still idle/empty
+ ownerId: string | null;
+ threadId: string;
+ messages: ChatMessage[];
+ /** Unconfirmed proposals restored alongside the transcript so a card
+ * survives reload. Empty when none are pending. */
+ proposals: PendingProposal[];
+ }
+ | { type: "thread_gone"; ownerId: string | null; threadId: string } // the thread no longer exists server-side
+ | {
+ type: "history_failed"; // a thread's transcript could not be loaded
+ ownerId: string | null;
+ threadId: string;
+ error: { code: string; message: string };
+ }
+ | { type: "switch_thread"; threadId: string } // open a thread picked from history
+ | { type: "reset" }; // "New chat": clear everything except the owner
+
+export function initialAssistantState(): AssistantState {
+  // No turn in flight: no open bubble, no base id, and a zeroed segment counter.
+  const noTurn = { streamingId: null, streamBaseId: null, segmentSeq: 0 };
+  return {
+    ownerId: null,
+    threadId: null,
+    messages: [],
+    status: "idle",
+    ...noTurn,
+    pendingProposals: [],
+    usage: null,
+    model: null,
+    reasoning: null,
+    error: null,
+  };
+}
+
+/**
+ * Pick what may be rendered for `userId`. State stamped with another owner
+ * (the one commit between an auth change and the hydrate that follows) is
+ * swapped for a fresh empty state, so a previous account's messages and
+ * proposals never show under the new one. Matching state is returned as-is.
+ */
+export function selectVisibleState(
+  state: AssistantState,
+  userId: string | null,
+): AssistantState {
+  if (state.ownerId !== userId) return initialAssistantState();
+  return state;
+}
+
+/** Remove the streaming assistant bubble when it is still empty — a turn that
+ * only proposed an action, or one that failed before its first delta. */
+function dropEmptyStreaming(
+  messages: ChatMessage[],
+  streamingId: string | null,
+): ChatMessage[] {
+  if (!streamingId) return messages;
+  const open = messages.find((m) => m.id === streamingId);
+  const isBlankAssistant = open?.role === "assistant" && open.text === "";
+  if (!isBlankAssistant) return messages;
+  // Nothing was ever written into the bubble: take it out of the transcript.
+  return messages.filter((m) => m.id !== streamingId);
+}
+
+function appendDelta(
+  messages: ChatMessage[],
+  streamingId: string | null,
+  text: string,
+): ChatMessage[] {
+  // No open bubble means nothing to extend; otherwise extend a copy of it.
+  if (!streamingId) return messages;
+  const extend = (m: ChatMessage) => ({ ...m, text: m.text + text });
+  return messages.map((m) => (m.id === streamingId ? extend(m) : m));
+}
+
+/**
+ * Fold one streamed event into the state. Text deltas extend or open an
+ * assistant bubble, tool events add or settle an activity chip, and the
+ * terminal `done`/`error` events close the turn (status, open bubble, base id).
+ */
+function applyStreamEvent(
+  state: AssistantState,
+  event: AssistantStreamEvent,
+): AssistantState {
+  switch (event.type) {
+    case "thread":
+      return {
+        ...state,
+        threadId: event.data.threadId,
+        // The model the server actually ran, so the picker reflects reality.
+        model: event.data.model ?? state.model,
+      };
+
+    case "delta": {
+      if (state.streamingId) {
+        return {
+          ...state,
+          messages: appendDelta(
+            state.messages,
+            state.streamingId,
+            event.data.text,
+          ),
+        };
+      }
+      // No open bubble: a tool ran and finalized the prior segment. Open a
+      // fresh bubble so pre- and post-tool text stay distinct, not one blob.
+      if (!state.streamBaseId) return state; // stray delta outside a turn
+      const segmentSeq = state.segmentSeq + 1;
+      const id = `${state.streamBaseId}-s${segmentSeq}`;
+      return {
+        ...state,
+        segmentSeq,
+        streamingId: id,
+        messages: capMessages([
+          ...state.messages,
+          { id, role: "assistant", text: event.data.text },
+        ]),
+      };
+    }
+
+    case "reasoning":
+      // Accumulate the turn's reasoning text; the panel shows it dim while the
+      // answer is still pending so a long thinking gap doesn't read as frozen.
+      return {
+        ...state,
+        reasoning: (state.reasoning ?? "") + event.data.text,
+      };
+
+    case "tool_call": {
+      // A read-only tool started. Finalize the current text segment (dropping
+      // it if empty, so a tool that runs before any preamble leaves no blank
+      // bubble) and append a "running" chip, deduped by callId on re-delivery.
+      const trimmed = dropEmptyStreaming(state.messages, state.streamingId);
+      const chipId = `tool-${event.data.callId}`;
+      if (trimmed.some((m) => m.id === chipId)) {
+        return { ...state, messages: trimmed, streamingId: null };
+      }
+      const chip: ChatMessage = {
+        id: chipId,
+        role: "tool",
+        text: "",
+        tool: {
+          name: event.data.name,
+          status: "running",
+          args: event.data.args,
+        },
+      };
+      return {
+        ...state,
+        streamingId: null,
+        messages: capMessages([...trimmed, chip]),
+      };
+    }
+
+    case "tool_result": {
+      // Settle the chip the matching tool_call opened: ok, or failed with the
+      // error text. (Mutating tools never arrive here — they are proposed.)
+      const chipId = `tool-${event.data.callId}`;
+      const status = event.data.ok ? "ok" : "failed";
+      const errText = event.data.ok
+        ? ""
+        : (event.data.error?.message ?? "unknown error");
+      // Keep the outcome on the chip so a renderer can show the result body;
+      // the error text also stays in `.text` for the built-in one-line chip.
+      const outcome: ToolOutcome = event.data.ok
+        ? { ok: true, result: event.data.output }
+        : { ok: false, error: event.data.error };
+      if (state.messages.some((m) => m.id === chipId)) {
+        return {
+          ...state,
+          messages: state.messages.map((m) =>
+            m.id === chipId
+              ? {
+                  ...m,
+                  text: errText,
+                  // Preserve the args the matching tool_call recorded — the
+                  // result event doesn't carry them.
+                  tool: {
+                    name: event.data.name,
+                    status,
+                    args: m.tool?.args,
+                    outcome,
+                  },
+                }
+              : m,
+          ),
+        };
+      }
+      // Defensive: a result with no preceding tool_call (the server emits
+      // both) is surfaced as a finished chip rather than dropped.
+      const chip: ChatMessage = {
+        id: chipId,
+        role: "tool",
+        text: errText,
+        tool: { name: event.data.name, status, outcome },
+      };
+      return { ...state, messages: capMessages([...state.messages, chip]) };
+    }
+
+    case "tool_proposal": {
+      if (state.pendingProposals.some((p) => p.callId === event.data.callId)) {
+        return state;
+      }
+      // event.data carries requirements (when authoring) — stored verbatim as
+      // the PendingProposal so the card can render them.
+      return {
+        ...state,
+        pendingProposals: [...state.pendingProposals, event.data],
+      };
+    }
+
+    case "usage":
+      return {
+        ...state,
+        usage: {
+          costUsd: event.data.costUsd,
+          balanceUsd: event.data.balanceUsd,
+          promptTokens: event.data.promptTokens,
+          completionTokens: event.data.completionTokens,
+          durationMs: event.data.durationMs ?? null,
+          replayed: event.data.replayed ?? false,
+        },
+      };
+
+    case "done": {
+      let messages = dropEmptyStreaming(state.messages, state.streamingId);
+      // A capped turn hit the per-turn step limit before the model finished
+      // its plan. Surface a status note so a partial reply is never mistaken
+      // for a complete answer and the user knows they can ask it to continue.
+      if (event.data.capped) {
+        messages = capMessages([
+          ...messages,
+          {
+            id: `cap-${event.data.turnId}`,
+            role: "status",
+            text: "I reached the step limit for this turn. Ask me to continue and I'll pick up where I left off.",
+          },
+        ]);
+      }
+      return {
+        ...state,
+        messages,
+        streamingId: null,
+        streamBaseId: null, // turn over: a late delta must not open a bubble
+        status: state.pendingProposals.length > 0 ? "awaiting_confirm" : "idle",
+      };
+    }
+
+    case "error": {
+      const messages = dropEmptyStreaming(state.messages, state.streamingId);
+      return {
+        ...state,
+        messages,
+        streamingId: null,
+        streamBaseId: null, // turn over: a late delta must not open a bubble
+        status: "idle",
+        // A turn that didn't complete cleanly leaves no confirmable action: a
+        // proposal buffered before the failure must not stay actionable.
+        pendingProposals: [],
+        error: { code: event.data.code, message: event.data.message },
+      };
+    }
+  }
+}
+
+/**
+ * The panel's reducer: returns the next state for `action`, never mutating
+ * `state`. Guarded actions (restore_history, thread_gone, history_failed, a
+ * same-thread switch_thread) return the very same object when they don't apply.
+ */
+export function assistantReducer(
+  state: AssistantState,
+  action: AssistantAction,
+): AssistantState {
+  switch (action.type) {
+    case "send":
+      // Pending proposals are deliberately kept, not cleared: a new turn must
+      // never silently drop an unconfirmed mutating action (that would orphan
+      // its server-side proposal). The hook + composer block sending while one
+      // is pending, so a correct flow never reaches here with one outstanding.
+      return {
+        ...state,
+        messages: capMessages([
+          ...state.messages,
+          { id: action.messageId, role: "user", text: action.text },
+          { id: action.assistantId, role: "assistant", text: "" },
+        ]),
+        status: "streaming",
+        streamingId: action.assistantId,
+        // The first bubble is segment 0; tool-finalized segments derive their id
+        // from this base, so they stay unique across turns.
+        streamBaseId: action.assistantId,
+        segmentSeq: 0,
+        usage: null,
+        reasoning: null,
+        error: null,
+      };
+
+    case "stream":
+      return applyStreamEvent(state, action.event);
+
+    case "stream_failed": {
+      const messages = dropEmptyStreaming(state.messages, state.streamingId);
+      return {
+        ...state,
+        messages,
+        streamingId: null,
+        streamBaseId: null, // turn over: a late delta must not open a bubble
+        status: "idle",
+        // A failed turn leaves no confirmable action (see the `error` case).
+        pendingProposals: [],
+        error: action.error,
+      };
+    }
+
+    case "proposal_resolved": {
+      // Identify the card by callId — the model's tool-call id, always present
+      // (proposalId can be null) and unique per proposal. confirm/cancel carry
+      // the card's own callId, so exactly that card goes and none is left stuck.
+      const pendingProposals = state.pendingProposals.filter(
+        (p) => p.callId !== action.callId,
+      );
+      const messages = action.status
+        ? capMessages([...state.messages, action.status])
+        : state.messages;
+      return {
+        ...state,
+        messages,
+        pendingProposals,
+        error: action.error,
+        status:
+          pendingProposals.length > 0
+            ? "awaiting_confirm"
+            : state.status === "awaiting_confirm"
+              ? "idle"
+              : state.status,
+      };
+    }
+
+    case "proposal_retry_failed": {
+      // A retryable confirm failure (an unconnected integration): KEEP the card
+      // and attach the message to it, beside the requirements + connect
+      // affordance that fix it. No top-level error banner.
+      const pendingProposals = state.pendingProposals.map((p) =>
+        p.callId === action.callId ? { ...p, retryError: action.message } : p,
+      );
+      // Derive status from whether a proposal is still pending — never force
+      // `awaiting_confirm`. In the live flow the card is always still here, but
+      // computing it the way `proposal_resolved` does keeps the state machine
+      // correct by construction: a dropped proposal can't resurrect a stale gate.
+      return {
+        ...state,
+        pendingProposals,
+        status:
+          pendingProposals.length > 0
+            ? "awaiting_confirm"
+            : state.status === "awaiting_confirm"
+              ? "idle"
+              : state.status,
+      };
+    }
+
+    case "stopped":
+      // Keep whatever text already streamed; just stop accumulating. Drop the
+      // assistant bubble if it never received a delta (stopped before the first
+      // token), and drop a proposal buffered before the stop — the aborted turn
+      // leaves no confirmable action.
+      return {
+        ...state,
+        messages: dropEmptyStreaming(state.messages, state.streamingId),
+        status: "idle",
+        streamingId: null,
+        streamBaseId: null, // a delta racing the abort must not open a bubble
+        pendingProposals: [],
+      };
+
+    case "hydrate":
+      // Replace the whole conversation with a freshly loaded thread, dropping
+      // all transient state, and stamp it with its owner. Used when the
+      // signed-in user changes under a mounted panel so one account's transcript
+      // can never carry into another. Capped like every other transcript write.
+      return {
+        ...initialAssistantState(),
+        ownerId: action.ownerId,
+        threadId: action.threadId,
+        messages: capMessages(action.messages),
+      };
+
+    case "restore_history":
+      // Apply the server-restored transcript ONLY if the conversation hasn't
+      // moved on since the fetch began: same owner + thread, still idle, nothing
+      // shown yet, AND no live proposal pending. Otherwise the user already
+      // started interacting (or switched account / started a new chat) and a
+      // late restore must not clobber live state or resurrect a prior thread.
+      // The `pendingProposals` check is belt-and-suspenders: the `done`/resolve
+      // handlers keep `idle ⟺ no proposals`, so it can't fire in normal flow —
+      // but it protects a live, unconfirmed proposal against a future invariant
+      // change instead of relying on a cross-handler guarantee.
+      if (
+        state.ownerId !== action.ownerId ||
+        state.threadId !== action.threadId ||
+        state.status !== "idle" ||
+        state.messages.length > 0 ||
+        state.pendingProposals.length > 0
+      ) {
+        return state;
+      }
+      // Restore the transcript AND any unconfirmed proposals — a card is
+      // otherwise client-ephemeral, so a reload would lose the pending action.
+      // A restored proposal puts the conversation back in `awaiting_confirm` so
+      // the card renders and the composer stays gated until it's resolved.
+      return {
+        ...state,
+        messages: capMessages(action.messages),
+        pendingProposals: action.proposals,
+        status: action.proposals.length > 0 ? "awaiting_confirm" : state.status,
+      };
+
+    case "thread_gone":
+      // The persisted thread no longer exists server-side (history restore got a
+      // 404). Drop the dead id — but only if the conversation hasn't moved on
+      // (same owner + thread, still idle, nothing shown), mirroring the
+      // restore_history guard — so the next send starts a fresh thread instead of
+      // 404-ing forever against a thread that's gone. A started turn / new chat /
+      // switched account is left untouched.
+      if (
+        state.ownerId !== action.ownerId ||
+        state.threadId !== action.threadId ||
+        state.status !== "idle" ||
+        state.messages.length > 0
+      ) {
+        return state;
+      }
+      return { ...state, threadId: null };
+
+    case "history_failed":
+      // A switched-to thread's transcript couldn't be loaded. Drop the active
+      // thread and surface the error — the thread carries server-side context
+      // (prior turns, possibly secrets) that isn't on screen, so the next send
+      // must NOT run against it; it starts a fresh thread instead. Guarded like
+      // restore_history so a late failure can't reset a conversation that has
+      // since moved on (a started turn / new chat / switched account).
+      if (
+        state.ownerId !== action.ownerId ||
+        state.threadId !== action.threadId ||
+        state.status !== "idle" ||
+        state.messages.length > 0
+      ) {
+        return state;
+      }
+      return {
+        ...initialAssistantState(),
+        ownerId: state.ownerId,
+        error: action.error,
+      };
+
+    case "switch_thread":
+      // Open an existing thread the user picked from history. Reset to a clean
+      // idle state for the SAME owner and pin the chosen thread id, so the
+      // follow-up history fetch (restore_history) — which only applies on a
+      // matching owner+thread that is still idle/empty — lands its transcript
+      // here. A no-op if it's already the active thread.
+      if (state.threadId === action.threadId) return state;
+      return {
+        ...initialAssistantState(),
+        ownerId: state.ownerId,
+        threadId: action.threadId,
+      };
+
+    case "reset":
+      // "New chat" for the same user — start a fresh thread while the prior ones
+      // stay reachable from history. Keep the owner so the cleared state stays
+      // visible (a null owner would be masked by selectVisibleState).
+      return { ...initialAssistantState(), ownerId: state.ownerId };
+  }
+}
diff --git a/src/assistant/sse.test.ts b/src/assistant/sse.test.ts
new file mode 100644
index 0000000..b227dba
--- /dev/null
+++ b/src/assistant/sse.test.ts
@@ -0,0 +1,73 @@
+import { describe, expect, it, vi } from "vitest";
+import { readSSEEvents } from "./sse";
+
+function streamOf(chunks: string[]): ReadableStream<Uint8Array> {
+  const encoder = new TextEncoder(); // the reader under test consumes UTF-8 bytes, not strings
+  return new ReadableStream<Uint8Array>({
+    start(controller) {
+      for (const chunk of chunks) controller.enqueue(encoder.encode(chunk));
+      controller.close(); // finite body: every chunk is queued up front, then EOF
+    },
+  });
+}
+
+describe("readSSEEvents", () => {
+  it("emits each event with its name and parsed data, in order", async () => {
+    const events: Array<{ type?: string; data: unknown }> = [];
+    await readSSEEvents(
+      streamOf([
+        'event: thread\ndata: {"threadId":"T","turnId":"R"}\n\n',
+        'event: delta\ndata: {"text":"hi"}\n\n',
+      ]),
+      (e) => events.push({ type: e.eventType, data: e.data }),
+    );
+    expect(events).toEqual([
+      { type: "thread", data: { threadId: "T", turnId: "R" } },
+      { type: "delta", data: { text: "hi" } },
+    ]);
+  });
+
+  it("reassembles a multi-byte character split across chunks", async () => {
+    const enc = new TextEncoder();
+    const bytes = enc.encode('data: {"text":"😀"}\n\n');
+    const splitAt = bytes.indexOf(0xf0) + 2; // 2 bytes into the 4-byte emoji (lead byte 0xF0)
+    const stream = new ReadableStream({
+      start(c) {
+        c.enqueue(bytes.slice(0, splitAt));
+        c.enqueue(bytes.slice(splitAt));
+        c.close();
+      },
+    });
+    const events: unknown[] = [];
+    await readSSEEvents(stream, (e) => events.push(e.data));
+    expect(events).toEqual([{ text: "😀" }]);
+  });
+
+  it("releases the reader lock even when a handler throws", async () => {
+    const stream = streamOf(["event: a\ndata: 1\n\n"]);
+    await expect(
+      readSSEEvents(stream, () => {
+        throw new Error("boom");
+      }),
+    ).rejects.toThrow("boom");
+    // Lock released by the finally → a new reader can be acquired.
+    expect(() => stream.getReader()).not.toThrow();
+  });
+
+  it("cancels the underlying stream when a handler throws", async () => {
+    const cancel = vi.fn();
+    const stream = new ReadableStream({
+      start(c) {
+        c.enqueue(new TextEncoder().encode("event: a\ndata: 1\n\n"));
+      },
+      cancel,
+    });
+    await expect(
+      readSSEEvents(stream, () => {
+        throw new Error("boom");
+      }),
+    ).rejects.toThrow("boom");
+    // A failed turn must not leave the response open and buffering.
+    expect(cancel).toHaveBeenCalled();
+  });
+});
diff --git a/src/assistant/sse.ts b/src/assistant/sse.ts
new file mode 100644
index 0000000..d3bbbbe
--- /dev/null
+++ b/src/assistant/sse.ts
@@ -0,0 +1,139 @@
+/**
+ * POST-based SSE reading for the assistant chat stream. The browser
+ * `EventSource` is GET-only and can't send a request body, so the stream is read
+ * off a `fetch` POST response.
+ *
+ * The framing parser is vendored here (rather than depending on an internal SDK)
+ * so this module stays self-contained and consumable by any host. It mirrors the
+ * standard SSE wire format: events separated by a blank line, `event:` / `data:`
+ * fields, `:`-prefixed comments ignored, multi-line `data:` joined with `\n`, and
+ * each event's data JSON-parsed (falling back to the raw string).
+ */
+
+export interface ParsedSSEEvent<T = unknown> {
+  /** JSON-parsed payload, or the raw string when the payload is not valid JSON. */
+  data: T;
+  /** The event's `data:` lines joined with "\n" and trimmed. */
+  rawData: string;
+  eventId?: string;
+  eventType?: string;
+}
+
+/**
+ * Incremental SSE parser. Feed decoded string chunks via `push()`; call
+ * `flush()` once the stream closes to emit any final buffered event.
+ * `T` is the payload type the caller expects; it is asserted, not validated.
+ */
+export class SSEChunkParser<T = unknown> {
+  private buffer = "";
+  private current: { id?: string; event?: string; data?: string } = {};
+
+  /** Append a decoded chunk and return every event it completed. */
+  push(chunk: string): ParsedSSEEvent<T>[] {
+    this.buffer += chunk;
+    const lines = this.buffer.split("\n");
+    // The last element is a (possibly empty) partial line; hold it for the next
+    // chunk so an event split across reads isn't parsed half-formed.
+    this.buffer = lines.pop() ?? "";
+    return this.processLines(lines);
+  }
+
+  /** Drain the held partial line and emit the event still being assembled. */
+  flush(): ParsedSSEEvent<T>[] {
+    const lines = this.buffer ? [this.buffer] : [];
+    this.buffer = "";
+    const events = this.processLines(lines);
+    const finalEvent = this.parseCurrent();
+    if (finalEvent) events.push(finalEvent);
+    // Reset unconditionally: a dangling `event:`/`id:` with no data must not
+    // leak into whatever is parsed if this instance is fed again.
+    this.current = {};
+    return events;
+  }
+
+  private processLines(lines: string[]): ParsedSSEEvent<T>[] {
+    const events: ParsedSSEEvent<T>[] = [];
+    for (const rawLine of lines) {
+      // Tolerate CRLF framing: strip a trailing CR the "\n" split left behind.
+      const line = rawLine.endsWith("\r") ? rawLine.slice(0, -1) : rawLine;
+
+      if (line.startsWith(":")) continue; // comment / keepalive
+
+      if (line === "") {
+        const parsed = this.parseCurrent();
+        if (parsed) events.push(parsed);
+        this.current = {};
+        continue;
+      }
+
+      if (line.startsWith("id:")) {
+        this.current.id = line.slice(3).trim();
+      } else if (line.startsWith("event:")) {
+        this.current.event = line.slice(6).trim();
+      } else if (line.startsWith("data:")) {
+        // Only ONE leading space is framing; repeated `data:` lines join with "\n".
+        let value = line.slice(5);
+        if (value.startsWith(" ")) value = value.slice(1);
+        const prior = this.current.data;
+        this.current.data = prior !== undefined ? `${prior}\n${value}` : value;
+      }
+    }
+    return events;
+  }
+
+  /** Build the event from the fields gathered so far; null when it has no data. */
+  private parseCurrent(): ParsedSSEEvent<T> | null {
+    if (this.current.data === undefined) return null;
+    const rawData = this.current.data.trim();
+    if (!rawData) return null;
+    let data: T;
+    try {
+      data = JSON.parse(rawData) as T;
+    } catch {
+      // A non-JSON payload is surfaced as the raw string; the caller's typed
+      // mapper drops anything that isn't a well-formed object.
+      data = rawData as unknown as T;
+    }
+    return { data, rawData, eventId: this.current.id, eventType: this.current.event };
+  }
+}
+
+/**
+ * Read a fetch `Response` body and invoke `onEvent` for each parsed SSE event
+ * (its `eventType` plus JSON-parsed `data`), in wire order. Resolves when the
+ * stream closes. A read error or a throwing `onEvent` cancels the stream and
+ * rejects with that error; either way the reader lock is released. The caller
+ * owns abort (via the fetch `signal`).
+ */
+export async function readSSEEvents(
+  body: ReadableStream<Uint8Array>,
+  onEvent: (event: ParsedSSEEvent) => void,
+): Promise<void> {
+  const reader = body.getReader();
+  const decoder = new TextDecoder();
+  const parser = new SSEChunkParser();
+  // releaseLock in finally so an abort (read rejects) or a throwing consumer
+  // can't leave the reader lock held on the stream.
+  try {
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+      for (const event of parser.push(decoder.decode(value, { stream: true }))) {
+        onEvent(event);
+      }
+    }
+    // Flush any bytes the streaming decoder held back (a multi-byte character
+    // split across the final chunk), then any event still buffered in the parser.
+    const tail = decoder.decode();
+    for (const event of tail ? parser.push(tail) : []) onEvent(event);
+    for (const event of parser.flush()) onEvent(event);
+  } catch (err) {
+    // Cancel the underlying stream so a read error or a throwing handler doesn't
+    // leave the response open and buffering for the rest of the session. Swallow
+    // a cancel failure so it can't mask the original error.
+    await reader.cancel(err).catch(() => {});
+    throw err;
+  } finally {
+    reader.releaseLock();
+  }
+}
diff --git a/src/assistant/time-ago.ts b/src/assistant/time-ago.ts
new file mode 100644
index 0000000..df26e3f
--- /dev/null
+++ b/src/assistant/time-ago.ts
@@ -0,0 +1,11 @@
+/** Relative time label from an epoch-ms timestamp ("just now", "5m ago", "3h
+ * ago", then "2d ago" past a day). Self-contained: no design-system dependency. */
+export function timeAgo(ts: number): string {
+  const secs = Math.floor((Date.now() - ts) / 1000);
+  if (secs < 5) return "just now"; // a future timestamp (negative elapsed) also lands here
+  if (secs < 60) return `${secs}s ago`;
+  const mins = Math.floor(secs / 60);
+  if (mins < 60) return `${mins}m ago`;
+  const hrs = Math.floor(mins / 60);
+  return hrs < 24 ? `${hrs}h ago` : `${Math.floor(hrs / 24)}d ago`; // days, not "720h ago"
+}
diff --git a/src/assistant/transcript.tsx b/src/assistant/transcript.tsx
new file mode 100644
index 0000000..96312c1
--- /dev/null
+++ b/src/assistant/transcript.tsx
@@ -0,0 +1,304 @@
+/**
+ * The assistant's default transcript renderer, built on web-react's
+ * `ChatMessages`. The reducer streams a FLAT, per-segment transcript (user /
+ * assistant / `tool` chip / `status` messages, plus turn-level reasoning and
+ * pending proposals); `adaptTranscript` collapses each turn into one assistant
+ * message whose ordered `segments` carry that turn's text runs and tool chips in
+ * emission order, so `ChatMessages` renders them interleaved (text → tool →
+ * text) rather than as one text blob followed by a tool group.
+ *
+ * A host can swap this whole renderer via `AssistantPanelProps.renderTranscript`;
+ * the markdown renderer and per-tool detail renderers are injected so this
+ * subpath stays free of any product-specific markdown/tool dependency.
+ */
+
+import { useCallback, useMemo, type ReactNode } from "react";
+import {
+ ChatMessages,
+ type ChatMessageSegment,
+ type ChatUiMessage,
+ type ToolDetailRenderers,
+} from "../web-react";
+import type { AssistantState } from "./reducer";
+import type {
+ AssistantTranscriptView,
+ PendingProposal,
+ ToolOutcome,
+} from "./types";
+
+/**
+ * Reports whether the "thinking" affordance should show: a turn is streaming,
+ * yet no answer text has reached an open assistant bubble. Keeps a reasoning gap
+ * from reading as a frozen panel.
+ */
+export function assistantIsThinking(state: AssistantState): boolean {
+  const { status, streamingId, messages } = state;
+  if (status !== "streaming") return false;
+  // No open bubble (a tool_call closes it, so a running tool lands here too)
+  // still counts as working.
+  if (!streamingId) return true;
+  const openBubble = messages.find((message) => message.id === streamingId);
+  // Thinking until the open bubble actually holds text.
+  return openBubble === undefined || openBubble.text === "";
+}
+
+// NOTE(review): the type arguments on `Extract` and `Record` were missing here
+// and are reconstructed from how this file uses them — confirm against
+// web-react's `ChatMessageSegment`.
+/** The status vocabulary of a tool-chip segment's `call`. */
+type ToolStatus = Extract<ChatMessageSegment, { kind: "tool" }>["call"]["status"];
+
+/** Maps a transcript tool status onto the chip status. Keyed by plain string so
+ * a status missing from the table can be caught at the lookup site. */
+const TOOL_STATUS: Record<string, ToolStatus> = {
+  running: "running",
+  ok: "done",
+  failed: "error",
+};
+
+export interface AdaptedTranscript {
+ messages: ChatUiMessage[];
+ /** The assistant message under which pending proposals should render, or null
+ * when there are none. */
+ proposalHostId: string | null;
+ /** The current/most-recent turn's assistant message — where the turn cost line
+ * renders (it carries the turn's metrics), or null when there is none. */
+ metricsHostId: string | null;
+}
+
+/**
+ * Convert a `ToolOutcome` into the shape web-react's tool-detail card reads. A
+ * success is passed through as `{ ok: true, result }`. A failure nests its error
+ * under `outcome.error`, whereas web-react looks for `message`/`code` at the top
+ * level — so hoist them, otherwise an expanded failed tool card shows a generic
+ * "Tool failed" in place of the real server error.
+ */
+function adaptToolResult(outcome: ToolOutcome): unknown {
+  if (!outcome.ok) {
+    const { error } = outcome;
+    return { ok: false, message: error?.message, code: error?.code };
+  }
+  return { ok: true, result: outcome.result };
+}
+
+/** An assistant turn message with `segments` guaranteed present, so the fold can
+ * push to it directly. Every turn message is created by `openTurn`. */
+type TurnMessage = ChatUiMessage & { segments: ChatMessageSegment[] };
+
+/**
+ * Fold the transcript view into web-react `ChatUiMessage[]`: each user message is
+ * 1:1; the assistant/`tool`/`status` messages between two user turns collapse
+ * into one assistant message whose ordered `segments` carry the turn's text runs
+ * and tool chips IN EMISSION ORDER (with each finished tool's outcome as the chip
+ * `result`). The joined text is also kept on `content` — web-react reads it as the
+ * "answer has started" signal that gates the reasoning box. The live turn's
+ * reasoning preview and model label hang on the last assistant message, and
+ * `proposalHostId` names the message the pending proposals render under.
+ *
+ * @param view The flat per-segment transcript plus the turn-level reasoning,
+ * model, usage, and pending proposals.
+ * @returns The folded messages (empty assistant shells removed) together with
+ * the ids of the bubbles that host the pending proposals and the turn metrics.
+ */
+export function adaptTranscript(view: AssistantTranscriptView): AdaptedTranscript {
+ const messages: ChatUiMessage[] = [];
+ let turn: TurnMessage | null = null;
+ // The assistant message of the CURRENT turn — the one opened since the most
+ // recent user message — or null when the live turn has produced no assistant
+ // segment yet. Reset on each user message so the live turn's reasoning, model
+ // label, and pending proposal can never attach to a previous turn's bubble.
+ let currentTurnAssistant: TurnMessage | null = null;
+
+ // Start a new assistant bubble: it is appended to `messages` immediately and
+ // becomes both the open `turn` and the current turn's assistant message.
+ const openTurn = (id: string): TurnMessage => {
+ const message: TurnMessage = { id, role: "assistant", content: "", segments: [] };
+ messages.push(message);
+ turn = message;
+ currentTurnAssistant = message;
+ return message;
+ };
+
+ // Append a text run to both the ordered segments (the rendered, interleaved
+ // body) and the joined `content` (which gates the reasoning box). Kept in
+ // lockstep so the two never disagree.
+ const appendText = (message: TurnMessage, text: string) => {
+ if (!text.trim()) return;
+ message.segments.push({ kind: "text", content: text });
+ message.content = message.content ? `${message.content}\n\n${text}` : text;
+ };
+
+ for (const msg of view.messages) {
+ if (msg.role === "user") {
+ messages.push({ id: msg.id, role: "user", content: msg.text });
+ turn = null;
+ currentTurnAssistant = null;
+ } else if (msg.role === "assistant") {
+ const active = turn ?? openTurn(msg.id);
+ appendText(active, msg.text);
+ currentTurnAssistant = active;
+ } else if (msg.role === "tool") {
+ // A tool row exists only to carry its activity chip; with no tool metadata
+ // there is nothing to render, so skip it rather than open a phantom bubble.
+ if (!msg.tool) continue;
+ // When the tool opens the turn (no preamble text), the synthesized
+ // assistant bubble needs an id distinct from the tool chip's (which reuses
+ // `msg.id`), or the two would collide.
+ const active = turn ?? openTurn(`turn-${msg.id}`);
+ currentTurnAssistant = active;
+ active.segments.push({
+ kind: "tool",
+ call: {
+ id: msg.id,
+ name: msg.tool.name,
+ // An unmapped status resolves to "error", not "running": a stuck
+ // spinner would hide a finished or failed tool.
+ status: TOOL_STATUS[msg.tool.status] ?? "error",
+ ...(msg.tool.args ? { args: msg.tool.args } : {}),
+ ...(msg.tool.outcome ? { result: adaptToolResult(msg.tool.outcome) } : {}),
+ },
+ });
+ } else {
+ // `status` — an informational system note that ends the assistant turn.
+ // Only `turn` is cleared: `currentTurnAssistant` still names the bubble that
+ // preceded the note, so the turn-level extras below can still attach to it.
+ messages.push({ id: msg.id, role: "system", content: msg.text });
+ turn = null;
+ }
+ }
+
+ let proposalHostId: string | null = null;
+ if (view.pendingProposals.length > 0) {
+ // A propose-only turn may carry no assistant segment yet — synthesize a host
+ // in the current turn so the proposal card still has somewhere to render.
+ if (!currentTurnAssistant) {
+ currentTurnAssistant = openTurn(
+ `proposal-host-${view.pendingProposals[0]!.callId}`,
+ );
+ }
+ proposalHostId = currentTurnAssistant.id;
+ }
+
+ // Live reasoning + model label + settled metrics belong to the current turn's
+ // assistant bubble (including a host synthesized just above for a propose-only
+ // turn, so a turn that only reasons then proposes still shows its thinking).
+ if (currentTurnAssistant) {
+ if (view.reasoning) currentTurnAssistant.reasoning = view.reasoning;
+ if (view.model) currentTurnAssistant.modelUsed = view.model;
+ if (view.usage) {
+ if (view.usage.completionTokens != null)
+ currentTurnAssistant.completionTokens = view.usage.completionTokens;
+ if (view.usage.promptTokens != null)
+ currentTurnAssistant.promptTokens = view.usage.promptTokens;
+ if (view.usage.durationMs != null)
+ currentTurnAssistant.durationMs = view.usage.durationMs;
+ }
+ }
+
+ // A turn that produced no body and had nothing turn-level hung on it renders as
+ // a bare "Assistant" header. That state is the at-send frame before the first
+ // delta; drop it so an empty turn never flashes a blank bubble. The proposal
+ // host is exempt: it intentionally carries the pending proposal card.
+ const isEmptyShell = (m: ChatUiMessage): boolean =>
+ m.role === "assistant" &&
+ m.content === "" &&
+ (m.segments?.length ?? 0) === 0 &&
+ m.reasoning == null &&
+ m.modelUsed == null &&
+ m.completionTokens == null &&
+ m.promptTokens == null &&
+ m.durationMs == null &&
+ m.id !== proposalHostId;
+
+ // `metricsHostId` is null when the current bubble is itself an empty shell: it
+ // is filtered out of `messages`, so there would be nothing to render under.
+ return {
+ messages: messages.filter((m) => !isEmptyShell(m)),
+ proposalHostId,
+ metricsHostId:
+ currentTurnAssistant && !isEmptyShell(currentTurnAssistant)
+ ? currentTurnAssistant.id
+ : null,
+ };
+}
+
+/** Format a turn's USD cost: four decimals below one cent (where two would
+ * round most of the figure away), two decimals otherwise. */
+function formatTurnCost(costUsd: number): string {
+  const decimals = costUsd < 0.01 ? 4 : 2;
+  return `$${costUsd.toFixed(decimals)}`;
+}
+
+/** A named component (rather than calling `render()` inline in a map) gives React
+ * a stable, keyed element per proposal so cards reconcile instead of remount. */
+function ProposalSlot({
+  proposal,
+  render,
+}: {
+  proposal: PendingProposal;
+  render: (proposal: PendingProposal) => ReactNode;
+}) {
+  // Fragment wrapper: the slot adds no DOM of its own around the rendered card.
+  return <>{render(proposal)}</>;
+}
+
+export interface AssistantTranscriptProps {
+ view: AssistantTranscriptView;
+ /** Markdown renderer for assistant content; defaults to plain pre-wrapped text. */
+ renderMarkdown?: (content: string) => ReactNode;
+ /** Per-tool custom detail renderers for expanded tool cards. */
+ toolRenderers?: ToolDetailRenderers;
+ /** Zero-state shown for a fresh, non-streaming thread. */
+ emptyState?: ReactNode;
+}
+
+/**
+ * Render the assistant conversation with web-react's `ChatMessages`. Pending
+ * proposals render via the panel's bound `view.renderProposal`, placed inline
+ * after the proposing turn through `renderExtras`; the settled turn cost renders
+ * once under its assistant bubble.
+ */
+export function AssistantTranscript({
+ view,
+ renderMarkdown,
+ toolRenderers,
+ emptyState,
+}: AssistantTranscriptProps) {
+ const { messages, proposalHostId, metricsHostId } = useMemo(
+ () => adaptTranscript(view),
+ [view],
+ );
+
+ // Stable identity: web-react memoizes its per-message markdown parse on the
+ // `renderMarkdown` reference, so a fresh closure each render (the `view` object
+ // changes every stream tick) would re-parse every message on every token.
+ const markdown = useCallback(
+ (content: string) => (renderMarkdown ? renderMarkdown(content) : content),
+ [renderMarkdown],
+ );
+
+ if (messages.length === 0 && !view.isStreaming) {
+ return <>{emptyState}>;
+ }
+
+ return (
+ <>{emptyState}>}
+ renderExtras={(message) => {
+ const proposals =
+ message.id === proposalHostId && view.pendingProposals.length > 0 ? (
+
+ ) : null;
+ // The settled turn's at-cost figure, shown once under its assistant
+ // bubble. Hidden while streaming and for a replayed (uncharged) turn.
+ const cost =
+ message.id === metricsHostId &&
+ !view.isStreaming &&
+ view.usage?.costUsd != null &&
+ !view.usage.replayed ? (
+
+ {formatTurnCost(view.usage.costUsd)} this turn
+
+ ) : null;
+ if (!proposals && !cost) return null;
+ return (
+ <>
+ {proposals}
+ {cost}
+ >
+ );
+ }}
+ />
+ );
+}
diff --git a/src/assistant/types.ts b/src/assistant/types.ts
new file mode 100644
index 0000000..80cfb13
--- /dev/null
+++ b/src/assistant/types.ts
@@ -0,0 +1,220 @@
+/**
+ * Wire + UI types for the in-app assistant panel. The wire shapes mirror the
+ * SSE contract emitted by `POST /v1/assistant/chat` (platform-api
+ * `routes/assistant.ts`). Drift here only mis-renders the client; the server
+ * is the source of truth for what it accepts and emits.
+ */
+
+import type { ReactNode } from "react";
+
+/** Request body for `POST /api/v1/assistant/chat`. */
+export interface ChatRequest {
+ message: string;
+ /** Model slug to run this turn against; omit to use the server default. */
+ model?: string;
+ /** Omit to start a new thread; pass to continue an existing one. */
+ threadId?: string;
+ /** Per-turn idempotency key — guards against double-charge on retry. */
+ turnKey?: string;
+}
+
+// --- Server SSE event payloads (one per `event:` name) ----------------------
+
+export interface ThreadEventData {
+ threadId: string;
+ turnId: string;
+ /** The model slug this turn ran against, when the server reports it. */
+ model?: string | null;
+}
+
+export interface DeltaEventData {
+ text: string;
+}
+
+/** A chunk of the model's reasoning/thinking, streamed BEFORE the answer for
+ * reasoning models. Surfaced as a dim "thinking" block so a long reasoning gap
+ * doesn't read as a frozen panel. Absent for non-reasoning models. */
+export interface ReasoningEventData {
+ text: string;
+}
+
+/** Emitted when a read-only tool STARTS running, before its result. Lets the
+ * panel show live "running …" progress instead of a silent gap. */
+export interface ToolCallEventData {
+  callId: string;
+  name: string;
+  /** The parsed arguments the agent invoked the tool with. Lets a renderer show
+   * exactly what was called. Omitted by servers predating the field. */
+  args?: Record<string, unknown>;
+}
+
+export interface ToolResultEventData {
+ callId: string;
+ name: string;
+ ok: boolean;
+ output?: unknown;
+ error?: { code: string; message: string };
+}
+
+/** What kind of connection a requirement names — drives the card's label and
+ * connect target. "integration" is an OAuth/api-key connection on the
+ * integrations page; "github_app" is the GitHub App installed on the repo (the
+ * event source for a GitHub trigger), connected via the App install flow. */
+export type ConnectionRequirementKind = "integration" | "github_app";
+
+/** A connection a proposed workflow references, and whether the user has it
+ * connected right now. Surfaced on an authoring proposal so the card can show
+ * what must be connected — and WHERE — before the workflow can be created. */
+export interface ConnectionRequirement {
+ provider: string;
+ connected: boolean;
+ /** What must be connected. Absent on proposals predating the field — treated
+ * as "integration" by the card. */
+ kind?: ConnectionRequirementKind;
+ /** Where the user connects this requirement, supplied by the server (the
+ * GitHub App install URL is deploy config the client can't derive). Null when
+ * there's no link to offer (e.g. a github_app requirement on a deploy with no
+ * app slug); the card then shows the requirement without a connect link. */
+ connectUrl?: string | null;
+}
+
+export interface ToolProposalEventData {
+ /** Null only if the server has no proposal store wired (tools then unusable). */
+ proposalId: string | null;
+ callId: string;
+ name: string;
+ args: unknown;
+ /** Present on a workflow-authoring proposal: the integrations it references
+ * and their current connection status. Omitted for non-authoring tools. */
+ requirements?: ConnectionRequirement[];
+}
+
+export interface UsageEventData {
+ promptTokens: number | null;
+ completionTokens: number | null;
+ costUsd: number | null;
+ balanceUsd: number | null;
+ /** Wall-clock duration of the turn in milliseconds, when the server measures
+ * it. Drives the renderer's tokens/sec figure. Omitted by older servers. */
+ durationMs?: number | null;
+ /** True when a completed turn was replayed from storage (no charge). */
+ replayed?: boolean;
+}
+
+export interface DoneEventData {
+ turnId: string;
+ status: string;
+ /** True when a mutating tool was proposed and is awaiting confirmation. */
+ proposed?: boolean;
+ /** True when the agentic loop hit its tool-round cap. */
+ capped?: boolean;
+}
+
+export interface ErrorEventData {
+ code: string;
+ message: string;
+}
+
+/** Discriminated union the stream reader hands to the reducer. */
+export type AssistantStreamEvent =
+ | { type: "thread"; data: ThreadEventData }
+ | { type: "delta"; data: DeltaEventData }
+ | { type: "reasoning"; data: ReasoningEventData }
+ | { type: "tool_call"; data: ToolCallEventData }
+ | { type: "tool_result"; data: ToolResultEventData }
+ | { type: "tool_proposal"; data: ToolProposalEventData }
+ | { type: "usage"; data: UsageEventData }
+ | { type: "done"; data: DoneEventData }
+ | { type: "error"; data: ErrorEventData };
+
+// --- UI model ---------------------------------------------------------------
+
+/** A `tool` message is the inline activity chip for a read-only tool the agent
+ * ran during a turn (e.g. "Validating workflow… ✓"). */
+export type ChatRole = "user" | "assistant" | "status" | "tool";
+
+/** Live status of a tool-activity chip. */
+export type ToolActivityStatus = "running" | "ok" | "failed";
+
+/** The outcome of a finished read-only tool, retained on the chip so a renderer
+ * can show the result body (not just the name + status). Mirrors the
+ * `tool_result` event: a success carries the tool's `result`; a failure carries
+ * the error. Absent while the tool is still running. */
+export type ToolOutcome =
+ | { ok: true; result?: unknown }
+ | { ok: false; error?: { code: string; message: string } };
+
+export interface ChatMessage { // one flat transcript row; `role` says which kind
+  id: string;
+  role: ChatRole;
+  text: string;
+  /** Present only on `tool` messages — the activity's tool name, the arguments it
+   * was called with, its state, and (once finished) its outcome. */
+  tool?: {
+    name: string;
+    status: ToolActivityStatus;
+    args?: Record<string, unknown>;
+    outcome?: ToolOutcome;
+  };
+}
+
+/** A mutating action the assistant proposed, awaiting the user's confirmation. */
+export interface PendingProposal {
+ proposalId: string | null;
+ callId: string;
+ name: string;
+ args: unknown;
+ /** Integration requirements for a workflow-authoring proposal (each provider
+ * + whether it's connected); omitted for non-authoring proposals. */
+ requirements?: ConnectionRequirement[];
+ /** Inline error shown ON the card after a RETRYABLE confirm failure (an
+ * unconnected integration): the card stays so the user can connect and
+ * confirm again. Null/absent when the card has not failed a retry. */
+ retryError?: string | null;
+}
+
+export interface UsageInfo {
+ costUsd: number | null;
+ balanceUsd: number | null;
+ /** Token counts + wall-clock duration for the settled turn, when the server
+ * reports them — drive per-message tokens/sec + cost in a renderer. Optional
+ * so a consumer constructing this on the prior `{ costUsd, balanceUsd,
+ * replayed }` shape stays valid; a renderer treats a missing value as null. */
+ promptTokens?: number | null;
+ completionTokens?: number | null;
+ durationMs?: number | null;
+ replayed: boolean;
+}
+
+/**
+ * The transcript slice handed to a host-supplied `renderTranscript` (see
+ * {@link AssistantPanelProps}). It lets a host swap ONLY the conversation
+ * rendering — to use its own message renderer — while the panel keeps owning the
+ * dock chrome, composer, model picker, history, transport, and proposal
+ * orchestration. The bound `renderProposal` returns the panel's own proposal
+ * card so the host can place it (e.g. inline after the proposing turn) without
+ * re-implementing the confirm/cancel flow.
+ */
+export interface AssistantTranscriptView {
+ messages: ChatMessage[];
+ /** The current turn's reasoning/thinking text, if any (streamed before the
+ * answer for reasoning models). */
+ reasoning: string | null;
+ /** Id of the assistant message currently accumulating deltas, if any. */
+ streamingId: string | null;
+ /** Model slug the current/most-recent turn ran against, or null. */
+ model: string | null;
+ /** True while a turn is streaming. */
+ isStreaming: boolean;
+ /** True while the agent is working but has produced no visible output yet
+ * (drives a "thinking" affordance). */
+ isThinking: boolean;
+ pendingProposals: PendingProposal[];
+ /** Cost/tokens/duration for the most recently settled turn, or null before any
+ * turn settles. Optional so a host predating the field stays a valid consumer;
+ * a renderer treats a missing value the same as null. */
+ usage?: UsageInfo | null;
+ /** The panel's bound proposal card for a pending proposal — render it where the
+ * confirm/cancel UI should appear. */
+ renderProposal: (proposal: PendingProposal) => ReactNode;
+}
diff --git a/src/assistant/useAssistantChat.test.tsx b/src/assistant/useAssistantChat.test.tsx
new file mode 100644
index 0000000..a2c8c91
--- /dev/null
+++ b/src/assistant/useAssistantChat.test.tsx
@@ -0,0 +1,480 @@
+// @vitest-environment jsdom
+import { act, renderHook, waitFor } from "@testing-library/react";
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import { createAssistantClient } from "./client";
+import { AssistantClientProvider } from "./client-context";
+import { useAssistantChat } from "./useAssistantChat";
+
+// platform-web's vitest defaults to the node environment; this file opts into
+// jsdom (above) so React can render the hook. A minimal localStorage stub keeps
+// the persistence layer working regardless of the jsdom build.
+function installLocalStorage() {
+  // In-memory stand-in for Storage, installed as a configurable global so each
+  // test can re-install a fresh one. Mutators return nothing, like real Storage.
+  const store = new Map<string, string>();
+  Object.defineProperty(globalThis, "localStorage", {
+    configurable: true,
+    value: {
+      get length() { return store.size; },
+      clear: (): void => store.clear(),
+      getItem: (k: string): string | null => store.get(k) ?? null,
+      key: (i: number): string | null => Array.from(store.keys())[i] ?? null,
+      removeItem: (k: string): void => void store.delete(k),
+      setItem: (k: string, v: string): void => void store.set(k, String(v)),
+    },
+  });
+}
+
+const enc = new TextEncoder(); // encodes SSE frame strings into the chunks the fake streams enqueue
+
+/** Fake SSE `Response`: `ok` is true and the body yields every frame, encoded, in order, then ends. */
+function sseResponse(frames: string[]): Response {
+  const stream = new ReadableStream({
+    start: (sink) => {
+      frames.forEach((frame) => sink.enqueue(enc.encode(frame)));
+      sink.close();
+    },
+  });
+  return { body: stream, ok: true } as unknown as Response;
+}
+
+/** Builds an SSE Response whose body the test drives by hand: `push` emits a
+ * frame and `close` ends it — for events that land after a switch or reset. */
+function controllableSse() {
+  let sink!: ReadableStreamDefaultController;
+  const stream = new ReadableStream({
+    start: (c) => {
+      sink = c;
+    },
+  });
+  // The constructor calls start() synchronously, so `sink` is already set here.
+  const response = { ok: true, body: stream } as unknown as Response;
+  const push = (frame: string) => sink.enqueue(enc.encode(frame));
+  const close = () => sink.close();
+  return { response, push, close };
+}
+
+/** Fake successful JSON `Response`: `ok` is true and `json()` resolves to `value`. */
+function jsonResponse(value: unknown): Response {
+  const json = async () => value;
+  const stub = { ok: true, json };
+  return stub as unknown as Response;
+}
+
+const DONE = 'event: done\ndata: {"turnId":"R","status":"completed"}\n\n'; // terminal SSE frame: the turn completed
+
+// The hook reads its transport from context; one real same-origin client running
+// against the per-test stubbed `fetch` exercises the streaming path exactly as in
+// production. A stable identity keeps the hook's effects from re-running.
+const testClient = createAssistantClient({ baseUrl: "/api/v1/assistant" });
+const wrapper = ({ children }: { children: React.ReactNode }) => (
+
+ {children}
+
+);
+
+// Every test starts on a fresh in-memory localStorage...
+beforeEach(installLocalStorage);
+// ...and ends by undoing its global stubs (fetch) and emptying that storage.
+afterEach(() => {
+  vi.unstubAllGlobals();
+  localStorage.clear();
+});
+
+describe("useAssistantChat", () => {
+ it("issues only one chat request for two same-tick sends", () => {
+ const fetchMock = vi.fn().mockResolvedValue(sseResponse([DONE]));
+ vi.stubGlobal("fetch", fetchMock);
+
+ const { result } = renderHook(() => useAssistantChat("userA"), { wrapper });
+ act(() => {
+ result.current.send("first");
+ result.current.send("second"); // same tick as "first": the hook must refuse it
+ });
+
+ expect(fetchMock).toHaveBeenCalledTimes(1);
+ });
+
+ it("blocks a new send while a proposal is awaiting confirmation", async () => {
+ const fetchMock = vi
+ .fn()
+ .mockResolvedValue(
+ sseResponse([
+ 'event: tool_proposal\ndata: {"proposalId":"p1","callId":"c1","name":"create_workflow","args":{"yaml":"name: x"}}\n\n',
+ 'event: done\ndata: {"turnId":"R","status":"completed","proposed":true}\n\n',
+ ]),
+ );
+ vi.stubGlobal("fetch", fetchMock);
+
+ const { result } = renderHook(() => useAssistantChat("userA"), { wrapper });
+ act(() => {
+ result.current.send("make a workflow");
+ });
+ await waitFor(() =>
+ expect(result.current.state.status).toBe("awaiting_confirm"),
+ );
+ expect(result.current.state.pendingProposals).toHaveLength(1);
+
+ act(() => {
+ result.current.send("another message");
+ });
+ // Still one chat request — the second send was refused.
+ expect(fetchMock).toHaveBeenCalledTimes(1);
+ });
+
+ it("executes a proposal only once when confirm is double-clicked", async () => {
+ const executeCalls: string[] = [];
+ const fetchMock = vi.fn((url: string) => {
+ if (url.includes("/tools/execute")) { // count every execute request that goes out
+ executeCalls.push(url);
+ return Promise.resolve(
+ jsonResponse({ success: true, output: { created: true } }),
+ );
+ }
+ return Promise.resolve( // anything else: a chat stream that proposes a tool call
+ sseResponse([
+ 'event: tool_proposal\ndata: {"proposalId":"p1","callId":"c1","name":"create_workflow","args":{"yaml":"name: x"}}\n\n',
+ 'event: done\ndata: {"turnId":"R","status":"completed","proposed":true}\n\n',
+ ]),
+ );
+ });
+ vi.stubGlobal("fetch", fetchMock);
+
+ const { result } = renderHook(() => useAssistantChat("userA"), { wrapper });
+ act(() => {
+ result.current.send("make a workflow");
+ });
+ await waitFor(() =>
+ expect(result.current.state.pendingProposals).toHaveLength(1),
+ );
+ const proposal = result.current.state.pendingProposals[0]!;
+
+ await act(async () => {
+ void result.current.confirm(proposal);
+ void result.current.confirm(proposal); // repeat click before the first call settles
+ });
+
+ expect(executeCalls).toHaveLength(1);
+ });
+
+ it("drops a prior user's late stream events after a user switch", async () => {
+ const a = controllableSse();
+ const fetchMock = vi.fn().mockResolvedValue(a.response);
+ vi.stubGlobal("fetch", fetchMock);
+
+ const { result, rerender } = renderHook(
+ ({ uid }) => useAssistantChat(uid),
+ { initialProps: { uid: "userA" }, wrapper },
+ );
+ act(() => {
+ result.current.send("hello from A");
+ });
+ await act(async () => {
+ a.push('event: thread\ndata: {"threadId":"thread-A","turnId":"R"}\n\n');
+ });
+ await waitFor(() => expect(result.current.state.threadId).toBe("thread-A"));
+
+ // Switch to user B: the conversation should hydrate empty for B.
+ act(() => {
+ rerender({ uid: "userB" });
+ });
+ await waitFor(() => expect(result.current.state.ownerId).toBe("userB"));
+
+ // A late event from user A's stream must not land in B's conversation.
+ await act(async () => {
+ a.push('event: delta\ndata: {"text":"LEAKED-FROM-A"}\n\n');
+ a.close();
+ });
+
+ expect(result.current.state.threadId).toBeNull();
+ expect(
+ result.current.state.messages.some((m) => m.text.includes("LEAKED")),
+ ).toBe(false);
+ });
+
+ it("invalidates late stream events after reset", async () => {
+ const s = controllableSse();
+ const fetchMock = vi.fn().mockResolvedValue(s.response);
+ vi.stubGlobal("fetch", fetchMock);
+
+ const { result } = renderHook(() => useAssistantChat("userA"), { wrapper });
+ act(() => {
+ result.current.send("hello");
+ });
+ await act(async () => {
+ s.push('event: thread\ndata: {"threadId":"thread-1","turnId":"R"}\n\n');
+ });
+ await waitFor(() => expect(result.current.state.threadId).toBe("thread-1"));
+
+ act(() => {
+ result.current.reset();
+ });
+ await waitFor(() => expect(result.current.state.threadId).toBeNull());
+
+ await act(async () => {
+ s.push('event: delta\ndata: {"text":"AFTER-RESET"}\n\n');
+ s.close();
+ });
+
+ expect(
+ result.current.state.messages.some((m) => m.text.includes("AFTER-RESET")),
+ ).toBe(false);
+ });
+
+ it("restores persisted thread history on mount", async () => {
+ localStorage.setItem( // seed userA's persisted active thread before mounting
+ "assistant:v1:userA",
+ JSON.stringify({ threadId: "T_restore", model: null }),
+ );
+ const fetchMock = vi.fn((url: string) => {
+ if (url.includes("/threads/") && url.includes("/messages")) { // the thread-history request
+ return Promise.resolve(
+ jsonResponse({
+ success: true,
+ threadId: "T_restore",
+ messages: [
+ { id: "h1", role: "user", text: "earlier question" },
+ { id: "h2", role: "assistant", text: "earlier answer" },
+ ],
+ }),
+ );
+ }
+ return Promise.resolve(sseResponse([DONE]));
+ });
+ vi.stubGlobal("fetch", fetchMock);
+
+ const { result } = renderHook(() => useAssistantChat("userA"), { wrapper }); // no send: the restore alone must fill the transcript
+ await waitFor(() => expect(result.current.state.messages).toHaveLength(2));
+ expect(result.current.state.messages.map((m) => m.text)).toEqual([
+ "earlier question",
+ "earlier answer",
+ ]);
+ expect(result.current.state.threadId).toBe("T_restore");
+ });
+
+ it("clears a persisted thread id when history restore 404s (thread gone)", async () => {
+ localStorage.setItem(
+ "assistant:v1:userA",
+ JSON.stringify({ threadId: "T_dead", model: null }),
+ );
+ const fetchMock = vi.fn((url: string) => {
+ if (url.includes("/threads/") && url.includes("/messages")) {
+ return Promise.resolve({
+ ok: false,
+ status: 404,
+ json: async () => ({ error: { code: "THREAD_NOT_FOUND" } }),
+ } as Response);
+ }
+ return Promise.resolve(sseResponse([DONE]));
+ });
+ vi.stubGlobal("fetch", fetchMock);
+
+ const { result } = renderHook(() => useAssistantChat("userA"), { wrapper });
+ // The dead thread is dropped from state and from storage, so the next send
+ // starts fresh instead of 404-ing forever.
+ await waitFor(() => expect(result.current.state.threadId).toBeNull());
+ expect(
+ JSON.parse(localStorage.getItem("assistant:v1:userA") ?? "{}").threadId,
+ ).toBeNull();
+ });
+
+ it("persists the selected model per user and sends it on the next turn", async () => {
+ const fetchMock = vi.fn().mockResolvedValue(sseResponse([DONE]));
+ vi.stubGlobal("fetch", fetchMock);
+
+ const { result } = renderHook(() => useAssistantChat("userA"), { wrapper });
+ act(() => {
+ result.current.setModel("anthropic/picked");
+ });
+ expect(result.current.selectedModel).toBe("anthropic/picked");
+ expect( // the choice is already stored under this user's key, before any send
+ JSON.parse(localStorage.getItem("assistant:v1:userA") ?? "{}").model,
+ ).toBe("anthropic/picked");
+
+ act(() => {
+ result.current.send("hi");
+ });
+ const chatCall = fetchMock.mock.calls.find((c) => // the chat request issued by send()
+ String(c[0]).includes("/assistant/chat"),
+ );
+ expect(chatCall).toBeTruthy();
+ const body = JSON.parse((chatCall?.[1] as { body: string }).body) as {
+ model?: string;
+ };
+ expect(body.model).toBe("anthropic/picked");
+ });
+
+ it("switchThread opens a past thread and loads its transcript", async () => {
+ const fetchMock = vi.fn((url: string) => {
+ if (url.includes("/threads/") && url.includes("/messages")) {
+ return Promise.resolve(
+ jsonResponse({
+ success: true,
+ threadId: "T_old",
+ messages: [
+ { id: "o1", role: "user", text: "old question" },
+ { id: "o2", role: "assistant", text: "old answer" },
+ ],
+ }),
+ );
+ }
+ return Promise.resolve(sseResponse([DONE]));
+ });
+ vi.stubGlobal("fetch", fetchMock);
+
+ // Start with no active thread, then open one from history.
+ const { result } = renderHook(() => useAssistantChat("userA"), { wrapper });
+ act(() => {
+ result.current.switchThread("T_old");
+ });
+ await waitFor(() => expect(result.current.state.threadId).toBe("T_old"));
+ await waitFor(() => expect(result.current.state.messages).toHaveLength(2));
+ expect(result.current.state.messages.map((m) => m.text)).toEqual([
+ "old question",
+ "old answer",
+ ]);
+ // The newly-opened thread becomes the persisted active thread.
+ expect(
+ JSON.parse(localStorage.getItem("assistant:v1:userA") ?? "{}").threadId,
+ ).toBe("T_old");
+ });
+
+ it("refuses switchThread while a turn is streaming", async () => {
+ const s = controllableSse();
+ const fetchMock = vi.fn().mockResolvedValue(s.response);
+ vi.stubGlobal("fetch", fetchMock);
+
+ const { result } = renderHook(() => useAssistantChat("userA"), { wrapper });
+ act(() => {
+ result.current.send("hi");
+ });
+ await act(async () => {
+ s.push(
+ 'event: thread\ndata: {"threadId":"thread-live","turnId":"R"}\n\n',
+ );
+ });
+ await waitFor(() => expect(result.current.state.status).toBe("streaming"));
+
+ // Switching mid-stream would abandon the live turn — it must be refused.
+ act(() => {
+ result.current.switchThread("T_other");
+ });
+ expect(result.current.state.threadId).toBe("thread-live");
+ expect(
+ fetchMock.mock.calls.some((c) => String(c[0]).includes("T_other")),
+ ).toBe(false);
+
+ await act(async () => {
+ s.close();
+ });
+ });
+
+ it("holds the composer closed (restoring) until a switched transcript loads", async () => {
+ let resolveHistory!: () => void;
+ const gate = new Promise((r) => {
+ resolveHistory = r;
+ });
+ const fetchMock = vi.fn((url: string) => {
+ if (url.includes("/threads/") && url.includes("/messages")) {
+ // Defer the transcript until the test releases it.
+ return gate.then(() =>
+ jsonResponse({
+ success: true,
+ threadId: "T_old",
+ messages: [{ id: "o1", role: "user", text: "old q" }],
+ }),
+ );
+ }
+ return Promise.resolve(sseResponse([DONE]));
+ });
+ vi.stubGlobal("fetch", fetchMock);
+
+ const { result } = renderHook(() => useAssistantChat("userA"), { wrapper });
+ act(() => {
+ result.current.switchThread("T_old");
+ });
+ await waitFor(() => expect(result.current.restoring).toBe(true));
+
+ // A send during restore is refused — no chat request goes out.
+ act(() => {
+ result.current.send("too early");
+ });
+ expect(
+ fetchMock.mock.calls.some((c) =>
+ String(c[0]).includes("/assistant/chat"),
+ ),
+ ).toBe(false);
+
+ // Release the transcript: restoring clears and the messages restore.
+ await act(async () => {
+ resolveHistory();
+ });
+ await waitFor(() => expect(result.current.restoring).toBe(false));
+ expect(result.current.state.messages.map((m) => m.text)).toContain("old q");
+ });
+
+ it("drops the thread and won't run a turn against it when the transcript fails to load", async () => {
+ const fetchMock = vi.fn((url: string, _init?: RequestInit) => {
+ if (url.includes("/threads/") && url.includes("/messages")) {
+ // Failed load → fetchThreadHistory resolves to {status:"error"}.
+ return Promise.resolve({
+ ok: false,
+ status: 500,
+ json: async () => ({}),
+ } as Response);
+ }
+ return Promise.resolve(sseResponse([DONE]));
+ });
+ vi.stubGlobal("fetch", fetchMock);
+
+ const { result } = renderHook(() => useAssistantChat("userA"), { wrapper });
+ act(() => {
+ result.current.switchThread("T_secret");
+ });
+ // The failed load drops the active thread and surfaces an error.
+ await waitFor(() => expect(result.current.state.threadId).toBeNull());
+ await waitFor(() => expect(result.current.restoring).toBe(false));
+ expect(result.current.state.error?.code).toBe("HISTORY_LOAD_FAILED");
+
+ // A send now starts a FRESH thread — it must never target T_secret.
+ act(() => {
+ result.current.send("hello");
+ });
+ await waitFor(() => {
+ const chatCall = fetchMock.mock.calls.find((c) =>
+ String(c[0]).includes("/assistant/chat"),
+ );
+ expect(chatCall).toBeTruthy();
+ const body = JSON.parse((chatCall?.[1] as { body: string }).body) as {
+ threadId?: string;
+ };
+ expect(body.threadId).toBeUndefined();
+ });
+ });
+
+ it("clears the selected model after a MODEL_NOT_ALLOWED rejection", async () => {
+ const fetchMock = vi.fn((url: string) => {
+ if (url.includes("/assistant/chat")) {
+ return Promise.resolve({
+ ok: false,
+ status: 400,
+ json: async () => ({
+ error: { code: "MODEL_NOT_ALLOWED", message: "no" },
+ }),
+ } as Response);
+ }
+ return Promise.resolve(sseResponse([DONE]));
+ });
+ vi.stubGlobal("fetch", fetchMock);
+
+ const { result } = renderHook(() => useAssistantChat("userA"), { wrapper });
+ act(() => {
+ result.current.setModel("anthropic/removed");
+ });
+ act(() => {
+ result.current.send("hi");
+ });
+ // The rejected model is cleared so the next send falls back to the default.
+ await waitFor(() => expect(result.current.selectedModel).toBeNull());
+ });
+});
diff --git a/src/assistant/useAssistantChat.ts b/src/assistant/useAssistantChat.ts
new file mode 100644
index 0000000..85b1d03
--- /dev/null
+++ b/src/assistant/useAssistantChat.ts
@@ -0,0 +1,605 @@
+/**
+ * React binding for the assistant panel: owns the reducer, drives the chat SSE
+ * stream and the proposal-confirmation call, and persists the thread across
+ * reloads. All rendering decisions live in the pure reducer + presentation
+ * helpers; this hook is the glue between them and the network.
+ */
+
+import { useCallback, useEffect, useReducer, useRef, useState } from "react";
+import { useAssistantClient } from "./client-context";
+import { loadThread, saveThread } from "./persistence";
+import { resolveConfirmation } from "./presentation";
+import {
+ type AssistantState,
+ assistantReducer,
+ initialAssistantState,
+ selectVisibleState,
+} from "./reducer";
+import type { ChatMessage, PendingProposal } from "./types";
+
+/** Host integration callbacks for {@link useAssistantChat}. */
+export interface UseAssistantChatOptions {
+ /**
+ * Called after a workflow-mutating tool (`create_workflow`, `author_workflow`,
+ * `update_workflow`, `set_workflow_enabled`) is confirmed successfully — the
+ * host re-fetches its workflow list so the result appears without a manual
+ * reload. Replaces the in-app cross-module signal the platform used.
+ */
+ onWorkflowMutation?: () => void;
+}
+
+const EMPTY_IDS: ReadonlySet<string> = new Set(); // shared empty set: the initial and reset value of `confirmingIds`
+
+/** Confirmed tools whose success changes the caller's workflow set — after a
+ * clean success the host's `onWorkflowMutation` callback is invoked so it can
+ * refetch and show the result without a manual reload. */
+const WORKFLOW_MUTATING_TOOLS: ReadonlySet<string> = new Set([
+  "create_workflow",
+  "author_workflow",
+  "update_workflow",
+  "set_workflow_enabled",
+]);
+
+function statusMessage(text: string): ChatMessage { // builds a "status"-role message with a fresh `status-<uuid>` id
+ return { id: `status-${uuid()}`, role: "status", text };
+}
+
+function uuid(): string { // random UUID string from the global `crypto` object
+ return crypto.randomUUID();
+}
+
+export interface AssistantChat {
+  state: AssistantState;
+  /** Proposal ids whose confirmation is currently in flight (for disabling). */
+  confirmingIds: ReadonlySet<string>;
+  /** The user's selected model slug, or null to use the server default. */
+  selectedModel: string | null;
+  /** Choose the model for subsequent turns (persisted per user). */
+  setModel: (model: string | null) => void;
+  send: (message: string) => void; // start a turn; refused if blank, mid-turn, restoring, or a proposal is pending
+  stop: () => void; // abort the in-flight turn
+  confirm: (proposal: PendingProposal) => Promise<void>; // run the proposal; a repeat call while in flight is ignored
+  cancel: (proposal: PendingProposal) => void; // dismiss the proposal; ignored once its confirmation is in flight
+  reset: () => void; // abandon the conversation, invalidating in-flight stream/confirm responses
+  /** Open an existing thread from history, loading its transcript. */
+  switchThread: (threadId: string) => void;
+  /** True while a switched-to thread's transcript is loading — the composer is
+   * held closed until it resolves so a turn can't run against hidden context. */
+  restoring: boolean;
+}
+
+export function useAssistantChat(
+ userId: string | null,
+ options?: UseAssistantChatOptions,
+): AssistantChat {
+ const [state, dispatch] = useReducer(
+ assistantReducer,
+ userId,
+ (uid): AssistantState => {
+ return {
+ ...initialAssistantState(),
+ ownerId: uid,
+ threadId: loadThread(uid).threadId,
+ };
+ },
+ );
+
+ const abortRef = useRef(null);
+ // Aborts an in-flight thread-history restore when the user changes or the
+ // panel unmounts, so a late response can't land in a different conversation.
+ const historyAbortRef = useRef(null);
+ // Records which userId the state has already been hydrated for, so the
+ // user-change effect fires exactly once per switch. Persistence does NOT key
+ // off this — it keys off `state.ownerId`, which moves atomically with the data.
+ const hydratedUserRef = useRef(userId);
+ // Monotonic token identifying the authoritative stream. Aborting is async, so
+ // a superseded stream can still have buffered events in flight; each stream
+ // captures its token at start and its callbacks no-op once the token moves on.
+ // This is what stops a prior user's late events from landing in a new user's
+ // hydrated state (a cross-account leak that abort alone cannot prevent).
+ const streamSeqRef = useRef(0);
+ // Same idea for the (non-abortable) confirmation request: it captures this
+ // token before awaiting and no-ops once the conversation it belonged to has
+ // been replaced by a user switch or reset, so a late confirmation response
+ // cannot append into a different user's hydrated thread.
+ const confirmSeqRef = useRef(0);
+ // Latest state + current userId, readable from event-handler closures without
+ // re-creating them. Written during render (not a passive effect) so the owner
+ // guard in `send`/`confirm` sees the new user on the very commit after an auth
+ // change — a passive effect would lag by a frame, leaving a window where a send
+ // could still target the prior user's thread.
+ //
+ // These refs are read only in event handlers, which fire after a commit (by
+ // which point React has committed the latest render and updated them), so they
+ // reflect committed state at read time. A render thrown away under concurrent
+ // mode could momentarily leave a ref pointing at uncommitted state, but the
+ // failure degrades safely: the owner guard (`ownerId !== userIdRef.current`)
+ // would at worst DROP a send — never leak — and a mismatched thread id is
+ // rejected server-side (404) regardless.
+ const stateRef = useRef(state);
+ const userIdRef = useRef(userId);
+ stateRef.current = state;
+ userIdRef.current = userId;
+
+ // The transport, held in a ref so the event-handler callbacks (send/confirm/
+ // switchThread) keep stable identities while still reaching the current client.
+ const client = useAssistantClient();
+ const clientRef = useRef(client);
+ clientRef.current = client;
+
+ // Host callbacks held in a ref so the event-handler callbacks below keep
+ // stable identities while still calling the latest-supplied handler.
+ const onWorkflowMutationRef = useRef(options?.onWorkflowMutation);
+ onWorkflowMutationRef.current = options?.onWorkflowMutation;
+
+ // Proposal ids whose confirmation request is in flight. The ref is the
+ // synchronous guard against a double-click issuing a duplicate execute; the
+ // state mirror drives disabling the card's buttons.
+ const confirmingRef = useRef>(new Set());
+ const [confirmingIds, setConfirmingIds] =
+ useState>(EMPTY_IDS);
+
+ // Synchronous "a chat request is in flight" guard. Set before the fetch and
+ // cleared when it settles/stops/resets, so two submits in the same tick can't
+ // both pass the status check and start two billable streams.
+ const sendingRef = useRef(false);
+
+ // True while a thread opened from history is loading its transcript. The
+ // composer is disabled until it resolves so a send can't run a turn against
+ // server-side context the user can't yet see. The ref is the synchronous guard
+ // for `send`; the state drives the disabled composer. Only `switchThread` sets
+ // it (the mount restore resumes the user's own current thread).
+ const restoringRef = useRef(false);
+ const [restoring, setRestoring] = useState(false);
+ const setRestoringBoth = useCallback((v: boolean) => {
+ restoringRef.current = v;
+ setRestoring(v);
+ }, []);
+
+ // The user's selected model (a per-user preference, persisted alongside the
+ // thread id). A ref mirror lets `send` read the current choice without being
+ // re-created. null → the server's default model.
+ const [selectedModel, setSelectedModel] = useState(
+ () => loadThread(userId).model,
+ );
+ const selectedModelRef = useRef(selectedModel);
+ selectedModelRef.current = selectedModel;
+
+ // When the signed-in user changes under a mounted panel (auth refresh, or a
+ // mount before auth resolved), invalidate the in-flight stream, abort it, and
+ // reload that user's own thread. Without this, the prior user's transcript
+ // would persist under the new user's key — a cross-account leak.
+ useEffect(() => {
+ if (hydratedUserRef.current === userId) return;
+ streamSeqRef.current += 1;
+ confirmSeqRef.current += 1;
+ abortRef.current?.abort();
+ // Abort a pending thread-history load and re-open the composer so a switch
+ // that was mid-restore for the prior user doesn't leave the new user's
+ // conversation wedged closed.
+ historyAbortRef.current?.abort();
+ setRestoringBoth(false);
+ sendingRef.current = false;
+ confirmingRef.current.clear();
+ setConfirmingIds(EMPTY_IDS);
+ hydratedUserRef.current = userId;
+ // Load the new user's own model preference (the prior user's must not carry
+ // over). The ref is updated synchronously so a send on this commit reads it.
+ const nextModel = loadThread(userId).model;
+ selectedModelRef.current = nextModel;
+ setSelectedModel(nextModel);
+ dispatch({
+ type: "hydrate",
+ ownerId: userId,
+ threadId: loadThread(userId).threadId,
+ messages: [],
+ });
+ }, [userId, setRestoringBoth]);
+
+ // Restore the visible transcript for the persisted thread from the server,
+ // keyed by user (runs on mount and whenever the signed-in user changes). The
+ // transcript is never cached client-side — it can carry workflow YAML or
+ // pasted secrets, and localStorage survives logout — so a reload starts blank
+ // and the prior conversation is rehydrated here from the durable thread.
+ // The reducer applies it only if the conversation is still idle/empty for this
+ // exact owner+thread, so a late response can never clobber a started turn, a
+ // new chat, or a switched account.
+ useEffect(() => {
+ const threadId = loadThread(userId).threadId;
+ if (!userId || !threadId) return;
+ const ac = new AbortController();
+ historyAbortRef.current?.abort();
+ historyAbortRef.current = ac;
+ void clientRef.current
+ .fetchThreadHistory(threadId, ac.signal)
+ .then((result) => {
+ if (ac.signal.aborted) return;
+ if (result.status === "ok") {
+ // An existing thread with no completed turns AND no pending proposals
+ // yields nothing to restore — keep the thread id (it's live). A pending
+ // proposal alone is enough to restore (its card must come back).
+ if (result.messages.length > 0 || result.proposals.length > 0) {
+ dispatch({
+ type: "restore_history",
+ ownerId: userId,
+ threadId,
+ messages: result.messages,
+ proposals: result.proposals,
+ });
+ }
+ } else if (result.status === "gone") {
+ // The thread was deleted server-side (404). Drop the dead id so the next
+ // send starts fresh instead of 404-ing forever. The reducer setting
+ // threadId to null cascades to the persistence effect, clearing storage.
+ dispatch({ type: "thread_gone", ownerId: userId, threadId });
+ }
+ // status === "error" → transient; keep the thread id and don't restore.
+ })
+ .catch(() => {
+ // fetchThreadHistory returns a typed {status:"error"} rather than
+ // rejecting; guard a future change from leaking an unhandled rejection.
+ });
+ return () => ac.abort();
+ }, [userId]);
+
+ // Persist the thread id + selected model whenever the conversation settles,
+ // under the owner carried in state (the transcript itself is never stored).
+ // Because `state.ownerId` and the data it labels move together (set atomically
+ // by the reducer), a write can never land under a different user's key.
+ // Skipped while streaming: a thread id assigned mid-turn is written once, when
+ // the turn settles (status leaves "streaming").
+ useEffect(() => {
+ if (state.status === "streaming") return;
+ // Skip while the data's owner doesn't match the live user — the window
+ // between an auth change and the hydrate that follows it. `selectedModelRef`
+ // lives outside the reducer, so it can already hold the NEW user's preference
+ // while `state.ownerId` still holds the OLD user's id; persisting then would
+ // write the new user's model under the old user's key. The owner guard closes
+ // that cross-account write, matching the same pattern `send`/`confirm` use.
+ if (state.ownerId !== userIdRef.current) return;
+ saveThread(state.ownerId, {
+ threadId: state.threadId,
+ model: selectedModelRef.current,
+ });
+ // `selectedModel` is intentionally NOT a dependency: a model-only change is
+ // persisted immediately by `setModel` itself, so this effect only needs to
+ // re-run on the conversation transitions above. If that direct persist is
+ // ever removed, add `selectedModel` here.
+ }, [state.ownerId, state.threadId, state.status]);
+
+ // Abort any in-flight stream on unmount so a closed panel stops billing.
+ useEffect(() => {
+ return () => abortRef.current?.abort();
+ }, []);
+
+ const send = useCallback((message: string) => {
+ // Synchronous in-flight guard — closes the window where two submits in the
+ // same tick both read a not-yet-committed `idle` status and start two
+ // billable streams.
+ if (sendingRef.current) return;
+ const text = message.trim();
+ const current = stateRef.current;
+ // Refuse if the loaded conversation doesn't belong to the current user. This
+ // only differs for the brief committed frame between an auth change and the
+ // hydrate that follows it, where `current` still holds the prior user's
+ // thread — sending then would attach this message to that thread id.
+ if (current.ownerId !== userIdRef.current) return;
+ // Refuse while a switched-to thread's transcript is still loading — sending
+ // now would run a turn against context the user can't see yet.
+ if (restoringRef.current) return;
+ // Refuse while a turn is streaming, or while a mutating proposal is awaiting
+ // the user's decision — a new turn must not abandon an unresolved proposal.
+ if (!text || current.status === "streaming") return;
+ if (current.pendingProposals.length > 0) return;
+
+ dispatch({
+ type: "send",
+ messageId: uuid(),
+ assistantId: uuid(),
+ text,
+ });
+
+ const seq = ++streamSeqRef.current;
+ const ac = new AbortController();
+ abortRef.current = ac;
+ sendingRef.current = true;
+ clientRef.current
+ .streamChat(
+ {
+ message: text,
+ // null → omit, so the server applies its default model.
+ model: selectedModelRef.current ?? undefined,
+ threadId: current.threadId ?? undefined,
+ turnKey: uuid(),
+ },
+ (event) => {
+ // Drop events from a stream that has been superseded (new turn, stop,
+ // reset, or user switch) so they cannot mutate unrelated state.
+ if (streamSeqRef.current === seq) dispatch({ type: "stream", event });
+ },
+ ac.signal,
+ )
+ .catch((err: unknown) => {
+ // A user-initiated abort is reported via the `stopped` action, not as a
+ // failure; a superseded stream is ignored entirely. Only surface a
+ // genuine network/parse error for the still-current stream.
+ if (ac.signal.aborted || streamSeqRef.current !== seq) return;
+ dispatch({
+ type: "stream_failed",
+ error: {
+ code: "NETWORK",
+ message:
+ err instanceof Error ? err.message : "The connection failed",
+ },
+ });
+ })
+ .finally(() => {
+ // This stream is over (settled, failed, or aborted) — allow the next
+ // send. A superseded stream must leave the flag alone: whatever superseded
+ // it (stop, reset, user switch) cleared it, and a newer send may have set it.
+ if (streamSeqRef.current === seq) sendingRef.current = false;
+ });
+ }, []);
+
+ const stop = useCallback(() => {
+ streamSeqRef.current += 1;
+ abortRef.current?.abort();
+ sendingRef.current = false;
+ dispatch({ type: "stopped" });
+ }, []);
+
+ const confirm = useCallback(async (proposal: PendingProposal) => { // execute a pending proposal via the client and resolve its card
+ const pid = proposal.proposalId;
+ if (!pid) { // no id to execute: resolve the card with an error without calling the client
+ dispatch({
+ type: "proposal_resolved",
+ callId: proposal.callId,
+ status: null,
+ error: {
+ code: "TOOL_FAILED",
+ message: "This action can no longer be confirmed.",
+ },
+ });
+ return;
+ }
+
+ // Guard against a double-click issuing a duplicate execute for the same
+ // proposal — the second would otherwise hit PROPOSAL_ALREADY_CONSUMED and
+ // overwrite the first's success with an error.
+ if (confirmingRef.current.has(pid)) return;
+ confirmingRef.current.add(pid);
+ setConfirmingIds(new Set(confirmingRef.current)); // fresh Set so React sees a new reference
+
+ // Snapshot the conversation generation; if a user switch or reset replaces
+ // the conversation while this request is in flight, the late response must
+ // not land in the new conversation.
+ const seq = confirmSeqRef.current;
+ try {
+ const result = await clientRef.current.confirmProposal(pid);
+ if (confirmSeqRef.current !== seq) return; // conversation was replaced while awaiting — drop the response
+ // A RETRYABLE failure (the workflow references an integration that isn't
+ // connected): the server re-opened the proposal, so KEEP the card and show
+ // the reason on it. The user connects the integration (the card's Connect
+ // button) and confirms again — no need to re-ask the assistant.
+ if (result.ok && result.retryable) { // NOTE(review): this "failure" is matched on ok being truthy — confirm against confirmProposal's result type
+ const { error } = resolveConfirmation(proposal.name, result);
+ dispatch({
+ type: "proposal_retry_failed",
+ callId: proposal.callId,
+ message:
+ error?.message ??
+ "Connect the required integration, then confirm again.",
+ });
+ return;
+ }
+ const { statusText, error } = resolveConfirmation(proposal.name, result);
+ dispatch({
+ type: "proposal_resolved",
+ callId: proposal.callId,
+ status: statusText ? statusMessage(statusText) : null,
+ error,
+ });
+ // A successful workflow mutation won't show on an already-open Workflows
+ // page (it fetches on mount and shares no cache) — signal it to refetch.
+ // `error === null` is the clean-success signal from resolveConfirmation.
+ if (!error && WORKFLOW_MUTATING_TOOLS.has(proposal.name)) {
+ onWorkflowMutationRef.current?.();
+ }
+ } catch (err) {
+ if (confirmSeqRef.current !== seq) return; // same staleness guard as the success path
+ // confirmProposal returns a typed outcome rather than throwing, but guard
+ // anyway: an unexpected throw must still clear the card and surface an
+ // error instead of escaping as an unhandled rejection.
+ dispatch({
+ type: "proposal_resolved",
+ callId: proposal.callId,
+ status: null,
+ error: {
+ code: "TOOL_FAILED",
+ message:
+ err instanceof Error
+ ? err.message
+ : "The action could not be completed",
+ },
+ });
+ } finally {
+ // Only touch the in-flight set if this confirmation still owns the
+ // conversation; a switch/reset already cleared it.
+ if (confirmSeqRef.current === seq) {
+ confirmingRef.current.delete(pid);
+ setConfirmingIds(new Set(confirmingRef.current));
+ }
+ }
+ }, []);
+
+ const cancel = useCallback((proposal: PendingProposal) => { // dismiss a pending proposal without executing it
+ // Once a confirmation is in flight the action is already running server-side
+ // and can't be cancelled — ignore a cancel click that races it, so the
+ // transcript can't show both "cancelled" and the action's success.
+ if (proposal.proposalId && confirmingRef.current.has(proposal.proposalId)) {
+ return;
+ }
+ dispatch({ // resolved locally; this path makes no client call
+ type: "proposal_resolved",
+ callId: proposal.callId,
+ status: statusMessage("Action cancelled."),
+ error: null,
+ });
+ }, []);
+
+ const reset = useCallback(() => { // discard the current conversation and everything in flight for it
+ streamSeqRef.current += 1; // invalidate the in-flight stream's late callbacks
+ confirmSeqRef.current += 1; // invalidate in-flight confirmations (`confirm` re-checks this after its await)
+ abortRef.current?.abort();
+ historyAbortRef.current?.abort(); // cancel a pending thread-history load started by `switchThread`
+ setRestoringBoth(false); // re-open the composer if a history load was holding it closed
+ sendingRef.current = false;
+ confirmingRef.current.clear();
+ setConfirmingIds(EMPTY_IDS);
+ dispatch({ type: "reset" });
+ }, [setRestoringBoth]);
+
+ // Open a past thread from the history switcher: invalidate any in-flight
+ // stream/confirmation (so their late events can't land in the switched-to
+ // conversation), pin the chosen thread, persist it, then load its transcript.
+ // Guarded by owner so a stale render can't switch under a different account.
+ const switchThread = useCallback(
+ (threadId: string) => {
+ const current = stateRef.current;
+ const uid = userIdRef.current;
+ if (current.ownerId !== uid) return; // owner guard: state still belongs to a different user
+ if (threadId === current.threadId) return; // already on this thread — nothing to do
+ // Refuse while a turn is streaming or a proposal is awaiting confirmation —
+ // the same guard `send` uses. Navigating away would abandon the live turn /
+ // unresolved proposal; the user finishes it, or "New chat" is the explicit
+ // discard. The DISABLED switcher in the UI is the primary guard; this is the
+ // backstop, so it silently no-ops by design for a programmatic/keyboard
+ // caller that bypasses the disabled state.
+ if (
+ current.status === "streaming" ||
+ current.pendingProposals.length > 0
+ ) {
+ return;
+ }
+ streamSeqRef.current += 1;
+ confirmSeqRef.current += 1;
+ abortRef.current?.abort();
+ sendingRef.current = false;
+ confirmingRef.current.clear();
+ setConfirmingIds(EMPTY_IDS);
+ dispatch({ type: "switch_thread", threadId });
+ // Hold the composer closed until the transcript loads — a send before then
+ // would run a turn against context the user can't yet see.
+ setRestoringBoth(true);
+ // Persist the new active thread now so a reload before the next turn restores
+ // it (the settled-save effect also covers it). Keep the current model choice.
+ saveThread(uid, { threadId, model: selectedModelRef.current });
+ // Load the chosen thread's transcript, superseding any in-flight history
+ // fetch. restore_history only applies on a matching idle owner+thread, so a
+ // mid-load send or a second switch can't be clobbered by this response.
+ const ac = new AbortController();
+ historyAbortRef.current?.abort();
+ historyAbortRef.current = ac;
+ void clientRef.current
+ .fetchThreadHistory(threadId, ac.signal)
+ .then((result) => {
+ if (ac.signal.aborted) return; // superseded by a later switch or a reset
+ if (result.status === "ok") {
+ if (result.messages.length > 0 || result.proposals.length > 0) { // an empty thread has nothing to restore
+ dispatch({
+ type: "restore_history",
+ ownerId: uid,
+ threadId,
+ messages: result.messages,
+ proposals: result.proposals,
+ });
+ }
+ } else if (result.status === "gone") {
+ dispatch({ type: "thread_gone", ownerId: uid, threadId });
+ } else {
+ // Transient load failure: drop the active thread (it carries
+ // server-side context the user can't see) and surface a visible
+ // error. The next send then starts a FRESH thread rather than running
+ // against the unloaded conversation's hidden context.
+ dispatch({
+ type: "history_failed",
+ ownerId: uid,
+ threadId,
+ error: {
+ code: "HISTORY_LOAD_FAILED",
+ message:
+ "Couldn't load that conversation. You're in a new chat — reopen it from history to try again.",
+ },
+ });
+ }
+ // Re-open the composer once the load settles (success shows the
+ // transcript; failure dropped the thread + showed the error above).
+ // Only the current fetch clears it — a superseded one returned early on
+ // `aborted`.
+ setRestoringBoth(false);
+ })
+ .catch(() => {
+ // fetchThreadHistory returns a typed {status:"error"} rather than
+ // rejecting; guard a future change from leaking an unhandled rejection.
+ if (!ac.signal.aborted) setRestoringBoth(false);
+ });
+ },
+ [setRestoringBoth],
+ );
+
+ // Choose the model for subsequent turns. The model preference is per user, not
+ // per turn, so persist it immediately (alongside the current thread id) rather
+ // than waiting for a turn to settle.
+ const setModel = useCallback((model: string | null) => {
+ selectedModelRef.current = model; // ref first: `switchThread` reads the ref when it persists the thread
+ setSelectedModel(model); // state copy is what the hook returns as `selectedModel`
+ // Same owner guard the persistence effect uses: in the window between an auth
+ // change and the hydrate that follows it, stateRef still holds the prior
+ // user's thread while userIdRef already points to the new user. Persisting
+ // then would write the new user's model (and the OLD user's thread id) under
+ // a mismatched key. Skip it — the state still updates, and the next settled
+ // save persists it once the owner is consistent.
+ const currentUserId = userIdRef.current;
+ if (stateRef.current.ownerId === currentUserId) {
+ saveThread(currentUserId, {
+ threadId: stateRef.current.threadId,
+ model,
+ });
+ }
+ }, []);
+
+ // Reactive recovery: if a turn is rejected because the selected model is no
+ // longer offered (e.g. ASSISTANT_MODELS was changed and a stale slug was still
+ // persisted), clear the selection so the NEXT send falls back to the server
+ // default instead of failing again. The picker also reconciles proactively
+ // once the model list loads; this closes the window where a send races that
+ // load. Guarded by owner so a late error from a prior user can't act here.
+ useEffect(() => {
+ if (
+ state.error?.code === "MODEL_NOT_ALLOWED" &&
+ state.ownerId === userIdRef.current &&
+ selectedModelRef.current !== null
+ ) {
+ setModel(null); // clears ref + state, and persists the cleared choice when the owner matches
+ }
+ }, [state.error, state.ownerId, setModel]);
+
+ // Expose only state that belongs to the current user: between an auth change
+ // and the hydrate effect that follows it, the raw state still holds the prior
+ // user's conversation, which must never render. Operations (send/confirm) read
+ // the raw state via `stateRef`, so they still act on the true thread.
+ const visibleState = selectVisibleState(state, userId);
+ return {
+ state: visibleState,
+ confirmingIds,
+ selectedModel,
+ setModel,
+ send,
+ stop,
+ confirm,
+ cancel,
+ reset,
+ switchThread,
+ restoring,
+ };
+}
diff --git a/src/assistant/useAssistantModels.test.tsx b/src/assistant/useAssistantModels.test.tsx
new file mode 100644
index 0000000..79ada3b
--- /dev/null
+++ b/src/assistant/useAssistantModels.test.tsx
@@ -0,0 +1,312 @@
+// @vitest-environment jsdom
+import { act, render, renderHook, screen, waitFor } from "@testing-library/react";
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import type {
+ AssistantClient,
+ AssistantModelsResult,
+} from "./client";
+
+const OK_NONEMPTY: AssistantModelsResult = { // successful fetch carrying one model
+ ok: true,
+ data: {
+ default: "anthropic/m",
+ models: [{ slug: "anthropic/m", label: "M" }],
+ },
+};
+const OK_EMPTY: AssistantModelsResult = { // successful fetch with an empty list — the hook is expected to cache it
+ ok: true,
+ data: { default: "anthropic/m", models: [] },
+};
+const FAILED: AssistantModelsResult = { // failed fetch — the hook must not cache it
+ ok: false,
+ data: { default: null, models: [] },
+};
+
+// The hook caches the model list in a module-level WeakMap (a per-client cache +
+// inflight slot), so each test resets the module graph to start from a clean cache.
+// The provider and the hook MUST come from the same post-reset module instance so they
+// share one React context — otherwise `useAssistantClient` can't see the provider.
+async function load() {
+ const { AssistantClientProvider } = await import(
+ "./client-context"
+ );
+ const { useAssistantModels } = await import("./useAssistantModels");
+ const fetchModels = vi.fn();
+ // Only `fetchModels` is exercised here; the rest satisfy the interface.
+ const client: AssistantClient = {
+ fetchModels,
+ fetchThreads: vi.fn(),
+ fetchThreadHistory: vi.fn(),
+ streamChat: vi.fn(),
+ confirmProposal: vi.fn(),
+ deleteThread: vi.fn(),
+ };
+ const wrapper = ({ children }: { children: React.ReactNode }) => (
+
+ {children}
+
+ );
+ return { useAssistantModels, fetchModels, wrapper };
+}
+
+beforeEach(() => {
+ vi.resetModules();
+ vi.clearAllMocks();
+});
+afterEach(() => {
+ vi.restoreAllMocks();
+});
+
+describe("useAssistantModels", () => {
+ it("fetches once and serves later mounts from the module cache", async () => {
+ const { useAssistantModels, fetchModels, wrapper } = await load();
+ fetchModels.mockResolvedValue(OK_NONEMPTY);
+
+ const first = renderHook(() => useAssistantModels(), { wrapper });
+ await waitFor(() => expect(first.result.current.models).toHaveLength(1));
+ expect(fetchModels).toHaveBeenCalledTimes(1);
+
+ // A second mount reads the cache synchronously — no second fetch.
+ const second = renderHook(() => useAssistantModels(), { wrapper });
+ expect(second.result.current.models).toHaveLength(1);
+ expect(fetchModels).toHaveBeenCalledTimes(1);
+ });
+
+ it("scopes the cache per client — a different client fetches its own catalog", async () => {
+ const { AssistantClientProvider } = await import("./client-context");
+ const { useAssistantModels } = await import("./useAssistantModels");
+ const makeClient = (models: AssistantModelsResult): AssistantClient => ({
+ fetchModels: vi.fn().mockResolvedValue(models),
+ fetchThreads: vi.fn(),
+ fetchThreadHistory: vi.fn(),
+ streamChat: vi.fn(),
+ confirmProposal: vi.fn(),
+ deleteThread: vi.fn(),
+ });
+ const clientA = makeClient(OK_NONEMPTY);
+ const clientB = makeClient({
+ ok: true,
+ data: {
+ default: "b/m",
+ models: [
+ { slug: "b/m", label: "B" },
+ { slug: "b/m2", label: "B2" },
+ ],
+ },
+ });
+ const wrap =
+ (client: AssistantClient) =>
+ ({ children }: { children: React.ReactNode }) => (
+
+ {children}
+
+ );
+
+ const a = renderHook(() => useAssistantModels(), { wrapper: wrap(clientA) });
+ await waitFor(() => expect(a.result.current.models).toHaveLength(1));
+ expect(clientA.fetchModels).toHaveBeenCalledTimes(1);
+
+ // A different client must fetch its OWN catalog, never serve A's cache.
+ const b = renderHook(() => useAssistantModels(), { wrapper: wrap(clientB) });
+ await waitFor(() => expect(b.result.current.models).toHaveLength(2));
+ expect(clientB.fetchModels).toHaveBeenCalledTimes(1);
+ expect(b.result.current.models[0]!.slug).toBe("b/m");
+ });
+
+ it("clears the previous catalog immediately when the client is swapped", async () => {
+ const { AssistantClientProvider } = await import("./client-context");
+ const { useAssistantModels } = await import("./useAssistantModels");
+
+ const clientA: AssistantClient = {
+ fetchModels: vi.fn().mockResolvedValue(OK_NONEMPTY),
+ fetchThreads: vi.fn(),
+ fetchThreadHistory: vi.fn(),
+ streamChat: vi.fn(),
+ confirmProposal: vi.fn(),
+ deleteThread: vi.fn(),
+ };
+ let resolveB: (v: AssistantModelsResult) => void = () => {};
+ const clientB: AssistantClient = {
+ fetchModels: vi.fn().mockReturnValue(
+ new Promise((r) => {
+ resolveB = r;
+ }),
+ ),
+ fetchThreads: vi.fn(),
+ fetchThreadHistory: vi.fn(),
+ streamChat: vi.fn(),
+ confirmProposal: vi.fn(),
+ deleteThread: vi.fn(),
+ };
+
+ function Show() {
+ const m = useAssistantModels();
+ return (
+
{m.models.map((x) => x.slug).join(",")}
+ );
+ }
+ function Harness({ client }: { client: AssistantClient }) {
+ return (
+
+
+
+ );
+ }
+
+ const { rerender } = render();
+ await waitFor(() =>
+ expect(screen.getByTestId("models").textContent).toBe("anthropic/m"),
+ );
+
+ // Swap to a client whose fetch is still pending — the old catalog must vanish
+ // immediately, not linger until the new request resolves.
+ rerender();
+ expect(screen.getByTestId("models").textContent).toBe("");
+
+ await act(async () => {
+ resolveB({
+ ok: true,
+ data: { default: "b/m", models: [{ slug: "b/m", label: "B" }] },
+ });
+ });
+ await waitFor(() =>
+ expect(screen.getByTestId("models").textContent).toBe("b/m"),
+ );
+ });
+
+ it("does not cache a FAILED fetch — the next mount retries", async () => {
+ const { useAssistantModels, fetchModels, wrapper } = await load();
+ fetchModels
+ .mockResolvedValueOnce(FAILED)
+ .mockResolvedValueOnce(OK_NONEMPTY);
+
+ const first = renderHook(() => useAssistantModels(), { wrapper });
+ await waitFor(() => expect(fetchModels).toHaveBeenCalledTimes(1));
+ expect(first.result.current.models).toHaveLength(0);
+ first.unmount();
+
+ // The failure wasn't cached, so the next mount fetches again — and succeeds.
+ const second = renderHook(() => useAssistantModels(), { wrapper });
+ await waitFor(() => expect(second.result.current.models).toHaveLength(1));
+ expect(fetchModels).toHaveBeenCalledTimes(2);
+ });
+
+ it("caches a SUCCESSFUL but empty list — no refetch (single-model deploy)", async () => {
+ const { useAssistantModels, fetchModels, wrapper } = await load();
+ fetchModels.mockResolvedValue(OK_EMPTY);
+
+ const first = renderHook(() => useAssistantModels(), { wrapper });
+ await waitFor(() => expect(fetchModels).toHaveBeenCalledTimes(1));
+ expect(first.result.current.models).toHaveLength(0);
+ first.unmount();
+
+ // A successful (if empty) response is cached, so a later mount does NOT
+ // re-fetch — it isn't treated like a failure.
+ const second = renderHook(() => useAssistantModels(), { wrapper });
+ expect(second.result.current.models).toHaveLength(0);
+ expect(fetchModels).toHaveBeenCalledTimes(1);
+ });
+
+ it("dedupes concurrent in-flight fetches across simultaneous mounts", async () => {
+ const { useAssistantModels, fetchModels, wrapper } = await load();
+ let resolve: (v: AssistantModelsResult) => void = () => {};
+ fetchModels.mockReturnValue(
+ new Promise((r) => {
+ resolve = r;
+ }),
+ );
+
+ // Two mounts before the fetch resolves → one shared in-flight fetch.
+ const a = renderHook(() => useAssistantModels(), { wrapper });
+ const b = renderHook(() => useAssistantModels(), { wrapper });
+ expect(fetchModels).toHaveBeenCalledTimes(1);
+
+ resolve(OK_NONEMPTY);
+ await waitFor(() => expect(a.result.current.models).toHaveLength(1));
+ expect(b.result.current.models).toHaveLength(1);
+ expect(fetchModels).toHaveBeenCalledTimes(1);
+ });
+
+ it("does not update state after unmount (clean teardown mid-fetch)", async () => {
+ const { useAssistantModels, fetchModels, wrapper } = await load();
+ let resolve: (v: AssistantModelsResult) => void = () => {};
+ fetchModels.mockReturnValue(
+ new Promise((r) => {
+ resolve = r;
+ }),
+ );
+ const errs: unknown[] = [];
+ const spy = vi.spyOn(console, "error").mockImplementation((e) => {
+ errs.push(e);
+ });
+
+ const { unmount } = renderHook(() => useAssistantModels(), { wrapper });
+ unmount();
+ resolve(OK_NONEMPTY);
+ await Promise.resolve();
+ // No act()/state-update-after-unmount warning was emitted.
+ expect(errs).toHaveLength(0);
+ spy.mockRestore();
+ });
+
+ it("ignores a late result from a swapped-away client", async () => {
+ const { AssistantClientProvider } = await import("./client-context");
+ const { useAssistantModels } = await import("./useAssistantModels");
+
+ let resolveA: (v: AssistantModelsResult) => void = () => {};
+ const clientA: AssistantClient = {
+ fetchModels: vi.fn().mockReturnValue(
+ new Promise((r) => {
+ resolveA = r;
+ }),
+ ),
+ fetchThreads: vi.fn(),
+ fetchThreadHistory: vi.fn(),
+ streamChat: vi.fn(),
+ confirmProposal: vi.fn(),
+ deleteThread: vi.fn(),
+ };
+ const clientB: AssistantClient = {
+ fetchModels: vi.fn().mockResolvedValue({
+ ok: true,
+ data: { default: "b/m", models: [{ slug: "b/m", label: "B" }] },
+ }),
+ fetchThreads: vi.fn(),
+ fetchThreadHistory: vi.fn(),
+ streamChat: vi.fn(),
+ confirmProposal: vi.fn(),
+ deleteThread: vi.fn(),
+ };
+
+ function Show() {
+ const m = useAssistantModels();
+ return (
+
{m.models.map((x) => x.slug).join(",")}
+ );
+ }
+ function Harness({ client }: { client: AssistantClient }) {
+ return (
+
+
+
+ );
+ }
+
+ const { rerender } = render(); // A's fetch pending
+ expect(clientA.fetchModels).toHaveBeenCalledTimes(1);
+
+ rerender(); // swap to B (resolves immediately)
+ await waitFor(() =>
+ expect(screen.getByTestId("models").textContent).toBe("b/m"),
+ );
+
+ // A's request resolves LATE — it must not replace the displayed (B's) catalog.
+ await act(async () => {
+ resolveA({
+ ok: true,
+ data: { default: "a/m", models: [{ slug: "a/m", label: "A" }] },
+ });
+ });
+ expect(screen.getByTestId("models").textContent).toBe("b/m");
+ });
+});
diff --git a/src/assistant/useAssistantModels.ts b/src/assistant/useAssistantModels.ts
new file mode 100644
index 0000000..d3fd3f7
--- /dev/null
+++ b/src/assistant/useAssistantModels.ts
@@ -0,0 +1,83 @@
+/**
+ * The assistant's selectable models for the composer's picker. The list is
+ * deployment config (not per-user), so it is fetched once per transport and
+ * shared across panel mounts via a cache keyed by the `AssistantClient`.
+ *
+ * The cache has no TTL: it lives for the page session and is revalidated by a
+ * page refresh — fine for deploy config that only changes on a redeploy. A failed
+ * (`!ok`) fetch is NOT cached, so the next mount retries; an `ok` empty list IS.
+ *
+ * Keyed by client so a host that swaps the transport (a tenant/origin/account
+ * change) never serves the previous client's catalog or skips the fetch for the
+ * new one. A WeakMap lets a discarded client's bucket be collected with it.
+ *
+ * Client-only by design (a `createRoot` SPA, no React SSR), so the cache lives
+ * in one browser tab and is never shared across server requests. Under Strict
+ * Mode's double-mount the two effect runs share the one in-flight fetch; the
+ * first run's `active = false` makes its callback a no-op, the second commits —
+ * dedup holds, no torn state.
+ */
+
+import { useEffect, useReducer } from "react";
+import type {
+ AssistantClient,
+ AssistantModels,
+ AssistantModelsResult,
+} from "./client";
+import { useAssistantClient } from "./client-context";
+
+const EMPTY: AssistantModels = { default: null, models: [] };
+
+interface ModelCache {
+ cache: AssistantModels | null;
+ inflight: Promise | null;
+}
+
+const byClient = new WeakMap();
+
+function cacheFor(client: AssistantClient): ModelCache { // get-or-create this client's cache bucket
+ let entry = byClient.get(client);
+ if (!entry) {
+ entry = { cache: null, inflight: null }; // nothing fetched yet, nothing in flight
+ byClient.set(client, entry);
+ }
+ return entry;
+}
+
+export function useAssistantModels(): AssistantModels {
+ const client = useAssistantClient();
+ // The per-client cache is the source of truth; `bump` just forces a re-read
+ // once an async fetch settles. Deriving the return value during render (below)
+ // — rather than mirroring it into state via an effect — means a client swap
+ // shows the new client's catalog (or empty) on the SAME commit, with no stale
+ // frame from the previous client.
+ const [, bump] = useReducer((n: number) => n + 1, 0);
+
+ useEffect(() => {
+ const entry = cacheFor(client);
+ if (entry.cache) return; // already cached (an ok-but-empty catalog counts) — no fetch
+ let active = true; // flipped by the cleanup so a stale run never bumps
+ entry.inflight ??= client.fetchModels(); // join the in-flight fetch or start one
+ void entry.inflight
+ .then((result) => {
+ // Cache any successful (`ok`) result — including an empty list — and
+ // release the settled promise; the cache now guards re-fetch. A failed
+ // (`!ok`) fetch leaves the cache unset so the next mount retries instead
+ // of pinning an empty picker for the whole page session.
+ if (result.ok) entry.cache = result.data;
+ entry.inflight = null;
+ if (active) bump();
+ })
+ .catch(() => {
+ // fetchModels swallows its own errors, so this only fires if a future
+ // change lets it reject — release the slot so the next mount retries.
+ entry.inflight = null;
+ });
+ return () => {
+ active = false;
+ };
+ }, [client]);
+
+ // Always the CURRENT client's catalog — synchronously correct across a swap.
+ return cacheFor(client).cache ?? EMPTY;
+}
diff --git a/src/assistant/useAssistantThreads.test.tsx b/src/assistant/useAssistantThreads.test.tsx
new file mode 100644
index 0000000..a94689c
--- /dev/null
+++ b/src/assistant/useAssistantThreads.test.tsx
@@ -0,0 +1,259 @@
+// @vitest-environment jsdom
+import {
+ act,
+ render,
+ renderHook,
+ screen,
+ waitFor,
+} from "@testing-library/react";
+import { type ReactNode, useEffect } from "react";
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import type { AssistantClient, AssistantThreadSummary } from "./client";
+import { AssistantClientProvider } from "./client-context";
+import { useAssistantThreads } from "./useAssistantThreads";
+
+function thread(id: string): AssistantThreadSummary { // minimal fixture: the id doubles as the title, timestamps left blank
+ return { id, title: id, createdAt: "", updatedAt: "" };
+}
+
+function setup() {
+ const fetchThreads = vi.fn();
+ const deleteThread = vi.fn();
+ const client: AssistantClient = {
+ fetchModels: vi.fn(),
+ fetchThreads,
+ fetchThreadHistory: vi.fn(),
+ streamChat: vi.fn(),
+ confirmProposal: vi.fn(),
+ deleteThread,
+ };
+ const wrapper = ({ children }: { children: ReactNode }) => (
+ {children}
+ );
+ return { fetchThreads, deleteThread, wrapper };
+}
+
+beforeEach(() => {
+ vi.clearAllMocks();
+});
+afterEach(() => {
+ vi.restoreAllMocks();
+});
+
+describe("useAssistantThreads", () => {
+ it("does not fetch on mount and populates only on refresh", async () => {
+ const { fetchThreads, wrapper } = setup();
+ fetchThreads.mockResolvedValue([thread("t1")]);
+ const { result } = renderHook(() => useAssistantThreads("userA"), {
+ wrapper,
+ });
+ // Lazy by design — no request until the consumer asks for it.
+ expect(fetchThreads).not.toHaveBeenCalled();
+ expect(result.current.loaded).toBe(false);
+
+ act(() => result.current.refresh());
+ await waitFor(() => expect(result.current.threads).toHaveLength(1));
+ expect(result.current.loaded).toBe(true);
+ });
+
+ it("optimistically removes a thread and calls deleteThread", async () => {
+ const { fetchThreads, deleteThread, wrapper } = setup();
+ fetchThreads.mockResolvedValue([thread("t1"), thread("t2")]);
+ deleteThread.mockResolvedValue({ ok: true });
+ const { result } = renderHook(() => useAssistantThreads("userA"), {
+ wrapper,
+ });
+ act(() => result.current.refresh());
+ await waitFor(() => expect(result.current.threads).toHaveLength(2));
+
+ await act(async () => {
+ await result.current.remove("t1");
+ });
+ expect(deleteThread).toHaveBeenCalledWith("t1");
+ expect(result.current.threads.map((t) => t.id)).toEqual(["t2"]);
+ });
+
+ it("restores the list when a delete fails", async () => {
+ const { fetchThreads, deleteThread, wrapper } = setup();
+ fetchThreads.mockResolvedValue([thread("t1"), thread("t2")]);
+ deleteThread.mockResolvedValue({ ok: false });
+ const { result } = renderHook(() => useAssistantThreads("userA"), {
+ wrapper,
+ });
+ act(() => result.current.refresh());
+ await waitFor(() => expect(result.current.threads).toHaveLength(2));
+
+ await act(async () => {
+ await result.current.remove("t1");
+ });
+ // The optimistic removal is rolled back by a reload, so the row returns.
+ await waitFor(() =>
+ expect(result.current.threads.map((t) => t.id)).toEqual(["t1", "t2"]),
+ );
+ });
+
+ it("a refresh during a pending delete does not resurrect the removed row", async () => {
+ const { fetchThreads, deleteThread, wrapper } = setup();
+ fetchThreads.mockResolvedValue([thread("t1"), thread("t2")]);
+ let resolveDelete: (v: { ok: boolean }) => void = () => {};
+ deleteThread.mockReturnValue(
+ new Promise<{ ok: boolean }>((r) => {
+ resolveDelete = r;
+ }),
+ );
+ const { result } = renderHook(() => useAssistantThreads("userA"), {
+ wrapper,
+ });
+ act(() => result.current.refresh());
+ await waitFor(() => expect(result.current.threads).toHaveLength(2));
+
+ // Begin deleting t2 (stays pending) — optimistically removed.
+ let removed!: Promise<{ ok: boolean }>;
+ act(() => {
+ removed = result.current.remove("t2");
+ });
+ expect(result.current.threads.map((t) => t.id)).toEqual(["t1"]);
+
+ // A refresh resolves while the delete is still in flight, returning [t1, t2]
+ // (the server hasn't applied the delete yet) — t2 must stay filtered.
+ await act(async () => {
+ result.current.refresh();
+ });
+ await waitFor(() =>
+ expect(result.current.threads.map((t) => t.id)).toEqual(["t1"]),
+ );
+
+ // The delete then succeeds; the row remains gone.
+ await act(async () => {
+ resolveDelete({ ok: true });
+ await removed;
+ });
+ expect(result.current.threads.map((t) => t.id)).toEqual(["t1"]);
+ });
+
+ it("keeps a pending delete filtered across a scope swap and back", async () => {
+ const { fetchThreads, deleteThread, wrapper } = setup();
+ fetchThreads.mockResolvedValue([thread("t1"), thread("t2")]);
+ // The DELETE never resolves — it is still in flight across the swap.
+ deleteThread.mockReturnValue(new Promise<{ ok: boolean }>(() => {}));
+ const { result, rerender } = renderHook(
+ ({ uid }) => useAssistantThreads(uid),
+ { initialProps: { uid: "userA" }, wrapper },
+ );
+ act(() => result.current.refresh());
+ await waitFor(() => expect(result.current.threads).toHaveLength(2));
+
+ // Delete t2 (stays pending) — optimistically removed under userA.
+ act(() => {
+ result.current.remove("t2");
+ });
+ expect(result.current.threads.map((t) => t.id)).toEqual(["t1"]);
+
+ // Swap to userB and back to userA while the DELETE is still in flight. The
+ // scope swap must NOT clear the pending-delete set.
+ act(() => rerender({ uid: "userB" }));
+ act(() => rerender({ uid: "userA" }));
+
+ // A refresh returns [t1, t2] (the server has not applied the delete yet);
+ // t2 must stay filtered rather than resurrecting.
+ await act(async () => {
+ result.current.refresh();
+ });
+ await waitFor(() =>
+ expect(result.current.threads.map((t) => t.id)).toEqual(["t1"]),
+ );
+ });
+
+ it("drops a result that resolves after the user changed", async () => {
+ const { fetchThreads, wrapper } = setup();
+ let resolveA: (v: AssistantThreadSummary[] | null) => void = () => {};
+ fetchThreads.mockReturnValueOnce(
+ new Promise((r) => {
+ resolveA = r;
+ }),
+ );
+ const { result, rerender } = renderHook(
+ ({ uid }) => useAssistantThreads(uid),
+ { initialProps: { uid: "userA" }, wrapper },
+ );
+ act(() => result.current.refresh());
+ // Switch to user B before A's request resolves.
+ act(() => rerender({ uid: "userB" }));
+ // A's request now resolves — its threads must NOT land under user B.
+ await act(async () => {
+ resolveA([thread("a-thread")]);
+ });
+ expect(result.current.threads).toEqual([]);
+ });
+
+ it("clears the prior user's threads immediately on a user change", async () => {
+ const { fetchThreads, wrapper } = setup();
+ fetchThreads.mockResolvedValue([thread("t1")]);
+ const { result, rerender } = renderHook(
+ ({ uid }) => useAssistantThreads(uid),
+ { initialProps: { uid: "userA" }, wrapper },
+ );
+ act(() => result.current.refresh());
+ await waitFor(() => expect(result.current.threads).toHaveLength(1));
+
+ act(() => rerender({ uid: "userB" }));
+ expect(result.current.threads).toEqual([]);
+ expect(result.current.loaded).toBe(false);
+ });
+
+ it("drops an old client's result after a transport swap for the same user", async () => {
+ let resolveOld: (v: AssistantThreadSummary[] | null) => void = () => {};
+ const oldClient: AssistantClient = {
+ fetchThreads: vi.fn().mockReturnValue(
+ new Promise((r) => {
+ resolveOld = r;
+ }),
+ ),
+ fetchModels: vi.fn(),
+ fetchThreadHistory: vi.fn(),
+ streamChat: vi.fn(),
+ confirmProposal: vi.fn(),
+ deleteThread: vi.fn(),
+ };
+ const newClient: AssistantClient = {
+ fetchThreads: vi.fn().mockResolvedValue([]),
+ fetchModels: vi.fn(),
+ fetchThreadHistory: vi.fn(),
+ streamChat: vi.fn(),
+ confirmProposal: vi.fn(),
+ deleteThread: vi.fn(),
+ };
+
+ function Show() {
+ const t = useAssistantThreads("userA");
+ const { refresh } = t;
+ // Fire one fetch on mount so the OLD client's request is in flight.
+ useEffect(() => {
+ refresh();
+ }, [refresh]);
+ return (
+
{t.threads.map((x) => x.id).join(",")}
+ );
+ }
+ function Harness({ client }: { client: AssistantClient }) {
+ return (
+
+
+
+ );
+ }
+
+ const { rerender } = render();
+ expect(oldClient.fetchThreads).toHaveBeenCalledTimes(1);
+
+ // Swap the transport for the SAME user before the old request resolves.
+ rerender();
+
+ // The old client's request now resolves — its threads must NOT land under the
+ // new client (aborted + guarded by the captured client).
+ await act(async () => {
+ resolveOld([thread("old-thread")]);
+ });
+ expect(screen.getByTestId("threads").textContent).toBe("");
+ });
+});
diff --git a/src/assistant/useAssistantThreads.ts b/src/assistant/useAssistantThreads.ts
new file mode 100644
index 0000000..88cbf39
--- /dev/null
+++ b/src/assistant/useAssistantThreads.ts
@@ -0,0 +1,191 @@
+/**
+ * The user's recent assistant chat threads for the history switcher. Unlike the
+ * model list (deployment config, fetched once), the thread list changes as the
+ * user chats, so it is fetched ON DEMAND — call `refresh()` to (re)load it; the
+ * panel does so when the history view opens and after a turn settles a new
+ * thread into being. It does NOT fetch on mount, so `threads` stays empty and
+ * `loaded` false until the first `refresh()`.
+ *
+ * Self-protective across account AND transport swaps. The list is tagged with the
+ * (user, client) it belongs to and is masked to empty on the SAME commit if that
+ * no longer matches the current props — so a swap never shows the prior scope's
+ * threads for even one frame. In flight, a late result is dropped if either the
+ * user or the client changed, and the request is aborted on the swap.
+ */
+
+import { useCallback, useEffect, useRef, useState } from "react";
+import type { AssistantClient, AssistantThreadSummary } from "./client";
+import { useAssistantClient } from "./client-context";
+
+ /** The value `useAssistantThreads` returns to its caller. */
+ export interface AssistantThreads {
+ /** The thread list for the current (user, client). Empty before the first
+ * `refresh()` and whenever the stored list belongs to a different scope. */
+ threads: AssistantThreadSummary[];
+ /** True while a `refresh()` fetch for the current scope is in flight. */
+ loading: boolean;
+ /** True once a fetch has settled at least once (drives empty-vs-loading copy). */
+ loaded: boolean;
+ /** Load (or reload) the thread list. Must be called to populate `threads` —
+ * the hook never fetches on mount (the panel calls this when history opens). */
+ refresh: () => void;
+ /** Delete a thread. Optimistically drops it from the list (within the current
+ * owner scope); on failure the list is reloaded to restore the true state. A
+ * no-op resolving `{ ok: false }` when the client has no `deleteThread`. */
+ remove: (threadId: string) => Promise<{ ok: boolean }>;
+ /** Whether the configured client supports deletion — drives whether a host
+ * shows the delete affordance. */
+ canRemove: boolean;
+ }
+
+ /** Internal hook state: the list plus the (user, client) scope that owns it. */
+ interface ThreadsState {
+ /** Rows from the last committed fetch, minus optimistic deletions. */
+ threads: AssistantThreadSummary[];
+ /** A fetch is in flight. */
+ loading: boolean;
+ /** At least one fetch has settled. */
+ loaded: boolean;
+ /** The (user, client) the data belongs to; the hook masks to empty unless both
+ * match the current props, so a swap can't show the prior scope's list. */
+ ownerUserId: string | null;
+ ownerClient: AssistantClient | null;
+ }
+
+ /**
+  * The current user's assistant threads for the history switcher, fetched on
+  * demand via `refresh()` (never on mount).
+  *
+  * The stored list is tagged with the (user, client) it was fetched for and is
+  * masked to empty whenever that tag differs from the current props. A late
+  * fetch result is dropped if it was aborted or if either the user or the
+  * client changed since it was requested.
+  *
+  * @param userId The signed-in user, or `null` when there is none; with no user
+  *   `refresh()` settles immediately to an empty, loaded list.
+  * @returns The masked list, its loading/loaded flags, and the stable
+  *   `refresh` / `remove` callbacks.
+  */
+ export function useAssistantThreads(userId: string | null): AssistantThreads {
+   const client = useAssistantClient();
+   // Mirror the latest props into refs so the stable callbacks below always
+   // read the current scope without being re-created.
+   const userRef = useRef(userId);
+   userRef.current = userId;
+   const clientRef = useRef(client);
+   clientRef.current = client;
+   const abortRef = useRef<AbortController | null>(null);
+   // Ids being (or already) deleted. A refresh whose fetch began before the
+   // server delete completed can return a row we optimistically removed; filter
+   // these out of every refresh commit so a deleted thread never reappears.
+   const pendingDeletesRef = useRef<Set<string>>(new Set());
+
+   const [state, setState] = useState<ThreadsState>(() => ({
+     threads: [],
+     loading: false,
+     loaded: false,
+     ownerUserId: userId,
+     ownerClient: client,
+   }));
+
+   const refresh = useCallback(() => {
+     // Capture the user AND client this fetch is FOR; the commit and the owner tag
+     // both use them, so a result can never land under a different scope.
+     const requestedUserId = userRef.current;
+     const requestedClient = clientRef.current;
+     if (!requestedUserId) {
+       setState({
+         threads: [],
+         loading: false,
+         loaded: true,
+         ownerUserId: requestedUserId,
+         ownerClient: requestedClient,
+       });
+       return;
+     }
+     // Supersede any in-flight fetch so a rapid re-open can't land a stale list.
+     abortRef.current?.abort();
+     const ac = new AbortController();
+     abortRef.current = ac;
+     setState((s) => {
+       // Carry the previous list and `loaded` flag forward ONLY when they already
+       // belong to this scope. Retagging another scope's rows with the new owner
+       // would defeat the mask at the bottom of the hook: the prior user's (or
+       // client's) threads would show while this fetch is in flight, and would
+       // stick if it then resolved `null`.
+       const sameScope =
+         s.ownerUserId === requestedUserId && s.ownerClient === requestedClient;
+       return {
+         threads: sameScope ? s.threads : [],
+         loading: true,
+         loaded: sameScope ? s.loaded : false,
+         ownerUserId: requestedUserId,
+         ownerClient: requestedClient,
+       };
+     });
+     const isCurrent = () =>
+       !ac.signal.aborted &&
+       userRef.current === requestedUserId &&
+       clientRef.current === requestedClient;
+     void requestedClient
+       .fetchThreads(ac.signal)
+       .then((result) => {
+         if (!isCurrent()) return;
+         setState((s) => ({
+           // null = transient failure: keep the prior list, just drop the spinner.
+           // Drop any in-flight/finished deletions so a stale fetch can't resurrect
+           // a row we already removed.
+           threads: (result ?? s.threads).filter(
+             (t) => !pendingDeletesRef.current.has(t.id),
+           ),
+           loading: false,
+           loaded: true,
+           ownerUserId: requestedUserId,
+           ownerClient: requestedClient,
+         }));
+       })
+       .catch(() => {
+         // fetchThreads returns null rather than rejecting; this guards a future
+         // change (or a throw in a state setter) from wedging the spinner.
+         if (isCurrent()) {
+           setState((s) => ({ ...s, loading: false, loaded: true }));
+         }
+       });
+   }, []);
+
+   const remove = useCallback(
+     async (threadId: string) => {
+       const requestedClient = clientRef.current;
+       const requestedUserId = userRef.current;
+       // A client without delete support can't remove anything — no-op rather
+       // than optimistically drop a row that will never be deleted server-side.
+       if (!requestedClient.deleteThread) return { ok: false };
+       // Mark it deleting so a concurrent refresh's commit filters it out, then
+       // optimistically drop the row — but only within the scope we're deleting
+       // under (never mutate a swapped-in scope's list).
+       pendingDeletesRef.current.add(threadId);
+       setState((s) =>
+         s.ownerClient === requestedClient && s.ownerUserId === requestedUserId
+           ? { ...s, threads: s.threads.filter((t) => t.id !== threadId) }
+           : s,
+       );
+       // Normalize a rejecting client to `{ ok: false }` so the rollback below
+       // always runs (the bundled client resolves, but the interface allows any).
+       let res: { ok: boolean };
+       try {
+         res = await requestedClient.deleteThread(threadId);
+       } catch {
+         res = { ok: false };
+       }
+       // On failure, un-mark it and reload to restore the row we optimistically
+       // removed (only if we're still in the same scope). On success it stays
+       // marked — the thread is gone for good and must never resurface.
+       if (!res.ok) {
+         pendingDeletesRef.current.delete(threadId);
+         if (
+           userRef.current === requestedUserId &&
+           clientRef.current === requestedClient
+         ) {
+           refresh();
+         }
+       }
+       return res;
+     },
+     [refresh],
+   );
+
+   // Abort an in-flight fetch on a scope swap (its result is already masked and
+   // the commit guard rejects it; this just frees the network promptly) and on
+   // unmount, so a late `.then` can't act after the panel closed.
+   useEffect(() => {
+     return () => abortRef.current?.abort();
+   }, [userId, client]);
+
+   // Drop the pending-delete ids only on true unmount — NOT on a scope swap. A
+   // remove() awaiting its DELETE can outlive a swap-and-return to the same
+   // scope; clearing on the swap would un-filter that id and let a stale refresh
+   // resurrect the row. Thread ids are server-minted and globally unique, so the
+   // set never false-filters another scope's list, and within one mount it only
+   // grows by the user's own deletions (released here on unmount).
+   useEffect(() => {
+     return () => pendingDeletesRef.current.clear();
+   }, []);
+
+   // Mask synchronously: a list owned by a different (user, client) than the
+   // current props is hidden on the same commit — no one-frame cross-scope leak.
+   const stale = state.ownerUserId !== userId || state.ownerClient !== client;
+   return {
+     threads: stale ? [] : state.threads,
+     loading: stale ? false : state.loading,
+     loaded: stale ? false : state.loaded,
+     refresh,
+     remove,
+     canRemove: typeof client.deleteThread === "function",
+   };
+ }
diff --git a/src/assistant/usePanelPrefs.test.ts b/src/assistant/usePanelPrefs.test.ts
new file mode 100644
index 0000000..fe86c85
--- /dev/null
+++ b/src/assistant/usePanelPrefs.test.ts
@@ -0,0 +1,183 @@
+// @vitest-environment jsdom
+import { act, renderHook } from "@testing-library/react";
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import {
+ DEFAULT_FONT_SCALE,
+ DEFAULT_PANEL_WIDTH,
+ MAX_FONT_SCALE,
+ MIN_FONT_SCALE,
+ MIN_PANEL_WIDTH,
+ useFontScale,
+ useIsDesktop,
+ usePanelWidth,
+} from "./usePanelPrefs";
+
+ // Storage keys under test. They repeat the literal values used by
+ // usePanelPrefs, which does not export them.
+ const WIDTH_KEY = "assistant.panel.width";
+ const FONT_SCALE_KEY = "assistant.panel.fontScale";
+
+ // jsdom (default about:blank origin) doesn't provide window.localStorage, so
+ // install a fresh in-memory shim per test. vi.stubGlobal mirrors it onto both
+ // globalThis and window, matching how the hook reads `window.localStorage`.
+ beforeEach(() => {
+ // Backing map for the shim; re-created per test so nothing leaks between tests.
+ let store: Record = {};
+ vi.stubGlobal("localStorage", {
+ // Missing keys read as null, like the real Storage API.
+ getItem: (k: string) => (k in store ? store[k] : null),
+ setItem: (k: string, v: string) => {
+ store[k] = String(v);
+ },
+ removeItem: (k: string) => {
+ delete store[k];
+ },
+ clear: () => {
+ store = {};
+ },
+ key: (i: number) => Object.keys(store)[i] ?? null,
+ // Getter so `length` always reflects the current number of stored keys.
+ get length() {
+ return Object.keys(store).length;
+ },
+ });
+ });
+
+ // Undo every vi.stubGlobal from the test that just ran (the localStorage shim
+ // and any per-test stubs such as matchMedia).
+ afterEach(() => {
+ vi.unstubAllGlobals();
+ });
+
+ // usePanelWidth: default/stored initialisation, clamping, persistence, and
+ // viewport-resize behaviour.
+ describe("usePanelWidth", () => {
+ it("defaults to DEFAULT_PANEL_WIDTH when nothing is stored", () => {
+ const { result } = renderHook(() => usePanelWidth());
+ expect(result.current.width).toBe(DEFAULT_PANEL_WIDTH);
+ });
+
+ it("reads a stored width on mount", () => {
+ window.localStorage.setItem(WIDTH_KEY, "520");
+ const { result } = renderHook(() => usePanelWidth());
+ expect(result.current.width).toBe(520);
+ });
+
+ it("falls back to the default for an empty/corrupt stored value", () => {
+ // An empty string must not be parsed as 0 and then clamped up to the minimum.
+ window.localStorage.setItem(WIDTH_KEY, "");
+ const { result } = renderHook(() => usePanelWidth());
+ expect(result.current.width).toBe(DEFAULT_PANEL_WIDTH);
+ });
+
+ it("clamps setWidth to the min bound and persists", () => {
+ const { result } = renderHook(() => usePanelWidth());
+ // 10 is below MIN_PANEL_WIDTH, so both the state and the stored value are
+ // expected to be raised to the minimum.
+ act(() => result.current.setWidth(10));
+ expect(result.current.width).toBe(MIN_PANEL_WIDTH);
+ expect(window.localStorage.getItem(WIDTH_KEY)).toBe(
+ String(MIN_PANEL_WIDTH),
+ );
+ });
+
+ it("clamps setWidth to the viewport-derived max", () => {
+ const { result } = renderHook(() => usePanelWidth());
+ act(() => result.current.setWidth(100_000));
+ expect(result.current.width).toBe(result.current.maxWidth);
+ });
+
+ it("previewWidth updates the width WITHOUT persisting (no per-tick writes)", () => {
+ const { result } = renderHook(() => usePanelWidth());
+ act(() => result.current.previewWidth(640));
+ expect(result.current.width).toBe(640);
+ expect(window.localStorage.getItem(WIDTH_KEY)).toBeNull();
+ });
+
+ it("restores the explicit width after a transient viewport shrink", () => {
+ const original = window.innerWidth;
+ // Redefine innerWidth as a configurable, writable property so the test can
+ // set it and the `finally` below can restore it.
+ const setInnerWidth = (w: number) =>
+ Object.defineProperty(window, "innerWidth", {
+ value: w,
+ configurable: true,
+ writable: true,
+ });
+ try {
+ setInnerWidth(1600);
+ const { result } = renderHook(() => usePanelWidth());
+ act(() => result.current.setWidth(800));
+ expect(result.current.width).toBe(800);
+
+ // Shrink the viewport: the display clamps, but the preference is retained.
+ act(() => {
+ setInnerWidth(700);
+ window.dispatchEvent(new Event("resize"));
+ });
+ expect(result.current.width).toBeLessThan(800);
+
+ // Grow it back: the original 800 preference is restored, not the clamp.
+ act(() => {
+ setInnerWidth(1600);
+ window.dispatchEvent(new Event("resize"));
+ });
+ expect(result.current.width).toBe(800);
+ } finally {
+ setInnerWidth(original);
+ }
+ });
+
+ it("nudgeWidth applies a delta, clamps, and persists", () => {
+ // Start from a stored 500 so the +24 nudge has a known baseline.
+ window.localStorage.setItem(WIDTH_KEY, "500");
+ const { result } = renderHook(() => usePanelWidth());
+ act(() => result.current.nudgeWidth(24));
+ expect(result.current.width).toBe(524);
+ expect(window.localStorage.getItem(WIDTH_KEY)).toBe("524");
+ });
+ });
+
+ // useFontScale: default, stepping on the 0.125 grid, clamping, and persistence.
+ describe("useFontScale", () => {
+ it("defaults to 1", () => {
+ const { result } = renderHook(() => useFontScale());
+ expect(result.current.scale).toBe(DEFAULT_FONT_SCALE);
+ });
+
+ it("steps on the grid and persists", () => {
+ const { result } = renderHook(() => useFontScale());
+ // One step up from the default: 1 + 0.125.
+ act(() => result.current.increase());
+ expect(result.current.scale).toBeCloseTo(1.125);
+ expect(Number(window.localStorage.getItem(FONT_SCALE_KEY))).toBeCloseTo(
+ 1.125,
+ );
+ // Two steps down: 1.125 − 2 × 0.125.
+ act(() => result.current.decrease());
+ act(() => result.current.decrease());
+ expect(result.current.scale).toBeCloseTo(0.875);
+ });
+
+ it("clamps at the max/min and toggles the can-flags", () => {
+ const { result } = renderHook(() => useFontScale());
+ // 20 presses is far more than the span between the bounds, so each loop is
+ // guaranteed to hit its limit.
+ for (let i = 0; i < 20; i++) act(() => result.current.increase());
+ expect(result.current.scale).toBe(MAX_FONT_SCALE);
+ expect(result.current.canIncrease).toBe(false);
+ for (let i = 0; i < 20; i++) act(() => result.current.decrease());
+ expect(result.current.scale).toBe(MIN_FONT_SCALE);
+ expect(result.current.canDecrease).toBe(false);
+ });
+
+ it("reads a stored scale on mount", () => {
+ window.localStorage.setItem(FONT_SCALE_KEY, "1.25");
+ const { result } = renderHook(() => useFontScale());
+ expect(result.current.scale).toBeCloseTo(1.25);
+ });
+ });
+
+ // useIsDesktop: follows matchMedia, and degrades to the desktop default when
+ // matchMedia does not exist.
+ describe("useIsDesktop", () => {
+ // Restores the matchMedia stubs installed by the tests below.
+ afterEach(() => {
+ vi.unstubAllGlobals();
+ });
+
+ it("reflects the matchMedia result", () => {
+ // Minimal MediaQueryList stand-in: always matches, listeners are no-ops.
+ vi.stubGlobal("matchMedia", (query: string) => ({
+ matches: true,
+ media: query,
+ addEventListener: () => {},
+ removeEventListener: () => {},
+ }));
+ const { result } = renderHook(() => useIsDesktop());
+ expect(result.current).toBe(true);
+ });
+
+ it("falls back to the desktop default (no throw) when matchMedia is absent", () => {
+ vi.stubGlobal("matchMedia", undefined);
+ // Rendered twice: once to prove it doesn't throw, once to read the value.
+ expect(() => renderHook(() => useIsDesktop())).not.toThrow();
+ const { result } = renderHook(() => useIsDesktop());
+ expect(result.current).toBe(true);
+ });
+ });
diff --git a/src/assistant/usePanelPrefs.ts b/src/assistant/usePanelPrefs.ts
new file mode 100644
index 0000000..bb978e6
--- /dev/null
+++ b/src/assistant/usePanelPrefs.ts
@@ -0,0 +1,212 @@
+/**
+ * Persisted, user-adjustable presentation preferences for the assistant drawer:
+ * its width (drag-to-resize) and a font-size scale. Both survive reloads via
+ * localStorage and are clamped to sane bounds. Kept out of the components so the
+ * SSR-safe persistence and clamping live in one place and stay testable.
+ */
+
+import { useCallback, useEffect, useRef, useState } from "react";
+
+ /** Narrowest the drawer may be dragged — below this the chat is unusable. */
+ export const MIN_PANEL_WIDTH = 360;
+ /** Default drawer width — matches the previous fixed `max-w-md` (28rem). */
+ export const DEFAULT_PANEL_WIDTH = 448;
+ /** Widest the drawer may occupy, as a fraction of the viewport. */
+ const MAX_PANEL_WIDTH_FRACTION = 0.95;
+
+ /** Font-size scale bounds and step for the A−/A+ control. 1 = the design
+ * default; the panel applies this as a CSS `zoom` on the transcript so the
+ * whole conversation scales uniformly. */
+ export const MIN_FONT_SCALE = 0.875;
+ export const MAX_FONT_SCALE = 1.5;
+ export const DEFAULT_FONT_SCALE = 1;
+ /** Size of one A−/A+ step; also the grid that `clampScale` snaps values to. */
+ const FONT_SCALE_STEP = 0.125;
+
+ /** localStorage keys under which the width and font scale are persisted. */
+ const WIDTH_KEY = "assistant.panel.width";
+ const FONT_SCALE_KEY = "assistant.panel.fontScale";
+
+ /**
+  * Read a finite number stored under `key` in localStorage.
+  *
+  * @returns The parsed value, or `null` when the key is missing, blank, not a
+  *   finite number, or storage cannot be read at all.
+  */
+ function readNumber(key: string): number | null {
+   try {
+     const raw = window.localStorage.getItem(key);
+     // A blank entry must count as "nothing stored": `Number("")` is 0, which
+     // is finite and would otherwise be clamped up to the minimum instead of
+     // falling back to the default.
+     const isBlank = raw == null || raw.trim() === "";
+     if (isBlank) return null;
+     const parsed = Number(raw);
+     if (!Number.isFinite(parsed)) return null;
+     return parsed;
+   } catch {
+     // No window, or storage access denied — treat as "nothing stored".
+     return null;
+   }
+ }
+
+ /**
+  * Persist `value` under `key` in localStorage, stored as its string form.
+  * Best-effort: any storage failure is swallowed.
+  */
+ function writeNumber(key: string, value: number): void {
+   const serialized = String(value);
+   try {
+     window.localStorage.setItem(key, serialized);
+   } catch {
+     // Preferences are a convenience, not a hard dependency: when storage is
+     // unavailable (private mode, quota) the write is dropped silently.
+   }
+ }
+
+ /**
+  * Upper bound for the drawer width on the current viewport: the viewport
+  * width times `MAX_PANEL_WIDTH_FRACTION`, rounded to a whole pixel.
+  *
+  * With no `window` it returns 9999 — deliberately large but FINITE, because
+  * Infinity would be an invalid `aria-valuemax`. (The app is a client SPA, so
+  * that branch is a safeguard only.)
+  */
+ function maxPanelWidth(): number {
+   const hasWindow = typeof window !== "undefined";
+   return hasWindow
+     ? Math.round(window.innerWidth * MAX_PANEL_WIDTH_FRACTION)
+     : 9999;
+ }
+
+ /**
+  * Round `value` to a whole pixel and bound it: raised to `MIN_PANEL_WIDTH`
+  * first, then capped at the current viewport-derived maximum (so the cap wins
+  * if the viewport is narrower than the minimum).
+  */
+ function clampWidth(value: number): number {
+   const ceiling = maxPanelWidth();
+   const atLeastMin = Math.max(Math.round(value), MIN_PANEL_WIDTH);
+   return Math.min(atLeastMin, ceiling);
+ }
+
+ /**
+  * Snap `value` to the nearest multiple of `FONT_SCALE_STEP`, then bound it to
+  * [`MIN_FONT_SCALE`, `MAX_FONT_SCALE`].
+  */
+ function clampScale(value: number): number {
+   // Snapping to the grid keeps repeated +/- presses from accumulating float
+   // drift.
+   const gridIndex = Math.round(value / FONT_SCALE_STEP);
+   const snapped = gridIndex * FONT_SCALE_STEP;
+   const atLeastMin = Math.max(snapped, MIN_FONT_SCALE);
+   return Math.min(atLeastMin, MAX_FONT_SCALE);
+ }
+
+ /** The value returned by `usePanelWidth`: the displayed width, its current
+ * upper bound, and the three ways to change it. */
+ export interface PanelWidth {
+ /** Current width in px. Apply as an inline `width` only on desktop. */
+ width: number;
+ /** Current max allowed width in px (viewport-derived; updates on resize).
+ * Exposed for an accurate `aria-valuemax` on the resize control. */
+ maxWidth: number;
+ /** Set an absolute width (clamped + persisted). Use on drag end / discrete
+ * changes — NOT on every drag tick. */
+ setWidth: (next: number) => void;
+ /** Set an absolute width (clamped, NOT persisted). Use during a live drag so
+ * the panel tracks the pointer without thrashing localStorage every tick. */
+ previewWidth: (next: number) => void;
+ /** Nudge by a delta (keyboard resize); clamped + persisted. */
+ nudgeWidth: (deltaPx: number) => void;
+ }
+
+/**
+ * The drawer's persisted width. Initialized to the default so first render is
+ * stable; the stored value is read in an effect and applied after mount.
+ * Re-clamps on viewport resize so a stored width can never exceed the current
+ * window, and tracks the live max for the resize control's ARIA bounds.
+ */
+export function usePanelWidth(): PanelWidth {
+ const [width, setWidthState] = useState(DEFAULT_PANEL_WIDTH);
+ const [maxWidth, setMaxWidth] = useState(() => maxPanelWidth());
+ // The user's explicit width preference. The rendered `width` is this clamped
+ // to the current viewport; a transient shrink clamps only the display, so when
+ // the viewport grows back the preference is restored (rather than the shrink
+ // permanently overwriting the choice).
+ const desiredRef = useRef(DEFAULT_PANEL_WIDTH);
+
+ useEffect(() => {
+ setMaxWidth(maxPanelWidth());
+ const stored = readNumber(WIDTH_KEY);
+ if (stored != null) {
+ desiredRef.current = Math.max(Math.round(stored), MIN_PANEL_WIDTH);
+ setWidthState(clampWidth(stored));
+ }
+ }, []);
+
+ // On viewport change, recompute the display from the DESIRED preference (not
+ // the possibly-already-clamped current width) so growing the window restores
+ // it. Keep the reported max current too, for an accurate aria-valuemax.
+ useEffect(() => {
+ const onResize = () => {
+ setMaxWidth(maxPanelWidth());
+ setWidthState(clampWidth(desiredRef.current));
+ };
+ window.addEventListener("resize", onResize);
+ return () => window.removeEventListener("resize", onResize);
+ }, []);
+
+ const setWidth = useCallback((next: number) => {
+ const clamped = clampWidth(next);
+ desiredRef.current = clamped;
+ writeNumber(WIDTH_KEY, clamped);
+ setWidthState(clamped);
+ }, []);
+
+ const previewWidth = useCallback((next: number) => {
+ // Live drag: display only — no persist, no change to the desired preference
+ // (drag end calls setWidth to commit).
+ setWidthState(clampWidth(next));
+ }, []);
+
+ const nudgeWidth = useCallback((deltaPx: number) => {
+ const clamped = clampWidth(desiredRef.current + deltaPx);
+ desiredRef.current = clamped;
+ writeNumber(WIDTH_KEY, clamped);
+ setWidthState(clamped);
+ }, []);
+
+ return { width, maxWidth, setWidth, previewWidth, nudgeWidth };
+}
+
+ /** The value returned by `useFontScale`. */
+ export interface FontScale {
+ /** Current scale factor (1 = the design default). */
+ scale: number;
+ /** Step the scale up by one grid step; clamped to the max and persisted. */
+ increase: () => void;
+ /** Step the scale down by one grid step; clamped to the min and persisted. */
+ decrease: () => void;
+ /** False once the scale has reached `MAX_FONT_SCALE`. */
+ canIncrease: boolean;
+ /** False once the scale has reached `MIN_FONT_SCALE`. */
+ canDecrease: boolean;
+ }
+
+ /**
+  * The panel's persisted font-size scale plus bounded A−/A+ steppers. Starts at
+  * the default and adopts the stored value (clamped) after mount.
+  */
+ export function useFontScale(): FontScale {
+   const [scale, setScaleState] = useState(DEFAULT_FONT_SCALE);
+
+   // Mount: pick up a previously stored scale, if any.
+   useEffect(() => {
+     const persisted = readNumber(FONT_SCALE_KEY);
+     if (persisted == null) return;
+     setScaleState(clampScale(persisted));
+   }, []);
+
+   // Move by `delta` from the latest state, persisting whatever value results.
+   const step = useCallback((delta: number) => {
+     setScaleState((current) => {
+       const next = clampScale(current + delta);
+       writeNumber(FONT_SCALE_KEY, next);
+       return next;
+     });
+   }, []);
+
+   const increase = () => step(FONT_SCALE_STEP);
+   const decrease = () => step(-FONT_SCALE_STEP);
+   // The small epsilon keeps float noise at a bound from re-enabling a button.
+   const canIncrease = scale < MAX_FONT_SCALE - 1e-9;
+   const canDecrease = scale > MIN_FONT_SCALE + 1e-9;
+
+   return { scale, increase, decrease, canIncrease, canDecrease };
+ }
+
+/**
+ * Whether the viewport is at the `md` breakpoint or wider. The drawer is a
+ * full-screen sheet below `md` (no resize), and a width-constrained side panel
+ * at/above it. Defaults to `true` for SSR/first paint — the dialog only renders
+ * after a client interaction, by which point the effect has corrected it.
+ */
+export function useIsDesktop(): boolean {
+ // Read matchMedia synchronously on first render (client SPA) so the drawer
+ // never first-paints at desktop width on a mobile viewport; the default only
+ // applies in the no-window/no-matchMedia case.
+ const [isDesktop, setIsDesktop] = useState(() =>
+ typeof window !== "undefined" && typeof window.matchMedia === "function"
+ ? window.matchMedia("(min-width: 768px)").matches
+ : true,
+ );
+ useEffect(() => {
+ // Mirror the initializer guard: a runtime without matchMedia (jsdom, some
+ // embedded webviews) keeps the desktop default rather than throwing — the
+ // dock is in the always-mounted shell, so a throw here would break it.
+ if (
+ typeof window === "undefined" ||
+ typeof window.matchMedia !== "function"
+ ) {
+ return;
+ }
+ const mq = window.matchMedia("(min-width: 768px)");
+ const update = () => setIsDesktop(mq.matches);
+ update();
+ mq.addEventListener("change", update);
+ return () => mq.removeEventListener("change", update);
+ }, []);
+ return isDesktop;
+}
diff --git a/src/test-setup.ts b/src/test-setup.ts
new file mode 100644
index 0000000..6dde2c2
--- /dev/null
+++ b/src/test-setup.ts
@@ -0,0 +1,11 @@
+import { cleanup } from "@testing-library/react"
+import { afterEach } from "vitest"
+
+ // Global teardown: unmount whatever @testing-library rendered during the test
+ // that just finished. This repo does not run vitest with `globals: true`, so
+ // without this hook rendered DOM piles up across `it` blocks and role/text
+ // queries start matching several copies.
+ afterEach(() => {
+   // Node-environment (non-DOM) test files share this setup and have no
+   // `document`; there is nothing to clean up for them.
+   if (typeof document === "undefined") return
+   cleanup()
+ })
diff --git a/src/web-react/chat-composer.test.tsx b/src/web-react/chat-composer.test.tsx
new file mode 100644
index 0000000..d0c602e
--- /dev/null
+++ b/src/web-react/chat-composer.test.tsx
@@ -0,0 +1,137 @@
+// @vitest-environment jsdom
+import { afterEach, describe, expect, it, vi } from 'vitest'
+import { cleanup, fireEvent, render, screen } from '@testing-library/react'
+
+import { ChatComposer } from './chat-composer'
+import { ModelPicker } from './controls'
+import type { CatalogModel } from '../runtime/model-catalog'
+
+ // Unmount everything rendered by the previous test so queries stay unambiguous.
+ afterEach(cleanup)
+
+ /** Simulate typing by firing a `change` event on `el` that carries `value`. */
+ function type(el: HTMLElement, value: string) {
+   const target = { value }
+   fireEvent.change(el, { target })
+ }
+
+ // Behavioural tests for ChatComposer, driven through its accessible labels.
+ // NOTE(review): the JSX passed to render()/rerender() is not visible in this
+ // view, so the props each test supplies cannot be confirmed from here.
+ describe('ChatComposer', () => {
+ it('sends the trimmed message on Enter and clears the input (uncontrolled)', () => {
+ const onSend = vi.fn()
+ render()
+ const input = screen.getByLabelText('Message input') as HTMLTextAreaElement
+
+ type(input, ' hello world ')
+ fireEvent.keyDown(input, { key: 'Enter' })
+
+ expect(onSend).toHaveBeenCalledExactlyOnceWith('hello world')
+ expect(input.value).toBe('')
+ })
+
+ it('does not send on Shift+Enter (newline) or while composing (IME)', () => {
+ const onSend = vi.fn()
+ render()
+ const input = screen.getByLabelText('Message input')
+
+ type(input, 'draft')
+ fireEvent.keyDown(input, { key: 'Enter', shiftKey: true })
+ // isComposing is read off nativeEvent; simulate an active IME candidate.
+ fireEvent.keyDown(input, { key: 'Enter', isComposing: true })
+
+ expect(onSend).not.toHaveBeenCalled()
+ })
+
+ it('disables Send when empty and enables it once there is text', () => {
+ render()
+ const send = screen.getByLabelText('Send message') as HTMLButtonElement
+ expect(send.disabled).toBe(true)
+
+ type(screen.getByLabelText('Message input'), 'x')
+ expect(send.disabled).toBe(false)
+ })
+
+ it('swaps Send for Stop while streaming and calls onCancel', () => {
+ const onCancel = vi.fn()
+ render()
+
+ // While streaming there is no Send button at all, only Stop.
+ expect(screen.queryByLabelText('Send message')).toBeNull()
+ fireEvent.click(screen.getByLabelText('Stop response'))
+ expect(onCancel).toHaveBeenCalledOnce()
+ })
+
+ it('is controllable via value/onValueChange and does not self-clear', () => {
+ const onValueChange = vi.fn()
+ const onSend = vi.fn()
+ const { rerender } = render(
+ ,
+ )
+ const input = screen.getByLabelText('Message input') as HTMLTextAreaElement
+ expect(input.value).toBe('hi')
+
+ fireEvent.keyDown(input, { key: 'Enter' })
+ expect(onSend).toHaveBeenCalledWith('hi')
+ // The composer asks the host to clear; it does NOT mutate a controlled value
+ // itself, so the displayed value only changes when the host re-renders.
+ expect(onValueChange).toHaveBeenLastCalledWith('')
+ expect(input.value).toBe('hi')
+ rerender()
+ expect(input.value).toBe('')
+ })
+
+ it('hides attachment affordances unless onAttach is provided', () => {
+ const { rerender } = render()
+ expect(screen.queryByLabelText('Attach files')).toBeNull()
+
+ rerender()
+ expect(screen.getByLabelText('Attach files')).toBeTruthy()
+ })
+
+ it('renders pending-file chips and removes them', () => {
+ const onRemoveFile = vi.fn()
+ render(
+ ,
+ )
+ expect(screen.getByText('data.csv')).toBeTruthy()
+ // The remove button is labelled with the file name; the callback gets the id.
+ fireEvent.click(screen.getByLabelText('Remove data.csv'))
+ expect(onRemoveFile).toHaveBeenCalledExactlyOnceWith('f1')
+ })
+
+ it('focuses the input on Cmd/Ctrl+L', () => {
+ render()
+ const input = screen.getByLabelText('Message input')
+ expect(document.activeElement).not.toBe(input)
+ // The shortcut is dispatched on the document, not on the input itself.
+ fireEvent.keyDown(document, { key: 'l', metaKey: true })
+ expect(document.activeElement).toBe(input)
+ })
+ })
+
+ // Test factory for a CatalogModel: fills supportsTools/supportsReasoning/
+ // featured with defaults and lets `partial` override any of them.
+ // NOTE(review): the generic arguments of Partial/Pick are missing in this view;
+ // the calls below always pass id, name and provider.
+ function model(partial: Partial & Pick): CatalogModel {
+ return { supportsTools: true, supportsReasoning: false, featured: false, ...partial }
+ }
+
+ // ModelPicker's priority group: a host-labelled section pinned above
+ // "Recommended", whose models must not be repeated further down.
+ // NOTE(review): most of the ModelPicker JSX is not visible in this view; only
+ // the tail of the priority-group predicate survives.
+ describe('ModelPicker priorityGroup', () => {
+ it('pins a labeled section above Recommended and does not duplicate the model below', () => {
+ // One model that matches the priority predicate (provider 'tuner') and one
+ // featured model from another provider.
+ const models = [
+ model({ id: 'tuner/ft-1', name: 'My Fine-Tune', provider: 'tuner' }),
+ model({ id: 'anthropic/opus', name: 'Claude Opus', provider: 'anthropic', featured: true }),
+ ]
+ render(
+ m.provider === 'tuner' }}
+ />,
+ )
+ // Open the popover.
+ fireEvent.click(screen.getByRole('button'))
+
+ expect(screen.getByText('Your Fine-Tuned Models')).toBeTruthy()
+ expect(screen.getByText('Recommended')).toBeTruthy()
+ // The fine-tuned model appears exactly once (in the priority section, not
+ // also under a "tuner" provider group).
+ expect(screen.getAllByText('My Fine-Tune')).toHaveLength(1)
+ })
+ })
diff --git a/src/web-react/chat-composer.tsx b/src/web-react/chat-composer.tsx
new file mode 100644
index 0000000..34d91e2
--- /dev/null
+++ b/src/web-react/chat-composer.tsx
@@ -0,0 +1,406 @@
+/**
+ * ChatComposer — the shared message input every agent app used to hand-roll:
+ * an auto-resizing textarea (Enter sends, Shift+Enter inserts a newline), an
+ * opt-in attach + drag-and-drop surface with pending-file chips, a streaming
+ * Stop/Send toggle, a slot for inline controls (model picker, reasoning
+ * effort), and a Cmd/Ctrl+L focus shortcut.
+ *
+ * Styling contract matches the rest of `web-react`: Tailwind over the shared
+ * design tokens (`bg-card`, `border-border`, `text-foreground`, `bg-primary`, …)
+ * and inline-SVG glyphs. It defines NO `--chat-*` / `--brand-*` custom
+ * properties, so it themes correctly in any shell that provides the standard
+ * tokens — the input renders on-palette instead of collapsing to unstyled
+ * fallbacks when a host hasn't defined a private chat-token set.
+ */
+
+import {
+ useCallback,
+ useEffect,
+ useRef,
+ useState,
+ type ChangeEvent,
+ type DragEvent,
+ type KeyboardEvent,
+ type ReactNode,
+} from 'react'
+
+// ── glyphs (no icon-library dependency) ───────────────────────────────────
+
+/**
+ * Send glyph component; accepts an optional `className`.
+ * NOTE(review): the returned markup is empty in this view, so how `className`
+ * is applied cannot be confirmed from here.
+ */
+function SendGlyph({ className }: { className?: string }) {
+ return (
+
+ )
+}
+
+/**
+ * Stop glyph component; accepts an optional `className`.
+ * NOTE(review): the returned markup is empty in this view, so how `className`
+ * is applied cannot be confirmed from here.
+ */
+function StopGlyph({ className }: { className?: string }) {
+ return (
+
+ )
+}
+
+/**
+ * Paperclip glyph component; accepts an optional `className`.
+ * NOTE(review): the returned markup is empty in this view, so how `className`
+ * is applied cannot be confirmed from here. Presumably the attach-button icon.
+ */
+function PaperclipGlyph({ className }: { className?: string }) {
+ return (
+
+ )
+}
+
+/**
+ * Folder glyph component; accepts an optional `className`.
+ * NOTE(review): the returned markup is empty in this view, so how `className`
+ * is applied cannot be confirmed from here. Presumably used for folder entries.
+ */
+function FolderGlyph({ className }: { className?: string }) {
+ return (
+
+ )
+}
+
+/**
+ * Close glyph component; accepts an optional `className`.
+ * NOTE(review): the returned markup is empty in this view, so how `className`
+ * is applied cannot be confirmed from here. Presumably the chip-remove icon.
+ */
+function CloseGlyph({ className }: { className?: string }) {
+ return (
+
+ )
+}
+
+/**
+ * Upload glyph component; accepts an optional `className`.
+ * NOTE(review): the returned markup is empty in this view, so how `className`
+ * is applied cannot be confirmed from here. Presumably shown while dragging
+ * files over the composer.
+ */
+function UploadGlyph({ className }: { className?: string }) {
+ return (
+
+ )
+}
+
+// ── component ──────────────────────────────────────────────────────────────
+
+/** One attachment entry in the composer's `pendingFiles` list. */
+export interface ComposerFile {
+ /** Identifier for this entry; `onRemoveFile` receives an id of this type. */
+ id: string
+ /** File or folder name. */
+ name: string
+ /** Size, when known. NOTE(review): unit is not stated here — presumably bytes; confirm. */
+ size?: number
+ /** Distinguishes single files from folders; the composer partitions its chips on this. */
+ kind: 'file' | 'folder'
+ /** Number of files inside, for a folder chip. */
+ fileCount?: number
+ /** Lifecycle state of the entry. */
+ status: 'pending' | 'uploading' | 'ready' | 'error'
+}
+
+/** Props for `ChatComposer`. Only `onSend` is required. */
+export interface ChatComposerProps {
+ /** Send the trimmed, non-empty message. Attached files travel separately via
+ * `onAttach` + `pendingFiles` (the host consumes and clears them on send). */
+ onSend: (message: string) => void
+ /** Stop the in-flight turn; shown in place of Send while `isStreaming`. */
+ onCancel?: () => void
+ /** True while a turn is in flight. Sending is blocked while set. Default false. */
+ isStreaming?: boolean
+ /** Block input + send (e.g. while restoring). Distinct from `isStreaming`,
+ * which keeps the textarea editable so the next turn can be composed. */
+ disabled?: boolean
+ /** Placeholder text for the input. Default "Message the agent…". */
+ placeholder?: string
+
+ /** Controlled value. Omit for self-managed internal state (cleared on send). */
+ value?: string
+ /** Called with the new text on every change, in both controlled and
+ * uncontrolled mode — including the empty string set after a send. */
+ onValueChange?: (value: string) => void
+ /** Initial text in uncontrolled mode; ignored when `value` is provided. */
+ initialValue?: string
+
+ /** Inline controls (e.g. a model picker and a reasoning-effort selector).
+ * Rendered in a row above the input by default. */
+ controls?: ReactNode
+ /** Where `controls` are placed. Default 'above'. */
+ controlsPlacement?: 'above' | 'footer'
+
+ /** Attachments are opt-in: pass `onAttach` to show the attach button, accept
+ * drag-and-drop onto the input, and render `pendingFiles` chips. */
+ onAttach?: (files: FileList) => void
+ /** Receives the files chosen through the folder picker. When omitted, folder
+ * selections are passed to `onAttach` instead. */
+ onAttachFolder?: (files: FileList) => void
+ /** Attachments awaiting the next send. Default: empty list. */
+ pendingFiles?: ComposerFile[]
+ /** Receives the `id` of the pending file to remove. */
+ onRemoveFile?: (id: string) => void
+ /** NOTE(review): presumably forwarded to the file input's `accept` attribute — confirm. */
+ accept?: string
+ /** Title text for the drop state. Default "Drop files to add context". */
+ dropTitle?: string
+ /** Secondary text for the drop state. Default "They attach to your next message.". */
+ dropDescription?: string
+
+ /** Cmd/Ctrl+L focuses the input and shows the hint. Default true. */
+ focusShortcut?: boolean
+ /** Send button label. Default "Send". */
+ sendLabel?: string
+ /** NOTE(review): presumably applied to the composer's root element — confirm. */
+ className?: string
+}
+
+/** Cap, in pixels, on the textarea's auto-resized height. */
+const MAX_HEIGHT = 168
+
+export function ChatComposer({
+ onSend,
+ onCancel,
+ isStreaming = false,
+ disabled = false,
+ placeholder = 'Message the agent…',
+ value,
+ onValueChange,
+ initialValue,
+ controls,
+ controlsPlacement = 'above',
+ onAttach,
+ onAttachFolder,
+ pendingFiles = [],
+ onRemoveFile,
+ accept,
+ dropTitle = 'Drop files to add context',
+ dropDescription = 'They attach to your next message.',
+ focusShortcut = true,
+ sendLabel = 'Send',
+ className,
+}: ChatComposerProps) {
+ const isControlled = value !== undefined
+ const [internal, setInternal] = useState(initialValue ?? '')
+ const text = isControlled ? value : internal
+
+ const textareaRef = useRef(null)
+ const fileInputRef = useRef(null)
+ const folderInputRef = useRef(null)
+ const [dragOver, setDragOver] = useState(false)
+ const dragDepth = useRef(0)
+
+ const setText = useCallback(
+ (next: string) => {
+ if (!isControlled) setInternal(next)
+ onValueChange?.(next)
+ },
+ [isControlled, onValueChange],
+ )
+
+ // Keep the textarea height in sync with the content for BOTH typed and
+ // external (controlled) value changes — one effect covers both paths.
+ useEffect(() => {
+ const el = textareaRef.current
+ if (!el) return
+ el.style.height = 'auto'
+ el.style.height = `${Math.min(el.scrollHeight, MAX_HEIGHT)}px`
+ }, [text])
+
+ // Cmd/Ctrl+L focuses the composer from anywhere — the shortcut the hint
+ // advertises. Scoped to when the shortcut is enabled and not disabled.
+ useEffect(() => {
+ if (!focusShortcut || disabled) return
+ function onKeyDown(e: globalThis.KeyboardEvent) {
+ if ((e.metaKey || e.ctrlKey) && e.key.toLowerCase() === 'l') {
+ e.preventDefault()
+ textareaRef.current?.focus()
+ }
+ }
+ document.addEventListener('keydown', onKeyDown)
+ return () => document.removeEventListener('keydown', onKeyDown)
+ }, [focusShortcut, disabled])
+
+ const canSend = text.trim().length > 0 && !isStreaming && !disabled
+
+ const send = useCallback(() => {
+ const trimmed = text.trim()
+ if (!trimmed || isStreaming || disabled) return
+ onSend(trimmed)
+ setText('')
+ }, [text, isStreaming, disabled, onSend, setText])
+
+ const handleKeyDown = (e: KeyboardEvent) => {
+ // Respect IME composition — Enter commits the candidate, it doesn't send.
+ if (e.nativeEvent.isComposing) return
+ if (e.key === 'Enter' && !e.shiftKey) {
+ e.preventDefault()
+ send()
+ }
+ }
+
+ const handleFileChange = (e: ChangeEvent) => {
+ if (e.target.files?.length) onAttach?.(e.target.files)
+ e.target.value = ''
+ }
+
+ const handleFolderChange = (e: ChangeEvent) => {
+ if (e.target.files?.length) (onAttachFolder ?? onAttach)?.(e.target.files)
+ e.target.value = ''
+ }
+
+ const handleDragEnter = useCallback((e: DragEvent) => {
+ e.preventDefault()
+ e.stopPropagation()
+ dragDepth.current++
+ if (e.dataTransfer?.types.includes('Files')) setDragOver(true)
+ }, [])
+
+ const handleDragLeave = useCallback((e: DragEvent) => {
+ e.preventDefault()
+ e.stopPropagation()
+ dragDepth.current--
+ if (dragDepth.current <= 0) {
+ dragDepth.current = 0
+ setDragOver(false)
+ }
+ }, [])
+
+ const handleDragOver = useCallback((e: DragEvent) => {
+ e.preventDefault()
+ e.stopPropagation()
+ if (e.dataTransfer) e.dataTransfer.dropEffect = 'copy'
+ }, [])
+
+ const handleDrop = useCallback(
+ (e: DragEvent) => {
+ e.preventDefault()
+ e.stopPropagation()
+ dragDepth.current = 0
+ setDragOver(false)
+ const files = e.dataTransfer?.files
+ if (files?.length) onAttach?.(files)
+ },
+ [onAttach],
+ )
+
+ const folderChips = pendingFiles.filter((f) => f.kind === 'folder')
+ const fileChips = pendingFiles.filter((f) => f.kind !== 'folder')
+ const showFooter = controls != null && controlsPlacement === 'footer'
+ const showAbove = controls != null && controlsPlacement === 'above'
+
+ return (
+