From af42b84753fb93d0d00f68c32319c805a08858a3 Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Mon, 20 Apr 2026 15:12:03 -0400 Subject: [PATCH] feat(server): target-bearing runtime resolution EvaluationRequest now accepts optional target_type + target_id. When both are set, the server resolves the target row via (tenant_id, target_type, external_id) and merges target_controls into the effective set. Direct agent controls and policy-derived controls mask any target-level setting; target_controls with enabled=true contribute otherwise. Missing target returns 404. No-target requests remain unchanged. --- models/src/agent_control_models/evaluation.py | 36 ++- models/tests/test_evaluation.py | 57 ++++ sdks/python/tests/test_evaluation.py | 2 + .../generated/funcs/evaluation-evaluate.ts | 8 + .../generated/models/evaluation-request.ts | 20 ++ .../src/generated/sdk/evaluation.ts | 8 + .../tests/evaluation-request.test.ts | 49 ++++ .../endpoints/evaluation.py | 39 ++- .../agent_control_server/services/controls.py | 53 +++- .../test_evaluation_target_resolution.py | 276 ++++++++++++++++++ 10 files changed, 541 insertions(+), 7 deletions(-) create mode 100644 models/tests/test_evaluation.py create mode 100644 sdks/typescript/tests/evaluation-request.test.ts create mode 100644 server/tests/test_evaluation_target_resolution.py diff --git a/models/src/agent_control_models/evaluation.py b/models/src/agent_control_models/evaluation.py index 458c91a5..a91d4b70 100644 --- a/models/src/agent_control_models/evaluation.py +++ b/models/src/agent_control_models/evaluation.py @@ -1,7 +1,7 @@ """Evaluation-related models.""" from typing import Literal -from pydantic import Field, field_validator +from pydantic import Field, field_validator, model_validator from .agent import AGENT_NAME_MIN_LENGTH, AGENT_NAME_PATTERN, Step, normalize_agent_name from .base import BaseModel @@ -19,6 +19,12 @@ class EvaluationRequest(BaseModel): agent_name: Unique identifier of the agent making the request 
step: Step payload for evaluation stage: 'pre' (before execution) or 'post' (after execution) + target_type: Optional opaque target kind (e.g. 'environment') for + target-bearing requests. When supplied with ``target_id``, the + server merges any controls attached to that target into the + effective set. Omit for the agent-only path. + target_id: Caller-supplied external identifier of the target, paired + with ``target_type``. """ agent_name: str = Field( ..., @@ -32,6 +38,22 @@ class EvaluationRequest(BaseModel): stage: Literal["pre", "post"] = Field( ..., description="Evaluation stage: 'pre' or 'post'" ) + target_type: str | None = Field( + default=None, + max_length=64, + description=( + "Optional target kind for target-bearing requests " + "(e.g. 'environment'). Must be provided together with target_id." + ), + ) + target_id: str | None = Field( + default=None, + max_length=255, + description=( + "Optional external identifier of the target. Must be provided " + "together with target_type." + ), + ) model_config = { "json_schema_extra": { @@ -87,6 +109,18 @@ class EvaluationRequest(BaseModel): def validate_and_normalize_agent_name(cls, value: str) -> str: return normalize_agent_name(str(value)) + @model_validator(mode="after") + def validate_target_fields_paired(self) -> "EvaluationRequest": + """target_type and target_id must be supplied together or not at all.""" + has_type = self.target_type is not None + has_id = self.target_id is not None + if has_type != has_id: + raise ValueError( + "target_type and target_id must be provided together; " + "pass both or omit both." 
+ ) + return self + class EvaluationResponse(BaseModel): """ diff --git a/models/tests/test_evaluation.py b/models/tests/test_evaluation.py new file mode 100644 index 00000000..63c5bc10 --- /dev/null +++ b/models/tests/test_evaluation.py @@ -0,0 +1,57 @@ +"""Unit tests for EvaluationRequest target field pairing semantics.""" + +from __future__ import annotations + +import pytest +from pydantic import ValidationError + +from agent_control_models import EvaluationRequest +from agent_control_models.agent import Step + + +def _step() -> Step: + return Step(type="llm", name="chat", input="hello", output=None) + + +def test_both_target_fields_unset_is_valid() -> None: + req = EvaluationRequest( + agent_name="test-agent", + step=_step(), + stage="pre", + ) + assert req.target_type is None + assert req.target_id is None + + +def test_both_target_fields_set_is_valid() -> None: + req = EvaluationRequest( + agent_name="test-agent", + step=_step(), + stage="pre", + target_type="environment", + target_id="env-prod-123", + ) + assert req.target_type == "environment" + assert req.target_id == "env-prod-123" + + +def test_only_target_type_set_raises() -> None: + with pytest.raises(ValidationError) as excinfo: + EvaluationRequest( + agent_name="test-agent", + step=_step(), + stage="pre", + target_type="environment", + ) + assert "target_type and target_id must be provided together" in str(excinfo.value) + + +def test_only_target_id_set_raises() -> None: + with pytest.raises(ValidationError) as excinfo: + EvaluationRequest( + agent_name="test-agent", + step=_step(), + stage="pre", + target_id="env-prod-123", + ) + assert "target_type and target_id must be provided together" in str(excinfo.value) diff --git a/sdks/python/tests/test_evaluation.py b/sdks/python/tests/test_evaluation.py index 4c7a647b..7f770da4 100644 --- a/sdks/python/tests/test_evaluation.py +++ b/sdks/python/tests/test_evaluation.py @@ -65,6 +65,8 @@ def json(self) -> dict[str, object]: "context": None, }, "stage": 
"pre", + "target_type": None, + "target_id": None, }, headers=None, ) diff --git a/sdks/typescript/src/generated/funcs/evaluation-evaluate.ts b/sdks/typescript/src/generated/funcs/evaluation-evaluate.ts index 2c66e38e..ebc410a8 100644 --- a/sdks/typescript/src/generated/funcs/evaluation-evaluate.ts +++ b/sdks/typescript/src/generated/funcs/evaluation-evaluate.ts @@ -36,6 +36,14 @@ import { Result } from "../types/fp.js"; * ``EvaluationResponse`` and does not build or ingest observability events * on the server; SDKs reconstruct and emit those events separately through * the observability ingestion endpoint. + * + * Target-bearing requests (``target_type`` and ``target_id`` both set) merge + * controls attached to that target into the effective set, with the same + * deduplication/precedence rules used by the runtime resolver: when the + * same control is attached via both the agent/policy path and the target + * path, the agent/policy attachment wins. The request body carries the + * caller-supplied external target identifier; the server resolves it to + * the internal target row via the tenant context. */ export function evaluationEvaluate( client: AgentControlSDKCore, diff --git a/sdks/typescript/src/generated/models/evaluation-request.ts b/sdks/typescript/src/generated/models/evaluation-request.ts index 231d4c94..d301b8b7 100644 --- a/sdks/typescript/src/generated/models/evaluation-request.ts +++ b/sdks/typescript/src/generated/models/evaluation-request.ts @@ -31,6 +31,12 @@ export type Stage = ClosedEnum; * agent_name: Unique identifier of the agent making the request * step: Step payload for evaluation * stage: 'pre' (before execution) or 'post' (after execution) + * target_type: Optional opaque target kind (e.g. 'environment') for + * target-bearing requests. When supplied with ``target_id``, the + * server merges any controls attached to that target into the + * effective set. Omit for the agent-only path. 
+ * target_id: Caller-supplied external identifier of the target, paired + * with ``target_type``. */ export type EvaluationRequest = { /** @@ -45,6 +51,14 @@ export type EvaluationRequest = { * Runtime payload for an agent step invocation. */ step: Step; + /** + * Optional external identifier of the target. Must be provided together with target_type. + */ + targetId?: string | null | undefined; + /** + * Optional target kind for target-bearing requests (e.g. 'environment'). Must be provided together with target_id. + */ + targetType?: string | null | undefined; }; /** @internal */ @@ -55,6 +69,8 @@ export type EvaluationRequest$Outbound = { agent_name: string; stage: string; step: Step$Outbound; + target_id?: string | null | undefined; + target_type?: string | null | undefined; }; /** @internal */ @@ -66,10 +82,14 @@ export const EvaluationRequest$outboundSchema: z.ZodMiniType< agentName: z.string(), stage: Stage$outboundSchema, step: Step$outboundSchema, + targetId: z.optional(z.nullable(z.string())), + targetType: z.optional(z.nullable(z.string())), }), z.transform((v) => { return remap$(v, { agentName: "agent_name", + targetId: "target_id", + targetType: "target_type", }); }), ); diff --git a/sdks/typescript/src/generated/sdk/evaluation.ts b/sdks/typescript/src/generated/sdk/evaluation.ts index b2887e1b..db43412d 100644 --- a/sdks/typescript/src/generated/sdk/evaluation.ts +++ b/sdks/typescript/src/generated/sdk/evaluation.ts @@ -18,6 +18,14 @@ export class Evaluation extends ClientSDK { * ``EvaluationResponse`` and does not build or ingest observability events * on the server; SDKs reconstruct and emit those events separately through * the observability ingestion endpoint. 
+ * + * Target-bearing requests (``target_type`` and ``target_id`` both set) merge + * controls attached to that target into the effective set, with the same + * deduplication/precedence rules used by the runtime resolver: when the + * same control is attached via both the agent/policy path and the target + * path, the agent/policy attachment wins. The request body carries the + * caller-supplied external target identifier; the server resolves it to + * the internal target row via the tenant context. */ async evaluate( request: models.EvaluationRequest, diff --git a/sdks/typescript/tests/evaluation-request.test.ts b/sdks/typescript/tests/evaluation-request.test.ts new file mode 100644 index 00000000..e504b94e --- /dev/null +++ b/sdks/typescript/tests/evaluation-request.test.ts @@ -0,0 +1,49 @@ +import { describe, expect, it } from "vitest"; + +import { evaluationRequestToJSON } from "../src/generated/models/evaluation-request"; +import type { EvaluationRequest } from "../src/generated/models/evaluation-request"; + +describe("EvaluationRequest serialization", () => { + const baseRequest: EvaluationRequest = { + agentName: "test-agent-01", + stage: "pre", + step: { + type: "llm", + name: "chat", + input: "hello", + }, + }; + + it("omits target fields from the wire payload when unset", () => { + const wire = JSON.parse(evaluationRequestToJSON(baseRequest)) as Record<string, unknown>; + expect(wire.agent_name).toBe("test-agent-01"); + expect("target_type" in wire).toBe(false); + expect("target_id" in wire).toBe(false); + }); + + it("forwards targetType and targetId as snake_case on the wire", () => { + const request: EvaluationRequest = { + ...baseRequest, + targetType: "environment", + targetId: "env-prod-123", + }; + + const wire = JSON.parse(evaluationRequestToJSON(request)) as Record<string, unknown>; + expect(wire.target_type).toBe("environment"); + expect(wire.target_id).toBe("env-prod-123"); + expect("targetType" in wire).toBe(false); + expect("targetId" in wire).toBe(false); + }); + + 
it("accepts null for target fields without failing", () => { + const request: EvaluationRequest = { + ...baseRequest, + targetType: null, + targetId: null, + }; + + const wire = JSON.parse(evaluationRequestToJSON(request)) as Record<string, unknown>; + expect(wire.target_type).toBeNull(); + expect(wire.target_id).toBeNull(); + }); +}); diff --git a/server/src/agent_control_server/endpoints/evaluation.py b/server/src/agent_control_server/endpoints/evaluation.py index 31df6fea..2d1f62ba 100644 --- a/server/src/agent_control_server/endpoints/evaluation.py +++ b/server/src/agent_control_server/endpoints/evaluation.py @@ -18,8 +18,9 @@ from ..db import get_async_db from ..errors import APIValidationError, NotFoundError from ..logging_utils import get_logger -from ..models import Agent +from ..models import Agent, Target from ..services.controls import list_runtime_controls_for_agent +from ..tenancy import get_tenant_id router = APIRouter(prefix="/evaluation", tags=["evaluation"]) @@ -126,6 +127,7 @@ def _sanitize_evaluation_response(response: EvaluationResponse) -> EvaluationRes async def evaluate( request: EvaluationRequest, client: RequireAPIKey, + tenant_id: str = Depends(get_tenant_id), db: AsyncSession = Depends(get_async_db), ) -> EvaluationResponse: """Analyze content for safety and control violations. @@ -134,6 +136,14 @@ async def evaluate( ``EvaluationResponse`` and does not build or ingest observability events on the server; SDKs reconstruct and emit those events separately through the observability ingestion endpoint. + + Target-bearing requests (``target_type`` and ``target_id`` both set) merge + controls attached to that target into the effective set, with the same + deduplication/precedence rules used by the runtime resolver: when the + same control is attached via both the agent/policy path and the target + path, the agent/policy attachment wins. 
The request body carries the + caller-supplied external target identifier; the server resolves it to + the internal target row via the tenant context. """ del client # Authentication is still required by dependency injection. @@ -150,10 +160,37 @@ async def evaluate( hint="Register the agent via initAgent before evaluating.", ) + resolved_target_id: int | None = None + if request.target_type is not None and request.target_id is not None: + target_result = await db.execute( + select(Target.id).where( + Target.tenant_id == tenant_id, + Target.target_type == request.target_type, + Target.external_id == request.target_id, + ) + ) + resolved_target_id = target_result.scalar_one_or_none() + if resolved_target_id is None: + raise NotFoundError( + error_code=ErrorCode.TARGET_NOT_FOUND, + detail=( + f"Target (type='{request.target_type}', " + f"id='{request.target_id}') not found in this tenant" + ), + resource="Target", + resource_id=request.target_id, + hint=( + "Create the target via POST /api/v1/targets before sending " + "target-bearing evaluation requests, or retry without " + "target_type / target_id for the OSS path." 
+ ), + ) + runtime_controls = await list_runtime_controls_for_agent( request.agent_name, db, allow_invalid_step_name_regex=True, + target_id=resolved_target_id, ) engine_controls = [ControlAdapter(c.id, c.name, c.control) for c in runtime_controls] diff --git a/server/src/agent_control_server/services/controls.py b/server/src/agent_control_server/services/controls.py index 7042972b..4c2329c4 100644 --- a/server/src/agent_control_server/services/controls.py +++ b/server/src/agent_control_server/services/controls.py @@ -17,7 +17,13 @@ from ..errors import APIValidationError from ..logging_utils import get_logger -from ..models import Control, agent_controls, agent_policies, policy_controls +from ..models import ( + Control, + TargetControl, + agent_controls, + agent_policies, + policy_controls, +) from .control_definitions import ( parse_control_definition_or_api_error, parse_runtime_control_definition_or_api_error, @@ -41,8 +47,25 @@ class RuntimeControl: async def _list_db_controls_for_agent( agent_name: str, db: AsyncSession, + *, + target_id: int | None = None, ) -> Sequence[Control]: - """Return DB Control rows for the controls associated with an agent.""" + """Return DB Control rows for the controls associated with an agent. + + When ``target_id`` is None, returns the classic set: direct agent + controls plus policy-derived controls. When provided, also merges in + controls attached to the given target. Resolution rules: + + - A control appears in the effective set if it is attached via + ``agent_controls`` or via ``policy_controls`` through an assigned + policy, regardless of target state. + - A control attached only via ``target_controls`` contributes to the + effective set when ``target_controls.enabled`` is true. + - When the same control appears from multiple sources, the UNION + deduplicates by ``control_id``; the agent-side row effectively + masks the target-side attachment so agent-level attachment takes + precedence over target-level state. 
+ """ policy_control_ids = ( select(policy_controls.c.control_id.label("control_id")) .select_from( @@ -55,7 +78,19 @@ async def _list_db_controls_for_agent( direct_control_ids = select(agent_controls.c.control_id.label("control_id")).where( agent_controls.c.agent_name == agent_name ) - control_ids_subquery = union(policy_control_ids, direct_control_ids).subquery() + + if target_id is None: + control_ids_subquery = union(policy_control_ids, direct_control_ids).subquery() + else: + target_control_ids = select( + TargetControl.control_id.label("control_id") + ).where( + TargetControl.target_id == target_id, + TargetControl.enabled.is_(True), + ) + control_ids_subquery = union( + policy_control_ids, direct_control_ids, target_control_ids + ).subquery() stmt = ( select(Control) @@ -206,9 +241,17 @@ async def list_runtime_controls_for_agent( db: AsyncSession, *, allow_invalid_step_name_regex: bool = False, + target_id: int | None = None, ) -> list[RuntimeControl]: - """Return runtime-parsed controls for evaluation hot paths.""" - db_controls = await _list_db_controls_for_agent(agent_name, db) + """Return runtime-parsed controls for evaluation hot paths. + + When ``target_id`` is provided, the effective set includes enabled + ``target_controls`` attached to that target. When None, behavior matches + the classic agent + policy resolution path. 
+ """ + db_controls = await _list_db_controls_for_agent( + agent_name, db, target_id=target_id + ) runtime_controls: list[RuntimeControl] = [] for c in db_controls: diff --git a/server/tests/test_evaluation_target_resolution.py b/server/tests/test_evaluation_target_resolution.py new file mode 100644 index 00000000..3ed618aa --- /dev/null +++ b/server/tests/test_evaluation_target_resolution.py @@ -0,0 +1,276 @@ +"""End-to-end tests for target-bearing evaluation resolution.""" + +from __future__ import annotations + +import uuid + +from fastapi.testclient import TestClient + +from .utils import VALID_CONTROL_PAYLOAD + +API_PREFIX = "/api/v1" +TENANT_HEADER = "X-Tenant-Id" + + +def _unique(prefix: str) -> str: + return f"{prefix}-{uuid.uuid4().hex[:10]}" + + +def _register_agent(client: TestClient, agent_name: str | None = None) -> str: + name = agent_name or f"agent-{uuid.uuid4().hex[:12]}" + resp = client.post( + f"{API_PREFIX}/agents/initAgent", + json={"agent": {"agent_name": name}, "steps": []}, + ) + assert resp.status_code == 200, resp.text + return name + + +def _deny_on_secret_payload() -> dict: + payload = dict(VALID_CONTROL_PAYLOAD) + payload["description"] = "Deny when input contains 'secret'" + payload["condition"] = { + "selector": {"path": "input"}, + "evaluator": {"name": "regex", "config": {"pattern": "secret"}}, + } + payload["action"] = {"decision": "deny"} + return payload + + +def _create_control(client: TestClient, *, payload: dict | None = None) -> tuple[int, str]: + data = payload if payload is not None else _deny_on_secret_payload() + name = _unique("ctrl") + resp = client.put(f"{API_PREFIX}/controls", json={"name": name, "data": data}) + assert resp.status_code == 200, resp.text + return int(resp.json()["control_id"]), name + + +def _create_target( + client: TestClient, *, tenant: str | None = None +) -> tuple[int, str]: + external_id = _unique("ext") + headers = {TENANT_HEADER: tenant} if tenant else {} + resp = client.post( + 
f"{API_PREFIX}/targets", + headers=headers, + json={"target_type": "environment", "external_id": external_id}, + ) + assert resp.status_code == 201, resp.text + return int(resp.json()["target_id"]), external_id + + +def _attach_control( + client: TestClient, + target_id: int, + control_id: int, + *, + enabled: bool = True, + tenant: str | None = None, +) -> None: + headers = {TENANT_HEADER: tenant} if tenant else {} + resp = client.post( + f"{API_PREFIX}/targets/{target_id}/controls/{control_id}", + headers=headers, + json={"enabled": enabled}, + ) + assert resp.status_code == 200, resp.text + + +def _evaluate( + client: TestClient, + *, + agent_name: str, + input_text: str, + target_type: str | None = None, + target_external_id: str | None = None, + tenant: str | None = None, +) -> dict: + body: dict[str, object] = { + "agent_name": agent_name, + "step": {"type": "llm", "name": "test-step", "input": input_text, "output": None}, + "stage": "pre", + } + if target_type is not None: + body["target_type"] = target_type + if target_external_id is not None: + body["target_id"] = target_external_id + + headers = {TENANT_HEADER: tenant} if tenant else {} + resp = client.post(f"{API_PREFIX}/evaluation", headers=headers, json=body) + assert resp.status_code == 200, resp.text + return resp.json() + + +# --------------------------------------------------------------------------- +# Core merge semantics +# --------------------------------------------------------------------------- + + +def test_target_control_enabled_contributes_to_effective_set( + client: TestClient, +) -> None: + agent_name = _register_agent(client) + control_id, control_name = _create_control(client) + target_id, external_id = _create_target(client) + _attach_control(client, target_id, control_id, enabled=True) + + result = _evaluate( + client, + agent_name=agent_name, + input_text="contains a secret", + target_type="environment", + target_external_id=external_id, + ) + + assert result["is_safe"] is False + 
assert any(m["control_name"] == control_name for m in result["matches"]) + + +def test_target_control_disabled_is_hidden_from_effective_set( + client: TestClient, +) -> None: + agent_name = _register_agent(client) + control_id, _ = _create_control(client) + target_id, external_id = _create_target(client) + _attach_control(client, target_id, control_id, enabled=False) + + result = _evaluate( + client, + agent_name=agent_name, + input_text="contains a secret", + target_type="environment", + target_external_id=external_id, + ) + + assert result["is_safe"] is True + assert not result.get("matches") + + +def test_no_target_request_ignores_target_controls(client: TestClient) -> None: + """Regression: OSS no-target evaluation is unchanged by any target_controls.""" + agent_name = _register_agent(client) + control_id, _ = _create_control(client) + target_id, _ = _create_target(client) + _attach_control(client, target_id, control_id, enabled=True) + + result = _evaluate( + client, agent_name=agent_name, input_text="contains a secret" + ) + + assert result["is_safe"] is True + assert not result.get("matches") + + +def test_agent_control_masks_target_disable(client: TestClient) -> None: + """If a control is attached directly to an agent, target-level disable has no effect.""" + agent_name = _register_agent(client) + control_id, control_name = _create_control(client) + # Attach control directly to agent via the existing endpoint. 
+ direct_resp = client.post( + f"{API_PREFIX}/agents/{agent_name}/controls/{control_id}" + ) + assert direct_resp.status_code == 200, direct_resp.text + + target_id, external_id = _create_target(client) + _attach_control(client, target_id, control_id, enabled=False) + + result = _evaluate( + client, + agent_name=agent_name, + input_text="contains a secret", + target_type="environment", + target_external_id=external_id, + ) + + assert result["is_safe"] is False + assert any(m["control_name"] == control_name for m in result["matches"]) + + +def test_agent_control_and_target_enabled_both_contribute_once( + client: TestClient, +) -> None: + """Deduplication: same control via agent_controls and target_controls counts once.""" + agent_name = _register_agent(client) + control_id, control_name = _create_control(client) + client.post(f"{API_PREFIX}/agents/{agent_name}/controls/{control_id}") + target_id, external_id = _create_target(client) + _attach_control(client, target_id, control_id, enabled=True) + + result = _evaluate( + client, + agent_name=agent_name, + input_text="contains a secret", + target_type="environment", + target_external_id=external_id, + ) + + matches = [m for m in result["matches"] if m["control_name"] == control_name] + assert len(matches) == 1 + + +# --------------------------------------------------------------------------- +# Target resolution edge cases +# --------------------------------------------------------------------------- + + +def test_target_bearing_request_with_unknown_target_returns_404( + client: TestClient, +) -> None: + agent_name = _register_agent(client) + body = { + "agent_name": agent_name, + "step": {"type": "llm", "name": "test-step", "input": "hi", "output": None}, + "stage": "pre", + "target_type": "environment", + "target_id": "unknown-external-id", + } + resp = client.post(f"{API_PREFIX}/evaluation", json=body) + assert resp.status_code == 404 + assert resp.json()["error_code"] == "TARGET_NOT_FOUND" + + +def 
test_target_resolution_is_tenant_scoped(client: TestClient) -> None: + """A target created in tenant-a must not be visible from tenant-b.""" + agent_name = _register_agent(client) + _, external_id = _create_target(client, tenant="tenant-a") + + body = { + "agent_name": agent_name, + "step": {"type": "llm", "name": "test-step", "input": "hi", "output": None}, + "stage": "pre", + "target_type": "environment", + "target_id": external_id, + } + resp = client.post( + f"{API_PREFIX}/evaluation", headers={TENANT_HEADER: "tenant-b"}, json=body + ) + assert resp.status_code == 404 + assert resp.json()["error_code"] == "TARGET_NOT_FOUND" + + +def test_only_target_type_without_target_id_fails_validation( + client: TestClient, +) -> None: + agent_name = _register_agent(client) + body = { + "agent_name": agent_name, + "step": {"type": "llm", "name": "test-step", "input": "hi", "output": None}, + "stage": "pre", + "target_type": "environment", + } + resp = client.post(f"{API_PREFIX}/evaluation", json=body) + assert resp.status_code == 422 + + +def test_only_target_id_without_target_type_fails_validation( + client: TestClient, +) -> None: + agent_name = _register_agent(client) + body = { + "agent_name": agent_name, + "step": {"type": "llm", "name": "test-step", "input": "hi", "output": None}, + "stage": "pre", + "target_id": "some-external-id", + } + resp = client.post(f"{API_PREFIX}/evaluation", json=body) + assert resp.status_code == 422