Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 35 additions & 1 deletion models/src/agent_control_models/evaluation.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Evaluation-related models."""
from typing import Literal

from pydantic import Field, field_validator
from pydantic import Field, field_validator, model_validator

from .agent import AGENT_NAME_MIN_LENGTH, AGENT_NAME_PATTERN, Step, normalize_agent_name
from .base import BaseModel
Expand All @@ -19,6 +19,12 @@ class EvaluationRequest(BaseModel):
agent_name: Unique identifier of the agent making the request
step: Step payload for evaluation
stage: 'pre' (before execution) or 'post' (after execution)
target_type: Optional opaque target kind (e.g. 'environment') for
target-bearing requests. When supplied with ``target_id``, the
server merges any controls attached to that target into the
effective set. Omit for the agent-only path.
target_id: Caller-supplied external identifier of the target, paired
with ``target_type``.
"""
agent_name: str = Field(
...,
Expand All @@ -32,6 +38,22 @@ class EvaluationRequest(BaseModel):
stage: Literal["pre", "post"] = Field(
..., description="Evaluation stage: 'pre' or 'post'"
)
target_type: str | None = Field(
default=None,
max_length=64,
description=(
"Optional target kind for target-bearing requests "
"(e.g. 'environment'). Must be provided together with target_id."
),
)
target_id: str | None = Field(
default=None,
max_length=255,
description=(
"Optional external identifier of the target. Must be provided "
"together with target_type."
),
)

model_config = {
"json_schema_extra": {
Expand Down Expand Up @@ -87,6 +109,18 @@ class EvaluationRequest(BaseModel):
def validate_and_normalize_agent_name(cls, value: str) -> str:
return normalize_agent_name(str(value))

@model_validator(mode="after")
def validate_target_fields_paired(self) -> "EvaluationRequest":
    """Reject requests that carry only one half of the target pair.

    ``target_type`` and ``target_id`` are only meaningful together, so the
    request is accepted when both are ``None`` or both are non-``None``.
    Presence is judged with ``is None``; an empty string counts as supplied.

    Returns:
        This instance, unchanged, when the two fields are consistent.

    Raises:
        ValueError: If exactly one of the two fields is ``None``.
    """
    type_missing = self.target_type is None
    id_missing = self.target_id is None
    if type_missing == id_missing:
        # Both supplied or both omitted: nothing to reject.
        return self
    raise ValueError(
        "target_type and target_id must be provided together; "
        "pass both or omit both."
    )


class EvaluationResponse(BaseModel):
"""
Expand Down
57 changes: 57 additions & 0 deletions models/tests/test_evaluation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""Unit tests for EvaluationRequest target field pairing semantics."""

from __future__ import annotations

import pytest
from pydantic import ValidationError

from agent_control_models import EvaluationRequest
from agent_control_models.agent import Step


def _step() -> Step:
    """Build the minimal LLM step payload used by every request in these tests."""
    step_fields = {
        "type": "llm",
        "name": "chat",
        "input": "hello",
        "output": None,
    }
    return Step(**step_fields)


def test_both_target_fields_unset_is_valid() -> None:
    """An agent-only request validates and leaves both target fields as ``None``."""
    agent_only_kwargs = {
        "agent_name": "test-agent",
        "step": _step(),
        "stage": "pre",
    }
    agent_only = EvaluationRequest(**agent_only_kwargs)

    assert agent_only.target_type is None
    assert agent_only.target_id is None


def test_both_target_fields_set_is_valid() -> None:
    """A request carrying both target fields validates and keeps their values."""
    target_kwargs = {
        "target_type": "environment",
        "target_id": "env-prod-123",
    }
    paired = EvaluationRequest(
        agent_name="test-agent",
        step=_step(),
        stage="pre",
        **target_kwargs,
    )

    assert paired.target_type == "environment"
    assert paired.target_id == "env-prod-123"


def test_only_target_type_set_raises() -> None:
    """Supplying ``target_type`` without ``target_id`` fails validation."""
    expected_fragment = "target_type and target_id must be provided together"
    unpaired_kwargs = {
        "agent_name": "test-agent",
        "step": _step(),
        "stage": "pre",
        "target_type": "environment",
    }

    with pytest.raises(ValidationError) as excinfo:
        EvaluationRequest(**unpaired_kwargs)

    # Checked after the context manager exits so the assertion actually runs.
    assert expected_fragment in str(excinfo.value)


def test_only_target_id_set_raises() -> None:
    """Supplying ``target_id`` without ``target_type`` fails validation."""
    expected_fragment = "target_type and target_id must be provided together"
    unpaired_kwargs = {
        "agent_name": "test-agent",
        "step": _step(),
        "stage": "pre",
        "target_id": "env-prod-123",
    }

    with pytest.raises(ValidationError) as excinfo:
        EvaluationRequest(**unpaired_kwargs)

    # Checked after the context manager exits so the assertion actually runs.
    assert expected_fragment in str(excinfo.value)
2 changes: 2 additions & 0 deletions sdks/python/tests/test_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ def json(self) -> dict[str, object]:
"context": None,
},
"stage": "pre",
"target_type": None,
"target_id": None,
},
headers=None,
)
Expand Down
8 changes: 8 additions & 0 deletions sdks/typescript/src/generated/funcs/evaluation-evaluate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,14 @@ import { Result } from "../types/fp.js";
* ``EvaluationResponse`` and does not build or ingest observability events
* on the server; SDKs reconstruct and emit those events separately through
* the observability ingestion endpoint.
*
* Target-bearing requests (``target_type`` and ``target_id`` both set) merge
* controls attached to that target into the effective set, with the same
* deduplication/precedence rules used by the runtime resolver: when the
* same control is attached via both the agent/policy path and the target
* path, the agent/policy attachment wins. The request body carries the
* caller-supplied external target identifier; the server resolves it to
* the internal target row via the tenant context.
*/
export function evaluationEvaluate(
client: AgentControlSDKCore,
Expand Down
20 changes: 20 additions & 0 deletions sdks/typescript/src/generated/models/evaluation-request.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ export type Stage = ClosedEnum<typeof Stage>;
* agent_name: Unique identifier of the agent making the request
* step: Step payload for evaluation
* stage: 'pre' (before execution) or 'post' (after execution)
* target_type: Optional opaque target kind (e.g. 'environment') for
* target-bearing requests. When supplied with ``target_id``, the
* server merges any controls attached to that target into the
* effective set. Omit for the agent-only path.
* target_id: Caller-supplied external identifier of the target, paired
* with ``target_type``.
*/
export type EvaluationRequest = {
/**
Expand All @@ -45,6 +51,14 @@ export type EvaluationRequest = {
* Runtime payload for an agent step invocation.
*/
step: Step;
/**
* Optional external identifier of the target. Must be provided together with target_type.
*/
targetId?: string | null | undefined;
/**
* Optional target kind for target-bearing requests (e.g. 'environment'). Must be provided together with target_id.
*/
targetType?: string | null | undefined;
};

/** @internal */
Expand All @@ -55,6 +69,8 @@ export type EvaluationRequest$Outbound = {
agent_name: string;
stage: string;
step: Step$Outbound;
target_id?: string | null | undefined;
target_type?: string | null | undefined;
};

/** @internal */
Expand All @@ -66,10 +82,14 @@ export const EvaluationRequest$outboundSchema: z.ZodMiniType<
agentName: z.string(),
stage: Stage$outboundSchema,
step: Step$outboundSchema,
targetId: z.optional(z.nullable(z.string())),
targetType: z.optional(z.nullable(z.string())),
}),
z.transform((v) => {
return remap$(v, {
agentName: "agent_name",
targetId: "target_id",
targetType: "target_type",
});
}),
);
Expand Down
8 changes: 8 additions & 0 deletions sdks/typescript/src/generated/sdk/evaluation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@ export class Evaluation extends ClientSDK {
* ``EvaluationResponse`` and does not build or ingest observability events
* on the server; SDKs reconstruct and emit those events separately through
* the observability ingestion endpoint.
*
* Target-bearing requests (``target_type`` and ``target_id`` both set) merge
* controls attached to that target into the effective set, with the same
* deduplication/precedence rules used by the runtime resolver: when the
* same control is attached via both the agent/policy path and the target
* path, the agent/policy attachment wins. The request body carries the
* caller-supplied external target identifier; the server resolves it to
* the internal target row via the tenant context.
*/
async evaluate(
request: models.EvaluationRequest,
Expand Down
49 changes: 49 additions & 0 deletions sdks/typescript/tests/evaluation-request.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import { describe, expect, it } from "vitest";

import { evaluationRequestToJSON } from "../src/generated/models/evaluation-request";
import type { EvaluationRequest } from "../src/generated/models/evaluation-request";

describe("EvaluationRequest serialization", () => {
  // Smallest valid request; each case layers target fields on top of it.
  const baseRequest: EvaluationRequest = {
    agentName: "test-agent-01",
    stage: "pre",
    step: {
      type: "llm",
      name: "chat",
      input: "hello",
    },
  };

  // Serialize through the generated helper, then parse the JSON back so the
  // assertions inspect the exact keys that would go over the wire.
  const toWire = (request: EvaluationRequest): Record<string, unknown> =>
    JSON.parse(evaluationRequestToJSON(request)) as Record<string, unknown>;

  it("omits target fields from the wire payload when unset", () => {
    const payload = toWire(baseRequest);

    expect(payload.agent_name).toBe("test-agent-01");
    expect("target_type" in payload).toBe(false);
    expect("target_id" in payload).toBe(false);
  });

  it("forwards targetType and targetId as snake_case on the wire", () => {
    const withTarget: EvaluationRequest = {
      ...baseRequest,
      targetType: "environment",
      targetId: "env-prod-123",
    };
    const payload = toWire(withTarget);

    // The snake_case keys carry the values...
    expect(payload.target_type).toBe("environment");
    expect(payload.target_id).toBe("env-prod-123");
    // ...and the camelCase originals must not leak into the payload.
    expect("targetType" in payload).toBe(false);
    expect("targetId" in payload).toBe(false);
  });

  it("accepts null for target fields without failing", () => {
    const withNullTarget: EvaluationRequest = {
      ...baseRequest,
      targetType: null,
      targetId: null,
    };
    const payload = toWire(withNullTarget);

    expect(payload.target_type).toBeNull();
    expect(payload.target_id).toBeNull();
  });
});
39 changes: 38 additions & 1 deletion server/src/agent_control_server/endpoints/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@
from ..db import get_async_db
from ..errors import APIValidationError, NotFoundError
from ..logging_utils import get_logger
from ..models import Agent
from ..models import Agent, Target
from ..services.controls import list_runtime_controls_for_agent
from ..tenancy import get_tenant_id

router = APIRouter(prefix="/evaluation", tags=["evaluation"])

Expand Down Expand Up @@ -126,6 +127,7 @@ def _sanitize_evaluation_response(response: EvaluationResponse) -> EvaluationRes
async def evaluate(
request: EvaluationRequest,
client: RequireAPIKey,
tenant_id: str = Depends(get_tenant_id),
db: AsyncSession = Depends(get_async_db),
) -> EvaluationResponse:
"""Analyze content for safety and control violations.
Expand All @@ -134,6 +136,14 @@ async def evaluate(
``EvaluationResponse`` and does not build or ingest observability events
on the server; SDKs reconstruct and emit those events separately through
the observability ingestion endpoint.

Target-bearing requests (``target_type`` and ``target_id`` both set) merge
controls attached to that target into the effective set, with the same
deduplication/precedence rules used by the runtime resolver: when the
same control is attached via both the agent/policy path and the target
path, the agent/policy attachment wins. The request body carries the
caller-supplied external target identifier; the server resolves it to
the internal target row via the tenant context.
"""
del client # Authentication is still required by dependency injection.

Expand All @@ -150,10 +160,37 @@ async def evaluate(
hint="Register the agent via initAgent before evaluating.",
)

resolved_target_id: int | None = None
if request.target_type is not None and request.target_id is not None:
target_result = await db.execute(
select(Target.id).where(
Target.tenant_id == tenant_id,
Target.target_type == request.target_type,
Target.external_id == request.target_id,
)
)
resolved_target_id = target_result.scalar_one_or_none()
if resolved_target_id is None:
raise NotFoundError(
error_code=ErrorCode.TARGET_NOT_FOUND,
detail=(
f"Target (type='{request.target_type}', "
f"id='{request.target_id}') not found in this tenant"
),
resource="Target",
resource_id=request.target_id,
hint=(
"Create the target via POST /api/v1/targets before sending "
"target-bearing evaluation requests, or retry without "
"target_type / target_id for the OSS path."
),
)

runtime_controls = await list_runtime_controls_for_agent(
request.agent_name,
db,
allow_invalid_step_name_regex=True,
target_id=resolved_target_id,
)
engine_controls = [ControlAdapter(c.id, c.name, c.control) for c in runtime_controls]

Expand Down
Loading
Loading