diff --git a/README.md b/README.md
index 3e44d86d..7842f5bf 100644
--- a/README.md
+++ b/README.md
@@ -83,37 +83,53 @@ Per-model `model_settings` can include:
 
 ### Backends
 
-The runner can drive two SDKs behind a common interface:
+The runner can drive three SDKs behind a common interface:
 
 - **`openai_agents`** (default) — the OpenAI Agents Python SDK. Supports
   multi-personality handoffs, both `chat_completions` and `responses`
   `api_type`, `temperature`, `parallel_tool_calls`,
   `exclude_from_context`, and MCP over stdio, SSE, and streamable HTTP.
-- **`copilot_sdk`** (optional, `pip install seclab-taskflow-agent[copilot]`)
-  — the GitHub Copilot Python SDK. Supports streaming, `reasoning_effort`,
-  MCP over stdio/SSE/HTTP, and per-tool permission gating. The SDK
-  selects its own wire protocol per model, so the YAML `api_type` field
-  is not honoured; multi-personality handoffs, `temperature`, and
-  `parallel_tool_calls` are likewise not available. Taskflows that use
-  unsupported fields fail at load time with a `BackendCapabilityError`
-  naming the offending field.
-
-Selection precedence:
-
-1. `backend:` field in the model config document.
-2. `SECLAB_TASKFLOW_BACKEND` environment variable.
-3. Endpoint auto-default (`api.githubcopilot.com` prefers `copilot_sdk`
-   when the optional dependency is installed).
-4. `openai_agents`.
+- **`copilot_sdk`** — the GitHub Copilot Python SDK. Supports streaming,
+  `reasoning_effort`, MCP over stdio/SSE/HTTP, and per-tool permission
+  gating. The SDK selects its own wire protocol per model, so the YAML
+  `api_type` field is not honoured; multi-personality handoffs,
+  `temperature`, and `parallel_tool_calls` are likewise not available.
+  Taskflows that use unsupported fields fail at load time with a
+  `BackendCapabilityError` naming the offending field.
+- **`anthropic_sdk`** — the Anthropic Python SDK, driving the native
+  Messages API (`/v1/messages`). Supports streaming, tool calling via
+  MCP, and adaptive thinking with configurable `reasoning.effort`
+  (`low`, `medium`, `high`, `max`). Handoffs are not supported.
+  Designed for CAPI's Anthropic endpoint: known providers authenticate with
+  `Authorization: Bearer`; unrecognised endpoints use native `x-api-key`.
+
+Selection precedence (highest to lowest):
+
+1. Per-task `backend:` in the task's own `model_settings` block (overrides
+   the model-level value for that one task; see `_resolve_task_model()`).
+2. Per-model `backend:` in the model config's `model_settings` (allows
+   mixed backends in a single taskflow).
+3. `backend:` field at the top level of the model config document
+   (global default).
+4. `SECLAB_TASKFLOW_BACKEND` environment variable.
+5. `openai_agents`.
 
 ```yaml
 seclab-taskflow-agent:
   version: "1.0"
   filetype: model_config
-backend: copilot_sdk
 models:
-  fast: gpt-5-mini
-  slow: claude-opus-4.6
+  code_analysis: claude-opus-4.7
+  general_tasks: gpt-5.4-mini
+model_settings:
+  code_analysis:
+    api_type: messages
+    backend: anthropic_sdk
+    reasoning:
+      effort: high
+  general_tasks:
+    api_type: responses
+    backend: openai_agents
 ```
 
 ### Session Recovery
diff --git a/doc/GRAMMAR.md b/doc/GRAMMAR.md
index b7e16ee9..88b0bf9f 100644
--- a/doc/GRAMMAR.md
+++ b/doc/GRAMMAR.md
@@ -524,6 +524,7 @@ api_type: chat_completions        # default for all models
 models:
   gpt_default: gpt-4.1
   gpt_responses: gpt-5.1
+  claude_native: claude-opus-4.7
 model_settings:
   gpt_default:
     temperature: 0.7
@@ -532,6 +533,11 @@ model_settings:
     endpoint: https://api.githubcopilot.com
     token: CAPI_TOKEN             # env var name containing the API key
     temperature: 0.5
+  claude_native:
+    api_type: messages            # use the Anthropic Messages API
+    backend: anthropic_sdk
+    reasoning:
+      effort: high
 ```
 
 The following keys in `model_settings` are handled by the engine and are not
@@ -539,9 +545,9 @@ passed to the underlying model provider:
 
 | Key | Description | Default |
 |-----|-------------|---------|
-| `api_type` | `"chat_completions"` or `"responses"` | Inherited from top-level `api_type`, or `"chat_completions"` |
+| `api_type` | `"chat_completions"`, `"responses"`, or `"messages"` | Inherited from top-level `api_type`, or `"chat_completions"` |
+| `backend` | SDK adapter: `"openai_agents"`, `"copilot_sdk"`, or `"anthropic_sdk"` | Inherited from top-level `backend`, then the `SECLAB_TASKFLOW_BACKEND` env var, or `"openai_agents"` |
 | `endpoint` | API base URL for this model | The global `AI_API_ENDPOINT` env var |
 | `token` | Name of an environment variable containing the API key | Uses `AI_API_TOKEN` / `COPILOT_TOKEN` |
 
-All other keys (e.g. `temperature`, `top_p`) are passed through as model
-parameters to the OpenAI SDK.
+All other keys (e.g. `temperature`, `top_p`, `reasoning`) are forwarded to the selected SDK backend. Each backend decides what to do with each key: `openai_agents` accepts the standard OpenAI parameter set; `anthropic_sdk` forwards a curated subset (currently `temperature`, `top_p`, `reasoning`, `max_tokens`, `stream_thinking`, `prompt_caching`) and silently ignores keys outside that set; `copilot_sdk` consumes the keys its SDK exposes (e.g. `reasoning_effort`) and **rejects** unsupported keys at validate time with `BackendCapabilityError` (currently `temperature` and `parallel_tool_calls`) rather than silently dropping them. Consult the backend-specific docs if in doubt.
diff --git a/pyproject.toml b/pyproject.toml
index 6f805a7f..aa163f75 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -38,6 +38,7 @@ classifiers = [
 dependencies = [
   "aiofiles==24.1.0",
   "annotated-types==0.7.0",
+  "anthropic>=0.50,<1",
   "anyio==4.9.0",
   "attrs==25.3.0",
   "Authlib==1.6.12",
@@ -55,6 +56,7 @@ dependencies = [
   "email-validator==2.3.0",
   "exceptiongroup==1.3.0",
   "fastmcp==3.2.0",
+  "github-copilot-sdk>=0.2.2,<0.3",
   "griffe==1.7.3",
   "h11==0.16.0",
   "httpcore==1.0.9",
@@ -124,15 +126,6 @@ dependencies = [
 [project.scripts]
 seclab-taskflow-agent = "seclab_taskflow_agent.cli:app"
 
-[project.optional-dependencies]
-# Pulls in the GitHub Copilot SDK (public preview) so the copilot_sdk
-# backend can be selected. Requires Python >= 3.11. Pinned to the
-# 0.2.x line because the SDK may ship breaking changes between minor
-# versions while still in preview.
-copilot = [
-  "github-copilot-sdk>=0.2.2,<0.3",
-]
-
 [project.urls]
 Source = "https://github.com/GitHubSecurityLab/seclab-taskflow-agent"
 Issues = "https://github.com/GitHubSecurityLab/seclab-taskflow-agent/issues"
diff --git a/src/seclab_taskflow_agent/capi.py b/src/seclab_taskflow_agent/capi.py
index a605258f..3d635c0d 100644
--- a/src/seclab_taskflow_agent/capi.py
+++ b/src/seclab_taskflow_agent/capi.py
@@ -50,6 +50,7 @@ class APIProvider:
     models_catalog: str = "/models"
     default_model: str = "gpt-4.1"
     extra_headers: Mapping[str, str] = field(default_factory=dict)
+    bearer_auth: bool = True  # Use Authorization: Bearer (not x-api-key)
 
     def __post_init__(self) -> None:
         # Ensure base_url ends with / so httpx URL.join() preserves the path
@@ -110,7 +111,7 @@ class _OpenAIProvider(APIProvider):
     we maintain a prefix allowlist of known chat-completion model families.
     """
 
-    _CHAT_PREFIXES = ("gpt-3.5", "gpt-4", "o1", "o3", "o4", "chatgpt-")
+    _CHAT_PREFIXES = ("gpt-3.5", "gpt-4", "gpt-5", "o1", "o3", "o4", "chatgpt-")
 
     def check_tool_calls(self, _model: str, model_info: dict) -> bool:
         model_id = model_info.get("id", "").lower()
@@ -172,8 +173,9 @@ def get_provider(endpoint: str | None = None) -> APIProvider:
         if upstream:
             return dataclasses.replace(upstream, base_url=url)
 
-    # Unknown endpoint — return a generic provider with the given base URL
-    return APIProvider(name="custom", base_url=url, default_model="please-set-default-model-via-env")
+    # Unknown endpoint — return a generic provider using native SDK auth.
+    return APIProvider(name="custom", base_url=url, bearer_auth=False,
+                       default_model="please-set-default-model-via-env")
 
 
 # ---------------------------------------------------------------------------
diff --git a/src/seclab_taskflow_agent/mcp_utils.py b/src/seclab_taskflow_agent/mcp_utils.py
index 92968986..36d1df7e 100644
--- a/src/seclab_taskflow_agent/mcp_utils.py
+++ b/src/seclab_taskflow_agent/mcp_utils.py
@@ -97,6 +97,41 @@ async def list_tools(self, *args: Any, **kwargs: Any) -> list[Any]:
             namespaced_tools.append(tool_copy)
         return namespaced_tools
 
+    async def list_tools_unfiltered(self) -> list[Any]:
+        """Return every tool of the wrapped server, namespace-prefixed.
+
+        The MCP session is queried directly, so a tool_filter configured on
+        the wrapped openai-agents server is bypassed: that filter requires
+        ``run_context`` and ``agent`` arguments, which aren't available when
+        tools are listed outside the openai-agents run loop -- e.g. when
+        handing tools to a different SDK at build time.
+
+        Prefixing is idempotent: a name that already starts with this
+        wrapper's namespace has the prefix stripped before it is re-applied,
+        so repeated calls never yield ``<ns><ns>name``.
+
+        Raises:
+            RuntimeError: the underlying server has no active MCP session
+                yet (connect the server before calling this).
+        """
+        mcp_session = getattr(self._obj, "session", None)
+        if mcp_session is None:
+            raise RuntimeError(
+                f"MCPNamespaceWrap({self._obj!r}): underlying server has no "
+                "active MCP session; cannot list tools unfiltered"
+            )
+
+        listing = await mcp_session.list_tools()
+        prefixed: list[Any] = []
+        for entry in listing.tools:
+            # Rename a copy when the tool type provides copy().
+            clone = entry.copy() if hasattr(entry, "copy") else entry
+            # Drop an existing prefix first so the result has exactly one.
+            bare_name = clone.name.removeprefix(self.namespace)
+            clone.name = f"{self.namespace}{bare_name}"
+            prefixed.append(clone)
+        return prefixed
+
     def confirm_tool(self, tool_name: str, args: list[Any]) -> bool:
         """Interactively prompt the user for tool-call confirmation.
 
diff --git a/src/seclab_taskflow_agent/models.py b/src/seclab_taskflow_agent/models.py
index eff05ee6..837e4e2c 100644
--- a/src/seclab_taskflow_agent/models.py
+++ b/src/seclab_taskflow_agent/models.py
@@ -31,10 +31,10 @@
 from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
 
 # Valid API type values for model configuration.
-ApiType = Literal["chat_completions", "responses"]
+ApiType = Literal["chat_completions", "responses", "messages"]
 
 # Valid backend names. Must stay in sync with ``sdk._KNOWN``.
-BackendSdk = Literal["openai_agents", "copilot_sdk"]
+BackendSdk = Literal["openai_agents", "copilot_sdk", "anthropic_sdk"]
 
 
 # ---------------------------------------------------------------------------
diff --git a/src/seclab_taskflow_agent/runner.py b/src/seclab_taskflow_agent/runner.py
index 12d36bd8..b5ed43fd 100644
--- a/src/seclab_taskflow_agent/runner.py
+++ b/src/seclab_taskflow_agent/runner.py
@@ -126,12 +126,12 @@ def _resolve_task_model(
     model_dict: dict[str, str],
     models_params: dict[str, dict[str, Any]],
     default_api_type: str = "chat_completions",
-) -> tuple[str, dict[str, Any], str, str | None, str | None]:
+) -> tuple[str, dict[str, Any], str, str | None, str | None, str | None]:
     """Resolve the final model name, settings, and per-model overrides.
 
     Returns:
-        A tuple of ``(model_id, model_settings, api_type, endpoint, token)``
-        where *endpoint* and *token* are ``None`` when not overridden.
+        A tuple of ``(model_id, model_settings, api_type, endpoint, token, backend)``
+        where *endpoint*, *token*, and *backend* are ``None`` when not overridden.
 
     Raises:
         ValueError: If task-level model_settings is not a dictionary.
@@ -141,6 +141,7 @@ def _resolve_task_model(
     api_type: str = default_api_type
     endpoint: str | None = None
     token: str | None = None
+    backend: str | None = None
 
     if logical_name in model_keys:
         if logical_name in models_params:
@@ -151,6 +152,7 @@ def _resolve_task_model(
     api_type = model_settings.pop("api_type", api_type)
     endpoint = model_settings.pop("endpoint", None)
     token = model_settings.pop("token", None)
+    backend = model_settings.pop("backend", None)
 
     task_model_settings: dict[str, Any] | Any = task.model_settings or {}
     if not isinstance(task_model_settings, dict):
@@ -161,9 +163,10 @@ def _resolve_task_model(
     api_type = task_settings.pop("api_type", api_type)
     endpoint = task_settings.pop("endpoint", endpoint)
     token = task_settings.pop("token", token)
+    backend = task_settings.pop("backend", backend)
 
     model_settings.update(task_settings)
-    return logical_name, model_settings, api_type, endpoint, token
+    return logical_name, model_settings, api_type, endpoint, token, backend
 
 
 async def _build_prompts_to_run(
@@ -600,8 +603,8 @@ async def on_handoff_hook(context: RunContextWrapper[TContext], agent: Agent[TCo
             if task.uses:
                 task = _merge_reusable_task(available_tools, task)
 
-            # Resolve model (name, settings, api_type, optional endpoint/token)
-            model, model_settings, task_api_type, task_endpoint, task_token = _resolve_task_model(
+            # Resolve model (name, settings, api_type, optional endpoint/token/backend)
+            model, model_settings, task_api_type, task_endpoint, task_token, task_backend = _resolve_task_model(
                 task, model_keys, model_dict, models_params, default_api_type=api_type,
             )
 
@@ -697,7 +700,7 @@ async def _deploy(ra: dict, pp: str) -> bool:
                                     api_type=task_api_type,
                                     endpoint=task_endpoint,
                                     token=task_token,
-                                    backend=backend,
+                                    backend=task_backend or backend,
                                     agent_hooks=TaskAgentHooks(on_handoff=on_handoff_hook),
                                 )
 
diff --git a/src/seclab_taskflow_agent/sdk/__init__.py b/src/seclab_taskflow_agent/sdk/__init__.py
index 15086922..5eefafb2 100644
--- a/src/seclab_taskflow_agent/sdk/__init__.py
+++ b/src/seclab_taskflow_agent/sdk/__init__.py
@@ -3,9 +3,9 @@
 
 """Backend factory for the agent runner.
 
-Two backends are supported: ``openai_agents`` (default) and
-``copilot_sdk`` (optional, requires ``pip install
-seclab-taskflow-agent[copilot]``).
+Three backends are supported: ``openai_agents`` (default), ``copilot_sdk``,
+and ``anthropic_sdk``.  All three are always available because per-task
+backend selection means any SDK may be needed at runtime.
 """
 
 from __future__ import annotations
@@ -33,7 +33,7 @@
 )
 
 _ENV_VAR = "SECLAB_TASKFLOW_BACKEND"
-_KNOWN = ("openai_agents", "copilot_sdk")
+_KNOWN = ("openai_agents", "copilot_sdk", "anthropic_sdk")
 _BACKENDS: dict[str, AgentBackend] = {}
 
 
@@ -46,10 +46,16 @@ def get_backend(name: str) -> AgentBackend:
             from .openai_agents.backend import OpenAIAgentsBackend
 
             _BACKENDS[name] = OpenAIAgentsBackend()
-        else:
+        elif name == "copilot_sdk":
             from .copilot_sdk.backend import CopilotSDKBackend
 
             _BACKENDS[name] = CopilotSDKBackend()
+        elif name == "anthropic_sdk":
+            from .anthropic_sdk.backend import AnthropicSDKBackend
+
+            _BACKENDS[name] = AnthropicSDKBackend()
+        else:
+            raise ValueError(f"No backend implementation for {name!r}")
     return _BACKENDS[name]
 
 
@@ -64,8 +70,9 @@ def resolve_backend_name(
     ``SECLAB_TASKFLOW_BACKEND`` env var > ``openai_agents``.
 
     Backend selection is always deterministic — there is no auto-detection
-    based on endpoint URL.  Use ``backend: copilot_sdk`` in model config
-    or set ``SECLAB_TASKFLOW_BACKEND=copilot_sdk`` to opt in.
+    based on endpoint URL.  Use ``backend: copilot_sdk`` or ``backend:
+    anthropic_sdk`` in model config (or set
+    ``SECLAB_TASKFLOW_BACKEND=<name>``) to opt in.
 
     The *endpoint* parameter is accepted for forward compatibility but
     is not used for backend selection.
diff --git a/src/seclab_taskflow_agent/sdk/anthropic_sdk/__init__.py b/src/seclab_taskflow_agent/sdk/anthropic_sdk/__init__.py
new file mode 100644
index 00000000..03ec0700
--- /dev/null
+++ b/src/seclab_taskflow_agent/sdk/anthropic_sdk/__init__.py
@@ -0,0 +1,4 @@
+# SPDX-FileCopyrightText: GitHub, Inc.
+# SPDX-License-Identifier: MIT
+
+"""Anthropic SDK backend adapter."""
diff --git a/src/seclab_taskflow_agent/sdk/anthropic_sdk/backend.py b/src/seclab_taskflow_agent/sdk/anthropic_sdk/backend.py
new file mode 100644
index 00000000..21dab7e0
--- /dev/null
+++ b/src/seclab_taskflow_agent/sdk/anthropic_sdk/backend.py
@@ -0,0 +1,360 @@
+# SPDX-FileCopyrightText: GitHub, Inc.
+# SPDX-License-Identifier: MIT
+
+"""Anthropic SDK backend adapter.
+
+Drives the Anthropic Messages API (``/v1/messages``) via the official
+``anthropic`` Python SDK. Supports streaming, tool calling via MCP
+servers, and adaptive thinking with a configurable effort level.
+
+Auth note: The Anthropic SDK sends ``x-api-key`` by default, but
+providers that use Bearer auth (see ``APIProvider.bearer_auth``)
+need ``Authorization: Bearer`` instead.  We pass the bearer header
+via ``default_headers`` and set ``api_key`` to a placeholder so the
+SDK doesn't send the real token via x-api-key.
+"""
+
+from __future__ import annotations
+
+__all__ = ["AnthropicSDKBackend"]
+
+import logging
+import os
+from collections.abc import AsyncIterator
+from dataclasses import dataclass, field
+from typing import Any
+
+from ..base import AgentSpec, StreamEvent, TextDelta, ToolEnd
+from ..errors import (
+    BackendBadRequestError,
+    BackendCapabilityError,
+    BackendMaxTurnsError,
+    BackendRateLimitError,
+    BackendTimeoutError,
+    BackendUnexpectedError,
+)
+
+logger = logging.getLogger(__name__)
+
+_VALID_REASONING = ("low", "medium", "high", "max")
+
+
+def _mcp_tools_to_anthropic(tools: list[Any]) -> list[dict[str, Any]]:
+    """Translate MCP tool definitions into Anthropic ``tools`` entries."""
+    # Fallbacks: a missing/empty description becomes the tool name, and a
+    # missing/empty input schema becomes an object schema with no properties.
+    return [
+        {
+            "name": t.name,
+            "description": getattr(t, "description", None) or t.name,
+            "input_schema": getattr(t, "inputSchema", {}) or {"type": "object", "properties": {}},
+        }
+        for t in tools
+    ]
+
+
+def _call_tool_result_to_text(result: Any) -> str:
+    """Extract text from an MCP CallToolResult.
+
+    Joins the ``text`` of every text-bearing content block with newlines.
+    Empty strings are preserved: ``TextContent(text="")`` is an explicit
+    empty result, not "no content".  Falls back to ``str(result)`` (a noisy
+    repr) only when no block carries text at all -- including when
+    ``content`` is missing or ``None``, which must not raise.
+    """
+    blocks = getattr(result, "content", None) or []
+    texts = [
+        text for block in blocks
+        if (text := getattr(block, "text", None)) is not None
+    ]
+    return "\n".join(texts) if texts else str(result)
+
+
+@dataclass
+class _AnthropicHandle:
+    """Holds the Anthropic client and the per-agent request settings."""
+    client: Any  # anthropic.AsyncAnthropic created by AnthropicSDKBackend.build()
+    system_prompt: str  # sent as ``system=`` on every request
+    model: str
+    max_tokens: int  # sent as ``max_tokens=`` on every request
+    tools: list[dict[str, Any]]  # tool definitions already in Anthropic format
+    mcp_server_map: dict[str, Any]  # tool_name -> MCP server handle
+    model_settings: dict[str, Any] = field(default_factory=dict)
+    stream_thinking: bool = False  # also yield thinking deltas as TextDelta
+    exclude_from_context: bool = False  # end the run after one round of tool calls
+
+
+class AnthropicSDKBackend:
+    """Adapter that drives the Anthropic Python SDK."""
+
+    name = "anthropic_sdk"
+
+    def validate(self, spec: AgentSpec) -> None:
+        """Reject specs that use handoffs or that name no model."""
+        uses_handoffs = spec.handoffs or spec.in_handoff_graph
+        if uses_handoffs:
+            raise BackendCapabilityError("anthropic_sdk: agent handoffs are not supported")
+        if not spec.model:
+            raise BackendBadRequestError("anthropic_sdk: model is required")
+
+    async def build(
+        self,
+        spec: AgentSpec,
+        *,
+        run_hooks: Any = None,
+        agent_hooks: Any = None,
+    ) -> _AnthropicHandle:
+        """Create the Anthropic client and gather the MCP tools for *spec*.
+
+        Raises BackendBadRequestError when no API token can be resolved.
+        """
+        del run_hooks, agent_hooks  # unused by this backend
+
+        import anthropic
+
+        from ...capi import get_AI_endpoint, get_AI_token, get_provider
+
+        # Resolve token: per-model env var override, then standard token chain.
+        # Wrap RuntimeError from get_AI_token (env var not set) so the runner
+        # surfaces it as a request error rather than an internal exception.
+        token = os.getenv(spec.token_env, "") if spec.token_env else ""
+        if not token:
+            try:
+                token = get_AI_token()
+            except RuntimeError as exc:
+                raise BackendBadRequestError(
+                    f"anthropic_sdk: no API token available ({exc})"
+                ) from exc
+        if not token:
+            raise BackendBadRequestError(
+                "anthropic_sdk: no API token available "
+                "(checked spec.token_env then standard token chain)"
+            )
+
+        endpoint = spec.endpoint or get_AI_endpoint()
+        provider = get_provider(endpoint)
+
+        # Providers with bearer_auth=True need Authorization: Bearer instead
+        # of the Anthropic SDK's native x-api-key header. Use a placeholder
+        # api_key so the SDK doesn't also send the real token via x-api-key.
+        # Endpoints not in the provider registry default to native SDK auth.
+        headers: dict[str, str] = dict(provider.extra_headers)
+        if provider.bearer_auth:
+            headers["Authorization"] = f"Bearer {token}"
+
+        client = anthropic.AsyncAnthropic(
+            api_key="placeholder" if provider.bearer_auth else token,
+            base_url=endpoint,
+            default_headers=headers or None,
+        )
+
+        # Collect tools from MCP servers and apply the blocked_tools filter.
+        # list_tools_unfiltered() is used instead of list_tools(), which
+        # needs run_context/agent args for the openai-agents tool_filter --
+        # args that don't exist outside the openai-agents run loop.
+        #
+        # blocked_tools in taskflow YAML are raw (un-namespaced) names, as
+        # for openai_agents and copilot_sdk, but the listing is namespace-
+        # prefixed: match both forms. mcp_server_map is keyed by namespaced
+        # name because that's what Anthropic sends back in tool_use blocks.
+        all_tools: list[dict[str, Any]] = []
+        mcp_server_map: dict[str, Any] = {}
+        blocked = set(spec.blocked_tools or [])
+
+        def _is_blocked(tool: Any, namespace: str) -> bool:
+            name = tool.name
+            if name in blocked:
+                return True
+            return name.startswith(namespace) and name[len(namespace):] in blocked
+
+        for mcp_spec in spec.mcp_servers:
+            native_server = mcp_spec.params.get("_native")
+            if native_server is None:
+                continue
+            try:
+                mcp_tools = await native_server.list_tools_unfiltered()
+                namespace = getattr(native_server, "namespace", "")
+                kept = [t for t in mcp_tools if not _is_blocked(t, namespace)]
+                for tool in kept:
+                    mcp_server_map[tool.name] = native_server
+                all_tools.extend(_mcp_tools_to_anthropic(kept))
+            except Exception:  # best-effort: a failing server contributes no tools
+                logger.exception("Failed to list tools from MCP server %s", mcp_spec.name)
+
+        # Resolve max_tokens from model_settings or default
+        max_tokens = spec.model_settings.get("max_tokens", 16384)
+        stream_thinking = spec.model_settings.get("stream_thinking", False)
+
+        return _AnthropicHandle(
+            client=client,
+            system_prompt=spec.instructions or "",
+            model=spec.model,
+            max_tokens=max_tokens,
+            tools=all_tools,
+            mcp_server_map=mcp_server_map,
+            model_settings=spec.model_settings,
+            stream_thinking=stream_thinking,
+            exclude_from_context=spec.exclude_from_context,
+        )
+
+    async def run_streamed(
+        self,
+        agent: Any,
+        prompt: str,
+        *,
+        max_turns: int,
+    ) -> AsyncIterator[StreamEvent]:
+        """Run the tool-calling loop for *prompt*, yielding stream events.
+
+        Yields a ``TextDelta`` per streamed text delta (and per thinking
+        delta when ``stream_thinking`` is set) and a ``ToolEnd`` per tool
+        call.  Raises ``BackendMaxTurnsError`` if the model still requests
+        tools after *max_turns* requests; Anthropic API errors are
+        re-raised as the matching ``Backend*Error``.
+        """
+        handle: _AnthropicHandle = agent
+        messages: list[dict[str, Any]] = [{"role": "user", "content": prompt}]
+
+        # Build optional params
+        create_kwargs: dict[str, Any] = {}
+
+        # Pass through temperature/top_p if set
+        temperature = handle.model_settings.get("temperature")
+        if temperature is not None:
+            create_kwargs["temperature"] = float(temperature)
+        top_p = handle.model_settings.get("top_p")
+        if top_p is not None:
+            create_kwargs["top_p"] = float(top_p)
+
+        reasoning = handle.model_settings.get("reasoning")
+        if isinstance(reasoning, dict):
+            effort = reasoning.get("effort")
+            if effort:
+                if effort not in _VALID_REASONING:
+                    raise BackendBadRequestError(
+                        f"anthropic_sdk: invalid reasoning effort {effort!r} "
+                        f"(expected one of {_VALID_REASONING})"
+                    )
+                create_kwargs["thinking"] = {"type": "adaptive"}
+                create_kwargs["output_config"] = {"effort": effort}
+
+        # Automatic prompt caching: place an ephemeral cache breakpoint at
+        # the longest cacheable prefix (tools + system + accumulated
+        # messages). It moves forward each turn, so multi-turn agent loops
+        # get cache reads on every turn after the first. Default on; set
+        # ``prompt_caching`` falsy for proxies without cache_control support,
+        # or to a string to use as the TTL instead of the default "5m".
+        prompt_caching = handle.model_settings.get("prompt_caching", True)
+        if prompt_caching:
+            ttl = prompt_caching if isinstance(prompt_caching, str) else "5m"
+            cache_block: dict[str, Any] = {"type": "ephemeral"}
+            if ttl != "5m":
+                cache_block["ttl"] = ttl
+            create_kwargs["cache_control"] = cache_block
+
+        import anthropic
+
+        for _ in range(max_turns):
+            try:
+                async with handle.client.messages.stream(
+                    model=handle.model,
+                    max_tokens=handle.max_tokens,
+                    system=handle.system_prompt,
+                    messages=messages,
+                    tools=handle.tools or anthropic.NOT_GIVEN,
+                    **create_kwargs,
+                ) as stream:
+                    async for event in stream:
+                        if getattr(event, "type", None) == "content_block_delta":
+                            delta = event.delta
+                            if hasattr(delta, "text"):
+                                yield TextDelta(text=delta.text)
+                            elif hasattr(delta, "thinking") and handle.stream_thinking:
+                                yield TextDelta(text=delta.thinking)
+
+                    response = await stream.get_final_message()
+
+            except anthropic.RateLimitError as exc:
+                raise BackendRateLimitError(str(exc)) from exc
+            except anthropic.APITimeoutError as exc:
+                raise BackendTimeoutError(str(exc)) from exc
+            except anthropic.APIStatusError as exc:
+                # Every 4xx (auth, permission, not_found, conflict,
+                # unprocessable, bad_request) is a request error for the
+                # runner; 5xx and unclassified statuses are unexpected errors.
+                status = getattr(exc, "status_code", None)
+                if isinstance(status, int) and 400 <= status < 500:
+                    raise BackendBadRequestError(str(exc)) from exc
+                raise BackendUnexpectedError(str(exc)) from exc
+            except anthropic.APIError as exc:
+                raise BackendUnexpectedError(str(exc)) from exc
+
+            # Run is over; warn when it did not end normally (e.g. max_tokens).
+            if response.stop_reason != "tool_use":
+                if response.stop_reason not in ("end_turn", "stop_sequence"):
+                    logger.warning("anthropic_sdk: run ended with stop_reason=%s", response.stop_reason)
+                return
+
+            # Process tool calls
+            tool_use_blocks = [b for b in response.content if b.type == "tool_use"]
+            if not tool_use_blocks:
+                return
+
+            # Add assistant message with all content blocks
+            messages.append({"role": "assistant", "content": response.content})
+
+            # Execute each tool call and collect results
+            tool_results: list[dict[str, Any]] = []
+            for tool_block in tool_use_blocks:
+                tool_name = tool_block.name
+                tool_input = tool_block.input
+
+                server = handle.mcp_server_map.get(tool_name)
+                if server is None:
+                    logger.warning("Tool %s not found in MCP servers", tool_name)
+                    tool_results.append({
+                        "type": "tool_result",
+                        "tool_use_id": tool_block.id,
+                        "content": f"Error: tool '{tool_name}' not found",
+                        "is_error": True,
+                    })
+                    yield ToolEnd(tool_name=tool_name, text=f"Error: tool '{tool_name}' not found")
+                    continue
+
+                try:
+                    result = await server.call_tool(
+                        tool_name,
+                        arguments=tool_input if isinstance(tool_input, dict) else {},
+                    )
+                    result_text = _call_tool_result_to_text(result)
+                    tool_results.append({
+                        "type": "tool_result",
+                        "tool_use_id": tool_block.id,
+                        "content": result_text,
+                    })
+                    yield ToolEnd(tool_name=tool_name, text=result_text)
+                except Exception as exc:
+                    logger.exception("Tool call %s failed", tool_name)
+                    error_text = f"Error calling {tool_name}: {exc}"
+                    tool_results.append({
+                        "type": "tool_result",
+                        "tool_use_id": tool_block.id,
+                        "content": error_text,
+                        "is_error": True,
+                    })
+                    yield ToolEnd(tool_name=tool_name, text=error_text)
+
+            # exclude_from_context: stop after tool results are emitted
+            # so they are available to the runner but not fed back into
+            # the model context (matches copilot_sdk behavior).
+            if handle.exclude_from_context:
+                return
+
+            messages.append({"role": "user", "content": tool_results})
+
+        raise BackendMaxTurnsError(f"Exceeded max_turns ({max_turns})")
+
+    async def aclose(self, agent: Any) -> None:
+        """Close the handle's Anthropic client; no-op when either is None."""
+        if agent is not None and (client := agent.client) is not None:
+            await client.close()
diff --git a/tests/test_capi_extended.py b/tests/test_capi_extended.py
index e3a1188b..36c97159 100644
--- a/tests/test_capi_extended.py
+++ b/tests/test_capi_extended.py
@@ -80,6 +80,17 @@ def test_openai_endpoint_o_series(self, monkeypatch):
             models = {mid: {"id": mid}}
             assert supports_tool_calls(mid, models) is True
 
+    def test_openai_endpoint_gpt5_series(self, monkeypatch):
+        """OpenAI endpoint returns True for the gpt-5 family.
+
+        Regression: default_model was bumped to gpt-5.5, so _CHAT_PREFIXES must include 'gpt-5'."""
+        monkeypatch.setenv("AI_API_ENDPOINT", "https://api.openai.com/v1")
+        for mid in ("gpt-5", "gpt-5.5", "gpt-5.5-mini", "gpt-5.6"):
+            models = {mid: {"id": mid}}
+            assert supports_tool_calls(mid, models) is True, (
+                f"{mid} should be recognized as a tool-call-capable chat model"
+            )
+
     def test_openai_endpoint_non_chat_model(self, monkeypatch):
         """OpenAI endpoint returns False for embeddings/audio/image models."""
         monkeypatch.setenv("AI_API_ENDPOINT", "https://api.openai.com/v1")
diff --git a/tests/test_mcp_utils.py b/tests/test_mcp_utils.py
new file mode 100644
index 00000000..1480b98e
--- /dev/null
+++ b/tests/test_mcp_utils.py
@@ -0,0 +1,147 @@
+# SPDX-FileCopyrightText: GitHub, Inc.
+# SPDX-License-Identifier: MIT
+
+"""Tests for MCPNamespaceWrap."""
+
+from __future__ import annotations
+
+import asyncio
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from seclab_taskflow_agent.mcp_utils import MCPNamespaceWrap, compress_name
+
+
+class _FakeTool:
+    """Stand-in for mcp.types.Tool exposing name, description, inputSchema and copy()."""
+
+    def __init__(self, name: str, description: str = "", input_schema: dict | None = None):
+        self.name = name
+        self.description = description
+        self.inputSchema = input_schema or {}
+
+    def copy(self) -> _FakeTool:
+        return _FakeTool(self.name, self.description, dict(self.inputSchema))
+
+
+def _make_wrapper(server_name: str, session=None) -> MCPNamespaceWrap:
+    """Wrap a MagicMock server (with ``name`` and ``session`` set) in MCPNamespaceWrap."""
+    server = MagicMock()
+    server.name = server_name
+    server.session = session
+    return MCPNamespaceWrap(confirms=[], obj=server)
+
+
+# -- list_tools_unfiltered() --
+
+
+def test_list_tools_unfiltered_prefixes_names_from_session():
+    """Every tool listed by the session comes back with the namespace prefix."""
+    raw_tools = [_FakeTool("read_file", "Read a file"), _FakeTool("write_file", "Write a file")]
+    session = MagicMock()
+    session.list_tools = AsyncMock(return_value=SimpleNamespace(tools=raw_tools))
+    wrapper = _make_wrapper("RepoContext", session=session)
+
+    listed = asyncio.run(wrapper.list_tools_unfiltered())
+
+    prefix = compress_name("RepoContext")
+    # Comparing the full name list pins count and order in one assertion.
+    names = [tool.name for tool in listed]
+    assert names == [f"{prefix}read_file", f"{prefix}write_file"]
+
+
+def test_list_tools_unfiltered_no_double_prefix_when_called_twice():
+    """Two consecutive listings each carry the namespace prefix exactly once."""
+    session = MagicMock()
+    # side_effect builds a new tool per call, like a session returning fresh objects
+    session.list_tools = AsyncMock(
+        side_effect=lambda: SimpleNamespace(tools=[_FakeTool("get_repo")])
+    )
+    wrapper = _make_wrapper("RepoContext", session=session)
+
+    async def _list_twice():
+        first = await wrapper.list_tools_unfiltered()
+        second = await wrapper.list_tools_unfiltered()
+        return first, second
+
+    first, second = asyncio.run(_list_twice())
+
+    prefix = compress_name("RepoContext")
+    assert first[0].name == f"{prefix}get_repo"
+    assert second[0].name == f"{prefix}get_repo"
+    # The second listing must not have picked up a doubled prefix.
+    assert not second[0].name.startswith(f"{prefix}{prefix}")
+
+
+def test_list_tools_unfiltered_preserves_tool_attributes():
+    """Description and input schema are unchanged on the tool that is returned."""
+    expected_schema = {"type": "object", "properties": {"path": {"type": "string"}}}
+    source_tool = _FakeTool("read_file", "Read a file", expected_schema)
+    session = MagicMock()
+    session.list_tools = AsyncMock(return_value=SimpleNamespace(tools=[source_tool]))
+    wrapper = _make_wrapper("RepoContext", session=session)
+
+    listed = asyncio.run(wrapper.list_tools_unfiltered())[0]
+
+    assert listed.description == "Read a file"
+    assert listed.inputSchema == expected_schema
+
+
+def test_list_tools_unfiltered_raises_when_session_missing():
+    """list_tools_unfiltered() raises RuntimeError when the server's session is None."""
+    wrapper = _make_wrapper("RepoContext", session=None)
+
+    with pytest.raises(RuntimeError, match=r"no.*active MCP session"):
+        asyncio.run(wrapper.list_tools_unfiltered())
+
+
+def test_list_tools_unfiltered_does_not_share_state_with_caller():
+    """Renaming a returned tool leaves the tool held by the session untouched."""
+    source_tool = _FakeTool("read_file")
+    session = MagicMock()
+    session.list_tools = AsyncMock(return_value=SimpleNamespace(tools=[source_tool]))
+    wrapper = _make_wrapper("Repo", session=session)
+
+    listed = asyncio.run(wrapper.list_tools_unfiltered())
+    listed[0].name = "MUTATED"
+
+    # Neither the prefixing nor the caller's rename may reach the source tool.
+    assert source_tool.name == "read_file"
+
+
+def test_list_tools_unfiltered_idempotent_on_prefixed_input():
+    """A tool name that already starts with the namespace is not prefixed again.
+
+    Covers sessions that hand back cached/reused tool objects across repeated calls."""
+    prefix = compress_name("RepoContext")
+    already_prefixed = _FakeTool(f"{prefix}read_file", "Read a file")
+    session = MagicMock()
+    session.list_tools = AsyncMock(return_value=SimpleNamespace(tools=[already_prefixed]))
+    wrapper = _make_wrapper("RepoContext", session=session)
+
+    listed = asyncio.run(wrapper.list_tools_unfiltered())
+
+    # Exactly one prefix is expected on the listed name.
+    assert listed[0].name == f"{prefix}read_file"
+    assert not listed[0].name.startswith(f"{prefix}{prefix}")
+
+
+# -- list_tools() (regression) --
+
+
+def test_list_tools_existing_behaviour_unchanged():
+    """list_tools() forwards its keyword arguments and prefixes tool names."""
+    server = MagicMock()
+    server.name = "RepoContext"
+    server.session = MagicMock()
+    server.list_tools = AsyncMock(return_value=[_FakeTool("read_file")])
+    wrapper = MCPNamespaceWrap(confirms=[], obj=server)
+    prefix = compress_name("RepoContext")
+
+    listed = asyncio.run(wrapper.list_tools(run_context="ctx", agent="agent"))
+
+    server.list_tools.assert_awaited_once_with(run_context="ctx", agent="agent")
+    first_name = listed[0].name
+    assert first_name == f"{prefix}read_file"
diff --git a/tests/test_runner.py b/tests/test_runner.py
index a7713953..5cb1c26a 100644
--- a/tests/test_runner.py
+++ b/tests/test_runner.py
@@ -187,7 +187,7 @@ class TestResolveTaskModel:
 
     def test_logical_name_mapped_to_provider_id(self):
         """A logical model name is resolved to the provider model ID."""
-        model_id, _, _, _, _ = _resolve_task_model(
+        model_id, _, _, _, _, _ = _resolve_task_model(
             TaskDefinition(model="fast"),
             model_keys=["fast"],
             model_dict={"fast": "gpt-4o-mini"},
@@ -197,7 +197,7 @@ def test_logical_name_mapped_to_provider_id(self):
 
     def test_model_settings_from_config(self):
         """Settings from models_params are included in the result."""
-        _, settings, _, _, _ = _resolve_task_model(
+        _, settings, _, _, _, _ = _resolve_task_model(
             TaskDefinition(model="fast"),
             model_keys=["fast"],
             model_dict={"fast": "gpt-4o-mini"},
@@ -208,7 +208,7 @@ def test_model_settings_from_config(self):
 
     def test_task_level_settings_override_config(self):
         """Task-level model_settings override config-level settings."""
-        _, settings, _, _, _ = _resolve_task_model(
+        _, settings, _, _, _, _ = _resolve_task_model(
             TaskDefinition(model="fast", model_settings={"temperature": 0.2}),
             model_keys=["fast"],
             model_dict={"fast": "gpt-4o-mini"},
@@ -218,8 +218,8 @@ def test_task_level_settings_override_config(self):
         assert settings["max_tokens"] == 100
 
     def test_engine_keys_extracted(self):
-        """Engine keys (api_type, endpoint, token) are popped from settings."""
-        _, settings, api_type, endpoint, token = _resolve_task_model(
+        """Engine keys (api_type, endpoint, token, backend) are popped from settings."""
+        _, settings, api_type, endpoint, token, backend = _resolve_task_model(
             TaskDefinition(model="fast"),
             model_keys=["fast"],
             model_dict={"fast": "gpt-4o-mini"},
@@ -228,6 +228,7 @@ def test_engine_keys_extracted(self):
                     "api_type": "responses",
                     "endpoint": "https://custom.api",
                     "token": "secret",
+                    "backend": "anthropic_sdk",
                     "temperature": 0.5,
                 }
             },
@@ -235,16 +236,18 @@ def test_engine_keys_extracted(self):
         assert api_type == "responses"
         assert endpoint == "https://custom.api"
         assert token == "secret"  # noqa: S105
+        assert backend == "anthropic_sdk"
         assert "api_type" not in settings
         assert "endpoint" not in settings
         assert "token" not in settings
+        assert "backend" not in settings
         assert settings["temperature"] == 0.5
 
     def test_default_model_when_empty(self):
         """Empty model string falls back to DEFAULT_MODEL."""
         from seclab_taskflow_agent.agent import DEFAULT_MODEL
 
-        model_id, _, _, _, _ = _resolve_task_model(
+        model_id, _, _, _, _, _ = _resolve_task_model(
             TaskDefinition(model=""),
             model_keys=[],
             model_dict={},
@@ -254,7 +257,7 @@ def test_default_model_when_empty(self):
 
     def test_model_not_in_keys_passes_through(self):
         """A model name not in model_keys passes through as-is."""
-        model_id, _, _, _, _ = _resolve_task_model(
+        model_id, _, _, _, _, _ = _resolve_task_model(
             TaskDefinition(model="claude-3-opus"),
             model_keys=["fast", "smart"],
             model_dict={"fast": "gpt-4o-mini", "smart": "gpt-4o"},
@@ -264,17 +267,18 @@ def test_model_not_in_keys_passes_through(self):
 
     def test_task_engine_keys_override_config(self):
         """Task-level model_settings can override engine keys from config."""
-        _, _, api_type, endpoint, token = _resolve_task_model(
+        _, _, api_type, endpoint, token, backend = _resolve_task_model(
             TaskDefinition(
                 model="fast",
-                model_settings={"api_type": "responses", "endpoint": "https://task.api"},
+                model_settings={"api_type": "responses", "endpoint": "https://task.api", "backend": "anthropic_sdk"},
             ),
             model_keys=["fast"],
             model_dict={"fast": "gpt-4o-mini"},
-            models_params={"fast": {"api_type": "chat_completions"}},
+            models_params={"fast": {"api_type": "chat_completions", "backend": "openai_agents"}},
         )
         assert api_type == "responses"
         assert endpoint == "https://task.api"
+        assert backend == "anthropic_sdk"
 
 
 # ===================================================================
diff --git a/tests/test_sdk_anthropic_adapter.py b/tests/test_sdk_anthropic_adapter.py
new file mode 100644
index 00000000..bb8bf64d
--- /dev/null
+++ b/tests/test_sdk_anthropic_adapter.py
@@ -0,0 +1,630 @@
+# SPDX-FileCopyrightText: GitHub, Inc.
+# SPDX-License-Identifier: MIT
+
+"""Tests for the Anthropic SDK adapter."""
+
+from __future__ import annotations
+
+import pytest
+
+from seclab_taskflow_agent.sdk import get_backend
+from seclab_taskflow_agent.sdk.base import AgentSpec
+from seclab_taskflow_agent.sdk.anthropic_sdk.backend import (
+    AnthropicSDKBackend,
+    _mcp_tools_to_anthropic,
+    _call_tool_result_to_text,
+    _VALID_REASONING,
+)
+from seclab_taskflow_agent.sdk.errors import (
+    BackendBadRequestError,
+    BackendCapabilityError,
+)
+
+
+def _spec(**overrides) -> AgentSpec:
+    """Build an AgentSpec from test defaults, with keyword *overrides* winning."""
+    defaults = {
+        "name": "a",
+        "instructions": "You are a test agent.",
+        "model": "claude-opus-4.7",
+    }
+    return AgentSpec(**{**defaults, **overrides})
+
+
+def _make_fake_client(captured: dict, *, stop_reason: str = "end_turn", content: list | None = None):
+    """Build a minimal fake Anthropic client that records messages.stream() kwargs.
+
+    The returned client exposes ``client.messages.stream(**kwargs)``; ``kwargs`` is
+    written into *captured* so tests can assert on what the backend would have sent
+    to the real SDK.  The stream yields nothing and ``get_final_message()`` returns
+    a stub with the requested ``stop_reason``/``content``.
+    """
+    final_content = content if content is not None else []
+
+    class _EmptyAsyncIter:
+        def __aiter__(self):
+            return self
+
+        async def __anext__(self):
+            raise StopAsyncIteration
+
+    class _FakeStreamCtx:
+        async def __aenter__(self):
+            return self
+
+        async def __aexit__(self, *exc):
+            return False
+
+        def __aiter__(self):
+            return _EmptyAsyncIter()
+
+        async def get_final_message(self):
+            return type("M", (), {"stop_reason": stop_reason, "content": final_content})()
+
+    class _FakeMessages:
+        def stream(self, **kwargs):
+            captured.update(kwargs)
+            return _FakeStreamCtx()
+
+    class _FakeClient:
+        def __init__(self):
+            self.messages = _FakeMessages()
+
+    return _FakeClient()
+
+
+# -- Backend registration --
+
+
+def test_get_backend_returns_anthropic_sdk_instance():
+    backend = get_backend("anthropic_sdk")
+    assert isinstance(backend, AnthropicSDKBackend)
+    assert backend.name == "anthropic_sdk"
+
+
+# -- validate() --
+
+
+def test_validate_accepts_minimal_spec():
+    AnthropicSDKBackend().validate(_spec())
+
+
+def test_validate_rejects_handoffs():
+    backend = AnthropicSDKBackend()
+    with pytest.raises(BackendCapabilityError, match="handoffs"):
+        backend.validate(_spec(handoffs=[_spec(name="b")]))
+
+
+def test_validate_rejects_handoff_graph():
+    backend = AnthropicSDKBackend()
+    with pytest.raises(BackendCapabilityError, match="handoffs"):
+        backend.validate(_spec(in_handoff_graph=True))
+
+
+def test_validate_rejects_empty_model():
+    backend = AnthropicSDKBackend()
+    with pytest.raises(BackendBadRequestError, match="model is required"):
+        backend.validate(_spec(model=""))
+
+
+def test_validate_accepts_exclude_from_context():
+    AnthropicSDKBackend().validate(_spec(exclude_from_context=True))
+
+
+# -- _mcp_tools_to_anthropic() --
+
+
+class _FakeTool:
+    def __init__(self, name, description=None, input_schema=None):  # noqa: N803
+        self.name = name
+        self.description = description
+        self.inputSchema = input_schema
+
+
+def test_mcp_tools_to_anthropic_basic():
+    tools = [
+        _FakeTool(
+            "read_file",
+            "Read a file",
+            {"type": "object", "properties": {"path": {"type": "string"}}},
+        )
+    ]
+    result = _mcp_tools_to_anthropic(tools)
+    assert len(result) == 1
+    assert result[0]["name"] == "read_file"
+    assert result[0]["description"] == "Read a file"
+    assert result[0]["input_schema"]["properties"]["path"]["type"] == "string"
+
+
+def test_mcp_tools_to_anthropic_none_description():
+    """Tools with None description should fall back to tool name."""
+    tools = [_FakeTool("my_tool", description=None)]
+    result = _mcp_tools_to_anthropic(tools)
+    assert result[0]["description"] == "my_tool"
+
+
+def test_mcp_tools_to_anthropic_empty_description():
+    """Tools with empty string description should fall back to tool name."""
+    tools = [_FakeTool("my_tool", description="")]
+    result = _mcp_tools_to_anthropic(tools)
+    assert result[0]["description"] == "my_tool"
+
+
+def test_mcp_tools_to_anthropic_no_schema():
+    """Tools built without input_schema (so inputSchema is None) get an empty object schema."""
+    tools = [_FakeTool("my_tool", "desc")]
+    result = _mcp_tools_to_anthropic(tools)
+    assert result[0]["input_schema"] == {"type": "object", "properties": {}}
+
+
+def test_mcp_tools_to_anthropic_none_schema():
+    """Tools with None inputSchema should get a default empty object schema."""
+    tools = [_FakeTool("my_tool", "desc", input_schema=None)]
+    result = _mcp_tools_to_anthropic(tools)
+    assert result[0]["input_schema"] == {"type": "object", "properties": {}}
+
+
+# -- _call_tool_result_to_text() --
+
+
+class _FakeContent:
+    def __init__(self, text):
+        self.text = text
+
+
+class _FakeResult:
+    def __init__(self, contents):
+        self.content = contents
+
+
+def test_call_tool_result_to_text_single():
+    """A single text content item is returned verbatim."""
+    assert _call_tool_result_to_text(_FakeResult([_FakeContent("hello")])) == "hello"
+
+
+def test_call_tool_result_to_text_multiple():
+    """Multiple text content items are joined with newlines."""
+    assert _call_tool_result_to_text(_FakeResult([_FakeContent("a"), _FakeContent("b")])) == "a\nb"
+
+
+def test_call_tool_result_to_text_empty():
+    """An empty content list still yields a string (the exact text is not pinned)."""
+    text = _call_tool_result_to_text(_FakeResult([]))
+    assert isinstance(text, str)
+
+
+def test_call_tool_result_to_text_preserves_empty_string():
+    """A tool returning TextContent(text='') is reporting an explicit
+    empty result. The helper must return '' verbatim, not fall back to
+    str(result) (which is a noisy repr of the result object).
+
+    Regression for the truthy-check bug: ``if text:`` was treating ''
+    the same as None and dropping it, causing the empty content list
+    branch to fire and emit ``str(result)`` to the model.
+    """
+    result = _FakeResult([_FakeContent("")])
+    assert _call_tool_result_to_text(result) == ""
+
+
+def test_call_tool_result_to_text_preserves_empty_among_nonempty():
+    """Empty TextContent should join with neighbors as ''."""
+    result = _FakeResult([_FakeContent("a"), _FakeContent(""), _FakeContent("b")])
+    assert _call_tool_result_to_text(result) == "a\n\nb"
+
+
+# -- bearer_auth via provider registry --
+
+
+def test_known_provider_uses_bearer_auth():
+    """CAPI and GitHub Models endpoints resolve to providers with bearer_auth=True."""
+    from seclab_taskflow_agent.capi import get_provider
+
+    for endpoint in (
+        "https://api.githubcopilot.com",
+        "https://models.github.ai/inference",
+    ):
+        assert get_provider(endpoint).bearer_auth is True
+
+
+def test_unknown_endpoint_uses_native_auth():
+    """Unknown endpoints should default to native SDK auth (bearer_auth=False)."""
+    from seclab_taskflow_agent.capi import get_provider
+
+    provider = get_provider("https://api.anthropic.com")
+    assert provider.bearer_auth is False
+    assert provider.name == "custom"
+
+
+def test_awf_proxy_inherits_upstream_bearer_auth(monkeypatch):
+    """AWF proxy should inherit bearer_auth from the upstream provider."""
+    from seclab_taskflow_agent.capi import get_provider
+
+    monkeypatch.setenv("AWF_COPILOT_PROXY", "api.githubcopilot.com")
+    provider = get_provider("http://localhost:8080")
+    assert provider.bearer_auth is True
+    assert provider.base_url == "http://localhost:8080/"
+
+
+# -- reasoning validation --
+
+
+def test_valid_reasoning_values():
+    assert _VALID_REASONING == ("low", "medium", "high", "max")
+
+
+# -- reasoning effort validation (runtime) --
+
+
+def test_invalid_reasoning_effort_not_in_valid():
+    """Membership checks on _VALID_REASONING.
+
+    'ultra' is not an accepted reasoning.effort value; 'high', 'low' and 'max' are.
+    """
+    assert "ultra" not in _VALID_REASONING
+    for effort in ("high", "low", "max"):
+        assert effort in _VALID_REASONING
+
+
+def test_invalid_reasoning_effort_raises_at_runtime():
+    """run_streamed raises BackendBadRequestError for invalid effort."""
+    import asyncio
+
+    from seclab_taskflow_agent.sdk.anthropic_sdk.backend import _AnthropicHandle
+
+    handle = _AnthropicHandle(
+        client=None,
+        system_prompt="",
+        model="test",
+        max_tokens=100,
+        tools=[],
+        mcp_server_map={},
+        model_settings={"reasoning": {"effort": "ultra"}},
+    )
+    backend = AnthropicSDKBackend()
+
+    async def _run():
+        async for _ in backend.run_streamed(handle, "hi", max_turns=1):
+            pass
+
+    with pytest.raises(BackendBadRequestError, match="invalid reasoning effort"):
+        asyncio.run(_run())
+
+
+# -- prompt caching --
+
+
+def test_prompt_caching_enabled_by_default():
+    """All Claude models support cache_control; default to on so callers
+    get the cost savings without explicit opt-in. Explicit opt-out via
+    prompt_caching=False remains available for proxies that don't support
+    cache_control."""
+    import asyncio
+
+    from seclab_taskflow_agent.sdk.anthropic_sdk.backend import _AnthropicHandle
+
+    captured: dict = {}
+    handle = _AnthropicHandle(
+        client=_make_fake_client(captured),
+        system_prompt="",
+        model="claude-mythos-5",
+        max_tokens=100,
+        tools=[],
+        mcp_server_map={},
+        model_settings={},
+    )
+    backend = AnthropicSDKBackend()
+
+    async def _run():
+        async for _ in backend.run_streamed(handle, "hi", max_turns=1):
+            pass
+
+    asyncio.run(_run())
+    assert captured.get("cache_control") == {"type": "ephemeral"}, (
+        f"expected default cache_control={{type: ephemeral}}, got {captured.get('cache_control')!r}"
+    )
+
+
+def test_prompt_caching_explicit_opt_out():
+    """prompt_caching=False must suppress cache_control entirely (for
+    callers pointed at proxies that don't support it)."""
+    import asyncio
+
+    from seclab_taskflow_agent.sdk.anthropic_sdk.backend import _AnthropicHandle
+
+    captured: dict = {}
+    handle = _AnthropicHandle(
+        client=_make_fake_client(captured),
+        system_prompt="",
+        model="claude-mythos-5",
+        max_tokens=100,
+        tools=[],
+        mcp_server_map={},
+        model_settings={"prompt_caching": False},
+    )
+    backend = AnthropicSDKBackend()
+
+    async def _run():
+        async for _ in backend.run_streamed(handle, "hi", max_turns=1):
+            pass
+
+    asyncio.run(_run())
+    assert "cache_control" not in captured, (
+        f"cache_control should be absent when explicitly opted out, got {captured}"
+    )
+
+
+def test_prompt_caching_1h_ttl_passes_ttl_field():
+    """When prompt_caching='1h', cache_control must include the 1h ttl."""
+    import asyncio
+
+    from seclab_taskflow_agent.sdk.anthropic_sdk.backend import _AnthropicHandle
+
+    captured: dict = {}
+    handle = _AnthropicHandle(
+        client=_make_fake_client(captured),
+        system_prompt="",
+        model="claude-mythos-5",
+        max_tokens=100,
+        tools=[],
+        mcp_server_map={},
+        model_settings={"prompt_caching": "1h"},
+    )
+    backend = AnthropicSDKBackend()
+
+    async def _run():
+        async for _ in backend.run_streamed(handle, "hi", max_turns=1):
+            pass
+
+    asyncio.run(_run())
+    assert captured.get("cache_control") == {"type": "ephemeral", "ttl": "1h"}, (
+        f"expected cache_control with 1h ttl, got {captured.get('cache_control')!r}"
+    )
+
+
+# -- blocked_tools filtering --
+
+
+def test_blocked_tools_matches_raw_name_against_namespaced_tool(monkeypatch):
+    """Regression: taskflow YAML blocked_tools uses raw (un-namespaced)
+    names like 'read_file', but list_tools_unfiltered() returns
+    namespace-prefixed names like '{hash}read_file'. The filter must
+    match the raw name against the un-prefixed portion of the
+    namespaced tool, otherwise blocking is silently bypassed.
+
+    See PR #265 review thread and openai_agents/copilot_sdk for
+    how blocked_tools are consumed elsewhere (both use raw names).
+    """
+    monkeypatch.setenv("AI_API_TOKEN", "test-token")
+    import asyncio
+    from unittest.mock import AsyncMock, MagicMock
+
+    from seclab_taskflow_agent.mcp_utils import MCPNamespaceWrap, compress_name
+    from seclab_taskflow_agent.sdk.base import MCPServerSpec
+
+    class _FakeTool:
+        def __init__(self, name):
+            self.name = name
+            self.description = ""
+            self.inputSchema = {}
+
+        def copy(self):
+            t = _FakeTool(self.name)
+            return t
+
+    # Build a wrapper whose session.list_tools returns two raw tools.
+    # list_tools_unfiltered() will return them with namespace prefix.
+    obj = MagicMock()
+    obj.name = "RepoContext"
+    ns = compress_name("RepoContext")
+    obj.session = MagicMock()
+    obj.session.list_tools = AsyncMock(
+        return_value=type("R", (), {"tools": [_FakeTool("read_file"), _FakeTool("safe_helper")]})()
+    )
+    wrap = MCPNamespaceWrap(confirms=[], obj=obj)
+
+    spec = AgentSpec(
+        name="t",
+        instructions="",
+        model="claude-mythos-preview",
+        mcp_servers=[MCPServerSpec(name="rc", kind="stdio", params={"_native": wrap})],
+        blocked_tools=["read_file"],  # raw name from YAML
+    )
+    backend = AnthropicSDKBackend()
+    handle = asyncio.run(backend.build(spec))
+
+    # The blocked tool must be absent from both the tool list AND the
+    # server map keys (which use the namespaced form).
+    tool_names = [t["name"] for t in handle.tools]
+    assert f"{ns}read_file" not in tool_names, (
+        f"blocked raw name 'read_file' should have filtered out '{ns}read_file'; "
+        f"got tools: {tool_names}"
+    )
+    assert f"{ns}safe_helper" in tool_names, (
+        f"non-blocked tool 'safe_helper' should still be present; got: {tool_names}"
+    )
+    assert f"{ns}read_file" not in handle.mcp_server_map
+    assert f"{ns}safe_helper" in handle.mcp_server_map
+
+
+def test_blocked_tools_also_matches_already_namespaced_name(monkeypatch):
+    """Backwards-compat: if a caller already passes the namespaced name
+    in blocked_tools (e.g. they computed it externally), it should still
+    match. The filter checks both forms."""
+    monkeypatch.setenv("AI_API_TOKEN", "test-token")
+    import asyncio
+    from unittest.mock import AsyncMock, MagicMock
+
+    from seclab_taskflow_agent.mcp_utils import MCPNamespaceWrap, compress_name
+    from seclab_taskflow_agent.sdk.base import MCPServerSpec
+
+    class _FakeTool:
+        def __init__(self, name):
+            self.name = name
+            self.description = ""
+            self.inputSchema = {}
+
+        def copy(self):
+            return _FakeTool(self.name)
+
+    obj = MagicMock()
+    obj.name = "RepoContext"
+    ns = compress_name("RepoContext")
+    obj.session = MagicMock()
+    obj.session.list_tools = AsyncMock(
+        return_value=type("R", (), {"tools": [_FakeTool("read_file")]})()
+    )
+    wrap = MCPNamespaceWrap(confirms=[], obj=obj)
+
+    spec = AgentSpec(
+        name="t",
+        instructions="",
+        model="claude-mythos-preview",
+        mcp_servers=[MCPServerSpec(name="rc", kind="stdio", params={"_native": wrap})],
+        blocked_tools=[f"{ns}read_file"],  # already namespaced
+    )
+    backend = AnthropicSDKBackend()
+    handle = asyncio.run(backend.build(spec))
+
+    assert handle.tools == [], (
+        f"blocked namespaced name should filter out the tool; got: {handle.tools}"
+    )
+
+
+# -- token validation --
+
+
+def test_build_raises_bad_request_when_no_token_available(monkeypatch):
+    """build() must fail loudly when no API token can be resolved.
+
+    Otherwise the Anthropic client gets created with an empty 'Bearer '
+    header and the failure surfaces later as an opaque 401 mid-stream
+    instead of a clear BackendBadRequestError at build time.
+
+    Clears every variable consulted by ``capi.get_AI_token``
+    (``AI_API_TOKEN`` then ``COPILOT_TOKEN``) to keep the test
+    deterministic regardless of the runner's ambient environment.
+    """
+    import asyncio
+
+    # Must clear *every* env var the token chain consults; missing
+    # COPILOT_TOKEN here would make the test flaky on runners that
+    # happen to have it set (e.g. CI machines authed to copilot).
+    monkeypatch.delenv("AI_API_TOKEN", raising=False)
+    monkeypatch.delenv("COPILOT_TOKEN", raising=False)
+
+    spec = AgentSpec(
+        name="t",
+        instructions="",
+        model="claude-mythos-preview",
+        endpoint="https://api.githubcopilot.com",
+    )
+    backend = AnthropicSDKBackend()
+    with pytest.raises(BackendBadRequestError, match="no API token"):
+        asyncio.run(backend.build(spec))
+
+
+# -- exception mapping (4xx -> BackendBadRequestError) --
+
+
+@pytest.mark.parametrize("status_code", [400, 401, 403, 404, 409, 422])
+def test_4xx_api_status_errors_map_to_bad_request(status_code):
+    """Any 4xx APIStatusError must surface as BackendBadRequestError so the
+    runner logs it as a request error rather than an internal exception.
+    Previously only BadRequestError (400) was mapped, leaving auth/permission/
+    not-found errors (401/403/404) to surface as BackendUnexpectedError."""
+    import asyncio
+    import anthropic
+    import httpx
+
+    from seclab_taskflow_agent.sdk.anthropic_sdk.backend import _AnthropicHandle
+
+    response = httpx.Response(
+        status_code=status_code,
+        request=httpx.Request("POST", "https://test.example/v1/messages"),
+    )
+
+    class _RaisingStreamCtx:
+        async def __aenter__(self):
+            raise anthropic.APIStatusError(
+                f"http {status_code}", response=response, body=None
+            )
+
+        async def __aexit__(self, *exc):
+            return False
+
+    class _FakeMessages:
+        def stream(self, **kwargs):  # noqa: ARG002
+            return _RaisingStreamCtx()
+
+    class _FakeClient:
+        def __init__(self):
+            self.messages = _FakeMessages()
+
+    handle = _AnthropicHandle(
+        client=_FakeClient(),
+        system_prompt="",
+        model="claude-mythos-5",
+        max_tokens=100,
+        tools=[],
+        mcp_server_map={},
+        model_settings={"prompt_caching": False},
+    )
+    backend = AnthropicSDKBackend()
+
+    async def _run():
+        async for _ in backend.run_streamed(handle, "hi", max_turns=1):
+            pass
+
+    with pytest.raises(BackendBadRequestError):
+        asyncio.run(_run())
+
+
+def test_5xx_api_status_errors_map_to_unexpected():
+    """5xx APIStatusError must still surface as BackendUnexpectedError (not
+    BackendBadRequestError); the request itself was well-formed."""
+    import asyncio
+    import anthropic
+    import httpx
+
+    from seclab_taskflow_agent.sdk.anthropic_sdk.backend import _AnthropicHandle
+    from seclab_taskflow_agent.sdk.errors import BackendUnexpectedError
+
+    response = httpx.Response(
+        status_code=503,
+        request=httpx.Request("POST", "https://test.example/v1/messages"),
+    )
+
+    class _RaisingStreamCtx:
+        async def __aenter__(self):
+            raise anthropic.InternalServerError(
+                "service unavailable", response=response, body=None
+            )
+
+        async def __aexit__(self, *exc):
+            return False
+
+    class _FakeMessages:
+        def stream(self, **kwargs):  # noqa: ARG002
+            return _RaisingStreamCtx()
+
+    class _FakeClient:
+        def __init__(self):
+            self.messages = _FakeMessages()
+
+    handle = _AnthropicHandle(
+        client=_FakeClient(),
+        system_prompt="",
+        model="claude-mythos-5",
+        max_tokens=100,
+        tools=[],
+        mcp_server_map={},
+        model_settings={"prompt_caching": False},
+    )
+    backend = AnthropicSDKBackend()
+
+    async def _run():
+        async for _ in backend.run_streamed(handle, "hi", max_turns=1):
+            pass
+
+    with pytest.raises(BackendUnexpectedError):
+        asyncio.run(_run())
diff --git a/tests/test_sdk_base.py b/tests/test_sdk_base.py
index 54dd17e5..f2fb1c38 100644
--- a/tests/test_sdk_base.py
+++ b/tests/test_sdk_base.py
@@ -38,24 +38,23 @@ def test_resolve_backend_default_is_openai_agents(monkeypatch):
     assert sdk.resolve_backend_name() == "openai_agents"
 
 
-def test_resolve_backend_copilot_endpoint_prefers_copilot_when_installed(monkeypatch):
+def test_resolve_backend_copilot_endpoint_does_not_auto_select(monkeypatch):
+    """Backend selection is always explicit -- endpoint URL is not used."""
     monkeypatch.delenv("SECLAB_TASKFLOW_BACKEND", raising=False)
-    pytest.importorskip("copilot")
     assert (
         sdk.resolve_backend_name(endpoint="https://api.githubcopilot.com")
-        == "copilot_sdk"
+        == "openai_agents"
     )
 
 
-def test_resolve_backend_copilot_endpoint_falls_back_when_missing(monkeypatch):
+def test_resolve_backend_explicit_overrides_endpoint(monkeypatch):
     monkeypatch.delenv("SECLAB_TASKFLOW_BACKEND", raising=False)
-    # Force the optional import to fail by stashing a sentinel in sys.modules.
-    import sys
-
-    monkeypatch.setitem(sys.modules, "copilot", None)
     assert (
-        sdk.resolve_backend_name(endpoint="https://api.githubcopilot.com")
-        == "openai_agents"
+        sdk.resolve_backend_name(
+            explicit="anthropic_sdk",
+            endpoint="https://api.githubcopilot.com",
+        )
+        == "anthropic_sdk"
     )