Skip to content
Open
226 changes: 226 additions & 0 deletions agent/agent_runtime_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,231 @@ def strip_think_blocks(agent, content: str) -> str:
return content


# ── Reasoning-prose stripper ────────────────────────────────────────
# Some chat-tuned reasoning models (notably minimax-m3, kimi-k2.5/2.6)
# emit their chain-of-thought as natural-language sentences directly in
# the visible ``content`` field instead of using ``<think>…</think>`` XML
# tags or the structured ``reasoning_content`` channel. Examples that
# leaked to chat in the wild:
#
# - "Let me check what the gateway is doing."
# - "Found it. The error is at gateway/run.py:17678."
# - "Now the **real** bug surface for ... — let me check how the agent
# output gets transformed BEFORE it reaches truncate_message."
#
# The patterns cluster around reasoning meta-verbs (let me X, now I see,
# now the real X, found it, aha, I can see) typically at the start of a
# sentence. The fix: detect a leading "reasoning preamble" (one or more
# sentences matching the patterns) and drop it, keeping any substantive
# answer that follows. Also drop trailing reasoning sentences — the
# minimax-m3 pattern frequently emits one final "Found it." or "Got it."
# after the real answer. Conservative on purpose — the helper refuses
# to touch short messages (under ``min_length`` chars, 25 by default)
# and short-circuits to the input when stripping would leave < 8 chars
# of visible answer.
# CLI / TUI passes should bypass this helper (callers gate on platform)
# so the reasoning is still visible to the operator working locally.

# Legacy list of regex alternatives for reasoning-prose openers.
# NOTE(review): nothing in the visible part of this module reads this
# tuple — the compiled pattern used by ``strip_reasoning_prose`` is
# imported from ``agent.reasoning_prose`` further down. Confirm there is
# no other reader before relying on (or removing) these entries; they
# are broader than the list maintained in ``agent.reasoning_prose``.
_REASONING_PROSE_OPENERS = (
# "Let's / Lets / Let me / Let us" + optional adverb + action verb,
# ending on a word boundary. The verb is captured as group ``action``.
r"\blet(?:'s|s| me| us)\s+(?:also\s+|just\s+|first\s+|actually\s+|quickly\s+|"
r"now\s+|try\s+to\s+)?(?P<action>think|check|look|trace|find|examine|"
r"reason|verify|recall|consider|review|recheck|re-?verify|re-?check|"
r"test|push|step|back|back-?out|backtrack|skip|read|run|do|go|see|open|"
r"close|inspect|examine|investigate|walk|drill|dig|break|split|cross|"
r"poke|grep|search|scan|hit|re-?read|cross-?check|take|put|move|kill|"
r"restart|rebuild|recompile|rerun|reapply|revert|apply|patch|fix|"
r"compare|diff|map|reconstruct|retrace|simulate|verify|cross-reference|"
r"isolate|identify|catalogue|enumerate|list|count|summarise|summarize|"
r"elaborate|expand|recap)\b",
# "Now let me / Now I can / Now I see / Now the real / Now I understand"
r"\bnow\s+(?:let\s+me|i\s+can|i\s+see|i\s+have|i\s+understand|"
r"the\s+real|it's\s+clear|everything|the\s+full|here|we\s+have)\b",
# "Found it. / Found the bug. / Found the root cause. / Found X"
r"\bfound\s+(?:it|the|that|an?|one|two|three|my|our|another)\b",
# "Aha —" / "Aha:" (the pattern itself is just the bare word "aha")
r"\baha\b",
# "I see the / I can see / I lost / I should check / I need to / I think I"
r"\bi\s+(?:see|can\s+see|lost|should(?:n't)?|need|want|have\s+to|"
r"think\s+i|now\s+see|finally\s+see|now\s+have|now\s+need)\b",
# "Wait, / Wait —" (mid-thought correction); requires the punctuation
r"\bwait\s*[,—-]",
# "Interesting —" / "Interesting." (bare word, no punctuation required)
r"\binteresting\b",
# "This is the smoking gun / root cause / bug / real issue / ..."
r"\bthis\s+is\s+the\s+(?:smoking\s+gun|root\s+cause|bug|real\s+issue|"
r"actual\s+issue|real\s+bug|actual\s+bug|core\s+issue|key\s+issue)\b",
# "Confirmed:" / "Confirmed."
r"\bconfirmed\s*[:.]",
# "Let me give / hand / pass / set / tell / show" — light verbs that
# are not in the ``action`` list of the first entry
r"\blet\s+me\s+(?:give|hand|pass|set|tell|show)\b",
# "Let me try / attempt / see / head / jump / dive" ("see" overlaps
# with the first entry's ``action`` list)
r"\blet\s+me\s+(?:try|attempt|see|head|jump|dive)\b",
# "Got it." / "Got it —" (trailing acknowledgment)
r"\bgot\s+it\b",
# "Right," / "Right —" (trailing realization); requires the punctuation
r"\bright\s*[,—\-]",
# "OK so" / "Okay so" (transitional opener)
r"\b(?:ok|okay)\s+so\b",
)

# The compiled patterns live in ``agent.reasoning_prose`` (compiled once
# when that module is imported), which is the supported import surface
# for the cross-module stream-time consumer (gateway.stream_consumer)
# and any future call site. We keep the private names here as thin
# aliases for backward compatibility.
from agent.reasoning_prose import ( # noqa: E402 (re-export)
REASONING_PROSE_OPENERS_RE as _REASONING_PROSE_OPENERS_RE,
SENTENCE_END_RE as _SENTENCE_END,
)

# ``_SENTENCE_END`` (aliased in the import above) matches the sentence
# boundaries used to find the end of the preamble. The alias is kept as
# a module-level name for any in-tree call that imported it before the
# public re-export; new code should import ``SENTENCE_END_RE`` from
# ``agent.reasoning_prose`` directly.


def strip_reasoning_prose(
    agent,
    content: str,
    *,
    min_length: int = 25,
    min_remainder: int = 8,
    strip_trailing: bool = True,
) -> str:
    """Strip leading (and optionally trailing) reasoning-prose sentences.

    Some chat-tuned reasoning models (notably minimax-m3 and the kimi-k2.5
    family) emit their chain-of-thought as natural-language sentences
    directly in the visible ``content`` field. ``strip_think_blocks``
    handles XML tag variants (``<think>…</think>``) but is blind to
    prose-style leaks. This helper removes the leading reasoning
    preamble while preserving any substantive answer that follows.

    Args:
        agent: Not used by this function; kept so the call signature is
            unchanged for existing callers.
        content: Assistant text to clean.
        min_length: Content shorter than this is returned untouched, and
            the leading pass stops once the text has shrunk below it
            (a short message is likely a one-liner that *is* the answer).
        min_remainder: A sentence is only dropped when at least this many
            characters of text would remain afterwards.
        strip_trailing: Also drop up to two short reasoning sentences
            from the end of the text.

    Returns:
        The cleaned text. Falsy input yields ``""``; a truthy non-``str``
        value is returned as-is. The helper never deletes the whole
        message: when a cut would leave fewer than ``min_remainder``
        characters, stripping stops and the text is kept as it stands.

    Leading pass:
        Walks sentences one at a time from the start. A sentence is
        dropped only when the opener regex matches at position 0 (so an
        opener later in the text, e.g. "I can see the Submit button is
        red" in the middle of an answer, is never touched). The walk
        stops at the first sentence that is not an opener, or as soon as
        dropping one more sentence would leave a fragment.

    Trailing pass (``strip_trailing=True``):
        Catches the common minimax-m3 pattern where the model appends a
        final reasoning beat to an otherwise-good answer, e.g.
        "<sentence>. <sentence>. Found it." where "Found it." would
        otherwise leak. It only runs when the text is at least
        ``3 * min_length`` characters long and contains at least three
        sentences, and it only drops a tail that is shorter than
        ``min_length``, ends in ``.``/``!``/``?`` and starts with an
        opener.

    This helper does not check the platform itself. Callers are expected
    to skip it on CLI / TUI paths (the gateway
    ``_sanitize_gateway_final_response`` is the typical chokepoint).
    """
    if not content or not isinstance(content, str):
        return content or ""
    text = content
    if len(text) < min_length:
        return text

    # ── Leading preamble strip ────────────────────────────────────
    # ``match`` anchors at position 0, so only an opener that begins the
    # current text is considered. Each accepted cut removes at least the
    # opener itself, so ``text`` strictly shrinks and the loop terminates.
    # (The previous implementation re-joined the opener sentence onto the
    # remainder instead of dropping it, which never shortened the text
    # and spun forever on any input that started with an opener.)
    while len(text) >= min_length:
        opener = _REASONING_PROSE_OPENERS_RE.match(text)
        if opener is None:
            break
        # The sentence-end pattern also matches at end-of-text, so a
        # boundary is normally always found; fall back to "rest of the
        # text" defensively in case the pattern is ever tightened.
        boundary = _SENTENCE_END.search(text, opener.end())
        cut = boundary.end() if boundary is not None else len(text)
        remainder = text[cut:].strip()
        if len(remainder) < min_remainder:
            # Dropping this sentence would leave nothing (or a fragment):
            # keep what we have rather than emit an empty reply.
            break
        text = remainder

    # ── Trailing reasoning-sentence strip ─────────────────────────
    if not strip_trailing or len(text) < min_length * 3:
        return text

    # At most two trailing sentences are removed (covers "Found it. Got it.").
    for attempt in range(2):
        visible_start = len(text) - len(text.lstrip())
        visible_end = len(text.rstrip())
        # Keep only boundaries that have real text on both sides. The
        # sentence-end pattern always yields an empty match at the end
        # of the text; treating that as "the last boundary" made the
        # tail empty and turned this whole pass into a no-op.
        cuts = [
            m.end()
            for m in _SENTENCE_END.finditer(text)
            if m.start() > visible_start and m.end() < visible_end
        ]
        # Gate once on the text as it entered this pass: two internal
        # boundaries means at least three sentences, so a 2-sentence
        # answer is never trimmed.
        if attempt == 0 and len(cuts) < 2:
            break
        if not cuts:
            break
        tail_start = cuts[-1]
        tail = text[tail_start:].strip()
        if not tail or len(tail) >= min_length:
            break
        # The tail must be a complete sentence; if the assistant just
        # trails off without terminal punctuation, leave it alone.
        if not tail.endswith((".", "!", "?")):
            break
        if _REASONING_PROSE_OPENERS_RE.match(tail) is None:
            break
        trimmed = text[:tail_start].rstrip()
        if len(trimmed) < min_remainder:
            break
        text = trimmed

    return text


def recover_with_credential_pool(
agent,
Expand Down Expand Up @@ -2344,6 +2569,7 @@ def force_close_tcp_sockets(client: Any) -> int:
"drop_thinking_only_and_merge_users",
"restore_primary_runtime",
"extract_reasoning",
"strip_reasoning_prose",
"dump_api_request_debug",
"anthropic_prompt_cache_policy",
"create_openai_client",
Expand Down
145 changes: 145 additions & 0 deletions agent/reasoning_prose.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
"""Reasoning-prose stripping — public surface for the regex primitives.

Some chat-tuned reasoning models (notably minimax-m3 and the kimi-k2.5
family) emit their chain-of-thought as natural-language sentences
directly in the visible ``content`` field, instead of using XML-style
``<think>…</think>`` tags. We strip that prose in two places:

* **Final-response chokepoint** — ``agent.agent_runtime_helpers.strip_reasoning_prose``
walks the entire response and removes leading (and optionally trailing)
reasoning sentences. This module owns the regexes that function uses.
* **Stream-time** — ``gateway.stream_consumer.GatewayStreamConsumer`` strips
the leading reasoning sentence on the *first* delta of a turn, so users
never see the chain-of-thought flash by mid-stream. It needs the same
opener regex.

Originally both call sites imported the regexes as the private
``_REASONING_PROSE_OPENERS_RE`` and ``_SENTENCE_END`` symbols from
``agent.agent_runtime_helpers``. That created a tight, non-obvious
cross-module coupling — renaming the private symbol would silently break
the gateway's stream-time stripper, with no obvious link in either file
explaining the dependency.

This module is the supported import surface. Both call sites now use
the public ``REASONING_PROSE_OPENERS_RE`` and ``SENTENCE_END_RE`` names
imported from here, so the dependency is discoverable via the import
graph and grep finds it without needing to know the leading underscore
is load-bearing.

Pattern-design notes (kept here so future maintainers don't repeat the
mistake):

The opener patterns are *tuned* for chat-tuned reasoning models. A naive
list of "thinking verbs" (let me, I think, so, actually, wait) is too
broad — those words show up in normal user-facing answers all the time
("I'll be there at 5", "wait for it", "the first thing to try is X").
The mistake we hit before: matching ``\\bi'll\\b`` or ``\\bso[,\\s]`` as
opener patterns *anywhere* in the text causes legitimate mid-sentence
phrases to be silently deleted from the user-visible reply.

So every pattern here is **start-of-message-anchored in practice** by
the consumer (``strip_reasoning_prose`` and the stream-time stripper
both walk from position 0 and stop as soon as a non-opener is hit), and
each pattern is **verb-shaped**: it must be a meta-cognitive opener, not
a content word. "Let me check" is meta; "I'll be there" is not — the
distinction is that the meta opener comes with a thinking verb
attached (``let me VERB``, ``I think I VERB``, ``so let me``), and a
trailing verb is what separates it from the conversational use.
"""

from __future__ import annotations

import re


# ── Source of truth: the opener patterns ────────────────────────────────
# Each entry is ONE alternative of the combined regex (the entries are
# joined with ``|`` below). They match English chain-of-thought openers
# that some chat-tuned reasoning models emit directly in the assistant's
# visible content field.
#
# Tightness rules (read before adding new patterns):
#   1. A pattern must require a *thinking verb* attached. Bare ``\bso\b``,
#      bare ``\bfirst\b``, bare ``\bi'll\b``, etc. will match in normal
#      answers and silently delete content.
#   2. A pattern must be *anchored* by the consumer to start-of-message.
#      Don't try to catch mid-message reasoning here — the consumer
#      stops at the first non-opener, so any opener it matches must be
#      the *first* thing in the message.
#   3. Keep the list short. Every entry has a false-positive cost. If
#      you're tempted to add ``\bactually\b`` remember "actually works"
#      is a legitimate reply, not reasoning.
#   4. Use at most one named group across the whole tuple: both compiled
#      patterns below join every entry into a single regex, and a
#      repeated group name would fail to compile.
_REASONING_PROSE_OPENERS: tuple[str, ...] = (
    # "Let me / Let us / Let's / Lets" + (optional adverb) + thinking verb.
    # The verb list is the *only* thing that separates "Let me check"
    # (reasoning) from "Let's meet at 5" (content). Don't strip a verb.
    # ``['\u2019]`` accepts both the straight and the typographic
    # apostrophe. (An earlier form of this entry only matched
    # "let's"/"lets", so the headline "Let me check ..." case slipped
    # through.)
    r"\blet(?:['\u2019]?s|\s+me|\s+us)\s+"
    r"(?:also\s+|just\s+|first\s+|actually\s+|quickly\s+"
    r"|now\s+|try\s+to\s+)?"
    r"(?P<verb>think|check|look|trace|find|examine|reason|verify|"
    r"recall|consider|review|recheck|re-?verify|re-?check|test|push|"
    r"step\s+back|backtrack|skip|read|run|do|go|see|open|close|"
    r"inspect|investigate|walk|drill|dig|break|split|cross|"
    r"poke|grep|search|scan|hit|re-?read|cross-?check|take|put|"
    r"move|kill|restart|rebuild|recompile|rerun|reapply|revert|"
    r"apply|patch|fix|compare|diff|map|reconstruct|retrace|"
    r"simulate|isolate|identify|enumerate|summarise|summarize|"
    r"elaborate|expand|recap|give|hand|pass|set|tell|show|try|"
    r"attempt|head|jump|dive)\b",
    # "Now let me / Now I can see / Now I understand / Now the real"
    # NOTE(review): the ``let's me`` alternative is kept as-is for
    # compatibility; it looks like it was meant to be "now let's" —
    # confirm before widening, since that form has no verb attached.
    r"\bnow\s+(?:let['\u2019]?s\s+me|let\s+me|i\s+can\s+see|i\s+see|"
    r"i\s+have|i\s+understand|it['\u2019]?s\s+clear|the\s+real|"
    r"everything|the\s+full|here|we\s+have)\b",
    # "Found it" / "Found the bug" — punctuated and unpunctuated
    r"\bfound\s+(?:it|the|that|an?|one|two|three|my|our|another)\b",
    # "Aha" / "Aha —" (insight beat)
    r"\baha\b\s*[:—\-]?",
    # "I see the X" / "I can see the X" — needs a *noun phrase* after,
    # not "I see what you mean" (which is content). The
    # required-following-article distinguishes them.
    r"\bi\s+(?:see|can\s+see)\s+(?:the|a|an|my|our|this|that|these|those)\b",
    # "Smoking gun" / "this is the root cause" / "this is the bug"
    r"\bthis\s+is\s+the\s+(?:smoking\s+gun|root\s+cause|bug|"
    r"real\s+issue|actual\s+issue|real\s+bug|actual\s+bug|"
    r"core\s+issue|key\s+issue)\b",
    # Trailing-realization beats (used for the trailing-sentence strip).
    # "Got it." / "Got it —" / "Right," / "Right —" / "OK so" /
    # "Okay so" / "Confirmed:". These are LESS risky to match in the
    # leading pass too, because they only appear as sentence openers
    # when the model is announcing its own conclusion. A user-facing
    # answer doesn't start with "Got it."
    r"\bgot\s+it\b\s*[:—\-.]?",
    r"\bright\s*[,—\-]",
    r"\b(?:ok|okay)\s+so\b",
    r"\bconfirmed\s*[:.]",
)

# Compile once at import.
# The boundary prefix ``(?:^|(?<=[\.\!\?]\s)|(?<=\n))`` lets the opener
# match at the *start* of the string, right after sentence-ending
# punctuation + one whitespace character, or right after a newline.
# That's what lets a consumer walk sentence by sentence from the top.
# Consequence: ``.search()`` CAN return a hit in the middle of a
# message (at a later sentence start); a consumer that only wants the
# leading preamble must use ``.match()`` or check ``match.start() == 0``.
REASONING_PROSE_OPENERS_RE: re.Pattern[str] = re.compile(
    r"(?i)(?:^|(?<=[\.\!\?]\s)|(?<=\n))"
    r"\s*"
    r"(?:" + "|".join(_REASONING_PROSE_OPENERS) + r")",
    flags=re.UNICODE,
)

# A "starts with reasoning opener" pattern anchored to position 0.
# Used by the stream-time consumer, which only ever looks at the start
# of the message. The explicit ``\A`` is what makes this the SAFE form:
# it cannot match mid-message even if a caller uses ``.search()``
# instead of ``.match()``. Leading whitespace is tolerated.
STARTS_WITH_OPENER_RE: re.Pattern[str] = re.compile(
    r"\A\s*(?:" + "|".join(_REASONING_PROSE_OPENERS) + r")",
    flags=re.UNICODE | re.IGNORECASE,
)

# Sentence boundaries, used to find where a reasoning sentence ends.
# Three alternatives, tried at each position in this order:
#   1. whitespace that follows ``.``/``!``/``?`` AND precedes something
#      that looks like a new sentence (capital letter, quote, backtick,
#      opening parenthesis/bracket, or a bolded capital ``**X``). The
#      lookahead keeps "Wait —" / "Right —" from ending at their own
#      dash and keeps comma-followed clauses in the same sentence;
#   2. the end of the text (``$``). Because of this alternative,
#      ``search`` always finds a boundary and ``finditer`` always ends
#      with an empty match at the end of the text;
#   3. a blank line (paragraph break).
SENTENCE_END_RE: re.Pattern[str] = re.compile(
    r"(?<=[\.\!\?])\s+(?=[A-Z\"'`\(\[]|\*\*[A-Z])"
    r"|$"
    r"|\n\s*\n",
    flags=re.UNICODE,
)
Loading