From 1260f55c810691a6adbb4e8836666958e2e53e54 Mon Sep 17 00:00:00 2001
From: Todd Schiller <todd.schiller@gmail.com>
Date: Sun, 7 Jun 2026 16:59:30 -0400
Subject: [PATCH 1/4] Fix: extend encoded-payload-redact with text-cipher
 encodings (#203)

Add detection for ROT13, Atbash, reverse, leetspeak, NATO phonetic,
and Morse alongside the existing base64 / hex / percent matchers.
Text-cipher decodes are gated by a distinct common-English-word count
since the encoded form is itself printable; substitution ciphers
additionally skip candidates whose source is already English.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 extension/src/rules/encoded-payload-redact.ts | 566 +++++++++++++++++-
 1 file changed, 550 insertions(+), 16 deletions(-)

diff --git a/extension/src/rules/encoded-payload-redact.ts b/extension/src/rules/encoded-payload-redact.ts
index 2c49580..501e66a 100644
--- a/extension/src/rules/encoded-payload-redact.ts
+++ b/extension/src/rules/encoded-payload-redact.ts
@@ -1,20 +1,35 @@
 // Copyright (c) 2026 PixieBrix, Inc.
 // Licensed under PolyForm Shield 1.0.0 — see LICENSE.
 
-// Redact long base64 / hex / percent-encoded runs in text nodes — the
-// "decode this and follow it" carrier for indirect prompt injection. An
-// attacker drops an encoded blob into a page region the agent reads
-// (review body, product description, social embed caption); a human skims
-// past it as noise but an LLM agent may helpfully decode the bytes and
-// treat the result as content or as an instruction it should obey.
+// Redact long encoded runs in text nodes — the "decode this and follow
+// it" carrier for indirect prompt injection. An attacker drops an
+// encoded blob into a page region the agent reads (review body, product
+// description, social embed caption); a human skims past it as noise but
+// an LLM agent may helpfully decode the bytes and treat the result as
+// content or as an instruction it should obey.
 //
-// Detection runs three candidate windows per text node — base64/base64url,
-// hex, percent-encoded — each gated by a length floor that sits above
-// common hash sizes (SHA-256 = 64 hex, SHA-512 = 128 hex). The decisive
-// filter is the *decoded printable-ASCII ratio*: instructions decode to
-// readable text (ratio ~1.0); hashes, fingerprints, and image bytes decode
-// to high-entropy binary (ratio well below 0.85). JWTs are skipped so the
-// more specific `secrets-redact` label wins on overlap.
+// Two families of detector run per inline group:
+//
+//   * **Byte encodings** — base64 / base64url, hex, percent-encoded.
+//     Each is gated by a length floor that sits above common hash sizes
+//     (SHA-256 = 64 hex, SHA-512 = 128 hex). The decisive filter is the
+//     *decoded printable-ASCII ratio*: instructions decode to readable
+//     text (ratio ~1.0); hashes, fingerprints, and image bytes decode
+//     to high-entropy binary (ratio well below 0.85). JWTs are skipped
+//     so the more specific `secrets-redact` label wins on overlap.
+//
+//   * **Text ciphers** — ROT13, Atbash, reverse, leetspeak, NATO
+//     phonetic, Morse. The encoded form is itself printable text, so
+//     the printable-ASCII ratio is useless. The qualifier is instead a
+//     count of distinct common English function words in the decoded
+//     output: cipher payloads of useful length decode to several;
+//     random gibberish or non-cipher prose does not. For the
+//     letter-substitution ciphers (ROT13, Atbash, reverse, leetspeak)
+//     we additionally require the *original* candidate not already be
+//     English, so ordinary prose decodes to gibberish and falls
+//     through. NATO and Morse have distinctive enough forms that the
+//     candidate regex alone is selective; the decoded common-word check
+//     guards against ASCII art or coincidental Morse-shape runs.
 //
 // Matches are replaced inline with a click-to-reveal placeholder. False
 // positives cost one click, not lost data.
@@ -29,9 +44,43 @@ const MIN_BASE64_LENGTH = 120;
 const MIN_HEX_LENGTH = 160;
 const MIN_PERCENT_TRIPLETS = 20;
 
+// Text-cipher candidate floor for ROT13, Atbash, and reverse (leetspeak
+// has its own, smaller floor below). A candidate needs enough characters
+// to carry a meaningful instruction; under 80 chars it holds too little
+// text for the common-word qualifier to be a reliable signal.
+const MIN_TEXT_CIPHER_LENGTH = 80;
+
+// Leetspeak candidate floor. Smaller than the other ciphers because a
+// leet payload is denser (digit substitutions concentrate intent in
+// fewer chars). Combined with the digit-substitution count below, the
+// floor avoids matching ordinary text that happens to contain digits.
+const MIN_LEET_LENGTH = 40;
+const MIN_LEET_SUBSTITUTIONS = 4;
+
+// Distinct common-English-word hits required for the decoded output of
+// a text cipher to qualify as a payload. Three hits across a 40-char
+// decode is rare for random letter noise but routine for any English
+// sentence carrying a directive.
+const MIN_COMMON_WORDS = 3;
+
+// NATO and Morse minima — both encodings spell one letter per token, so
+// the token count IS the decoded length. Ten letters is the smallest
+// payload that can fit a single English directive verb plus its object.
+const MIN_NATO_WORDS = 10;
+const MIN_MORSE_TOKENS = 10;
+
+// Morse decoders that resolve to a known letter must clear this share
+// of the decoded tokens; below it the run is likely incidental dots and
+// dashes (ASCII art, bullets, repeated `---` separators) rather than a
+// payload.
+const MIN_MORSE_VALID_RATIO = 0.8;
+
 // Reject text nodes shorter than the smallest candidate window — cheap
-// per-node early-out.
-const MIN_TEXT_LENGTH = MIN_BASE64_LENGTH;
+// per-node early-out. The smallest cipher floor (Morse: 10 tokens of
+// 1+ symbol each, separated by single whitespace) is ~19 chars; we use
+// 20 so the dispatcher sees every plausible cipher payload while still
+// skipping short text nodes (UI labels, tab text, badges).
+const MIN_TEXT_LENGTH = 20;
 
 // Decoded byte stream must be this fraction printable ASCII (space..~,
 // plus \t \n \r) to count as "decodes to readable text". Hashes and
@@ -167,6 +216,402 @@ function decodePercent(candidate: string): Uint8Array | null {
   return bytes;
 }
 
+// Distinct high-frequency English function words — articles, pronouns,
+// prepositions, conjunctions, modal/auxiliary verbs, common short
+// verbs. Deliberately *generic*: any English sentence of useful length
+// hits several, and the set carries no injection-specific phrasing per
+// the docs-style guidance to keep adversarial vocabulary out of source.
+const COMMON_ENGLISH_WORDS = new Set([
+  "the",
+  "and",
+  "you",
+  "for",
+  "are",
+  "with",
+  "this",
+  "that",
+  "your",
+  "have",
+  "from",
+  "they",
+  "will",
+  "what",
+  "when",
+  "but",
+  "not",
+  "any",
+  "can",
+  "out",
+  "all",
+  "one",
+  "now",
+  "about",
+  "after",
+  "before",
+  "these",
+  "their",
+  "them",
+  "than",
+  "then",
+  "into",
+  "would",
+  "could",
+  "should",
+  "must",
+  "more",
+  "some",
+  "such",
+  "only",
+  "very",
+  "just",
+  "also",
+  "where",
+  "which",
+  "while",
+  "who",
+  "why",
+  "how",
+  "his",
+  "her",
+  "she",
+  "him",
+  "its",
+  "been",
+  "were",
+  "was",
+  "yes",
+  "let",
+  "make",
+  "use",
+  "see",
+  "get",
+  "give",
+  "take",
+  "made",
+  "want",
+  "tell",
+  "ask",
+  "show",
+  "find",
+  "know",
+  "think",
+  "look",
+  "come",
+  "say",
+  "good",
+  "well",
+  "back",
+  "down",
+  "over",
+  "under",
+  "between",
+  "below",
+  "above",
+  "every",
+  "each",
+  "other",
+  "another",
+  "anyone",
+  "everyone",
+]);
+
+function countDistinctCommonWords(text: string): number {
+  const seen = new Set<string>();
+  for (const m of text.toLowerCase().matchAll(/[a-z]+/g)) {
+    const word = m[0];
+    if (COMMON_ENGLISH_WORDS.has(word)) {
+      seen.add(word);
+    }
+  }
+  return seen.size;
+}
+
+// Substitution-cipher decoders. All are 1:1 on the character axis so
+// the decoded length equals the original — match indices map straight
+// through.
+const LOWER_A = 97; // 'a'
+const UPPER_A = 65; // 'A'
+const ALPHABET_LENGTH = 26;
+const ROT13_SHIFT = 13;
+
+function rot13(text: string): string {
+  return text.replaceAll(/[a-zA-Z]/g, (c) => {
+    const code = c.codePointAt(0) ?? 0;
+    const base = code >= LOWER_A ? LOWER_A : UPPER_A;
+    return String.fromCodePoint(
+      ((code - base + ROT13_SHIFT) % ALPHABET_LENGTH) + base,
+    );
+  });
+}
+
+function atbash(text: string): string {
+  return text.replaceAll(/[a-zA-Z]/g, (c) => {
+    const code = c.codePointAt(0) ?? 0;
+    const base = code >= LOWER_A ? LOWER_A : UPPER_A;
+    return String.fromCodePoint(ALPHABET_LENGTH - 1 - (code - base) + base);
+  });
+}
+
+function reverseText(text: string): string {
+  // Unicode-aware reverse so any astral pairs survive.
+  return [...text].reverse().join("");
+}
+
+// Leetspeak substitution table — only the substitutions that obscure a
+// letter behind a digit or symbol. Ordinary digits (`1st`, `iPhone 13`)
+// get mapped too, which on its own would be a false positive; keeping
+// prose with incidental digits from qualifying is the job of the gate
+// in `collectLeet`, not of this table.
+const LEET_MAP: Record<string, string> = {
+  "0": "o",
+  "1": "i",
+  "3": "e",
+  "4": "a",
+  "5": "s",
+  "7": "t",
+  "8": "b",
+  "@": "a",
+  $: "s",
+  "!": "i",
+};
+
+// Character class covers every leet substitution we recognize. Each
+// occurrence is a candidate disguised letter; we require a minimum
+// count of these in any candidate window before attempting a decode so
+// that ordinary text with incidental digits doesn't qualify.
+const LEET_SUBSTITUTION_CHAR_CLASS = String.raw`[0134578@$!]`;
+
+function deleet(text: string): string {
+  return text.replaceAll(
+    new RegExp(LEET_SUBSTITUTION_CHAR_CLASS, "g"),
+    (c) => LEET_MAP[c] ?? c,
+  );
+}
+
+// NATO phonetic alphabet — one word per encoded letter. Includes both
+// "juliet" and "juliett" spellings; "x-ray" / "xray" handled as the
+// hyphen-stripped token because the candidate scanner already strips
+// hyphens out of word tokens.
+const NATO_FIRST_LETTER: Record<string, string> = {
+  alpha: "A",
+  alfa: "A",
+  bravo: "B",
+  charlie: "C",
+  delta: "D",
+  echo: "E",
+  foxtrot: "F",
+  golf: "G",
+  hotel: "H",
+  india: "I",
+  juliet: "J",
+  juliett: "J",
+  kilo: "K",
+  lima: "L",
+  mike: "M",
+  november: "N",
+  oscar: "O",
+  papa: "P",
+  quebec: "Q",
+  romeo: "R",
+  sierra: "S",
+  tango: "T",
+  uniform: "U",
+  victor: "V",
+  whiskey: "W",
+  whisky: "W",
+  xray: "X",
+  yankee: "Y",
+  zulu: "Z",
+};
+
+// Morse map — letters and digits only. Punctuation codes are excluded:
+// payloads rarely need them and including them widens the false-match
+// surface for sparse dot/dash strings.
+const MORSE_MAP: Record<string, string> = {
+  ".-": "A",
+  "-...": "B",
+  "-.-.": "C",
+  "-..": "D",
+  ".": "E",
+  "..-.": "F",
+  "--.": "G",
+  "....": "H",
+  "..": "I",
+  ".---": "J",
+  "-.-": "K",
+  ".-..": "L",
+  "--": "M",
+  "-.": "N",
+  "---": "O",
+  ".--.": "P",
+  "--.-": "Q",
+  ".-.": "R",
+  "...": "S",
+  "-": "T",
+  "..-": "U",
+  "...-": "V",
+  ".--": "W",
+  "-..-": "X",
+  "-.--": "Y",
+  "--..": "Z",
+  "-----": "0",
+  ".----": "1",
+  "..---": "2",
+  "...--": "3",
+  "....-": "4",
+  ".....": "5",
+  "-....": "6",
+  "--...": "7",
+  "---..": "8",
+  "----.": "9",
+};
+
+// Candidate windows. Each is conservatively scoped: word-shaped runs
+// long enough that the qualifier will see useful signal, with endpoints
+// anchored on alphanumerics so trailing punctuation doesn't drift the
+// match boundary into surrounding prose.
+const TEXT_CIPHER_CANDIDATE = new RegExp(
+  String.raw`[A-Za-z][A-Za-z\s.,'"!?:;\-]{${MIN_TEXT_CIPHER_LENGTH - 2},}[A-Za-z]`,
+  "g",
+);
+const LEET_CANDIDATE = new RegExp(
+  String.raw`[A-Za-z0-9@$!][A-Za-z0-9@$!\s.,'"?:;\-]{${MIN_LEET_LENGTH - 2},}[A-Za-z0-9@$!]`,
+  "g",
+);
+const MORSE_CANDIDATE = new RegExp(
+  String.raw`(?:[.\-]{1,7}[ \t/]+){${MIN_MORSE_TOKENS - 1},}[.\-]{1,7}`,
+  "g",
+);
+
+function countLeetSubstitutions(text: string): number {
+  return (text.match(new RegExp(LEET_SUBSTITUTION_CHAR_CLASS, "g")) ?? [])
+    .length;
+}
+
+interface CipherDecodeResult {
+  decoded: string;
+  commonWords: number;
+}
+
+function tryCipherDecode(
+  candidate: string,
+  decoder: (text: string) => string,
+): CipherDecodeResult | null {
+  const decoded = decoder(candidate);
+  const commonWords = countDistinctCommonWords(decoded);
+  if (commonWords < MIN_COMMON_WORDS) {
+    return null;
+  }
+  return { decoded, commonWords };
+}
+
+// True when the candidate already contains MIN_COMMON_WORDS distinct
+// common words, i.e. reads as plain English. Callers skip such windows.
+// NOTE(review): this is not purely a performance gate — a window that
+// mixes plain prose with ciphertext is skipped as well; confirm intent.
+function alreadyEnglish(candidate: string): boolean {
+  return countDistinctCommonWords(candidate) >= MIN_COMMON_WORDS;
+}
+
+interface NatoRun {
+  start: number;
+  end: number;
+  decoded: string;
+}
+
+function isAlphabetSequence(letters: string): boolean {
+  // A run is a sequential alphabet drill (ABCDE… or BCDEF…) iff every
+  // adjacent pair differs by exactly 1 in code-point order. We treat
+  // these as instructional content rather than a payload — alphabet
+  // pages and signal-corps drills shouldn't be redacted.
+  for (let i = 1; i < letters.length; i++) {
+    const prev = letters.codePointAt(i - 1) ?? 0;
+    const curr = letters.codePointAt(i) ?? 0;
+    if (curr - prev !== 1) {
+      return false;
+    }
+  }
+  return true;
+}
+
+function findNatoRuns(text: string): NatoRun[] {
+  // Scan word tokens linearly so a long alternation regex doesn't
+  // backtrack. A NATO run is a maximal sequence of NATO tokens
+  // separated by whitespace, hyphens, or commas; any other token ends
+  // the run. Tokens are matched case-insensitively on `text` itself:
+  // lowercasing the whole string can change its length and skew the
+  // offsets. "x-ray" is one token so the hyphenated spelling maps to X.
+  const runs: NatoRun[] = [];
+  const TOKEN_RE = /x-ray|[a-z]+/gi;
+  let current: { start: number; end: number; letters: string } | null = null;
+  let lastTokenEnd = -1;
+  const flush = (): void => {
+    if (current !== null && current.letters.length >= MIN_NATO_WORDS) {
+      const { start, end, letters } = current;
+      runs.push({ start, end, decoded: letters });
+    }
+  };
+  for (const m of text.matchAll(TOKEN_RE)) {
+    const start = m.index;
+    const end = start + m[0].length;
+    const gap = text.slice(lastTokenEnd, start);
+    const token = m[0].toLowerCase().replace("-", "");
+    // Own-property lookup: plain words such as "constructor" must not
+    // resolve to members inherited from Object.prototype.
+    const letter = Object.hasOwn(NATO_FIRST_LETTER, token)
+      ? NATO_FIRST_LETTER[token]
+      : undefined;
+    const gapIsSeparator = lastTokenEnd === -1 || /^[\s,\-]+$/.test(gap);
+    if (letter && (current === null || gapIsSeparator)) {
+      if (current === null) {
+        current = { start, end, letters: letter };
+      } else {
+        current.end = end;
+        current.letters += letter;
+      }
+    } else {
+      flush();
+      current = letter ? { start, end, letters: letter } : null;
+    }
+    lastTokenEnd = end;
+  }
+  flush();
+  return runs;
+}
+
+interface MorseDecodeResult {
+  decoded: string;
+  validRatio: number;
+}
+
+function decodeMorse(candidate: string): MorseDecodeResult {
+  // Word separator: `/` (with optional whitespace). Letter separator:
+  // any whitespace run.
+  const words = candidate.split(/\s*\/\s*/);
+  const decodedWords: string[] = [];
+  let valid = 0;
+  let total = 0;
+  for (const word of words) {
+    const symbols = word.trim().split(/\s+/).filter(Boolean);
+    let chunk = "";
+    for (const sym of symbols) {
+      total++;
+      const letter = MORSE_MAP[sym];
+      if (letter) {
+        valid++;
+        chunk += letter;
+      }
+    }
+    if (chunk.length > 0) {
+      decodedWords.push(chunk);
+    }
+  }
+  return {
+    decoded: decodedWords.join(" "),
+    validRatio: total === 0 ? 0 : valid / total,
+  };
+}
+
 function qualifies(decoded: Uint8Array | null): boolean {
   if (decoded === null) {
     return false;
@@ -269,12 +714,101 @@ function collectPercent(text: string, matches: InlineMatch[]): void {
   }
 }
 
+function collectSubstitutionCipher(
+  text: string,
+  decoder: (text: string) => string,
+  matches: InlineMatch[],
+): void {
+  for (const m of text.matchAll(TEXT_CIPHER_CANDIDATE)) {
+    const candidate = m[0];
+    if (alreadyEnglish(candidate)) {
+      continue;
+    }
+    if (tryCipherDecode(candidate, decoder) !== null) {
+      matches.push({
+        start: m.index,
+        end: m.index + candidate.length,
+        label: "[encoded payload hidden]",
+      });
+    }
+  }
+}
+
+function collectReverse(text: string, matches: InlineMatch[]): void {
+  // Reversal is gated exactly like the letter-substitution ciphers:
+  //
+  //   1. same candidate window (TEXT_CIPHER_CANDIDATE),
+  //   2. same skip for candidates that are already English,
+  //   3. same decoded common-word floor (via tryCipherDecode),
+  //   4. same placeholder label on the emitted match.
+  //
+  // Delegate to the shared collector with `reverseText` as the decoder
+  // rather than keeping a second copy of that loop, so a change to any
+  // of the gates cannot update one path and miss the other. Match
+  // order and offsets are unchanged: the collector appends one entry
+  // per qualifying window, in scan order.
+  collectSubstitutionCipher(text, reverseText, matches);
+}
+
+function collectLeet(text: string, matches: InlineMatch[]): void {
+  const label = "[encoded payload hidden]";
+  for (const m of text.matchAll(LEET_CANDIDATE)) {
+    const candidate = m[0];
+    const subs = countLeetSubstitutions(candidate);
+    // Already-English prose stays English after de-leeting, so skip it:
+    // otherwise incidental digits or `!` in ordinary text would qualify.
+    if (subs < MIN_LEET_SUBSTITUTIONS || alreadyEnglish(candidate)) {
+      continue;
+    }
+    if (tryCipherDecode(candidate, deleet) !== null) {
+      matches.push({ start: m.index, end: m.index + candidate.length, label });
+    }
+  }
+}
+
+function collectNato(text: string, matches: InlineMatch[]): void {
+  for (const run of findNatoRuns(text)) {
+    if (isAlphabetSequence(run.decoded)) {
+      continue;
+    }
+    matches.push({
+      start: run.start,
+      end: run.end,
+      label: "[encoded payload hidden]",
+    });
+  }
+}
+
+function collectMorse(text: string, matches: InlineMatch[]): void {
+  for (const m of text.matchAll(MORSE_CANDIDATE)) {
+    const candidate = m[0];
+    const { decoded, validRatio } = decodeMorse(candidate);
+    if (validRatio < MIN_MORSE_VALID_RATIO) {
+      continue;
+    }
+    if (countDistinctCommonWords(decoded) < MIN_COMMON_WORDS) {
+      continue;
+    }
+    matches.push({
+      start: m.index,
+      end: m.index + candidate.length,
+      label: "[encoded payload hidden]",
+    });
+  }
+}
+
 function collectMatches(text: string): InlineMatch[] {
   const matches: InlineMatch[] = [];
   const jwtRanges = collectJwtRanges(text);
   collectBase64(text, jwtRanges, matches);
   collectHex(text, matches);
   collectPercent(text, matches);
+  collectSubstitutionCipher(text, rot13, matches);
+  collectSubstitutionCipher(text, atbash, matches);
+  collectReverse(text, matches);
+  collectLeet(text, matches);
+  collectNato(text, matches);
+  collectMorse(text, matches);
 
   // Sort by start, then prefer the longest on ties so a base64 candidate
   // wins over a hex prefix of the same span. Merge by dropping any match
@@ -295,7 +829,7 @@ export const encodedPayloadRedactRule = defineInlineTextRedactRule({
   id: "encoded-payload-redact",
   label: "Redact Encoded Payloads",
   description:
-    "Redact long base64, hex, or percent-encoded runs in text nodes whose decoded bytes are mostly readable text. Defends against the 'decode this and follow it' indirect-injection carrier; hashes, fingerprints, and binary blobs are left alone.",
+    "Redact long encoded runs in text nodes whose decoded form is readable English. Covers base64 / hex / percent (byte encodings) and ROT13 / Atbash / reverse / leetspeak / NATO phonetic / Morse (text ciphers). Defends against the 'decode this and follow it' indirect-injection carrier; hashes, fingerprints, and binary blobs are left alone.",
   minLength: MIN_TEXT_LENGTH,
   collectMatches,
 });

From e54a74716b4a3bf751af568c4c29971780fb507c Mon Sep 17 00:00:00 2001
From: Todd Schiller <todd.schiller@gmail.com>
Date: Sun, 7 Jun 2026 17:13:05 -0400
Subject: [PATCH 2/4] Test: cover ROT13/Atbash/reverse/leet/NATO/Morse in
 encoded-payload-redact
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds 13 example tests and 5 property tests for the text-cipher
detection paths. Source files contain only ciphertext or symbolic
runs — benign English filler is encoded at test time so adversarial
phrasing never appears in plaintext.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../encoded-payload-redact.property.test.ts   | 199 ++++++++++++
 .../__tests__/encoded-payload-redact.test.ts  | 284 ++++++++++++++++++
 2 files changed, 483 insertions(+)

diff --git a/extension/src/rules/__tests__/encoded-payload-redact.property.test.ts b/extension/src/rules/__tests__/encoded-payload-redact.property.test.ts
index 7278b7e..9266a07 100644
--- a/extension/src/rules/__tests__/encoded-payload-redact.property.test.ts
+++ b/extension/src/rules/__tests__/encoded-payload-redact.property.test.ts
@@ -199,6 +199,205 @@ function sortedSplitsArb(length: number) {
     .map((indices) => indices.toSorted((a, b) => a - b));
 }
 
+// Text-cipher helpers. Encode benign-prose generators into cipher form
+// at test time so the source file holds only ciphertext or symbolic
+// runs — adversarial phrasing never appears in plaintext.
+
+const A_CODE = 65;
+const a_CODE = 97;
+
+function rot13(text: string): string {
+  return text.replaceAll(/[a-zA-Z]/g, (c) => {
+    const code = c.codePointAt(0) ?? 0;
+    const base = code >= a_CODE ? a_CODE : A_CODE;
+    return String.fromCodePoint(((code - base + 13) % 26) + base);
+  });
+}
+
+function atbash(text: string): string {
+  return text.replaceAll(/[a-zA-Z]/g, (c) => {
+    const code = c.codePointAt(0) ?? 0;
+    const base = code >= a_CODE ? a_CODE : A_CODE;
+    return String.fromCodePoint(26 - 1 - (code - base) + base);
+  });
+}
+
+function reverseText(text: string): string {
+  // `charAt` (vs `text[i]`) keeps the result `string` rather than
+  // `string | undefined`. Test inputs are pure ASCII so no
+  // astral-pair correctness concern with code-unit iteration.
+  let out = "";
+  for (let i = text.length - 1; i >= 0; i--) {
+    out += text.charAt(i);
+  }
+  return out;
+}
+
+const NATO_ENCODE: Record<string, string> = {
+  A: "alpha",
+  B: "bravo",
+  C: "charlie",
+  D: "delta",
+  E: "echo",
+  F: "foxtrot",
+  G: "golf",
+  H: "hotel",
+  I: "india",
+  J: "juliet",
+  K: "kilo",
+  L: "lima",
+  M: "mike",
+  N: "november",
+  O: "oscar",
+  P: "papa",
+  Q: "quebec",
+  R: "romeo",
+  S: "sierra",
+  T: "tango",
+  U: "uniform",
+  V: "victor",
+  W: "whiskey",
+  X: "xray",
+  Y: "yankee",
+  Z: "zulu",
+};
+
+function natoEncode(letters: string): string {
+  const out: string[] = [];
+  for (const char of letters.toUpperCase()) {
+    const word = NATO_ENCODE[char];
+    if (word) {
+      out.push(word);
+    }
+  }
+  return out.join(" ");
+}
+
+// Vocabulary of common English function words drawn from the rule's
+// internal qualifier set. Built-in fast-check generators can't conjure
+// "looks English to the rule" prose, so the property tests sample from
+// this representative subset. Entries are unique so sampling is uniform;
+// `commonProseArb` keeps only draws with at least three distinct words
+// (the rule's common-word floor) and at least 80 characters.
+const COMMON_WORD_VOCAB = [
+  "the",
+  "and",
+  "you",
+  "for",
+  "this",
+  "that",
+  "with",
+  "have",
+  "from",
+  "when",
+  "what",
+  "should",
+  "could",
+  "would",
+  "must",
+  "your",
+  "their",
+  "every",
+  "other",
+  "each",
+] as const;
+
+const commonProseArb = fc
+  .array(fc.constantFrom(...COMMON_WORD_VOCAB), {
+    minLength: 20,
+    maxLength: 40,
+  })
+  .map((words) => words.join(" "))
+  .filter((s) => s.length >= 80 && new Set(s.split(" ")).size >= 3);
+
+// Random letters A..Z. Filter out runs that are strict alphabet
+// sequences (ABCDE…) — the rule treats those as instructional content.
+const natoLettersArb = fc
+  .array(
+    fc
+      .integer({ min: 0, max: 25 })
+      .map((i) => String.fromCodePoint(A_CODE + i)),
+    { minLength: 10, maxLength: 24 },
+  )
+  .map((letters) => letters.join(""))
+  .filter((letters) => {
+    for (let i = 1; i < letters.length; i++) {
+      const previous = letters.codePointAt(i - 1) ?? 0;
+      const current = letters.codePointAt(i) ?? 0;
+      if (current - previous !== 1) {
+        return true;
+      }
+    }
+    return false;
+  });
+
+describe("encoded-payload-redact text ciphers (property)", () => {
+  it("redacts ROT13-encoded common-word prose", () => {
+    fc.assert(
+      fc.property(commonProseArb, (prose) => {
+        const ciphertext = rot13(prose);
+        // Parens delimit the cipher candidate so surrounding context
+        // doesn't get pulled into the match (`(` is outside the
+        // candidate's allowed char class).
+        const body = applyToText(`(prefix) ${ciphertext} (suffix)`);
+        expect(body.querySelector(`.${PLACEHOLDER_CLASS}`)?.textContent).toBe(
+          "[encoded payload hidden]",
+        );
+        expect(body.textContent).not.toContain(ciphertext);
+      }),
+    );
+  });
+
+  it("redacts Atbash-encoded common-word prose", () => {
+    fc.assert(
+      fc.property(commonProseArb, (prose) => {
+        const ciphertext = atbash(prose);
+        const body = applyToText(`(prefix) ${ciphertext} (suffix)`);
+        expect(body.querySelector(`.${PLACEHOLDER_CLASS}`)?.textContent).toBe(
+          "[encoded payload hidden]",
+        );
+        expect(body.textContent).not.toContain(ciphertext);
+      }),
+    );
+  });
+
+  it("redacts reversed common-word prose", () => {
+    fc.assert(
+      fc.property(commonProseArb, (prose) => {
+        const ciphertext = reverseText(prose);
+        const body = applyToText(`(prefix) ${ciphertext} (suffix)`);
+        expect(body.querySelector(`.${PLACEHOLDER_CLASS}`)?.textContent).toBe(
+          "[encoded payload hidden]",
+        );
+        expect(body.textContent).not.toContain(ciphertext);
+      }),
+    );
+  });
+
+  it("redacts NATO-phonetic runs of >= 10 non-sequential letters", () => {
+    fc.assert(
+      fc.property(natoLettersArb, (letters) => {
+        const ciphertext = natoEncode(letters);
+        const body = applyToText(`(prefix) ${ciphertext} (suffix)`);
+        expect(body.querySelector(`.${PLACEHOLDER_CLASS}`)?.textContent).toBe(
+          "[encoded payload hidden]",
+        );
+        expect(body.textContent).not.toContain(ciphertext);
+      }),
+    );
+  });
+
+  it("leaves plain English common-word prose alone (no cipher false-fire)", () => {
+    fc.assert(
+      fc.property(commonProseArb, (prose) => {
+        const body = applyToText(prose);
+        expect(body.querySelector(`.${PLACEHOLDER_CLASS}`)).toBeNull();
+        expect(body.textContent).toContain(prose);
+      }),
+    );
+  });
+});
+
 describe("encoded-payload-redact cross-node detection (property)", () => {
   it("redacts base64 payloads regardless of how they're split across sibling spans", () => {
     fc.assert(
diff --git a/extension/src/rules/__tests__/encoded-payload-redact.test.ts b/extension/src/rules/__tests__/encoded-payload-redact.test.ts
index 965d237..f2381d8 100644
--- a/extension/src/rules/__tests__/encoded-payload-redact.test.ts
+++ b/extension/src/rules/__tests__/encoded-payload-redact.test.ts
@@ -45,6 +45,151 @@ function percentEncode(text: string): string {
   return out;
 }
 
+// Benign English sentence rich in distinct common function words. Used as
+// the *cleartext* for the text-cipher positive cases: the ciphers
+// encode this string at test time so the source file holds only the
+// resulting gibberish (ROT13/Atbash) or symbolic form (NATO/Morse),
+// keeping adversarial phrasing out of the file while still exercising
+// the decoded common-word qualifier (>= 3 distinct hits).
+const CIPHER_CLEARTEXT =
+  "you can see this from above and you know what should come next " +
+  "when the time comes for you to look around";
+
+function rot13(text: string): string {
+  const A = 65;
+  const a = 97;
+  return text.replaceAll(/[a-zA-Z]/g, (c) => {
+    const code = c.codePointAt(0) ?? 0;
+    const base = code >= a ? a : A;
+    return String.fromCodePoint(((code - base + 13) % 26) + base);
+  });
+}
+
+function atbash(text: string): string {
+  const A = 65;
+  const a = 97;
+  return text.replaceAll(/[a-zA-Z]/g, (c) => {
+    const code = c.codePointAt(0) ?? 0;
+    const base = code >= a ? a : A;
+    return String.fromCodePoint(26 - 1 - (code - base) + base);
+  });
+}
+
+function reverseText(text: string): string {
+  // `charAt` (vs `text[i]`) keeps the result `string` rather than
+  // `string | undefined`. Test inputs are pure ASCII so no
+  // astral-pair correctness concern with code-unit iteration.
+  let out = "";
+  for (let i = text.length - 1; i >= 0; i--) {
+    out += text.charAt(i);
+  }
+  return out;
+}
+
+const LEET_ENCODE: Record<string, string> = {
+  o: "0",
+  i: "1",
+  e: "3",
+  a: "4",
+  s: "5",
+  t: "7",
+  b: "8",
+};
+
+function leetEncode(text: string): string {
+  return text.replaceAll(
+    /[oieasbtOIEASBT]/g,
+    (c) => LEET_ENCODE[c.toLowerCase()] ?? c,
+  );
+}
+
+const NATO_ENCODE: Record<string, string> = {
+  A: "alpha",
+  B: "bravo",
+  C: "charlie",
+  D: "delta",
+  E: "echo",
+  F: "foxtrot",
+  G: "golf",
+  H: "hotel",
+  I: "india",
+  J: "juliet",
+  K: "kilo",
+  L: "lima",
+  M: "mike",
+  N: "november",
+  O: "oscar",
+  P: "papa",
+  Q: "quebec",
+  R: "romeo",
+  S: "sierra",
+  T: "tango",
+  U: "uniform",
+  V: "victor",
+  W: "whiskey",
+  X: "xray",
+  Y: "yankee",
+  Z: "zulu",
+};
+
+function natoEncode(letters: string): string {
+  const out: string[] = [];
+  for (const char of letters.toUpperCase()) {
+    const word = NATO_ENCODE[char];
+    if (word) {
+      out.push(word);
+    }
+  }
+  return out.join(" ");
+}
+
+const MORSE_ENCODE: Record<string, string> = {
+  A: ".-",
+  B: "-...",
+  C: "-.-.",
+  D: "-..",
+  E: ".",
+  F: "..-.",
+  G: "--.",
+  H: "....",
+  I: "..",
+  J: ".---",
+  K: "-.-",
+  L: ".-..",
+  M: "--",
+  N: "-.",
+  O: "---",
+  P: ".--.",
+  Q: "--.-",
+  R: ".-.",
+  S: "...",
+  T: "-",
+  U: "..-",
+  V: "...-",
+  W: ".--",
+  X: "-..-",
+  Y: "-.--",
+  Z: "--..",
+};
+
+function morseEncode(text: string): string {
+  return text
+    .toUpperCase()
+    .split(" ")
+    .map((word) => {
+      const symbols: string[] = [];
+      for (const c of word) {
+        const sym = MORSE_ENCODE[c];
+        if (sym) {
+          symbols.push(sym);
+        }
+      }
+      return symbols.join(" ");
+    })
+    .filter(Boolean)
+    .join(" / ");
+}
+
 const MUTATION_THROTTLE_MS = 250;
 
 async function flushMutations(): Promise<void> {
@@ -278,6 +423,145 @@ describe("encoded-payload-redact cross-node detection", () => {
   });
 });
 
+describe("encoded-payload-redact text-cipher positive cases", () => {
+  it("redacts a ROT13-encoded English sentence", () => {
+    const ciphertext = rot13(CIPHER_CLEARTEXT);
+    // Parens break the cipher candidate regex (`(` and `)` aren't in its
+    // char class), so the surrounding prose stays as its own text run.
+    document.body.innerHTML = `<p>(prefix) ${ciphertext} (suffix)</p>`;
+    encodedPayloadRedactRule.apply(document.body);
+
+    expect(document.querySelector(`.${PLACEHOLDER_CLASS}`)?.textContent).toBe(
+      "[encoded payload hidden]",
+    );
+    expect(document.body.textContent).not.toContain(ciphertext);
+    expect(document.body.textContent).toContain("(prefix)");
+    expect(document.body.textContent).toContain("(suffix)");
+  });
+
+  it("redacts an Atbash-encoded English sentence", () => {
+    const ciphertext = atbash(CIPHER_CLEARTEXT);
+    document.body.innerHTML = `<p>${ciphertext}</p>`;
+    encodedPayloadRedactRule.apply(document.body);
+
+    expect(document.querySelector(`.${PLACEHOLDER_CLASS}`)?.textContent).toBe(
+      "[encoded payload hidden]",
+    );
+    expect(document.body.textContent).not.toContain(ciphertext);
+  });
+
+  it("redacts a reversed English sentence", () => {
+    const ciphertext = reverseText(CIPHER_CLEARTEXT);
+    document.body.innerHTML = `<p>${ciphertext}</p>`;
+    encodedPayloadRedactRule.apply(document.body);
+
+    expect(document.querySelector(`.${PLACEHOLDER_CLASS}`)?.textContent).toBe(
+      "[encoded payload hidden]",
+    );
+    expect(document.body.textContent).not.toContain(ciphertext);
+  });
+
+  it("redacts a leetspeak-encoded English sentence", () => {
+    const ciphertext = leetEncode(CIPHER_CLEARTEXT);
+    document.body.innerHTML = `<p>${ciphertext}</p>`;
+    encodedPayloadRedactRule.apply(document.body);
+
+    expect(document.querySelector(`.${PLACEHOLDER_CLASS}`)?.textContent).toBe(
+      "[encoded payload hidden]",
+    );
+    expect(document.body.textContent).not.toContain(ciphertext);
+  });
+
+  it("redacts a NATO-phonetic spelling run of >= 10 non-sequential letters", () => {
+    // Spells THEQUICKBROWNFOX — 16 NATO tokens, not an alphabet sequence.
+    const ciphertext = natoEncode("thequickbrownfox");
+    document.body.innerHTML = `<p>${ciphertext}</p>`;
+    encodedPayloadRedactRule.apply(document.body);
+
+    expect(document.querySelector(`.${PLACEHOLDER_CLASS}`)?.textContent).toBe(
+      "[encoded payload hidden]",
+    );
+    expect(document.body.textContent).not.toContain(ciphertext);
+  });
+
+  it("redacts a Morse-encoded English phrase", () => {
+    const ciphertext = morseEncode(
+      "the quick brown fox jumps over the lazy dog and you can see this",
+    );
+    document.body.innerHTML = `<p>${ciphertext}</p>`;
+    encodedPayloadRedactRule.apply(document.body);
+
+    expect(document.querySelector(`.${PLACEHOLDER_CLASS}`)?.textContent).toBe(
+      "[encoded payload hidden]",
+    );
+    expect(document.body.textContent).not.toContain(ciphertext);
+  });
+});
+
+describe("encoded-payload-redact text-cipher false-positive guards", () => {
+  it("leaves ordinary English prose alone (no cipher fires)", () => {
+    const prose =
+      "You can see this paragraph from the homepage and you know what " +
+      "should come next when the time comes for you to look around.";
+    document.body.innerHTML = `<p>${prose}</p>`;
+    encodedPayloadRedactRule.apply(document.body);
+
+    expect(document.querySelector(`.${PLACEHOLDER_CLASS}`)).toBeNull();
+    expect(document.body.textContent).toContain(prose);
+  });
+
+  it("leaves a NATO alphabet drill (A..J) visible", () => {
+    // Sequential NATO spelling — the rule treats this as instructional
+    // content (alphabet page / signal-corps drill), not a payload.
+    const drill =
+      "alpha bravo charlie delta echo foxtrot golf hotel india juliet";
+    document.body.innerHTML = `<p>${drill}</p>`;
+    encodedPayloadRedactRule.apply(document.body);
+
+    expect(document.querySelector(`.${PLACEHOLDER_CLASS}`)).toBeNull();
+    expect(document.body.textContent).toContain(drill);
+  });
+
+  it("leaves a short ROT13 snippet (under 80-char floor) alone", () => {
+    const ciphertext = rot13("you can see this from above");
+    document.body.innerHTML = `<p>${ciphertext}</p>`;
+    encodedPayloadRedactRule.apply(document.body);
+
+    expect(document.querySelector(`.${PLACEHOLDER_CLASS}`)).toBeNull();
+  });
+
+  it("leaves text with a single incidental digit alone (leet floor)", () => {
+    // Product copy with one leet-shape digit ("1") — below the
+    // MIN_LEET_SUBSTITUTIONS=4 floor, so the leet detector skips it.
+    const prose =
+      "Limited time offer for 1 day only: get our flagship product before midnight tonight and have it shipped tomorrow.";
+    document.body.innerHTML = `<p>${prose}</p>`;
+    encodedPayloadRedactRule.apply(document.body);
+
+    expect(document.querySelector(`.${PLACEHOLDER_CLASS}`)).toBeNull();
+  });
+
+  it("leaves a sparse dot/dash ASCII-art run alone (Morse valid-ratio floor)", () => {
+    // Repeating `... --- ...` separator-style run — every token maps to
+    // a Morse letter (S / O), but the decode ("SOSOS…") contains no
+    // common English words, so the detector is expected to skip it.
+    const ascii = "... --- ... --- ... --- ... --- ... --- ... --- ...";
+    document.body.innerHTML = `<p>${ascii}</p>`;
+    encodedPayloadRedactRule.apply(document.body);
+
+    expect(document.querySelector(`.${PLACEHOLDER_CLASS}`)).toBeNull();
+  });
+
+  it("leaves a single NATO word in prose alone (below 10-word floor)", () => {
+    const prose =
+      "Our flagship product is called Tango and customers love it for the quality of the materials.";
+    document.body.innerHTML = `<p>${prose}</p>`;
+    encodedPayloadRedactRule.apply(document.body);
+
+    expect(document.querySelector(`.${PLACEHOLDER_CLASS}`)).toBeNull();
+  });
+});
+
 describe("encoded-payload-redact teardown", () => {
   it("stops re-scanning after teardown", async () => {
     encodedPayloadRedactRule.apply(document.body);

From 0aaa0dc74fcdbe3137d8200b4c6d5db67d7e7cef Mon Sep 17 00:00:00 2001
From: Todd Schiller <todd.schiller@gmail.com>
Date: Sun, 7 Jun 2026 17:17:57 -0400
Subject: [PATCH 3/4] Lint: fix biome/eslint findings in encoded-payload-redact

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 extension/src/rules/encoded-payload-redact.ts | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/extension/src/rules/encoded-payload-redact.ts b/extension/src/rules/encoded-payload-redact.ts
index 501e66a..43a62ed 100644
--- a/extension/src/rules/encoded-payload-redact.ts
+++ b/extension/src/rules/encoded-payload-redact.ts
@@ -353,8 +353,11 @@ function atbash(text: string): string {
 }
 
 function reverseText(text: string): string {
-  // Unicode-aware reverse so any astral pairs survive.
-  return [...text].reverse().join("");
+  // Unicode-aware reverse so any astral pairs survive. Array.from over a
+  // string splits on code points (handling surrogate pairs), which is what
+  // we want here; spread would trigger no-misused-spread.
+  // eslint-disable-next-line unicorn/prefer-spread
+  return Array.from(text).toReversed().join("");
 }
 
 // Leetspeak substitution table — only the substitutions that obscure a
@@ -379,7 +382,7 @@ const LEET_MAP: Record<string, string> = {
 // occurrence is a candidate disguised letter; we require a minimum
 // count of these in any candidate window before attempting a decode so
 // that ordinary text with incidental digits doesn't qualify.
-const LEET_SUBSTITUTION_CHAR_CLASS = String.raw`[0134578@$!]`;
+const LEET_SUBSTITUTION_CHAR_CLASS = "[0134578@$!]";
 
 function deleet(text: string): string {
   return text.replaceAll(
@@ -525,9 +528,9 @@ function isAlphabetSequence(letters: string): boolean {
   // these as instructional content rather than a payload — alphabet
   // pages and signal-corps drills shouldn't be redacted.
   for (let i = 1; i < letters.length; i++) {
-    const prev = letters.codePointAt(i - 1) ?? 0;
-    const curr = letters.codePointAt(i) ?? 0;
-    if (curr - prev !== 1) {
+    const previous = letters.codePointAt(i - 1) ?? 0;
+    const current = letters.codePointAt(i) ?? 0;
+    if (current - previous !== 1) {
       return false;
     }
   }
@@ -549,7 +552,7 @@ function findNatoRuns(text: string): NatoRun[] {
     const end = start + m[0].length;
     const gap = text.slice(lastTokenEnd, start);
     const letter = NATO_FIRST_LETTER[m[0]];
-    const gapIsSeparator = lastTokenEnd === -1 || /^[\s,\-]+$/.test(gap);
+    const gapIsSeparator = lastTokenEnd === -1 || /^[\s,-]+$/.test(gap);
     if (letter && (current === null || gapIsSeparator)) {
       if (current === null) {
         current = { start, end, letters: letter };

From ed4e0251736706c12174bcc039e14270d356c2dc Mon Sep 17 00:00:00 2001
From: Todd Schiller <todd.schiller@gmail.com>
Date: Sun, 7 Jun 2026 17:22:08 -0400
Subject: [PATCH 4/4] Refactor: single-pass substitution-cipher collector,
 pre-compiled leet regex

Address review feedback on #216:

- Collapse rot13/atbash/reverse passes into one TEXT_CIPHER_CANDIDATE
  iteration with a decoder list; cuts regex + alreadyEnglish work to
  one-third per text group.
- Pre-compile LEET_SUBSTITUTION_RE at module level so deleet and
  countLeetSubstitutions stop allocating a regex per call inside the
  inner candidate loop.
- Drop the now-duplicate collectReverse wrapper.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 extension/src/rules/encoded-payload-redact.ts | 59 ++++++++-----------
 1 file changed, 25 insertions(+), 34 deletions(-)

diff --git a/extension/src/rules/encoded-payload-redact.ts b/extension/src/rules/encoded-payload-redact.ts
index 43a62ed..6b39297 100644
--- a/extension/src/rules/encoded-payload-redact.ts
+++ b/extension/src/rules/encoded-payload-redact.ts
@@ -382,13 +382,10 @@ const LEET_MAP: Record<string, string> = {
 // occurrence is a candidate disguised letter; we require a minimum
 // count of these in any candidate window before attempting a decode so
 // that ordinary text with incidental digits doesn't qualify.
-const LEET_SUBSTITUTION_CHAR_CLASS = "[0134578@$!]";
+const LEET_SUBSTITUTION_RE = /[0134578@$!]/g;
 
 function deleet(text: string): string {
-  return text.replaceAll(
-    new RegExp(LEET_SUBSTITUTION_CHAR_CLASS, "g"),
-    (c) => LEET_MAP[c] ?? c,
-  );
+  return text.replaceAll(LEET_SUBSTITUTION_RE, (c) => LEET_MAP[c] ?? c);
 }
 
 // NATO phonetic alphabet — one word per encoded letter. Includes both
@@ -487,8 +484,7 @@ const MORSE_CANDIDATE = new RegExp(
 );
 
 function countLeetSubstitutions(text: string): number {
-  return (text.match(new RegExp(LEET_SUBSTITUTION_CHAR_CLASS, "g")) ?? [])
-    .length;
+  return (text.match(LEET_SUBSTITUTION_RE) ?? []).length;
 }
 
 interface CipherDecodeResult {
@@ -717,9 +713,19 @@ function collectPercent(text: string, matches: InlineMatch[]): void {
   }
 }
 
-function collectSubstitutionCipher(
+// All substitution-style decoders share the same candidate window and
+// already-English gate, so we iterate TEXT_CIPHER_CANDIDATE once and try
+// each decoder. The first one that produces a readable decode wins; the
+// rest are skipped for that candidate (their match spans would overlap
+// and merge identically downstream anyway).
+const SUBSTITUTION_DECODERS: ReadonlyArray<(text: string) => string> = [
+  rot13,
+  atbash,
+  reverseText,
+];
+
+function collectSubstitutionCiphers(
   text: string,
-  decoder: (text: string) => string,
   matches: InlineMatch[],
 ): void {
   for (const m of text.matchAll(TEXT_CIPHER_CANDIDATE)) {
@@ -727,28 +733,15 @@ function collectSubstitutionCipher(
     if (alreadyEnglish(candidate)) {
       continue;
     }
-    if (tryCipherDecode(candidate, decoder) !== null) {
-      matches.push({
-        start: m.index,
-        end: m.index + candidate.length,
-        label: "[encoded payload hidden]",
-      });
-    }
-  }
-}
-
-function collectReverse(text: string, matches: InlineMatch[]): void {
-  for (const m of text.matchAll(TEXT_CIPHER_CANDIDATE)) {
-    const candidate = m[0];
-    if (alreadyEnglish(candidate)) {
-      continue;
-    }
-    if (tryCipherDecode(candidate, reverseText) !== null) {
-      matches.push({
-        start: m.index,
-        end: m.index + candidate.length,
-        label: "[encoded payload hidden]",
-      });
+    for (const decoder of SUBSTITUTION_DECODERS) {
+      if (tryCipherDecode(candidate, decoder) !== null) {
+        matches.push({
+          start: m.index,
+          end: m.index + candidate.length,
+          label: "[encoded payload hidden]",
+        });
+        break;
+      }
     }
   }
 }
@@ -806,9 +799,7 @@ function collectMatches(text: string): InlineMatch[] {
   collectBase64(text, jwtRanges, matches);
   collectHex(text, matches);
   collectPercent(text, matches);
-  collectSubstitutionCipher(text, rot13, matches);
-  collectSubstitutionCipher(text, atbash, matches);
-  collectReverse(text, matches);
+  collectSubstitutionCiphers(text, matches);
   collectLeet(text, matches);
   collectNato(text, matches);
   collectMorse(text, matches);