Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .github/copilot-instructions.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Copilot Instructions

Follow the repository's canonical engineering skills under
`docs/engineering/skills/`.

For tests, read `docs/engineering/skills/testing.md` before adding, moving, or
reviewing test files. Do not duplicate or override that testing guidance here.
31 changes: 23 additions & 8 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,18 @@ jobs:
- run: pip install ruff>=0.9.0
- run: ruff format --check .

  # Runs the repository quality-guard script on every PR; gated on the
  # check-fork job like the other jobs in this workflow.
  quality-guards:
    name: Quality guards
    runs-on: ubuntu-latest
    needs: check-fork
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps the version as the string "3.14".
          python-version: "3.14"
      # NOTE(review): no dependency install step before this run — assumes
      # scripts/run_quality_guards.py is stdlib-only; confirm.
      - name: Run quality guards
        run: python scripts/run_quality_guards.py

check-changelog:
runs-on: ubuntu-latest
needs: check-fork
Expand Down Expand Up @@ -113,23 +125,25 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: "3.14"
- uses: astral-sh/setup-uv@v5
- run: uv sync --dev
- name: Install optimized test deps
run: pip install modal pytest numpy pandas
run: uv pip install modal pytest numpy pandas
- name: Ensure PR Modal environment exists
run: python .github/scripts/ensure_modal_environment.py
run: uv run python .github/scripts/ensure_modal_environment.py
- name: Sync Modal secrets to PR environment
run: python .github/scripts/sync_modal_secrets.py
run: uv run python .github/scripts/sync_modal_secrets.py
- name: Deploy Modal pipeline app to PR staging
run: modal deploy --env="${MODAL_ENVIRONMENT}" modal_app/pipeline.py
run: uv run modal deploy --env="${MODAL_ENVIRONMENT}" modal_app/pipeline.py
- name: Deploy Modal local-area app to PR staging
run: modal deploy --env="${MODAL_ENVIRONMENT}" modal_app/local_area.py
run: uv run modal deploy --env="${MODAL_ENVIRONMENT}" modal_app/local_area.py
- name: Deploy Modal H5 test harness to PR staging
run: modal deploy --env="${MODAL_ENVIRONMENT}" modal_app/h5_test_harness.py
run: uv run modal deploy --env="${MODAL_ENVIRONMENT}" modal_app/h5_test_harness.py
- name: Run optimized integration tests against PR staging
run: python -m pytest tests/optimized/ -v
run: uv run pytest tests/optimized/ -v
- name: Cleanup PR Modal environment
if: always()
run: python .github/scripts/delete_modal_environment.py
run: uv run python .github/scripts/delete_modal_environment.py

smoke-test:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -179,6 +193,7 @@ jobs:
fi

integration-tests:
name: Build datasets and run integration tests on Modal
runs-on: ubuntu-latest
needs: [check-fork, lint, decide-test-scope]
if: needs.decide-test-scope.outputs.run_integration == 'true'
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/push.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ jobs:
- run: pip install ruff>=0.9.0
- run: ruff format --check .

# ── Build and linear integration tests ──────────────────────
build-and-linear-integration-tests:
name: Build and linear integration tests
# ── Dataset build ───────────────────────────────────────────
build-datasets:
name: Build datasets
runs-on: ubuntu-latest
needs: lint
if: github.event.head_commit.message != 'Update package version'
Expand All @@ -29,7 +29,7 @@ jobs:
with:
python-version: "3.14"
- run: pip install modal
- name: Run linear integration tests on Modal
- name: Build datasets on Modal
run: |
modal run --env="${MODAL_ENVIRONMENT}" modal_app/data_build.py \
--upload \
Expand Down
14 changes: 14 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Codex Instructions

These instructions apply repository-wide.

## Skills system

Canonical AI-facing engineering skills live under `docs/engineering/skills/`.
Use those files as the source of truth across Codex, Claude, Copilot, and other
AI tools.

When adding, moving, or reviewing tests, read
`docs/engineering/skills/testing.md`. Do not put pytest files under
`policyengine_us_data/tests/`, do not import from `tests.conftest`, and do not
import helpers across test lanes.
10 changes: 9 additions & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,33 @@

## Testing

Canonical testing guidance lives in `docs/engineering/skills/testing.md`. If
this file conflicts with that skill, follow the skill and update this adapter.

### Running Tests
- `make test-unit` - Run unit tests only (fast, no data dependencies)
- `make test-integration` - Run integration tests (requires built H5 datasets)
- `make test` - Run all tests
- `pytest tests/unit/ -v` - Unit tests directly
- `pytest tests/integration/test_cps.py -v` - Specific integration test
- `python scripts/run_quality_guards.py` - Run layout/import quality guards

### Test Organization
Tests are in the top-level `tests/` directory, split into two sub-directories:
Tests are in the top-level `tests/` directory, split into these sub-directories:

- **`tests/unit/`** — Self-contained tests that use synthetic data, mocks, patches, or checked-in fixtures. Run in seconds with no external dependencies.
- `unit/datasets/` — unit tests for dataset code
- `unit/calibration/` — unit tests for calibration code

- **`tests/integration/`** — Tests that require built H5 datasets, HuggingFace downloads, Microsimulation objects, or database ETL. Named after the dataset they test.
- **`tests/optimized/`** — Tests that exercise deployed Modal/staging seams.

### Test Placement Rules
- **NEVER** put pytest files under `policyengine_us_data/tests/`; CI does not collect that tree
- **NEVER** put tests that require H5 files or Microsimulation in `unit/`
- **NEVER** put tests that use only synthetic data or mocks in `integration/`
- **NEVER** import from `tests.conftest`; fixtures are discovered automatically and helper functions belong in local support modules
- **NEVER** import helpers across test lanes, such as `tests.unit` from an integration test
- Integration test files are named after their dataset dependency: `test_cps.py` tests `cps_2024.h5`
- Sanity checks (value ranges, population counts) belong in the per-dataset integration test file, not in a separate sanity file
- When adding a new integration test, add it to the existing per-dataset file if one exists
Expand Down
1 change: 1 addition & 0 deletions changelog.d/760.added.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added local H5 traceability metadata and scope fingerprinting for calibration artifacts.
1 change: 1 addition & 0 deletions changelog.d/test-quality-guards.changed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add quality guards for test layout and document the testing skill for AI tooling.
13 changes: 13 additions & 0 deletions docs/engineering/skills/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Engineering Skills

This directory is the canonical source for AI-facing engineering rules.

Tool-specific instruction files such as `AGENTS.md`, `CLAUDE.md`, and
`.github/copilot-instructions.md` should point here instead of duplicating
implementation-specific guidance. When a rule changes, update the skill here
first, then keep adapters thin.

Current skills:

- `testing.md`: test layout, fixture scope, helper placement, and quality guard
expectations.
54 changes: 54 additions & 0 deletions docs/engineering/skills/testing.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Testing Skill

Use this skill whenever adding, moving, or reviewing tests.

## Canonical Layout

- Put unit tests under `tests/unit/`.
- Put data-dependent or runtime integration tests under `tests/integration/`.
- Put deployed Modal/staging tests under `tests/optimized/`.
- Do not add pytest files under `policyengine_us_data/tests/`; CI does not
collect that tree.

## Fixtures And Helpers

- Keep root `tests/conftest.py` empty or very lightweight. It must not import
cloud clients, Modal, Hugging Face, PolicyEngine runtime-heavy modules, or
package modules that transitively import those dependencies.
- Put domain-specific fixtures in the narrowest `conftest.py` that covers the
tests that use them.
- Put reusable helper functions in a local `support.py`, a local fixture module,
or `tests/support/`.
- Do not import from `tests.conftest`; pytest discovers fixtures automatically.
- Do not import across test lanes, for example from `tests.integration` into
`tests.unit` or from `tests.unit` into `tests.integration`. Move shared helpers
to `tests/support/` or colocate them with the tests.

## Dependency Boundaries

- Unit tests should not require real network credentials, Modal, Hugging Face,
or GCS. Mock those seams.
- Integration tests may require built data or heavier runtime setup, but should
be explicit about those requirements and skip cleanly when local artifacts are
unavailable.
- CI should run tests in an environment where project dependencies are installed
with `uv sync --dev` or an equivalent full test dependency install. A full
install is required, but it is not a substitute for fixture isolation.

## Quality Guards

Run this before opening or updating a PR:

```bash
python scripts/run_quality_guards.py
```

The current guard enforces:

- No package-internal pytest files under `policyengine_us_data/tests/`.
- No pytest files outside the approved top-level test lanes.
- No imports from `tests.conftest`.
- No imports across test lanes.

When adding a new guard, register it in `scripts/run_quality_guards.py` so CI
continues to expose a single `Quality guards` job.
133 changes: 96 additions & 37 deletions modal_app/local_area.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@

from modal_app.images import cpu_image as image # noqa: E402
from modal_app.resilience import reconcile_run_dir_fingerprint # noqa: E402
from policyengine_us_data.calibration.local_h5.fingerprinting import ( # noqa: E402
FingerprintingService,
PublishingInputBundle,
)
from policyengine_us_data.calibration.local_h5.partitioning import ( # noqa: E402
partition_weighted_work_items,
)
Expand Down Expand Up @@ -311,6 +315,63 @@ def get_version() -> str:
return pyproject["project"]["version"]


def _build_publishing_input_bundle(
    *,
    weights_path: Path,
    dataset_path: Path,
    db_path: Path | None,
    geography_path: Path | None,
    calibration_package_path: Path | None,
    run_config_path: Path | None,
    run_id: str,
    version: str,
    n_clones: int | None,
    seed: int,
    legacy_blocks_path: Path | None = None,
) -> PublishingInputBundle:
    """Assemble the coordinator's normalized input bundle for one publish scope.

    Pure adapter: maps the coordinator's local argument names onto the
    field names expected by ``PublishingInputBundle`` (for example
    ``dataset_path`` -> ``source_dataset_path``) with no other logic.
    """
    # Name the field mapping explicitly so the rename seams are easy to scan.
    bundle_fields = {
        "weights_path": weights_path,
        "source_dataset_path": dataset_path,
        "target_db_path": db_path,
        "exact_geography_path": geography_path,
        "calibration_package_path": calibration_package_path,
        "run_config_path": run_config_path,
        "run_id": run_id,
        "version": version,
        "n_clones": n_clones,
        "seed": seed,
        "legacy_blocks_path": legacy_blocks_path,
    }
    return PublishingInputBundle(**bundle_fields)


def _resolve_scope_fingerprint(
    *,
    inputs: PublishingInputBundle,
    scope: str,
    expected_fingerprint: str = "",
) -> str:
    """Return the fingerprint to use for this scope.

    Computes the current scope fingerprint from ``inputs``. When
    ``expected_fingerprint`` is non-empty (a value pinned by the pipeline),
    that pinned value is always returned — even when it disagrees with the
    freshly computed one — so resumes stay backward compatible; a warning is
    printed on disagreement. With no pinned value, the computed fingerprint
    is returned.
    """
    service = FingerprintingService()
    traceability = service.build_traceability(inputs=inputs, scope=scope)
    current = service.compute_scope_fingerprint(traceability)

    # No pinned value from the pipeline: use the freshly computed one.
    if not expected_fingerprint:
        return current

    if expected_fingerprint == current:
        print(f"Using pinned fingerprint from pipeline: {expected_fingerprint}")
        return expected_fingerprint

    # Pinned and current disagree: keep the pinned value so an in-flight run
    # can still resume against its original run directory, but say so loudly.
    print(
        "WARNING: Pinned fingerprint differs from current "
        f"{scope} scope fingerprint. "
        "Preserving pinned value for backward-compatible resume.\n"
        f"  Pinned: {expected_fingerprint}\n"
        f"  Current: {current}"
    )
    return expected_fingerprint


def partition_work(
work_items: List[Dict],
num_workers: int,
Expand Down Expand Up @@ -836,45 +897,26 @@ def coordinate_publish(
validate = False

# Fingerprint-based cache invalidation
if expected_fingerprint:
fingerprint = expected_fingerprint
print(f"Using pinned fingerprint from pipeline: {fingerprint}")
else:
geography_path_expr = (
f'Path("{geography_path}")' if geography_path.exists() else "None"
)
package_path_expr = (
f'Path("{calibration_package_path}")'
if calibration_package_path.exists()
else "None"
)
fp_result = subprocess.run(
_python_cmd(
"-c",
f"""
from pathlib import Path
from policyengine_us_data.calibration.publish_local_area import (
compute_input_fingerprint,
)
print(
compute_input_fingerprint(
Path("{weights_path}"),
Path("{dataset_path}"),
{n_clones},
fingerprint_inputs = _build_publishing_input_bundle(
weights_path=weights_path,
dataset_path=dataset_path,
db_path=db_path,
geography_path=geography_path,
calibration_package_path=(
calibration_package_path if calibration_package_path.exists() else None
),
run_config_path=config_json_path if config_json_path.exists() else None,
run_id=run_id,
version=version,
n_clones=n_clones,
seed=42,
geography_path={geography_path_expr},
calibration_package_path={package_path_expr},
legacy_blocks_path=artifacts / "stacked_blocks.npy",
)
fingerprint = _resolve_scope_fingerprint(
inputs=fingerprint_inputs,
scope="regional",
expected_fingerprint=expected_fingerprint,
)
)
""",
),
capture_output=True,
text=True,
env=os.environ.copy(),
)
if fp_result.returncode != 0:
raise RuntimeError(f"Failed to compute fingerprint: {fp_result.stderr}")
fingerprint = fp_result.stdout.strip()
reconcile_action = reconcile_run_dir_fingerprint(run_dir, fingerprint)
if reconcile_action == "resume":
print(f"Inputs unchanged ({fingerprint}), resuming...")
Expand Down Expand Up @@ -1123,6 +1165,22 @@ def coordinate_national_publish(
"geography_assignment.npz": "national_geography_assignment.npz",
},
)
fingerprint_inputs = _build_publishing_input_bundle(
weights_path=weights_path,
dataset_path=dataset_path,
db_path=db_path,
geography_path=geography_path,
calibration_package_path=None,
run_config_path=config_json_path if config_json_path.exists() else None,
run_id=run_id,
version=version,
n_clones=n_clones,
seed=42,
)
fingerprint = _resolve_scope_fingerprint(
inputs=fingerprint_inputs,
scope="national",
)
run_dir = staging_dir / run_id
run_dir.mkdir(parents=True, exist_ok=True)

Expand Down Expand Up @@ -1224,6 +1282,7 @@ def coordinate_national_publish(
f"{version}. Run main_national_promote to publish."
),
"run_id": run_id,
"fingerprint": fingerprint,
"national_validation": national_validation_output,
}

Expand Down
Loading
Loading