Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .github/copilot-instructions.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Copilot Instructions

Follow the repository's canonical engineering skills under
`docs/engineering/skills/`.

For tests, read `docs/engineering/skills/testing.md` before adding, moving, or
reviewing test files. Do not duplicate or override that testing guidance here.
31 changes: 23 additions & 8 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,18 @@ jobs:
- run: pip install ruff>=0.9.0
- run: ruff format --check .

  # Runs the repository quality-guard script on every PR; gated on the
  # check-fork job like the other jobs in this workflow.
  quality-guards:
    name: Quality guards
    runs-on: ubuntu-latest
    needs: check-fork
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps the version as the string "3.14".
          python-version: "3.14"
      # NOTE(review): no dependency install step before this run — assumes
      # scripts/run_quality_guards.py is stdlib-only; confirm.
      - name: Run quality guards
        run: python scripts/run_quality_guards.py

check-changelog:
runs-on: ubuntu-latest
needs: check-fork
Expand Down Expand Up @@ -113,23 +125,25 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: "3.14"
- uses: astral-sh/setup-uv@v5
- run: uv sync --dev
- name: Install optimized test deps
run: pip install modal pytest numpy pandas
run: uv pip install modal pytest numpy pandas
- name: Ensure PR Modal environment exists
run: python .github/scripts/ensure_modal_environment.py
run: uv run python .github/scripts/ensure_modal_environment.py
- name: Sync Modal secrets to PR environment
run: python .github/scripts/sync_modal_secrets.py
run: uv run python .github/scripts/sync_modal_secrets.py
- name: Deploy Modal pipeline app to PR staging
run: modal deploy --env="${MODAL_ENVIRONMENT}" modal_app/pipeline.py
run: uv run modal deploy --env="${MODAL_ENVIRONMENT}" modal_app/pipeline.py
- name: Deploy Modal local-area app to PR staging
run: modal deploy --env="${MODAL_ENVIRONMENT}" modal_app/local_area.py
run: uv run modal deploy --env="${MODAL_ENVIRONMENT}" modal_app/local_area.py
- name: Deploy Modal H5 test harness to PR staging
run: modal deploy --env="${MODAL_ENVIRONMENT}" modal_app/h5_test_harness.py
run: uv run modal deploy --env="${MODAL_ENVIRONMENT}" modal_app/h5_test_harness.py
- name: Run optimized integration tests against PR staging
run: python -m pytest tests/optimized/ -v
run: uv run pytest tests/optimized/ -v
- name: Cleanup PR Modal environment
if: always()
run: python .github/scripts/delete_modal_environment.py
run: uv run python .github/scripts/delete_modal_environment.py

smoke-test:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -179,6 +193,7 @@ jobs:
fi

integration-tests:
name: Build datasets and run integration tests on Modal
runs-on: ubuntu-latest
needs: [check-fork, lint, decide-test-scope]
if: needs.decide-test-scope.outputs.run_integration == 'true'
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/push.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ jobs:
- run: pip install ruff>=0.9.0
- run: ruff format --check .

# ── Build and linear integration tests ──────────────────────
build-and-linear-integration-tests:
name: Build and linear integration tests
# ── Dataset build ───────────────────────────────────────────
build-datasets:
name: Build datasets
runs-on: ubuntu-latest
needs: lint
if: github.event.head_commit.message != 'Update package version'
Expand All @@ -29,7 +29,7 @@ jobs:
with:
python-version: "3.14"
- run: pip install modal
- name: Run linear integration tests on Modal
- name: Build datasets on Modal
run: |
modal run --env="${MODAL_ENVIRONMENT}" modal_app/data_build.py \
--upload \
Expand Down
14 changes: 14 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Codex Instructions

These instructions apply repository-wide.

## Skills system

Canonical AI-facing engineering skills live under `docs/engineering/skills/`.
Use those files as the source of truth across Codex, Claude, Copilot, and other
AI tools.

When adding, moving, or reviewing tests, read
`docs/engineering/skills/testing.md`. Do not put pytest files under
`policyengine_us_data/tests/`, do not import from `tests.conftest`, and do not
import helpers across test lanes.
10 changes: 9 additions & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,33 @@

## Testing

Canonical testing guidance lives in `docs/engineering/skills/testing.md`. If
this file conflicts with that skill, follow the skill and update this adapter.

### Running Tests
- `make test-unit` - Run unit tests only (fast, no data dependencies)
- `make test-integration` - Run integration tests (requires built H5 datasets)
- `make test` - Run all tests
- `pytest tests/unit/ -v` - Unit tests directly
- `pytest tests/integration/test_cps.py -v` - Specific integration test
- `python scripts/run_quality_guards.py` - Run layout/import quality guards

### Test Organization
Tests are in the top-level `tests/` directory, split into two sub-directories:
Tests are in the top-level `tests/` directory, split into these sub-directories:

- **`tests/unit/`** — Self-contained tests that use synthetic data, mocks, patches, or checked-in fixtures. Run in seconds with no external dependencies.
- `unit/datasets/` — unit tests for dataset code
- `unit/calibration/` — unit tests for calibration code

- **`tests/integration/`** — Tests that require built H5 datasets, HuggingFace downloads, Microsimulation objects, or database ETL. Named after the dataset they test.
- **`tests/optimized/`** — Tests that exercise deployed Modal/staging seams.

### Test Placement Rules
- **NEVER** put pytest files under `policyengine_us_data/tests/`; CI does not collect that tree
- **NEVER** put tests that require H5 files or Microsimulation in `unit/`
- **NEVER** put tests that use only synthetic data or mocks in `integration/`
- **NEVER** import from `tests.conftest`; fixtures are discovered automatically and helper functions belong in local support modules
- **NEVER** import helpers across test lanes, such as `tests.unit` from an integration test
- Integration test files are named after their dataset dependency: `test_cps.py` tests `cps_2024.h5`
- Sanity checks (value ranges, population counts) belong in the per-dataset integration test file, not in a separate sanity file
- When adding a new integration test, add it to the existing per-dataset file if one exists
Expand Down
1 change: 1 addition & 0 deletions changelog.d/760.added.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added local H5 traceability metadata and scope fingerprinting for calibration artifacts.
1 change: 1 addition & 0 deletions changelog.d/test-quality-guards.changed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add quality guards for test layout and document the testing skill for AI tooling.
13 changes: 13 additions & 0 deletions docs/engineering/skills/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Engineering Skills

This directory is the canonical source for AI-facing engineering rules.

Tool-specific instruction files such as `AGENTS.md`, `CLAUDE.md`, and
`.github/copilot-instructions.md` should point here instead of duplicating
implementation-specific guidance. When a rule changes, update the skill here
first, then keep adapters thin.

Current skills:

- `testing.md`: test layout, fixture scope, helper placement, and quality guard
expectations.
54 changes: 54 additions & 0 deletions docs/engineering/skills/testing.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Testing Skill

Use this skill whenever adding, moving, or reviewing tests.

## Canonical Layout

- Put unit tests under `tests/unit/`.
- Put data-dependent or runtime integration tests under `tests/integration/`.
- Put deployed Modal/staging tests under `tests/optimized/`.
- Do not add pytest files under `policyengine_us_data/tests/`; CI does not
collect that tree.

## Fixtures And Helpers

- Keep root `tests/conftest.py` empty or very lightweight. It must not import
cloud clients, Modal, Hugging Face, PolicyEngine runtime-heavy modules, or
package modules that transitively import those dependencies.
- Put domain-specific fixtures in the narrowest `conftest.py` that covers the
tests that use them.
- Put reusable helper functions in a local `support.py`, a local fixture module,
or `tests/support/`.
- Do not import from `tests.conftest`; pytest discovers fixtures automatically.
- Do not import across test lanes, for example from `tests.integration` into
`tests.unit` or from `tests.unit` into `tests.integration`. Move shared helpers
to `tests/support/` or colocate them with the tests.

## Dependency Boundaries

- Unit tests should not require real network credentials, Modal, Hugging Face,
or GCS. Mock those seams.
- Integration tests may require built data or heavier runtime setup, but should
be explicit about those requirements and skip cleanly when local artifacts are
unavailable.
- CI should run tests in an environment where project dependencies are installed
with `uv sync --dev` or an equivalent full test dependency install. A full
install is required, but it is not a substitute for fixture isolation.

## Quality Guards

Run this before opening or updating a PR:

```bash
python scripts/run_quality_guards.py
```

The current guard enforces:

- No package-internal pytest files under `policyengine_us_data/tests/`.
- No pytest files outside the approved top-level test lanes.
- No imports from `tests.conftest`.
- No imports across test lanes.

When adding a new guard, register it in `scripts/run_quality_guards.py` so CI
continues to expose a single `Quality guards` job.
133 changes: 96 additions & 37 deletions modal_app/local_area.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@

from modal_app.images import cpu_image as image # noqa: E402
from modal_app.resilience import reconcile_run_dir_fingerprint # noqa: E402
from policyengine_us_data.calibration.local_h5.fingerprinting import ( # noqa: E402
FingerprintingService,
PublishingInputBundle,
)
from policyengine_us_data.calibration.local_h5.partitioning import ( # noqa: E402
partition_weighted_work_items,
)
Expand Down Expand Up @@ -311,6 +315,63 @@ def get_version() -> str:
return pyproject["project"]["version"]


def _build_publishing_input_bundle(
    *,
    weights_path: Path,
    dataset_path: Path,
    db_path: Path | None,
    geography_path: Path | None,
    calibration_package_path: Path | None,
    run_config_path: Path | None,
    run_id: str,
    version: str,
    n_clones: int | None,
    seed: int,
    legacy_blocks_path: Path | None = None,
) -> PublishingInputBundle:
    """Assemble the coordinator's normalized input bundle for one publish scope.

    Pure adapter: maps the coordinator's local argument names onto the
    field names expected by ``PublishingInputBundle`` (for example
    ``dataset_path`` -> ``source_dataset_path``) with no other logic.
    """
    # Name the field mapping explicitly so the rename seams are easy to scan.
    bundle_fields = {
        "weights_path": weights_path,
        "source_dataset_path": dataset_path,
        "target_db_path": db_path,
        "exact_geography_path": geography_path,
        "calibration_package_path": calibration_package_path,
        "run_config_path": run_config_path,
        "run_id": run_id,
        "version": version,
        "n_clones": n_clones,
        "seed": seed,
        "legacy_blocks_path": legacy_blocks_path,
    }
    return PublishingInputBundle(**bundle_fields)


def _resolve_scope_fingerprint(
    *,
    inputs: PublishingInputBundle,
    scope: str,
    expected_fingerprint: str = "",
) -> str:
    """Return the fingerprint to use for this scope.

    Computes the current scope fingerprint from ``inputs``. When
    ``expected_fingerprint`` is non-empty (a value pinned by the pipeline),
    that pinned value is always returned — even when it disagrees with the
    freshly computed one — so resumes stay backward compatible; a warning is
    printed on disagreement. With no pinned value, the computed fingerprint
    is returned.
    """
    service = FingerprintingService()
    traceability = service.build_traceability(inputs=inputs, scope=scope)
    current = service.compute_scope_fingerprint(traceability)

    # No pinned value from the pipeline: use the freshly computed one.
    if not expected_fingerprint:
        return current

    if expected_fingerprint == current:
        print(f"Using pinned fingerprint from pipeline: {expected_fingerprint}")
        return expected_fingerprint

    # Pinned and current disagree: keep the pinned value so an in-flight run
    # can still resume against its original run directory, but say so loudly.
    print(
        "WARNING: Pinned fingerprint differs from current "
        f"{scope} scope fingerprint. "
        "Preserving pinned value for backward-compatible resume.\n"
        f"  Pinned: {expected_fingerprint}\n"
        f"  Current: {current}"
    )
    return expected_fingerprint


def partition_work(
work_items: List[Dict],
num_workers: int,
Expand Down Expand Up @@ -836,45 +897,26 @@ def coordinate_publish(
validate = False

# Fingerprint-based cache invalidation
if expected_fingerprint:
fingerprint = expected_fingerprint
print(f"Using pinned fingerprint from pipeline: {fingerprint}")
else:
geography_path_expr = (
f'Path("{geography_path}")' if geography_path.exists() else "None"
)
package_path_expr = (
f'Path("{calibration_package_path}")'
if calibration_package_path.exists()
else "None"
)
fp_result = subprocess.run(
_python_cmd(
"-c",
f"""
from pathlib import Path
from policyengine_us_data.calibration.publish_local_area import (
compute_input_fingerprint,
)
print(
compute_input_fingerprint(
Path("{weights_path}"),
Path("{dataset_path}"),
{n_clones},
fingerprint_inputs = _build_publishing_input_bundle(
weights_path=weights_path,
dataset_path=dataset_path,
db_path=db_path,
geography_path=geography_path,
calibration_package_path=(
calibration_package_path if calibration_package_path.exists() else None
),
run_config_path=config_json_path if config_json_path.exists() else None,
run_id=run_id,
version=version,
n_clones=n_clones,
seed=42,
geography_path={geography_path_expr},
calibration_package_path={package_path_expr},
legacy_blocks_path=artifacts / "stacked_blocks.npy",
)
fingerprint = _resolve_scope_fingerprint(
inputs=fingerprint_inputs,
scope="regional",
expected_fingerprint=expected_fingerprint,
)
)
""",
),
capture_output=True,
text=True,
env=os.environ.copy(),
)
if fp_result.returncode != 0:
raise RuntimeError(f"Failed to compute fingerprint: {fp_result.stderr}")
fingerprint = fp_result.stdout.strip()
reconcile_action = reconcile_run_dir_fingerprint(run_dir, fingerprint)
if reconcile_action == "resume":
print(f"Inputs unchanged ({fingerprint}), resuming...")
Expand Down Expand Up @@ -1123,6 +1165,22 @@ def coordinate_national_publish(
"geography_assignment.npz": "national_geography_assignment.npz",
},
)
fingerprint_inputs = _build_publishing_input_bundle(
weights_path=weights_path,
dataset_path=dataset_path,
db_path=db_path,
geography_path=geography_path,
calibration_package_path=None,
run_config_path=config_json_path if config_json_path.exists() else None,
run_id=run_id,
version=version,
n_clones=n_clones,
seed=42,
)
fingerprint = _resolve_scope_fingerprint(
inputs=fingerprint_inputs,
scope="national",
)
run_dir = staging_dir / run_id
run_dir.mkdir(parents=True, exist_ok=True)

Expand Down Expand Up @@ -1224,6 +1282,7 @@ def coordinate_national_publish(
f"{version}. Run main_national_promote to publish."
),
"run_id": run_id,
"fingerprint": fingerprint,
"national_validation": national_validation_output,
}

Expand Down
Loading
Loading