diff --git a/MODULE.bazel b/MODULE.bazel index 7f1749b73..1d68705f9 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -300,7 +300,14 @@ download_file( urls = ["https://github.com/muttleyxd/clang-tools-static-binaries/releases/download/master-2da3e7b/clang-format-19_linux-amd64"], ) -bazel_dep(name = "aspect_rules_lint", version = "2.3.0", dev_dependency = True) +bazel_dep(name = "score_cpp_policies", dev_dependency = True) +git_override( + module_name = "score_cpp_policies", + commit = "d762cc5cd9fe6ce5920c0f246303662f71a698ee", + remote = "https://github.com/eclipse-score/score_cpp_policies.git", +) + +bazel_dep(name = "aspect_rules_lint", version = "2.5.0", dev_dependency = True) bazel_dep(name = "rules_shell", version = "0.6.1", dev_dependency = True) bazel_dep(name = "google_benchmark", version = "1.9.5", dev_dependency = True) bazel_dep(name = "buildifier_prebuilt", version = "8.5.1", dev_dependency = True) diff --git a/quality/coverage/coverage.bazelrc b/quality/coverage/coverage.bazelrc index b4186e07f..2e2ff2ffa 100644 --- a/quality/coverage/coverage.bazelrc +++ b/quality/coverage/coverage.bazelrc @@ -27,7 +27,7 @@ coverage --extra_toolchains=@ferrocene_x86_64_unknown_linux_gnu_llvm//:rust_ferr # Use llvm-cov directly for HTML report generation instead of genhtml/lcov. # The custom merger (per-test) receives profraw files and produces a zip with profdata + HTML. # The custom reporter (final) merges all profdata and generates the combined HTML report. -coverage --coverage_output_generator=//quality/coverage/llvm_cov:merger +coverage --coverage_output_generator=@score_cpp_policies//coverage:merger coverage --coverage_report_generator=//quality/coverage/llvm_cov:reporter_wrapper coverage --experimental_fetch_all_coverage_outputs diff --git a/quality/coverage/generate_coverage_html.sh b/quality/coverage/generate_coverage_html.sh index 8b682898a..a5fff36d8 100755 --- a/quality/coverage/generate_coverage_html.sh +++ b/quality/coverage/generate_coverage_html.sh @@ -103,13 +103,13 @@ if [[ -f "${JUSTIFICATION_YAML}" ]]; then mkdir -p "${JUSTIFICATION_DIR}" # Run justify.py via Bazel to produce the resolved manifest. - if bazel run //quality/coverage/llvm_cov:justify -- \ + if bazel run @score_cpp_policies//coverage:justify -- \ --yaml "${JUSTIFICATION_YAML}" \ --source-root "${BUILD_WORKSPACE_DIRECTORY}" \ --output "${JUSTIFICATION_DIR}/manifest.json"; then # Run effective_coverage.py via Bazel to post-process HTML and calculate effective coverage. - bazel run //quality/coverage/llvm_cov:effective_coverage -- \ + bazel run @score_cpp_policies//coverage:effective_coverage -- \ --html-dir "${OUTPUT_DIR}" \ --manifest "${JUSTIFICATION_DIR}/manifest.json" \ --output "${JUSTIFICATION_DIR}/report.json" diff --git a/quality/coverage/llvm_cov/BUILD b/quality/coverage/llvm_cov/BUILD index 60dc0e0b6..27cb8ab31 100644 --- a/quality/coverage/llvm_cov/BUILD +++ b/quality/coverage/llvm_cov/BUILD @@ -11,67 +11,29 @@ # SPDX-License-Identifier: Apache-2.0 # ******************************************************************************* -load("@rules_python//python:defs.bzl", "py_binary") -load("@rules_shell//shell:sh_binary.bzl", "sh_binary") - -py_binary( - name = "merger", - srcs = ["merger.py"], +load( + "@score_cpp_policies//coverage:defs.bzl", + "score_coverage_reporter", + "score_instrumented_sources_manifest", ) -py_binary( - name = "reporter", - srcs = ["reporter.py"], - data = [ - "filter_regexes.txt", - "//:MODULE.bazel", - "@llvm_toolchain//:llvm-cov", - "@llvm_toolchain//:llvm-profdata", +# Collect all C++ source files reachable from the main library targets so that +# files not linked into any test show up at 0% in the coverage report. +score_instrumented_sources_manifest( + name = "instrumented_sources", + targets = [ + "//score/message_passing:message_passing", + "//score/mw/com:com", ], - deps = ["@rules_python//python/runfiles"], ) -genrule( - name = "reporter_wrapper_gen", - srcs = [ - "filter_regexes.txt", - "//:MODULE.bazel", - ], - outs = ["reporter_wrapper.sh"], - cmd = """ -cat > $@ << EOF -#!/usr/bin/env bash -set -euo pipefail -if [[ -z "\\$${RUNFILES_DIR:-}" ]]; then - if [[ -d "\\$$0.runfiles" ]]; then - export RUNFILES_DIR="\\$$0.runfiles" - fi -fi -WORKSPACE_ROOT="\\$$(cd "\\$$(dirname "\\$$(readlink -f "\\$${RUNFILES_DIR}/$(rlocationpath //:MODULE.bazel)")")" && pwd)/" -exec "\\$${RUNFILES_DIR}/_main/quality/coverage/llvm_cov/reporter" \\\\ - --filter_regexes="$(rlocationpath filter_regexes.txt)" \\\\ - --workspace_root="\\$${WORKSPACE_ROOT}" \\\\ - "\\$$@" -EOF -chmod +x $@ -""", -) - -sh_binary( +# Wrapper target wiring in the LLVM toolchain binaries and the workspace root. +# Referenced by coverage.bazelrc via: +# coverage --coverage_report_generator=//quality/coverage/llvm_cov:reporter_wrapper +score_coverage_reporter( name = "reporter_wrapper", - srcs = [":reporter_wrapper_gen"], - data = [":reporter"], -) - -py_binary( - name = "justify", - srcs = ["justify.py"], - deps = [ - "@score_communication_pip//pyyaml", - ], -) - -py_binary( - name = "effective_coverage", - srcs = ["effective_coverage.py"], + instrumented_sources_manifest = ":instrumented_sources", + llvm_cov = "@llvm_toolchain//:llvm-cov", + llvm_profdata = "@llvm_toolchain//:llvm-profdata", + visibility = ["//visibility:public"], ) diff --git a/quality/coverage/llvm_cov/README.md b/quality/coverage/llvm_cov/README.md index 5a6c55a4f..8a264a9e2 100644 --- a/quality/coverage/llvm_cov/README.md +++ b/quality/coverage/llvm_cov/README.md @@ -1,252 +1,34 @@ # llvm-cov Coverage Tools -This directory contains the Python tools that power the coverage pipeline. They are invoked by Bazel (merger + reporter) and by the `generate_coverage_html.sh` orchestrator script (justify + effective_coverage). +The coverage tools (merger, reporter, justify, effective_coverage) have been moved to +[`@score_cpp_policies//coverage`](https://github.com/eclipse-score/score_cpp_policies). -## Tool Overview +This directory now only contains the `BUILD` file that wires the upstream tools into +this repository via the `score_coverage_reporter` macro. -| Tool | Invoked By | Purpose | -|------|-----------|---------| -| `merger.py` | Bazel (`--coverage_output_generator`) | Per-test: profraw → profdata + metadata zip | -| `reporter.py` | Bazel (`--coverage_report_generator`) | Final: merge all profdata → HTML + LCOV + summary | -| `justify.py` | `generate_coverage_html.sh` | Resolve YAML + code markers → justification manifest | -| `effective_coverage.py` | `generate_coverage_html.sh` | Post-process HTML report + calculate effective coverage | +## How it works -## Data Flow +| Bazel flag | Target | +|---|---| +| `--coverage_output_generator` | `@score_cpp_policies//coverage:merger` (set in `coverage.bazelrc`) | +| `--coverage_report_generator` | `//quality/coverage/llvm_cov:reporter_wrapper` (generated by macro) | -``` -Test execution - │ - ▼ -┌──────────┐ profraw files -│ merger.py │ ◄── from each test -└────┬─────┘ - │ Per-test zip: {profdata, metadata.json} - ▼ -┌────────────┐ All per-test zips -│ reporter.py │ ◄── listed in --reports_file -└────┬───────┘ - │ _coverage_report.dat (zip): - │ ├── html_report/ (llvm-cov show --format=html) - │ ├── lcov_report/lcov.dat (llvm-cov export --format=lcov) - │ └── text_report/summary.txt (llvm-cov report) - ▼ -┌───────────────────────────┐ -│ generate_coverage_html.sh │ ◄── bazel run //quality/coverage:generate_coverage_html -└────┬──────────────────────┘ - │ Extracts html_report/ → cpp_coverage/ - ▼ -┌─────────────┐ coverage_justifications.yaml -│ justify.py │ ◄── + source files (COV_JUSTIFIED markers) -└────┬────────┘ - │ manifest.json: {file → {line → justification}} - ▼ -┌───────────────────────┐ -│ effective_coverage.py │ ◄── manifest.json + html_report/ -└────┬──────────────────┘ - │ • Modifies HTML in-place (restyled justified lines/branches) - │ • report.json + summary.txt - ▼ - Console output: effective coverage summary -``` - -Right now we do not perform the justification and effective coverage calculation in the reporter, as it will not have -access to the whole code base, which makes the integration more difficult. This can maybe be a future improvement. - ---- - -## merger.py — Per-Test Coverage Output Generator - -**Bazel role:** `--coverage_output_generator` (replaces the default `collect_coverage.sh` output step) - -**What it does:** - -1. Receives `.profraw` files from a single test execution -2. Finds the instrumented object files from the source manifest -3. Runs `llvm-profdata merge` to create a `.profdata` file -4. Collects metadata (llvm-tools path, workspace root, excluded source patterns) -5. Packages `{profdata, metadata.json}` into a zip file for the reporter - -**Interface (called by Bazel's `collect_coverage.sh`):** - -``` -merger.py --coverage_dir= \ - --output_file= \ - --source_file_manifest= \ - --filter_sources= \ # repeatable - [--sources_to_replace_file=] -``` - -**Key behaviors:** - -- Resolves the actual workspace root by following Bazel sandbox symlinks (important for `--path-equivalence` in later stages) -- Cleans up dangling symlinks in the coverage directory that can cause `llvm-profdata` to fail -- Extracts `--ignore-filename-regex` patterns from `--filter_sources` for source filtering - ---- - -## reporter.py — Final Combined Report Generator - -**Bazel role:** `--coverage_report_generator` (replaces the default lcov-based reporter) - -**What it does:** - -1. Reads the list of per-test zip files from `--reports_file` -2. Extracts profdata + metadata from each zip -3. Merges all profdata into a single `merged_coverage.profdata` via `llvm-profdata merge` -4. Generates three output formats: - - **HTML report** via `llvm-cov show --format=html` with branch counts and expansion views - - **LCOV data** via `llvm-cov export --format=lcov` (backward compatibility with dashboards) - - **Text summary** via `llvm-cov report --summary-only` -5. Packages everything into a zip file at `_coverage_report.dat` - -**Interface (called by Bazel):** - -``` -reporter.py --reports_file= \ - --output_file= -``` - -**Source filtering:** - -The reporter applies `--ignore-filename-regex` to all `llvm-cov` commands to exclude: -- Test files and benchmarks -- External/third-party code -- Any paths matching patterns collected from `--filter_sources` during the merger phase - -These patterns are propagated via `metadata.json` in each per-test zip. - -**llvm-cov show options:** - -| Option | Purpose | -|--------|---------| -| `--show-branches=count` | Show branch coverage with execution counts | -| `--show-expansions` | Expand template instantiations inline | -| `--coverage-watermark=100,50` | Green ≥100%, yellow ≥50%, red <50% | -| `--path-equivalence=/proc/self/cwd/,` | Map sandbox paths to real source paths | -| `--Xdemangler=llvm-cxxfilt` | Demangle C++ symbol names | - ---- - -## justify.py — Justification Resolver - -**What it does:** - -Resolves all justified lines from two sources and produces a unified manifest: +The `reporter_wrapper` target is created by the `score_coverage_reporter` macro in `BUILD`. +It bakes in: +- the LLVM toolchain binaries (`@llvm_toolchain//:llvm-cov`, `:llvm-profdata`) +- the workspace root (via `//:MODULE.bazel`) +- the baseline filter regexes from `@score_cpp_policies` +- the list of instrumented sources (via `score_instrumented_sources_manifest`) -1. **YAML locations** — `file` + `line_start`/`line_end` entries in `coverage_justifications.yaml` -2. **In-code markers** — `COV_JUSTIFIED `, `COV_JUSTIFIED_START ` / `COV_JUSTIFIED_STOP` comments +## Running coverage -**Interface:** +```bash +# Run all tests with coverage instrumentation +bazel coverage //score/... --build_tests_only +# Extract HTML report, run justification processing, show effective coverage +bazel run //quality/coverage:generate_coverage_html ``` -python3 justify.py --yaml \ - --source-root \ - --output -``` - -**Output format (manifest.json):** - -```json -{ - "version": 1, - "justified_files": { - "score/mw/com/impl/some_file.cpp": { - "42": {"id": "my-id", "category": "defensive_programming", "reason": "..."}, - "43": {"id": "my-id", "category": "defensive_programming", "reason": "..."} - } - } -} -``` - -**Validation rules:** - -- Justification IDs must be unique and kebab-case (lowercase + hyphens) -- Every justification must have a non-empty `reason` -- Category must be one of: `defensive_programming`, `tool_false_positive`, `platform_specific`, `other` -- In-code `COV_JUSTIFIED ` markers must reference an ID defined in the YAML - -**In-code marker patterns:** - -| Pattern | Scope | -|---------|-------| -| `// COV_JUSTIFIED ` | Justifies the current line | -| `// COV_JUSTIFIED_START ` | Starts a justified region | -| `// COV_JUSTIFIED_STOP` | Ends the justified region | - ---- - -## effective_coverage.py — HTML Post-Processor & Coverage Calculator - -**What it does:** - -1. Loads the justification manifest from `justify.py` -2. Post-processes the llvm-cov HTML report in-place: - - **Lines:** Uncovered justified lines get class `justified-line`, count shows "J", code background turns orange - - **Branches:** Uncovered branches on justified lines get class `justified-branch` (orange text) -3. Updates the index page: - - Adds an effective coverage banner (line + branch) - - Updates per-file line% and branch% cells to show effective (raw + justified) coverage - - Updates the TOTALS row -4. Calculates and reports effective coverage metrics -5. Detects stale justifications - -**Interface:** - -``` -python3 effective_coverage.py --html-dir \ - --manifest \ - --output -``` - -**Output files:** - -| File | Content | -|------|---------| -| `report.json` | Machine-readable report: summary stats, applied justifications, stale warnings | -| `summary.txt` | Human-readable coverage summary | - -**Template instantiation handling:** - -C++ templates produce multiple instantiations of the same source line in the HTML. The tool handles this correctly: -- **Line coverage:** A line is "covered" if ANY instantiation covers it. All instantiation occurrences of a justified line are restyled. -- **Branch coverage:** A branch direction (True/False) is "truly uncovered" only if NO instantiation covers it. Only truly uncovered branch directions count toward justified branches. -- **Statistics:** Raw coverage numbers are parsed directly from the llvm-cov index page TOTALS row, guaranteeing an exact match with llvm-cov's own calculations. - -**Stale justification detection:** - -A justification is "stale" when BOTH: -- The line is already covered by tests, AND -- All branches at that line are covered - -If the line is covered but has uncovered branches, the justification is still needed (for the branches) and is NOT stale. - -**CSS classes injected into style.css:** - -| Class | Applied To | Visual | -|-------|-----------|--------| -| `.justified-line` | Count cell (``) | Right-aligned orange text, shows "J" | -| `.region.justified` | Code spans (replacing `.region.red`) | Orange background | -| `.justified-branch` | Branch direction spans (replacing `.red.branch`) | Bold orange text | -| `tr:has(> td.justified-line) > td.code` | Entire code cell for justified rows | Light orange background | - ---- - -## Bazel Build Targets - -Defined in `BUILD`: - -```python -py_binary(name = "merger", ...) # Per-test coverage output generator -py_binary(name = "reporter", ...) # Final combined report generator -py_binary(name = "justify", ...) # Justification resolver -py_binary(name = "effective_coverage", ...) # HTML post-processor -``` - -The `reporter` target includes `justify.py` and `effective_coverage.py` as `data` dependencies for best-effort in-sandbox justification processing. - -## Dependencies -- **Python 3** (system Python, no virtualenv needed) -- **PyYAML** — for parsing `coverage_justifications.yaml` (available via pip or system package) -- **llvm-profdata** — for merging profraw/profdata files (from LLVM toolchain) -- **llvm-cov** — for generating HTML, LCOV, and text reports (from LLVM toolchain) +For full tool documentation see the +[score_cpp_policies coverage README](https://github.com/eclipse-score/score_cpp_policies/blob/main/coverage/README.md). diff --git a/quality/coverage/llvm_cov/effective_coverage.py b/quality/coverage/llvm_cov/effective_coverage.py deleted file mode 100644 index 93d77e7a4..000000000 --- a/quality/coverage/llvm_cov/effective_coverage.py +++ /dev/null @@ -1,737 +0,0 @@ -#!/usr/bin/env python3 -# ******************************************************************************* -# Copyright (c) 2026 Contributors to the Eclipse Foundation -# -# See the NOTICE file(s) distributed with this work for additional -# information regarding copyright ownership. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0 -# -# SPDX-License-Identifier: Apache-2.0 -# ******************************************************************************* -"""Effective coverage calculator and HTML post-processor. - -Takes the llvm-cov HTML report and the resolved justification manifest. -Modifies the HTML to show justified lines in a distinct color (yellow/orange) -and calculates effective coverage metrics. - -Usage: - python effective_coverage.py --html-dir --manifest --output -""" - -import argparse -import json -import os -import re -import sys -from pathlib import Path -from typing import Any, Dict, List, Tuple - - -# Pattern to match a table row in llvm-cov HTML source pages -# Format: ......... -LINE_NUMBER_RE = re.compile(r"") -COVERED_LINE_TD_RE = re.compile(r"") - - -def main() -> None: - """Main entry point.""" - args = parse_args() - - # Load the justification manifest - manifest = load_manifest(args.manifest) - justified_files = manifest.get("justified_files", {}) - - # Find all source HTML files in the report - html_dir = args.html_dir - if not html_dir.exists(): - print(f"ERROR: HTML report directory not found: {html_dir}", file=sys.stderr) - sys.exit(1) - - # Parse raw coverage totals from the index page (matches llvm-cov exactly). - totals = parse_index_page_totals(html_dir) - raw_covered, raw_total = totals["lines"] - raw_branch_covered, raw_branch_total = totals["branches"] - - # Process each source HTML file (restyle justified lines + count them) - total_justified = 0 - total_stale = 0 - total_justified_branches = 0 - applied_justifications: List[Dict[str, Any]] = [] - stale_justifications: List[Dict[str, Any]] = [] - # Track per-file justification counts for index page updates - per_file_stats: Dict[str, Dict[str, int]] = {} - - source_html_files = find_source_html_files(html_dir) - for html_file in source_html_files: - rel_source_path = extract_source_path_from_html(html_file, html_dir) - if not rel_source_path: - continue - - file_justifications = find_matching_justifications( - rel_source_path, justified_files - ) - - file_stats = process_html_file( - html_file, file_justifications, applied_justifications, stale_justifications - ) - - total_justified += file_stats["justified"] - total_stale += file_stats["stale"] - total_justified_branches += file_stats["justified_branches"] - - if file_stats["justified"] > 0 or file_stats["justified_branches"] > 0: - per_file_stats[rel_source_path] = file_stats - - # Calculate stats using llvm-cov's exact numbers - raw_uncovered = raw_total - raw_covered - unjustified_uncovered = raw_uncovered - total_justified - - effective_branch_covered = raw_branch_covered + total_justified_branches - - stats = { - "total_instrumented_lines": raw_total, - "covered_lines": raw_covered, - "justified_lines": total_justified, - "unjustified_uncovered_lines": max(0, unjustified_uncovered), - "stale_justifications": total_stale, - "raw_line_coverage_pct": round(100.0 * raw_covered / raw_total, 2) if raw_total > 0 else 0.0, - "effective_line_coverage_pct": round( - 100.0 * (raw_covered + total_justified) / raw_total, 2 - ) if raw_total > 0 else 0.0, - "total_branches": raw_branch_total, - "covered_branches": raw_branch_covered, - "justified_branches": total_justified_branches, - "raw_branch_coverage_pct": round(100.0 * raw_branch_covered / raw_branch_total, 2) if raw_branch_total > 0 else 0.0, - "effective_branch_coverage_pct": round( - 100.0 * effective_branch_covered / raw_branch_total, 2 - ) if raw_branch_total > 0 else 0.0, - } - - # Inject CSS for justified lines into style.css - inject_justified_css(html_dir) - - # Update the index page with effective coverage info and per-file stats - update_index_page(html_dir, stats, per_file_stats) - - # Write output report - report = { - "version": 1, - "summary": stats, - "applied_justifications": applied_justifications, - "stale_justifications": stale_justifications, - } - - output_path = Path(args.output) - output_path.parent.mkdir(parents=True, exist_ok=True) - with open(output_path, "w", encoding="utf-8") as f: - json.dump(report, f, indent=2) - - # Write human-readable summary - summary_path = output_path.parent / "summary.txt" - write_summary(summary_path, stats, stale_justifications) - - # Print summary - print( - f"INFO: Effective line coverage: {stats['effective_line_coverage_pct']}% " - f"(raw: {stats['raw_line_coverage_pct']}%, " - f"justified: {stats['justified_lines']} lines, " - f"unjustified uncovered: {stats['unjustified_uncovered_lines']} lines)", - file=sys.stderr, - ) - if stats['justified_branches'] > 0: - print( - f"INFO: Effective branch coverage: {stats['effective_branch_coverage_pct']}% " - f"(raw: {stats['raw_branch_coverage_pct']}%, " - f"justified: {stats['justified_branches']} branches)", - file=sys.stderr, - ) - if stale_justifications: - print( - f"WARNING: {len(stale_justifications)} stale justifications " - f"(lines are actually covered, justification can be removed)", - file=sys.stderr, - ) - - -def process_html_file( - html_file: Path, - justifications: Dict[int, Dict[str, str]], - applied_justifications: List[Dict[str, Any]], - stale_justifications: List[Dict[str, Any]], -) -> Dict[str, int]: - """Process a single source HTML file. Modifies it in-place. - - Restyles justified lines: changes the count cell to show "J" with justified-line - class, and changes red code regions to justified (orange) background. - Also restyles uncovered branches on justified lines. - Only counts justified/stale lines for the justification report — raw coverage - numbers are taken from the index page to match llvm-cov exactly. - """ - file_stats = { - "justified": 0, - "stale": 0, - "justified_branches": 0, - } - - with open(html_file, "r", encoding="utf-8") as f: - content = f.read() - - if not justifications: - return file_stats - - # Determine effective line status (covered if ANY instantiation covers it) - row_pattern = re.compile( - r"
\d+
" - r"" - ) - line_effective_status: Dict[int, str] = {} - for m in row_pattern.finditer(content): - line_num = int(m.group(1)) - line_class = m.group(2) - if line_class == "covered-line": - line_effective_status[line_num] = "covered" - elif line_class == "uncovered-line": - if line_num not in line_effective_status: - line_effective_status[line_num] = "uncovered" - - # Determine which lines have truly uncovered branches (never covered in any instantiation). - # A branch direction is "truly uncovered" if no instantiation covers it. - branch_check_pattern = re.compile( - r"Branch \(" - r"(\d+:\d+)\):\s*\[(.*?)\]" - ) - covered_branch_dirs_check: Dict[str, set] = {} # branch_id → set of covered directions - uncovered_branch_dirs_check: Dict[str, set] = {} # branch_id → set of uncovered directions - branch_line_map: Dict[str, int] = {} # branch_id → line_num - - for m in branch_check_pattern.finditer(content): - line_num = int(m.group(1)) - branch_id = m.group(2) - branch_content = m.group(3) - branch_line_map[branch_id] = line_num - if branch_id not in covered_branch_dirs_check: - covered_branch_dirs_check[branch_id] = set() - uncovered_branch_dirs_check[branch_id] = set() - for direction in ("True", "False"): - if f"class='None'>{direction}" in branch_content: - covered_branch_dirs_check[branch_id].add(direction) - if f"class='red branch'>{direction}" in branch_content: - uncovered_branch_dirs_check[branch_id].add(direction) - - # Lines with truly uncovered branches (uncovered in ALL instantiations) - lines_with_uncovered_branches: set = set() - for branch_id, uncov_dirs in uncovered_branch_dirs_check.items(): - cov_dirs = covered_branch_dirs_check.get(branch_id, set()) - truly_uncovered = uncov_dirs - cov_dirs - if truly_uncovered: - lines_with_uncovered_branches.add(branch_line_map[branch_id]) - - # Determine which justified lines are stale vs applicable. - # A justification is stale only if the line is covered AND has no uncovered branches. - for line_num, justification in justifications.items(): - status = line_effective_status.get(line_num) - has_uncovered_branches = line_num in lines_with_uncovered_branches - if status == "covered" and not has_uncovered_branches: - file_stats["stale"] += 1 - stale_justifications.append({ - "file": html_file.stem, - "line": line_num, - "id": justification.get("id", ""), - "reason": "Line is already covered and has no uncovered branches — justification is stale", - }) - elif status == "uncovered": - file_stats["justified"] += 1 - applied_justifications.append({ - "file": html_file.stem, - "line": line_num, - "id": justification.get("id", ""), - "category": justification.get("category", ""), - }) - elif status == "covered" and has_uncovered_branches: - # Line is covered but has uncovered branches — justification applies to branches only - applied_justifications.append({ - "file": html_file.stem, - "line": line_num, - "id": justification.get("id", ""), - "category": justification.get("category", ""), - }) - - # Restyle justified lines in the HTML (all occurrences including instantiations). - # Full row pattern to capture and replace the entire row: - # ...
0
...
... - full_row_pattern = re.compile( - r"(
\d+
)" - r"(
)\d+(
)" - r"(
)(.*?)(
)" - ) - - modified = False - - def replace_full_row(match: re.Match) -> str: - nonlocal modified - line_num = int(match.group(2)) - if line_num not in justifications: - return match.group(0) - - justification = justifications[line_num] - reason = justification.get("reason", "").replace("'", "'").replace('"', """) - jid = justification.get("id", "") - tooltip = f"Justified [{jid}]: {reason}" - modified = True - - # Rebuild the row with justified styling: - # 1. Line number td (unchanged) - line_td = match.group(1) - # 2. Count td: change class and show "J" instead of "0" - count_td = f"
J{match.group(4)}"
-        # 3. Code td: replace 'region red' spans with 'region justified'
-        code_start = match.group(5)
-        code_content = match.group(6).replace("class='region red'", "class='region justified'")
-        code_end = match.group(7)
-
-        return line_td + count_td + code_start + code_content + code_end
-
-    new_content = full_row_pattern.sub(replace_full_row, content)
-
-    # Restyle branches on justified lines.
-    # Branch format in expansion-view:
-    # Branch (195:17):
-    #   [True: 0, ...]
-    # We find branches at justified line numbers and restyle red branch → justified branch
-    # Counting: A branch direction is "uncovered" only if ALL instantiations show it as red.
-    # (Same as llvm-cov's logic: covered if ANY instantiation covers it.)
-    branch_pattern = re.compile(
-        r"(Branch \("
-        r"(\d+:\d+)\):\s*\[)(.*?\])"
-    )
-
-    # First pass: determine which branch directions are covered in any instantiation
-    covered_branch_dirs: set = set()  # (line:col, direction) that are covered somewhere
-    for m in branch_pattern.finditer(new_content):
-        line_num = int(m.group(2))
-        if line_num not in justifications:
-            continue
-        branch_id = m.group(3)
-        branch_content = m.group(4)
-        # A direction is covered if it does NOT have 'red branch' class
-        for direction in ("True", "False"):
-            # Check if this direction appears as covered (class='None' means covered)
-            covered_marker = f"class='None'>{direction}"
-            if covered_marker in branch_content:
-                covered_branch_dirs.add((branch_id, direction))
-
-    # Second pass: restyle and count only truly uncovered branch directions
-    justified_branch_ids: set = set()  # Track unique uncovered (line:col, direction) pairs
-
-    def replace_branch(match: re.Match) -> str:
-        nonlocal modified
-        line_num = int(match.group(2))
-        if line_num not in justifications:
-            return match.group(0)
-
-        branch_content = match.group(4)
-        if "class='red branch'" not in branch_content:
-            return match.group(0)
-
-        modified = True
-        branch_id = match.group(3)  # e.g. "68:13"
-
-        # Count unique uncovered branch directions that are NEVER covered in any instantiation
-        for direction in ("True", "False"):
-            if f"class='red branch'>{direction}" in branch_content:
-                uid = (branch_id, direction)
-                if uid not in covered_branch_dirs and uid not in justified_branch_ids:
-                    justified_branch_ids.add(uid)
-                    file_stats["justified_branches"] += 1
-
-        # Restyle: red branch → justified-branch, uncovered-line → justified-line
-        branch_content = branch_content.replace(
-            "class='red branch'", "class='justified-branch'"
-        )
-        branch_content = branch_content.replace(
-            "class='uncovered-line'", "class='justified-line'"
-        )
-        return match.group(1) + branch_content
-
-    new_content = branch_pattern.sub(replace_branch, new_content)
-
-    if modified:
-        with open(html_file, "w", encoding="utf-8") as f:
-            f.write(new_content)
-
-    return file_stats
-
-
-def parse_index_page_totals(html_dir: Path) -> Dict[str, Tuple[int, int]]:
-    """Parse the TOTALS row from the llvm-cov index.html to get exact coverage numbers.
-
-    Returns dict with 'lines' and 'branches' keys, each (covered, total).
-    The index page TOTALS row format: "93.55% (17565/18777)" — func, line, branch.
-    """
-    index_file = html_dir / "index.html"
-    if not index_file.exists():
-        return {"lines": (0, 0), "branches": (0, 0)}
-
-    with open(index_file, "r", encoding="utf-8") as f:
-        content = f.read()
-
-    pct_pattern = re.compile(r"(\d+\.\d+)%\s*\((\d+)/(\d+)\)")
-    matches = pct_pattern.findall(content)
-
-    result = {"lines": (0, 0), "branches": (0, 0)}
-
-    if len(matches) >= 3:
-        # Last 3 matches are from TOTALS row: func, line, branch
-        totals_matches = matches[-3:]
-        _, line_covered, line_total = totals_matches[1]
-        result["lines"] = (int(line_covered), int(line_total))
-        _, branch_covered, branch_total = totals_matches[2]
-        result["branches"] = (int(branch_covered), int(branch_total))
-
-    if result["lines"] == (0, 0):
-        print("WARNING: Could not parse coverage totals from index.html", file=sys.stderr)
-
-    return result
-
-
-def inject_justified_css(html_dir: Path) -> None:
-    """Add CSS for justified lines to style.css."""
-    style_file = html_dir / "style.css"
-    if not style_file.exists():
-        return
-
-    justified_css = """
-/* Coverage justification styling */
-.justified-line {
-  text-align: right;
-  color: #a60;
-}
-.region.justified {
-  background-color: #fa04;
-}
-.justified-branch {
-  color: #a60;
-  font-weight: bold;
-}
-tr:has(> td.justified-line) > td.code {
-  background-color: #fff3e0;
-}
-@media (prefers-color-scheme: dark) {
-  .justified-line {
-    color: #fa0;
-  }
-  .justified-branch {
-    color: #fa0;
-  }
-  tr:has(> td.justified-line) > td.code {
-    background-color: #3d2800;
-  }
-  .region.justified {
-    background-color: #fa03;
-  }
-}
-"""
-
-    with open(style_file, "a", encoding="utf-8") as f:
-        f.write(justified_css)
-
-
-def update_index_page(html_dir: Path, stats: Dict[str, Any], per_file_stats: Dict[str, Dict[str, int]]) -> None:
-    """Update the index page with effective coverage info and per-file adjusted percentages."""
-    index_file = html_dir / "index.html"
-    if not index_file.exists():
-        return
-
-    with open(index_file, "r", encoding="utf-8") as f:
-        content = f.read()
-
-    # Banner with overall effective coverage (lines + branches)
-    branch_info = ""
-    if stats.get("justified_branches", 0) > 0:
-        branch_info = (
-            f" | Effective Branch Coverage: {stats['effective_branch_coverage_pct']}%"
-            f" (Raw: {stats['raw_branch_coverage_pct']}%, Justified: {stats['justified_branches']} branches)"
-        )
-
-    banner = (
-        f"
" - f"Effective Line Coverage: {stats['effective_line_coverage_pct']}% " - f"(Raw: {stats['raw_line_coverage_pct']}% | " - f"Justified: {stats['justified_lines']} lines | " - f"Unjustified Uncovered: {stats['unjustified_uncovered_lines']} lines)" - f"{branch_info}" - f"
" - ) - - # Insert after the tag or after the first

- if "

" in content: - content = content.replace("

", banner + "

", 1) - else: - content = content.replace("", f"{banner}", 1) - - # Update per-file rows in the index table. - # For each file with justifications, find its row and update line% and branch% cells. - # Row format:
displayname
- #
  XX.XX% (covered/total)
← function - #
  XX.XX% (covered/total)
← line - #
  XX.XX% (covered/total)
← branch - # - pct_cell_pattern = re.compile( - r"
\s*(\d+\.\d+)%\s*\((\d+)/(\d+)\)
" - ) - - for file_path, fstats in per_file_stats.items(): - justified_lines = fstats.get("justified", 0) - justified_branches = fstats.get("justified_branches", 0) - if justified_lines == 0 and justified_branches == 0: - continue - - # Find the row for this file in the index page - # The href contains the full path to the HTML file - if file_path not in content: - continue - - # Find the containing this file path - file_idx = content.find(file_path) - if file_idx < 0: - continue - row_start = content.rfind("", file_idx) - if row_start < 0 or row_end < 0: - continue - - row = content[row_start:row_end + 5] - - # Find all percentage cells in this row (func, line, branch) - cells = list(pct_cell_pattern.finditer(row)) - if len(cells) < 2: - continue - - new_row = row - # Update line coverage cell (second cell, index 1) - if justified_lines > 0 and len(cells) >= 2: - line_cell = cells[1] - covered = int(line_cell.group(3)) - total = int(line_cell.group(4)) - eff_covered = covered + justified_lines - eff_pct = round(100.0 * eff_covered / total, 2) if total > 0 else 0.0 - color = _get_coverage_color(eff_pct) - old_cell = line_cell.group(0) - new_cell = ( - f"
"
-                f"{eff_pct:>7.2f}% ({eff_covered}/{total})
" - ) - new_row = new_row.replace(old_cell, new_cell) - - # Update branch coverage cell (third cell, index 2) - if justified_branches > 0 and len(cells) >= 3: - branch_cell = cells[2] - covered = int(branch_cell.group(3)) - total = int(branch_cell.group(4)) - eff_covered = covered + justified_branches - eff_pct = round(100.0 * eff_covered / total, 2) if total > 0 else 0.0 - color = _get_coverage_color(eff_pct) - old_cell = branch_cell.group(0) - new_cell = ( - f"
"
-                f"{eff_pct:>7.2f}% ({eff_covered}/{total})
" - ) - new_row = new_row.replace(old_cell, new_cell) - - if new_row != row: - content = content.replace(row, new_row) - - # Update the TOTALS row - content = _update_totals_row(content, stats) - - with open(index_file, "w", encoding="utf-8") as f: - f.write(content) - - -def _get_coverage_color(pct: float) -> str: - """Return the llvm-cov color class for a coverage percentage.""" - if pct >= 100.0: - return "green" - elif pct >= 80.0: - return "yellow" - else: - return "red" - - -def _update_totals_row(content: str, stats: Dict[str, Any]) -> str: - """Update the TOTALS row in the index page with effective coverage numbers.""" - # Find the TOTALS row — it's the last row before - totals_idx = content.rfind("Totals") - if totals_idx < 0: - return content - - row_start = content.rfind("", totals_idx) - if row_start < 0 or row_end < 0: - return content - - row = content[row_start:row_end + 5] - - pct_cell_pattern = re.compile( - r"
\s*(\d+\.\d+)%\s*\((\d+)/(\d+)\)
" - ) - cells = list(pct_cell_pattern.finditer(row)) - - new_row = row - - # Update line coverage in totals (index 1) - if len(cells) >= 2 and stats.get("justified_lines", 0) > 0: - line_cell = cells[1] - eff_covered = stats["covered_lines"] + stats["justified_lines"] - total = stats["total_instrumented_lines"] - eff_pct = stats["effective_line_coverage_pct"] - color = _get_coverage_color(eff_pct) - old_cell = line_cell.group(0) - new_cell = ( - f"
"
-            f"{eff_pct:>7.2f}% ({eff_covered}/{total})
" - ) - new_row = new_row.replace(old_cell, new_cell) - - # Update branch coverage in totals (index 2) - if len(cells) >= 3 and stats.get("justified_branches", 0) > 0: - branch_cell = cells[2] - eff_covered = stats["covered_branches"] + stats["justified_branches"] - total = stats["total_branches"] - eff_pct = stats["effective_branch_coverage_pct"] - color = _get_coverage_color(eff_pct) - old_cell = branch_cell.group(0) - new_cell = ( - f"
"
-            f"{eff_pct:>7.2f}% ({eff_covered}/{total})
" - ) - new_row = new_row.replace(old_cell, new_cell) - - if new_row != row: - content = content.replace(row, new_row) - - return content - - -def find_source_html_files(html_dir: Path) -> List[Path]: - """Find all per-source HTML files (not index.html, style.css, etc.).""" - coverage_dir = html_dir / "coverage" - if not coverage_dir.exists(): - # Some llvm-cov versions put source files directly in html_dir - coverage_dir = html_dir - - files = [] - for html_file in coverage_dir.rglob("*.html"): - if html_file.name in ("index.html",): - continue - files.append(html_file) - return sorted(files) - - -def extract_source_path_from_html(html_file: Path, html_dir: Path) -> str: - """Extract the relative source file path from the HTML file path. - - llvm-cov creates paths like: html_report/coverage/.html - We need to extract the relative path within the project. - """ - rel = str(html_file.relative_to(html_dir)) - # Remove "coverage/" prefix if present - if rel.startswith("coverage/"): - rel = rel[len("coverage/"):] - # Remove .html suffix - if rel.endswith(".html"): - rel = rel[:-5] - return rel - - -def find_matching_justifications( - source_path: str, justified_files: Dict[str, Dict[str, Dict[str, str]]] -) -> Dict[int, Dict[str, str]]: - """Find justifications that match the given source path. - - The source_path from HTML may be an absolute path or relative. - The justified_files keys are relative to source root. - We match by suffix. - """ - result: Dict[int, Dict[str, str]] = {} - - for justified_path, line_justifications in justified_files.items(): - # Match if the source_path ends with the justified_path - if source_path.endswith(justified_path) or justified_path.endswith(source_path): - for line_str, justification in line_justifications.items(): - result[int(line_str)] = justification - - return result - - -def write_summary( - path: Path, stats: Dict[str, Any], stale: List[Dict[str, Any]] -) -> None: - """Write human-readable summary.""" - with open(path, "w", encoding="utf-8") as f: - f.write("Coverage Justification Summary\n") - f.write("=" * 40 + "\n\n") - f.write(f"Total instrumented lines: {stats['total_instrumented_lines']}\n") - f.write(f"Covered lines: {stats['covered_lines']}\n") - f.write(f"Justified lines: {stats['justified_lines']}\n") - f.write(f"Unjustified uncovered: {stats['unjustified_uncovered_lines']}\n") - f.write(f"\n") - f.write(f"Raw line coverage: {stats['raw_line_coverage_pct']}%\n") - f.write(f"Effective line coverage: {stats['effective_line_coverage_pct']}%\n") - f.write(f"\n") - if stats.get("total_branches", 0) > 0: - f.write(f"Total branches: {stats['total_branches']}\n") - f.write(f"Covered branches: {stats['covered_branches']}\n") - f.write(f"Justified branches: {stats['justified_branches']}\n") - f.write(f"Raw branch coverage: {stats['raw_branch_coverage_pct']}%\n") - f.write(f"Effective branch coverage: {stats['effective_branch_coverage_pct']}%\n") - f.write(f"\n") - if stale: - f.write(f"Stale justifications ({len(stale)}):\n") - for s in stale: - f.write(f" - {s['file']}:{s['line']} [{s['id']}]\n") - f.write("\n") - - -def load_manifest(path: Path) -> Dict[str, Any]: - """Load the justification manifest JSON.""" - if not path.exists(): - print(f"ERROR: Manifest not found: {path}", file=sys.stderr) - sys.exit(1) - with open(path, "r", encoding="utf-8") as f: - return json.load(f) - - -def parse_args() -> argparse.Namespace: - """Parse command-line arguments.""" - parser = argparse.ArgumentParser( - description="Effective coverage calculator and HTML post-processor" - ) - parser.add_argument( - "--html-dir", - type=Path, - required=True, - help="Path to llvm-cov HTML report directory", - ) - parser.add_argument( - "--manifest", - type=Path, - required=True, - help="Path to resolved justification manifest (from justify.py)", - ) - parser.add_argument( - "--output", - type=Path, - required=True, - help="Output path for justification report (JSON)", - ) - return parser.parse_args() - - -if __name__ == "__main__": - main() diff --git a/quality/coverage/llvm_cov/filter_regexes.txt b/quality/coverage/llvm_cov/filter_regexes.txt deleted file mode 100644 index c8ce6427c..000000000 --- a/quality/coverage/llvm_cov/filter_regexes.txt +++ /dev/null @@ -1,20 +0,0 @@ -# Coverage filter regexes (one per line). -# Lines matching any of these patterns are excluded from the coverage report -# via llvm-cov's --ignore-filename-regex option. -# -# NOTE: --experimental_use_llvm_covmap causes Bazel to instrument ALL targets -# regardless of --instrumentation_filter. Therefore, filtering MUST happen here -# at the report level. - -# Exclude mock files. -.*_mock.*\.(h|hpp|cpp)$ - -# Exclude external dependencies (anything under external/). -external/.* - -# Exclude test files and test directories. -.*_test\.(cpp|h|hpp)$ -.*/test/.* - -# Exclude performance benchmarks. -.*/performance_benchmarks/.* diff --git a/quality/coverage/llvm_cov/justify.py b/quality/coverage/llvm_cov/justify.py deleted file mode 100644 index 8e37292b1..000000000 --- a/quality/coverage/llvm_cov/justify.py +++ /dev/null @@ -1,402 +0,0 @@ -#!/usr/bin/env python3 -# ******************************************************************************* -# Copyright (c) 2026 Contributors to the Eclipse Foundation -# -# See the NOTICE file(s) distributed with this work for additional -# information regarding copyright ownership. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0 -# -# SPDX-License-Identifier: Apache-2.0 -# ******************************************************************************* -"""Coverage justification processor. - -Parses the YAML justification database and source files for COV_JUSTIFIED markers. -Resolves all justified lines and produces a manifest mapping file:line → justification. - -Usage: - python justify.py --yaml --source-root --output - -Supports two ways to specify justified lines: -1. YAML locations: directly specify file + line ranges in the YAML -2. In-code markers: COV_JUSTIFIED , COV_JUSTIFIED_START / COV_JUSTIFIED_STOP -""" - -import argparse -import json -import re -import sys -from pathlib import Path -from typing import Any, Dict, List, Set, Tuple - -import yaml - - -# Marker patterns -COV_JUSTIFIED_LINE_RE = re.compile(r"COV_JUSTIFIED\s+([\w-]+)") -COV_JUSTIFIED_START_RE = re.compile(r"COV_JUSTIFIED_START\s+([\w-]+)") -COV_JUSTIFIED_STOP_RE = re.compile(r"COV_JUSTIFIED_STOP") - -VALID_CATEGORIES = { - "defensive_programming", - "tool_false_positive", - "platform_specific", - "other", -} - - -def main() -> None: - """Main entry point.""" - args = parse_args() - - justifications_data = load_yaml(args.yaml) - validate_yaml(justifications_data) - - # Build lookup: id -> justification entry - justifications_by_id: Dict[str, Dict[str, Any]] = {} - for entry in justifications_data.get("justifications", []): - justifications_by_id[entry["id"]] = entry - - # Resolve all justified lines - resolved: Dict[str, Dict[int, Dict[str, str]]] = {} - warnings: List[str] = [] - errors: List[str] = [] - - # 1. Process YAML direct locations - for entry in justifications_data.get("justifications", []): - for location in entry.get("locations", []): - file_path = location["file"] - full_path = Path(args.source_root) / file_path - - if not full_path.exists(): - errors.append( - f"File not found for justification '{entry['id']}': {file_path}" - ) - continue - - lines = resolve_location_lines(location) - if file_path not in resolved: - resolved[file_path] = {} - for line in lines: - resolved[file_path][line] = { - "id": entry["id"], - "category": entry["category"], - "reason": entry["reason"].strip(), - } - - # 2. Scan source files for in-code COV_JUSTIFIED markers - source_files = collect_source_files(args.source_root, args.file_filter) - for source_file in source_files: - rel_path = str(source_file.relative_to(args.source_root)) - scan_warnings, scan_lines = scan_file_for_markers( - source_file, rel_path, justifications_by_id - ) - warnings.extend(scan_warnings) - - if scan_lines: - if rel_path not in resolved: - resolved[rel_path] = {} - for line_num, justification_info in scan_lines.items(): - resolved[rel_path][line_num] = justification_info - - # Output manifest - manifest = { - "version": 1, - "source_root": str(args.source_root), - "justified_files": { - filepath: {str(k): v for k, v in lines.items()} - for filepath, lines in sorted(resolved.items()) - }, - "warnings": warnings, - "errors": errors, - } - - output_path = Path(args.output) - output_path.parent.mkdir(parents=True, exist_ok=True) - with open(output_path, "w", encoding="utf-8") as f: - json.dump(manifest, f, indent=2) - - # Print diagnostics - total_justified_lines = sum(len(lines) for lines in resolved.values()) - print( - f"INFO: Resolved {total_justified_lines} justified lines across " - f"{len(resolved)} files.", - file=sys.stderr, - ) - if warnings: - for w in warnings: - print(f"WARNING: {w}", file=sys.stderr) - if errors: - for e in errors: - print(f"ERROR: {e}", file=sys.stderr) - sys.exit(1) - - -def resolve_location_lines(location: Dict[str, Any]) -> List[int]: - """Resolve line numbers from a YAML location entry.""" - if "lines" in location: - return location["lines"] - elif "line_start" in location and "line_end" in location: - return list(range(location["line_start"], location["line_end"] + 1)) - elif "line" in location: - return [location["line"]] - return [] - - -def scan_file_for_markers( - file_path: Path, - rel_path: str, - justifications_by_id: Dict[str, Dict[str, Any]], -) -> Tuple[List[str], Dict[int, Dict[str, str]]]: - """Scan a source file for COV_JUSTIFIED markers.""" - warnings = [] - justified_lines: Dict[int, Dict[str, str]] = {} - - try: - with open(file_path, "r", encoding="utf-8", errors="replace") as f: - lines = f.readlines() - except (IOError, OSError): - return warnings, justified_lines - - region_stack: List[Tuple[int, str]] = [] # (start_line, justification_id) - - for line_num, line in enumerate(lines, start=1): - # Check for COV_JUSTIFIED_START - start_match = COV_JUSTIFIED_START_RE.search(line) - if start_match: - jid = start_match.group(1) - if jid not in justifications_by_id: - warnings.append( - f"{rel_path}:{line_num}: COV_JUSTIFIED_START references " - f"unknown ID '{jid}'" - ) - else: - region_stack.append((line_num, jid)) - continue - - # Check for COV_JUSTIFIED_STOP - stop_match = COV_JUSTIFIED_STOP_RE.search(line) - if stop_match: - if not region_stack: - warnings.append( - f"{rel_path}:{line_num}: COV_JUSTIFIED_STOP without matching START" - ) - else: - start_line, jid = region_stack.pop() - if jid in justifications_by_id: - entry = justifications_by_id[jid] - for ln in range(start_line + 1, line_num): - justified_lines[ln] = { - "id": jid, - "category": entry["category"], - "reason": entry["reason"].strip(), - } - continue - - # Check for single-line COV_JUSTIFIED (but not START/STOP) - if "COV_JUSTIFIED_START" not in line and "COV_JUSTIFIED_STOP" not in line: - line_match = COV_JUSTIFIED_LINE_RE.search(line) - if line_match: - jid = line_match.group(1) - if jid not in justifications_by_id: - warnings.append( - f"{rel_path}:{line_num}: COV_JUSTIFIED references " - f"unknown ID '{jid}'" - ) - else: - entry = justifications_by_id[jid] - justified_lines[line_num] = { - "id": jid, - "category": entry["category"], - "reason": entry["reason"].strip(), - } - - # Check for unclosed regions - for start_line, jid in region_stack: - warnings.append( - f"{rel_path}:{start_line}: COV_JUSTIFIED_START '{jid}' without matching STOP" - ) - - return warnings, justified_lines - - -def collect_source_files(source_root: Path, file_filter: str) -> List[Path]: - """Collect source files to scan for markers.""" - extensions = file_filter.split(",") if file_filter else ["cpp", "h", "hpp", "cc"] - files = [] - for ext in extensions: - files.extend(source_root.rglob(f"*.{ext.strip()}")) - return sorted(files) - - -def load_yaml(yaml_path: Path) -> Dict[str, Any]: - """Load YAML justification database.""" - if not yaml_path.exists(): - print(f"ERROR: Justification YAML not found: {yaml_path}", file=sys.stderr) - sys.exit(1) - - with open(yaml_path, "r", encoding="utf-8") as f: - content = f.read() - - return yaml.safe_load(content) - - -def validate_yaml(data: Dict[str, Any]) -> None: - """Validate the justification YAML structure and types.""" - try: - errors = [] - - if not isinstance(data, dict): - print("ERROR: YAML validation: root must be a mapping", file=sys.stderr) - sys.exit(1) - - if "version" not in data: - errors.append("Missing 'version' field") - elif not isinstance(data["version"], int): - errors.append(f"'version' must be an integer, got {type(data['version']).__name__}") - - if "justifications" not in data: - errors.append("Missing 'justifications' field") - for e in errors: - print(f"ERROR: {e}", file=sys.stderr) - sys.exit(1) - - if not isinstance(data["justifications"], list): - errors.append( - f"'justifications' must be a list, got {type(data['justifications']).__name__}" - ) - for e in errors: - print(f"ERROR: YAML validation: {e}", file=sys.stderr) - sys.exit(1) - - seen_ids: Set[str] = set() - for i, entry in enumerate(data["justifications"]): - prefix = f"justifications[{i}]" - - if not isinstance(entry, dict): - errors.append(f"{prefix}: must be a mapping, got {type(entry).__name__}") - continue - - if "id" not in entry: - errors.append(f"{prefix}: missing 'id'") - continue - - jid = entry["id"] - if not isinstance(jid, str): - errors.append(f"{prefix}: 'id' must be a string, got {type(jid).__name__}") - continue - - if jid in seen_ids: - errors.append(f"{prefix}: duplicate ID '{jid}'") - seen_ids.add(jid) - - if not re.match(r"^[a-z0-9]+(-[a-z0-9]+)*$", jid): - errors.append(f"{prefix}: ID '{jid}' must be kebab-case") - - if "category" not in entry: - errors.append(f"{prefix}: missing 'category'") - elif not isinstance(entry["category"], str): - errors.append( - f"{prefix}: 'category' must be a string, " - f"got {type(entry['category']).__name__}" - ) - elif entry["category"] not in VALID_CATEGORIES: - errors.append( - f"{prefix}: invalid category '{entry['category']}'. " - f"Must be one of: {sorted(VALID_CATEGORIES)}" - ) - - if "reason" not in entry: - errors.append(f"{prefix}: missing 'reason'") - elif not isinstance(entry["reason"], str): - errors.append( - f"{prefix}: 'reason' must be a string, " - f"got {type(entry['reason']).__name__}" - ) - elif not entry["reason"].strip(): - errors.append(f"{prefix}: 'reason' must not be empty") - - if "locations" in entry: - if not isinstance(entry["locations"], list): - errors.append( - f"{prefix}: 'locations' must be a list, " - f"got {type(entry['locations']).__name__}" - ) - else: - for j, loc in enumerate(entry["locations"]): - loc_prefix = f"{prefix}.locations[{j}]" - if not isinstance(loc, dict): - errors.append( - f"{loc_prefix}: must be a mapping, " - f"got {type(loc).__name__}" - ) - continue - if "file" not in loc: - errors.append(f"{loc_prefix}: missing 'file'") - elif not isinstance(loc["file"], str): - errors.append( - f"{loc_prefix}: 'file' must be a string, " - f"got {type(loc['file']).__name__}" - ) - for int_field in ("line", "line_start", "line_end"): - if int_field in loc and not isinstance(loc[int_field], int): - errors.append( - f"{loc_prefix}: '{int_field}' must be an integer, " - f"got {type(loc[int_field]).__name__}" - ) - if "lines" in loc: - if not isinstance(loc["lines"], list): - errors.append( - f"{loc_prefix}: 'lines' must be a list, " - f"got {type(loc['lines']).__name__}" - ) - elif not all(isinstance(ln, int) for ln in loc["lines"]): - errors.append( - f"{loc_prefix}: 'lines' must contain only integers" - ) - - if errors: - for e in errors: - print(f"ERROR: YAML validation: {e}", file=sys.stderr) - sys.exit(1) - except Exception as error: - print(f"ERROR: YAML validation: {error}", file=sys.stderr) - sys.exit(1) - - -def parse_args() -> argparse.Namespace: - """Parse command-line arguments.""" - parser = argparse.ArgumentParser( - description="Coverage justification processor" - ) - parser.add_argument( - "--yaml", - type=Path, - required=True, - help="Path to coverage_justifications.yaml", - ) - parser.add_argument( - "--source-root", - type=Path, - required=True, - help="Root directory of source files", - ) - parser.add_argument( - "--output", - type=Path, - required=True, - help="Output path for resolved justification manifest (JSON)", - ) - parser.add_argument( - "--file-filter", - type=str, - default="cpp,h,hpp,cc", - help="Comma-separated file extensions to scan (default: cpp,h,hpp,cc)", - ) - return parser.parse_args() - - -if __name__ == "__main__": - main() diff --git a/quality/coverage/llvm_cov/merger.py b/quality/coverage/llvm_cov/merger.py deleted file mode 100644 index a2e2c195c..000000000 --- a/quality/coverage/llvm_cov/merger.py +++ /dev/null @@ -1,181 +0,0 @@ -#!/usr/bin/env python3 -# ******************************************************************************* -# Copyright (c) 2026 Contributors to the Eclipse Foundation -# -# See the NOTICE file(s) distributed with this work for additional -# information regarding copyright ownership. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0 -# -# SPDX-License-Identifier: Apache-2.0 -# ******************************************************************************* -"""Per-test coverage output generator using llvm-cov. - -This script is invoked by Bazel as the --coverage_output_generator for each test. -It receives profraw files from test execution, merges them into profdata, generates -an HTML coverage report using llvm-cov show, and packages everything into a zip file -that the reporter can later aggregate. - -Expected Bazel interface (from collect_coverage.sh): - --coverage_dir= Directory containing *.profraw files - --output_file= Where to write the output (zip) - --source_file_manifest= File listing instrumented sources and object files - --filter_sources= Source path regexes to exclude (repeatable) - [--sources_to_replace_file=] Optional source mapping file -""" - -import argparse -import json -import os -import subprocess -import sys -import zipfile -from pathlib import Path -from typing import List, Set - - -def main() -> None: - args = parse_args() - - # Get object files from the manifest. - object_files = get_object_files_from_manifest(args.source_file_manifest) - if not object_files: - print("INFO: No instrumented object files found, skipping coverage.", file=sys.stderr) - cleanup_dangling_symlinks(args.coverage_dir) - sys.exit(0) - - # Find profraw files. - profraw_files = sorted(args.coverage_dir.glob("*.profraw")) - if not profraw_files: - print("INFO: No *.profraw files found, skipping coverage.", file=sys.stderr) - cleanup_dangling_symlinks(args.coverage_dir) - sys.exit(0) - - # Merge profraw → profdata. - profdata_dir = args.coverage_dir / "profdata" - profdata_dir.mkdir(exist_ok=True) - profdata_file = profdata_dir / "target.profdata" - - run_command([ - str(os.environ.get("LLVM_PROFDATA")), "merge", - "--sparse", - "--output", str(profdata_file), - ] + [str(f) for f in profraw_files]) - - # Create meta.json with object files for the reporter. - meta_dir = args.coverage_dir / "meta" - meta_dir.mkdir(exist_ok=True) - meta = { - "object_files": [os.path.realpath(f) for f in sorted(object_files)], - } - with open(meta_dir / "meta.json", "w", encoding="utf-8") as f: - json.dump(meta, f) - - # Package into zip at output_file. - create_zip( - root=args.coverage_dir, - directories=[profdata_dir, meta_dir], - output_file=args.output_file, - ) - - # Clean up dangling symlinks in coverage_dir that would cause Bazel tree - # artifact validation to fail (e.g. the 'gcov' symlink created by - # collect_cc_coverage.sh's init_gcov() pointing into the destroyed sandbox). - cleanup_dangling_symlinks(args.coverage_dir) - - target = os.environ.get("TEST_TARGET", "unknown") - print(f"INFO: Coverage merger completed for '{target}'", file=sys.stderr) - - -def cleanup_dangling_symlinks(directory: Path) -> None: - """Remove symlinks in the coverage directory that would become dangling. - - Bazel's tree artifact validation rejects directories containing dangling - symlinks. The 'gcov' symlink created by collect_cc_coverage.sh's init_gcov() - points into the sandbox which is torn down before validation runs. Since we - use llvm-cov directly, this symlink is not needed. - """ - gcov_link = directory / "gcov" - if gcov_link.is_symlink(): - gcov_link.unlink() - - # Also remove any other symlinks pointing into sandbox paths. - for entry in directory.iterdir(): - if entry.is_symlink(): - target = os.readlink(entry) - if "sandbox" in target: - entry.unlink() - - -def get_object_files_from_manifest(source_file_manifest: Path) -> Set[str]: - """Parse the coverage manifest to find instrumented object files.""" - runfiles_dir = Path(os.environ.get("RUNFILES_DIR", "")) / os.environ.get("TEST_WORKSPACE", "_main") - exec_root = Path(os.environ.get("ROOT")) - - object_files = set() - with open(source_file_manifest, encoding="utf-8") as f: - manifests = [line.strip() for line in f.readlines()] - - for manifest in manifests: - if "objects_list.txt" in manifest: - with open(manifest, encoding="utf-8") as f: - for line in f: - obj_path = line.strip() - if not obj_path: - continue - # Try runfiles first, then exec_root. - candidate = runfiles_dir / obj_path - if candidate.exists(): - object_files.add(str(candidate)) - else: - object_files.add(str(exec_root / obj_path)) - - return object_files - - -def run_command(cmd: List[str]) -> subprocess.CompletedProcess: - """Run a command and exit on failure.""" - try: - return subprocess.run( - cmd, - check=True, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - ) - except subprocess.CalledProcessError as e: - print(f"ERROR: Command failed with code {e.returncode}:", file=sys.stderr) - print(f" {' '.join(cmd)}", file=sys.stderr) - if e.stdout: - print(e.stdout, file=sys.stderr) - sys.exit(1) - - -def create_zip(root: Path, directories: List[Path], output_file: Path) -> None: - """Create a zip file from the given directories relative to root.""" - with zipfile.ZipFile(output_file, "w", zipfile.ZIP_DEFLATED) as zf: - for directory in directories: - if not directory.exists(): - continue - for dirpath, _, files in os.walk(directory): - for filename in files: - file_path = Path(dirpath) / filename - arcname = file_path.relative_to(root) - zf.write(file_path, arcname) - - -def parse_args() -> argparse.Namespace: - """Parse command-line arguments matching the Bazel LCOV_MERGER interface.""" - parser = argparse.ArgumentParser(description="LLVM coverage merger for Bazel") - parser.add_argument("--coverage_dir", type=Path, required=True) - parser.add_argument("--output_file", type=Path, required=True) - parser.add_argument("--source_file_manifest", type=Path, required=True) - parser.add_argument("--filter_sources", action="append", default=[]) - parser.add_argument("--sources_to_replace_file", type=str, default=None) - return parser.parse_args() - - -if __name__ == "__main__": - main() diff --git a/quality/coverage/llvm_cov/reporter.py b/quality/coverage/llvm_cov/reporter.py deleted file mode 100644 index fdceed616..000000000 --- a/quality/coverage/llvm_cov/reporter.py +++ /dev/null @@ -1,320 +0,0 @@ -#!/usr/bin/env python3 -# ******************************************************************************* -# Copyright (c) 2026 Contributors to the Eclipse Foundation -# -# See the NOTICE file(s) distributed with this work for additional -# information regarding copyright ownership. -# -# This program and the accompanying materials are made available under the -# terms of the Apache License Version 2.0 which is available at -# https://www.apache.org/licenses/LICENSE-2.0 -# -# SPDX-License-Identifier: Apache-2.0 -# ******************************************************************************* -"""Final coverage report generator using llvm-cov. - -This script is invoked by Bazel as the --coverage_report_generator after all tests -complete. It reads the per-test zip files produced by the merger, merges all profdata -into one, and generates the final combined HTML report. - -Expected Bazel interface: - --reports_file= Text file listing paths to all per-test coverage outputs - --output_file= Where to write the final report (zip) -""" - -import argparse -import json -import os -import subprocess -import sys -import zipfile -from pathlib import Path -from typing import List, Set, Tuple -from python.runfiles import Runfiles - - -def main() -> None: - """Main entry point.""" - args = parse_args() - - # Read the list of per-test report files. - reports = read_reports_file(args.reports_file) - if not reports: - print("INFO: No coverage reports found.", file=sys.stderr) - write_empty_output(args.output_file) - sys.exit(0) - - # Extract profdata and object files from each per-test zip. - valid_profdata_files, valid_object_files = extract_reports(reports) - - if not valid_profdata_files or not valid_object_files: - print("INFO: No valid profdata or object files found.", file=sys.stderr) - write_empty_output(args.output_file) - sys.exit(0) - - # Get llvm tools via runfiles. - r = Runfiles.Create() - llvm_bin_path = Path(r.Rlocation("llvm_toolchain/llvm-cov")) - - # Merge all profdata files. - merged_profdata = Path.cwd() / "merged_coverage.profdata" - run_command([ - r.Rlocation("llvm_toolchain/llvm-profdata"), "merge", - "--sparse", - "--output", str(merged_profdata), - ] + sorted(valid_profdata_files)) - - # Build coverage arguments. - coverage_args = ["--instr-profile", str(merged_profdata)] - for obj in sorted(valid_object_files): - coverage_args.extend(["--object", obj]) - - # Get filter regexes and workspace root. - filter_regexes = load_filter_regexes(r, args.filter_regexes) - workspace_root = args.workspace_root - - common_show_args = { - "llvm_bin_path": llvm_bin_path, - "coverage_args": coverage_args, - "filter_regexes": sorted(filter_regexes), - "workspace_root": workspace_root, - } - - # Generate HTML report. - html_report_dir = Path.cwd() / "html_report" - run_llvm_cov_show( - **common_show_args, - output_format="html", - html_report_dir=html_report_dir, - ) - - # Generate LCOV report (for backward compatibility with dashboards). - lcov_report_dir = Path.cwd() / "lcov_report" - lcov_report_dir.mkdir(exist_ok=True) - lcov_result = run_llvm_cov_export( - llvm_bin_path=llvm_bin_path, - coverage_args=coverage_args, - filter_regexes=sorted(filter_regexes), - workspace_root=workspace_root, - ) - with open(lcov_report_dir / "lcov.dat", "w", encoding="utf-8") as f: - f.write(lcov_result.stdout) - - # Generate text summary. - text_report_dir = Path.cwd() / "text_report" - text_report_dir.mkdir(exist_ok=True) - summary = run_llvm_cov_report( - llvm_bin_path=llvm_bin_path, - coverage_args=coverage_args, - filter_regexes=sorted(filter_regexes), - ) - with open(text_report_dir / "summary.txt", "w", encoding="utf-8") as f: - f.write(summary.stdout) - print(summary.stdout, file=sys.stderr) - - # Package everything into the output zip. - directories = [html_report_dir, lcov_report_dir, text_report_dir] - create_zip( - root=Path.cwd(), - directories=directories, - output_file=args.output_file, - ) - - print(f"INFO: Coverage reporter completed. Output: {args.output_file}", file=sys.stderr) - - -def run_llvm_cov_show( - llvm_bin_path: Path, - coverage_args: List[str], - filter_regexes: List[str], - workspace_root: str, - output_format: str, - html_report_dir: Path = None, -) -> subprocess.CompletedProcess: - """Run llvm-cov show.""" - cmd = [ - str(llvm_bin_path), - "show", - f"--format={output_format}", - f"--path-equivalence=/proc/self/cwd/,{workspace_root}", - f"--compilation-dir={workspace_root}", - "--show-branches=count", - "--show-region-summary=0", - ] - - cxxfilt = llvm_bin_path.parent / "llvm-cxxfilt" - if cxxfilt.exists(): - cmd.append(f"--Xdemangler={cxxfilt}") - - for regex in filter_regexes: - adjusted = regex.replace("/proc/self/cwd/", workspace_root) - cmd.append(f"--ignore-filename-regex={adjusted}") - - if html_report_dir: - cmd.append(f"--output-dir={html_report_dir}") - cmd.append("--coverage-watermark=100,50") - cmd.append("--show-expansions") - - cmd.extend(coverage_args) - return run_command(cmd) - - -def run_llvm_cov_export( - llvm_bin_path: Path, - coverage_args: List[str], - filter_regexes: List[str], - workspace_root: str, -) -> subprocess.CompletedProcess: - """Run llvm-cov export to produce LCOV format.""" - cmd = [ - str(llvm_bin_path), - "export", - "--format=lcov", - f"--path-equivalence=/proc/self/cwd/,{workspace_root}", - f"--compilation-dir={workspace_root}", - ] - - for regex in filter_regexes: - adjusted = regex.replace("/proc/self/cwd/", workspace_root) - cmd.append(f"--ignore-filename-regex={adjusted}") - - cmd.extend(coverage_args) - return run_command(cmd) - - -def run_llvm_cov_report( - llvm_bin_path: Path, - coverage_args: List[str], - filter_regexes: List[str], -) -> subprocess.CompletedProcess: - """Run llvm-cov report for a summary.""" - cmd = [ - str(llvm_bin_path), - "report", - "--summary-only", - "--show-region-summary=0", - "--show-branch-summary=1", - ] - - for regex in filter_regexes: - cmd.append(f"--ignore-filename-regex={regex}") - - cmd.extend(coverage_args) - return run_command(cmd) - - -def extract_reports(reports: List[str]) -> Tuple[Set[str], Set[str]]: - """Extract profdata and object files from per-test zip files.""" - valid_profdata_files = set() - valid_object_files = set() - - for i, report_path in enumerate(reports): - # Skip baseline_coverage files (LCOV format, not our zip). - if "baseline_coverage" in report_path: - continue - - report = Path(report_path) - if not report.exists() or report.stat().st_size == 0: - continue - - # Check if it's a valid zip. - if not zipfile.is_zipfile(report): - continue - - profdata_name = f"coverage_report_{i:08d}.profdata" - - try: - with zipfile.ZipFile(report, "r") as archive: - # Extract meta. - meta_json = archive.read("meta/meta.json") - target_meta = json.loads(meta_json) - - # Extract profdata. - profdata_content = archive.read("profdata/target.profdata") - profdata_path = Path.cwd() / profdata_name - with open(profdata_path, "wb") as f: - f.write(profdata_content) - - valid_profdata_files.add(str(profdata_path)) - - # Collect object files. - for obj in target_meta.get("object_files", []): - if obj and Path(obj).exists(): - valid_object_files.add(os.path.realpath(obj)) - - except (zipfile.BadZipFile, KeyError, json.JSONDecodeError) as e: - print(f"WARNING: Skipping invalid report {report_path}: {e}", file=sys.stderr) - continue - - return valid_profdata_files, valid_object_files - -def read_reports_file(reports_file: Path) -> List[str]: - """Read the reports file listing all per-test coverage outputs.""" - with open(reports_file, encoding="utf-8") as f: - return [line.strip() for line in f if line.strip()] - - -def load_filter_regexes(runfiles: Runfiles, rlocation_path: str) -> List[str]: - """Load filter regexes from filter_regexes.txt via Bazel runfiles.""" - path = runfiles.Rlocation(rlocation_path) - if not path or not Path(path).exists(): - print(f"WARNING: {rlocation_path} not found in runfiles, no source filtering applied", - file=sys.stderr) - return [] - - lines = Path(path).read_text(encoding="utf-8").splitlines() - return [line.strip() for line in lines if line.strip() and not line.strip().startswith("#")] - - -def write_empty_output(output_file: Path) -> None: - """Write an empty file as output when there's nothing to report.""" - with open(output_file, "w", encoding="utf-8") as f: - f.write("") - - -def run_command(cmd: List[str]) -> subprocess.CompletedProcess: - """Run a command and exit on failure.""" - try: - return subprocess.run( - cmd, - check=True, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - ) - except subprocess.CalledProcessError as e: - print(f"ERROR: Command failed with code {e.returncode}:", file=sys.stderr) - print(f" {' '.join(cmd)}", file=sys.stderr) - if e.stdout: - print(e.stdout, file=sys.stderr) - sys.exit(1) - - -def create_zip(root: Path, directories: List[Path], output_file: Path) -> None: - """Create a zip file from the given directories relative to root.""" - with zipfile.ZipFile(output_file, "w", zipfile.ZIP_DEFLATED) as zf: - for directory in directories: - if not directory.exists(): - continue - for dirpath, _, files in os.walk(directory): - for filename in files: - file_path = Path(dirpath) / filename - arcname = file_path.relative_to(root) - zf.write(file_path, arcname) - - -def parse_args() -> argparse.Namespace: - """Parse command-line arguments matching the Bazel coverage_report_generator interface.""" - parser = argparse.ArgumentParser(description="LLVM coverage reporter for Bazel") - parser.add_argument("--output_file", type=Path, required=True) - parser.add_argument("--reports_file", type=Path, required=True) - parser.add_argument("--filter_regexes", type=str, required=True, - help="Rlocation path to the filter regexes file") - parser.add_argument("--workspace_root", type=str, required=True, - help="Real workspace root path for source path mapping") - return parser.parse_args() - - - -if __name__ == "__main__": - main()