From f7aad39a6b0f3bb5f8456b368a4158a1c138eec8 Mon Sep 17 00:00:00 2001 From: "Emrich Oliver (ETAS)" Date: Tue, 23 Jun 2026 08:25:35 +0200 Subject: [PATCH 01/11] Add centralized coverage infrastructure Introduces a reusable coverage toolchain based on llvm-cov: - coverage/merger.py: per-test profraw -> profdata + object file packaging - coverage/reporter.py: cross-test aggregation, HTML/LCOV/text reports - coverage/effective_coverage.py: justification overlay + effective metrics - coverage/justify.py: justification manifest resolution - coverage/defs.bzl: score_coverage_reporter macro for consumer wiring - coverage/coverage.bazelrc: shared coverage flags - coverage/filter_regexes.txt: baseline source exclusions - coverage/generate_coverage_html.sh: convenience entry point Adds an end-to-end example under tests/coverage exercising the pipeline with a small instrumented library, test, justification file, and consumer filter regexes. Known limitation: source files not linked into any cc_test are not yet included in the report (no instrumented object file -> invisible to llvm-cov). 
--- .gitignore | 4 + MODULE.bazel | 23 + README.md | 14 +- coverage/BUILD.bazel | 90 +++ coverage/README.md | 195 +++++ coverage/coverage.bazelrc | 88 +++ coverage/defs.bzl | 147 ++++ coverage/effective_coverage.py | 753 ++++++++++++++++++++ coverage/filter_regexes.txt | 52 ++ coverage/generate_coverage_html.sh | 203 ++++++ coverage/justify.py | 402 +++++++++++ coverage/merger.py | 197 +++++ coverage/reporter.py | 335 +++++++++ coverage/requirements.in | 1 + coverage/requirements_lock.txt | 83 +++ tests/.bazelrc | 10 + tests/BUILD.bazel | 4 + tests/MODULE.bazel | 9 + tests/coverage/BUILD.bazel | 52 ++ tests/coverage/coverable.cpp | 28 + tests/coverage/coverable.h | 26 + tests/coverage/coverable_test.cpp | 30 + tests/coverage/coverage_filter_regexes.txt | 3 + tests/coverage/coverage_justifications.yaml | 21 + 24 files changed, 2767 insertions(+), 3 deletions(-) create mode 100644 coverage/BUILD.bazel create mode 100644 coverage/README.md create mode 100644 coverage/coverage.bazelrc create mode 100644 coverage/defs.bzl create mode 100644 coverage/effective_coverage.py create mode 100644 coverage/filter_regexes.txt create mode 100755 coverage/generate_coverage_html.sh create mode 100644 coverage/justify.py create mode 100644 coverage/merger.py create mode 100644 coverage/reporter.py create mode 100644 coverage/requirements.in create mode 100644 coverage/requirements_lock.txt create mode 100644 tests/coverage/BUILD.bazel create mode 100644 tests/coverage/coverable.cpp create mode 100644 tests/coverage/coverable.h create mode 100644 tests/coverage/coverable_test.cpp create mode 100644 tests/coverage/coverage_filter_regexes.txt create mode 100644 tests/coverage/coverage_justifications.yaml diff --git a/.gitignore b/.gitignore index 1eeaf4d..9184d1c 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,10 @@ bazel-* .bazel-* +# Generated coverage reports (output from `bazel run @score_cpp_policies//coverage:generate_coverage_html`) +cpp_coverage/ +*/cpp_coverage/ + # IDE 
files .vscode/ .idea/ diff --git a/MODULE.bazel b/MODULE.bazel index dcb06f4..b4e6299 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -17,3 +17,26 @@ bazel_dep(name = "bazel_skylib", version = "1.8.2") bazel_dep(name = "platforms", version = "0.0.10") bazel_dep(name = "rules_cc", version = "0.2.17") bazel_dep(name = "aspect_rules_lint", version = "2.5.0") + +# --------------------------------------------------------------------------- +# Coverage tooling (//coverage/...). Hosts the merger/reporter/justify/ +# effective_coverage Python binaries and the shell driver. Consumers do not +# need rules_python themselves to use //coverage:reporter — it is wired in +# here once for the whole policies module. +# --------------------------------------------------------------------------- +bazel_dep(name = "rules_python", version = "1.8.5") +bazel_dep(name = "rules_shell", version = "0.3.0") + +python = use_extension("@rules_python//python/extensions:python.bzl", "python") +python.toolchain( + python_version = "3.12", + is_default = True, +) + +pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip") +pip.parse( + hub_name = "score_cpp_policies_pip", + python_version = "3.12", + requirements_lock = "//coverage:requirements_lock.txt", +) +use_repo(pip, "score_cpp_policies_pip") diff --git a/README.md b/README.md index a22246d..703b35a 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ # score_cpp_policies Centralized C++ quality tool policies for Eclipse S-CORE, providing sanitizer -configurations and clang-tidy integration reusable across all S-CORE modules -(logging, communication, baselibs, etc.). +configurations, clang-tidy integration and source-based code coverage +reusable across all S-CORE modules (logging, communication, baselibs, etc.). -Planned: clang-format, code coverage policies. +Planned: clang-format. ## What This Provides @@ -15,6 +15,10 @@ Planned: clang-format, code coverage policies. 
- **Constraint system** — `target_compatible_with` settings for sanitizer-incompatible targets - **`clang_tidy/.clang-tidy`** — centralized default check set (conservative baseline, tailorable per module) - **`clang_tidy/clang_tidy.bazelrc`** — `--config=clang-tidy` bazelrc config consumers can import +- **`//coverage:reporter` + `score_coverage_reporter` macro** — llvm-cov source-based +coverage with a shared baseline of ignore regexes (test/mock/fake/external), pluggable +per-module extensions and an effective-coverage justification post-processor. +See [`coverage/README.md`](coverage/README.md) for the seven-step adoption guide. ## Available Sanitizer Configurations @@ -54,6 +58,10 @@ bazel_dep(name = "score_cpp_policies") Copy [`sanitizers/sanitizers.bazelrc`](sanitizers/sanitizers.bazelrc) into your repository's `.bazelrc`. +### Configure Coverage + +See [`coverage/README.md`](coverage/README.md). + ### Run Tests ```bash diff --git a/coverage/BUILD.bazel b/coverage/BUILD.bazel new file mode 100644 index 0000000..9d59cdb --- /dev/null +++ b/coverage/BUILD.bazel @@ -0,0 +1,90 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +load("@rules_python//python:defs.bzl", "py_binary") +load("@rules_shell//shell:sh_binary.bzl", "sh_binary") + +package(default_visibility = ["//visibility:public"]) + +# Files consumers may reference directly (e.g. .bazelrc imports, custom macros). 
+exports_files([ + "coverage.bazelrc", + "filter_regexes.txt", + "generate_coverage_html.sh", +]) + +# --------------------------------------------------------------------------- +# Per-test coverage merger. +# Bazel calls this as --coverage_output_generator once per test; it takes +# profraw files + the source-file manifest, merges them with llvm-profdata +# and packages everything (profdata + object-file list) into a zip that the +# reporter later aggregates. +# --------------------------------------------------------------------------- +py_binary( + name = "merger", + srcs = ["merger.py"], +) + +# --------------------------------------------------------------------------- +# Final coverage reporter (HTML / LCOV / text). +# Invoked indirectly via the per-consumer wrapper produced by +# `score_coverage_reporter` (see defs.bzl). Reads per-test zips, merges all +# profdata, then calls llvm-cov to render the combined report. +# +# Note: this target intentionally does NOT carry the llvm-cov / llvm-profdata +# binaries in its `data` — the consumer-side wrapper supplies them by label, +# letting each consumer pick its own llvm_toolchain repository name and +# version (see score_coverage_reporter in coverage/defs.bzl). +# --------------------------------------------------------------------------- +py_binary( + name = "reporter", + srcs = ["reporter.py"], + deps = ["@rules_python//python/runfiles"], +) + +# --------------------------------------------------------------------------- +# Coverage justification processor. +# Reads a YAML database of justifications + scans source for COV_JUSTIFIED +# markers and emits a manifest of (file, line) -> justification. 
+# --------------------------------------------------------------------------- +py_binary( + name = "justify", + srcs = ["justify.py"], + deps = [ + "@score_cpp_policies_pip//pyyaml", + ], +) + +# --------------------------------------------------------------------------- +# Effective coverage calculator + HTML post-processor. +# Takes the llvm-cov HTML report and the resolved justification manifest; +# rewrites the HTML to highlight justified lines and emits effective +# coverage statistics. +# --------------------------------------------------------------------------- +py_binary( + name = "effective_coverage", + srcs = ["effective_coverage.py"], +) + +# --------------------------------------------------------------------------- +# Generic post-`bazel coverage` driver. Consumers invoke this via +# `bazel run @score_cpp_policies//coverage:generate_coverage_html -- [flags]`. +# --------------------------------------------------------------------------- +sh_binary( + name = "generate_coverage_html", + srcs = ["generate_coverage_html.sh"], + data = [ + ":justify", + ":effective_coverage", + ], +) diff --git a/coverage/README.md b/coverage/README.md new file mode 100644 index 0000000..4b69ad4 --- /dev/null +++ b/coverage/README.md @@ -0,0 +1,195 @@ +# Coverage — adoption guide + +Centralized C++ source-based coverage tooling for Eclipse S-CORE modules, +built on `llvm-cov` source-based coverage. This package provides: + +| Component | What it does | +|---|---| +| `:merger` (py_binary) | Per-test profraw → profdata + object-file packaging. Wired as `--coverage_output_generator` by `coverage.bazelrc`. | +| `:reporter` (py_binary) | Final aggregation: profdata merge + llvm-cov HTML / LCOV / text. Invoked by the per-consumer wrapper produced by `score_coverage_reporter`. | +| `:justify` (py_binary) | Reads a YAML database + `COV_JUSTIFIED` source markers and emits a manifest of justified lines/branches. 
| +| `:effective_coverage` (py_binary) | Post-processes the llvm-cov HTML to highlight justified lines and compute effective coverage. | +| `:generate_coverage_html` (sh_binary) | One-shot driver: unzip Bazel coverage output, run justification, optional CI archive. | +| `defs.bzl :: score_coverage_reporter` | Macro consumers call to wire the report generator with their own filter regex extensions and llvm tools. | +| `coverage.bazelrc` | Generic `coverage` flags consumers import from their own `.bazelrc`. | +| `filter_regexes.txt` | Baseline `--ignore-filename-regex` set (tests, mocks, fakes, benchmarks, external/). | + +--- + +## Prerequisites + +Your repository must already have: + +1. **A Bzlmod setup** (`MODULE.bazel`). +2. **An `@llvm_toolchain`-style toolchain registered** through + `toolchains_llvm` (or any other source that produces `:llvm-cov` and + `:llvm-profdata` targets). The repository name does *not* have to be + `llvm_toolchain` — you pass the labels to the macro. +3. **A coverage-instrumented C++ toolchain** that matches the `@llvm_toolchain` + above (set via `--extra_toolchains` in your `.bazelrc`). + +--- + +## 1. Depend on `score_cpp_policies` + +```python +# MODULE.bazel +bazel_dep(name = "score_cpp_policies", version = "") +``` + +`rules_python`, `rules_shell` and the `pyyaml` pip hub are pulled in +transitively — you do **not** need to declare them yourself. + +> ⚠️ Add one line to your **root** `BUILD` / `BUILD.bazel` so the macro can +> rlocation-resolve the consumer workspace root at runtime: +> +> ```python +> exports_files(["MODULE.bazel"]) +> ``` + +## 2. 
Import the generic bazelrc + +```bazelrc +# .bazelrc +import %workspace%/../external/+_repo_rules+score_cpp_policies/coverage/coverage.bazelrc +``` + +Or, more portably, vendor a one-line `coverage.bazelrc` in your repo: + +```bazelrc +# .bazelrc +try-import %workspace%/coverage.bazelrc +``` + +```bazelrc +# coverage.bazelrc (vendored) +import %workspace%/external/+_repo_rules+score_cpp_policies/coverage/coverage.bazelrc +``` + +If your build uses a `local_path_override`, refer to the file by its repo +root path. (The recommended pattern is to copy the file's `coverage` lines +into your project's `.bazelrc` — there are no hidden flags.) + +## 3. Set your instrumentation filter + +`coverage.bazelrc` deliberately leaves `--instrumentation_filter` empty +because it is module-specific. Add one line in **your** `.bazelrc`: + +```bazelrc +coverage --instrumentation_filter="^//<your_module>[/:]" +``` + +> 💡 Use `[/:]` (not just `/`) so the top-level package itself +> (e.g. `//mymod:lib`) is included, not just subpackages. + +## 4. Create your reporter wrapper + +Create a small BUILD file (e.g. `tools/coverage/BUILD.bazel`): + +```python +load("@score_cpp_policies//coverage:defs.bzl", "score_coverage_reporter") + +score_coverage_reporter( + name = "reporter_wrapper", + llvm_cov = "@llvm_toolchain//:llvm-cov", + llvm_profdata = "@llvm_toolchain//:llvm-profdata", + # OPTIONAL: extend the baseline ignore regexes with project-specific patterns. + extra_regex_files = [":coverage_filter_regexes.txt"], + visibility = ["//visibility:public"], +) + +exports_files(["coverage_filter_regexes.txt"]) +``` + +Example `tools/coverage/coverage_filter_regexes.txt`: + +```text +# Project-specific exclusions on top of the S-CORE baseline. +.*/generated/.* +.*/proto/.*\.pb\.(h|cc)$ +``` + +## 5. Point Bazel at your wrapper + +```bazelrc +# .bazelrc +coverage --coverage_report_generator=//tools/coverage:reporter_wrapper +``` + +## 6.
(Optional) Set up justifications + +Create `tools/coverage/coverage_justifications.yaml`: + +```yaml +version: 1 +justifications: + - id: hw-unreachable-on-x86 + category: platform_specific + reason: | + ARM-only error path; cannot be exercised by x86 CI. + locations: + - file: mymod/src/foo.cpp + line_start: 42 + line_end: 47 +``` + +Or annotate code in place: + +```cpp +// One-liner: +return false; // COV_JUSTIFIED hw-unreachable-on-x86 + +// Region: +// COV_JUSTIFIED_START hw-unreachable-on-x86 +if (running_on_arm()) { ... } +// COV_JUSTIFIED_STOP +``` + +Valid categories: `defensive_programming`, `tool_false_positive`, +`platform_specific`, `other`. IDs must be kebab-case. + +## 7. Run it + +```bash +# Collect coverage data. +bazel coverage //... --build_tests_only + +# Build the HTML report + run justifications (if YAML exists) + show summary. +bazel run @score_cpp_policies//coverage:generate_coverage_html -- \ + --yaml tools/coverage/coverage_justifications.yaml +``` + +The HTML report appears at `cpp_coverage/index.html` by default. The +human-readable summary shows raw vs. effective line/branch coverage. + +For CI, you can also produce a zipped archive (HTML + LCOV + JUnit XMLs): + +```bash +bazel run @score_cpp_policies//coverage:generate_coverage_html -- \ + --yaml tools/coverage/coverage_justifications.yaml \ + --archive coverage_artifacts +``` + +--- + +## Customization knobs + +| Need | How | +|---|---| +| Add project-specific ignore regexes | `extra_regex_files = [":<your_regex_file>"]` on the macro | +| Different llvm version | Register your own `@my_llvm` and pass `llvm_cov = "@my_llvm//:llvm-cov"` and `llvm_profdata = "@my_llvm//:llvm-profdata"` | +| Different output directory | `--output-dir <dir>` on `generate_coverage_html` | +| Different effective coverage threshold | `COVERAGE_THRESHOLD=95 bazel run ...:generate_coverage_html ...` | + +## Troubleshooting + +- **`html_report/ not found`** — re-run `bazel coverage` first; the script + only post-processes existing output.
+- **Some `.cpp` files missing from the report** — confirm your + `--instrumentation_filter` covers the top-level package using `[/:]` + (not just `/`). +- **Test / mock files appearing in the report** — add a pattern that + matches their path or filename to your `extra_regex_files` entry. +- **`llvm-cov not found in runfiles`** — the macro arg `llvm_cov` must + point to a real binary target in your repo's repo mapping; the + typical `@llvm_toolchain//:llvm-cov` requires `use_repo(llvm, "llvm_toolchain")`. diff --git a/coverage/coverage.bazelrc b/coverage/coverage.bazelrc new file mode 100644 index 0000000..e835b80 --- /dev/null +++ b/coverage/coverage.bazelrc @@ -0,0 +1,88 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +# +# Generic coverage configuration shipped by @score_cpp_policies. +# +# Consumers import this file from their own .bazelrc with: +# +# import %workspace%/path/to/coverage.bazelrc +# +# This file deliberately does NOT set the following — they are consumer- +# specific and must be configured in the consumer's .bazelrc on top of this +# file: +# +# coverage --instrumentation_filter="^//<your_module>[/:]" +# coverage --coverage_report_generator=//:reporter_wrapper +# +# The reporter_wrapper target must be created with the +# `score_coverage_reporter` macro from @score_cpp_policies//coverage:defs.bzl +# so that consumer-specific filter regex extensions and the consumer +# workspace root are wired in correctly.
+# +# Prerequisites the consumer's MODULE.bazel must satisfy: +# - bazel_dep(name = "score_cpp_policies", version = "...") +# - register an llvm toolchain via the toolchains_llvm extension and pass +# the resulting `:llvm-cov` and `:llvm-profdata` labels to the +# `score_coverage_reporter` macro (the repository name is freely chosen +# by the consumer) + +# --------------------------------------------------------------------------- +# Bazel coverage instrumentation flags. +# --------------------------------------------------------------------------- +# `experimental_use_llvm_covmap` switches Bazel to source-based coverage +# (instr profiles) instead of gcov-style notes. +coverage --experimental_use_llvm_covmap +coverage --experimental_generate_llvm_lcov +coverage --combined_report=lcov +coverage --experimental_fetch_all_coverage_outputs + +# --------------------------------------------------------------------------- +# Custom coverage report generators provided by @score_cpp_policies. +# The merger is consumer-agnostic and is used as-is. +# The reporter_wrapper MUST be defined per-consumer via score_coverage_reporter +# and pointed to with `--coverage_report_generator` in the consumer's .bazelrc. +# --------------------------------------------------------------------------- +coverage --coverage_output_generator=@score_cpp_policies//coverage:merger + +# --------------------------------------------------------------------------- +# Test-time environment. +# --------------------------------------------------------------------------- +# Bazel's default collect_cc_coverage.sh would call gcov; we use llvm-cov, so +# stub out gcov and suppress the auto LCOV conversion (raw profraw is what +# the merger consumes). +coverage --test_env=GENERATE_LLVM_LCOV=0 +coverage --test_env=COVERAGE_GCOV_PATH=/usr/bin/true +# Required so that LLVM writes counters incrementally; without it abnormal +# termination paths report as uncovered even when reached. 
+coverage --test_env=LLVM_PROFILE_CONTINUOUS_MODE=1 + +# --------------------------------------------------------------------------- +# Compile flags required for accurate llvm-cov data. +# --------------------------------------------------------------------------- +# -O0: no optimization, otherwise line/branch mapping becomes unreliable. +coverage --cxxopt=-O0 +# Required for LLVM continuous mode (paired with LLVM_PROFILE_CONTINUOUS_MODE). +coverage --cxxopt=-mllvm +coverage --cxxopt=-runtime-counter-relocation + +# --------------------------------------------------------------------------- +# Disable dynamic libraries — they create per-test .so files whose +# instrumentation can clash with the production .so files. The first object +# loaded wins, leading to flaky / order-dependent coverage gaps. +# --------------------------------------------------------------------------- +coverage --dynamic_mode=off + +# --------------------------------------------------------------------------- +# Always rerun tests in coverage mode (cached results carry no profraw). +# --------------------------------------------------------------------------- +coverage --nocache_test_results diff --git a/coverage/defs.bzl b/coverage/defs.bzl new file mode 100644 index 0000000..193dc78 --- /dev/null +++ b/coverage/defs.bzl @@ -0,0 +1,147 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +"""Public API for the S-CORE centralized coverage report generator. 
+ +Consumers instantiate `score_coverage_reporter` in their own BUILD file to +create the `--coverage_report_generator` target that Bazel will call after +running `bazel coverage`. The macro wires in: + + 1. The S-CORE baseline filter regexes — applied first, on top of which + consumer-specific exclusions (`extra_regex_files`) are appended. + 2. The consumer's MODULE.bazel — used at runtime to resolve the real + workspace root for source path mapping in llvm-cov reports. + 3. The shared reporter binary `@score_cpp_policies//coverage:reporter`, + which performs profdata merge + HTML/LCOV/text report generation. + 4. The consumer-supplied llvm-cov and llvm-profdata binaries — passed by + label so the consumer can pick their own llvm_toolchain version and + repository name. + +Typical usage from a consumer BUILD file: + + load("@score_cpp_policies//coverage:defs.bzl", "score_coverage_reporter") + + score_coverage_reporter( + name = "reporter_wrapper", + llvm_cov = "@llvm_toolchain//:llvm-cov", + llvm_profdata = "@llvm_toolchain//:llvm-profdata", + extra_regex_files = ["coverage_filter_regexes.txt"], + visibility = ["//visibility:public"], + ) + +and from the consumer .bazelrc: + + coverage --coverage_report_generator=//tools/coverage:reporter_wrapper +""" + +load("@rules_shell//shell:sh_binary.bzl", "sh_binary") + +_BASELINE_REGEX = "@score_cpp_policies//coverage:filter_regexes.txt" +_REPORTER = "@score_cpp_policies//coverage:reporter" + +def score_coverage_reporter( + name, + llvm_cov, + llvm_profdata, + extra_regex_files = None, + **kwargs): + """Create a Bazel --coverage_report_generator wrapper for this repository. + + Args: + name: The target name. Reference it as + `--coverage_report_generator=//:` in your + coverage.bazelrc. + llvm_cov: Label of the llvm-cov binary (typically + "@llvm_toolchain//:llvm-cov"). + llvm_profdata: Label of the llvm-profdata binary (typically + "@llvm_toolchain//:llvm-profdata"). 
+ extra_regex_files: Optional list of additional filter-regex file labels + (or strings) to concatenate AFTER the + @score_cpp_policies baseline. Use these to exclude + consumer-specific patterns (e.g. project-only + generator outputs). + **kwargs: Forwarded to the underlying sh_binary (e.g. visibility, tags). + """ + extra_regex_files = extra_regex_files or [] + + merged_name = name + "_merged_filter_regexes" + merged_out = merged_name + ".txt" + wrapper_gen_name = name + "_wrapper_gen" + wrapper_out = name + ".sh" + + # Concatenate baseline regexes + consumer extras into a single file. + # Order is irrelevant for llvm-cov; it treats them as a set. + native.genrule( + name = merged_name, + srcs = [_BASELINE_REGEX] + list(extra_regex_files), + outs = [merged_out], + cmd = "cat $(SRCS) > $@", + ) + + # Generate the wrapper shell script. It computes the consumer workspace + # root from the runfiles location of //:MODULE.bazel and then execs the + # shared reporter binary with the merged regex file, workspace root, and + # consumer-supplied llvm tool rlocation paths. + # + # Escaping note: this genrule uses an unquoted heredoc (`<< EOF`) so the + # shell would normally expand $... — we escape each `$` we want literal + # in the output script as `\\$$`: + # * `$$` is Bazel's escape for a literal `$`. + # * `\` then makes the heredoc treat that `$` as literal. + # `$(rlocationpath ...)` IS a Bazel make-variable and is intentionally + # expanded at genrule time so the actual rlocation path is baked into + # the script. + native.genrule( + name = wrapper_gen_name, + srcs = [ + ":" + merged_name, + "//:MODULE.bazel", + llvm_cov, + llvm_profdata, + ], + outs = [wrapper_out], + tools = [_REPORTER], + cmd = ("""cat > $@ << EOF +#!/usr/bin/env bash +set -euo pipefail +_SELF_DIR="\\$$(cd "\\$$(dirname "\\$$0")" && pwd)" +_SELF_NAME="\\$$(basename "\\$$0")" +if [[ -z "\\$${RUNFILES_DIR:-}" || ! 
-d "\\$${RUNFILES_DIR}" ]]; then + if [[ -d "\\$${_SELF_DIR}/\\$${_SELF_NAME}.runfiles" ]]; then + export RUNFILES_DIR="\\$${_SELF_DIR}/\\$${_SELF_NAME}.runfiles" + fi +fi +WORKSPACE_ROOT="\\$$(cd "\\$$(dirname "\\$$(readlink -f "\\$${RUNFILES_DIR}/$(rlocationpath //:MODULE.bazel)")")" && pwd)/" +exec "\\$${RUNFILES_DIR}/$(rlocationpath %s)" \\\\ + --filter_regexes="$(rlocationpath :%s)" \\\\ + --workspace_root="\\$${WORKSPACE_ROOT}" \\\\ + --llvm_cov="$(rlocationpath %s)" \\\\ + --llvm_profdata="$(rlocationpath %s)" \\\\ + "\\$$@" +EOF +chmod +x $@ +""" % (_REPORTER, merged_name, llvm_cov, llvm_profdata)), + ) + + sh_binary( + name = name, + srcs = [":" + wrapper_gen_name], + data = [ + ":" + merged_name, + _REPORTER, + "//:MODULE.bazel", + llvm_cov, + llvm_profdata, + ], + **kwargs + ) diff --git a/coverage/effective_coverage.py b/coverage/effective_coverage.py new file mode 100644 index 0000000..f151e54 --- /dev/null +++ b/coverage/effective_coverage.py @@ -0,0 +1,753 @@ +#!/usr/bin/env python3 +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +"""Effective coverage calculator and HTML post-processor. + +Takes the llvm-cov HTML report and the resolved justification manifest. +Modifies the HTML to show justified lines in a distinct color (yellow/orange) +and calculates effective coverage metrics. 
+ +Usage: + python effective_coverage.py --html-dir --manifest --output +""" + +import argparse +import json +import os +import re +import sys +from pathlib import Path +from typing import Any, Dict, List, Tuple + + +# Pattern to match a table row in llvm-cov HTML source pages +# Format: ......... +LINE_NUMBER_RE = re.compile(r"") +COVERED_LINE_TD_RE = re.compile(r"") + + +def main() -> None: + """Main entry point.""" + args = parse_args() + + # Load the justification manifest + manifest = load_manifest(args.manifest) + justified_files = manifest.get("justified_files", {}) + + # Find all source HTML files in the report + html_dir = args.html_dir + if not html_dir.exists(): + print(f"ERROR: HTML report directory not found: {html_dir}", file=sys.stderr) + sys.exit(1) + + # Parse raw coverage totals from the index page (matches llvm-cov exactly). + totals = parse_index_page_totals(html_dir) + raw_covered, raw_total = totals["lines"] + raw_branch_covered, raw_branch_total = totals["branches"] + + # Process each source HTML file (restyle justified lines + count them) + total_justified = 0 + total_stale = 0 + total_justified_branches = 0 + applied_justifications: List[Dict[str, Any]] = [] + stale_justifications: List[Dict[str, Any]] = [] + # Track per-file justification counts for index page updates + per_file_stats: Dict[str, Dict[str, int]] = {} + + source_html_files = find_source_html_files(html_dir) + for html_file in source_html_files: + rel_source_path = extract_source_path_from_html(html_file, html_dir) + if not rel_source_path: + continue + + file_justifications = find_matching_justifications( + rel_source_path, justified_files + ) + + file_stats = process_html_file( + html_file, file_justifications, applied_justifications, stale_justifications + ) + + total_justified += file_stats["justified"] + total_stale += file_stats["stale"] + total_justified_branches += file_stats["justified_branches"] + + if file_stats["justified"] > 0 or file_stats["justified_branches"] 
> 0: + per_file_stats[rel_source_path] = file_stats + + # Calculate stats using llvm-cov's exact numbers + raw_uncovered = raw_total - raw_covered + unjustified_uncovered = raw_uncovered - total_justified + + effective_branch_covered = raw_branch_covered + total_justified_branches + + stats = { + "total_instrumented_lines": raw_total, + "covered_lines": raw_covered, + "justified_lines": total_justified, + "unjustified_uncovered_lines": max(0, unjustified_uncovered), + "stale_justifications": total_stale, + "raw_line_coverage_pct": round(100.0 * raw_covered / raw_total, 2) if raw_total > 0 else 0.0, + "effective_line_coverage_pct": round( + 100.0 * (raw_covered + total_justified) / raw_total, 2 + ) if raw_total > 0 else 0.0, + "total_branches": raw_branch_total, + "covered_branches": raw_branch_covered, + "justified_branches": total_justified_branches, + "raw_branch_coverage_pct": round(100.0 * raw_branch_covered / raw_branch_total, 2) if raw_branch_total > 0 else 0.0, + "effective_branch_coverage_pct": round( + 100.0 * effective_branch_covered / raw_branch_total, 2 + ) if raw_branch_total > 0 else 0.0, + } + + # Inject CSS for justified lines into style.css + inject_justified_css(html_dir) + + # Update the index page with effective coverage info and per-file stats + update_index_page(html_dir, stats, per_file_stats) + + # Write output report + report = { + "version": 1, + "summary": stats, + "applied_justifications": applied_justifications, + "stale_justifications": stale_justifications, + } + + output_path = Path(args.output) + output_path.parent.mkdir(parents=True, exist_ok=True) + with open(output_path, "w", encoding="utf-8") as f: + json.dump(report, f, indent=2) + + # Write human-readable summary + summary_path = output_path.parent / "summary.txt" + write_summary(summary_path, stats, stale_justifications) + + # Print summary + print( + f"INFO: Effective line coverage: {stats['effective_line_coverage_pct']}% " + f"(raw: {stats['raw_line_coverage_pct']}%, " + 
f"justified: {stats['justified_lines']} lines, " + f"unjustified uncovered: {stats['unjustified_uncovered_lines']} lines)", + file=sys.stderr, + ) + if stats['justified_branches'] > 0: + print( + f"INFO: Effective branch coverage: {stats['effective_branch_coverage_pct']}% " + f"(raw: {stats['raw_branch_coverage_pct']}%, " + f"justified: {stats['justified_branches']} branches)", + file=sys.stderr, + ) + if stale_justifications: + print( + f"WARNING: {len(stale_justifications)} stale justifications " + f"(lines are actually covered, justification can be removed)", + file=sys.stderr, + ) + + +def process_html_file( + html_file: Path, + justifications: Dict[int, Dict[str, str]], + applied_justifications: List[Dict[str, Any]], + stale_justifications: List[Dict[str, Any]], +) -> Dict[str, int]: + """Process a single source HTML file. Modifies it in-place. + + Restyles justified lines: changes the count cell to show "J" with justified-line + class, and changes red code regions to justified (orange) background. + Also restyles uncovered branches on justified lines. + Only counts justified/stale lines for the justification report — raw coverage + numbers are taken from the index page to match llvm-cov exactly. + """ + file_stats = { + "justified": 0, + "stale": 0, + "justified_branches": 0, + } + + with open(html_file, "r", encoding="utf-8") as f: + content = f.read() + + if not justifications: + return file_stats + + # Determine effective line status (covered if ANY instantiation covers it) + row_pattern = re.compile( + r"
\d+
" + r"" + ) + line_effective_status: Dict[int, str] = {} + for m in row_pattern.finditer(content): + line_num = int(m.group(1)) + line_class = m.group(2) + if line_class == "covered-line": + line_effective_status[line_num] = "covered" + elif line_class == "uncovered-line": + if line_num not in line_effective_status: + line_effective_status[line_num] = "uncovered" + + # Determine which lines have truly uncovered branches (never covered in any instantiation). + # A branch direction is "truly uncovered" if no instantiation covers it. + branch_check_pattern = re.compile( + r"Branch \(" + r"(\d+:\d+)\):\s*\[(.*?)\]" + ) + covered_branch_dirs_check: Dict[str, set] = {} # branch_id → set of covered directions + uncovered_branch_dirs_check: Dict[str, set] = {} # branch_id → set of uncovered directions + branch_line_map: Dict[str, int] = {} # branch_id → line_num + + for m in branch_check_pattern.finditer(content): + line_num = int(m.group(1)) + branch_id = m.group(2) + branch_content = m.group(3) + branch_line_map[branch_id] = line_num + if branch_id not in covered_branch_dirs_check: + covered_branch_dirs_check[branch_id] = set() + uncovered_branch_dirs_check[branch_id] = set() + for direction in ("True", "False"): + if f"class='None'>{direction}" in branch_content: + covered_branch_dirs_check[branch_id].add(direction) + if f"class='red branch'>{direction}" in branch_content: + uncovered_branch_dirs_check[branch_id].add(direction) + + # Lines with truly uncovered branches (uncovered in ALL instantiations) + lines_with_uncovered_branches: set = set() + for branch_id, uncov_dirs in uncovered_branch_dirs_check.items(): + cov_dirs = covered_branch_dirs_check.get(branch_id, set()) + truly_uncovered = uncov_dirs - cov_dirs + if truly_uncovered: + lines_with_uncovered_branches.add(branch_line_map[branch_id]) + + # Determine which justified lines are stale vs applicable. + # A justification is stale only if the line is covered AND has no uncovered branches. 
+ for line_num, justification in justifications.items(): + status = line_effective_status.get(line_num) + has_uncovered_branches = line_num in lines_with_uncovered_branches + if status == "covered" and not has_uncovered_branches: + file_stats["stale"] += 1 + stale_justifications.append({ + "file": html_file.stem, + "line": line_num, + "id": justification.get("id", ""), + "reason": "Line is already covered and has no uncovered branches — justification is stale", + }) + elif status == "uncovered": + file_stats["justified"] += 1 + applied_justifications.append({ + "file": html_file.stem, + "line": line_num, + "id": justification.get("id", ""), + "category": justification.get("category", ""), + }) + elif status == "covered" and has_uncovered_branches: + # Line is covered but has uncovered branches — justification applies to branches only + applied_justifications.append({ + "file": html_file.stem, + "line": line_num, + "id": justification.get("id", ""), + "category": justification.get("category", ""), + }) + + # Restyle justified lines in the HTML (all occurrences including instantiations). + # Full row pattern to capture and replace the entire row: + # ...
0
...
... + full_row_pattern = re.compile( + r"(
\d+
)" + r"(
)\d+(
)" + r"(
)(.*?)(
)" + ) + + modified = False + + def replace_full_row(match: re.Match) -> str: + nonlocal modified + line_num = int(match.group(2)) + if line_num not in justifications: + return match.group(0) + + justification = justifications[line_num] + reason = justification.get("reason", "").replace("'", "'").replace('"', """) + jid = justification.get("id", "") + tooltip = f"Justified [{jid}]: {reason}" + modified = True + + # Rebuild the row with justified styling: + # 1. Line number td (unchanged) + line_td = match.group(1) + # 2. Count td: change class and show "J" instead of "0" + count_td = f"
J{match.group(4)}"
+        # 3. Code td: replace 'region red' spans with 'region justified'
+        code_start = match.group(5)
+        code_content = match.group(6).replace("class='region red'", "class='region justified'")
+        code_end = match.group(7)
+
+        return line_td + count_td + code_start + code_content + code_end
+
+    new_content = full_row_pattern.sub(replace_full_row, content)
+
+    # Restyle branches on justified lines.
+    # Branch format in expansion-view:
+    # Branch (195:17):
+    #   [True: 0, ...]
+    # We find branches at justified line numbers and restyle red branch → justified branch
+    # Counting: A branch direction is "uncovered" only if ALL instantiations show it as red.
+    # (Same as llvm-cov's logic: covered if ANY instantiation covers it.)
+    branch_pattern = re.compile(
+        r"(Branch \("
+        r"(\d+:\d+)\):\s*\[)(.*?\])"
+    )
+
+    # First pass: determine which branch directions are covered in any instantiation
+    covered_branch_dirs: set = set()  # (line:col, direction) that are covered somewhere
+    for m in branch_pattern.finditer(new_content):
+        line_num = int(m.group(2))
+        if line_num not in justifications:
+            continue
+        branch_id = m.group(3)
+        branch_content = m.group(4)
+        # A direction is covered if it does NOT have 'red branch' class
+        for direction in ("True", "False"):
+            # Check if this direction appears as covered (class='None' means covered)
+            covered_marker = f"class='None'>{direction}"
+            if covered_marker in branch_content:
+                covered_branch_dirs.add((branch_id, direction))
+
+    # Second pass: restyle and count only truly uncovered branch directions
+    justified_branch_ids: set = set()  # Track unique uncovered (line:col, direction) pairs
+
+    def replace_branch(match: re.Match) -> str:
+        """Restyle one branch annotation matched by ``branch_pattern``.
+
+        Used as the ``re.sub`` callback for ``branch_pattern``. Reads capture
+        group 2 (line number), group 3 (branch id) and group 4 (the bracketed
+        branch text); group 1 is the text preceding the branch text and is
+        re-emitted untouched.
+
+        Returns the match unchanged when the line has no justification or the
+        branch text carries no ``class='red branch'`` marker. Otherwise returns
+        group 1 followed by the restyled branch text, sets ``modified`` and may
+        increment ``file_stats["justified_branches"]``.
+        """
+        nonlocal modified
+        # Group 2 is the source line number; justifications are keyed by line number.
+        line_num = int(match.group(2))
+        if line_num not in justifications:
+            return match.group(0)
+
+        branch_content = match.group(4)
+        # No direction is marked uncovered in this occurrence: nothing to restyle.
+        if "class='red branch'" not in branch_content:
+            return match.group(0)
+
+        # Signals the enclosing function to write the rewritten HTML back to disk.
+        modified = True
+        branch_id = match.group(3)  # e.g. "68:13"
+
+        # Count each (branch id, direction) pair at most once per file, and only
+        # when the first pass never saw that direction covered in any occurrence.
+        for direction in ("True", "False"):
+            if f"class='red branch'>{direction}" in branch_content:
+                uid = (branch_id, direction)
+                if uid not in covered_branch_dirs and uid not in justified_branch_ids:
+                    justified_branch_ids.add(uid)
+                    file_stats["justified_branches"] += 1
+
+        # Restyle: red branch → justified-branch, uncovered-line → justified-line.
+        # This is applied to every red marker in this occurrence, including
+        # directions that were left out of the count above.
+        branch_content = branch_content.replace(
+            "class='red branch'", "class='justified-branch'"
+        )
+        branch_content = branch_content.replace(
+            "class='uncovered-line'", "class='justified-line'"
+        )
+        return match.group(1) + branch_content
+
+    new_content = branch_pattern.sub(replace_branch, new_content)
+
+    if modified:
+        with open(html_file, "w", encoding="utf-8") as f:
+            f.write(new_content)
+
+    return file_stats
+
+
+def parse_index_page_totals(html_dir: Path) -> Dict[str, Tuple[int, int]]:
+    """Parse the TOTALS row from the llvm-cov index.html to get exact coverage numbers.
+
+    Returns dict with 'lines' and 'branches' keys, each (covered, total).
+    The TOTALS row in llvm-cov HTML is always the last 
+    (or plain last bold row) and contains exactly 3 coverage cells: func, line, branch.
+    We locate the row by the 'Totals' text anchor and extract the 3 cells from it,
+    rather than relying on positional offset from the full-page match list (which
+    breaks when individual file rows also contain matching percent patterns).
+    """
+    index_file = html_dir / "index.html"
+    if not index_file.exists():
+        return {"lines": (0, 0), "branches": (0, 0)}
+
+    with open(index_file, "r", encoding="utf-8") as f:
+        content = f.read()
+
+    result = {"lines": (0, 0), "branches": (0, 0)}
+
+    # Locate the Totals row: llvm-cov emits "
Totals
" as the first cell. + totals_row_match = re.search(r"
Totals
(.*?)(?:|$)", content, re.DOTALL) + if not totals_row_match: + print("WARNING: Could not parse coverage totals from index.html", file=sys.stderr) + return result + + row_fragment = totals_row_match.group(1) + pct_pattern = re.compile(r"(\d+\.\d+)%\s*\((\d+)/(\d+)\)") + cells = pct_pattern.findall(row_fragment) + + # The 3 cells in order are: func, line, branch. + if len(cells) >= 2: + _, line_covered, line_total = cells[1] + result["lines"] = (int(line_covered), int(line_total)) + if len(cells) >= 3: + _, branch_covered, branch_total = cells[2] + result["branches"] = (int(branch_covered), int(branch_total)) + + if result["lines"] == (0, 0): + print("WARNING: Could not parse coverage totals from index.html", file=sys.stderr) + + return result + + +def inject_justified_css(html_dir: Path) -> None: + """Add CSS for justified lines to style.css.""" + style_file = html_dir / "style.css" + if not style_file.exists(): + return + + justified_css = """ +/* Coverage justification styling */ +.justified-line { + text-align: right; + color: #a60; +} +.region.justified { + background-color: #fa04; +} +.justified-branch { + color: #a60; + font-weight: bold; +} +tr:has(> td.justified-line) > td.code { + background-color: #fff3e0; +} +@media (prefers-color-scheme: dark) { + .justified-line { + color: #fa0; + } + .justified-branch { + color: #fa0; + } + tr:has(> td.justified-line) > td.code { + background-color: #3d2800; + } + .region.justified { + background-color: #fa03; + } +} +""" + + with open(style_file, "a", encoding="utf-8") as f: + f.write(justified_css) + + +def update_index_page(html_dir: Path, stats: Dict[str, Any], per_file_stats: Dict[str, Dict[str, int]]) -> None: + """Update the index page with effective coverage info and per-file adjusted percentages.""" + index_file = html_dir / "index.html" + if not index_file.exists(): + return + + with open(index_file, "r", encoding="utf-8") as f: + content = f.read() + + # Banner with overall effective coverage (lines + 
branches) + branch_info = "" + if stats.get("justified_branches", 0) > 0: + branch_info = ( + f" | Effective Branch Coverage: {stats['effective_branch_coverage_pct']}%" + f" (Raw: {stats['raw_branch_coverage_pct']}%, Justified: {stats['justified_branches']} branches)" + ) + + banner = ( + f"
" + f"Effective Line Coverage: {stats['effective_line_coverage_pct']}% " + f"(Raw: {stats['raw_line_coverage_pct']}% | " + f"Justified: {stats['justified_lines']} lines | " + f"Unjustified Uncovered: {stats['unjustified_uncovered_lines']} lines)" + f"{branch_info}" + f"
" + ) + + # Insert after the tag or after the first

+ if "

" in content: + content = content.replace("

", banner + "

", 1) + else: + content = content.replace("", f"{banner}", 1) + + # Update per-file rows in the index table. + # For each file with justifications, find its row and update line% and branch% cells. + # Row format:
displayname
+ #
  XX.XX% (covered/total)
← function + #
  XX.XX% (covered/total)
← line + #
  XX.XX% (covered/total)
← branch + # + pct_cell_pattern = re.compile( + r"
\s*(\d+\.\d+)%\s*\((\d+)/(\d+)\)
" + ) + + for file_path, fstats in per_file_stats.items(): + justified_lines = fstats.get("justified", 0) + justified_branches = fstats.get("justified_branches", 0) + if justified_lines == 0 and justified_branches == 0: + continue + + # Find the row for this file in the index page + # The href contains the full path to the HTML file + if file_path not in content: + continue + + # Find the containing this file path + file_idx = content.find(file_path) + if file_idx < 0: + continue + row_start = content.rfind("", file_idx) + if row_start < 0 or row_end < 0: + continue + + row = content[row_start:row_end + 5] + + # Find all percentage cells in this row (func, line, branch) + cells = list(pct_cell_pattern.finditer(row)) + if len(cells) < 2: + continue + + new_row = row + # Update line coverage cell (second cell, index 1) + if justified_lines > 0 and len(cells) >= 2: + line_cell = cells[1] + covered = int(line_cell.group(3)) + total = int(line_cell.group(4)) + eff_covered = covered + justified_lines + eff_pct = round(100.0 * eff_covered / total, 2) if total > 0 else 0.0 + color = _get_coverage_color(eff_pct) + old_cell = line_cell.group(0) + new_cell = ( + f"
"
+                f"{eff_pct:>7.2f}% ({eff_covered}/{total})
" + ) + new_row = new_row.replace(old_cell, new_cell) + + # Update branch coverage cell (third cell, index 2) + if justified_branches > 0 and len(cells) >= 3: + branch_cell = cells[2] + covered = int(branch_cell.group(3)) + total = int(branch_cell.group(4)) + eff_covered = covered + justified_branches + eff_pct = round(100.0 * eff_covered / total, 2) if total > 0 else 0.0 + color = _get_coverage_color(eff_pct) + old_cell = branch_cell.group(0) + new_cell = ( + f"
"
+                f"{eff_pct:>7.2f}% ({eff_covered}/{total})
" + ) + new_row = new_row.replace(old_cell, new_cell) + + if new_row != row: + content = content.replace(row, new_row) + + # Update the TOTALS row + content = _update_totals_row(content, stats) + + with open(index_file, "w", encoding="utf-8") as f: + f.write(content) + + +def _get_coverage_color(pct: float) -> str: + """Return the llvm-cov color class for a coverage percentage.""" + if pct >= 100.0: + return "green" + elif pct >= 80.0: + return "yellow" + else: + return "red" + + +def _update_totals_row(content: str, stats: Dict[str, Any]) -> str: + """Update the TOTALS row in the index page with effective coverage numbers.""" + # Find the TOTALS row — it's the last row before + totals_idx = content.rfind("Totals") + if totals_idx < 0: + return content + + row_start = content.rfind("", totals_idx) + if row_start < 0 or row_end < 0: + return content + + row = content[row_start:row_end + 5] + + pct_cell_pattern = re.compile( + r"
\s*(\d+\.\d+)%\s*\((\d+)/(\d+)\)
" + ) + cells = list(pct_cell_pattern.finditer(row)) + + new_row = row + + # Update line coverage in totals (index 1) + if len(cells) >= 2 and stats.get("justified_lines", 0) > 0: + line_cell = cells[1] + eff_covered = stats["covered_lines"] + stats["justified_lines"] + total = stats["total_instrumented_lines"] + eff_pct = stats["effective_line_coverage_pct"] + color = _get_coverage_color(eff_pct) + old_cell = line_cell.group(0) + new_cell = ( + f"
"
+            f"{eff_pct:>7.2f}% ({eff_covered}/{total})
" + ) + new_row = new_row.replace(old_cell, new_cell) + + # Update branch coverage in totals (index 2) + if len(cells) >= 3 and stats.get("justified_branches", 0) > 0: + branch_cell = cells[2] + eff_covered = stats["covered_branches"] + stats["justified_branches"] + total = stats["total_branches"] + eff_pct = stats["effective_branch_coverage_pct"] + color = _get_coverage_color(eff_pct) + old_cell = branch_cell.group(0) + new_cell = ( + f"
"
+            f"{eff_pct:>7.2f}% ({eff_covered}/{total})
" + ) + new_row = new_row.replace(old_cell, new_cell) + + if new_row != row: + content = content.replace(row, new_row) + + return content + + +def find_source_html_files(html_dir: Path) -> List[Path]: + """Find all per-source HTML files (not index.html, style.css, etc.).""" + coverage_dir = html_dir / "coverage" + if not coverage_dir.exists(): + # Some llvm-cov versions put source files directly in html_dir + coverage_dir = html_dir + + files = [] + for html_file in coverage_dir.rglob("*.html"): + if html_file.name in ("index.html",): + continue + files.append(html_file) + return sorted(files) + + +def extract_source_path_from_html(html_file: Path, html_dir: Path) -> str: + """Extract the relative source file path from the HTML file path. + + llvm-cov creates paths like: html_report/coverage/.html + We need to extract the relative path within the project. + """ + rel = str(html_file.relative_to(html_dir)) + # Remove "coverage/" prefix if present + if rel.startswith("coverage/"): + rel = rel[len("coverage/"):] + # Remove .html suffix + if rel.endswith(".html"): + rel = rel[:-5] + return rel + + +def find_matching_justifications( + source_path: str, justified_files: Dict[str, Dict[str, Dict[str, str]]] +) -> Dict[int, Dict[str, str]]: + """Find justifications that match the given source path. + + The source_path from HTML may be an absolute path or relative. + The justified_files keys are relative to source root. + We match by path-component suffix to avoid crossing file-name boundaries + (e.g. "bar.cpp" must not match "foobar.cpp"). + """ + result: Dict[int, Dict[str, str]] = {} + + src_parts = Path(source_path).parts + for justified_path, line_justifications in justified_files.items(): + j_parts = Path(justified_path).parts + # Accept if one path's components are a suffix of the other's components. 
+ if (len(src_parts) >= len(j_parts) and src_parts[-len(j_parts):] == j_parts) or ( + len(j_parts) > len(src_parts) and j_parts[-len(src_parts):] == src_parts + ): + for line_str, justification in line_justifications.items(): + result[int(line_str)] = justification + + return result + + +def write_summary( + path: Path, stats: Dict[str, Any], stale: List[Dict[str, Any]] +) -> None: + """Write human-readable summary.""" + with open(path, "w", encoding="utf-8") as f: + f.write("Coverage Justification Summary\n") + f.write("=" * 40 + "\n\n") + f.write(f"Total instrumented lines: {stats['total_instrumented_lines']}\n") + f.write(f"Covered lines: {stats['covered_lines']}\n") + f.write(f"Justified lines: {stats['justified_lines']}\n") + f.write(f"Unjustified uncovered: {stats['unjustified_uncovered_lines']}\n") + f.write(f"\n") + f.write(f"Raw line coverage: {stats['raw_line_coverage_pct']}%\n") + f.write(f"Effective line coverage: {stats['effective_line_coverage_pct']}%\n") + f.write(f"\n") + if stats.get("total_branches", 0) > 0: + f.write(f"Total branches: {stats['total_branches']}\n") + f.write(f"Covered branches: {stats['covered_branches']}\n") + f.write(f"Justified branches: {stats['justified_branches']}\n") + f.write(f"Raw branch coverage: {stats['raw_branch_coverage_pct']}%\n") + f.write(f"Effective branch coverage: {stats['effective_branch_coverage_pct']}%\n") + f.write(f"\n") + if stale: + f.write(f"Stale justifications ({len(stale)}):\n") + for s in stale: + f.write(f" - {s['file']}:{s['line']} [{s['id']}]\n") + f.write("\n") + + +def load_manifest(path: Path) -> Dict[str, Any]: + """Load the justification manifest JSON.""" + if not path.exists(): + print(f"ERROR: Manifest not found: {path}", file=sys.stderr) + sys.exit(1) + with open(path, "r", encoding="utf-8") as f: + return json.load(f) + + +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="Effective coverage calculator and HTML 
post-processor" + ) + parser.add_argument( + "--html-dir", + type=Path, + required=True, + help="Path to llvm-cov HTML report directory", + ) + parser.add_argument( + "--manifest", + type=Path, + required=True, + help="Path to resolved justification manifest (from justify.py)", + ) + parser.add_argument( + "--output", + type=Path, + required=True, + help="Output path for justification report (JSON)", + ) + return parser.parse_args() + + +if __name__ == "__main__": + main() diff --git a/coverage/filter_regexes.txt b/coverage/filter_regexes.txt new file mode 100644 index 0000000..bcc14cf --- /dev/null +++ b/coverage/filter_regexes.txt @@ -0,0 +1,52 @@ +# Coverage filter regexes (one per line; blank lines and lines starting +# with `#` are ignored). +# +# These regexes are passed to llvm-cov as --ignore-filename-regex, matching +# against the full source path as seen by llvm-cov. +# +# This file is the baseline shipped with @score_cpp_policies. Consumers can +# extend it with their own module-specific regexes through the +# score_coverage_reporter(extra_regex_files = [...]) macro in coverage/defs.bzl. +# +# NOTE: --experimental_use_llvm_covmap causes Bazel to instrument ALL targets +# regardless of --instrumentation_filter. Therefore, source filtering MUST +# happen here at the report level. 
+ +# --------------------------------------------------------------------------- +# External dependencies +# --------------------------------------------------------------------------- +external/.* + +# --------------------------------------------------------------------------- +# Test files and test directories +# --------------------------------------------------------------------------- +.*_test\.(cpp|cc|h|hpp)$ +.*_tests\.(cpp|cc|h|hpp)$ +.*/test/.* +.*/test_support/.* +.*/test_doubles/.* +.*_fixture.*\.(cpp|cc|h|hpp)$ +.*_fixtures.*\.(cpp|cc|h|hpp)$ + +# --------------------------------------------------------------------------- +# Mock infrastructure +# --------------------------------------------------------------------------- +.*_mock.*\.(cpp|cc|h|hpp)$ +.*/mock/.* +.*/mocks/.* +.*/mocking/.* +.*/mock_binding/.* + +# --------------------------------------------------------------------------- +# Fakes / stubs +# --------------------------------------------------------------------------- +.*_fake\.(cpp|cc|h|hpp)$ +.*_fakes\.(cpp|cc|h|hpp)$ +.*_stub\.(cpp|cc|h|hpp)$ +.*_stubs\.(cpp|cc|h|hpp)$ + +# --------------------------------------------------------------------------- +# Performance benchmarks (not part of functional coverage) +# --------------------------------------------------------------------------- +.*/performance_benchmarks/.* +.*/benchmarks/.* diff --git a/coverage/generate_coverage_html.sh b/coverage/generate_coverage_html.sh new file mode 100755 index 0000000..cebe42b --- /dev/null +++ b/coverage/generate_coverage_html.sh @@ -0,0 +1,203 @@ +#!/usr/bin/env bash +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +# +# Generic post-`bazel coverage` driver shipped by @score_cpp_policies. +# +# Extracts the HTML coverage report from the llvm-cov generated zip produced +# by `bazel coverage`. Optionally runs the justification post-processor and/or +# assembles a CI archive with HTML + LCOV + JUnit XMLs. +# +# Run via Bazel from the CONSUMER repository: +# +# bazel run @score_cpp_policies//coverage:generate_coverage_html -- \ +# [--yaml <path>] \ +# [--output-dir <dir>] \ +# [--archive <name>] \ +# [--junit-glob <glob>] +# +# Arguments: +# --yaml Path (relative to workspace root, or absolute) to the +# consumer's coverage_justifications.yaml. If omitted (or the +# file does not exist), justification post-processing is +# skipped. +# --output-dir Directory (relative to workspace root, or absolute) into +# which the HTML report is written. Default: cpp_coverage +# --archive If set, also create <name>.zip containing the HTML +# report, raw LCOV data and matched JUnit XMLs. +# --junit-glob Glob (relative to workspace root) used when --archive is +# set, to locate test.xml files. 
Default: bazel-testlogs/** + +set -euo pipefail + +JUSTIFICATION_YAML="" +OUTPUT_DIR="cpp_coverage" +ARCHIVE_NAME="" +JUNIT_GLOB="bazel-testlogs" + +while [[ $# -gt 0 ]]; do + case "$1" in + --yaml) + JUSTIFICATION_YAML="${2:?--yaml requires a path argument}" + shift 2 + ;; + --output-dir) + OUTPUT_DIR="${2:?--output-dir requires a directory argument}" + shift 2 + ;; + --archive) + ARCHIVE_NAME="${2:?--archive requires a name argument}" + shift 2 + ;; + --junit-glob) + JUNIT_GLOB="${2:?--junit-glob requires a glob argument}" + shift 2 + ;; + -h|--help) + sed -n '17,42p' "$0" >&2 + exit 0 + ;; + *) + echo "ERROR: Unknown argument: $1" >&2 + echo " Run with --help for usage." >&2 + exit 2 + ;; + esac +done + +if [[ -z "${BUILD_WORKSPACE_DIRECTORY:-}" ]]; then + echo "ERROR: BUILD_WORKSPACE_DIRECTORY is not set. This script must be run via 'bazel run'." >&2 + exit 1 +fi + +# Locate the justify and effective_coverage binaries from the runfiles tree +# (they are declared as `data` deps of this sh_binary). Invoking them directly +# avoids a nested `bazel run` which would deadlock on Bazel's output-base lock. +_rlocation() { + local rlpath="${RUNFILES_DIR:-$0.runfiles}/${1}" + if [[ -x "${rlpath}" ]]; then echo "${rlpath}"; return 0; fi + # Fallback: try manifest (useful when RUNFILES_DIR is not set). + if [[ -f "${RUNFILES_DIR:-$0.runfiles}_manifest" ]]; then + local entry + entry=$(grep -F "${1} " "${RUNFILES_DIR:-$0.runfiles}_manifest" | head -1 | cut -d' ' -f2-) + if [[ -x "${entry}" ]]; then echo "${entry}"; return 0; fi + fi + echo "ERROR: runfile not found: ${1}" >&2 + exit 1 +} + +_JUSTIFY=$(_rlocation "score_cpp_policies+/coverage/justify") +_EFFECTIVE_COVERAGE=$(_rlocation "score_cpp_policies+/coverage/effective_coverage") + +cd "${BUILD_WORKSPACE_DIRECTORY}" + +# Resolve relative paths against the workspace root. 
+case "${OUTPUT_DIR}" in + /*) ;; + *) OUTPUT_DIR="${BUILD_WORKSPACE_DIRECTORY}/${OUTPUT_DIR}" ;; +esac + +if [[ -n "${JUSTIFICATION_YAML}" ]]; then + case "${JUSTIFICATION_YAML}" in + /*) ;; + *) JUSTIFICATION_YAML="${BUILD_WORKSPACE_DIRECTORY}/${JUSTIFICATION_YAML}" ;; + esac +fi + +# Coverage report generator output (the zip our reporter produced). +COVERAGE_ZIP="${BUILD_WORKSPACE_DIRECTORY}/bazel-out/_coverage/_coverage_report.dat" + +if [[ ! -f "${COVERAGE_ZIP}" ]]; then + echo "ERROR: Coverage report not found at ${COVERAGE_ZIP}" >&2 + echo " Run 'bazel coverage //... --build_tests_only' first." >&2 + exit 1 +fi + +# Extract the HTML report from the zip. +TMPDIR_EXTRACT="${TMPDIR:-/tmp}/coverage_extract_$$" +mkdir -p "${TMPDIR_EXTRACT}" +trap 'rm -rf "${TMPDIR_EXTRACT}"' EXIT + +unzip -q -o "${COVERAGE_ZIP}" -d "${TMPDIR_EXTRACT}" + +rm -rf "${OUTPUT_DIR}" +if [[ -d "${TMPDIR_EXTRACT}/html_report" ]]; then + cp -r "${TMPDIR_EXTRACT}/html_report" "${OUTPUT_DIR}" +else + echo "ERROR: html_report/ not found in ${COVERAGE_ZIP}" >&2 + exit 1 +fi + +echo "Coverage report written to: ${OUTPUT_DIR}" + +# --------------------------------------------------------------------------- +# Optional justification processing. +# --------------------------------------------------------------------------- +if [[ -n "${JUSTIFICATION_YAML}" && -f "${JUSTIFICATION_YAML}" ]]; then + echo "" + echo "Running coverage justification processing..." 
+ + JUSTIFICATION_DIR="${TMPDIR_EXTRACT}/justification_report" + mkdir -p "${JUSTIFICATION_DIR}" + + if "${_JUSTIFY}" \ + --yaml "${JUSTIFICATION_YAML}" \ + --source-root "${BUILD_WORKSPACE_DIRECTORY}" \ + --output "${JUSTIFICATION_DIR}/manifest.json"; then + + "${_EFFECTIVE_COVERAGE}" \ + --html-dir "${OUTPUT_DIR}" \ + --manifest "${JUSTIFICATION_DIR}/manifest.json" \ + --output "${JUSTIFICATION_DIR}/report.json" + fi + + if [[ -f "${JUSTIFICATION_DIR}/summary.txt" ]]; then + echo "" + cat "${JUSTIFICATION_DIR}/summary.txt" + + EFFECTIVE_PCT=$(grep -oP 'Effective line coverage:\s+\K[0-9.]+' \ + "${JUSTIFICATION_DIR}/summary.txt" 2>/dev/null || echo "0") + + THRESHOLD="${COVERAGE_THRESHOLD:-100}" + if awk "BEGIN {exit (${EFFECTIVE_PCT} >= ${THRESHOLD}) ? 0 : 1}"; then + : + else + echo "WARNING: Effective coverage ${EFFECTIVE_PCT}% is below threshold ${THRESHOLD}%" >&2 + fi + fi +elif [[ -n "${JUSTIFICATION_YAML}" ]]; then + echo "INFO: --yaml ${JUSTIFICATION_YAML} not found, skipping justification processing." +else + echo "INFO: No --yaml provided, skipping justification processing." +fi + +# --------------------------------------------------------------------------- +# Optional CI archive. 
+# --------------------------------------------------------------------------- +if [[ -n "${ARCHIVE_NAME}" ]]; then + mkdir -p artifacts + + if [[ -d "${JUNIT_GLOB}" ]]; then + find "${JUNIT_GLOB}" -name 'test.xml' -exec cp --parents {} artifacts/ \; 2>/dev/null || true + fi + + cp -r "${OUTPUT_DIR}" artifacts/ + + if [[ -f "${TMPDIR_EXTRACT}/lcov_report/lcov.dat" ]]; then + cp "${TMPDIR_EXTRACT}/lcov_report/lcov.dat" artifacts/coverage_report.dat + fi + + zip -r "${ARCHIVE_NAME}.zip" artifacts/ + rm -rf artifacts/ + echo "Coverage archive written to: ${ARCHIVE_NAME}.zip" +fi diff --git a/coverage/justify.py b/coverage/justify.py new file mode 100644 index 0000000..8e37292 --- /dev/null +++ b/coverage/justify.py @@ -0,0 +1,402 @@ +#!/usr/bin/env python3 +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +"""Coverage justification processor. + +Parses the YAML justification database and source files for COV_JUSTIFIED markers. +Resolves all justified lines and produces a manifest mapping file:line → justification. + +Usage: + python justify.py --yaml --source-root --output + +Supports two ways to specify justified lines: +1. YAML locations: directly specify file + line ranges in the YAML +2. 
In-code markers: COV_JUSTIFIED , COV_JUSTIFIED_START / COV_JUSTIFIED_STOP +""" + +import argparse +import json +import re +import sys +from pathlib import Path +from typing import Any, Dict, List, Set, Tuple + +import yaml + + +# Marker patterns +COV_JUSTIFIED_LINE_RE = re.compile(r"COV_JUSTIFIED\s+([\w-]+)") +COV_JUSTIFIED_START_RE = re.compile(r"COV_JUSTIFIED_START\s+([\w-]+)") +COV_JUSTIFIED_STOP_RE = re.compile(r"COV_JUSTIFIED_STOP") + +VALID_CATEGORIES = { + "defensive_programming", + "tool_false_positive", + "platform_specific", + "other", +} + + +def main() -> None: + """Main entry point.""" + args = parse_args() + + justifications_data = load_yaml(args.yaml) + validate_yaml(justifications_data) + + # Build lookup: id -> justification entry + justifications_by_id: Dict[str, Dict[str, Any]] = {} + for entry in justifications_data.get("justifications", []): + justifications_by_id[entry["id"]] = entry + + # Resolve all justified lines + resolved: Dict[str, Dict[int, Dict[str, str]]] = {} + warnings: List[str] = [] + errors: List[str] = [] + + # 1. Process YAML direct locations + for entry in justifications_data.get("justifications", []): + for location in entry.get("locations", []): + file_path = location["file"] + full_path = Path(args.source_root) / file_path + + if not full_path.exists(): + errors.append( + f"File not found for justification '{entry['id']}': {file_path}" + ) + continue + + lines = resolve_location_lines(location) + if file_path not in resolved: + resolved[file_path] = {} + for line in lines: + resolved[file_path][line] = { + "id": entry["id"], + "category": entry["category"], + "reason": entry["reason"].strip(), + } + + # 2. 
def resolve_location_lines(location: Dict[str, Any]) -> List[int]:
    """Resolve the line numbers described by one YAML location entry.

    Keys are consulted in priority order: an explicit ``lines`` list, then an
    inclusive ``line_start``/``line_end`` range, then a single ``line``.

    Returns:
        The resolved line numbers, or an empty list when none of the
        supported keys is present (a range needs both of its bounds).
    """
    if "lines" in location:
        return location["lines"]
    if "line_start" in location and "line_end" in location:
        return list(range(location["line_start"], location["line_end"] + 1))
    if "line" in location:
        return [location["line"]]
    return []


def scan_file_for_markers(
    file_path: Path,
    rel_path: str,
    justifications_by_id: Dict[str, Dict[str, Any]],
) -> Tuple[List[str], Dict[int, Dict[str, str]]]:
    """Scan a source file for COV_JUSTIFIED markers.

    Three marker forms are recognised, via the module-level
    ``COV_JUSTIFIED_*_RE`` patterns: a START/STOP pair that justifies every
    line strictly between the two markers, and a single-line marker that
    justifies the line it appears on.

    Args:
        file_path: File to read.
        rel_path: Path used as the prefix of every warning message.
        justifications_by_id: Known justification entries keyed by ID; each
            entry must provide ``category`` and ``reason``.

    Returns:
        ``(warnings, justified_lines)`` where ``justified_lines`` maps a
        1-based line number to ``{"id", "category", "reason"}``.
    """
    warnings: List[str] = []
    justified_lines: Dict[int, Dict[str, str]] = {}

    try:
        with open(file_path, "r", encoding="utf-8", errors="replace") as f:
            lines = f.readlines()
    except OSError as error:
        # Stay best-effort (one unreadable file must not abort the run), but
        # report it instead of silently dropping the file's markers.
        warnings.append(f"{rel_path}: could not be read; markers not scanned ({error})")
        return warnings, justified_lines

    def justification_info(jid: str) -> Dict[str, str]:
        entry = justifications_by_id[jid]
        return {
            "id": jid,
            "category": entry["category"],
            "reason": entry["reason"].strip(),
        }

    region_stack: List[Tuple[int, str]] = []  # (start_line, justification_id)

    for line_num, line in enumerate(lines, start=1):
        start_match = COV_JUSTIFIED_START_RE.search(line)
        if start_match:
            jid = start_match.group(1)
            if jid not in justifications_by_id:
                warnings.append(
                    f"{rel_path}:{line_num}: COV_JUSTIFIED_START references "
                    f"unknown ID '{jid}'"
                )
            # Track the region even when its ID is unknown. Otherwise the
            # matching STOP would close an enclosing region too early, or be
            # reported as a STOP without START.
            region_stack.append((line_num, jid))
            continue

        if COV_JUSTIFIED_STOP_RE.search(line):
            if not region_stack:
                warnings.append(
                    f"{rel_path}:{line_num}: COV_JUSTIFIED_STOP without matching START"
                )
            else:
                start_line, jid = region_stack.pop()
                # Regions opened with an unknown ID justify nothing.
                if jid in justifications_by_id:
                    for ln in range(start_line + 1, line_num):
                        justified_lines[ln] = justification_info(jid)
            continue

        # A line that mentions START/STOP without matching their patterns is
        # malformed; do not let the single-line pattern pick it up.
        if "COV_JUSTIFIED_START" in line or "COV_JUSTIFIED_STOP" in line:
            continue

        line_match = COV_JUSTIFIED_LINE_RE.search(line)
        if not line_match:
            continue
        jid = line_match.group(1)
        if jid not in justifications_by_id:
            warnings.append(
                f"{rel_path}:{line_num}: COV_JUSTIFIED references "
                f"unknown ID '{jid}'"
            )
        else:
            justified_lines[line_num] = justification_info(jid)

    # Whatever is still on the stack was never closed.
    for start_line, jid in region_stack:
        warnings.append(
            f"{rel_path}:{start_line}: COV_JUSTIFIED_START '{jid}' without matching STOP"
        )

    return warnings, justified_lines
def collect_source_files(source_root: Path, file_filter: str) -> List[Path]:
    """Collect the source files to scan for markers.

    Args:
        source_root: Directory searched recursively.
        file_filter: Comma-separated file extensions; surrounding whitespace
            and a leading dot are ignored. An empty string selects the
            default ``cpp,h,hpp,cc`` set.

    Returns:
        A sorted list of matching regular files, without duplicates.
    """
    requested = file_filter.split(",") if file_filter else ["cpp", "h", "hpp", "cc"]
    extensions = {ext.strip().lstrip(".") for ext in requested}
    # A blank item (e.g. from "cpp,,h") would otherwise become the glob "*.".
    extensions.discard("")

    files = set()
    for ext in extensions:
        # rglob also yields directories whose name matches; keep files only.
        files.update(path for path in source_root.rglob(f"*.{ext}") if path.is_file())
    return sorted(files)


def load_yaml(yaml_path: Path) -> Dict[str, Any]:
    """Load the YAML justification database.

    Exits the process with status 1 when the file does not exist. The parsed
    document is returned unchecked (an empty file parses to ``None``);
    structural checks are left to ``validate_yaml``.
    """
    if not yaml_path.exists():
        print(f"ERROR: Justification YAML not found: {yaml_path}", file=sys.stderr)
        sys.exit(1)

    with open(yaml_path, "r", encoding="utf-8") as f:
        content = f.read()

    return yaml.safe_load(content)


def _is_strict_int(value: Any) -> bool:
    """Return True for real integers only (``bool`` is an ``int`` subclass)."""
    return isinstance(value, int) and not isinstance(value, bool)


def _validate_locations(locations: Any, prefix: str, errors: List[str]) -> None:
    """Append an error for every malformed item of an entry's 'locations'."""
    if not isinstance(locations, list):
        errors.append(
            f"{prefix}: 'locations' must be a list, "
            f"got {type(locations).__name__}"
        )
        return

    for j, loc in enumerate(locations):
        loc_prefix = f"{prefix}.locations[{j}]"
        if not isinstance(loc, dict):
            errors.append(
                f"{loc_prefix}: must be a mapping, "
                f"got {type(loc).__name__}"
            )
            continue

        if "file" not in loc:
            errors.append(f"{loc_prefix}: missing 'file'")
        elif not isinstance(loc["file"], str):
            errors.append(
                f"{loc_prefix}: 'file' must be a string, "
                f"got {type(loc['file']).__name__}"
            )

        for int_field in ("line", "line_start", "line_end"):
            if int_field in loc and not _is_strict_int(loc[int_field]):
                errors.append(
                    f"{loc_prefix}: '{int_field}' must be an integer, "
                    f"got {type(loc[int_field]).__name__}"
                )

        if "lines" in loc:
            if not isinstance(loc["lines"], list):
                errors.append(
                    f"{loc_prefix}: 'lines' must be a list, "
                    f"got {type(loc['lines']).__name__}"
                )
            elif not all(_is_strict_int(ln) for ln in loc["lines"]):
                errors.append(f"{loc_prefix}: 'lines' must contain only integers")


def validate_yaml(data: Dict[str, Any]) -> None:
    """Validate the justification YAML structure and types.

    Returns normally when the document is valid. Otherwise every problem
    found is printed to stderr as ``ERROR: YAML validation: <message>`` and
    the process exits with status 1. A wrong root type or a missing or
    non-list ``justifications`` field stops validation immediately; entry
    level problems are collected and reported together.
    """

    def fail(messages: List[str]) -> None:
        for message in messages:
            print(f"ERROR: YAML validation: {message}", file=sys.stderr)
        sys.exit(1)

    try:
        errors: List[str] = []

        if not isinstance(data, dict):
            fail(["root must be a mapping"])

        if "version" not in data:
            errors.append("Missing 'version' field")
        elif not _is_strict_int(data["version"]):
            errors.append(f"'version' must be an integer, got {type(data['version']).__name__}")

        if "justifications" not in data:
            errors.append("Missing 'justifications' field")
            fail(errors)

        if not isinstance(data["justifications"], list):
            errors.append(
                f"'justifications' must be a list, got {type(data['justifications']).__name__}"
            )
            fail(errors)

        seen_ids: Set[str] = set()
        for i, entry in enumerate(data["justifications"]):
            prefix = f"justifications[{i}]"

            if not isinstance(entry, dict):
                errors.append(f"{prefix}: must be a mapping, got {type(entry).__name__}")
                continue

            if "id" not in entry:
                errors.append(f"{prefix}: missing 'id'")
                continue

            jid = entry["id"]
            if not isinstance(jid, str):
                errors.append(f"{prefix}: 'id' must be a string, got {type(jid).__name__}")
                continue

            if jid in seen_ids:
                errors.append(f"{prefix}: duplicate ID '{jid}'")
            seen_ids.add(jid)

            if not re.match(r"^[a-z0-9]+(-[a-z0-9]+)*$", jid):
                errors.append(f"{prefix}: ID '{jid}' must be kebab-case")

            if "category" not in entry:
                errors.append(f"{prefix}: missing 'category'")
            elif not isinstance(entry["category"], str):
                errors.append(
                    f"{prefix}: 'category' must be a string, "
                    f"got {type(entry['category']).__name__}"
                )
            elif entry["category"] not in VALID_CATEGORIES:
                errors.append(
                    f"{prefix}: invalid category '{entry['category']}'. "
                    f"Must be one of: {sorted(VALID_CATEGORIES)}"
                )

            if "reason" not in entry:
                errors.append(f"{prefix}: missing 'reason'")
            elif not isinstance(entry["reason"], str):
                errors.append(
                    f"{prefix}: 'reason' must be a string, "
                    f"got {type(entry['reason']).__name__}"
                )
            elif not entry["reason"].strip():
                errors.append(f"{prefix}: 'reason' must not be empty")

            if "locations" in entry:
                _validate_locations(entry["locations"], prefix, errors)

        if errors:
            fail(errors)
    except Exception as error:
        # Boundary handler: an unexpected failure while validating is reported
        # like a validation error instead of surfacing a traceback.
        # (SystemExit raised by fail() is not an Exception and passes through.)
        print(f"ERROR: YAML validation: {error}", file=sys.stderr)
        sys.exit(1)


def parse_args() -> argparse.Namespace:
    """Parse the command-line arguments of the justification processor."""
    parser = argparse.ArgumentParser(
        description="Coverage justification processor"
    )
    parser.add_argument(
        "--yaml",
        type=Path,
        required=True,
        help="Path to coverage_justifications.yaml",
    )
    parser.add_argument(
        "--source-root",
        type=Path,
        required=True,
        help="Root directory of source files",
    )
    parser.add_argument(
        "--output",
        type=Path,
        required=True,
        help="Output path for resolved justification manifest (JSON)",
    )
    parser.add_argument(
        "--file-filter",
        type=str,
        default="cpp,h,hpp,cc",
        help="Comma-separated file extensions to scan (default: cpp,h,hpp,cc)",
    )
    return parser.parse_args()


if __name__ == "__main__":
    main()
def main() -> None:
    """Merge one test's profraw files and package the result for the reporter.

    Exits with status 0 (after removing stale symlinks) when there are no
    instrumented object files or no ``*.profraw`` files, and with status 1
    when ``LLVM_PROFDATA`` is not set. Otherwise writes a zip containing
    ``profdata/target.profdata`` and ``meta/meta.json`` to the requested
    output file.
    """
    args = parse_args()
    coverage_dir = args.coverage_dir

    # Get object files from the manifest.
    object_files = get_object_files_from_manifest(args.source_file_manifest)
    if not object_files:
        print("INFO: No instrumented object files found, skipping coverage.", file=sys.stderr)
        cleanup_dangling_symlinks(coverage_dir)
        sys.exit(0)

    # Find profraw files.
    profraw_files = sorted(coverage_dir.glob("*.profraw"))
    if not profraw_files:
        print("INFO: No *.profraw files found, skipping coverage.", file=sys.stderr)
        cleanup_dangling_symlinks(coverage_dir)
        sys.exit(0)

    # Merge profraw -> profdata.
    profdata_dir = coverage_dir / "profdata"
    profdata_dir.mkdir(exist_ok=True)
    profdata_file = profdata_dir / "target.profdata"

    llvm_profdata = os.environ.get("LLVM_PROFDATA")
    if not llvm_profdata:
        print(
            "ERROR: LLVM_PROFDATA environment variable is not set. "
            "Ensure coverage.bazelrc is imported and the llvm toolchain is registered.",
            file=sys.stderr,
        )
        sys.exit(1)

    merge_cmd = [llvm_profdata, "merge", "--sparse", "--output", str(profdata_file)]
    merge_cmd.extend(str(profraw) for profraw in profraw_files)
    run_command(merge_cmd)

    # Record the resolved object file paths so the reporter can pass them to llvm-cov.
    meta_dir = coverage_dir / "meta"
    meta_dir.mkdir(exist_ok=True)
    resolved_objects = [os.path.realpath(obj) for obj in sorted(object_files)]
    with open(meta_dir / "meta.json", "w", encoding="utf-8") as meta_file:
        json.dump({"object_files": resolved_objects}, meta_file)

    # Package into zip at output_file.
    create_zip(
        root=coverage_dir,
        directories=[profdata_dir, meta_dir],
        output_file=args.output_file,
    )

    # Clean up dangling symlinks in coverage_dir that would cause Bazel tree
    # artifact validation to fail (e.g. the 'gcov' symlink created by
    # collect_cc_coverage.sh's init_gcov() pointing into the destroyed sandbox).
    cleanup_dangling_symlinks(coverage_dir)

    target = os.environ.get("TEST_TARGET", "unknown")
    print(f"INFO: Coverage merger completed for '{target}'", file=sys.stderr)
def cleanup_dangling_symlinks(directory: Path) -> None:
    """Remove symlinks in the coverage directory that would become dangling.

    Bazel's tree artifact validation rejects directories containing dangling
    symlinks. The 'gcov' symlink created by collect_cc_coverage.sh's init_gcov()
    points into the sandbox which is torn down before validation runs. Since we
    use llvm-cov directly, this symlink is not needed.

    The ``gcov`` link is removed unconditionally; any other symlink is removed
    when its target path contains ``sandbox``.
    """
    gcov_link = directory / "gcov"
    if gcov_link.is_symlink():
        gcov_link.unlink()

    # Snapshot the listing first: entries are unlinked while walking it.
    for entry in list(directory.iterdir()):
        if entry.is_symlink() and "sandbox" in os.readlink(entry):
            entry.unlink()


def get_object_files_from_manifest(source_file_manifest: Path) -> Set[str]:
    """Parse the coverage manifest to find instrumented object files.

    Every manifest line containing ``objects_list.txt`` is opened and read as
    a list of object paths, one per line. Each path is resolved against
    ``$RUNFILES_DIR/$TEST_WORKSPACE`` when it exists there and against
    ``$ROOT`` otherwise. Exits with status 1 when ``ROOT`` is not set.
    """
    runfiles_dir = Path(os.environ.get("RUNFILES_DIR", "")) / os.environ.get("TEST_WORKSPACE", "_main")
    root_env = os.environ.get("ROOT")
    if not root_env:
        print(
            "ERROR: ROOT environment variable is not set. "
            "This is normally set by Bazel when invoking the coverage output generator.",
            file=sys.stderr,
        )
        sys.exit(1)
    exec_root = Path(root_env)

    with open(source_file_manifest, encoding="utf-8") as f:
        manifest_entries = [line.strip() for line in f]

    object_files: Set[str] = set()
    for entry in manifest_entries:
        if "objects_list.txt" not in entry:
            continue
        with open(entry, encoding="utf-8") as listing:
            for line in listing:
                obj_path = line.strip()
                if not obj_path:
                    continue
                # Try runfiles first, then exec_root.
                candidate = runfiles_dir / obj_path
                if not candidate.exists():
                    candidate = exec_root / obj_path
                object_files.add(str(candidate))

    return object_files


def run_command(cmd: List[str]) -> subprocess.CompletedProcess:
    """Run a command and exit on failure.

    stdout and stderr are captured together as text. The process exits with
    status 1 both when the command returns non-zero and when it cannot be
    started at all (missing or non-executable binary).
    """
    try:
        return subprocess.run(
            cmd,
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"ERROR: Command failed with code {e.returncode}:", file=sys.stderr)
        print(f"  {' '.join(cmd)}", file=sys.stderr)
        if e.stdout:
            print(e.stdout, file=sys.stderr)
        sys.exit(1)
    except OSError as e:
        # E.g. LLVM_PROFDATA points at a path that does not exist.
        print(f"ERROR: Command could not be started: {e}", file=sys.stderr)
        print(f"  {' '.join(cmd)}", file=sys.stderr)
        sys.exit(1)


def create_zip(root: Path, directories: List[Path], output_file: Path) -> None:
    """Create a zip file from the given directories relative to root.

    Directories that do not exist are skipped. Members are added in sorted
    order so the archive layout does not depend on filesystem listing order.
    """
    with zipfile.ZipFile(output_file, "w", zipfile.ZIP_DEFLATED) as zf:
        for directory in directories:
            if not directory.exists():
                continue
            for dirpath, dirnames, filenames in os.walk(directory):
                dirnames.sort()  # in place: fixes the order os.walk descends in
                for filename in sorted(filenames):
                    file_path = Path(dirpath) / filename
                    zf.write(file_path, file_path.relative_to(root))


def parse_args() -> argparse.Namespace:
    """Parse command-line arguments matching the Bazel LCOV_MERGER interface."""
    parser = argparse.ArgumentParser(description="LLVM coverage merger for Bazel")
    parser.add_argument("--coverage_dir", type=Path, required=True)
    parser.add_argument("--output_file", type=Path, required=True)
    parser.add_argument("--source_file_manifest", type=Path, required=True)
    parser.add_argument("--filter_sources", action="append", default=[])
    parser.add_argument("--sources_to_replace_file", type=str, default=None)
    return parser.parse_args()


if __name__ == "__main__":
    main()
def main() -> None:
    """Aggregate all per-test coverage zips into one combined report.

    Writes an empty output file and exits with status 0 when there is nothing
    to report. Exits with status 1 when llvm-cov or llvm-profdata cannot be
    resolved through runfiles. Otherwise merges every extracted profdata file
    and zips an HTML report, an LCOV export and a text summary into the
    requested output file; the text summary is also echoed to stderr.
    """
    args = parse_args()
    work_dir = Path.cwd()

    # Read the list of per-test report files.
    reports = read_reports_file(args.reports_file)
    if not reports:
        print("INFO: No coverage reports found.", file=sys.stderr)
        write_empty_output(args.output_file)
        sys.exit(0)

    # Extract profdata and object files from each per-test zip.
    profdata_files, object_files = extract_reports(reports)
    if not (profdata_files and object_files):
        print("INFO: No valid profdata or object files found.", file=sys.stderr)
        write_empty_output(args.output_file)
        sys.exit(0)

    # Get llvm tools via runfiles. The rlocation paths are passed in by the
    # wrapper script (generated by score_coverage_reporter macro) so the
    # llvm toolchain repository can be named anything the consumer prefers.
    runfiles = Runfiles.Create()
    llvm_cov_resolved = runfiles.Rlocation(args.llvm_cov)
    if not llvm_cov_resolved:
        print(f"ERROR: llvm-cov not found in runfiles via {args.llvm_cov}", file=sys.stderr)
        sys.exit(1)
    llvm_bin_path = Path(llvm_cov_resolved)

    llvm_profdata_resolved = runfiles.Rlocation(args.llvm_profdata)
    if not llvm_profdata_resolved:
        print(f"ERROR: llvm-profdata not found in runfiles via {args.llvm_profdata}", file=sys.stderr)
        sys.exit(1)

    # Merge all profdata files.
    merged_profdata = work_dir / "merged_coverage.profdata"
    merge_cmd = [llvm_profdata_resolved, "merge", "--sparse", "--output", str(merged_profdata)]
    merge_cmd.extend(sorted(profdata_files))
    run_command(merge_cmd)

    # Build coverage arguments: the merged profile plus one --object per binary.
    coverage_args = ["--instr-profile", str(merged_profdata)]
    for obj in sorted(object_files):
        coverage_args += ["--object", obj]

    # Get filter regexes and workspace root.
    sorted_filters = sorted(load_filter_regexes(runfiles, args.filter_regexes))
    workspace_root = args.workspace_root

    # Generate HTML report.
    html_report_dir = work_dir / "html_report"
    run_llvm_cov_show(
        llvm_bin_path=llvm_bin_path,
        coverage_args=coverage_args,
        filter_regexes=sorted_filters,
        workspace_root=workspace_root,
        output_format="html",
        html_report_dir=html_report_dir,
    )

    # Generate LCOV report (for backward compatibility with dashboards).
    lcov_report_dir = work_dir / "lcov_report"
    lcov_report_dir.mkdir(exist_ok=True)
    lcov_result = run_llvm_cov_export(
        llvm_bin_path=llvm_bin_path,
        coverage_args=coverage_args,
        filter_regexes=sorted_filters,
        workspace_root=workspace_root,
    )
    with open(lcov_report_dir / "lcov.dat", "w", encoding="utf-8") as lcov_file:
        lcov_file.write(lcov_result.stdout)

    # Generate text summary.
    text_report_dir = work_dir / "text_report"
    text_report_dir.mkdir(exist_ok=True)
    summary = run_llvm_cov_report(
        llvm_bin_path=llvm_bin_path,
        coverage_args=coverage_args,
        filter_regexes=sorted_filters,
    )
    with open(text_report_dir / "summary.txt", "w", encoding="utf-8") as summary_file:
        summary_file.write(summary.stdout)
    print(summary.stdout, file=sys.stderr)

    # Package everything into the output zip.
    create_zip(
        root=work_dir,
        directories=[html_report_dir, lcov_report_dir, text_report_dir],
        output_file=args.output_file,
    )

    print(f"INFO: Coverage reporter completed. Output: {args.output_file}", file=sys.stderr)
def _ignore_filename_args(filter_regexes: List[str], workspace_root: str = None) -> List[str]:
    """Turn filter regexes into ``--ignore-filename-regex=`` arguments.

    When ``workspace_root`` is given, every ``/proc/self/cwd/`` occurrence is
    rewritten to the workspace root. The replaced text ends with a slash, so
    the replacement is given one too; otherwise a root such as ``/home/u/ws``
    would fuse with the next path component (``/home/u/wsexternal/...``) and
    the filter would silently stop matching.
    """
    if workspace_root is None:
        adjusted = list(filter_regexes)
    else:
        prefix = workspace_root
        if prefix and not prefix.endswith("/"):
            prefix += "/"
        adjusted = [regex.replace("/proc/self/cwd/", prefix) for regex in filter_regexes]
    return [f"--ignore-filename-regex={regex}" for regex in adjusted]


def run_llvm_cov_show(
    llvm_bin_path: Path,
    coverage_args: List[str],
    filter_regexes: List[str],
    workspace_root: str,
    output_format: str,
    html_report_dir: Path = None,
) -> subprocess.CompletedProcess:
    """Run ``llvm-cov show`` and return the completed process.

    Args:
        llvm_bin_path: Path to the llvm-cov binary; an ``llvm-cxxfilt`` next
            to it is used as demangler when present.
        coverage_args: Profile and object arguments appended to the command.
        filter_regexes: Source path regexes to exclude.
        workspace_root: Real workspace root that ``/proc/self/cwd/`` maps to.
        output_format: Value passed as ``--format``.
        html_report_dir: When set, output directory for the report; also
            enables the watermark and expansion options.
    """
    cmd = [
        str(llvm_bin_path),
        "show",
        f"--format={output_format}",
        f"--path-equivalence=/proc/self/cwd/,{workspace_root}",
        f"--compilation-dir={workspace_root}",
        "--show-branches=count",
        "--show-region-summary=0",
    ]

    cxxfilt = llvm_bin_path.parent / "llvm-cxxfilt"
    if cxxfilt.exists():
        cmd.append(f"--Xdemangler={cxxfilt}")

    cmd.extend(_ignore_filename_args(filter_regexes, workspace_root))

    if html_report_dir:
        cmd.append(f"--output-dir={html_report_dir}")
        cmd.append("--coverage-watermark=100,50")
        cmd.append("--show-expansions")

    cmd.extend(coverage_args)
    return run_command(cmd)


def run_llvm_cov_export(
    llvm_bin_path: Path,
    coverage_args: List[str],
    filter_regexes: List[str],
    workspace_root: str,
) -> subprocess.CompletedProcess:
    """Run ``llvm-cov export`` to produce LCOV format on the process's stdout."""
    cmd = [
        str(llvm_bin_path),
        "export",
        "--format=lcov",
        f"--path-equivalence=/proc/self/cwd/,{workspace_root}",
        f"--compilation-dir={workspace_root}",
    ]
    cmd.extend(_ignore_filename_args(filter_regexes, workspace_root))
    cmd.extend(coverage_args)
    return run_command(cmd)


def run_llvm_cov_report(
    llvm_bin_path: Path,
    coverage_args: List[str],
    filter_regexes: List[str],
) -> subprocess.CompletedProcess:
    """Run ``llvm-cov report`` for a text summary.

    Unlike show/export, no workspace root is available here, so the filter
    regexes are passed through without path rewriting.
    """
    cmd = [
        str(llvm_bin_path),
        "report",
        "--summary-only",
        "--show-region-summary=0",
        "--show-branch-summary=1",
    ]
    cmd.extend(_ignore_filename_args(filter_regexes))
    cmd.extend(coverage_args)
    return run_command(cmd)
def extract_reports(reports: List[str]) -> Tuple[Set[str], Set[str]]:
    """Extract profdata and object files from per-test zip files.

    Each usable report contributes its ``profdata/target.profdata`` member,
    written to the current working directory under a name derived from the
    report's position in ``reports``, and the existing object files listed in
    its ``meta/meta.json``. Reports that are missing, empty, not a zip, or
    whose path contains ``baseline_coverage`` are skipped silently; reports
    with missing members or malformed metadata are skipped with a warning.

    Returns:
        ``(profdata_paths, object_file_paths)`` as sets of strings.
    """
    valid_profdata_files: Set[str] = set()
    valid_object_files: Set[str] = set()

    for i, report_path in enumerate(reports):
        # Skip baseline_coverage files (LCOV format, not our zip).
        if "baseline_coverage" in report_path:
            continue

        report = Path(report_path)
        if not report.exists() or report.stat().st_size == 0:
            continue
        if not zipfile.is_zipfile(report):
            continue

        try:
            with zipfile.ZipFile(report, "r") as archive:
                target_meta = json.loads(archive.read("meta/meta.json"))
                profdata_content = archive.read("profdata/target.profdata")
        except (zipfile.BadZipFile, KeyError, ValueError) as e:
            # ValueError covers both malformed JSON and undecodable bytes.
            print(f"WARNING: Skipping invalid report {report_path}: {e}", file=sys.stderr)
            continue

        if not isinstance(target_meta, dict):
            print(
                f"WARNING: Skipping invalid report {report_path}: "
                f"meta.json must contain a mapping, got {type(target_meta).__name__}",
                file=sys.stderr,
            )
            continue

        profdata_path = Path.cwd() / f"coverage_report_{i:08d}.profdata"
        with open(profdata_path, "wb") as f:
            f.write(profdata_content)
        valid_profdata_files.add(str(profdata_path))

        # Collect object files that still exist on disk.
        for obj in target_meta.get("object_files") or []:
            if isinstance(obj, str) and obj and Path(obj).exists():
                valid_object_files.add(os.path.realpath(obj))

    return valid_profdata_files, valid_object_files


def read_reports_file(reports_file: Path) -> List[str]:
    """Read the reports file listing all per-test coverage outputs.

    Returns the stripped, non-blank lines in file order.
    """
    with open(reports_file, encoding="utf-8") as f:
        return [line.strip() for line in f if line.strip()]


def load_filter_regexes(runfiles: "Runfiles", rlocation_path: str) -> List[str]:
    """Load filter regexes from filter_regexes.txt via Bazel runfiles.

    Blank lines and lines starting with ``#`` are ignored. When the file
    cannot be resolved, a warning is printed and no filtering is applied.
    """
    path = runfiles.Rlocation(rlocation_path)
    if not path or not Path(path).exists():
        print(f"WARNING: {rlocation_path} not found in runfiles, no source filtering applied",
              file=sys.stderr)
        return []

    stripped = (line.strip() for line in Path(path).read_text(encoding="utf-8").splitlines())
    return [line for line in stripped if line and not line.startswith("#")]


def write_empty_output(output_file: Path) -> None:
    """Write an empty file as output when there's nothing to report."""
    with open(output_file, "w", encoding="utf-8") as f:
        f.write("")


def run_command(cmd: List[str]) -> subprocess.CompletedProcess:
    """Run a command and exit on failure.

    stdout and stderr are captured together as text. The process exits with
    status 1 both when the command returns non-zero and when it cannot be
    started at all (missing or non-executable binary).
    """
    try:
        return subprocess.run(
            cmd,
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"ERROR: Command failed with code {e.returncode}:", file=sys.stderr)
        print(f"  {' '.join(cmd)}", file=sys.stderr)
        if e.stdout:
            print(e.stdout, file=sys.stderr)
        sys.exit(1)
    except OSError as e:
        print(f"ERROR: Command could not be started: {e}", file=sys.stderr)
        print(f"  {' '.join(cmd)}", file=sys.stderr)
        sys.exit(1)


def create_zip(root: Path, directories: List[Path], output_file: Path) -> None:
    """Create a zip file from the given directories relative to root.

    Directories that do not exist are skipped. Members are added in sorted
    order so the archive layout does not depend on filesystem listing order.
    """
    with zipfile.ZipFile(output_file, "w", zipfile.ZIP_DEFLATED) as zf:
        for directory in directories:
            if not directory.exists():
                continue
            for dirpath, dirnames, filenames in os.walk(directory):
                dirnames.sort()  # in place: fixes the order os.walk descends in
                for filename in sorted(filenames):
                    file_path = Path(dirpath) / filename
                    zf.write(file_path, file_path.relative_to(root))


def parse_args() -> argparse.Namespace:
    """Parse command-line arguments matching the Bazel coverage_report_generator interface."""
    parser = argparse.ArgumentParser(description="LLVM coverage reporter for Bazel")
    parser.add_argument("--output_file", type=Path, required=True)
    parser.add_argument("--reports_file", type=Path, required=True)
    parser.add_argument("--filter_regexes", type=str, required=True,
                        help="Rlocation path to the filter regexes file")
    parser.add_argument("--workspace_root", type=str, required=True,
                        help="Real workspace root path for source path mapping")
    parser.add_argument("--llvm_cov", type=str, required=True,
                        help="Rlocation path of the llvm-cov binary")
    parser.add_argument("--llvm_profdata", type=str, required=True,
                        help="Rlocation path of the llvm-profdata binary")
    return parser.parse_args()


if __name__ == "__main__":
    main()
+# +pyyaml==6.0.3 \ + --hash=sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c \ + --hash=sha256:0150219816b6a1fa26fb4699fb7daa9caf09eb1999f3b70fb6e786805e80375a \ + --hash=sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3 \ + --hash=sha256:02ea2dfa234451bbb8772601d7b8e426c2bfa197136796224e50e35a78777956 \ + --hash=sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6 \ + --hash=sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c \ + --hash=sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65 \ + --hash=sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a \ + --hash=sha256:1ebe39cb5fc479422b83de611d14e2c0d3bb2a18bbcb01f229ab3cfbd8fee7a0 \ + --hash=sha256:214ed4befebe12df36bcc8bc2b64b396ca31be9304b8f59e25c11cf94a4c033b \ + --hash=sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1 \ + --hash=sha256:22ba7cfcad58ef3ecddc7ed1db3409af68d023b7f940da23c6c2a1890976eda6 \ + --hash=sha256:27c0abcb4a5dac13684a37f76e701e054692a9b2d3064b70f5e4eb54810553d7 \ + --hash=sha256:28c8d926f98f432f88adc23edf2e6d4921ac26fb084b028c733d01868d19007e \ + --hash=sha256:2e71d11abed7344e42a8849600193d15b6def118602c4c176f748e4583246007 \ + --hash=sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310 \ + --hash=sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4 \ + --hash=sha256:3c5677e12444c15717b902a5798264fa7909e41153cdf9ef7ad571b704a63dd9 \ + --hash=sha256:3ff07ec89bae51176c0549bc4c63aa6202991da2d9a6129d7aef7f1407d3f295 \ + --hash=sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea \ + --hash=sha256:418cf3f2111bc80e0933b2cd8cd04f286338bb88bdc7bc8e6dd775ebde60b5e0 \ + --hash=sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e \ + --hash=sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac \ + 
--hash=sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9 \ + --hash=sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7 \ + --hash=sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35 \ + --hash=sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb \ + --hash=sha256:5cf4e27da7e3fbed4d6c3d8e797387aaad68102272f8f9752883bc32d61cb87b \ + --hash=sha256:5e0b74767e5f8c593e8c9b5912019159ed0533c70051e9cce3e8b6aa699fcd69 \ + --hash=sha256:5ed875a24292240029e4483f9d4a4b8a1ae08843b9c54f43fcc11e404532a8a5 \ + --hash=sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b \ + --hash=sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c \ + --hash=sha256:6344df0d5755a2c9a276d4473ae6b90647e216ab4757f8426893b5dd2ac3f369 \ + --hash=sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd \ + --hash=sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824 \ + --hash=sha256:66291b10affd76d76f54fad28e22e51719ef9ba22b29e1d7d03d6777a9174198 \ + --hash=sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065 \ + --hash=sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c \ + --hash=sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c \ + --hash=sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764 \ + --hash=sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196 \ + --hash=sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b \ + --hash=sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00 \ + --hash=sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac \ + --hash=sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8 \ + --hash=sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e \ + --hash=sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28 \ + 
--hash=sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3 \ + --hash=sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5 \ + --hash=sha256:9c57bb8c96f6d1808c030b1687b9b5fb476abaa47f0db9c0101f5e9f394e97f4 \ + --hash=sha256:9c7708761fccb9397fe64bbc0395abcae8c4bf7b0eac081e12b809bf47700d0b \ + --hash=sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf \ + --hash=sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5 \ + --hash=sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702 \ + --hash=sha256:b30236e45cf30d2b8e7b3e85881719e98507abed1011bf463a8fa23e9c3e98a8 \ + --hash=sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788 \ + --hash=sha256:b865addae83924361678b652338317d1bd7e79b1f4596f96b96c77a5a34b34da \ + --hash=sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d \ + --hash=sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc \ + --hash=sha256:bdb2c67c6c1390b63c6ff89f210c8fd09d9a1217a465701eac7316313c915e4c \ + --hash=sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba \ + --hash=sha256:c2514fceb77bc5e7a2f7adfaa1feb2fb311607c9cb518dbc378688ec73d8292f \ + --hash=sha256:c3355370a2c156cffb25e876646f149d5d68f5e0a3ce86a5084dd0b64a994917 \ + --hash=sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5 \ + --hash=sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26 \ + --hash=sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f \ + --hash=sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b \ + --hash=sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be \ + --hash=sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c \ + --hash=sha256:efd7b85f94a6f21e4932043973a7ba2613b059c4a000551892ac9f1d11f5baf3 \ + --hash=sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6 \ + 
--hash=sha256:fa160448684b4e94d80416c0fa4aac48967a969efe22931448d853ada8baf926 \ + --hash=sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0 + # via -r requirements.in diff --git a/tests/.bazelrc b/tests/.bazelrc index 0c70808..6fbb827 100644 --- a/tests/.bazelrc +++ b/tests/.bazelrc @@ -15,6 +15,8 @@ import %workspace%/../sanitizers/sanitizers.bazelrc import %workspace%/../clang_tidy/clang_tidy.bazelrc +import %workspace%/../coverage/coverage.bazelrc + common --registry=https://raw.githubusercontent.com/eclipse-score/bazel_registry/main/ common --registry=https://bcr.bazel.build @@ -25,3 +27,11 @@ build:lsan --extra_toolchains=@llvm_toolchain//:cc-toolchain-x86_64-lin build:tsan --extra_toolchains=@llvm_toolchain//:cc-toolchain-x86_64-linux build:asan_ubsan_lsan --extra_toolchains=@llvm_toolchain//:cc-toolchain-x86_64-linux build:tsan_ubsan --extra_toolchains=@llvm_toolchain//:cc-toolchain-x86_64-linux + +# --------------------------------------------------------------------------- +# Coverage smoke-test wiring (consumer-side settings on top of the imported +# generic coverage.bazelrc). +# --------------------------------------------------------------------------- +coverage --extra_toolchains=@llvm_toolchain//:cc-toolchain-x86_64-linux +coverage --instrumentation_filter="^//coverage[/:]" +coverage --coverage_report_generator=//coverage:reporter_wrapper diff --git a/tests/BUILD.bazel b/tests/BUILD.bazel index 04527db..aa1b025 100644 --- a/tests/BUILD.bazel +++ b/tests/BUILD.bazel @@ -14,6 +14,10 @@ load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_test") load("//tools/lint:linters.bzl", "clang_tidy_test") +# Required by @score_cpp_policies//coverage:defs.bzl :: score_coverage_reporter +# so the macro can rlocation-resolve the consumer workspace root at runtime. +exports_files(["MODULE.bazel"]) + # Enforce sanitizer combination constraints at build time (e.g. ASan+TSan is invalid). 
genrule( diff --git a/tests/MODULE.bazel b/tests/MODULE.bazel index d4e97f9..7844112 100644 --- a/tests/MODULE.bazel +++ b/tests/MODULE.bazel @@ -15,6 +15,7 @@ module(name = "score_cpp_policies_tests") bazel_dep(name = "googletest", version = "1.17.0.bcr.2") bazel_dep(name = "rules_cc", version = "0.2.17") +bazel_dep(name = "rules_python", version = "1.8.5") bazel_dep(name = "toolchains_llvm", version = "1.7.0") bazel_dep(name = "score_cpp_policies") local_path_override( @@ -39,3 +40,11 @@ llvm.toolchain( llvm_version = "19.1.7", ) use_repo(llvm, "llvm_toolchain") + +# Python toolchain — required to run @score_cpp_policies//coverage:justify and +# :effective_coverage (py_binary targets that depend on pyyaml for Python 3.12). +python = use_extension("@rules_python//python/extensions:python.bzl", "python") +python.toolchain( + python_version = "3.12", + is_default = True, +) diff --git a/tests/coverage/BUILD.bazel b/tests/coverage/BUILD.bazel new file mode 100644 index 0000000..4b7bde7 --- /dev/null +++ b/tests/coverage/BUILD.bazel @@ -0,0 +1,52 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +# +# Smoke test for @score_cpp_policies//coverage. +# +# Build verification: `bazel build //coverage/...` from the tests workspace +# exercises macro expansion, genrule wiring, py_binary loading, and sh_binary +# construction. 
Coverage execution: `bazel coverage //coverage/...` then +# `bazel run //coverage:generate_coverage_html -- --yaml coverage/coverage_justifications.yaml`. + +load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") +load("@score_cpp_policies//coverage:defs.bzl", "score_coverage_reporter") + +cc_library( + name = "coverable", + srcs = ["coverable.cpp"], + hdrs = ["coverable.h"], + visibility = ["//visibility:public"], +) + +cc_test( + name = "coverable_test", + srcs = ["coverable_test.cpp"], + target_compatible_with = ["@score_cpp_policies//sanitizers/constraints:no_tsan"], + deps = [ + ":coverable", + "@googletest//:gtest", + "@googletest//:gtest_main", + ], +) + +# Project-specific extension of the baseline filter regexes. +# Exposed for the macro to consume. +exports_files(["coverage_filter_regexes.txt"]) + +score_coverage_reporter( + name = "reporter_wrapper", + extra_regex_files = [":coverage_filter_regexes.txt"], + llvm_cov = "@llvm_toolchain//:llvm-cov", + llvm_profdata = "@llvm_toolchain//:llvm-profdata", + visibility = ["//visibility:public"], +) diff --git a/tests/coverage/coverable.cpp b/tests/coverage/coverable.cpp new file mode 100644 index 0000000..95a9558 --- /dev/null +++ b/tests/coverage/coverable.cpp @@ -0,0 +1,28 @@ +// ******************************************************************************* +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. 
+// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// https://www.apache.org/licenses/LICENSE-2.0 +// +// SPDX-License-Identifier: Apache-2.0 +// ******************************************************************************* + +#include "coverage/coverable.h" + +namespace score::cpp_policies::tests { + +int classify(int value) noexcept { + if (value < 0) { + return -1; + } + if (value == 0) { + return 0; + } + return 1; +} + +} // namespace score::cpp_policies::tests diff --git a/tests/coverage/coverable.h b/tests/coverage/coverable.h new file mode 100644 index 0000000..052834a --- /dev/null +++ b/tests/coverage/coverable.h @@ -0,0 +1,26 @@ +// ******************************************************************************* +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// https://www.apache.org/licenses/LICENSE-2.0 +// +// SPDX-License-Identifier: Apache-2.0 +// ******************************************************************************* + +#ifndef SCORE_CPP_POLICIES_TESTS_COVERAGE_COVERABLE_H_ +#define SCORE_CPP_POLICIES_TESTS_COVERAGE_COVERABLE_H_ + +namespace score::cpp_policies::tests { + +// Minimal API exercised by the coverage smoke test. Two branches are exposed +// so the report has both a covered and an uncovered branch to verify the +// HTML / LCOV pipeline end-to-end. 
+int classify(int value) noexcept; + +} // namespace score::cpp_policies::tests + +#endif // SCORE_CPP_POLICIES_TESTS_COVERAGE_COVERABLE_H_ diff --git a/tests/coverage/coverable_test.cpp b/tests/coverage/coverable_test.cpp new file mode 100644 index 0000000..0b5236a --- /dev/null +++ b/tests/coverage/coverable_test.cpp @@ -0,0 +1,30 @@ +// ******************************************************************************* +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// https://www.apache.org/licenses/LICENSE-2.0 +// +// SPDX-License-Identifier: Apache-2.0 +// ******************************************************************************* + +#include "coverage/coverable.h" + +#include <gtest/gtest.h> + +namespace score::cpp_policies::tests { +namespace { + +// Intentionally only covers the negative and zero branches — the positive +// branch should appear in the coverage report as uncovered, exercising the +// "uncovered branch" rendering path. +TEST(ClassifyTest, NegativeAndZero) { + EXPECT_EQ(classify(-5), -1); + EXPECT_EQ(classify(0), 0); +} + +} // namespace +} // namespace score::cpp_policies::tests diff --git a/tests/coverage/coverage_filter_regexes.txt b/tests/coverage/coverage_filter_regexes.txt new file mode 100644 index 0000000..7f8d928 --- /dev/null +++ b/tests/coverage/coverage_filter_regexes.txt @@ -0,0 +1,3 @@ +# Smoke-test consumer-specific filter regex extensions. +# These are concatenated AFTER @score_cpp_policies//coverage:filter_regexes.txt. 
+.*/smoke_generated/.* diff --git a/tests/coverage/coverage_justifications.yaml b/tests/coverage/coverage_justifications.yaml new file mode 100644 index 0000000..7b02617 --- /dev/null +++ b/tests/coverage/coverage_justifications.yaml @@ -0,0 +1,21 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +version: 1 +justifications: + - id: smoke-positive-branch + category: platform_specific + reason: | + Sample justification used by the score_cpp_policies smoke test. + The positive branch in coverable.cpp is intentionally not exercised + by coverable_test.cpp to verify the effective-coverage post-processor. From 9da2a21fb38de9c7cccf3714aa2707bbc3bceb6a Mon Sep 17 00:00:00 2001 From: "Emrich Oliver (ETAS)" Date: Tue, 23 Jun 2026 09:56:30 +0200 Subject: [PATCH 02/11] Surface untested source files at 0% coverage llvm-cov only reports files linked into at least one test binary. Sources that exist in the workspace but no test pulls in silently disappear from the report, causing coverage to appear higher than it actually is. Three mechanisms work together to fix this: Bazel aspect + manifest rule (coverage/defs.bzl): _collect_sources_aspect walks the dependency graph of all configured targets and collects C/C++ source files. score_instrumented_sources_manifest writes a workspace-relative path-per-line manifest. 
score_coverage_reporter gains an optional instrumented_sources_manifest parameter that passes the manifest to the reporter via --instrumented_sources_manifest. Reporter augmentation (coverage/reporter.py): After llvm-cov export, the reporter compares the manifest against covered sources from the LCOV output. For each missing file a synthetic 0%-coverage LCOV record is appended (SF + DA per non-blank line + LF/LH). The llvm-cov text summary TOTALS line is updated in-place using re.finditer to preserve fixed-width column alignment. Per-file HTML pages and a "Not Linked Into Tests" index section are generated for visibility. Correctness and security fixes applied during review: - Use str.replace() instead of str.format() for HTML template rendering so that { and } in C++ source bodies do not crash the reporter with KeyError/ValueError. - Separate stderr from stdout for run_llvm_cov_export and run_llvm_cov_report (separate_stderr=True) so that llvm-cov warning messages are not mixed into LCOV/summary output. - Validate that resolved manifest paths stay within workspace_root via Path.is_relative_to() before reading files. - Extend _escape_html to cover ' (&#39;) and " (&quot;) so that file paths with apostrophes do not break HTML attributes. - Count only non-blank lines for LF in synthetic LCOV records to avoid inflating the denominator in aggregate metrics. Test fixture (tests/coverage/uncovered.cpp, uncovered.h, BUILD.bazel): cc_library intentionally not linked into any cc_test. Verifies that the reporter surfaces the file at 0% coverage rather than omitting it. Docs (coverage/README.md): new section 5a with usage example. 
--- coverage/README.md | 42 ++++ coverage/defs.bzl | 114 ++++++++++- coverage/reporter.py | 369 +++++++++++++++++++++++++++++++++-- tests/coverage/BUILD.bazel | 29 ++- tests/coverage/uncovered.cpp | 29 +++ tests/coverage/uncovered.h | 26 +++ 6 files changed, 592 insertions(+), 17 deletions(-) create mode 100644 tests/coverage/uncovered.cpp create mode 100644 tests/coverage/uncovered.h diff --git a/coverage/README.md b/coverage/README.md index 4b69ad4..4033770 100644 --- a/coverage/README.md +++ b/coverage/README.md @@ -116,6 +116,48 @@ Example `tools/coverage/coverage_filter_regexes.txt`: coverage --coverage_report_generator=//tools/coverage:reporter_wrapper ``` +## 5a. (Optional) Surface untested files at 0% coverage + +`llvm-cov` only reports source files that are linked into at least one +exercised test. Source files that ship in the project but no test pulls in +will silently disappear from the report — which usually misrepresents +coverage as higher than it actually is. + +To surface those files at 0% coverage, build a manifest of every C/C++ +source reachable from your coverage roots and pass it to the reporter: + +```python +load( + "@score_cpp_policies//coverage:defs.bzl", + "score_coverage_reporter", + "score_instrumented_sources_manifest", +) + +score_instrumented_sources_manifest( + name = "instrumented_sources", + # The aspect walks `deps` (and `srcs`) recursively, so listing the + # top-level library/binary/test targets is enough. 
+ targets = [ + "//mymod:lib", + "//mymod/tests:all_tests", + ], +) + +score_coverage_reporter( + name = "reporter_wrapper", + llvm_cov = "@llvm_toolchain//:llvm-cov", + llvm_profdata = "@llvm_toolchain//:llvm-profdata", + extra_regex_files = [":coverage_filter_regexes.txt"], + instrumented_sources_manifest = ":instrumented_sources", + visibility = ["//visibility:public"], +) +``` + +Anything in the manifest that the llvm-cov export does not already cover +(and that survives the configured `--ignore-filename-regex` set) is added +as a synthetic 0%-coverage record to the LCOV file and gets a per-file +HTML page plus a "Not Linked Into Tests" section on the report index. + ## 6. (Optional) Set up justifications Create `tools/coverage/coverage_justifications.yaml`: diff --git a/coverage/defs.bzl b/coverage/defs.bzl index 193dc78..3039243 100644 --- a/coverage/defs.bzl +++ b/coverage/defs.bzl @@ -49,11 +49,105 @@ load("@rules_shell//shell:sh_binary.bzl", "sh_binary") _BASELINE_REGEX = "@score_cpp_policies//coverage:filter_regexes.txt" _REPORTER = "@score_cpp_policies//coverage:reporter" +# --------------------------------------------------------------------------- +# Instrumented sources collection. +# +# llvm-cov only reports files whose object files were linked into one of the +# tests it was asked to analyse. Source files that exist in the workspace but +# are not linked into any cc_test (directly or transitively) therefore never +# appear in the coverage report - even though they would normally be +# instrumented under --instrumentation_filter. +# +# To surface those files at 0% coverage we ship: +# +# * _collect_sources_aspect - walks the dependency graph of a target, +# gathers srcs (.cpp/.cc/.cxx/.c/.C) from every cc_library, cc_binary, +# and cc_test it encounters, and aggregates them into +# InstrumentedSourcesInfo. 
+# * score_instrumented_sources_manifest - applies the aspect to a list of +# consumer-supplied targets and writes a text file with one +# workspace-relative source path per line. +# +# The consumer points score_coverage_reporter at this manifest via the +# optional `instrumented_sources_manifest` attribute. The reporter then +# augments the llvm-cov LCOV + HTML output with synthetic 0%-coverage entries +# for every manifest entry that did not appear in the report. +# --------------------------------------------------------------------------- + +InstrumentedSourcesInfo = provider( + doc = "Aggregate of all C/C++ source files reachable through cc_* targets.", + fields = { + "sources": "depset of File objects (workspace-local C/C++ source files)", + }, +) + +_CC_SRC_EXTS = ("cc", "cpp", "cxx", "c", "C") +_CC_KINDS = ("cc_library", "cc_binary", "cc_test") +_PROPAGATE_ATTRS = ["deps", "srcs", "implementation_deps"] + +def _collect_sources_aspect_impl(target, ctx): + direct = [] + if ctx.rule.kind in _CC_KINDS: + for src in getattr(ctx.rule.attr, "srcs", None) or []: + for f in src.files.to_list(): + if f.extension in _CC_SRC_EXTS and not f.short_path.startswith("../"): + direct.append(f) + + transitive = [] + for attr_name in _PROPAGATE_ATTRS: + for dep in getattr(ctx.rule.attr, attr_name, None) or []: + if InstrumentedSourcesInfo in dep: + transitive.append(dep[InstrumentedSourcesInfo].sources) + + return [InstrumentedSourcesInfo( + sources = depset(direct = direct, transitive = transitive), + )] + +_collect_sources_aspect = aspect( + implementation = _collect_sources_aspect_impl, + attr_aspects = _PROPAGATE_ATTRS, + provides = [InstrumentedSourcesInfo], + doc = "Collect C/C++ source files from cc_* targets reachable via deps/srcs.", +) + +def _instrumented_sources_manifest_impl(ctx): + transitive = [ + t[InstrumentedSourcesInfo].sources + for t in ctx.attr.targets + if InstrumentedSourcesInfo in t + ] + files = depset(transitive = transitive).to_list() + + # 
Deduplicate (Starlark has no ordered set type) and sort for determinism. + paths = sorted({f.short_path: None for f in files}.keys()) + + out = ctx.actions.declare_file(ctx.label.name + ".txt") + content = "\n".join(paths) + ("\n" if paths else "") + ctx.actions.write(output = out, content = content) + return [DefaultInfo(files = depset([out]))] + +score_instrumented_sources_manifest = rule( + implementation = _instrumented_sources_manifest_impl, + attrs = { + "targets": attr.label_list( + aspects = [_collect_sources_aspect], + mandatory = True, + doc = "Targets whose transitive cc_* source files should be listed.", + ), + }, + doc = """Emit a text manifest of C/C++ source files reachable from `targets`. + +The output is a newline-separated list of workspace-relative paths. Pass this +target to score_coverage_reporter(instrumented_sources_manifest = ...) so the +reporter can add 0%-coverage entries for files that no test linked against.""", +) + def score_coverage_reporter( name, llvm_cov, llvm_profdata, extra_regex_files = None, + instrumented_sources_manifest = None, **kwargs): """Create a Bazel --coverage_report_generator wrapper for this repository. @@ -70,6 +164,12 @@ def score_coverage_reporter( @score_cpp_policies baseline. Use these to exclude consumer-specific patterns (e.g. project-only generator outputs). + instrumented_sources_manifest: Optional label of a + `score_instrumented_sources_manifest` target. When + provided, the reporter adds 0%-coverage entries for + every file in the manifest that did not appear in + the llvm-cov report (i.e. files that no test linked + against). **kwargs: Forwarded to the underlying sh_binary (e.g. visibility, tags). 
""" extra_regex_files = extra_regex_files or [] @@ -79,6 +179,12 @@ def score_coverage_reporter( wrapper_gen_name = name + "_wrapper_gen" wrapper_out = name + ".sh" + manifest_srcs = [instrumented_sources_manifest] if instrumented_sources_manifest else [] + manifest_flag_line = ( + " --instrumented_sources_manifest=\"$(rlocationpath %s)\" \\\\\n" % instrumented_sources_manifest + if instrumented_sources_manifest else "" + ) + # Concatenate baseline regexes + consumer extras into a single file. # Order is irrelevant for llvm-cov; it treats them as a set. native.genrule( @@ -108,7 +214,7 @@ def score_coverage_reporter( "//:MODULE.bazel", llvm_cov, llvm_profdata, - ], + ] + manifest_srcs, outs = [wrapper_out], tools = [_REPORTER], cmd = ("""cat > $@ << EOF @@ -127,10 +233,10 @@ exec "\\$${RUNFILES_DIR}/$(rlocationpath %s)" \\\\ --workspace_root="\\$${WORKSPACE_ROOT}" \\\\ --llvm_cov="$(rlocationpath %s)" \\\\ --llvm_profdata="$(rlocationpath %s)" \\\\ - "\\$$@" +%s "\\$$@" EOF chmod +x $@ -""" % (_REPORTER, merged_name, llvm_cov, llvm_profdata)), +""" % (_REPORTER, merged_name, llvm_cov, llvm_profdata, manifest_flag_line)), ) sh_binary( @@ -142,6 +248,6 @@ chmod +x $@ "//:MODULE.bazel", llvm_cov, llvm_profdata, - ], + ] + manifest_srcs, **kwargs ) diff --git a/coverage/reporter.py b/coverage/reporter.py index 1e53877..9965d60 100644 --- a/coverage/reporter.py +++ b/coverage/reporter.py @@ -25,6 +25,7 @@ import argparse import json import os +import re import subprocess import sys import zipfile @@ -98,8 +99,6 @@ def main() -> None: output_format="html", html_report_dir=html_report_dir, ) - - # Generate LCOV report (for backward compatibility with dashboards). 
lcov_report_dir = Path.cwd() / "lcov_report" lcov_report_dir.mkdir(exist_ok=True) lcov_result = run_llvm_cov_export( @@ -108,8 +107,46 @@ def main() -> None: filter_regexes=sorted(filter_regexes), workspace_root=workspace_root, ) + lcov_text = lcov_result.stdout + + # Augment with 0%-coverage entries for files that no test linked against. + untested_sources: List[str] = [] + if args.instrumented_sources_manifest: + manifest_path = r.Rlocation(args.instrumented_sources_manifest) + if not manifest_path or not Path(manifest_path).exists(): + print( + f"WARNING: instrumented sources manifest not found via " + f"{args.instrumented_sources_manifest}", + file=sys.stderr, + ) + else: + covered = _covered_sources_from_lcov(lcov_text) + untested_sources = _find_untested_sources( + manifest_path=Path(manifest_path), + workspace_root=workspace_root, + covered_sources=covered, + filter_regexes=sorted(filter_regexes), + ) + if untested_sources: + print( + f"INFO: Augmenting report with {len(untested_sources)} " + f"untested source file(s).", + file=sys.stderr, + ) + lcov_text = _append_zero_coverage_lcov( + lcov_text, untested_sources, workspace_root + ) + with open(lcov_report_dir / "lcov.dat", "w", encoding="utf-8") as f: - f.write(lcov_result.stdout) + f.write(lcov_text) + + # Augment the HTML report with 0%-coverage pages for untested files. + if untested_sources: + _augment_html_with_untested( + html_report_dir=html_report_dir, + untested_sources=untested_sources, + workspace_root=workspace_root, + ) # Generate text summary. 
text_report_dir = Path.cwd() / "text_report" @@ -119,9 +156,12 @@ def main() -> None: coverage_args=coverage_args, filter_regexes=sorted(filter_regexes), ) + summary_text = summary.stdout + if untested_sources: + summary_text = _augment_text_summary(summary_text, untested_sources) with open(text_report_dir / "summary.txt", "w", encoding="utf-8") as f: - f.write(summary.stdout) - print(summary.stdout, file=sys.stderr) + f.write(summary_text) + print(summary_text, file=sys.stderr) # Package everything into the output zip. directories = [html_report_dir, lcov_report_dir, text_report_dir] @@ -190,7 +230,7 @@ def run_llvm_cov_export( cmd.append(f"--ignore-filename-regex={adjusted}") cmd.extend(coverage_args) - return run_command(cmd) + return run_command(cmd, separate_stderr=True) def run_llvm_cov_report( @@ -211,7 +251,7 @@ def run_llvm_cov_report( cmd.append(f"--ignore-filename-regex={regex}") cmd.extend(coverage_args) - return run_command(cmd) + return run_command(cmd, separate_stderr=True) def extract_reports(reports: List[str]) -> Tuple[Set[str], Set[str]]: @@ -283,21 +323,28 @@ def write_empty_output(output_file: Path) -> None: f.write("") -def run_command(cmd: List[str]) -> subprocess.CompletedProcess: - """Run a command and exit on failure.""" +def run_command(cmd: List[str], separate_stderr: bool = False) -> subprocess.CompletedProcess: + """Run a command and exit on failure. + + When separate_stderr=True, stderr is captured separately from stdout so + that callers which parse stdout as structured data (e.g. LCOV text) are + not polluted by llvm-cov warning messages. 
+ """ + stderr_target = subprocess.PIPE if separate_stderr else subprocess.STDOUT try: return subprocess.run( cmd, check=True, stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, + stderr=stderr_target, text=True, ) except subprocess.CalledProcessError as e: print(f"ERROR: Command failed with code {e.returncode}:", file=sys.stderr) print(f" {' '.join(cmd)}", file=sys.stderr) - if e.stdout: - print(e.stdout, file=sys.stderr) + output = (e.stdout or "") + (e.stderr or "") + if output: + print(output, file=sys.stderr) sys.exit(1) @@ -314,6 +361,299 @@ def create_zip(root: Path, directories: List[Path], output_file: Path) -> None: zf.write(file_path, arcname) +def _covered_sources_from_lcov(lcov_text: str) -> Set[str]: + """Return the set of absolute source paths that appear in an LCOV report.""" + sources: Set[str] = set() + for line in lcov_text.splitlines(): + if line.startswith("SF:"): + sources.add(os.path.realpath(line[3:].strip())) + return sources + + +def _find_untested_sources( + manifest_path: Path, + workspace_root: str, + covered_sources: Set[str], + filter_regexes: List[str], +) -> List[str]: + """Read the manifest and return entries not present in covered_sources. + + Manifest entries are workspace-relative paths. Filter regexes from the + consumer are applied so that the same exclusions that affect llvm-cov also + affect the synthesized entries. Entries that do not resolve to an existing + file on disk are dropped silently (typically generated files or stale + manifest content). 
+ """ + ws = Path(workspace_root) + compiled_filters = [re.compile(r) for r in filter_regexes if r] + + ws_resolved = ws.resolve() + untested: List[str] = [] + seen: Set[str] = set() + raw = manifest_path.read_text(encoding="utf-8") + for entry in raw.splitlines(): + rel = entry.strip() + if not rel: + continue + abs_path = (ws / rel).resolve() + if not abs_path.is_relative_to(ws_resolved): + continue + if not abs_path.exists() or not abs_path.is_file(): + continue + abs_str = str(abs_path) + if abs_str in covered_sources or abs_str in seen: + continue + if any(rx.search(abs_str) or rx.search(rel) for rx in compiled_filters): + continue + seen.add(abs_str) + untested.append(abs_str) + return sorted(untested) + + +def _count_nonblank_lines(path: str) -> Tuple[List[int], int]: + """Return (list of 1-based non-blank line numbers, total non-blank count).""" + line_numbers: List[int] = [] + try: + with open(path, "r", encoding="utf-8", errors="replace") as f: + for i, line in enumerate(f, 1): + if line.strip(): + line_numbers.append(i) + except OSError: + pass + return line_numbers, len(line_numbers) + + +def _append_zero_coverage_lcov( + lcov_text: str, untested_sources: List[str], workspace_root: str +) -> str: + """Append minimal zero-coverage LCOV records for each untested source.""" + blocks: List[str] = [] + for abs_path in untested_sources: + line_numbers, lf = _count_nonblank_lines(abs_path) + if lf == 0: + continue + da = "\n".join(f"DA:{n},0" for n in line_numbers) + block = ( + f"SF:{abs_path}\n" + f"{da}\n" + f"LF:{lf}\n" + f"LH:0\n" + "end_of_record\n" + ) + blocks.append(block) + if not blocks: + return lcov_text + sep = "" if lcov_text.endswith("\n") else "\n" + return lcov_text + sep + "".join(blocks) + + +def _augment_text_summary(summary_text: str, untested_sources: List[str]) -> str: + """Re-compute the TOTALS line in the llvm-cov report summary. + + llvm-cov report --summary-only emits a table ending with a TOTALS row: + + TOTAL ... ... 
+ + We parse the existing totals, add the untested file line counts, and + rewrite the TOTALS row so that summary.txt and the CI console reflect the + augmented numbers. + """ + extra_lines_found = 0 + for abs_path in untested_sources: + _, lf = _count_nonblank_lines(abs_path) + extra_lines_found += lf + + if extra_lines_found == 0: + return summary_text + + lines = summary_text.splitlines(keepends=True) + totals_idx = None + for i in range(len(lines) - 1, -1, -1): + if lines[i].strip().startswith("TOTAL"): + totals_idx = i + break + + if totals_idx is None: + banner = ( + f"\n[score-coverage] {len(untested_sources)} file(s) not linked " + f"into any test ({extra_lines_found} lines at 0% coverage) — " + f"not reflected in the TOTALS above.\n" + ) + return summary_text + banner + + totals_line = lines[totals_idx] + + # llvm-cov report --summary-only with --show-region-summary=0 emits: + # TOTAL ... + # We match the second numeric triple (the Lines group) and do an in-place + # substitution that preserves the original fixed-width whitespace. 
+ pct_groups = list(re.finditer(r"(\d+)(\s+)(\d+)(\s+)([\d.]+%)", totals_line)) + + if len(pct_groups) >= 2: + m = pct_groups[1] # second triple = Lines group + try: + old_lf = int(m.group(1)) + old_lm = int(m.group(3)) + new_lf = old_lf + extra_lines_found + new_lm = old_lm + extra_lines_found + new_pct = ((new_lf - new_lm) / new_lf * 100) if new_lf > 0 else 0 + + old_lf_str = m.group(1) + old_lm_str = m.group(3) + old_pct_str = m.group(5) + new_lf_str = str(new_lf).rjust(len(old_lf_str)) + new_lm_str = str(new_lm).rjust(len(old_lm_str)) + new_pct_str = f"{new_pct:.2f}%".rjust(len(old_pct_str)) + + replacement = ( + new_lf_str + m.group(2) + new_lm_str + m.group(4) + new_pct_str + ) + lines[totals_idx] = ( + totals_line[:m.start()] + replacement + totals_line[m.end():] + ) + return "".join(lines) + except (ValueError, IndexError): + pass + + banner = ( + f"\n[score-coverage] {len(untested_sources)} file(s) not linked " + f"into any test ({extra_lines_found} lines at 0% coverage) — " + f"not reflected in the TOTALS above.\n" + ) + return summary_text + banner + + +_UNTESTED_HTML_TEMPLATE = """ + + + + + {title} + + +

Coverage Report

+

{title}

+

+Not linked into any test. This source file is reachable from +the configured coverage targets but no test binary instruments it, so every +line is reported as uncovered. +

+ + +
+{body}
+
+ + +""" + + +def _augment_html_with_untested( + html_report_dir: Path, + untested_sources: List[str], + workspace_root: str, +) -> None: + """Create per-file HTML pages for untested sources and link them from index. + + The pages are intentionally minimal: llvm-cov's per-source HTML format is + not easily reproducible without the full coverage mapping, so we render a + plain source dump with a banner that explains the file was not exercised. + The index page gets a new "Not Linked Into Tests" section listing the + files at 0% coverage so the gap is visible to reviewers. + """ + if not html_report_dir.exists(): + return + + coverage_subdir = html_report_dir / "coverage" + output_root = coverage_subdir if coverage_subdir.exists() else html_report_dir + + entries: List[Tuple[str, str, int]] = [] # (rel_source, href, num_lines) + for abs_path in untested_sources: + rel_source = os.path.relpath(abs_path, workspace_root) + # Mirror llvm-cov: per-source HTML lives at /.html + # Strip the leading "/" so that the path joins under output_root. 
+ target_html = output_root / (abs_path.lstrip("/") + ".html") + target_html.parent.mkdir(parents=True, exist_ok=True) + + try: + with open(abs_path, "r", encoding="utf-8", errors="replace") as f: + source_text = f.read() + except OSError: + continue + num_lines = source_text.count("\n") + ( + 0 if source_text.endswith("\n") or not source_text else 1 + ) + + rel_to_root = os.path.relpath(html_report_dir, target_html.parent) + css_path = (Path(rel_to_root) / "style.css").as_posix() + body = _escape_html(source_text) or "(empty file)" + html = ( + _UNTESTED_HTML_TEMPLATE + .replace("{css_path}", css_path) + .replace("{title}", _escape_html(rel_source)) + .replace("{body}", body) + ) + target_html.write_text(html, encoding="utf-8") + + href = target_html.relative_to(html_report_dir).as_posix() + entries.append((rel_source, href, max(num_lines, 1))) + + if not entries: + return + + _inject_untested_section_into_index(html_report_dir / "index.html", entries) + + +def _escape_html(text: str) -> str: + return ( + text.replace("&", "&amp;") + .replace("<", "&lt;") + .replace(">", "&gt;") + .replace("'", "&#39;") + .replace('"', "&quot;") + ) + + +def _inject_untested_section_into_index( + index_file: Path, entries: List[Tuple[str, str, int]] +) -> None: + """Insert a banner + table of untested files into the llvm-cov index page.""" + if not index_file.exists(): + return + + content = index_file.read_text(encoding="utf-8") + + rows = [] + for rel_source, href, num_lines in entries: + rows.append( + "" + f"
{_escape_html(rel_source)}
" + f"
  0.00% (0/{num_lines})
" + "
Not linked into any test
" + "" + ) + + section = ( + "
" + f"{len(entries)} file(s) not linked into any test " + "(counted as 0% coverage)." + "
" + "

Not Linked Into Tests

" + "" + "" + "" + f"{''.join(rows)}
FilenameLine CoverageNote
" + ) + + if "" in content: + content = content.replace("", section + "", 1) + else: + content += section + + index_file.write_text(content, encoding="utf-8") + + def parse_args() -> argparse.Namespace: """Parse command-line arguments matching the Bazel coverage_report_generator interface.""" parser = argparse.ArgumentParser(description="LLVM coverage reporter for Bazel") @@ -327,6 +667,11 @@ def parse_args() -> argparse.Namespace: help="Rlocation path of the llvm-cov binary") parser.add_argument("--llvm_profdata", type=str, required=True, help="Rlocation path of the llvm-profdata binary") + parser.add_argument("--instrumented_sources_manifest", type=str, default=None, + help="Optional rlocation path to a text file listing " + "workspace-relative source files that are expected " + "to be instrumented. Sources missing from the " + "llvm-cov output are added at 0%% coverage.") return parser.parse_args() diff --git a/tests/coverage/BUILD.bazel b/tests/coverage/BUILD.bazel index 4b7bde7..45ceed0 100644 --- a/tests/coverage/BUILD.bazel +++ b/tests/coverage/BUILD.bazel @@ -19,7 +19,11 @@ # `bazel run //coverage:generate_coverage_html -- --yaml coverage/coverage_justifications.yaml`. load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") -load("@score_cpp_policies//coverage:defs.bzl", "score_coverage_reporter") +load( + "@score_cpp_policies//coverage:defs.bzl", + "score_coverage_reporter", + "score_instrumented_sources_manifest", +) cc_library( name = "coverable", @@ -28,6 +32,16 @@ cc_library( visibility = ["//visibility:public"], ) +# A library that is intentionally NOT linked into any cc_test. It exercises +# the "untested files" augmentation: the reporter must surface this file at +# 0% coverage even though llvm-cov never sees its object file. 
+cc_library( + name = "uncovered", + srcs = ["uncovered.cpp"], + hdrs = ["uncovered.h"], + visibility = ["//visibility:public"], +) + cc_test( name = "coverable_test", srcs = ["coverable_test.cpp"], @@ -43,9 +57,22 @@ cc_test( # Exposed for the macro to consume. exports_files(["coverage_filter_regexes.txt"]) +# Manifest of all C/C++ sources reachable from the coverage targets. Anything +# in here that does not appear in the llvm-cov report is added at 0% coverage +# by the reporter. +score_instrumented_sources_manifest( + name = "instrumented_sources", + targets = [ + ":coverable", + ":uncovered", + ":coverable_test", + ], +) + score_coverage_reporter( name = "reporter_wrapper", extra_regex_files = [":coverage_filter_regexes.txt"], + instrumented_sources_manifest = ":instrumented_sources", llvm_cov = "@llvm_toolchain//:llvm-cov", llvm_profdata = "@llvm_toolchain//:llvm-profdata", visibility = ["//visibility:public"], diff --git a/tests/coverage/uncovered.cpp b/tests/coverage/uncovered.cpp new file mode 100644 index 0000000..0d36915 --- /dev/null +++ b/tests/coverage/uncovered.cpp @@ -0,0 +1,29 @@ +// ******************************************************************************* +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. 
+// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// https://www.apache.org/licenses/LICENSE-2.0 +// +// SPDX-License-Identifier: Apache-2.0 +// ******************************************************************************* + +#include "uncovered.h" + +namespace score::cpp_policies::tests { + +int never_called(int value) noexcept +{ + if (value > 0) { + return value + 1; + } + if (value == 0) { + return 42; + } + return -value; +} + +} // namespace score::cpp_policies::tests diff --git a/tests/coverage/uncovered.h b/tests/coverage/uncovered.h new file mode 100644 index 0000000..bfaa5bc --- /dev/null +++ b/tests/coverage/uncovered.h @@ -0,0 +1,26 @@ +// ******************************************************************************* +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// https://www.apache.org/licenses/LICENSE-2.0 +// +// SPDX-License-Identifier: Apache-2.0 +// ******************************************************************************* + +#ifndef SCORE_CPP_POLICIES_TESTS_COVERAGE_UNCOVERED_H_ +#define SCORE_CPP_POLICIES_TESTS_COVERAGE_UNCOVERED_H_ + +namespace score::cpp_policies::tests { + +// Intentionally not linked into any cc_test. Used to verify that the reporter +// surfaces source files that no test exercises as 0%-coverage entries instead +// of silently dropping them. 
+int never_called(int value) noexcept; + +} // namespace score::cpp_policies::tests + +#endif // SCORE_CPP_POLICIES_TESTS_COVERAGE_UNCOVERED_H_ From 6befed4840775cde37520eab0147d69bf493c7b8 Mon Sep 17 00:00:00 2001 From: "Emrich Oliver (ETAS)" Date: Tue, 23 Jun 2026 10:12:09 +0200 Subject: [PATCH 03/11] Fix review findings in coverage reporter - Use heuristic to identify instrumentable lines instead of counting all non-blank lines. Filters comments, preprocessor directives, lone braces, namespace declarations, and access specifiers to avoid inflating LF values in synthetic LCOV records. - Augment summary.txt and console output with untested file line counts so the visible TOTALS reflect the true coverage including 0%-files. - Parse the llvm-cov column header to determine the Lines group index dynamically instead of hardcoding position 1. - Add workspace-bounds check after resolve() in _find_untested_sources to prevent path traversal via symlinks. - Escape single and double quotes in _escape_html to prevent attribute breakout in generated HTML pages. 
--- coverage/reporter.py | 62 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 51 insertions(+), 11 deletions(-) diff --git a/coverage/reporter.py b/coverage/reporter.py index 9965d60..d4f9bd7 100644 --- a/coverage/reporter.py +++ b/coverage/reporter.py @@ -410,13 +410,43 @@ def _find_untested_sources( return sorted(untested) -def _count_nonblank_lines(path: str) -> Tuple[List[int], int]: - """Return (list of 1-based non-blank line numbers, total non-blank count).""" +_NON_EXECUTABLE_RE = re.compile( + r"^\s*(" + r"[{}]" # lone braces + r"|//.*" # line comments + r"|/\*.*" # block comment open + r"|\*.*" # block comment continuation / close + r"|\*/.*" # block comment close + r"|#\s*(?:include|define|undef|if|ifdef|ifndef|elif|else|endif|pragma|error|warning)\b.*" + r"|namespace\b[^{;]*[{]?\s*" # namespace declarations + r"|}\s*//.*" # closing brace with comment + r"|}\s*namespace.*" # closing namespace + r"|public\s*:|private\s*:|protected\s*:" + r")\s*$" +) + + +def _is_likely_executable(line: str) -> bool: + """Heuristic: return True if a line is likely an executable C/C++ statement.""" + stripped = line.strip() + if not stripped: + return False + return _NON_EXECUTABLE_RE.match(line) is None + + +def _count_instrumentable_lines(path: str) -> Tuple[List[int], int]: + """Return (list of 1-based likely-executable line numbers, count). + + This is a conservative approximation. Without a real parser we cannot + perfectly distinguish executable from non-executable lines. The heuristic + excludes blank lines, comments, preprocessor directives, lone braces, + namespace declarations, and access specifiers. 
+ """ line_numbers: List[int] = [] try: with open(path, "r", encoding="utf-8", errors="replace") as f: for i, line in enumerate(f, 1): - if line.strip(): + if _is_likely_executable(line): line_numbers.append(i) except OSError: pass @@ -429,7 +459,7 @@ def _append_zero_coverage_lcov( """Append minimal zero-coverage LCOV records for each untested source.""" blocks: List[str] = [] for abs_path in untested_sources: - line_numbers, lf = _count_nonblank_lines(abs_path) + line_numbers, lf = _count_instrumentable_lines(abs_path) if lf == 0: continue da = "\n".join(f"DA:{n},0" for n in line_numbers) @@ -460,7 +490,7 @@ def _augment_text_summary(summary_text: str, untested_sources: List[str]) -> str """ extra_lines_found = 0 for abs_path in untested_sources: - _, lf = _count_nonblank_lines(abs_path) + _, lf = _count_instrumentable_lines(abs_path) extra_lines_found += lf if extra_lines_found == 0: @@ -483,14 +513,24 @@ def _augment_text_summary(summary_text: str, untested_sources: List[str]) -> str totals_line = lines[totals_idx] - # llvm-cov report --summary-only with --show-region-summary=0 emits: - # TOTAL ... - # We match the second numeric triple (the Lines group) and do an in-place - # substitution that preserves the original fixed-width whitespace. + # Determine which numeric triple corresponds to "Lines" by inspecting the + # column header (the line immediately above TOTAL). llvm-cov report emits + # headers like "Filename Function Line Branch ...". We find the + # column position of "Line" in the header and match it to the correct + # numeric group in the TOTALS row. 
+ lines_group_idx = None + if totals_idx > 0: + header_line = lines[totals_idx - 1] + header_cols = re.finditer(r"\b(Regions?|Functions?|Lines?|Branches?)\b", header_line) + for col_idx, col_match in enumerate(header_cols): + if col_match.group().startswith("Line"): + lines_group_idx = col_idx + break + pct_groups = list(re.finditer(r"(\d+)(\s+)(\d+)(\s+)([\d.]+%)", totals_line)) - if len(pct_groups) >= 2: - m = pct_groups[1] # second triple = Lines group + if lines_group_idx is not None and lines_group_idx < len(pct_groups): + m = pct_groups[lines_group_idx] try: old_lf = int(m.group(1)) old_lm = int(m.group(3)) From 8e27c8722deb9911ded088c57bb17a5bc5bdca4b Mon Sep 17 00:00:00 2001 From: "Emrich Oliver (ETAS)" Date: Tue, 23 Jun 2026 10:28:28 +0200 Subject: [PATCH 04/11] Fix pointer-deref false-positive and add reporter unit tests - Narrow _NON_EXECUTABLE_RE block-comment pattern from `|\*.*` to `|\*(?:[/\s].*)?` so that pointer dereferences (`*ptr = value;`) are correctly classified as executable. - Add py_test with unit tests for all reporter augmentation helpers: _is_likely_executable, _count_instrumentable_lines, _covered_sources_from_lcov, _find_untested_sources, _append_zero_coverage_lcov, _augment_text_summary, _escape_html. Includes a path-traversal rejection test for _find_untested_sources. --- coverage/reporter.py | 2 +- tests/coverage/BUILD.bazel | 7 + tests/coverage/reporter_test.py | 244 ++++++++++++++++++++++++++++++++ 3 files changed, 252 insertions(+), 1 deletion(-) create mode 100644 tests/coverage/reporter_test.py diff --git a/coverage/reporter.py b/coverage/reporter.py index d4f9bd7..50badd6 100644 --- a/coverage/reporter.py +++ b/coverage/reporter.py @@ -415,7 +415,7 @@ def _find_untested_sources( r"[{}]" # lone braces r"|//.*" # line comments r"|/\*.*" # block comment open - r"|\*.*" # block comment continuation / close + r"|\*(?:[/\s].*)?" 
# block comment continuation (* text) / close (*/), lone * r"|\*/.*" # block comment close r"|#\s*(?:include|define|undef|if|ifdef|ifndef|elif|else|endif|pragma|error|warning)\b.*" r"|namespace\b[^{;]*[{]?\s*" # namespace declarations diff --git a/tests/coverage/BUILD.bazel b/tests/coverage/BUILD.bazel index 45ceed0..88047e5 100644 --- a/tests/coverage/BUILD.bazel +++ b/tests/coverage/BUILD.bazel @@ -19,6 +19,7 @@ # `bazel run //coverage:generate_coverage_html -- --yaml coverage/coverage_justifications.yaml`. load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") +load("@rules_python//python:defs.bzl", "py_test") load( "@score_cpp_policies//coverage:defs.bzl", "score_coverage_reporter", @@ -69,6 +70,12 @@ score_instrumented_sources_manifest( ], ) +py_test( + name = "reporter_test", + srcs = ["reporter_test.py"], + deps = ["@score_cpp_policies//coverage:reporter"], +) + score_coverage_reporter( name = "reporter_wrapper", extra_regex_files = [":coverage_filter_regexes.txt"], diff --git a/tests/coverage/reporter_test.py b/tests/coverage/reporter_test.py new file mode 100644 index 0000000..634bbd0 --- /dev/null +++ b/tests/coverage/reporter_test.py @@ -0,0 +1,244 @@ +#!/usr/bin/env python3 +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +"""Unit tests for the coverage reporter's untested-files augmentation logic. 
+ +These tests exercise the helper functions that identify untested source files +and synthesize 0%-coverage LCOV records, without requiring a full +bazel coverage run or llvm-cov toolchain. +""" + +import os +import tempfile +import textwrap +import unittest +from pathlib import Path + +from coverage.reporter import ( + _append_zero_coverage_lcov, + _augment_text_summary, + _count_instrumentable_lines, + _covered_sources_from_lcov, + _escape_html, + _find_untested_sources, + _is_likely_executable, +) + + +class IsLikelyExecutableTest(unittest.TestCase): + def test_executable_statements(self): + for line in [ + " return 42;", + " int x = foo();", + " if (x > 0) {", + " bar(x);", + " x = *ptr;", + " *ptr = value;", + " **pp = data;", + ]: + self.assertTrue(_is_likely_executable(line), f"should be executable: {line!r}") + + def test_non_executable_lines(self): + for line in [ + "", + " ", + "// comment", + "/* block open", + " * continuation", + " */", + "#include ", + '#include "bar.h"', + "#define FOO 42", + "#ifdef SOMETHING", + "#endif", + "#pragma once", + "{", + "}", + "namespace score {", + "namespace score::detail {", + "} // namespace score", + "public:", + " private:", + " protected:", + ]: + self.assertFalse(_is_likely_executable(line), f"should NOT be executable: {line!r}") + + +class CountInstrumentableLinesTest(unittest.TestCase): + def test_mixed_cpp_file(self): + content = textwrap.dedent("""\ + // Copyright header + #include "foo.h" + + namespace test { + + int foo(int x) noexcept + { + if (x > 0) { + return x + 1; + } + return -x; + } + + } // namespace test + """) + with tempfile.NamedTemporaryFile(mode="w", suffix=".cpp", delete=False) as f: + f.write(content) + f.flush() + try: + line_numbers, count = _count_instrumentable_lines(f.name) + self.assertEqual(count, 4) + self.assertIn(6, line_numbers) # int foo(int x) noexcept + self.assertIn(8, line_numbers) # if (x > 0) { + self.assertIn(9, line_numbers) # return x + 1; + self.assertIn(11, 
line_numbers) # return -x; + finally: + os.unlink(f.name) + + def test_nonexistent_file(self): + line_numbers, count = _count_instrumentable_lines("/nonexistent/file.cpp") + self.assertEqual(count, 0) + self.assertEqual(line_numbers, []) + + +class CoveredSourcesFromLcovTest(unittest.TestCase): + def test_extracts_sf_entries(self): + lcov = textwrap.dedent("""\ + SF:/workspace/src/a.cpp + DA:1,5 + DA:2,0 + LF:2 + LH:1 + end_of_record + SF:/workspace/src/b.cpp + DA:1,3 + LF:1 + LH:1 + end_of_record + """) + sources = _covered_sources_from_lcov(lcov) + self.assertIn(os.path.realpath("/workspace/src/a.cpp"), sources) + self.assertIn(os.path.realpath("/workspace/src/b.cpp"), sources) + self.assertEqual(len(sources), 2) + + def test_empty_lcov(self): + self.assertEqual(_covered_sources_from_lcov(""), set()) + + +class FindUntestedSourcesTest(unittest.TestCase): + def test_filters_covered_and_nonexistent(self): + with tempfile.TemporaryDirectory() as ws: + src_a = Path(ws) / "src" / "a.cpp" + src_b = Path(ws) / "src" / "b.cpp" + src_a.parent.mkdir(parents=True) + src_a.write_text("int a() { return 1; }\n") + src_b.write_text("int b() { return 2; }\n") + + manifest = Path(ws) / "manifest.txt" + manifest.write_text("src/a.cpp\nsrc/b.cpp\nsrc/gone.cpp\n") + + covered = {str(src_a.resolve())} + result = _find_untested_sources(manifest, ws, covered, []) + self.assertEqual(len(result), 1) + self.assertIn(str(src_b.resolve()), result) + + def test_respects_filter_regexes(self): + with tempfile.TemporaryDirectory() as ws: + src = Path(ws) / "generated" / "foo.cpp" + src.parent.mkdir() + src.write_text("int foo() { return 0; }\n") + + manifest = Path(ws) / "manifest.txt" + manifest.write_text("generated/foo.cpp\n") + + result = _find_untested_sources(manifest, ws, set(), ["generated/"]) + self.assertEqual(result, []) + + def test_rejects_path_traversal(self): + with tempfile.TemporaryDirectory() as ws: + outside = Path(ws).parent / "outside.cpp" + outside.write_text("int secret() 
{ return 42; }\n") + + manifest = Path(ws) / "manifest.txt" + manifest.write_text(f"../{outside.name}\n") + + try: + result = _find_untested_sources(manifest, ws, set(), []) + self.assertEqual(result, []) + finally: + outside.unlink(missing_ok=True) + + +class AppendZeroCoverageLcovTest(unittest.TestCase): + def test_appends_records_with_lh_zero(self): + with tempfile.TemporaryDirectory() as ws: + src = Path(ws) / "untested.cpp" + src.write_text(textwrap.dedent("""\ + #include "untested.h" + int foo() { + return 42; + } + """)) + lcov = "SF:/other.cpp\nDA:1,5\nLF:1\nLH:1\nend_of_record\n" + result = _append_zero_coverage_lcov(lcov, [str(src)], ws) + + self.assertIn(f"SF:{src}", result) + self.assertIn("LH:0", result) + self.assertIn("end_of_record", result) + lines = result.split("\n") + sf_lines = [l for l in lines if l.startswith("SF:")] + self.assertEqual(len(sf_lines), 2) + + def test_empty_untested_returns_original(self): + lcov = "SF:/a.cpp\nend_of_record\n" + self.assertEqual(_append_zero_coverage_lcov(lcov, [], "/ws"), lcov) + + +class EscapeHtmlTest(unittest.TestCase): + def test_escapes_all_special_chars(self): + self.assertIn("&", _escape_html("a & b")) + self.assertIn("<", _escape_html("")) + self.assertIn(">", _escape_html("")) + self.assertIn("'", _escape_html("it's")) + self.assertIn(""", _escape_html('"quoted"')) + + +class AugmentTextSummaryTest(unittest.TestCase): + def test_rewrites_totals_line(self): + with tempfile.TemporaryDirectory() as ws: + src = Path(ws) / "untested.cpp" + src.write_text("int foo() {\n return 42;\n}\n") + + summary = textwrap.dedent("""\ + Filename Functions Lines Branches + --- --- --- --- + /ws/tested.cpp 2 0 100.00% 10 0 100.00% 4 0 100.00% + --- --- --- --- + TOTAL 2 0 100.00% 10 0 100.00% 4 0 100.00% + """) + result = _augment_text_summary(summary, [str(src)]) + self.assertNotIn("100.00%", result.split("\n")[-2]) + self.assertIn("TOTAL", result) + + def test_fallback_banner_on_missing_header(self): + with 
tempfile.TemporaryDirectory() as ws: + src = Path(ws) / "untested.cpp" + src.write_text("int foo() { return 1; }\n") + + summary = "TOTAL 2 0 100.00% 10 0 100.00%\n" + result = _augment_text_summary(summary, [str(src)]) + self.assertIn("[score-coverage]", result) + + +if __name__ == "__main__": + unittest.main() From e7bb8e744911ac814c08d9a096c76449ab30dbfa Mon Sep 17 00:00:00 2001 From: "Emrich Oliver (ETAS)" Date: Tue, 23 Jun 2026 10:36:14 +0200 Subject: [PATCH 05/11] Add reporter_lib py_library and fix testonly manifest dep MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add py_library target for reporter so unit tests can import it. - Remove coverable_test from instrumented_sources manifest targets to avoid testonly dependency violation (test sources don't need to appear in the manifest — they're tested by definition). --- coverage/BUILD.bazel | 9 ++++++++- tests/coverage/BUILD.bazel | 3 +-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/coverage/BUILD.bazel b/coverage/BUILD.bazel index 9d59cdb..cb8caa9 100644 --- a/coverage/BUILD.bazel +++ b/coverage/BUILD.bazel @@ -11,7 +11,7 @@ # SPDX-License-Identifier: Apache-2.0 # ******************************************************************************* -load("@rules_python//python:defs.bzl", "py_binary") +load("@rules_python//python:defs.bzl", "py_binary", "py_library") load("@rules_shell//shell:sh_binary.bzl", "sh_binary") package(default_visibility = ["//visibility:public"]) @@ -46,6 +46,13 @@ py_binary( # letting each consumer pick its own llvm_toolchain repository name and # version (see score_coverage_reporter in coverage/defs.bzl). 
# --------------------------------------------------------------------------- +py_library( + name = "reporter_lib", + srcs = ["reporter.py"], + imports = [".."], + deps = ["@rules_python//python/runfiles"], +) + py_binary( name = "reporter", srcs = ["reporter.py"], diff --git a/tests/coverage/BUILD.bazel b/tests/coverage/BUILD.bazel index 88047e5..e9d8df5 100644 --- a/tests/coverage/BUILD.bazel +++ b/tests/coverage/BUILD.bazel @@ -66,14 +66,13 @@ score_instrumented_sources_manifest( targets = [ ":coverable", ":uncovered", - ":coverable_test", ], ) py_test( name = "reporter_test", srcs = ["reporter_test.py"], - deps = ["@score_cpp_policies//coverage:reporter"], + deps = ["@score_cpp_policies//coverage:reporter_lib"], ) score_coverage_reporter( From b7f31176181b2563ff68ce97a14d53d5c9b7ce5a Mon Sep 17 00:00:00 2001 From: "Emrich Oliver (ETAS)" Date: Tue, 23 Jun 2026 11:10:34 +0200 Subject: [PATCH 06/11] Replace TOTALS rewrite with honest banner, add HTML top-banner The heuristic line count (_count_instrumentable_lines) cannot replicate what llvm-cov would report for actually-instrumented objects. Rewriting TOTALS with approximate numbers gives false precision. - _augment_text_summary: no longer rewrites the TOTALS line; appends a clearly-labelled WARNING banner with ~N estimated lines instead. - _inject_untested_section_into_index: injects a prominent banner right after so it is the first thing reviewers see. Detail table uses ~N notation and includes a disclaimer about the heuristic. - _append_zero_coverage_lcov docstring documents the approximation. - Tests updated to match banner-only behavior. 
--- coverage/reporter.py | 143 +++++++++++++------------------- tests/coverage/reporter_test.py | 16 ++-- 2 files changed, 66 insertions(+), 93 deletions(-) diff --git a/coverage/reporter.py b/coverage/reporter.py index 50badd6..054663e 100644 --- a/coverage/reporter.py +++ b/coverage/reporter.py @@ -456,7 +456,13 @@ def _count_instrumentable_lines(path: str) -> Tuple[List[int], int]: def _append_zero_coverage_lcov( lcov_text: str, untested_sources: List[str], workspace_root: str ) -> str: - """Append minimal zero-coverage LCOV records for each untested source.""" + """Append synthetic zero-coverage LCOV records for each untested source. + + LF/DA values are heuristic estimates from _count_instrumentable_lines, + not actual llvm-cov instrumentation data. The counts approximate what + llvm-cov would report but may differ for templates, inlined constructors, + multi-line statements, initializer lists, and lambdas. + """ blocks: List[str] = [] for abs_path in untested_sources: line_numbers, lf = _count_instrumentable_lines(abs_path) @@ -478,87 +484,26 @@ def _append_zero_coverage_lcov( def _augment_text_summary(summary_text: str, untested_sources: List[str]) -> str: - """Re-compute the TOTALS line in the llvm-cov report summary. - - llvm-cov report --summary-only emits a table ending with a TOTALS row: - - TOTAL ... ... - - We parse the existing totals, add the untested file line counts, and - rewrite the TOTALS row so that summary.txt and the CI console reflect the - augmented numbers. + """Append a banner to the llvm-cov report summary for untested files. + + The TOTALS line from llvm-cov is left untouched because the heuristic + line count (_count_instrumentable_lines) is only an approximation — it + cannot replicate what llvm-cov would report for actually-instrumented + object files. Rewriting TOTALS with approximate numbers would give a + false sense of precision. 
Instead we append a clearly-labelled banner + so that CI consumers and reviewers see the gap without mistaking an + estimate for an exact measurement. """ extra_lines_found = 0 for abs_path in untested_sources: _, lf = _count_instrumentable_lines(abs_path) extra_lines_found += lf - if extra_lines_found == 0: - return summary_text - - lines = summary_text.splitlines(keepends=True) - totals_idx = None - for i in range(len(lines) - 1, -1, -1): - if lines[i].strip().startswith("TOTAL"): - totals_idx = i - break - - if totals_idx is None: - banner = ( - f"\n[score-coverage] {len(untested_sources)} file(s) not linked " - f"into any test ({extra_lines_found} lines at 0% coverage) — " - f"not reflected in the TOTALS above.\n" - ) - return summary_text + banner - - totals_line = lines[totals_idx] - - # Determine which numeric triple corresponds to "Lines" by inspecting the - # column header (the line immediately above TOTAL). llvm-cov report emits - # headers like "Filename Function Line Branch ...". We find the - # column position of "Line" in the header and match it to the correct - # numeric group in the TOTALS row. 
- lines_group_idx = None - if totals_idx > 0: - header_line = lines[totals_idx - 1] - header_cols = re.finditer(r"\b(Regions?|Functions?|Lines?|Branches?)\b", header_line) - for col_idx, col_match in enumerate(header_cols): - if col_match.group().startswith("Line"): - lines_group_idx = col_idx - break - - pct_groups = list(re.finditer(r"(\d+)(\s+)(\d+)(\s+)([\d.]+%)", totals_line)) - - if lines_group_idx is not None and lines_group_idx < len(pct_groups): - m = pct_groups[lines_group_idx] - try: - old_lf = int(m.group(1)) - old_lm = int(m.group(3)) - new_lf = old_lf + extra_lines_found - new_lm = old_lm + extra_lines_found - new_pct = ((new_lf - new_lm) / new_lf * 100) if new_lf > 0 else 0 - - old_lf_str = m.group(1) - old_lm_str = m.group(3) - old_pct_str = m.group(5) - new_lf_str = str(new_lf).rjust(len(old_lf_str)) - new_lm_str = str(new_lm).rjust(len(old_lm_str)) - new_pct_str = f"{new_pct:.2f}%".rjust(len(old_pct_str)) - - replacement = ( - new_lf_str + m.group(2) + new_lm_str + m.group(4) + new_pct_str - ) - lines[totals_idx] = ( - totals_line[:m.start()] + replacement + totals_line[m.end():] - ) - return "".join(lines) - except (ValueError, IndexError): - pass - banner = ( - f"\n[score-coverage] {len(untested_sources)} file(s) not linked " - f"into any test ({extra_lines_found} lines at 0% coverage) — " - f"not reflected in the TOTALS above.\n" + f"\n[score-coverage] WARNING: {len(untested_sources)} source file(s) " + f"not linked into any test (~{extra_lines_found} instrumentable lines, " + f"estimated via heuristic). These files are absent from the TOTALS above " + f"and contribute 0% coverage. 
See lcov.dat and the HTML report for details.\n" ) return summary_text + banner @@ -657,39 +602,65 @@ def _escape_html(text: str) -> str: def _inject_untested_section_into_index( index_file: Path, entries: List[Tuple[str, str, int]] ) -> None: - """Insert a banner + table of untested files into the llvm-cov index page.""" + """Insert a top-banner and detail table for untested files into the index. + + The banner is injected right after so it is the first thing a + reviewer sees. It explicitly labels the line count as a heuristic + estimate to avoid false precision. The detail table with per-file links + is appended before . + """ if not index_file.exists(): return content = index_file.read_text(encoding="utf-8") + total_estimated_lines = sum(n for _, _, n in entries) + + top_banner = ( + "
" + f"⚠️ {len(entries)} source file(s) not linked into " + f"any test (~{total_estimated_lines} instrumentable lines, " + "estimated via heuristic). The coverage percentages above do " + "not include these files. See the " + "detail table below." + "
" + ) + rows = [] for rel_source, href, num_lines in entries: rows.append( "" f"
{_escape_html(rel_source)}
" - f"
  0.00% (0/{num_lines})
" + f"
  0.00% (0/~{num_lines})
" "
Not linked into any test
" "" ) - section = ( - "
" - f"{len(entries)} file(s) not linked into any test " - "(counted as 0% coverage)." - "
" + detail_section = ( + "" "

Not Linked Into Tests

" + "

Line counts are heuristic " + "estimates (non-blank, non-comment, non-directive lines). Actual " + "instrumentable line counts may differ from what llvm-cov would report.

" "" - "" + "" "" f"{''.join(rows)}
FilenameLine CoverageLine Coverage (est.)Note
" ) + if "" in content: + content = content.replace("", "" + top_banner, 1) + elif "") + 1 + content = content[:body_end] + top_banner + content[body_end:] + else: + content = top_banner + content + if "" in content: - content = content.replace("", section + "", 1) + content = content.replace("", detail_section + "", 1) else: - content += section + content += detail_section index_file.write_text(content, encoding="utf-8") diff --git a/tests/coverage/reporter_test.py b/tests/coverage/reporter_test.py index 634bbd0..3a27479 100644 --- a/tests/coverage/reporter_test.py +++ b/tests/coverage/reporter_test.py @@ -214,7 +214,7 @@ def test_escapes_all_special_chars(self): class AugmentTextSummaryTest(unittest.TestCase): - def test_rewrites_totals_line(self): + def test_appends_banner_without_modifying_totals(self): with tempfile.TemporaryDirectory() as ws: src = Path(ws) / "untested.cpp" src.write_text("int foo() {\n return 42;\n}\n") @@ -222,22 +222,24 @@ def test_rewrites_totals_line(self): summary = textwrap.dedent("""\ Filename Functions Lines Branches --- --- --- --- - /ws/tested.cpp 2 0 100.00% 10 0 100.00% 4 0 100.00% - --- --- --- --- TOTAL 2 0 100.00% 10 0 100.00% 4 0 100.00% """) result = _augment_text_summary(summary, [str(src)]) - self.assertNotIn("100.00%", result.split("\n")[-2]) - self.assertIn("TOTAL", result) + self.assertIn("[score-coverage]", result) + self.assertIn("WARNING", result) + self.assertIn("estimated via heuristic", result) + totals_line = [l for l in result.splitlines() if "TOTAL" in l and "score-coverage" not in l][0] + self.assertIn("100.00%", totals_line) - def test_fallback_banner_on_missing_header(self): + def test_banner_contains_file_count_and_line_estimate(self): with tempfile.TemporaryDirectory() as ws: src = Path(ws) / "untested.cpp" src.write_text("int foo() { return 1; }\n") summary = "TOTAL 2 0 100.00% 10 0 100.00%\n" result = _augment_text_summary(summary, [str(src)]) - self.assertIn("[score-coverage]", result) + 
self.assertIn("1 source file(s)", result) + self.assertIn("~1 instrumentable lines", result) if __name__ == "__main__": From d762cc5cd9fe6ce5920c0f246303662f71a698ee Mon Sep 17 00:00:00 2001 From: "Emrich Oliver (ETAS)" Date: Fri, 26 Jun 2026 10:30:59 +0200 Subject: [PATCH 07/11] Replace genrule script generation with Starlark rule in score_coverage_reporter The previous implementation used a genrule + heredoc to generate the reporter wrapper shell script. This required three-level escaping (\\$$) to handle Bazel make-variable and bash variable expansion in the same string which makes the code fragile and hard to debug. Replace with a custom Starlark rule (`_score_coverage_reporter_rule`) that uses `ctx.actions.write()` to produce the wrapper script directly. A new `_rlocation_path()` helper computes `Runfiles.Rlocation()`-compatible paths from File objects, eliminating all genrule make-variable gymnastics. The `--workspace_root` argument (previously computed via `readlink -f` in bash) is replaced by `--module_bazel` (a plain rlocation string). The reporter resolves it with `r.Rlocation()` and derives the workspace root in Python. The generated wrapper is now a readable 7-line script with no escaping. `rules_shell` is no longer a dependency of defs.bzl. 
Addresses review comment: https://github.com/eclipse-score/score_cpp_policies/pull/9#discussion_r3477218608 --- coverage/defs.bzl | 164 ++++++++++++++++++++++++++----------------- coverage/reporter.py | 11 ++- 2 files changed, 107 insertions(+), 68 deletions(-) diff --git a/coverage/defs.bzl b/coverage/defs.bzl index 3039243..a2ae3ae 100644 --- a/coverage/defs.bzl +++ b/coverage/defs.bzl @@ -44,7 +44,6 @@ and from the consumer .bazelrc: coverage --coverage_report_generator=//tools/coverage:reporter_wrapper """ -load("@rules_shell//shell:sh_binary.bzl", "sh_binary") _BASELINE_REGEX = "@score_cpp_policies//coverage:filter_regexes.txt" _REPORTER = "@score_cpp_policies//coverage:reporter" @@ -142,6 +141,98 @@ target to score_coverage_reporter(instrumented_sources_manifest = ...) so the reporter can add 0%-coverage entries for files that no test linked against.""", ) +def _rlocation_path(ctx, file): + """Return the Runfiles.Rlocation()-compatible path for a Bazel File. + + External-repo files have short_path = "../repo/path" — strip the "../". + Main-workspace files have short_path = "pkg/file" — prepend workspace name. + """ + if file.short_path.startswith("../"): + return file.short_path[3:] + return ctx.workspace_name + "/" + file.short_path + +# Template for the thin wrapper script generated per consumer. +# Uses %s substitution so bash $-variables are never touched by Starlark. +_WRAPPER_TEMPLATE = """\ +#!/usr/bin/env bash +set -euo pipefail +if [[ -z "${RUNFILES_DIR:-}" || ! 
-d "${RUNFILES_DIR}" ]]; then + RUNFILES_DIR="$(cd "$(dirname "$0")" && pwd)/$(basename "$0").runfiles" +fi +exec "${RUNFILES_DIR}/%s" \\ + --filter_regexes="%s" \\ + --module_bazel="%s" \\ + --llvm_cov="%s" \\ + --llvm_profdata="%s" \\ +%s "$@" +""" + + +def _score_coverage_reporter_impl(ctx): + reporter_rloc = _rlocation_path(ctx, ctx.executable._reporter) + filter_rloc = _rlocation_path(ctx, ctx.file.filter_regexes) + module_bazel_rloc = _rlocation_path(ctx, ctx.file.module_bazel) + llvm_cov_rloc = _rlocation_path(ctx, ctx.file.llvm_cov) + llvm_profdata_rloc = _rlocation_path(ctx, ctx.file.llvm_profdata) + + manifest_line = "" + if ctx.file.instrumented_sources_manifest: + manifest_rloc = _rlocation_path(ctx, ctx.file.instrumented_sources_manifest) + manifest_line = ( + " --instrumented_sources_manifest=\"%s\" \\\n" % manifest_rloc + ) + + wrapper = ctx.actions.declare_file(ctx.label.name + ".sh") + ctx.actions.write( + output = wrapper, + content = _WRAPPER_TEMPLATE % ( + reporter_rloc, + filter_rloc, + module_bazel_rloc, + llvm_cov_rloc, + llvm_profdata_rloc, + manifest_line, + ), + is_executable = True, + ) + + runfiles_files = [ + ctx.file.filter_regexes, + ctx.file.module_bazel, + ctx.file.llvm_cov, + ctx.file.llvm_profdata, + ] + if ctx.file.instrumented_sources_manifest: + runfiles_files.append(ctx.file.instrumented_sources_manifest) + + runfiles = ctx.runfiles(files = runfiles_files).merge( + ctx.attr._reporter[DefaultInfo].default_runfiles, + ) + + return [DefaultInfo(executable = wrapper, runfiles = runfiles)] + + +_score_coverage_reporter_rule = rule( + implementation = _score_coverage_reporter_impl, + executable = True, + attrs = { + "llvm_cov": attr.label(mandatory = True, allow_single_file = True), + "llvm_profdata": attr.label(mandatory = True, allow_single_file = True), + "filter_regexes": attr.label(mandatory = True, allow_single_file = True), + "module_bazel": attr.label(mandatory = True, allow_single_file = True), + 
"instrumented_sources_manifest": attr.label( + allow_single_file = True, + default = None, + ), + "_reporter": attr.label( + default = Label(_REPORTER), + executable = True, + cfg = "exec", + ), + }, +) + + def score_coverage_reporter( name, llvm_cov, @@ -170,23 +261,14 @@ def score_coverage_reporter( every file in the manifest that did not appear in the llvm-cov report (i.e. files that no test linked against). - **kwargs: Forwarded to the underlying sh_binary (e.g. visibility, tags). + **kwargs: Forwarded to the underlying rule (e.g. visibility, tags). """ extra_regex_files = extra_regex_files or [] merged_name = name + "_merged_filter_regexes" merged_out = merged_name + ".txt" - wrapper_gen_name = name + "_wrapper_gen" - wrapper_out = name + ".sh" - - manifest_srcs = [instrumented_sources_manifest] if instrumented_sources_manifest else [] - manifest_flag_line = ( - " --instrumented_sources_manifest=\"$(rlocationpath %s)\" \\\\\n" % instrumented_sources_manifest - if instrumented_sources_manifest else "" - ) # Concatenate baseline regexes + consumer extras into a single file. - # Order is irrelevant for llvm-cov; it treats them as a set. native.genrule( name = merged_name, srcs = [_BASELINE_REGEX] + list(extra_regex_files), @@ -194,60 +276,12 @@ def score_coverage_reporter( cmd = "cat $(SRCS) > $@", ) - # Generate the wrapper shell script. It computes the consumer workspace - # root from the runfiles location of //:MODULE.bazel and then execs the - # shared reporter binary with the merged regex file, workspace root, and - # consumer-supplied llvm tool rlocation paths. - # - # Escaping note: this genrule uses an unquoted heredoc (`<< EOF`) so the - # shell would normally expand $... — we escape each `$` we want literal - # in the output script as `\\$$`: - # * `$$` is Bazel's escape for a literal `$`. - # * `\` then makes the heredoc treat that `$` as literal. 
- # `$(rlocationpath ...)` IS a Bazel make-variable and is intentionally - # expanded at genrule time so the actual rlocation path is baked into - # the script. - native.genrule( - name = wrapper_gen_name, - srcs = [ - ":" + merged_name, - "//:MODULE.bazel", - llvm_cov, - llvm_profdata, - ] + manifest_srcs, - outs = [wrapper_out], - tools = [_REPORTER], - cmd = ("""cat > $@ << EOF -#!/usr/bin/env bash -set -euo pipefail -_SELF_DIR="\\$$(cd "\\$$(dirname "\\$$0")" && pwd)" -_SELF_NAME="\\$$(basename "\\$$0")" -if [[ -z "\\$${RUNFILES_DIR:-}" || ! -d "\\$${RUNFILES_DIR}" ]]; then - if [[ -d "\\$${_SELF_DIR}/\\$${_SELF_NAME}.runfiles" ]]; then - export RUNFILES_DIR="\\$${_SELF_DIR}/\\$${_SELF_NAME}.runfiles" - fi -fi -WORKSPACE_ROOT="\\$$(cd "\\$$(dirname "\\$$(readlink -f "\\$${RUNFILES_DIR}/$(rlocationpath //:MODULE.bazel)")")" && pwd)/" -exec "\\$${RUNFILES_DIR}/$(rlocationpath %s)" \\\\ - --filter_regexes="$(rlocationpath :%s)" \\\\ - --workspace_root="\\$${WORKSPACE_ROOT}" \\\\ - --llvm_cov="$(rlocationpath %s)" \\\\ - --llvm_profdata="$(rlocationpath %s)" \\\\ -%s "\\$$@" -EOF -chmod +x $@ -""" % (_REPORTER, merged_name, llvm_cov, llvm_profdata, manifest_flag_line)), - ) - - sh_binary( + _score_coverage_reporter_rule( name = name, - srcs = [":" + wrapper_gen_name], - data = [ - ":" + merged_name, - _REPORTER, - "//:MODULE.bazel", - llvm_cov, - llvm_profdata, - ] + manifest_srcs, + llvm_cov = llvm_cov, + llvm_profdata = llvm_profdata, + filter_regexes = ":" + merged_name, + module_bazel = "//:MODULE.bazel", + instrumented_sources_manifest = instrumented_sources_manifest, **kwargs ) diff --git a/coverage/reporter.py b/coverage/reporter.py index 054663e..e031d60 100644 --- a/coverage/reporter.py +++ b/coverage/reporter.py @@ -83,7 +83,11 @@ def main() -> None: # Get filter regexes and workspace root. 
filter_regexes = load_filter_regexes(r, args.filter_regexes) - workspace_root = args.workspace_root + module_bazel_resolved = r.Rlocation(args.module_bazel) + if not module_bazel_resolved: + print(f"ERROR: MODULE.bazel not found in runfiles via {args.module_bazel}", file=sys.stderr) + sys.exit(1) + workspace_root = str(Path(module_bazel_resolved).parent) + "/" common_show_args = { "llvm_bin_path": llvm_bin_path, @@ -672,8 +676,9 @@ def parse_args() -> argparse.Namespace: parser.add_argument("--reports_file", type=Path, required=True) parser.add_argument("--filter_regexes", type=str, required=True, help="Rlocation path to the filter regexes file") - parser.add_argument("--workspace_root", type=str, required=True, - help="Real workspace root path for source path mapping") + parser.add_argument("--module_bazel", type=str, required=True, + help="Rlocation path of the consumer MODULE.bazel; " + "its parent directory is used as the workspace root") parser.add_argument("--llvm_cov", type=str, required=True, help="Rlocation path of the llvm-cov binary") parser.add_argument("--llvm_profdata", type=str, required=True, From 5b1117ef3e886ffb26318956ef7d0ef4ef7c8efc Mon Sep 17 00:00:00 2001 From: "Emrich Oliver (ETAS)" Date: Thu, 2 Jul 2026 09:45:04 +0200 Subject: [PATCH 08/11] Fix untested-source files not surfacing under sandboxed execution score_instrumented_sources_manifest only emitted the manifest .txt listing source paths, never propagating the actual source Files as runfiles. Under the linux-sandbox strategy the reporter therefore couldn't find those files on disk, so _find_untested_sources() silently dropped every manifest entry and 0%-coverage files never appeared in the report. Even after fixing runfiles propagation, a second bug remained: _find_untested_sources() resolved each manifest path and rejected it if it fell outside workspace_root. 
Runfiles are frequently symlinks that legitimately resolve to the real on-disk source tree outside the sandboxed workspace_root, so this check rejected every legitimate entry as a false-positive path traversal. - _instrumented_sources_manifest_impl: return runfiles for the source Files (not just the manifest file) via DefaultInfo, and expose the InstrumentedSourcesInfo provider. - _score_coverage_reporter_impl: merge the manifest target's default_runfiles into the wrapper so the source files are present on disk when the reporter runs sandboxed. - _find_untested_sources: reject path traversal based on the raw manifest-relative path (no ".." components) instead of resolve()-then-containment, so legitimate runfiles symlinks are no longer dropped. Returns (abs_path, rel_path) pairs so callers can display the clean manifest-relative name instead of the resolved on-disk path. - _augment_html_with_untested: use the manifest-relative name for display instead of recomputing it from the resolved absolute path, which leaked the full host filesystem path once symlinks were followed. - Add regression test covering manifest entries reached only through symlinks. --- coverage/defs.bzl | 26 +++++++++++++++++-- coverage/reporter.py | 45 ++++++++++++++++++++------------- tests/coverage/reporter_test.py | 24 +++++++++++++++++- 3 files changed, 75 insertions(+), 20 deletions(-) diff --git a/coverage/defs.bzl b/coverage/defs.bzl index a2ae3ae..21f1347 100644 --- a/coverage/defs.bzl +++ b/coverage/defs.bzl @@ -115,7 +115,8 @@ def _instrumented_sources_manifest_impl(ctx): for t in ctx.attr.targets if InstrumentedSourcesInfo in t ] - files = depset(transitive = transitive).to_list() + sources = depset(transitive = transitive) + files = sources.to_list() # Deduplicate (Starlark has no ordered set type) and sort for determinism. 
paths = sorted({f.short_path: None for f in files}.keys()) @@ -123,7 +124,18 @@ def _instrumented_sources_manifest_impl(ctx): out = ctx.actions.declare_file(ctx.label.name + ".txt") content = "\n".join(paths) + ("\n" if paths else "") ctx.actions.write(output = out, content = content) - return [DefaultInfo(files = depset([out]))] + + # The manifest text file only lists paths - the reporter also needs the + # actual source files present on disk (as runfiles) so it can read them + # under sandboxing. Expose them via default_runfiles so consumers that + # depend on this target (e.g. score_coverage_reporter) can merge them in. + return [ + DefaultInfo( + files = depset([out]), + runfiles = ctx.runfiles(transitive_files = sources), + ), + InstrumentedSourcesInfo(sources = sources), + ] score_instrumented_sources_manifest = rule( implementation = _instrumented_sources_manifest_impl, @@ -209,6 +221,16 @@ def _score_coverage_reporter_impl(ctx): ctx.attr._reporter[DefaultInfo].default_runfiles, ) + # Merge in the actual instrumented source files (not just the manifest + # .txt listing their paths) so the reporter can find them on disk when + # the coverage-report-generator action runs sandboxed. Without this, + # _find_untested_sources() silently drops every manifest entry because + # the workspace-relative path does not resolve to an existing file. 
+ if ctx.attr.instrumented_sources_manifest: + runfiles = runfiles.merge( + ctx.attr.instrumented_sources_manifest[DefaultInfo].default_runfiles, + ) + return [DefaultInfo(executable = wrapper, runfiles = runfiles)] diff --git a/coverage/reporter.py b/coverage/reporter.py index e031d60..4d01602 100644 --- a/coverage/reporter.py +++ b/coverage/reporter.py @@ -29,7 +29,7 @@ import subprocess import sys import zipfile -from pathlib import Path +from pathlib import Path, PurePosixPath from typing import List, Set, Tuple from python.runfiles import Runfiles @@ -114,6 +114,7 @@ def main() -> None: lcov_text = lcov_result.stdout # Augment with 0%-coverage entries for files that no test linked against. + untested_pairs: List[Tuple[str, str]] = [] untested_sources: List[str] = [] if args.instrumented_sources_manifest: manifest_path = r.Rlocation(args.instrumented_sources_manifest) @@ -125,12 +126,13 @@ def main() -> None: ) else: covered = _covered_sources_from_lcov(lcov_text) - untested_sources = _find_untested_sources( + untested_pairs = _find_untested_sources( manifest_path=Path(manifest_path), workspace_root=workspace_root, covered_sources=covered, filter_regexes=sorted(filter_regexes), ) + untested_sources = [abs_path for abs_path, _rel in untested_pairs] if untested_sources: print( f"INFO: Augmenting report with {len(untested_sources)} " @@ -145,11 +147,10 @@ def main() -> None: f.write(lcov_text) # Augment the HTML report with 0%-coverage pages for untested files. - if untested_sources: + if untested_pairs: _augment_html_with_untested( html_report_dir=html_report_dir, - untested_sources=untested_sources, - workspace_root=workspace_root, + untested_sources=untested_pairs, ) # Generate text summary. @@ -379,39 +380,51 @@ def _find_untested_sources( workspace_root: str, covered_sources: Set[str], filter_regexes: List[str], -) -> List[str]: +) -> List[Tuple[str, str]]: """Read the manifest and return entries not present in covered_sources. 
+ Returns a list of (resolved_absolute_path, manifest_relative_path) pairs, + sorted by absolute path. The manifest-relative path is kept alongside the + resolved path so callers can display a clean workspace-relative name + instead of the fully-resolved on-disk path (see _augment_html_with_untested). + Manifest entries are workspace-relative paths. Filter regexes from the consumer are applied so that the same exclusions that affect llvm-cov also affect the synthesized entries. Entries that do not resolve to an existing file on disk are dropped silently (typically generated files or stale manifest content). + + Path traversal is rejected based on the manifest-relative path itself + (rejecting ".." components), not by resolving symlinks and checking + containment: manifest entries are runfiles, which are frequently symlinks + that legitimately resolve outside workspace_root (e.g. to the real + on-disk source tree when Bazel runs this sandboxed). Resolving first and + then checking containment would silently drop every such file. """ ws = Path(workspace_root) compiled_filters = [re.compile(r) for r in filter_regexes if r] - ws_resolved = ws.resolve() - untested: List[str] = [] + untested: List[Tuple[str, str]] = [] seen: Set[str] = set() raw = manifest_path.read_text(encoding="utf-8") for entry in raw.splitlines(): rel = entry.strip() if not rel: continue - abs_path = (ws / rel).resolve() - if not abs_path.is_relative_to(ws_resolved): + rel_path = PurePosixPath(rel) + if rel_path.is_absolute() or ".." 
in rel_path.parts: continue + abs_path = ws / rel if not abs_path.exists() or not abs_path.is_file(): continue - abs_str = str(abs_path) + abs_str = str(abs_path.resolve()) if abs_str in covered_sources or abs_str in seen: continue if any(rx.search(abs_str) or rx.search(rel) for rx in compiled_filters): continue seen.add(abs_str) - untested.append(abs_str) - return sorted(untested) + untested.append((abs_str, rel)) + return sorted(untested, key=lambda pair: pair[0]) _NON_EXECUTABLE_RE = re.compile( @@ -539,8 +552,7 @@ def _augment_text_summary(summary_text: str, untested_sources: List[str]) -> str def _augment_html_with_untested( html_report_dir: Path, - untested_sources: List[str], - workspace_root: str, + untested_sources: List[Tuple[str, str]], ) -> None: """Create per-file HTML pages for untested sources and link them from index. @@ -557,8 +569,7 @@ def _augment_html_with_untested( output_root = coverage_subdir if coverage_subdir.exists() else html_report_dir entries: List[Tuple[str, str, int]] = [] # (rel_source, href, num_lines) - for abs_path in untested_sources: - rel_source = os.path.relpath(abs_path, workspace_root) + for abs_path, rel_source in untested_sources: # Mirror llvm-cov: per-source HTML lives at /.html # Strip the leading "/" so that the path joins under output_root. 
target_html = output_root / (abs_path.lstrip("/") + ".html") diff --git a/tests/coverage/reporter_test.py b/tests/coverage/reporter_test.py index 3a27479..5409747 100644 --- a/tests/coverage/reporter_test.py +++ b/tests/coverage/reporter_test.py @@ -150,7 +150,7 @@ def test_filters_covered_and_nonexistent(self): covered = {str(src_a.resolve())} result = _find_untested_sources(manifest, ws, covered, []) self.assertEqual(len(result), 1) - self.assertIn(str(src_b.resolve()), result) + self.assertEqual(result[0], (str(src_b.resolve()), "src/b.cpp")) def test_respects_filter_regexes(self): with tempfile.TemporaryDirectory() as ws: @@ -178,6 +178,28 @@ def test_rejects_path_traversal(self): finally: outside.unlink(missing_ok=True) + def test_finds_sources_reached_only_through_symlinks(self): + """Regression test: manifest entries are often runfiles symlinks that + resolve outside workspace_root (e.g. to the real on-disk source tree + when Bazel runs the reporter sandboxed). These must still be found - + see the docstring on _find_untested_sources for the historical bug + this guards against. 
+ """ + with tempfile.TemporaryDirectory() as real_dir: + real_src = Path(real_dir) / "real.cpp" + real_src.write_text("int real() { return 1; }\n") + + with tempfile.TemporaryDirectory() as ws: + linked_src = Path(ws) / "src" / "linked.cpp" + linked_src.parent.mkdir(parents=True) + linked_src.symlink_to(real_src) + + manifest = Path(ws) / "manifest.txt" + manifest.write_text("src/linked.cpp\n") + + result = _find_untested_sources(manifest, ws, set(), []) + self.assertEqual(result, [(str(real_src.resolve()), "src/linked.cpp")]) + class AppendZeroCoverageLcovTest(unittest.TestCase): def test_appends_records_with_lh_zero(self): From efee93867bf00b879a1a382f26ca3a1abdede9ea Mon Sep 17 00:00:00 2001 From: "Emrich Oliver (ETAS)" Date: Thu, 2 Jul 2026 16:08:07 +0200 Subject: [PATCH 09/11] Fix broken file links in coverage report caused by unresolved runfiles symlink Clicking a file in the HTML coverage report, or opening a source referenced in lcov.dat (IDE coverage gutters, SonarQube, ...), resulted in file-not-found. The links pointed into a path that no longer existed once the coverage-report-generator action's sandbox was torn down. Root cause: workspace_root was computed by taking the parent of Runfiles.Rlocation(MODULE.bazel) directly. Under linux-sandbox, Rlocation() returns a path inside the runfiles tree, which is itself a symlink into the reporter action's own (ephemeral) sandbox - not the real workspace. Since workspace_root feeds --path-equivalence for both llvm-cov show and llvm-cov export, every SF: entry and every per-file HTML link for normal (tested) source files was built from that dead sandbox path (e.g. .../reporter_wrapper.sh.runfiles/_main/...) instead of the real, stable workspace directory. Extract the computation into _resolve_workspace_root() and resolve the symlink before taking its parent. Add a regression test that reproduces the runfiles-symlink layout. 
--- coverage/reporter.py | 17 ++++++++++++++++- tests/coverage/reporter_test.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/coverage/reporter.py b/coverage/reporter.py index 4d01602..8e6c9b9 100644 --- a/coverage/reporter.py +++ b/coverage/reporter.py @@ -87,7 +87,7 @@ def main() -> None: if not module_bazel_resolved: print(f"ERROR: MODULE.bazel not found in runfiles via {args.module_bazel}", file=sys.stderr) sys.exit(1) - workspace_root = str(Path(module_bazel_resolved).parent) + "/" + workspace_root = _resolve_workspace_root(module_bazel_resolved) common_show_args = { "llvm_bin_path": llvm_bin_path, @@ -366,6 +366,21 @@ def create_zip(root: Path, directories: List[Path], output_file: Path) -> None: zf.write(file_path, arcname) +def _resolve_workspace_root(module_bazel_resolved: str) -> str: + """Return the real workspace root directory for a resolved MODULE.bazel rlocation. + + Rlocation() returns the runfiles-tree path, which is a symlink into the + current action's sandbox when running under linux-sandbox. Taking its + parent without resolving the symlink yields an ephemeral path buried + inside this action's own sandbox (e.g. + .../reporter_wrapper.sh.runfiles/_main/) that stops existing once the + action finishes - any SF: entry or HTML link built from it points + nowhere once the report is extracted and used. Resolving first yields + the real, stable workspace directory instead. 
+ """ + return str(Path(module_bazel_resolved).resolve().parent) + "/" + + def _covered_sources_from_lcov(lcov_text: str) -> Set[str]: """Return the set of absolute source paths that appear in an LCOV report.""" sources: Set[str] = set() diff --git a/tests/coverage/reporter_test.py b/tests/coverage/reporter_test.py index 5409747..0681f7a 100644 --- a/tests/coverage/reporter_test.py +++ b/tests/coverage/reporter_test.py @@ -32,6 +32,7 @@ _escape_html, _find_untested_sources, _is_likely_executable, + _resolve_workspace_root, ) @@ -226,6 +227,35 @@ def test_empty_untested_returns_original(self): self.assertEqual(_append_zero_coverage_lcov(lcov, [], "/ws"), lcov) +class ResolveWorkspaceRootTest(unittest.TestCase): + def test_plain_path_returns_parent_with_trailing_slash(self): + with tempfile.TemporaryDirectory() as ws: + module_bazel = Path(ws) / "MODULE.bazel" + module_bazel.write_text("") + self.assertEqual(_resolve_workspace_root(str(module_bazel)), f"{ws}/") + + def test_resolves_runfiles_symlink_to_real_workspace(self): + """Regression test: Rlocation() returns a runfiles-tree path, which is + a symlink into the current action's sandbox under linux-sandbox. The + parent of that symlink is an ephemeral sandbox path that stops + existing once the action finishes; SF: entries and HTML links built + from it point nowhere in the extracted report. This must resolve to + the real, stable workspace directory instead. 
+ """ + with tempfile.TemporaryDirectory() as real_ws: + real_module_bazel = Path(real_ws) / "MODULE.bazel" + real_module_bazel.write_text("") + + with tempfile.TemporaryDirectory() as sandbox: + linked_module_bazel = Path(sandbox) / "runfiles" / "_main" / "MODULE.bazel" + linked_module_bazel.parent.mkdir(parents=True) + linked_module_bazel.symlink_to(real_module_bazel) + + self.assertEqual( + _resolve_workspace_root(str(linked_module_bazel)), f"{real_ws}/" + ) + + class EscapeHtmlTest(unittest.TestCase): def test_escapes_all_special_chars(self): self.assertIn("&", _escape_html("a & b")) From 40a68f2acc348d8598f8eb4acbd9eb877a913d1c Mon Sep 17 00:00:00 2001 From: "Emrich Oliver (ETAS)" Date: Thu, 2 Jul 2026 16:39:27 +0200 Subject: [PATCH 10/11] Add estimated combined line coverage banner Sums LH/LF across the final LCOV report (real + synthetic 0% records) to surface a combined coverage estimate in both the text summary and HTML index, clearly labeled as a heuristic. --- coverage/README.md | 6 ++++- coverage/reporter.py | 48 +++++++++++++++++++++++++++------ tests/coverage/reporter_test.py | 44 ++++++++++++++++++++++++++++-- 3 files changed, 87 insertions(+), 11 deletions(-) diff --git a/coverage/README.md b/coverage/README.md index 4033770..f5e5d87 100644 --- a/coverage/README.md +++ b/coverage/README.md @@ -156,7 +156,11 @@ score_coverage_reporter( Anything in the manifest that the llvm-cov export does not already cover (and that survives the configured `--ignore-filename-regex` set) is added as a synthetic 0%-coverage record to the LCOV file and gets a per-file -HTML page plus a "Not Linked Into Tests" section on the report index. +HTML page plus a "Not Linked Into Tests" section on the report index. The +banner also reports an estimated combined line coverage percentage across +tested and untested files, clearly labelled as an estimate since the +untested files' line counts come from a heuristic, not real instrumentation +data. ## 6. 
(Optional) Set up justifications diff --git a/coverage/reporter.py b/coverage/reporter.py index 8e6c9b9..67fe67a 100644 --- a/coverage/reporter.py +++ b/coverage/reporter.py @@ -151,6 +151,7 @@ def main() -> None: _augment_html_with_untested( html_report_dir=html_report_dir, untested_sources=untested_pairs, + lcov_text=lcov_text, ) # Generate text summary. @@ -163,7 +164,7 @@ def main() -> None: ) summary_text = summary.stdout if untested_sources: - summary_text = _augment_text_summary(summary_text, untested_sources) + summary_text = _augment_text_summary(summary_text, untested_sources, lcov_text) with open(text_report_dir / "summary.txt", "w", encoding="utf-8") as f: f.write(summary_text) print(summary_text, file=sys.stderr) @@ -390,6 +391,23 @@ def _covered_sources_from_lcov(lcov_text: str) -> Set[str]: return sources +def _lcov_totals(lcov_text: str) -> Tuple[int, int]: + """Return (total_LH, total_LF) summed across every record in an LCOV report. + + Intended to be called on the final, already-augmented LCOV text (real + llvm-cov records plus the synthetic 0%-coverage records for untested + files), so the result reflects combined line coverage across both. + """ + total_lh = 0 + total_lf = 0 + for line in lcov_text.splitlines(): + if line.startswith("LH:"): + total_lh += int(line[3:].strip()) + elif line.startswith("LF:"): + total_lf += int(line[3:].strip()) + return total_lh, total_lf + + def _find_untested_sources( manifest_path: Path, workspace_root: str, @@ -515,7 +533,7 @@ def _append_zero_coverage_lcov( return lcov_text + sep + "".join(blocks) -def _augment_text_summary(summary_text: str, untested_sources: List[str]) -> str: +def _augment_text_summary(summary_text: str, untested_sources: List[str], lcov_text: str) -> str: """Append a banner to the llvm-cov report summary for untested files. 
The TOTALS line from llvm-cov is left untouched because the heuristic @@ -525,17 +543,26 @@ def _augment_text_summary(summary_text: str, untested_sources: List[str]) -> str false sense of precision. Instead we append a clearly-labelled banner so that CI consumers and reviewers see the gap without mistaking an estimate for an exact measurement. + + The combined percentage below is derived from lcov_text (the final, + already-augmented LCOV report), so it shares the same estimate for the + untested files' totals as the rest of the banner. """ extra_lines_found = 0 for abs_path in untested_sources: _, lf = _count_instrumentable_lines(abs_path) extra_lines_found += lf + total_lh, total_lf = _lcov_totals(lcov_text) + combined_pct = (100.0 * total_lh / total_lf) if total_lf else 0.0 + banner = ( f"\n[score-coverage] WARNING: {len(untested_sources)} source file(s) " f"not linked into any test (~{extra_lines_found} instrumentable lines, " f"estimated via heuristic). These files are absent from the TOTALS above " f"and contribute 0% coverage. See lcov.dat and the HTML report for details.\n" + f"[score-coverage] Estimated combined line coverage (incl. untested " + f"files): ~{combined_pct:.2f}% ({total_lh}/{total_lf} lines).\n" ) return summary_text + banner @@ -568,6 +595,7 @@ def _augment_text_summary(summary_text: str, untested_sources: List[str]) -> str def _augment_html_with_untested( html_report_dir: Path, untested_sources: List[Tuple[str, str]], + lcov_text: str, ) -> None: """Create per-file HTML pages for untested sources and link them from index. 
@@ -616,7 +644,9 @@ def _augment_html_with_untested( if not entries: return - _inject_untested_section_into_index(html_report_dir / "index.html", entries) + total_lh, total_lf = _lcov_totals(lcov_text) + combined_pct = (100.0 * total_lh / total_lf) if total_lf else 0.0 + _inject_untested_section_into_index(html_report_dir / "index.html", entries, combined_pct) def _escape_html(text: str) -> str: @@ -630,14 +660,14 @@ def _escape_html(text: str) -> str: def _inject_untested_section_into_index( - index_file: Path, entries: List[Tuple[str, str, int]] + index_file: Path, entries: List[Tuple[str, str, int]], combined_pct: float ) -> None: """Insert a top-banner and detail table for untested files into the index. The banner is injected right after so it is the first thing a - reviewer sees. It explicitly labels the line count as a heuristic - estimate to avoid false precision. The detail table with per-file links - is appended before . + reviewer sees. It explicitly labels the line count and combined + percentage as heuristic estimates to avoid false precision. The detail + table with per-file links is appended before . """ if not index_file.exists(): return @@ -653,7 +683,9 @@ def _inject_untested_section_into_index( f"any test (~{total_estimated_lines} instrumentable lines, " "estimated via heuristic). The coverage percentages above do " "not include these files. See the " - "detail table below." + "detail table below.
" + f"Estimated combined line coverage (incl. untested files): " + f"~{combined_pct:.2f}%." "" ) diff --git a/tests/coverage/reporter_test.py b/tests/coverage/reporter_test.py index 0681f7a..a645a2a 100644 --- a/tests/coverage/reporter_test.py +++ b/tests/coverage/reporter_test.py @@ -32,6 +32,7 @@ _escape_html, _find_untested_sources, _is_likely_executable, + _lcov_totals, _resolve_workspace_root, ) @@ -136,6 +137,27 @@ def test_empty_lcov(self): self.assertEqual(_covered_sources_from_lcov(""), set()) +class LcovTotalsTest(unittest.TestCase): + def test_sums_lh_and_lf_across_records(self): + lcov = textwrap.dedent("""\ + SF:/workspace/src/a.cpp + DA:1,5 + DA:2,0 + LF:2 + LH:1 + end_of_record + SF:/workspace/src/b.cpp + DA:1,3 + LF:1 + LH:1 + end_of_record + """) + self.assertEqual(_lcov_totals(lcov), (2, 3)) + + def test_empty_lcov(self): + self.assertEqual(_lcov_totals(""), (0, 0)) + + class FindUntestedSourcesTest(unittest.TestCase): def test_filters_covered_and_nonexistent(self): with tempfile.TemporaryDirectory() as ws: @@ -276,7 +298,8 @@ def test_appends_banner_without_modifying_totals(self): --- --- --- --- TOTAL 2 0 100.00% 10 0 100.00% 4 0 100.00% """) - result = _augment_text_summary(summary, [str(src)]) + lcov_text = "SF:/other.cpp\nDA:1,5\nLF:10\nLH:10\nend_of_record\n" + result = _augment_text_summary(summary, [str(src)], lcov_text) self.assertIn("[score-coverage]", result) self.assertIn("WARNING", result) self.assertIn("estimated via heuristic", result) @@ -289,10 +312,27 @@ def test_banner_contains_file_count_and_line_estimate(self): src.write_text("int foo() { return 1; }\n") summary = "TOTAL 2 0 100.00% 10 0 100.00%\n" - result = _augment_text_summary(summary, [str(src)]) + lcov_text = "SF:/other.cpp\nDA:1,5\nLF:10\nLH:10\nend_of_record\n" + result = _augment_text_summary(summary, [str(src)], lcov_text) self.assertIn("1 source file(s)", result) self.assertIn("~1 instrumentable lines", result) + def 
test_banner_contains_combined_percentage_from_lcov_totals(self): + with tempfile.TemporaryDirectory() as ws: + src = Path(ws) / "untested.cpp" + src.write_text("int foo() { return 1; }\n") + + summary = "TOTAL 2 0 100.00% 10 0 100.00%\n" + # Combined: 8 lines hit out of (8 real + 2 synthetic) = 80.00%. + lcov_text = ( + "SF:/other.cpp\nDA:1,5\nLF:8\nLH:8\nend_of_record\n" + f"SF:{src}\nDA:1,0\nDA:2,0\nLF:2\nLH:0\nend_of_record\n" + ) + result = _augment_text_summary(summary, [str(src)], lcov_text) + self.assertIn("Estimated combined line coverage", result) + self.assertIn("~80.00%", result) + self.assertIn("(8/10 lines)", result) + if __name__ == "__main__": unittest.main() From 736779bb174f00c397a21f3a175ce1127497fe8c Mon Sep 17 00:00:00 2001 From: "Emrich Oliver (ETAS)" Date: Fri, 3 Jul 2026 12:25:41 +0200 Subject: [PATCH 11/11] Render untested-source pages with llvm-cov's own line-by-line structure The synthetic "Not Linked Into Tests" pages dumped the whole file into a single
<pre> block, so they never used the line-number/uncovered-line
classes that style.css actually styles; next to a real llvm-cov page they looked broken/unstyled even though style.css loaded fine. Render one row per line instead, matching llvm-cov's own markup, and link control.js so keyboard navigation works on these pages too.
---
 coverage/reporter.py            | 49 ++++++++++++++++++++++++---------
 tests/coverage/reporter_test.py | 33 ++++++++++++++++++++++
 2 files changed, 69 insertions(+), 13 deletions(-)

diff --git a/coverage/reporter.py b/coverage/reporter.py
index 67fe67a..d9a40b7 100644
--- a/coverage/reporter.py
+++ b/coverage/reporter.py
@@ -572,26 +572,47 @@ def _augment_text_summary(summary_text: str, untested_sources: List[str], lcov_t
 
   
   
+  
   {title}
 
 
 

Coverage Report

-

{title}

Not linked into any test. This source file is reachable from the configured coverage targets but no test binary instruments it, so every line is reported as uncovered.

- - -
-{body}
-
+
+
{title}
+ +{rows} +
Line
Count
Source
""" +def _render_untested_rows(source_text: str) -> str: + """Render one llvm-cov-style table row per source line, all marked uncovered. + + Mirrors llvm-cov's real per-source HTML (line-number gutter + a + `td.uncovered-line` status cell) so these synthetic pages pick up the same + `style.css` rules and `control.js` keyboard navigation as genuine llvm-cov + pages, instead of rendering as an unstyled wall of text. + """ + if not source_text: + return "
(empty file)
" + lines = source_text.split("\n") + if source_text.endswith("\n"): + lines = lines[:-1] + return "\n".join( + f"
{i}
" + f"" + f"
{_escape_html(line)}
" + for i, line in enumerate(lines, start=1) + ) + + def _augment_html_with_untested( html_report_dir: Path, untested_sources: List[Tuple[str, str]], @@ -599,11 +620,11 @@ def _augment_html_with_untested( ) -> None: """Create per-file HTML pages for untested sources and link them from index. - The pages are intentionally minimal: llvm-cov's per-source HTML format is - not easily reproducible without the full coverage mapping, so we render a - plain source dump with a banner that explains the file was not exercised. - The index page gets a new "Not Linked Into Tests" section listing the - files at 0% coverage so the gap is visible to reviewers. + The pages reuse llvm-cov's own line-number/uncovered-line row structure + (see `_render_untested_rows`) so they render with the same style.css rules + as genuine llvm-cov pages, with a banner explaining the file was not + exercised. The index page gets a new "Not Linked Into Tests" section + listing the files at 0% coverage so the gap is visible to reviewers. 
""" if not html_report_dir.exists(): return @@ -629,12 +650,14 @@ def _augment_html_with_untested( rel_to_root = os.path.relpath(html_report_dir, target_html.parent) css_path = (Path(rel_to_root) / "style.css").as_posix() - body = _escape_html(source_text) or "(empty file)" + js_path = (Path(rel_to_root) / "control.js").as_posix() + rows = _render_untested_rows(source_text) html = ( _UNTESTED_HTML_TEMPLATE .replace("{css_path}", css_path) + .replace("{js_path}", js_path) .replace("{title}", _escape_html(rel_source)) - .replace("{body}", body) + .replace("{rows}", rows) ) target_html.write_text(html, encoding="utf-8") diff --git a/tests/coverage/reporter_test.py b/tests/coverage/reporter_test.py index a645a2a..6bb5c39 100644 --- a/tests/coverage/reporter_test.py +++ b/tests/coverage/reporter_test.py @@ -33,6 +33,7 @@ _find_untested_sources, _is_likely_executable, _lcov_totals, + _render_untested_rows, _resolve_workspace_root, ) @@ -287,6 +288,38 @@ def test_escapes_all_special_chars(self): self.assertIn(""", _escape_html('"quoted"')) +class RenderUntestedRowsTest(unittest.TestCase): + """Regression test for synthetic untested-file pages looking unstyled. + + llvm-cov's own per-source pages render one per line with a + 'line-number' and 'uncovered-line'/'covered-line' cell, which is what + style.css actually has rules for. The original implementation dumped the + whole file into one
<pre> block, which loaded style.css successfully but
+    used none of its classes - so the page looked broken/unstyled next to a
+    genuine llvm-cov page. This locks in the line-per-row structure instead.
+    """
+
+    def test_one_row_per_line_with_line_number_and_uncovered_class(self):
+        rows = _render_untested_rows("int foo() {\n    return 1;\n}\n")
+        self.assertEqual(rows.count(""), 3)
+        self.assertIn("class='line-number'", rows)
+        self.assertIn("class='uncovered-line'", rows)
+        self.assertIn(">1<", rows)
+        self.assertIn(">2<", rows)
+        self.assertIn(">3<", rows)
+
+    def test_escapes_source_content(self):
+        rows = _render_untested_rows("a < b && c > d\n")
+        self.assertIn("<", rows)
+        self.assertIn(">", rows)
+        self.assertIn("&", rows)
+
+    def test_empty_file_renders_placeholder_row(self):
+        rows = _render_untested_rows("")
+        self.assertIn("(empty file)", rows)
+        self.assertEqual(rows.count(""), 1)
+
+
 class AugmentTextSummaryTest(unittest.TestCase):
     def test_appends_banner_without_modifying_totals(self):
         with tempfile.TemporaryDirectory() as ws: