diff --git a/.gitignore b/.gitignore index 1eeaf4d..9184d1c 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,10 @@ bazel-* .bazel-* +# Generated coverage reports (output from `bazel run @score_cpp_policies//coverage:generate_coverage_html`) +cpp_coverage/ +*/cpp_coverage/ + # IDE files .vscode/ .idea/ diff --git a/MODULE.bazel b/MODULE.bazel index dcb06f4..b4e6299 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -17,3 +17,26 @@ bazel_dep(name = "bazel_skylib", version = "1.8.2") bazel_dep(name = "platforms", version = "0.0.10") bazel_dep(name = "rules_cc", version = "0.2.17") bazel_dep(name = "aspect_rules_lint", version = "2.5.0") + +# --------------------------------------------------------------------------- +# Coverage tooling (//coverage/...). Hosts the merger/reporter/justify/ +# effective_coverage Python binaries and the shell driver. Consumers do not +# need rules_python themselves to use //coverage:reporter — it is wired in +# here once for the whole policies module. +# --------------------------------------------------------------------------- +bazel_dep(name = "rules_python", version = "1.8.5") +bazel_dep(name = "rules_shell", version = "0.3.0") + +python = use_extension("@rules_python//python/extensions:python.bzl", "python") +python.toolchain( + python_version = "3.12", + is_default = True, +) + +pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip") +pip.parse( + hub_name = "score_cpp_policies_pip", + python_version = "3.12", + requirements_lock = "//coverage:requirements_lock.txt", +) +use_repo(pip, "score_cpp_policies_pip") diff --git a/README.md b/README.md index a22246d..703b35a 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ # score_cpp_policies Centralized C++ quality tool policies for Eclipse S-CORE, providing sanitizer -configurations and clang-tidy integration reusable across all S-CORE modules -(logging, communication, baselibs, etc.). 
+configurations, clang-tidy integration and source-based code coverage +reusable across all S-CORE modules (logging, communication, baselibs, etc.). -Planned: clang-format, code coverage policies. +Planned: clang-format. ## What This Provides @@ -15,6 +15,10 @@ Planned: clang-format, code coverage policies. - **Constraint system** — `target_compatible_with` settings for sanitizer-incompatible targets - **`clang_tidy/.clang-tidy`** — centralized default check set (conservative baseline, tailorable per module) - **`clang_tidy/clang_tidy.bazelrc`** — `--config=clang-tidy` bazelrc config consumers can import +- **`//coverage:reporter` + `score_coverage_reporter` macro** — llvm-cov source-based +coverage with a shared baseline of ignore regexes (test/mock/fake/external), pluggable +per-module extensions and an effective-coverage justification post-processor. +See [`coverage/README.md`](coverage/README.md) for the seven-step adoption guide. ## Available Sanitizer Configurations @@ -54,6 +58,10 @@ bazel_dep(name = "score_cpp_policies") Copy [`sanitizers/sanitizers.bazelrc`](sanitizers/sanitizers.bazelrc) into your repository's `.bazelrc`. +### Configure Coverage + +See [`coverage/README.md`](coverage/README.md). + ### Run Tests ```bash diff --git a/coverage/BUILD.bazel b/coverage/BUILD.bazel new file mode 100644 index 0000000..cb8caa9 --- /dev/null +++ b/coverage/BUILD.bazel @@ -0,0 +1,97 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +load("@rules_python//python:defs.bzl", "py_binary", "py_library") +load("@rules_shell//shell:sh_binary.bzl", "sh_binary") + +package(default_visibility = ["//visibility:public"]) + +# Files consumers may reference directly (e.g. .bazelrc imports, custom macros). +exports_files([ + "coverage.bazelrc", + "filter_regexes.txt", + "generate_coverage_html.sh", +]) + +# --------------------------------------------------------------------------- +# Per-test coverage merger. +# Bazel calls this as --coverage_output_generator once per test; it takes +# profraw files + the source-file manifest, merges them with llvm-profdata +# and packages everything (profdata + object-file list) into a zip that the +# reporter later aggregates. +# --------------------------------------------------------------------------- +py_binary( + name = "merger", + srcs = ["merger.py"], +) + +# --------------------------------------------------------------------------- +# Final coverage reporter (HTML / LCOV / text). +# Invoked indirectly via the per-consumer wrapper produced by +# `score_coverage_reporter` (see defs.bzl). Reads per-test zips, merges all +# profdata, then calls llvm-cov to render the combined report. +# +# Note: this target intentionally does NOT carry the llvm-cov / llvm-profdata +# binaries in its `data` — the consumer-side wrapper supplies them by label, +# letting each consumer pick its own llvm_toolchain repository name and +# version (see score_coverage_reporter in coverage/defs.bzl). 
+# --------------------------------------------------------------------------- +py_library( + name = "reporter_lib", + srcs = ["reporter.py"], + imports = [".."], + deps = ["@rules_python//python/runfiles"], +) + +py_binary( + name = "reporter", + srcs = ["reporter.py"], + deps = ["@rules_python//python/runfiles"], +) + +# --------------------------------------------------------------------------- +# Coverage justification processor. +# Reads a YAML database of justifications + scans source for COV_JUSTIFIED +# markers and emits a manifest of (file, line) -> justification. +# --------------------------------------------------------------------------- +py_binary( + name = "justify", + srcs = ["justify.py"], + deps = [ + "@score_cpp_policies_pip//pyyaml", + ], +) + +# --------------------------------------------------------------------------- +# Effective coverage calculator + HTML post-processor. +# Takes the llvm-cov HTML report and the resolved justification manifest; +# rewrites the HTML to highlight justified lines and emits effective +# coverage statistics. +# --------------------------------------------------------------------------- +py_binary( + name = "effective_coverage", + srcs = ["effective_coverage.py"], +) + +# --------------------------------------------------------------------------- +# Generic post-`bazel coverage` driver. Consumers invoke this via +# `bazel run @score_cpp_policies//coverage:generate_coverage_html -- [flags]`. +# --------------------------------------------------------------------------- +sh_binary( + name = "generate_coverage_html", + srcs = ["generate_coverage_html.sh"], + data = [ + ":justify", + ":effective_coverage", + ], +) diff --git a/coverage/README.md b/coverage/README.md new file mode 100644 index 0000000..f5e5d87 --- /dev/null +++ b/coverage/README.md @@ -0,0 +1,241 @@ +# Coverage — adoption guide + +Centralized C++ source-based coverage tooling for Eclipse S-CORE modules, +built on `llvm-cov` source-based coverage. 
This package provides: + +| Component | What it does | +|---|---| +| `:merger` (py_binary) | Per-test profraw → profdata + object-file packaging. Wired as `--coverage_output_generator` by `coverage.bazelrc`. | +| `:reporter` (py_binary) | Final aggregation: profdata merge + llvm-cov HTML / LCOV / text. Invoked by the per-consumer wrapper produced by `score_coverage_reporter`. | +| `:justify` (py_binary) | Reads a YAML database + `COV_JUSTIFIED` source markers and emits a manifest of justified lines/branches. | +| `:effective_coverage` (py_binary) | Post-processes the llvm-cov HTML to highlight justified lines and compute effective coverage. | +| `:generate_coverage_html` (sh_binary) | One-shot driver: unzip Bazel coverage output, run justification, optional CI archive. | +| `defs.bzl :: score_coverage_reporter` | Macro consumers call to wire the report generator with their own filter regex extensions and llvm tools. | +| `coverage.bazelrc` | Generic `coverage` flags consumers import from their own `.bazelrc`. | +| `filter_regexes.txt` | Baseline `--ignore-filename-regex` set (tests, mocks, fakes, benchmarks, external/). | + +--- + +## Prerequisites + +Your repository must already have: + +1. **A Bzlmod setup** (`MODULE.bazel`). +2. **An `@llvm_toolchain`-style toolchain registered** through + `toolchains_llvm` (or any other source that produces `:llvm-cov` and + `:llvm-profdata` targets). The repository name does *not* have to be + `llvm_toolchain` — you pass the labels to the macro. +3. **A coverage-instrumented C++ toolchain** that matches the `@llvm_toolchain` + above (set via `--extra_toolchains` in your `.bazelrc`). + +--- + +## 1. Depend on `score_cpp_policies` + +```python +# MODULE.bazel +bazel_dep(name = "score_cpp_policies", version = "") +``` + +`rules_python`, `rules_shell` and the `pyyaml` pip hub are pulled in +transitively — you do **not** need to declare them yourself. 
+ +> ⚠️ Add one line to your **root** `BUILD` / `BUILD.bazel` so the macro can +> rlocation-resolve the consumer workspace root at runtime: +> +> ```python +> exports_files(["MODULE.bazel"]) +> ``` + +## 2. Import the generic bazelrc + +```bazelrc +# .bazelrc +import %workspace%/../external/+_repo_rules+score_cpp_policies/coverage/coverage.bazelrc +``` + +Or, more portably, vendor a one-line `coverage.bazelrc` in your repo: + +```bazelrc +# .bazelrc +try-import %workspace%/coverage.bazelrc +``` + +```bazelrc +# coverage.bazelrc (vendored) +import %workspace%/external/+_repo_rules+score_cpp_policies/coverage/coverage.bazelrc +``` + +If your build uses a `local_path_override`, refer to the file by its repo +root path. (The recommended pattern is to copy the file's `import` lines +into your project's `.bazelrc` — there are no hidden flags.) + +## 3. Set your instrumentation filter + +`coverage.bazelrc` deliberately leaves `--instrumentation_filter` empty +because it is module-specific. Add one line in **your** `.bazelrc` (replace `mymod` with your top-level package): + +```bazelrc +coverage --instrumentation_filter="^//mymod[/:]" +``` + +> 💡 Use `[/:]` (not just `/`) so the top-level package itself +> (e.g. `//mymod:lib`) is included, not just subpackages. + +## 4. Create your reporter wrapper + +Create a small BUILD file (e.g. `tools/coverage/BUILD.bazel`): + +```python +load("@score_cpp_policies//coverage:defs.bzl", "score_coverage_reporter") + +score_coverage_reporter( + name = "reporter_wrapper", + llvm_cov = "@llvm_toolchain//:llvm-cov", + llvm_profdata = "@llvm_toolchain//:llvm-profdata", + # OPTIONAL: extend the baseline ignore regexes with project-specific patterns. + extra_regex_files = [":coverage_filter_regexes.txt"], + visibility = ["//visibility:public"], +) + +exports_files(["coverage_filter_regexes.txt"]) +``` + +Example `tools/coverage/coverage_filter_regexes.txt`: + +```text +# Project-specific exclusions on top of the S-CORE baseline. +.*/generated/.* +.*/proto/.*\.pb\.(h|cc)$ +``` + +## 5.
Point Bazel at your wrapper + +```bazelrc +# .bazelrc +coverage --coverage_report_generator=//tools/coverage:reporter_wrapper +``` + +## 5a. (Optional) Surface untested files at 0% coverage + +`llvm-cov` only reports source files that are linked into at least one +exercised test. Source files that ship in the project but no test pulls in +will silently disappear from the report — which usually misrepresents +coverage as higher than it actually is. + +To surface those files at 0% coverage, build a manifest of every C/C++ +source reachable from your coverage roots and pass it to the reporter: + +```python +load( + "@score_cpp_policies//coverage:defs.bzl", + "score_coverage_reporter", + "score_instrumented_sources_manifest", +) + +score_instrumented_sources_manifest( + name = "instrumented_sources", + # The aspect walks `deps` (and `srcs`) recursively, so listing the + # top-level library/binary/test targets is enough. + targets = [ + "//mymod:lib", + "//mymod/tests:all_tests", + ], +) + +score_coverage_reporter( + name = "reporter_wrapper", + llvm_cov = "@llvm_toolchain//:llvm-cov", + llvm_profdata = "@llvm_toolchain//:llvm-profdata", + extra_regex_files = [":coverage_filter_regexes.txt"], + instrumented_sources_manifest = ":instrumented_sources", + visibility = ["//visibility:public"], +) +``` + +Anything in the manifest that the llvm-cov export does not already cover +(and that survives the configured `--ignore-filename-regex` set) is added +as a synthetic 0%-coverage record to the LCOV file and gets a per-file +HTML page plus a "Not Linked Into Tests" section on the report index. The +banner also reports an estimated combined line coverage percentage across +tested and untested files, clearly labelled as an estimate since the +untested files' line counts come from a heuristic, not real instrumentation +data. + +## 6. 
(Optional) Set up justifications + +Create `tools/coverage/coverage_justifications.yaml`: + +```yaml +version: 1 +justifications: + - id: hw-unreachable-on-x86 + category: platform_specific + reason: | + ARM-only error path; cannot be exercised by x86 CI. + locations: + - file: mymod/src/foo.cpp + line_start: 42 + line_end: 47 +``` + +Or annotate code in place: + +```cpp +// One-liner: +return false; // COV_JUSTIFIED hw-unreachable-on-x86 + +// Region: +// COV_JUSTIFIED_START hw-unreachable-on-x86 +if (running_on_arm()) { ... } +// COV_JUSTIFIED_STOP +``` + +Valid categories: `defensive_programming`, `tool_false_positive`, +`platform_specific`, `other`. IDs must be kebab-case. + +## 7. Run it + +```bash +# Collect coverage data. +bazel coverage //... --build_tests_only + +# Build the HTML report + run justifications (if YAML exists) + show summary. +bazel run @score_cpp_policies//coverage:generate_coverage_html -- \ + --yaml tools/coverage/coverage_justifications.yaml +``` + +The HTML report appears at `cpp_coverage/index.html` by default. The +human-readable summary shows raw vs. effective line/branch coverage. + +For CI, you can also produce a zipped archive (HTML + LCOV + JUnit XMLs): + +```bash +bazel run @score_cpp_policies//coverage:generate_coverage_html -- \ + --yaml tools/coverage/coverage_justifications.yaml \ + --archive coverage_artifacts +``` + +--- + +## Customization knobs + +| Need | How | +|---|---| +| Add project-specific ignore regexes | `extra_regex_files = [":coverage_filter_regexes.txt"]` on the macro | +| Different llvm version | Register your own `@my_llvm` and pass `llvm_cov = "@my_llvm//:llvm-cov"` and `llvm_profdata = "@my_llvm//:llvm-profdata"` | +| Different output directory | `--output-dir DIR` on `generate_coverage_html` | +| Different effective coverage threshold | `COVERAGE_THRESHOLD=95 bazel run ...:generate_coverage_html ...` | + +## Troubleshooting + +- **`html_report/ not found`** — re-run `bazel coverage` first; the script + only post-processes existing output.
+- **Some `.cpp` files missing from the report** — confirm your + `--instrumentation_filter` covers the top-level package using `[/:]` + (not just `/`). +- **Test / mock files appearing in the report** — add a pattern that + matches their path or filename to your `extra_regex_files` entry. +- **`llvm-cov not found in runfiles`** — the macro arg `llvm_cov` must + point to a real binary target in your repo's repo mapping; the + default `@llvm_toolchain//:llvm-cov` requires `use_repo(llvm, "llvm_toolchain")`. diff --git a/coverage/coverage.bazelrc b/coverage/coverage.bazelrc new file mode 100644 index 0000000..e835b80 --- /dev/null +++ b/coverage/coverage.bazelrc @@ -0,0 +1,88 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +# +# Generic coverage configuration shipped by @score_cpp_policies. +# +# Consumers import this file from their own .bazelrc with: +# +# import %workspace%/path/to/coverage.bazelrc +# +# This file deliberately does NOT set the following — they are consumer- +# specific and must be configured in the consumer's .bazelrc on top of this +# file (example values shown): +# +# coverage --instrumentation_filter="^//mymod[/:]" +# coverage --coverage_report_generator=//tools/coverage:reporter_wrapper +# +# The reporter_wrapper target must be created with the +# `score_coverage_reporter` macro from @score_cpp_policies//coverage:defs.bzl +# so that consumer-specific filter regex extensions and the consumer +# workspace root are wired in correctly.
+# +# Prerequisites the consumer's MODULE.bazel must satisfy: +# - bazel_dep(name = "score_cpp_policies", version = "...") +# - register an llvm toolchain via the toolchains_llvm extension and pass +# the resulting `:llvm-cov` and `:llvm-profdata` labels to the +# `score_coverage_reporter` macro (the repository name is freely chosen +# by the consumer) + +# --------------------------------------------------------------------------- +# Bazel coverage instrumentation flags. +# --------------------------------------------------------------------------- +# `experimental_use_llvm_covmap` switches Bazel to source-based coverage +# (instr profiles) instead of gcov-style notes. +coverage --experimental_use_llvm_covmap +coverage --experimental_generate_llvm_lcov +coverage --combined_report=lcov +coverage --experimental_fetch_all_coverage_outputs + +# --------------------------------------------------------------------------- +# Custom coverage report generators provided by @score_cpp_policies. +# The merger is consumer-agnostic and is used as-is. +# The reporter_wrapper MUST be defined per-consumer via score_coverage_reporter +# and pointed to with `--coverage_report_generator` in the consumer's .bazelrc. +# --------------------------------------------------------------------------- +coverage --coverage_output_generator=@score_cpp_policies//coverage:merger + +# --------------------------------------------------------------------------- +# Test-time environment. +# --------------------------------------------------------------------------- +# Bazel's default collect_cc_coverage.sh would call gcov; we use llvm-cov, so +# stub out gcov and suppress the auto LCOV conversion (raw profraw is what +# the merger consumes). +coverage --test_env=GENERATE_LLVM_LCOV=0 +coverage --test_env=COVERAGE_GCOV_PATH=/usr/bin/true +# Required so that LLVM writes counters incrementally; without it abnormal +# termination paths report as uncovered even when reached. 
+coverage --test_env=LLVM_PROFILE_CONTINUOUS_MODE=1 + +# --------------------------------------------------------------------------- +# Compile flags required for accurate llvm-cov data. +# --------------------------------------------------------------------------- +# -O0: no optimization, otherwise line/branch mapping becomes unreliable. +coverage --cxxopt=-O0 +# Required for LLVM continuous mode (paired with LLVM_PROFILE_CONTINUOUS_MODE). +coverage --cxxopt=-mllvm +coverage --cxxopt=-runtime-counter-relocation + +# --------------------------------------------------------------------------- +# Disable dynamic libraries — they create per-test .so files whose +# instrumentation can clash with the production .so files. The first object +# loaded wins, leading to flaky / order-dependent coverage gaps. +# --------------------------------------------------------------------------- +coverage --dynamic_mode=off + +# --------------------------------------------------------------------------- +# Always rerun tests in coverage mode (cached results carry no profraw). +# --------------------------------------------------------------------------- +coverage --nocache_test_results diff --git a/coverage/defs.bzl b/coverage/defs.bzl new file mode 100644 index 0000000..21f1347 --- /dev/null +++ b/coverage/defs.bzl @@ -0,0 +1,309 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +"""Public API for the S-CORE centralized coverage report generator. 
+ +Consumers instantiate `score_coverage_reporter` in their own BUILD file to +create the `--coverage_report_generator` target that Bazel will call after +running `bazel coverage`. The macro wires in: + + 1. The S-CORE baseline filter regexes — applied first, on top of which + consumer-specific exclusions (`extra_regex_files`) are appended. + 2. The consumer's MODULE.bazel — used at runtime to resolve the real + workspace root for source path mapping in llvm-cov reports. + 3. The shared reporter binary `@score_cpp_policies//coverage:reporter`, + which performs profdata merge + HTML/LCOV/text report generation. + 4. The consumer-supplied llvm-cov and llvm-profdata binaries — passed by + label so the consumer can pick their own llvm_toolchain version and + repository name. + +Typical usage from a consumer BUILD file: + + load("@score_cpp_policies//coverage:defs.bzl", "score_coverage_reporter") + + score_coverage_reporter( + name = "reporter_wrapper", + llvm_cov = "@llvm_toolchain//:llvm-cov", + llvm_profdata = "@llvm_toolchain//:llvm-profdata", + extra_regex_files = ["coverage_filter_regexes.txt"], + visibility = ["//visibility:public"], + ) + +and from the consumer .bazelrc: + + coverage --coverage_report_generator=//tools/coverage:reporter_wrapper +""" + + +_BASELINE_REGEX = "@score_cpp_policies//coverage:filter_regexes.txt" +_REPORTER = "@score_cpp_policies//coverage:reporter" + +# --------------------------------------------------------------------------- +# Instrumented sources collection. +# +# llvm-cov only reports files whose object files were linked into one of the +# tests it was asked to analyse. Source files that exist in the workspace but +# are not linked into any cc_test (directly or transitively) therefore never +# appear in the coverage report - even though they would normally be +# instrumented under --instrumentation_filter. 
+# +# To surface those files at 0% coverage we ship: +# +# * _collect_sources_aspect - walks the dependency graph of a target, +# gathers srcs (.cpp/.cc/.cxx/.c/.C) from every cc_library, cc_binary, +# and cc_test it encounters, and aggregates them into +# InstrumentedSourcesInfo. +# * score_instrumented_sources_manifest - applies the aspect to a list of +# consumer-supplied targets and writes a text file with one +# workspace-relative source path per line. +# +# The consumer points score_coverage_reporter at this manifest via the +# optional `instrumented_sources_manifest` attribute. The reporter then +# augments the llvm-cov LCOV + HTML output with synthetic 0%-coverage entries +# for every manifest entry that did not appear in the report. +# --------------------------------------------------------------------------- + +InstrumentedSourcesInfo = provider( + doc = "Aggregate of all C/C++ source files reachable through cc_* targets.", + fields = { + "sources": "depset of File objects (workspace-local C/C++ source files)", + }, +) + +_CC_SRC_EXTS = ("cc", "cpp", "cxx", "c", "C") +_CC_KINDS = ("cc_library", "cc_binary", "cc_test") +_PROPAGATE_ATTRS = ["deps", "srcs", "implementation_deps"] + +def _collect_sources_aspect_impl(target, ctx): + direct = [] + if ctx.rule.kind in _CC_KINDS: + for src in getattr(ctx.rule.attr, "srcs", None) or []: + for f in src.files.to_list(): + if f.extension in _CC_SRC_EXTS and not f.short_path.startswith("../"): + direct.append(f) + + transitive = [] + for attr_name in _PROPAGATE_ATTRS: + for dep in getattr(ctx.rule.attr, attr_name, None) or []: + if InstrumentedSourcesInfo in dep: + transitive.append(dep[InstrumentedSourcesInfo].sources) + + return [InstrumentedSourcesInfo( + sources = depset(direct = direct, transitive = transitive), + )] + +_collect_sources_aspect = aspect( + implementation = _collect_sources_aspect_impl, + attr_aspects = _PROPAGATE_ATTRS, + provides = [InstrumentedSourcesInfo], + doc = "Collect C/C++ source files 
from cc_* targets reachable via deps/srcs.", +) + +def _instrumented_sources_manifest_impl(ctx): + transitive = [ + t[InstrumentedSourcesInfo].sources + for t in ctx.attr.targets + if InstrumentedSourcesInfo in t + ] + sources = depset(transitive = transitive) + files = sources.to_list() + + # Deduplicate (Starlark has no ordered set type) and sort for determinism. + paths = sorted({f.short_path: None for f in files}.keys()) + + out = ctx.actions.declare_file(ctx.label.name + ".txt") + content = "\n".join(paths) + ("\n" if paths else "") + ctx.actions.write(output = out, content = content) + + # The manifest text file only lists paths - the reporter also needs the + # actual source files present on disk (as runfiles) so it can read them + # under sandboxing. Expose them via default_runfiles so consumers that + # depend on this target (e.g. score_coverage_reporter) can merge them in. + return [ + DefaultInfo( + files = depset([out]), + runfiles = ctx.runfiles(transitive_files = sources), + ), + InstrumentedSourcesInfo(sources = sources), + ] + +score_instrumented_sources_manifest = rule( + implementation = _instrumented_sources_manifest_impl, + attrs = { + "targets": attr.label_list( + aspects = [_collect_sources_aspect], + mandatory = True, + doc = "Targets whose transitive cc_* source files should be listed.", + ), + }, + doc = """Emit a text manifest of C/C++ source files reachable from `targets`. + +The output is a newline-separated list of workspace-relative paths. Pass this +target to score_coverage_reporter(instrumented_sources_manifest = ...) so the +reporter can add 0%-coverage entries for files that no test linked against.""", +) + +def _rlocation_path(ctx, file): + """Return the Runfiles.Rlocation()-compatible path for a Bazel File. + + External-repo files have short_path = "../repo/path" — strip the "../". + Main-workspace files have short_path = "pkg/file" — prepend workspace name. 
+ """ + if file.short_path.startswith("../"): + return file.short_path[3:] + return ctx.workspace_name + "/" + file.short_path + +# Template for the thin wrapper script generated per consumer. +# Uses %s substitution so bash $-variables are never touched by Starlark. +_WRAPPER_TEMPLATE = """\ +#!/usr/bin/env bash +set -euo pipefail +if [[ -z "${RUNFILES_DIR:-}" || ! -d "${RUNFILES_DIR}" ]]; then + RUNFILES_DIR="$(cd "$(dirname "$0")" && pwd)/$(basename "$0").runfiles" +fi +exec "${RUNFILES_DIR}/%s" \\ + --filter_regexes="%s" \\ + --module_bazel="%s" \\ + --llvm_cov="%s" \\ + --llvm_profdata="%s" \\ +%s "$@" +""" + + +def _score_coverage_reporter_impl(ctx): + reporter_rloc = _rlocation_path(ctx, ctx.executable._reporter) + filter_rloc = _rlocation_path(ctx, ctx.file.filter_regexes) + module_bazel_rloc = _rlocation_path(ctx, ctx.file.module_bazel) + llvm_cov_rloc = _rlocation_path(ctx, ctx.file.llvm_cov) + llvm_profdata_rloc = _rlocation_path(ctx, ctx.file.llvm_profdata) + + manifest_line = "" + if ctx.file.instrumented_sources_manifest: + manifest_rloc = _rlocation_path(ctx, ctx.file.instrumented_sources_manifest) + manifest_line = ( + " --instrumented_sources_manifest=\"%s\" \\\n" % manifest_rloc + ) + + wrapper = ctx.actions.declare_file(ctx.label.name + ".sh") + ctx.actions.write( + output = wrapper, + content = _WRAPPER_TEMPLATE % ( + reporter_rloc, + filter_rloc, + module_bazel_rloc, + llvm_cov_rloc, + llvm_profdata_rloc, + manifest_line, + ), + is_executable = True, + ) + + runfiles_files = [ + ctx.file.filter_regexes, + ctx.file.module_bazel, + ctx.file.llvm_cov, + ctx.file.llvm_profdata, + ] + if ctx.file.instrumented_sources_manifest: + runfiles_files.append(ctx.file.instrumented_sources_manifest) + + runfiles = ctx.runfiles(files = runfiles_files).merge( + ctx.attr._reporter[DefaultInfo].default_runfiles, + ) + + # Merge in the actual instrumented source files (not just the manifest + # .txt listing their paths) so the reporter can find them on disk when + 
# the coverage-report-generator action runs sandboxed. Without this, + # _find_untested_sources() silently drops every manifest entry because + # the workspace-relative path does not resolve to an existing file. + if ctx.attr.instrumented_sources_manifest: + runfiles = runfiles.merge( + ctx.attr.instrumented_sources_manifest[DefaultInfo].default_runfiles, + ) + + return [DefaultInfo(executable = wrapper, runfiles = runfiles)] + + +_score_coverage_reporter_rule = rule( + implementation = _score_coverage_reporter_impl, + executable = True, + attrs = { + "llvm_cov": attr.label(mandatory = True, allow_single_file = True), + "llvm_profdata": attr.label(mandatory = True, allow_single_file = True), + "filter_regexes": attr.label(mandatory = True, allow_single_file = True), + "module_bazel": attr.label(mandatory = True, allow_single_file = True), + "instrumented_sources_manifest": attr.label( + allow_single_file = True, + default = None, + ), + "_reporter": attr.label( + default = Label(_REPORTER), + executable = True, + cfg = "exec", + ), + }, +) + + +def score_coverage_reporter( + name, + llvm_cov, + llvm_profdata, + extra_regex_files = None, + instrumented_sources_manifest = None, + **kwargs): + """Create a Bazel --coverage_report_generator wrapper for this repository. + + Args: + name: The target name. Reference it as + `--coverage_report_generator=//:` in your + coverage.bazelrc. + llvm_cov: Label of the llvm-cov binary (typically + "@llvm_toolchain//:llvm-cov"). + llvm_profdata: Label of the llvm-profdata binary (typically + "@llvm_toolchain//:llvm-profdata"). + extra_regex_files: Optional list of additional filter-regex file labels + (or strings) to concatenate AFTER the + @score_cpp_policies baseline. Use these to exclude + consumer-specific patterns (e.g. project-only + generator outputs). + instrumented_sources_manifest: Optional label of a + `score_instrumented_sources_manifest` target. 
When + provided, the reporter adds 0%-coverage entries for + every file in the manifest that did not appear in + the llvm-cov report (i.e. files that no test linked + against). + **kwargs: Forwarded to the underlying rule (e.g. visibility, tags). + """ + extra_regex_files = extra_regex_files or [] + + merged_name = name + "_merged_filter_regexes" + merged_out = merged_name + ".txt" + + # Concatenate baseline regexes + consumer extras into a single file. + native.genrule( + name = merged_name, + srcs = [_BASELINE_REGEX] + list(extra_regex_files), + outs = [merged_out], + cmd = "cat $(SRCS) > $@", + ) + + _score_coverage_reporter_rule( + name = name, + llvm_cov = llvm_cov, + llvm_profdata = llvm_profdata, + filter_regexes = ":" + merged_name, + module_bazel = "//:MODULE.bazel", + instrumented_sources_manifest = instrumented_sources_manifest, + **kwargs + ) diff --git a/coverage/effective_coverage.py b/coverage/effective_coverage.py new file mode 100644 index 0000000..f151e54 --- /dev/null +++ b/coverage/effective_coverage.py @@ -0,0 +1,753 @@ +#!/usr/bin/env python3 +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +"""Effective coverage calculator and HTML post-processor. + +Takes the llvm-cov HTML report and the resolved justification manifest. +Modifies the HTML to show justified lines in a distinct color (yellow/orange) +and calculates effective coverage metrics. 
+ +Usage: + python effective_coverage.py --html-dir --manifest --output +""" + +import argparse +import json +import os +import re +import sys +from pathlib import Path +from typing import Any, Dict, List, Tuple + + +# Pattern to match a table row in llvm-cov HTML source pages +# Format: ......... +LINE_NUMBER_RE = re.compile(r"") +COVERED_LINE_TD_RE = re.compile(r"") + + +def main() -> None: + """Main entry point.""" + args = parse_args() + + # Load the justification manifest + manifest = load_manifest(args.manifest) + justified_files = manifest.get("justified_files", {}) + + # Find all source HTML files in the report + html_dir = args.html_dir + if not html_dir.exists(): + print(f"ERROR: HTML report directory not found: {html_dir}", file=sys.stderr) + sys.exit(1) + + # Parse raw coverage totals from the index page (matches llvm-cov exactly). + totals = parse_index_page_totals(html_dir) + raw_covered, raw_total = totals["lines"] + raw_branch_covered, raw_branch_total = totals["branches"] + + # Process each source HTML file (restyle justified lines + count them) + total_justified = 0 + total_stale = 0 + total_justified_branches = 0 + applied_justifications: List[Dict[str, Any]] = [] + stale_justifications: List[Dict[str, Any]] = [] + # Track per-file justification counts for index page updates + per_file_stats: Dict[str, Dict[str, int]] = {} + + source_html_files = find_source_html_files(html_dir) + for html_file in source_html_files: + rel_source_path = extract_source_path_from_html(html_file, html_dir) + if not rel_source_path: + continue + + file_justifications = find_matching_justifications( + rel_source_path, justified_files + ) + + file_stats = process_html_file( + html_file, file_justifications, applied_justifications, stale_justifications + ) + + total_justified += file_stats["justified"] + total_stale += file_stats["stale"] + total_justified_branches += file_stats["justified_branches"] + + if file_stats["justified"] > 0 or file_stats["justified_branches"] 
> 0: + per_file_stats[rel_source_path] = file_stats + + # Calculate stats using llvm-cov's exact numbers + raw_uncovered = raw_total - raw_covered + unjustified_uncovered = raw_uncovered - total_justified + + effective_branch_covered = raw_branch_covered + total_justified_branches + + stats = { + "total_instrumented_lines": raw_total, + "covered_lines": raw_covered, + "justified_lines": total_justified, + "unjustified_uncovered_lines": max(0, unjustified_uncovered), + "stale_justifications": total_stale, + "raw_line_coverage_pct": round(100.0 * raw_covered / raw_total, 2) if raw_total > 0 else 0.0, + "effective_line_coverage_pct": round( + 100.0 * (raw_covered + total_justified) / raw_total, 2 + ) if raw_total > 0 else 0.0, + "total_branches": raw_branch_total, + "covered_branches": raw_branch_covered, + "justified_branches": total_justified_branches, + "raw_branch_coverage_pct": round(100.0 * raw_branch_covered / raw_branch_total, 2) if raw_branch_total > 0 else 0.0, + "effective_branch_coverage_pct": round( + 100.0 * effective_branch_covered / raw_branch_total, 2 + ) if raw_branch_total > 0 else 0.0, + } + + # Inject CSS for justified lines into style.css + inject_justified_css(html_dir) + + # Update the index page with effective coverage info and per-file stats + update_index_page(html_dir, stats, per_file_stats) + + # Write output report + report = { + "version": 1, + "summary": stats, + "applied_justifications": applied_justifications, + "stale_justifications": stale_justifications, + } + + output_path = Path(args.output) + output_path.parent.mkdir(parents=True, exist_ok=True) + with open(output_path, "w", encoding="utf-8") as f: + json.dump(report, f, indent=2) + + # Write human-readable summary + summary_path = output_path.parent / "summary.txt" + write_summary(summary_path, stats, stale_justifications) + + # Print summary + print( + f"INFO: Effective line coverage: {stats['effective_line_coverage_pct']}% " + f"(raw: {stats['raw_line_coverage_pct']}%, " + 
f"justified: {stats['justified_lines']} lines, " + f"unjustified uncovered: {stats['unjustified_uncovered_lines']} lines)", + file=sys.stderr, + ) + if stats['justified_branches'] > 0: + print( + f"INFO: Effective branch coverage: {stats['effective_branch_coverage_pct']}% " + f"(raw: {stats['raw_branch_coverage_pct']}%, " + f"justified: {stats['justified_branches']} branches)", + file=sys.stderr, + ) + if stale_justifications: + print( + f"WARNING: {len(stale_justifications)} stale justifications " + f"(lines are actually covered, justification can be removed)", + file=sys.stderr, + ) + + +def process_html_file( + html_file: Path, + justifications: Dict[int, Dict[str, str]], + applied_justifications: List[Dict[str, Any]], + stale_justifications: List[Dict[str, Any]], +) -> Dict[str, int]: + """Process a single source HTML file. Modifies it in-place. + + Restyles justified lines: changes the count cell to show "J" with justified-line + class, and changes red code regions to justified (orange) background. + Also restyles uncovered branches on justified lines. + Only counts justified/stale lines for the justification report — raw coverage + numbers are taken from the index page to match llvm-cov exactly. + """ + file_stats = { + "justified": 0, + "stale": 0, + "justified_branches": 0, + } + + with open(html_file, "r", encoding="utf-8") as f: + content = f.read() + + if not justifications: + return file_stats + + # Determine effective line status (covered if ANY instantiation covers it) + row_pattern = re.compile( + r"
\d+
" + r"" + ) + line_effective_status: Dict[int, str] = {} + for m in row_pattern.finditer(content): + line_num = int(m.group(1)) + line_class = m.group(2) + if line_class == "covered-line": + line_effective_status[line_num] = "covered" + elif line_class == "uncovered-line": + if line_num not in line_effective_status: + line_effective_status[line_num] = "uncovered" + + # Determine which lines have truly uncovered branches (never covered in any instantiation). + # A branch direction is "truly uncovered" if no instantiation covers it. + branch_check_pattern = re.compile( + r"Branch \(" + r"(\d+:\d+)\):\s*\[(.*?)\]" + ) + covered_branch_dirs_check: Dict[str, set] = {} # branch_id → set of covered directions + uncovered_branch_dirs_check: Dict[str, set] = {} # branch_id → set of uncovered directions + branch_line_map: Dict[str, int] = {} # branch_id → line_num + + for m in branch_check_pattern.finditer(content): + line_num = int(m.group(1)) + branch_id = m.group(2) + branch_content = m.group(3) + branch_line_map[branch_id] = line_num + if branch_id not in covered_branch_dirs_check: + covered_branch_dirs_check[branch_id] = set() + uncovered_branch_dirs_check[branch_id] = set() + for direction in ("True", "False"): + if f"class='None'>{direction}" in branch_content: + covered_branch_dirs_check[branch_id].add(direction) + if f"class='red branch'>{direction}" in branch_content: + uncovered_branch_dirs_check[branch_id].add(direction) + + # Lines with truly uncovered branches (uncovered in ALL instantiations) + lines_with_uncovered_branches: set = set() + for branch_id, uncov_dirs in uncovered_branch_dirs_check.items(): + cov_dirs = covered_branch_dirs_check.get(branch_id, set()) + truly_uncovered = uncov_dirs - cov_dirs + if truly_uncovered: + lines_with_uncovered_branches.add(branch_line_map[branch_id]) + + # Determine which justified lines are stale vs applicable. + # A justification is stale only if the line is covered AND has no uncovered branches. 
+ for line_num, justification in justifications.items(): + status = line_effective_status.get(line_num) + has_uncovered_branches = line_num in lines_with_uncovered_branches + if status == "covered" and not has_uncovered_branches: + file_stats["stale"] += 1 + stale_justifications.append({ + "file": html_file.stem, + "line": line_num, + "id": justification.get("id", ""), + "reason": "Line is already covered and has no uncovered branches — justification is stale", + }) + elif status == "uncovered": + file_stats["justified"] += 1 + applied_justifications.append({ + "file": html_file.stem, + "line": line_num, + "id": justification.get("id", ""), + "category": justification.get("category", ""), + }) + elif status == "covered" and has_uncovered_branches: + # Line is covered but has uncovered branches — justification applies to branches only + applied_justifications.append({ + "file": html_file.stem, + "line": line_num, + "id": justification.get("id", ""), + "category": justification.get("category", ""), + }) + + # Restyle justified lines in the HTML (all occurrences including instantiations). + # Full row pattern to capture and replace the entire row: + # ...
0
...
... + full_row_pattern = re.compile( + r"(
\d+
)" + r"(
)\d+(
)" + r"(
)(.*?)(
)" + ) + + modified = False + + def replace_full_row(match: re.Match) -> str: + nonlocal modified + line_num = int(match.group(2)) + if line_num not in justifications: + return match.group(0) + + justification = justifications[line_num] + reason = justification.get("reason", "").replace("'", "'").replace('"', """) + jid = justification.get("id", "") + tooltip = f"Justified [{jid}]: {reason}" + modified = True + + # Rebuild the row with justified styling: + # 1. Line number td (unchanged) + line_td = match.group(1) + # 2. Count td: change class and show "J" instead of "0" + count_td = f"
J{match.group(4)}"
+        # 3. Code td: replace 'region red' spans with 'region justified'
+        code_start = match.group(5)
+        code_content = match.group(6).replace("class='region red'", "class='region justified'")
+        code_end = match.group(7)
+
+        return line_td + count_td + code_start + code_content + code_end
+
+    new_content = full_row_pattern.sub(replace_full_row, content)
+
+    # Restyle branches on justified lines.
+    # Branch format in expansion-view:
+    #   Branch (195:17): [True: 0, ...]
+    # where an uncovered direction carries class='red branch' and a covered one class='None'.
+    # We find branches at justified line numbers and restyle red branch → justified branch.
+    # Counting: A branch direction is "uncovered" only if ALL instantiations show it as red.
+    # (Same as llvm-cov's logic: covered if ANY instantiation covers it.)
+    # The two passes below read four groups from every match:
+    #   1 = everything up to and including the opening "[",
+    #   2 = the source line number, 3 = the "line:col" branch id,
+    #   4 = the direction list through the closing "]".
+    # NOTE(review): confirm the literal below still declares all four groups — verify against the committed file.
+    branch_pattern = re.compile(
+        r"(Branch \("
+        r"(\d+:\d+)\):\s*\[)(.*?\])"
+    )
+
+    # First pass: collect every (branch id, direction) pair that is shown as
+    # covered (class='None') in at least one instantiation. Only branches that
+    # sit on a justified line are inspected.
+    covered_branch_dirs: set = set()
+    for hit in branch_pattern.finditer(new_content):
+        if int(hit.group(2)) in justifications:
+            location, listing = hit.group(3), hit.group(4)
+            covered_branch_dirs.update(
+                (location, side)
+                for side in ("True", "False")
+                if f"class='None'>{side}" in listing
+            )
+
+    # Second pass: restyle, and count only directions that were never covered.
+    # Holds the (branch id, direction) pairs already counted for this file.
+    justified_branch_ids: set = set()
+
+    def replace_branch(match: re.Match) -> str:
+        """Restyle one branch entry and tally its justified directions.
+
+        An entry is returned unchanged when its line number (group 2) has no
+        justification, or when its direction list (group 4) holds no
+        'red branch' marker. Otherwise every 'red branch' / 'uncovered-line'
+        class in the list is swapped for its justified counterpart, and each
+        red direction that no instantiation covers is counted once per file.
+        """
+        nonlocal modified
+        if int(match.group(2)) not in justifications:
+            return match.group(0)
+
+        listing = match.group(4)
+        if "class='red branch'" not in listing:
+            return match.group(0)
+
+        modified = True
+        location = match.group(3)  # "line:col", e.g. "68:13"
+
+        # A (location, direction) pair is tallied at most once, and never when
+        # the first pass saw that direction covered somewhere.
+        for side in ("True", "False"):
+            if f"class='red branch'>{side}" not in listing:
+                continue
+            key = (location, side)
+            if key in covered_branch_dirs or key in justified_branch_ids:
+                continue
+            justified_branch_ids.add(key)
+            file_stats["justified_branches"] += 1
+
+        # Swap the red/uncovered classes for their justified equivalents.
+        restyled = listing.replace(
+            "class='red branch'", "class='justified-branch'"
+        ).replace(
+            "class='uncovered-line'", "class='justified-line'"
+        )
+        return match.group(1) + restyled
+
+    # Run the branch restyling over every branch entry in the page.
+    new_content = branch_pattern.sub(replace_branch, new_content)
+
+    # Rewrite the file on disk only when a row or branch was actually changed.
+    if modified:
+        html_file.write_text(new_content, encoding="utf-8")
+
+    return file_stats
+
+
+def parse_index_page_totals(html_dir: Path) -> Dict[str, Tuple[int, int]]:
+    """Parse the TOTALS row from the llvm-cov index.html to get exact coverage numbers.
+
+    Returns dict with 'lines' and 'branches' keys, each (covered, total).
+    The TOTALS row in llvm-cov HTML is always the last 
+    (or plain last bold row) and contains exactly 3 coverage cells: func, line, branch.
+    We locate the row by the 'Totals' text anchor and extract the 3 cells from it,
+    rather than relying on positional offset from the full-page match list (which
+    breaks when individual file rows also contain matching percent patterns).
+    """
+    index_file = html_dir / "index.html"
+    if not index_file.exists():
+        return {"lines": (0, 0), "branches": (0, 0)}
+
+    with open(index_file, "r", encoding="utf-8") as f:
+        content = f.read()
+
+    result = {"lines": (0, 0), "branches": (0, 0)}
+
+    # Locate the Totals row: llvm-cov emits "
Totals
" as the first cell. + totals_row_match = re.search(r"
Totals
(.*?)(?:|$)", content, re.DOTALL) + if not totals_row_match: + print("WARNING: Could not parse coverage totals from index.html", file=sys.stderr) + return result + + row_fragment = totals_row_match.group(1) + pct_pattern = re.compile(r"(\d+\.\d+)%\s*\((\d+)/(\d+)\)") + cells = pct_pattern.findall(row_fragment) + + # The 3 cells in order are: func, line, branch. + if len(cells) >= 2: + _, line_covered, line_total = cells[1] + result["lines"] = (int(line_covered), int(line_total)) + if len(cells) >= 3: + _, branch_covered, branch_total = cells[2] + result["branches"] = (int(branch_covered), int(branch_total)) + + if result["lines"] == (0, 0): + print("WARNING: Could not parse coverage totals from index.html", file=sys.stderr) + + return result + + +def inject_justified_css(html_dir: Path) -> None: + """Add CSS for justified lines to style.css.""" + style_file = html_dir / "style.css" + if not style_file.exists(): + return + + justified_css = """ +/* Coverage justification styling */ +.justified-line { + text-align: right; + color: #a60; +} +.region.justified { + background-color: #fa04; +} +.justified-branch { + color: #a60; + font-weight: bold; +} +tr:has(> td.justified-line) > td.code { + background-color: #fff3e0; +} +@media (prefers-color-scheme: dark) { + .justified-line { + color: #fa0; + } + .justified-branch { + color: #fa0; + } + tr:has(> td.justified-line) > td.code { + background-color: #3d2800; + } + .region.justified { + background-color: #fa03; + } +} +""" + + with open(style_file, "a", encoding="utf-8") as f: + f.write(justified_css) + + +def update_index_page(html_dir: Path, stats: Dict[str, Any], per_file_stats: Dict[str, Dict[str, int]]) -> None: + """Update the index page with effective coverage info and per-file adjusted percentages.""" + index_file = html_dir / "index.html" + if not index_file.exists(): + return + + with open(index_file, "r", encoding="utf-8") as f: + content = f.read() + + # Banner with overall effective coverage (lines + 
branches) + branch_info = "" + if stats.get("justified_branches", 0) > 0: + branch_info = ( + f" | Effective Branch Coverage: {stats['effective_branch_coverage_pct']}%" + f" (Raw: {stats['raw_branch_coverage_pct']}%, Justified: {stats['justified_branches']} branches)" + ) + + banner = ( + f"
" + f"Effective Line Coverage: {stats['effective_line_coverage_pct']}% " + f"(Raw: {stats['raw_line_coverage_pct']}% | " + f"Justified: {stats['justified_lines']} lines | " + f"Unjustified Uncovered: {stats['unjustified_uncovered_lines']} lines)" + f"{branch_info}" + f"
" + ) + + # Insert after the tag or after the first

+ if "

" in content: + content = content.replace("

", banner + "

", 1) + else: + content = content.replace("", f"{banner}", 1) + + # Update per-file rows in the index table. + # For each file with justifications, find its row and update line% and branch% cells. + # Row format:
displayname
+ #
  XX.XX% (covered/total)
← function + #
  XX.XX% (covered/total)
← line + #
  XX.XX% (covered/total)
← branch + # + pct_cell_pattern = re.compile( + r"
\s*(\d+\.\d+)%\s*\((\d+)/(\d+)\)
" + ) + + for file_path, fstats in per_file_stats.items(): + justified_lines = fstats.get("justified", 0) + justified_branches = fstats.get("justified_branches", 0) + if justified_lines == 0 and justified_branches == 0: + continue + + # Find the row for this file in the index page + # The href contains the full path to the HTML file + if file_path not in content: + continue + + # Find the containing this file path + file_idx = content.find(file_path) + if file_idx < 0: + continue + row_start = content.rfind("", file_idx) + if row_start < 0 or row_end < 0: + continue + + row = content[row_start:row_end + 5] + + # Find all percentage cells in this row (func, line, branch) + cells = list(pct_cell_pattern.finditer(row)) + if len(cells) < 2: + continue + + new_row = row + # Update line coverage cell (second cell, index 1) + if justified_lines > 0 and len(cells) >= 2: + line_cell = cells[1] + covered = int(line_cell.group(3)) + total = int(line_cell.group(4)) + eff_covered = covered + justified_lines + eff_pct = round(100.0 * eff_covered / total, 2) if total > 0 else 0.0 + color = _get_coverage_color(eff_pct) + old_cell = line_cell.group(0) + new_cell = ( + f"
"
+                f"{eff_pct:>7.2f}% ({eff_covered}/{total})
" + ) + new_row = new_row.replace(old_cell, new_cell) + + # Update branch coverage cell (third cell, index 2) + if justified_branches > 0 and len(cells) >= 3: + branch_cell = cells[2] + covered = int(branch_cell.group(3)) + total = int(branch_cell.group(4)) + eff_covered = covered + justified_branches + eff_pct = round(100.0 * eff_covered / total, 2) if total > 0 else 0.0 + color = _get_coverage_color(eff_pct) + old_cell = branch_cell.group(0) + new_cell = ( + f"
"
+                f"{eff_pct:>7.2f}% ({eff_covered}/{total})
" + ) + new_row = new_row.replace(old_cell, new_cell) + + if new_row != row: + content = content.replace(row, new_row) + + # Update the TOTALS row + content = _update_totals_row(content, stats) + + with open(index_file, "w", encoding="utf-8") as f: + f.write(content) + + +def _get_coverage_color(pct: float) -> str: + """Return the llvm-cov color class for a coverage percentage.""" + if pct >= 100.0: + return "green" + elif pct >= 80.0: + return "yellow" + else: + return "red" + + +def _update_totals_row(content: str, stats: Dict[str, Any]) -> str: + """Update the TOTALS row in the index page with effective coverage numbers.""" + # Find the TOTALS row — it's the last row before + totals_idx = content.rfind("Totals") + if totals_idx < 0: + return content + + row_start = content.rfind("", totals_idx) + if row_start < 0 or row_end < 0: + return content + + row = content[row_start:row_end + 5] + + pct_cell_pattern = re.compile( + r"
\s*(\d+\.\d+)%\s*\((\d+)/(\d+)\)
" + ) + cells = list(pct_cell_pattern.finditer(row)) + + new_row = row + + # Update line coverage in totals (index 1) + if len(cells) >= 2 and stats.get("justified_lines", 0) > 0: + line_cell = cells[1] + eff_covered = stats["covered_lines"] + stats["justified_lines"] + total = stats["total_instrumented_lines"] + eff_pct = stats["effective_line_coverage_pct"] + color = _get_coverage_color(eff_pct) + old_cell = line_cell.group(0) + new_cell = ( + f"
"
+            f"{eff_pct:>7.2f}% ({eff_covered}/{total})
" + ) + new_row = new_row.replace(old_cell, new_cell) + + # Update branch coverage in totals (index 2) + if len(cells) >= 3 and stats.get("justified_branches", 0) > 0: + branch_cell = cells[2] + eff_covered = stats["covered_branches"] + stats["justified_branches"] + total = stats["total_branches"] + eff_pct = stats["effective_branch_coverage_pct"] + color = _get_coverage_color(eff_pct) + old_cell = branch_cell.group(0) + new_cell = ( + f"
"
+            f"{eff_pct:>7.2f}% ({eff_covered}/{total})
" + ) + new_row = new_row.replace(old_cell, new_cell) + + if new_row != row: + content = content.replace(row, new_row) + + return content + + +def find_source_html_files(html_dir: Path) -> List[Path]: + """Find all per-source HTML files (not index.html, style.css, etc.).""" + coverage_dir = html_dir / "coverage" + if not coverage_dir.exists(): + # Some llvm-cov versions put source files directly in html_dir + coverage_dir = html_dir + + files = [] + for html_file in coverage_dir.rglob("*.html"): + if html_file.name in ("index.html",): + continue + files.append(html_file) + return sorted(files) + + +def extract_source_path_from_html(html_file: Path, html_dir: Path) -> str: + """Extract the relative source file path from the HTML file path. + + llvm-cov creates paths like: html_report/coverage/.html + We need to extract the relative path within the project. + """ + rel = str(html_file.relative_to(html_dir)) + # Remove "coverage/" prefix if present + if rel.startswith("coverage/"): + rel = rel[len("coverage/"):] + # Remove .html suffix + if rel.endswith(".html"): + rel = rel[:-5] + return rel + + +def find_matching_justifications( + source_path: str, justified_files: Dict[str, Dict[str, Dict[str, str]]] +) -> Dict[int, Dict[str, str]]: + """Find justifications that match the given source path. + + The source_path from HTML may be an absolute path or relative. + The justified_files keys are relative to source root. + We match by path-component suffix to avoid crossing file-name boundaries + (e.g. "bar.cpp" must not match "foobar.cpp"). + """ + result: Dict[int, Dict[str, str]] = {} + + src_parts = Path(source_path).parts + for justified_path, line_justifications in justified_files.items(): + j_parts = Path(justified_path).parts + # Accept if one path's components are a suffix of the other's components. 
+ if (len(src_parts) >= len(j_parts) and src_parts[-len(j_parts):] == j_parts) or ( + len(j_parts) > len(src_parts) and j_parts[-len(src_parts):] == src_parts + ): + for line_str, justification in line_justifications.items(): + result[int(line_str)] = justification + + return result + + +def write_summary( + path: Path, stats: Dict[str, Any], stale: List[Dict[str, Any]] +) -> None: + """Write human-readable summary.""" + with open(path, "w", encoding="utf-8") as f: + f.write("Coverage Justification Summary\n") + f.write("=" * 40 + "\n\n") + f.write(f"Total instrumented lines: {stats['total_instrumented_lines']}\n") + f.write(f"Covered lines: {stats['covered_lines']}\n") + f.write(f"Justified lines: {stats['justified_lines']}\n") + f.write(f"Unjustified uncovered: {stats['unjustified_uncovered_lines']}\n") + f.write(f"\n") + f.write(f"Raw line coverage: {stats['raw_line_coverage_pct']}%\n") + f.write(f"Effective line coverage: {stats['effective_line_coverage_pct']}%\n") + f.write(f"\n") + if stats.get("total_branches", 0) > 0: + f.write(f"Total branches: {stats['total_branches']}\n") + f.write(f"Covered branches: {stats['covered_branches']}\n") + f.write(f"Justified branches: {stats['justified_branches']}\n") + f.write(f"Raw branch coverage: {stats['raw_branch_coverage_pct']}%\n") + f.write(f"Effective branch coverage: {stats['effective_branch_coverage_pct']}%\n") + f.write(f"\n") + if stale: + f.write(f"Stale justifications ({len(stale)}):\n") + for s in stale: + f.write(f" - {s['file']}:{s['line']} [{s['id']}]\n") + f.write("\n") + + +def load_manifest(path: Path) -> Dict[str, Any]: + """Load the justification manifest JSON.""" + if not path.exists(): + print(f"ERROR: Manifest not found: {path}", file=sys.stderr) + sys.exit(1) + with open(path, "r", encoding="utf-8") as f: + return json.load(f) + + +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="Effective coverage calculator and HTML 
post-processor" + ) + parser.add_argument( + "--html-dir", + type=Path, + required=True, + help="Path to llvm-cov HTML report directory", + ) + parser.add_argument( + "--manifest", + type=Path, + required=True, + help="Path to resolved justification manifest (from justify.py)", + ) + parser.add_argument( + "--output", + type=Path, + required=True, + help="Output path for justification report (JSON)", + ) + return parser.parse_args() + + +if __name__ == "__main__": + main() diff --git a/coverage/filter_regexes.txt b/coverage/filter_regexes.txt new file mode 100644 index 0000000..bcc14cf --- /dev/null +++ b/coverage/filter_regexes.txt @@ -0,0 +1,52 @@ +# Coverage filter regexes (one per line; blank lines and lines starting +# with `#` are ignored). +# +# These regexes are passed to llvm-cov as --ignore-filename-regex, matching +# against the full source path as seen by llvm-cov. +# +# This file is the baseline shipped with @score_cpp_policies. Consumers can +# extend it with their own module-specific regexes through the +# score_coverage_reporter(extra_regex_files = [...]) macro in coverage/defs.bzl. +# +# NOTE: --experimental_use_llvm_covmap causes Bazel to instrument ALL targets +# regardless of --instrumentation_filter. Therefore, source filtering MUST +# happen here at the report level. 
+ +# --------------------------------------------------------------------------- +# External dependencies +# --------------------------------------------------------------------------- +external/.* + +# --------------------------------------------------------------------------- +# Test files and test directories +# --------------------------------------------------------------------------- +.*_test\.(cpp|cc|h|hpp)$ +.*_tests\.(cpp|cc|h|hpp)$ +.*/test/.* +.*/test_support/.* +.*/test_doubles/.* +.*_fixture.*\.(cpp|cc|h|hpp)$ +.*_fixtures.*\.(cpp|cc|h|hpp)$ + +# --------------------------------------------------------------------------- +# Mock infrastructure +# --------------------------------------------------------------------------- +.*_mock.*\.(cpp|cc|h|hpp)$ +.*/mock/.* +.*/mocks/.* +.*/mocking/.* +.*/mock_binding/.* + +# --------------------------------------------------------------------------- +# Fakes / stubs +# --------------------------------------------------------------------------- +.*_fake\.(cpp|cc|h|hpp)$ +.*_fakes\.(cpp|cc|h|hpp)$ +.*_stub\.(cpp|cc|h|hpp)$ +.*_stubs\.(cpp|cc|h|hpp)$ + +# --------------------------------------------------------------------------- +# Performance benchmarks (not part of functional coverage) +# --------------------------------------------------------------------------- +.*/performance_benchmarks/.* +.*/benchmarks/.* diff --git a/coverage/generate_coverage_html.sh b/coverage/generate_coverage_html.sh new file mode 100755 index 0000000..cebe42b --- /dev/null +++ b/coverage/generate_coverage_html.sh @@ -0,0 +1,203 @@ +#!/usr/bin/env bash +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
#
# This program and the accompanying materials are made available under the
# terms of the Apache License Version 2.0 which is available at
# https://www.apache.org/licenses/LICENSE-2.0
#
# SPDX-License-Identifier: Apache-2.0
# *******************************************************************************
#
# Generic post-`bazel coverage` driver shipped by @score_cpp_policies.
#
# Extracts the HTML coverage report from the llvm-cov generated zip produced
# by `bazel coverage`. Optionally runs the justification post-processor and/or
# assembles a CI archive with HTML + LCOV + JUnit XMLs.
#
# Run via Bazel from the CONSUMER repository:
#
#   bazel run @score_cpp_policies//coverage:generate_coverage_html -- \
#       [--yaml <path>] \
#       [--output-dir <dir>] \
#       [--archive <name>] \
#       [--junit-glob <dir>]
#
# Arguments:
#   --yaml        Path (relative to workspace root, or absolute) to the
#                 consumer's coverage_justifications.yaml. If omitted (or the
#                 file does not exist), justification post-processing is
#                 skipped.
#   --output-dir  Directory (relative to workspace root, or absolute) into
#                 which the HTML report is written. Default: cpp_coverage
#   --archive     If set, also create <name>.zip containing the HTML
#                 report, raw LCOV data and matched JUnit XMLs.
#   --junit-glob  Directory (relative to workspace root) searched for
#                 test.xml files when --archive is set. Default: bazel-testlogs

set -euo pipefail

JUSTIFICATION_YAML=""
OUTPUT_DIR="cpp_coverage"
ARCHIVE_NAME=""
JUNIT_GLOB="bazel-testlogs"

# Print the usage part of the header comment (script line 17 up to the first
# non-comment line). Stopping at the end of the comment block, instead of at a
# fixed line number, keeps shell code out of the help output.
_print_usage() {
    awk 'NR < 17 { next } !/^#/ { exit } { print }' "$0" >&2
}

while [[ $# -gt 0 ]]; do
    case "$1" in
        --yaml)
            JUSTIFICATION_YAML="${2:?--yaml requires a path argument}"
            shift 2
            ;;
        --output-dir)
            OUTPUT_DIR="${2:?--output-dir requires a directory argument}"
            shift 2
            ;;
        --archive)
            ARCHIVE_NAME="${2:?--archive requires a name argument}"
            shift 2
            ;;
        --junit-glob)
            JUNIT_GLOB="${2:?--junit-glob requires a glob argument}"
            shift 2
            ;;
        -h|--help)
            _print_usage
            exit 0
            ;;
        *)
            echo "ERROR: Unknown argument: $1" >&2
            echo "       Run with --help for usage." >&2
            exit 2
            ;;
    esac
done

if [[ -z "${BUILD_WORKSPACE_DIRECTORY:-}" ]]; then
    echo "ERROR: BUILD_WORKSPACE_DIRECTORY is not set. This script must be run via 'bazel run'." >&2
    exit 1
fi

# Locate the justify and effective_coverage binaries from the runfiles tree
# (they are declared as `data` deps of this sh_binary). Invoking them directly
# avoids a nested `bazel run` which would deadlock on Bazel's output-base lock.

# Quiet lookup: print the resolved path of runfile $1 and return 0, or return 1.
_rlocation_lookup() {
    local key="$1"
    local runfiles_root="${RUNFILES_DIR:-$0.runfiles}"
    local candidate="${runfiles_root}/${key}"
    if [[ -x "${candidate}" ]]; then
        echo "${candidate}"
        return 0
    fi
    # Fallback: try manifest (useful when RUNFILES_DIR is not set). The key
    # must match the whole first field of a manifest line; a substring match
    # could resolve to an unrelated runfile whose path merely ends in the key.
    local manifest="${runfiles_root}_manifest"
    if [[ -f "${manifest}" ]]; then
        local entry
        entry=$(awk -v key="${key}" '
            index($0, key " ") == 1 { print substr($0, length(key) + 2); exit }
        ' "${manifest}")
        if [[ -n "${entry}" && -x "${entry}" ]]; then
            echo "${entry}"
            return 0
        fi
    fi
    return 1
}

# Strict lookup: like _rlocation_lookup, but a missing runfile is fatal.
_rlocation() {
    if _rlocation_lookup "$1"; then
        return 0
    fi
    echo "ERROR: runfile not found: ${1}" >&2
    exit 1
}

# Resolve a coverage tool by name. The canonical repository directory depends
# on how the module is consumed: "score_cpp_policies+" is the original
# hard-coded name; "score_cpp_policies~" (older Bazel separator) and "_main"
# (running inside the policies repository itself) are tried as fallbacks.
# NOTE(review): confirm the fallback names against the supported Bazel versions.
_resolve_tool() {
    local tool="$1"
    local repo
    local path
    for repo in "score_cpp_policies+" "score_cpp_policies~" "_main"; do
        if path=$(_rlocation_lookup "${repo}/coverage/${tool}"); then
            echo "${path}"
            return 0
        fi
    done
    echo "ERROR: runfile not found: score_cpp_policies+/coverage/${tool}" >&2
    exit 1
}

_JUSTIFY=$(_resolve_tool "justify")
_EFFECTIVE_COVERAGE=$(_resolve_tool "effective_coverage")

cd "${BUILD_WORKSPACE_DIRECTORY}"

# Resolve relative paths against the workspace root.
case "${OUTPUT_DIR}" in
    /*) ;;
    *) OUTPUT_DIR="${BUILD_WORKSPACE_DIRECTORY}/${OUTPUT_DIR}" ;;
esac

if [[ -n "${JUSTIFICATION_YAML}" ]]; then
    case "${JUSTIFICATION_YAML}" in
        /*) ;;
        *) JUSTIFICATION_YAML="${BUILD_WORKSPACE_DIRECTORY}/${JUSTIFICATION_YAML}" ;;
    esac
fi

# Coverage report generator output (the zip our reporter produced).
COVERAGE_ZIP="${BUILD_WORKSPACE_DIRECTORY}/bazel-out/_coverage/_coverage_report.dat"

if [[ ! -f "${COVERAGE_ZIP}" ]]; then
    echo "ERROR: Coverage report not found at ${COVERAGE_ZIP}" >&2
    echo "       Run 'bazel coverage //... --build_tests_only' first." >&2
    exit 1
fi

# The output directory is deleted and recreated below. Refuse to do that when
# it is the workspace root or one of its ancestors (e.g. `--output-dir .`),
# because that would wipe the consumer's sources.
if [[ -d "${OUTPUT_DIR}" ]]; then
    _resolved_output_dir="$(cd "${OUTPUT_DIR}" && pwd -P)"
    _resolved_workspace="$(cd "${BUILD_WORKSPACE_DIRECTORY}" && pwd -P)"
    case "${_resolved_workspace}/" in
        "${_resolved_output_dir%/}/"*)
            echo "ERROR: Refusing to replace ${OUTPUT_DIR}: it contains the workspace root." >&2
            exit 1
            ;;
    esac
fi

# Extract the HTML report from the zip. mktemp gives an unpredictable,
# freshly created directory instead of a PID-derived path another process
# could have pre-created.
TMPDIR_EXTRACT="$(mktemp -d "${TMPDIR:-/tmp}/coverage_extract.XXXXXX")"
trap 'rm -rf "${TMPDIR_EXTRACT}"' EXIT

unzip -q -o "${COVERAGE_ZIP}" -d "${TMPDIR_EXTRACT}"

# Validate the archive BEFORE removing the previous report, so a broken zip
# does not cost the user the report they already had.
if [[ ! -d "${TMPDIR_EXTRACT}/html_report" ]]; then
    echo "ERROR: html_report/ not found in ${COVERAGE_ZIP}" >&2
    exit 1
fi

rm -rf "${OUTPUT_DIR}"
mkdir -p "$(dirname "${OUTPUT_DIR}")"
cp -r "${TMPDIR_EXTRACT}/html_report" "${OUTPUT_DIR}"

echo "Coverage report written to: ${OUTPUT_DIR}"

# ---------------------------------------------------------------------------
# Optional justification processing.
# ---------------------------------------------------------------------------
if [[ -n "${JUSTIFICATION_YAML}" && -f "${JUSTIFICATION_YAML}" ]]; then
    echo ""
    echo "Running coverage justification processing..."

    JUSTIFICATION_DIR="${TMPDIR_EXTRACT}/justification_report"
    mkdir -p "${JUSTIFICATION_DIR}"

    if "${_JUSTIFY}" \
        --yaml "${JUSTIFICATION_YAML}" \
        --source-root "${BUILD_WORKSPACE_DIRECTORY}" \
        --output "${JUSTIFICATION_DIR}/manifest.json"; then

        "${_EFFECTIVE_COVERAGE}" \
            --html-dir "${OUTPUT_DIR}" \
            --manifest "${JUSTIFICATION_DIR}/manifest.json" \
            --output "${JUSTIFICATION_DIR}/report.json"
    else
        # Still best-effort (the raw report above stays valid), but say so.
        echo "WARNING: justification manifest generation failed; skipping effective coverage." >&2
    fi

    if [[ -f "${JUSTIFICATION_DIR}/summary.txt" ]]; then
        echo ""
        cat "${JUSTIFICATION_DIR}/summary.txt"

        # POSIX sed instead of `grep -oP`, which only GNU grep supports.
        EFFECTIVE_PCT=$(sed -n \
            's/^Effective line coverage:[[:space:]]*\([0-9.][0-9.]*\).*/\1/p' \
            "${JUSTIFICATION_DIR}/summary.txt" | head -n 1)
        EFFECTIVE_PCT="${EFFECTIVE_PCT:-0}"

        THRESHOLD="${COVERAGE_THRESHOLD:-100}"
        if ! [[ "${THRESHOLD}" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
            echo "WARNING: COVERAGE_THRESHOLD='${THRESHOLD}' is not a number, using 100." >&2
            THRESHOLD="100"
        fi

        # Pass the values as awk variables; interpolating them into the
        # program text would let the environment inject awk code.
        if awk -v pct="${EFFECTIVE_PCT}" -v thr="${THRESHOLD}" \
            'BEGIN { exit !((pct + 0) >= (thr + 0)) }'; then
            :
        else
            echo "WARNING: Effective coverage ${EFFECTIVE_PCT}% is below threshold ${THRESHOLD}%" >&2
        fi
    fi
elif [[ -n "${JUSTIFICATION_YAML}" ]]; then
    echo "INFO: --yaml ${JUSTIFICATION_YAML} not found, skipping justification processing."
else
    echo "INFO: No --yaml provided, skipping justification processing."
fi

# ---------------------------------------------------------------------------
# Optional CI archive.
# ---------------------------------------------------------------------------
if [[ -n "${ARCHIVE_NAME}" ]]; then
    # Resolve the archive path against the workspace root (the current
    # directory at this point) so it stays valid inside the subshell below.
    case "${ARCHIVE_NAME}" in
        /*) ARCHIVE_PATH="${ARCHIVE_NAME}.zip" ;;
        *) ARCHIVE_PATH="${BUILD_WORKSPACE_DIRECTORY}/${ARCHIVE_NAME}.zip" ;;
    esac

    # Stage inside the scratch directory (removed by the EXIT trap) rather
    # than in a workspace-level `artifacts/` directory: creating and then
    # `rm -rf`-ing that path would destroy a directory of the same name that
    # belongs to the consumer.
    ARCHIVE_STAGING_ROOT="${TMPDIR_EXTRACT}/archive_staging"
    ARCHIVE_STAGING_DIR="${ARCHIVE_STAGING_ROOT}/artifacts"
    rm -rf "${ARCHIVE_STAGING_ROOT}"
    mkdir -p "${ARCHIVE_STAGING_DIR}"

    if [[ -d "${JUNIT_GLOB}" ]]; then
        # -H makes find follow the start path when it is a symlink, which
        # bazel-testlogs always is; without it find inspects only the link
        # itself and never finds a test.xml. Each file keeps its relative
        # path below the staging directory (portable `cp --parents`).
        while IFS= read -r -d '' junit_xml; do
            junit_dest="${ARCHIVE_STAGING_DIR}/${junit_xml}"
            mkdir -p "$(dirname "${junit_dest}")"
            # Best-effort, as before: one unreadable XML must not fail the run.
            cp "${junit_xml}" "${junit_dest}" 2>/dev/null || true
        done < <(find -H "${JUNIT_GLOB}" -name 'test.xml' -print0 2>/dev/null)
    fi

    cp -r "${OUTPUT_DIR}" "${ARCHIVE_STAGING_DIR}/"

    if [[ -f "${TMPDIR_EXTRACT}/lcov_report/lcov.dat" ]]; then
        cp "${TMPDIR_EXTRACT}/lcov_report/lcov.dat" "${ARCHIVE_STAGING_DIR}/coverage_report.dat"
    fi

    # `zip -r` updates an existing archive in place; start from scratch so
    # entries from an earlier run cannot linger.
    mkdir -p "$(dirname "${ARCHIVE_PATH}")"
    rm -f "${ARCHIVE_PATH}"
    (cd "${ARCHIVE_STAGING_ROOT}" && zip -r "${ARCHIVE_PATH}" artifacts/)
    rm -rf "${ARCHIVE_STAGING_ROOT}"
    echo "Coverage archive written to: ${ARCHIVE_NAME}.zip"
fi
In-code markers: COV_JUSTIFIED , COV_JUSTIFIED_START / COV_JUSTIFIED_STOP +""" + +import argparse +import json +import re +import sys +from pathlib import Path +from typing import Any, Dict, List, Set, Tuple + +import yaml + + +# Marker patterns +COV_JUSTIFIED_LINE_RE = re.compile(r"COV_JUSTIFIED\s+([\w-]+)") +COV_JUSTIFIED_START_RE = re.compile(r"COV_JUSTIFIED_START\s+([\w-]+)") +COV_JUSTIFIED_STOP_RE = re.compile(r"COV_JUSTIFIED_STOP") + +VALID_CATEGORIES = { + "defensive_programming", + "tool_false_positive", + "platform_specific", + "other", +} + + +def main() -> None: + """Main entry point.""" + args = parse_args() + + justifications_data = load_yaml(args.yaml) + validate_yaml(justifications_data) + + # Build lookup: id -> justification entry + justifications_by_id: Dict[str, Dict[str, Any]] = {} + for entry in justifications_data.get("justifications", []): + justifications_by_id[entry["id"]] = entry + + # Resolve all justified lines + resolved: Dict[str, Dict[int, Dict[str, str]]] = {} + warnings: List[str] = [] + errors: List[str] = [] + + # 1. Process YAML direct locations + for entry in justifications_data.get("justifications", []): + for location in entry.get("locations", []): + file_path = location["file"] + full_path = Path(args.source_root) / file_path + + if not full_path.exists(): + errors.append( + f"File not found for justification '{entry['id']}': {file_path}" + ) + continue + + lines = resolve_location_lines(location) + if file_path not in resolved: + resolved[file_path] = {} + for line in lines: + resolved[file_path][line] = { + "id": entry["id"], + "category": entry["category"], + "reason": entry["reason"].strip(), + } + + # 2. 
def resolve_location_lines(location: Dict[str, Any]) -> List[int]:
    """Resolve the line numbers named by one YAML ``locations`` entry.

    Exactly one form is used, in this order of precedence: an explicit
    ``lines`` list, an inclusive ``line_start``/``line_end`` range, or a
    single ``line``. An entry with none of them resolves to no lines.

    Args:
        location: One mapping from a justification's ``locations`` list.

    Returns:
        A new list of 1-based line numbers (possibly empty).
    """
    if "lines" in location:
        # Copy, so the caller cannot mutate the parsed YAML through the result.
        return list(location["lines"])
    if "line_start" in location and "line_end" in location:
        return list(range(location["line_start"], location["line_end"] + 1))
    if "line" in location:
        return [location["line"]]
    return []


def scan_file_for_markers(
    file_path: Path,
    rel_path: str,
    justifications_by_id: Dict[str, Dict[str, Any]],
) -> Tuple[List[str], Dict[int, Dict[str, str]]]:
    """Scan one source file for COV_JUSTIFIED markers.

    Recognised markers:
      * ``COV_JUSTIFIED <id>`` justifies the line it appears on.
      * ``COV_JUSTIFIED_START <id>`` ... ``COV_JUSTIFIED_STOP`` justifies the
        lines strictly between the two markers. Regions may nest.

    Args:
        file_path: File to read.
        rel_path: Path used in warning messages.
        justifications_by_id: Known justification entries keyed by ID; each
            entry provides ``category`` and ``reason``.

    Returns:
        ``(warnings, justified_lines)`` where ``justified_lines`` maps a
        1-based line number to ``{"id", "category", "reason"}``.
    """
    warnings: List[str] = []
    justified_lines: Dict[int, Dict[str, str]] = {}

    try:
        with open(file_path, "r", encoding="utf-8", errors="replace") as f:
            lines = f.readlines()
    except OSError as exc:
        # Scanning stays best-effort, but an unreadable file is reported
        # instead of being skipped without a trace.
        warnings.append(f"{rel_path}: could not be read ({exc}); markers not scanned")
        return warnings, justified_lines

    def _justification_for(jid: str) -> Dict[str, str]:
        entry = justifications_by_id[jid]
        return {
            "id": jid,
            "category": entry["category"],
            "reason": entry["reason"].strip(),
        }

    region_stack: List[Tuple[int, str]] = []  # (start_line, justification_id)

    for line_num, line in enumerate(lines, start=1):
        # Check for COV_JUSTIFIED_START
        start_match = COV_JUSTIFIED_START_RE.search(line)
        if start_match:
            jid = start_match.group(1)
            if jid not in justifications_by_id:
                warnings.append(
                    f"{rel_path}:{line_num}: COV_JUSTIFIED_START references "
                    f"unknown ID '{jid}'"
                )
            # Push unknown IDs as well. Otherwise the STOP that belongs to
            # this START would close the enclosing region early (or be
            # reported as unmatched). The STOP branch ignores unknown IDs.
            region_stack.append((line_num, jid))
            continue

        # Check for COV_JUSTIFIED_STOP
        if COV_JUSTIFIED_STOP_RE.search(line):
            if not region_stack:
                warnings.append(
                    f"{rel_path}:{line_num}: COV_JUSTIFIED_STOP without matching START"
                )
            else:
                start_line, jid = region_stack.pop()
                if jid in justifications_by_id:
                    info = _justification_for(jid)
                    # The marker lines themselves are not justified.
                    for ln in range(start_line + 1, line_num):
                        justified_lines[ln] = dict(info)
            continue

        # Check for single-line COV_JUSTIFIED (but not START/STOP)
        if "COV_JUSTIFIED_START" in line or "COV_JUSTIFIED_STOP" in line:
            continue
        line_match = COV_JUSTIFIED_LINE_RE.search(line)
        if not line_match:
            continue
        jid = line_match.group(1)
        if jid not in justifications_by_id:
            warnings.append(
                f"{rel_path}:{line_num}: COV_JUSTIFIED references "
                f"unknown ID '{jid}'"
            )
        else:
            justified_lines[line_num] = _justification_for(jid)

    # Check for unclosed regions
    for start_line, jid in region_stack:
        warnings.append(
            f"{rel_path}:{start_line}: COV_JUSTIFIED_START '{jid}' without matching STOP"
        )

    return warnings, justified_lines
def collect_source_files(source_root: Path, file_filter: str) -> List[Path]:
    """Collect the source files to scan for in-code markers.

    Args:
        source_root: Directory searched recursively.
        file_filter: Comma-separated file extensions (``"cpp,h"``). Blank
            entries and a leading dot are ignored; an empty filter selects
            ``cpp``, ``h``, ``hpp`` and ``cc``.

    Returns:
        The matching files, sorted, each listed once.
    """
    requested = file_filter.split(",") if file_filter else ["cpp", "h", "hpp", "cc"]

    extensions: List[str] = []
    for item in requested:
        ext = item.strip().lstrip(".")
        # Skip blanks (e.g. a trailing comma) and repeats; a repeated
        # extension would otherwise list every matching file twice.
        if ext and ext not in extensions:
            extensions.append(ext)

    found: Set[Path] = set()
    for ext in extensions:
        found.update(p for p in source_root.rglob(f"*.{ext}") if p.is_file())
    return sorted(found)


def load_yaml(yaml_path: Path) -> Dict[str, Any]:
    """Load the YAML justification database.

    Exits with status 1 when the file is missing or is not valid YAML. The
    result is whatever the document's root is; ``validate_yaml`` checks that
    it is a mapping.
    """
    if not yaml_path.exists():
        print(f"ERROR: Justification YAML not found: {yaml_path}", file=sys.stderr)
        sys.exit(1)

    with open(yaml_path, "r", encoding="utf-8") as f:
        content = f.read()

    try:
        return yaml.safe_load(content)
    except yaml.YAMLError as exc:
        # Report a syntax error like every other input problem instead of
        # letting it escape as a traceback.
        print(f"ERROR: Failed to parse justification YAML {yaml_path}: {exc}", file=sys.stderr)
        sys.exit(1)


def _is_int(value: Any) -> bool:
    """Return True for real integers; ``bool`` is an ``int`` subclass but is rejected."""
    return isinstance(value, int) and not isinstance(value, bool)


def _fail_validation(errors: List[str]) -> None:
    """Print every validation error with the common prefix and exit with status 1."""
    for e in errors:
        print(f"ERROR: YAML validation: {e}", file=sys.stderr)
    sys.exit(1)


def _validate_location(loc: Any, loc_prefix: str) -> List[str]:
    """Return the validation errors for one ``locations`` item."""
    if not isinstance(loc, dict):
        return [f"{loc_prefix}: must be a mapping, got {type(loc).__name__}"]

    errors: List[str] = []
    if "file" not in loc:
        errors.append(f"{loc_prefix}: missing 'file'")
    elif not isinstance(loc["file"], str):
        errors.append(
            f"{loc_prefix}: 'file' must be a string, "
            f"got {type(loc['file']).__name__}"
        )

    for int_field in ("line", "line_start", "line_end"):
        if int_field in loc and not _is_int(loc[int_field]):
            errors.append(
                f"{loc_prefix}: '{int_field}' must be an integer, "
                f"got {type(loc[int_field]).__name__}"
            )

    if "lines" in loc:
        if not isinstance(loc["lines"], list):
            errors.append(
                f"{loc_prefix}: 'lines' must be a list, "
                f"got {type(loc['lines']).__name__}"
            )
        elif not all(_is_int(ln) for ln in loc["lines"]):
            errors.append(f"{loc_prefix}: 'lines' must contain only integers")

    return errors


def _validate_entry(entry: Any, prefix: str, seen_ids: Set[str]) -> List[str]:
    """Return the validation errors for one justification entry.

    ``seen_ids`` is updated with the entry's ID so duplicates are detected
    across entries.
    """
    if not isinstance(entry, dict):
        return [f"{prefix}: must be a mapping, got {type(entry).__name__}"]
    if "id" not in entry:
        return [f"{prefix}: missing 'id'"]

    jid = entry["id"]
    if not isinstance(jid, str):
        return [f"{prefix}: 'id' must be a string, got {type(jid).__name__}"]

    errors: List[str] = []
    if jid in seen_ids:
        errors.append(f"{prefix}: duplicate ID '{jid}'")
    seen_ids.add(jid)

    if not re.match(r"^[a-z0-9]+(-[a-z0-9]+)*$", jid):
        errors.append(f"{prefix}: ID '{jid}' must be kebab-case")

    if "category" not in entry:
        errors.append(f"{prefix}: missing 'category'")
    elif not isinstance(entry["category"], str):
        errors.append(
            f"{prefix}: 'category' must be a string, "
            f"got {type(entry['category']).__name__}"
        )
    elif entry["category"] not in VALID_CATEGORIES:
        errors.append(
            f"{prefix}: invalid category '{entry['category']}'. "
            f"Must be one of: {sorted(VALID_CATEGORIES)}"
        )

    if "reason" not in entry:
        errors.append(f"{prefix}: missing 'reason'")
    elif not isinstance(entry["reason"], str):
        errors.append(
            f"{prefix}: 'reason' must be a string, "
            f"got {type(entry['reason']).__name__}"
        )
    elif not entry["reason"].strip():
        errors.append(f"{prefix}: 'reason' must not be empty")

    if "locations" in entry:
        if not isinstance(entry["locations"], list):
            errors.append(
                f"{prefix}: 'locations' must be a list, "
                f"got {type(entry['locations']).__name__}"
            )
        else:
            for j, loc in enumerate(entry["locations"]):
                errors.extend(_validate_location(loc, f"{prefix}.locations[{j}]"))

    return errors


def validate_yaml(data: Dict[str, Any]) -> None:
    """Validate the justification YAML structure and types.

    Returns normally when ``data`` is valid. Otherwise prints one
    ``ERROR: YAML validation: ...`` line per problem to stderr and exits with
    status 1. Structural problems (root not a mapping, ``justifications``
    missing or not a list) stop validation immediately; entry-level problems
    are collected and reported together.
    """
    try:
        if not isinstance(data, dict):
            _fail_validation(["root must be a mapping"])

        errors: List[str] = []

        if "version" not in data:
            errors.append("Missing 'version' field")
        elif not _is_int(data["version"]):
            errors.append(f"'version' must be an integer, got {type(data['version']).__name__}")

        if "justifications" not in data:
            errors.append("Missing 'justifications' field")
            _fail_validation(errors)

        if not isinstance(data["justifications"], list):
            errors.append(
                f"'justifications' must be a list, got {type(data['justifications']).__name__}"
            )
            _fail_validation(errors)

        seen_ids: Set[str] = set()
        for i, entry in enumerate(data["justifications"]):
            errors.extend(_validate_entry(entry, f"justifications[{i}]", seen_ids))

        if errors:
            _fail_validation(errors)
    except Exception as error:
        # SystemExit is not an Exception, so the exits above pass through;
        # this only converts unexpected failures into a reported error.
        print(f"ERROR: YAML validation: {error}", file=sys.stderr)
        sys.exit(1)


def parse_args() -> argparse.Namespace:
    """Parse the command-line arguments of the justification processor."""
    parser = argparse.ArgumentParser(
        description="Coverage justification processor"
    )
    parser.add_argument(
        "--yaml",
        type=Path,
        required=True,
        help="Path to coverage_justifications.yaml",
    )
    parser.add_argument(
        "--source-root",
        type=Path,
        required=True,
        help="Root directory of source files",
    )
    parser.add_argument(
        "--output",
        type=Path,
        required=True,
        help="Output path for resolved justification manifest (JSON)",
    )
    parser.add_argument(
        "--file-filter",
        type=str,
        default="cpp,h,hpp,cc",
        help="Comma-separated file extensions to scan (default: cpp,h,hpp,cc)",
    )
    return parser.parse_args()


if __name__ == "__main__":
    main()
+It receives profraw files from test execution, merges them into profdata, generates +an HTML coverage report using llvm-cov show, and packages everything into a zip file +that the reporter can later aggregate. + +Expected Bazel interface (from collect_coverage.sh): + --coverage_dir= Directory containing *.profraw files + --output_file= Where to write the output (zip) + --source_file_manifest= File listing instrumented sources and object files + --filter_sources= Source path regexes to exclude (repeatable) + [--sources_to_replace_file=] Optional source mapping file +""" + +import argparse +import json +import os +import subprocess +import sys +import zipfile +from pathlib import Path +from typing import List, Set + + +def main() -> None: + args = parse_args() + + # Get object files from the manifest. + object_files = get_object_files_from_manifest(args.source_file_manifest) + if not object_files: + print("INFO: No instrumented object files found, skipping coverage.", file=sys.stderr) + cleanup_dangling_symlinks(args.coverage_dir) + sys.exit(0) + + # Find profraw files. + profraw_files = sorted(args.coverage_dir.glob("*.profraw")) + if not profraw_files: + print("INFO: No *.profraw files found, skipping coverage.", file=sys.stderr) + cleanup_dangling_symlinks(args.coverage_dir) + sys.exit(0) + + # Merge profraw → profdata. + profdata_dir = args.coverage_dir / "profdata" + profdata_dir.mkdir(exist_ok=True) + profdata_file = profdata_dir / "target.profdata" + + llvm_profdata = os.environ.get("LLVM_PROFDATA") + if not llvm_profdata: + print( + "ERROR: LLVM_PROFDATA environment variable is not set. " + "Ensure coverage.bazelrc is imported and the llvm toolchain is registered.", + file=sys.stderr, + ) + sys.exit(1) + run_command([ + llvm_profdata, "merge", + "--sparse", + "--output", str(profdata_file), + ] + [str(f) for f in profraw_files]) + + # Create meta.json with object files for the reporter. 
+ meta_dir = args.coverage_dir / "meta" + meta_dir.mkdir(exist_ok=True) + meta = { + "object_files": [os.path.realpath(f) for f in sorted(object_files)], + } + with open(meta_dir / "meta.json", "w", encoding="utf-8") as f: + json.dump(meta, f) + + # Package into zip at output_file. + create_zip( + root=args.coverage_dir, + directories=[profdata_dir, meta_dir], + output_file=args.output_file, + ) + + # Clean up dangling symlinks in coverage_dir that would cause Bazel tree + # artifact validation to fail (e.g. the 'gcov' symlink created by + # collect_cc_coverage.sh's init_gcov() pointing into the destroyed sandbox). + cleanup_dangling_symlinks(args.coverage_dir) + + target = os.environ.get("TEST_TARGET", "unknown") + print(f"INFO: Coverage merger completed for '{target}'", file=sys.stderr) + + +def cleanup_dangling_symlinks(directory: Path) -> None: + """Remove symlinks in the coverage directory that would become dangling. + + Bazel's tree artifact validation rejects directories containing dangling + symlinks. The 'gcov' symlink created by collect_cc_coverage.sh's init_gcov() + points into the sandbox which is torn down before validation runs. Since we + use llvm-cov directly, this symlink is not needed. + """ + gcov_link = directory / "gcov" + if gcov_link.is_symlink(): + gcov_link.unlink() + + # Also remove any other symlinks pointing into sandbox paths. + for entry in directory.iterdir(): + if entry.is_symlink(): + target = os.readlink(entry) + if "sandbox" in target: + entry.unlink() + + +def get_object_files_from_manifest(source_file_manifest: Path) -> Set[str]: + """Parse the coverage manifest to find instrumented object files.""" + runfiles_dir = Path(os.environ.get("RUNFILES_DIR", "")) / os.environ.get("TEST_WORKSPACE", "_main") + root_env = os.environ.get("ROOT") + if not root_env: + print( + "ERROR: ROOT environment variable is not set. 
" + "This is normally set by Bazel when invoking the coverage output generator.", + file=sys.stderr, + ) + sys.exit(1) + exec_root = Path(root_env) + + object_files = set() + with open(source_file_manifest, encoding="utf-8") as f: + manifests = [line.strip() for line in f.readlines()] + + for manifest in manifests: + if "objects_list.txt" in manifest: + with open(manifest, encoding="utf-8") as f: + for line in f: + obj_path = line.strip() + if not obj_path: + continue + # Try runfiles first, then exec_root. + candidate = runfiles_dir / obj_path + if candidate.exists(): + object_files.add(str(candidate)) + else: + object_files.add(str(exec_root / obj_path)) + + return object_files + + +def run_command(cmd: List[str]) -> subprocess.CompletedProcess: + """Run a command and exit on failure.""" + try: + return subprocess.run( + cmd, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ) + except subprocess.CalledProcessError as e: + print(f"ERROR: Command failed with code {e.returncode}:", file=sys.stderr) + print(f" {' '.join(cmd)}", file=sys.stderr) + if e.stdout: + print(e.stdout, file=sys.stderr) + sys.exit(1) + + +def create_zip(root: Path, directories: List[Path], output_file: Path) -> None: + """Create a zip file from the given directories relative to root.""" + with zipfile.ZipFile(output_file, "w", zipfile.ZIP_DEFLATED) as zf: + for directory in directories: + if not directory.exists(): + continue + for dirpath, _, files in os.walk(directory): + for filename in files: + file_path = Path(dirpath) / filename + arcname = file_path.relative_to(root) + zf.write(file_path, arcname) + + +def parse_args() -> argparse.Namespace: + """Parse command-line arguments matching the Bazel LCOV_MERGER interface.""" + parser = argparse.ArgumentParser(description="LLVM coverage merger for Bazel") + parser.add_argument("--coverage_dir", type=Path, required=True) + parser.add_argument("--output_file", type=Path, required=True) + 
parser.add_argument("--source_file_manifest", type=Path, required=True) + parser.add_argument("--filter_sources", action="append", default=[]) + parser.add_argument("--sources_to_replace_file", type=str, default=None) + return parser.parse_args() + + +if __name__ == "__main__": + main() diff --git a/coverage/reporter.py b/coverage/reporter.py new file mode 100644 index 0000000..67fe67a --- /dev/null +++ b/coverage/reporter.py @@ -0,0 +1,754 @@ +#!/usr/bin/env python3 +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +"""Final coverage report generator using llvm-cov. + +This script is invoked by Bazel as the --coverage_report_generator after all tests +complete. It reads the per-test zip files produced by the merger, merges all profdata +into one, and generates the final combined HTML report. + +Expected Bazel interface: + --reports_file= Text file listing paths to all per-test coverage outputs + --output_file= Where to write the final report (zip) +""" + +import argparse +import json +import os +import re +import subprocess +import sys +import zipfile +from pathlib import Path, PurePosixPath +from typing import List, Set, Tuple +from python.runfiles import Runfiles + + +def main() -> None: + """Main entry point.""" + args = parse_args() + + # Read the list of per-test report files. 
+ reports = read_reports_file(args.reports_file) + if not reports: + print("INFO: No coverage reports found.", file=sys.stderr) + write_empty_output(args.output_file) + sys.exit(0) + + # Extract profdata and object files from each per-test zip. + valid_profdata_files, valid_object_files = extract_reports(reports) + + if not valid_profdata_files or not valid_object_files: + print("INFO: No valid profdata or object files found.", file=sys.stderr) + write_empty_output(args.output_file) + sys.exit(0) + + # Get llvm tools via runfiles. The rlocation paths are passed in by the + # wrapper script (generated by score_coverage_reporter macro) so the + # llvm toolchain repository can be named anything the consumer prefers. + r = Runfiles.Create() + llvm_cov_resolved = r.Rlocation(args.llvm_cov) + if not llvm_cov_resolved: + print(f"ERROR: llvm-cov not found in runfiles via {args.llvm_cov}", file=sys.stderr) + sys.exit(1) + llvm_bin_path = Path(llvm_cov_resolved) + + llvm_profdata_resolved = r.Rlocation(args.llvm_profdata) + if not llvm_profdata_resolved: + print(f"ERROR: llvm-profdata not found in runfiles via {args.llvm_profdata}", file=sys.stderr) + sys.exit(1) + + # Merge all profdata files. + merged_profdata = Path.cwd() / "merged_coverage.profdata" + run_command([ + llvm_profdata_resolved, "merge", + "--sparse", + "--output", str(merged_profdata), + ] + sorted(valid_profdata_files)) + + # Build coverage arguments. + coverage_args = ["--instr-profile", str(merged_profdata)] + for obj in sorted(valid_object_files): + coverage_args.extend(["--object", obj]) + + # Get filter regexes and workspace root. 
+ filter_regexes = load_filter_regexes(r, args.filter_regexes) + module_bazel_resolved = r.Rlocation(args.module_bazel) + if not module_bazel_resolved: + print(f"ERROR: MODULE.bazel not found in runfiles via {args.module_bazel}", file=sys.stderr) + sys.exit(1) + workspace_root = _resolve_workspace_root(module_bazel_resolved) + + common_show_args = { + "llvm_bin_path": llvm_bin_path, + "coverage_args": coverage_args, + "filter_regexes": sorted(filter_regexes), + "workspace_root": workspace_root, + } + + # Generate HTML report. + html_report_dir = Path.cwd() / "html_report" + run_llvm_cov_show( + **common_show_args, + output_format="html", + html_report_dir=html_report_dir, + ) + lcov_report_dir = Path.cwd() / "lcov_report" + lcov_report_dir.mkdir(exist_ok=True) + lcov_result = run_llvm_cov_export( + llvm_bin_path=llvm_bin_path, + coverage_args=coverage_args, + filter_regexes=sorted(filter_regexes), + workspace_root=workspace_root, + ) + lcov_text = lcov_result.stdout + + # Augment with 0%-coverage entries for files that no test linked against. 
+ untested_pairs: List[Tuple[str, str]] = [] + untested_sources: List[str] = [] + if args.instrumented_sources_manifest: + manifest_path = r.Rlocation(args.instrumented_sources_manifest) + if not manifest_path or not Path(manifest_path).exists(): + print( + f"WARNING: instrumented sources manifest not found via " + f"{args.instrumented_sources_manifest}", + file=sys.stderr, + ) + else: + covered = _covered_sources_from_lcov(lcov_text) + untested_pairs = _find_untested_sources( + manifest_path=Path(manifest_path), + workspace_root=workspace_root, + covered_sources=covered, + filter_regexes=sorted(filter_regexes), + ) + untested_sources = [abs_path for abs_path, _rel in untested_pairs] + if untested_sources: + print( + f"INFO: Augmenting report with {len(untested_sources)} " + f"untested source file(s).", + file=sys.stderr, + ) + lcov_text = _append_zero_coverage_lcov( + lcov_text, untested_sources, workspace_root + ) + + with open(lcov_report_dir / "lcov.dat", "w", encoding="utf-8") as f: + f.write(lcov_text) + + # Augment the HTML report with 0%-coverage pages for untested files. + if untested_pairs: + _augment_html_with_untested( + html_report_dir=html_report_dir, + untested_sources=untested_pairs, + lcov_text=lcov_text, + ) + + # Generate text summary. + text_report_dir = Path.cwd() / "text_report" + text_report_dir.mkdir(exist_ok=True) + summary = run_llvm_cov_report( + llvm_bin_path=llvm_bin_path, + coverage_args=coverage_args, + filter_regexes=sorted(filter_regexes), + ) + summary_text = summary.stdout + if untested_sources: + summary_text = _augment_text_summary(summary_text, untested_sources, lcov_text) + with open(text_report_dir / "summary.txt", "w", encoding="utf-8") as f: + f.write(summary_text) + print(summary_text, file=sys.stderr) + + # Package everything into the output zip. 
+ directories = [html_report_dir, lcov_report_dir, text_report_dir] + create_zip( + root=Path.cwd(), + directories=directories, + output_file=args.output_file, + ) + + print(f"INFO: Coverage reporter completed. Output: {args.output_file}", file=sys.stderr) + + +def run_llvm_cov_show( + llvm_bin_path: Path, + coverage_args: List[str], + filter_regexes: List[str], + workspace_root: str, + output_format: str, + html_report_dir: Path = None, +) -> subprocess.CompletedProcess: + """Run llvm-cov show.""" + cmd = [ + str(llvm_bin_path), + "show", + f"--format={output_format}", + f"--path-equivalence=/proc/self/cwd/,{workspace_root}", + f"--compilation-dir={workspace_root}", + "--show-branches=count", + "--show-region-summary=0", + ] + + cxxfilt = llvm_bin_path.parent / "llvm-cxxfilt" + if cxxfilt.exists(): + cmd.append(f"--Xdemangler={cxxfilt}") + + for regex in filter_regexes: + adjusted = regex.replace("/proc/self/cwd/", workspace_root) + cmd.append(f"--ignore-filename-regex={adjusted}") + + if html_report_dir: + cmd.append(f"--output-dir={html_report_dir}") + cmd.append("--coverage-watermark=100,50") + cmd.append("--show-expansions") + + cmd.extend(coverage_args) + return run_command(cmd) + + +def run_llvm_cov_export( + llvm_bin_path: Path, + coverage_args: List[str], + filter_regexes: List[str], + workspace_root: str, +) -> subprocess.CompletedProcess: + """Run llvm-cov export to produce LCOV format.""" + cmd = [ + str(llvm_bin_path), + "export", + "--format=lcov", + f"--path-equivalence=/proc/self/cwd/,{workspace_root}", + f"--compilation-dir={workspace_root}", + ] + + for regex in filter_regexes: + adjusted = regex.replace("/proc/self/cwd/", workspace_root) + cmd.append(f"--ignore-filename-regex={adjusted}") + + cmd.extend(coverage_args) + return run_command(cmd, separate_stderr=True) + + +def run_llvm_cov_report( + llvm_bin_path: Path, + coverage_args: List[str], + filter_regexes: List[str], +) -> subprocess.CompletedProcess: + """Run llvm-cov report for a 
summary.""" + cmd = [ + str(llvm_bin_path), + "report", + "--summary-only", + "--show-region-summary=0", + "--show-branch-summary=1", + ] + + for regex in filter_regexes: + cmd.append(f"--ignore-filename-regex={regex}") + + cmd.extend(coverage_args) + return run_command(cmd, separate_stderr=True) + + +def extract_reports(reports: List[str]) -> Tuple[Set[str], Set[str]]: + """Extract profdata and object files from per-test zip files.""" + valid_profdata_files = set() + valid_object_files = set() + + for i, report_path in enumerate(reports): + # Skip baseline_coverage files (LCOV format, not our zip). + if "baseline_coverage" in report_path: + continue + + report = Path(report_path) + if not report.exists() or report.stat().st_size == 0: + continue + + # Check if it's a valid zip. + if not zipfile.is_zipfile(report): + continue + + profdata_name = f"coverage_report_{i:08d}.profdata" + + try: + with zipfile.ZipFile(report, "r") as archive: + # Extract meta. + meta_json = archive.read("meta/meta.json") + target_meta = json.loads(meta_json) + + # Extract profdata. + profdata_content = archive.read("profdata/target.profdata") + profdata_path = Path.cwd() / profdata_name + with open(profdata_path, "wb") as f: + f.write(profdata_content) + + valid_profdata_files.add(str(profdata_path)) + + # Collect object files. 
+ for obj in target_meta.get("object_files", []): + if obj and Path(obj).exists(): + valid_object_files.add(os.path.realpath(obj)) + + except (zipfile.BadZipFile, KeyError, json.JSONDecodeError) as e: + print(f"WARNING: Skipping invalid report {report_path}: {e}", file=sys.stderr) + continue + + return valid_profdata_files, valid_object_files + +def read_reports_file(reports_file: Path) -> List[str]: + """Read the reports file listing all per-test coverage outputs.""" + with open(reports_file, encoding="utf-8") as f: + return [line.strip() for line in f if line.strip()] + + +def load_filter_regexes(runfiles: Runfiles, rlocation_path: str) -> List[str]: + """Load filter regexes from filter_regexes.txt via Bazel runfiles.""" + path = runfiles.Rlocation(rlocation_path) + if not path or not Path(path).exists(): + print(f"WARNING: {rlocation_path} not found in runfiles, no source filtering applied", + file=sys.stderr) + return [] + + lines = Path(path).read_text(encoding="utf-8").splitlines() + return [line.strip() for line in lines if line.strip() and not line.strip().startswith("#")] + + +def write_empty_output(output_file: Path) -> None: + """Write an empty file as output when there's nothing to report.""" + with open(output_file, "w", encoding="utf-8") as f: + f.write("") + + +def run_command(cmd: List[str], separate_stderr: bool = False) -> subprocess.CompletedProcess: + """Run a command and exit on failure. + + When separate_stderr=True, stderr is captured separately from stdout so + that callers which parse stdout as structured data (e.g. LCOV text) are + not polluted by llvm-cov warning messages. 
+ """ + stderr_target = subprocess.PIPE if separate_stderr else subprocess.STDOUT + try: + return subprocess.run( + cmd, + check=True, + stdout=subprocess.PIPE, + stderr=stderr_target, + text=True, + ) + except subprocess.CalledProcessError as e: + print(f"ERROR: Command failed with code {e.returncode}:", file=sys.stderr) + print(f" {' '.join(cmd)}", file=sys.stderr) + output = (e.stdout or "") + (e.stderr or "") + if output: + print(output, file=sys.stderr) + sys.exit(1) + + +def create_zip(root: Path, directories: List[Path], output_file: Path) -> None: + """Create a zip file from the given directories relative to root.""" + with zipfile.ZipFile(output_file, "w", zipfile.ZIP_DEFLATED) as zf: + for directory in directories: + if not directory.exists(): + continue + for dirpath, _, files in os.walk(directory): + for filename in files: + file_path = Path(dirpath) / filename + arcname = file_path.relative_to(root) + zf.write(file_path, arcname) + + +def _resolve_workspace_root(module_bazel_resolved: str) -> str: + """Return the real workspace root directory for a resolved MODULE.bazel rlocation. + + Rlocation() returns the runfiles-tree path, which is a symlink into the + current action's sandbox when running under linux-sandbox. Taking its + parent without resolving the symlink yields an ephemeral path buried + inside this action's own sandbox (e.g. + .../reporter_wrapper.sh.runfiles/_main/) that stops existing once the + action finishes - any SF: entry or HTML link built from it points + nowhere once the report is extracted and used. Resolving first yields + the real, stable workspace directory instead. 
+ """ + return str(Path(module_bazel_resolved).resolve().parent) + "/" + + +def _covered_sources_from_lcov(lcov_text: str) -> Set[str]: + """Return the set of absolute source paths that appear in an LCOV report.""" + sources: Set[str] = set() + for line in lcov_text.splitlines(): + if line.startswith("SF:"): + sources.add(os.path.realpath(line[3:].strip())) + return sources + + +def _lcov_totals(lcov_text: str) -> Tuple[int, int]: + """Return (total_LH, total_LF) summed across every record in an LCOV report. + + Intended to be called on the final, already-augmented LCOV text (real + llvm-cov records plus the synthetic 0%-coverage records for untested + files), so the result reflects combined line coverage across both. + """ + total_lh = 0 + total_lf = 0 + for line in lcov_text.splitlines(): + if line.startswith("LH:"): + total_lh += int(line[3:].strip()) + elif line.startswith("LF:"): + total_lf += int(line[3:].strip()) + return total_lh, total_lf + + +def _find_untested_sources( + manifest_path: Path, + workspace_root: str, + covered_sources: Set[str], + filter_regexes: List[str], +) -> List[Tuple[str, str]]: + """Read the manifest and return entries not present in covered_sources. + + Returns a list of (resolved_absolute_path, manifest_relative_path) pairs, + sorted by absolute path. The manifest-relative path is kept alongside the + resolved path so callers can display a clean workspace-relative name + instead of the fully-resolved on-disk path (see _augment_html_with_untested). + + Manifest entries are workspace-relative paths. Filter regexes from the + consumer are applied so that the same exclusions that affect llvm-cov also + affect the synthesized entries. Entries that do not resolve to an existing + file on disk are dropped silently (typically generated files or stale + manifest content). + + Path traversal is rejected based on the manifest-relative path itself + (rejecting ".." 
components), not by resolving symlinks and checking + containment: manifest entries are runfiles, which are frequently symlinks + that legitimately resolve outside workspace_root (e.g. to the real + on-disk source tree when Bazel runs this sandboxed). Resolving first and + then checking containment would silently drop every such file. + """ + ws = Path(workspace_root) + compiled_filters = [re.compile(r) for r in filter_regexes if r] + + untested: List[Tuple[str, str]] = [] + seen: Set[str] = set() + raw = manifest_path.read_text(encoding="utf-8") + for entry in raw.splitlines(): + rel = entry.strip() + if not rel: + continue + rel_path = PurePosixPath(rel) + if rel_path.is_absolute() or ".." in rel_path.parts: + continue + abs_path = ws / rel + if not abs_path.exists() or not abs_path.is_file(): + continue + abs_str = str(abs_path.resolve()) + if abs_str in covered_sources or abs_str in seen: + continue + if any(rx.search(abs_str) or rx.search(rel) for rx in compiled_filters): + continue + seen.add(abs_str) + untested.append((abs_str, rel)) + return sorted(untested, key=lambda pair: pair[0]) + + +_NON_EXECUTABLE_RE = re.compile( + r"^\s*(" + r"[{}]" # lone braces + r"|//.*" # line comments + r"|/\*.*" # block comment open + r"|\*(?:[/\s].*)?" 
# block comment continuation (* text) / close (*/), lone * + r"|\*/.*" # block comment close + r"|#\s*(?:include|define|undef|if|ifdef|ifndef|elif|else|endif|pragma|error|warning)\b.*" + r"|namespace\b[^{;]*[{]?\s*" # namespace declarations + r"|}\s*//.*" # closing brace with comment + r"|}\s*namespace.*" # closing namespace + r"|public\s*:|private\s*:|protected\s*:" + r")\s*$" +) + + +def _is_likely_executable(line: str) -> bool: + """Heuristic: return True if a line is likely an executable C/C++ statement.""" + stripped = line.strip() + if not stripped: + return False + return _NON_EXECUTABLE_RE.match(line) is None + + +def _count_instrumentable_lines(path: str) -> Tuple[List[int], int]: + """Return (list of 1-based likely-executable line numbers, count). + + This is a conservative approximation. Without a real parser we cannot + perfectly distinguish executable from non-executable lines. The heuristic + excludes blank lines, comments, preprocessor directives, lone braces, + namespace declarations, and access specifiers. + """ + line_numbers: List[int] = [] + try: + with open(path, "r", encoding="utf-8", errors="replace") as f: + for i, line in enumerate(f, 1): + if _is_likely_executable(line): + line_numbers.append(i) + except OSError: + pass + return line_numbers, len(line_numbers) + + +def _append_zero_coverage_lcov( + lcov_text: str, untested_sources: List[str], workspace_root: str +) -> str: + """Append synthetic zero-coverage LCOV records for each untested source. + + LF/DA values are heuristic estimates from _count_instrumentable_lines, + not actual llvm-cov instrumentation data. The counts approximate what + llvm-cov would report but may differ for templates, inlined constructors, + multi-line statements, initializer lists, and lambdas. 
+ """ + blocks: List[str] = [] + for abs_path in untested_sources: + line_numbers, lf = _count_instrumentable_lines(abs_path) + if lf == 0: + continue + da = "\n".join(f"DA:{n},0" for n in line_numbers) + block = ( + f"SF:{abs_path}\n" + f"{da}\n" + f"LF:{lf}\n" + f"LH:0\n" + "end_of_record\n" + ) + blocks.append(block) + if not blocks: + return lcov_text + sep = "" if lcov_text.endswith("\n") else "\n" + return lcov_text + sep + "".join(blocks) + + +def _augment_text_summary(summary_text: str, untested_sources: List[str], lcov_text: str) -> str: + """Append a banner to the llvm-cov report summary for untested files. + + The TOTALS line from llvm-cov is left untouched because the heuristic + line count (_count_instrumentable_lines) is only an approximation — it + cannot replicate what llvm-cov would report for actually-instrumented + object files. Rewriting TOTALS with approximate numbers would give a + false sense of precision. Instead we append a clearly-labelled banner + so that CI consumers and reviewers see the gap without mistaking an + estimate for an exact measurement. + + The combined percentage below is derived from lcov_text (the final, + already-augmented LCOV report), so it shares the same estimate for the + untested files' totals as the rest of the banner. + """ + extra_lines_found = 0 + for abs_path in untested_sources: + _, lf = _count_instrumentable_lines(abs_path) + extra_lines_found += lf + + total_lh, total_lf = _lcov_totals(lcov_text) + combined_pct = (100.0 * total_lh / total_lf) if total_lf else 0.0 + + banner = ( + f"\n[score-coverage] WARNING: {len(untested_sources)} source file(s) " + f"not linked into any test (~{extra_lines_found} instrumentable lines, " + f"estimated via heuristic). These files are absent from the TOTALS above " + f"and contribute 0% coverage. See lcov.dat and the HTML report for details.\n" + f"[score-coverage] Estimated combined line coverage (incl. 
untested " + f"files): ~{combined_pct:.2f}% ({total_lh}/{total_lf} lines).\n" + ) + return summary_text + banner + + +_UNTESTED_HTML_TEMPLATE = """ + + + + + {title} + + +

Coverage Report

+

{title}

+

+Not linked into any test. This source file is reachable from +the configured coverage targets but no test binary instruments it, so every +line is reported as uncovered. +

+ + +
+{body}
+
+ + +""" + + +def _augment_html_with_untested( + html_report_dir: Path, + untested_sources: List[Tuple[str, str]], + lcov_text: str, +) -> None: + """Create per-file HTML pages for untested sources and link them from index. + + The pages are intentionally minimal: llvm-cov's per-source HTML format is + not easily reproducible without the full coverage mapping, so we render a + plain source dump with a banner that explains the file was not exercised. + The index page gets a new "Not Linked Into Tests" section listing the + files at 0% coverage so the gap is visible to reviewers. + """ + if not html_report_dir.exists(): + return + + coverage_subdir = html_report_dir / "coverage" + output_root = coverage_subdir if coverage_subdir.exists() else html_report_dir + + entries: List[Tuple[str, str, int]] = [] # (rel_source, href, num_lines) + for abs_path, rel_source in untested_sources: + # Mirror llvm-cov: per-source HTML lives at /.html + # Strip the leading "/" so that the path joins under output_root. 
+ target_html = output_root / (abs_path.lstrip("/") + ".html") + target_html.parent.mkdir(parents=True, exist_ok=True) + + try: + with open(abs_path, "r", encoding="utf-8", errors="replace") as f: + source_text = f.read() + except OSError: + continue + num_lines = source_text.count("\n") + ( + 0 if source_text.endswith("\n") or not source_text else 1 + ) + + rel_to_root = os.path.relpath(html_report_dir, target_html.parent) + css_path = (Path(rel_to_root) / "style.css").as_posix() + body = _escape_html(source_text) or "(empty file)" + html = ( + _UNTESTED_HTML_TEMPLATE + .replace("{css_path}", css_path) + .replace("{title}", _escape_html(rel_source)) + .replace("{body}", body) + ) + target_html.write_text(html, encoding="utf-8") + + href = target_html.relative_to(html_report_dir).as_posix() + entries.append((rel_source, href, max(num_lines, 1))) + + if not entries: + return + + total_lh, total_lf = _lcov_totals(lcov_text) + combined_pct = (100.0 * total_lh / total_lf) if total_lf else 0.0 + _inject_untested_section_into_index(html_report_dir / "index.html", entries, combined_pct) + + +def _escape_html(text: str) -> str: + return ( + text.replace("&", "&") + .replace("<", "<") + .replace(">", ">") + .replace("'", "'") + .replace('"', """) + ) + + +def _inject_untested_section_into_index( + index_file: Path, entries: List[Tuple[str, str, int]], combined_pct: float +) -> None: + """Insert a top-banner and detail table for untested files into the index. + + The banner is injected right after so it is the first thing a + reviewer sees. It explicitly labels the line count and combined + percentage as heuristic estimates to avoid false precision. The detail + table with per-file links is appended before . + """ + if not index_file.exists(): + return + + content = index_file.read_text(encoding="utf-8") + + total_estimated_lines = sum(n for _, _, n in entries) + + top_banner = ( + "
" + f"⚠️ {len(entries)} source file(s) not linked into " + f"any test (~{total_estimated_lines} instrumentable lines, " + "estimated via heuristic). The coverage percentages above do " + "not include these files. See the " + "detail table below.
" + f"Estimated combined line coverage (incl. untested files): " + f"~{combined_pct:.2f}%." + "
" + ) + + rows = [] + for rel_source, href, num_lines in entries: + rows.append( + "" + f"
{_escape_html(rel_source)}
" + f"
  0.00% (0/~{num_lines})
" + "
Not linked into any test
" + "" + ) + + detail_section = ( + "" + "

Not Linked Into Tests

" + "

Line counts are heuristic " + "estimates (non-blank, non-comment, non-directive lines). Actual " + "instrumentable line counts may differ from what llvm-cov would report.

" + "" + "" + "" + f"{''.join(rows)}
FilenameLine Coverage (est.)Note
" + ) + + if "" in content: + content = content.replace("", "" + top_banner, 1) + elif "") + 1 + content = content[:body_end] + top_banner + content[body_end:] + else: + content = top_banner + content + + if "" in content: + content = content.replace("", detail_section + "", 1) + else: + content += detail_section + + index_file.write_text(content, encoding="utf-8") + + +def parse_args() -> argparse.Namespace: + """Parse command-line arguments matching the Bazel coverage_report_generator interface.""" + parser = argparse.ArgumentParser(description="LLVM coverage reporter for Bazel") + parser.add_argument("--output_file", type=Path, required=True) + parser.add_argument("--reports_file", type=Path, required=True) + parser.add_argument("--filter_regexes", type=str, required=True, + help="Rlocation path to the filter regexes file") + parser.add_argument("--module_bazel", type=str, required=True, + help="Rlocation path of the consumer MODULE.bazel; " + "its parent directory is used as the workspace root") + parser.add_argument("--llvm_cov", type=str, required=True, + help="Rlocation path of the llvm-cov binary") + parser.add_argument("--llvm_profdata", type=str, required=True, + help="Rlocation path of the llvm-profdata binary") + parser.add_argument("--instrumented_sources_manifest", type=str, default=None, + help="Optional rlocation path to a text file listing " + "workspace-relative source files that are expected " + "to be instrumented. 
Sources missing from the " + "llvm-cov output are added at 0%% coverage.") + return parser.parse_args() + + + +if __name__ == "__main__": + main() diff --git a/coverage/requirements.in b/coverage/requirements.in new file mode 100644 index 0000000..c3726e8 --- /dev/null +++ b/coverage/requirements.in @@ -0,0 +1 @@ +pyyaml diff --git a/coverage/requirements_lock.txt b/coverage/requirements_lock.txt new file mode 100644 index 0000000..88bca0e --- /dev/null +++ b/coverage/requirements_lock.txt @@ -0,0 +1,83 @@ +# This file is autogenerated by pip-compile equivalent for the +# @score_cpp_policies//coverage package. The single direct dependency is +# `pyyaml`, required by coverage/justify.py to parse coverage +# justification YAML files. +# +# To regenerate (when pyyaml is updated), run pip-compile on the sibling +# requirements.in file and replace the body below with the resulting lock. +# +pyyaml==6.0.3 \ + --hash=sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c \ + --hash=sha256:0150219816b6a1fa26fb4699fb7daa9caf09eb1999f3b70fb6e786805e80375a \ + --hash=sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3 \ + --hash=sha256:02ea2dfa234451bbb8772601d7b8e426c2bfa197136796224e50e35a78777956 \ + --hash=sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6 \ + --hash=sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c \ + --hash=sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65 \ + --hash=sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a \ + --hash=sha256:1ebe39cb5fc479422b83de611d14e2c0d3bb2a18bbcb01f229ab3cfbd8fee7a0 \ + --hash=sha256:214ed4befebe12df36bcc8bc2b64b396ca31be9304b8f59e25c11cf94a4c033b \ + --hash=sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1 \ + --hash=sha256:22ba7cfcad58ef3ecddc7ed1db3409af68d023b7f940da23c6c2a1890976eda6 \ + --hash=sha256:27c0abcb4a5dac13684a37f76e701e054692a9b2d3064b70f5e4eb54810553d7 \ + 
--hash=sha256:28c8d926f98f432f88adc23edf2e6d4921ac26fb084b028c733d01868d19007e \ + --hash=sha256:2e71d11abed7344e42a8849600193d15b6def118602c4c176f748e4583246007 \ + --hash=sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310 \ + --hash=sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4 \ + --hash=sha256:3c5677e12444c15717b902a5798264fa7909e41153cdf9ef7ad571b704a63dd9 \ + --hash=sha256:3ff07ec89bae51176c0549bc4c63aa6202991da2d9a6129d7aef7f1407d3f295 \ + --hash=sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea \ + --hash=sha256:418cf3f2111bc80e0933b2cd8cd04f286338bb88bdc7bc8e6dd775ebde60b5e0 \ + --hash=sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e \ + --hash=sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac \ + --hash=sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9 \ + --hash=sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7 \ + --hash=sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35 \ + --hash=sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb \ + --hash=sha256:5cf4e27da7e3fbed4d6c3d8e797387aaad68102272f8f9752883bc32d61cb87b \ + --hash=sha256:5e0b74767e5f8c593e8c9b5912019159ed0533c70051e9cce3e8b6aa699fcd69 \ + --hash=sha256:5ed875a24292240029e4483f9d4a4b8a1ae08843b9c54f43fcc11e404532a8a5 \ + --hash=sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b \ + --hash=sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c \ + --hash=sha256:6344df0d5755a2c9a276d4473ae6b90647e216ab4757f8426893b5dd2ac3f369 \ + --hash=sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd \ + --hash=sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824 \ + --hash=sha256:66291b10affd76d76f54fad28e22e51719ef9ba22b29e1d7d03d6777a9174198 \ + --hash=sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065 \ + 
--hash=sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c \ + --hash=sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c \ + --hash=sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764 \ + --hash=sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196 \ + --hash=sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b \ + --hash=sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00 \ + --hash=sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac \ + --hash=sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8 \ + --hash=sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e \ + --hash=sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28 \ + --hash=sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3 \ + --hash=sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5 \ + --hash=sha256:9c57bb8c96f6d1808c030b1687b9b5fb476abaa47f0db9c0101f5e9f394e97f4 \ + --hash=sha256:9c7708761fccb9397fe64bbc0395abcae8c4bf7b0eac081e12b809bf47700d0b \ + --hash=sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf \ + --hash=sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5 \ + --hash=sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702 \ + --hash=sha256:b30236e45cf30d2b8e7b3e85881719e98507abed1011bf463a8fa23e9c3e98a8 \ + --hash=sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788 \ + --hash=sha256:b865addae83924361678b652338317d1bd7e79b1f4596f96b96c77a5a34b34da \ + --hash=sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d \ + --hash=sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc \ + --hash=sha256:bdb2c67c6c1390b63c6ff89f210c8fd09d9a1217a465701eac7316313c915e4c \ + --hash=sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba \ + 
--hash=sha256:c2514fceb77bc5e7a2f7adfaa1feb2fb311607c9cb518dbc378688ec73d8292f \ + --hash=sha256:c3355370a2c156cffb25e876646f149d5d68f5e0a3ce86a5084dd0b64a994917 \ + --hash=sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5 \ + --hash=sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26 \ + --hash=sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f \ + --hash=sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b \ + --hash=sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be \ + --hash=sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c \ + --hash=sha256:efd7b85f94a6f21e4932043973a7ba2613b059c4a000551892ac9f1d11f5baf3 \ + --hash=sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6 \ + --hash=sha256:fa160448684b4e94d80416c0fa4aac48967a969efe22931448d853ada8baf926 \ + --hash=sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0 + # via -r requirements.in diff --git a/tests/.bazelrc b/tests/.bazelrc index 0c70808..6fbb827 100644 --- a/tests/.bazelrc +++ b/tests/.bazelrc @@ -15,6 +15,8 @@ import %workspace%/../sanitizers/sanitizers.bazelrc import %workspace%/../clang_tidy/clang_tidy.bazelrc +import %workspace%/../coverage/coverage.bazelrc + common --registry=https://raw.githubusercontent.com/eclipse-score/bazel_registry/main/ common --registry=https://bcr.bazel.build @@ -25,3 +27,11 @@ build:lsan --extra_toolchains=@llvm_toolchain//:cc-toolchain-x86_64-lin build:tsan --extra_toolchains=@llvm_toolchain//:cc-toolchain-x86_64-linux build:asan_ubsan_lsan --extra_toolchains=@llvm_toolchain//:cc-toolchain-x86_64-linux build:tsan_ubsan --extra_toolchains=@llvm_toolchain//:cc-toolchain-x86_64-linux + +# --------------------------------------------------------------------------- +# Coverage smoke-test wiring (consumer-side settings on top of the imported +# generic coverage.bazelrc). 
+# --------------------------------------------------------------------------- +coverage --extra_toolchains=@llvm_toolchain//:cc-toolchain-x86_64-linux +coverage --instrumentation_filter="^//coverage[/:]" +coverage --coverage_report_generator=//coverage:reporter_wrapper diff --git a/tests/BUILD.bazel b/tests/BUILD.bazel index 04527db..aa1b025 100644 --- a/tests/BUILD.bazel +++ b/tests/BUILD.bazel @@ -14,6 +14,10 @@ load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_test") load("//tools/lint:linters.bzl", "clang_tidy_test") +# Required by @score_cpp_policies//coverage:defs.bzl :: score_coverage_reporter +# so the macro can rlocation-resolve the consumer workspace root at runtime. +exports_files(["MODULE.bazel"]) + # Enforce sanitizer combination constraints at build time (e.g. ASan+TSan is invalid). genrule( diff --git a/tests/MODULE.bazel b/tests/MODULE.bazel index d4e97f9..7844112 100644 --- a/tests/MODULE.bazel +++ b/tests/MODULE.bazel @@ -15,6 +15,7 @@ module(name = "score_cpp_policies_tests") bazel_dep(name = "googletest", version = "1.17.0.bcr.2") bazel_dep(name = "rules_cc", version = "0.2.17") +bazel_dep(name = "rules_python", version = "1.8.5") bazel_dep(name = "toolchains_llvm", version = "1.7.0") bazel_dep(name = "score_cpp_policies") local_path_override( @@ -39,3 +40,11 @@ llvm.toolchain( llvm_version = "19.1.7", ) use_repo(llvm, "llvm_toolchain") + +# Python toolchain — required to run @score_cpp_policies//coverage:justify and +# :effective_coverage (py_binary targets that depend on pyyaml for Python 3.12). 
+python = use_extension("@rules_python//python/extensions:python.bzl", "python") +python.toolchain( + python_version = "3.12", + is_default = True, +) diff --git a/tests/coverage/BUILD.bazel b/tests/coverage/BUILD.bazel new file mode 100644 index 0000000..e9d8df5 --- /dev/null +++ b/tests/coverage/BUILD.bazel @@ -0,0 +1,85 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +# +# Smoke test for @score_cpp_policies//coverage. +# +# Build verification: `bazel build //coverage/...` from the tests workspace +# exercises macro expansion, genrule wiring, py_binary loading, and sh_binary +# construction. Coverage execution: `bazel coverage //coverage/...` then +# `bazel run //coverage:generate_coverage_html -- --yaml coverage/coverage_justifications.yaml`. + +load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") +load("@rules_python//python:defs.bzl", "py_test") +load( + "@score_cpp_policies//coverage:defs.bzl", + "score_coverage_reporter", + "score_instrumented_sources_manifest", +) + +cc_library( + name = "coverable", + srcs = ["coverable.cpp"], + hdrs = ["coverable.h"], + visibility = ["//visibility:public"], +) + +# A library that is intentionally NOT linked into any cc_test. It exercises +# the "untested files" augmentation: the reporter must surface this file at +# 0% coverage even though llvm-cov never sees its object file. 
+cc_library( + name = "uncovered", + srcs = ["uncovered.cpp"], + hdrs = ["uncovered.h"], + visibility = ["//visibility:public"], +) + +cc_test( + name = "coverable_test", + srcs = ["coverable_test.cpp"], + target_compatible_with = ["@score_cpp_policies//sanitizers/constraints:no_tsan"], + deps = [ + ":coverable", + "@googletest//:gtest", + "@googletest//:gtest_main", + ], +) + +# Project-specific extension of the baseline filter regexes. +# Exposed for the macro to consume. +exports_files(["coverage_filter_regexes.txt"]) + +# Manifest of all C/C++ sources reachable from the coverage targets. Anything +# in here that does not appear in the llvm-cov report is added at 0% coverage +# by the reporter. +score_instrumented_sources_manifest( + name = "instrumented_sources", + targets = [ + ":coverable", + ":uncovered", + ], +) + +py_test( + name = "reporter_test", + srcs = ["reporter_test.py"], + deps = ["@score_cpp_policies//coverage:reporter_lib"], +) + +score_coverage_reporter( + name = "reporter_wrapper", + extra_regex_files = [":coverage_filter_regexes.txt"], + instrumented_sources_manifest = ":instrumented_sources", + llvm_cov = "@llvm_toolchain//:llvm-cov", + llvm_profdata = "@llvm_toolchain//:llvm-profdata", + visibility = ["//visibility:public"], +) diff --git a/tests/coverage/coverable.cpp b/tests/coverage/coverable.cpp new file mode 100644 index 0000000..95a9558 --- /dev/null +++ b/tests/coverage/coverable.cpp @@ -0,0 +1,28 @@ +// ******************************************************************************* +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. 
+// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// https://www.apache.org/licenses/LICENSE-2.0 +// +// SPDX-License-Identifier: Apache-2.0 +// ******************************************************************************* + +#include "coverage/coverable.h" + +namespace score::cpp_policies::tests { + +int classify(int value) noexcept { + if (value < 0) { + return -1; + } + if (value == 0) { + return 0; + } + return 1; +} + +} // namespace score::cpp_policies::tests diff --git a/tests/coverage/coverable.h b/tests/coverage/coverable.h new file mode 100644 index 0000000..052834a --- /dev/null +++ b/tests/coverage/coverable.h @@ -0,0 +1,26 @@ +// ******************************************************************************* +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// https://www.apache.org/licenses/LICENSE-2.0 +// +// SPDX-License-Identifier: Apache-2.0 +// ******************************************************************************* + +#ifndef SCORE_CPP_POLICIES_TESTS_COVERAGE_COVERABLE_H_ +#define SCORE_CPP_POLICIES_TESTS_COVERAGE_COVERABLE_H_ + +namespace score::cpp_policies::tests { + +// Minimal API exercised by the coverage smoke test. classify() maps negative, +// zero and positive inputs to -1, 0 and 1; having several paths lets a test +// leave one unexercised so the report shows covered and uncovered code.
+int classify(int value) noexcept; + +} // namespace score::cpp_policies::tests + +#endif // SCORE_CPP_POLICIES_TESTS_COVERAGE_COVERABLE_H_ diff --git a/tests/coverage/coverable_test.cpp b/tests/coverage/coverable_test.cpp new file mode 100644 index 0000000..0b5236a --- /dev/null +++ b/tests/coverage/coverable_test.cpp @@ -0,0 +1,30 @@ +// ******************************************************************************* +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// https://www.apache.org/licenses/LICENSE-2.0 +// +// SPDX-License-Identifier: Apache-2.0 +// ******************************************************************************* + +#include "coverage/coverable.h" + +#include <gtest/gtest.h> + +namespace score::cpp_policies::tests { +namespace { + +// Intentionally only covers the negative and zero branches — the positive +// branch should appear in the coverage report as uncovered, exercising the +// "uncovered branch" rendering path. +TEST(ClassifyTest, NegativeAndZero) { + EXPECT_EQ(classify(-5), -1); + EXPECT_EQ(classify(0), 0); +} + +} // namespace +} // namespace score::cpp_policies::tests diff --git a/tests/coverage/coverage_filter_regexes.txt b/tests/coverage/coverage_filter_regexes.txt new file mode 100644 index 0000000..7f8d928 --- /dev/null +++ b/tests/coverage/coverage_filter_regexes.txt @@ -0,0 +1,3 @@ +# Smoke-test consumer-specific filter regex extensions. +# These are concatenated AFTER @score_cpp_policies//coverage:filter_regexes.txt.
+.*/smoke_generated/.* diff --git a/tests/coverage/coverage_justifications.yaml b/tests/coverage/coverage_justifications.yaml new file mode 100644 index 0000000..7b02617 --- /dev/null +++ b/tests/coverage/coverage_justifications.yaml @@ -0,0 +1,21 @@ +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. +# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* + +version: 1 +justifications: + - id: smoke-positive-branch + category: platform_specific + reason: | + Sample justification used by the score_cpp_policies smoke test. + The positive branch in coverable.cpp is intentionally not exercised + by coverable_test.cpp to verify the effective-coverage post-processor. diff --git a/tests/coverage/reporter_test.py b/tests/coverage/reporter_test.py new file mode 100644 index 0000000..a645a2a --- /dev/null +++ b/tests/coverage/reporter_test.py @@ -0,0 +1,338 @@ +#!/usr/bin/env python3 +# ******************************************************************************* +# Copyright (c) 2026 Contributors to the Eclipse Foundation +# +# See the NOTICE file(s) distributed with this work for additional +# information regarding copyright ownership. 
+# +# This program and the accompanying materials are made available under the +# terms of the Apache License Version 2.0 which is available at +# https://www.apache.org/licenses/LICENSE-2.0 +# +# SPDX-License-Identifier: Apache-2.0 +# ******************************************************************************* +"""Unit tests for the coverage reporter's untested-files augmentation logic. + +These tests exercise the helper functions that identify untested source files +and synthesize 0%-coverage LCOV records, without requiring a full +bazel coverage run or llvm-cov toolchain. +""" + +import os +import tempfile +import textwrap +import unittest +from pathlib import Path + +from coverage.reporter import ( + _append_zero_coverage_lcov, + _augment_text_summary, + _count_instrumentable_lines, + _covered_sources_from_lcov, + _escape_html, + _find_untested_sources, + _is_likely_executable, + _lcov_totals, + _resolve_workspace_root, +) + + +class IsLikelyExecutableTest(unittest.TestCase): + def test_executable_statements(self): + for line in [ + " return 42;", + " int x = foo();", + " if (x > 0) {", + " bar(x);", + " x = *ptr;", + " *ptr = value;", + " **pp = data;", + ]: + self.assertTrue(_is_likely_executable(line), f"should be executable: {line!r}") + + def test_non_executable_lines(self): + for line in [ + "", + " ", + "// comment", + "/* block open", + " * continuation", + " */", + "#include <foo.h>", + '#include "bar.h"', + "#define FOO 42", + "#ifdef SOMETHING", + "#endif", + "#pragma once", + "{", + "}", + "namespace score {", + "namespace score::detail {", + "} // namespace score", + "public:", + " private:", + " protected:", + ]: + self.assertFalse(_is_likely_executable(line), f"should NOT be executable: {line!r}") + + +class CountInstrumentableLinesTest(unittest.TestCase): + def test_mixed_cpp_file(self): + content = textwrap.dedent("""\ + // Copyright header + #include "foo.h" + + namespace test { + + int foo(int x) noexcept + { + if (x > 0) { + return x + 1; + } +
return -x; + } + + } // namespace test + """) + with tempfile.NamedTemporaryFile(mode="w", suffix=".cpp", delete=False) as f: + f.write(content) + f.flush() + try: + line_numbers, count = _count_instrumentable_lines(f.name) + self.assertEqual(count, 4) + self.assertIn(6, line_numbers) # int foo(int x) noexcept + self.assertIn(8, line_numbers) # if (x > 0) { + self.assertIn(9, line_numbers) # return x + 1; + self.assertIn(11, line_numbers) # return -x; + finally: + os.unlink(f.name) + + def test_nonexistent_file(self): + line_numbers, count = _count_instrumentable_lines("/nonexistent/file.cpp") + self.assertEqual(count, 0) + self.assertEqual(line_numbers, []) + + +class CoveredSourcesFromLcovTest(unittest.TestCase): + def test_extracts_sf_entries(self): + lcov = textwrap.dedent("""\ + SF:/workspace/src/a.cpp + DA:1,5 + DA:2,0 + LF:2 + LH:1 + end_of_record + SF:/workspace/src/b.cpp + DA:1,3 + LF:1 + LH:1 + end_of_record + """) + sources = _covered_sources_from_lcov(lcov) + self.assertIn(os.path.realpath("/workspace/src/a.cpp"), sources) + self.assertIn(os.path.realpath("/workspace/src/b.cpp"), sources) + self.assertEqual(len(sources), 2) + + def test_empty_lcov(self): + self.assertEqual(_covered_sources_from_lcov(""), set()) + + +class LcovTotalsTest(unittest.TestCase): + def test_sums_lh_and_lf_across_records(self): + lcov = textwrap.dedent("""\ + SF:/workspace/src/a.cpp + DA:1,5 + DA:2,0 + LF:2 + LH:1 + end_of_record + SF:/workspace/src/b.cpp + DA:1,3 + LF:1 + LH:1 + end_of_record + """) + self.assertEqual(_lcov_totals(lcov), (2, 3)) + + def test_empty_lcov(self): + self.assertEqual(_lcov_totals(""), (0, 0)) + + +class FindUntestedSourcesTest(unittest.TestCase): + def test_filters_covered_and_nonexistent(self): + with tempfile.TemporaryDirectory() as ws: + src_a = Path(ws) / "src" / "a.cpp" + src_b = Path(ws) / "src" / "b.cpp" + src_a.parent.mkdir(parents=True) + src_a.write_text("int a() { return 1; }\n") + src_b.write_text("int b() { return 2; }\n") + + manifest 
= Path(ws) / "manifest.txt" + manifest.write_text("src/a.cpp\nsrc/b.cpp\nsrc/gone.cpp\n") + + covered = {str(src_a.resolve())} + result = _find_untested_sources(manifest, ws, covered, []) + self.assertEqual(len(result), 1) + self.assertEqual(result[0], (str(src_b.resolve()), "src/b.cpp")) + + def test_respects_filter_regexes(self): + with tempfile.TemporaryDirectory() as ws: + src = Path(ws) / "generated" / "foo.cpp" + src.parent.mkdir() + src.write_text("int foo() { return 0; }\n") + + manifest = Path(ws) / "manifest.txt" + manifest.write_text("generated/foo.cpp\n") + + result = _find_untested_sources(manifest, ws, set(), ["generated/"]) + self.assertEqual(result, []) + + def test_rejects_path_traversal(self): + # Nest ws in a private temp dir so "outside.cpp" never lands in the shared temp dir. + with tempfile.TemporaryDirectory() as sandbox: + ws = str(Path(sandbox) / "ws") + Path(ws).mkdir() + outside = Path(sandbox) / "outside.cpp" + outside.write_text("int secret() { return 42; }\n") + + manifest = Path(ws) / "manifest.txt" + manifest.write_text(f"../{outside.name}\n") + + result = _find_untested_sources(manifest, ws, set(), []) + self.assertEqual(result, []) + + def test_finds_sources_reached_only_through_symlinks(self): + """Regression test: manifest entries are often runfiles symlinks that + resolve outside workspace_root (e.g. to the real on-disk source tree + when Bazel runs the reporter sandboxed). These must still be found - + see the docstring on _find_untested_sources for the historical bug + this guards against.
+ """ + with tempfile.TemporaryDirectory() as real_dir: + real_src = Path(real_dir) / "real.cpp" + real_src.write_text("int real() { return 1; }\n") + + with tempfile.TemporaryDirectory() as ws: + linked_src = Path(ws) / "src" / "linked.cpp" + linked_src.parent.mkdir(parents=True) + linked_src.symlink_to(real_src) + + manifest = Path(ws) / "manifest.txt" + manifest.write_text("src/linked.cpp\n") + + result = _find_untested_sources(manifest, ws, set(), []) + self.assertEqual(result, [(str(real_src.resolve()), "src/linked.cpp")]) + + +class AppendZeroCoverageLcovTest(unittest.TestCase): + def test_appends_records_with_lh_zero(self): + with tempfile.TemporaryDirectory() as ws: + src = Path(ws) / "untested.cpp" + src.write_text(textwrap.dedent("""\ + #include "untested.h" + int foo() { + return 42; + } + """)) + lcov = "SF:/other.cpp\nDA:1,5\nLF:1\nLH:1\nend_of_record\n" + result = _append_zero_coverage_lcov(lcov, [str(src)], ws) + + self.assertIn(f"SF:{src}", result) + self.assertIn("LH:0", result) + self.assertIn("end_of_record", result) + lines = result.split("\n") + sf_lines = [l for l in lines if l.startswith("SF:")] + self.assertEqual(len(sf_lines), 2) + + def test_empty_untested_returns_original(self): + lcov = "SF:/a.cpp\nend_of_record\n" + self.assertEqual(_append_zero_coverage_lcov(lcov, [], "/ws"), lcov) + + +class ResolveWorkspaceRootTest(unittest.TestCase): + def test_plain_path_returns_parent_with_trailing_slash(self): + with tempfile.TemporaryDirectory() as ws: + module_bazel = Path(ws) / "MODULE.bazel" + module_bazel.write_text("") + self.assertEqual(_resolve_workspace_root(str(module_bazel)), f"{ws}/") + + def test_resolves_runfiles_symlink_to_real_workspace(self): + """Regression test: Rlocation() returns a runfiles-tree path, which is + a symlink into the current action's sandbox under linux-sandbox. 
The + parent of that symlink is an ephemeral sandbox path that stops + existing once the action finishes; SF: entries and HTML links built + from it point nowhere in the extracted report. This must resolve to + the real, stable workspace directory instead. + """ + with tempfile.TemporaryDirectory() as real_ws: + real_module_bazel = Path(real_ws) / "MODULE.bazel" + real_module_bazel.write_text("") + + with tempfile.TemporaryDirectory() as sandbox: + linked_module_bazel = Path(sandbox) / "runfiles" / "_main" / "MODULE.bazel" + linked_module_bazel.parent.mkdir(parents=True) + linked_module_bazel.symlink_to(real_module_bazel) + + self.assertEqual( + _resolve_workspace_root(str(linked_module_bazel)), f"{real_ws}/" + ) + + +class EscapeHtmlTest(unittest.TestCase): + def test_escapes_all_special_chars(self): + self.assertIn("&amp;", _escape_html("a & b")) + self.assertIn("&lt;", _escape_html("<tag>")) + self.assertIn("&gt;", _escape_html("<tag>")) + self.assertNotIn("'", _escape_html("it's")) # apostrophe entity spelling is implementation-defined + self.assertIn("&quot;", _escape_html('"quoted"')) + + +class AugmentTextSummaryTest(unittest.TestCase): + def test_appends_banner_without_modifying_totals(self): + with tempfile.TemporaryDirectory() as ws: + src = Path(ws) / "untested.cpp" + src.write_text("int foo() {\n return 42;\n}\n") + + summary = textwrap.dedent("""\ + Filename Functions Lines Branches + --- --- --- --- + TOTAL 2 0 100.00% 10 0 100.00% 4 0 100.00% + """) + lcov_text = "SF:/other.cpp\nDA:1,5\nLF:10\nLH:10\nend_of_record\n" + result = _augment_text_summary(summary, [str(src)], lcov_text) + self.assertIn("[score-coverage]", result) + self.assertIn("WARNING", result) + self.assertIn("estimated via heuristic", result) + totals_line = [l for l in result.splitlines() if "TOTAL" in l and "score-coverage" not in l][0] + self.assertIn("100.00%", totals_line) + + def test_banner_contains_file_count_and_line_estimate(self): + with tempfile.TemporaryDirectory() as ws: + src = Path(ws) / "untested.cpp" + src.write_text("int foo() { return 1; }\n") +
+ summary = "TOTAL 2 0 100.00% 10 0 100.00%\n" + lcov_text = "SF:/other.cpp\nDA:1,5\nLF:10\nLH:10\nend_of_record\n" + result = _augment_text_summary(summary, [str(src)], lcov_text) + self.assertIn("1 source file(s)", result) + self.assertIn("~1 instrumentable lines", result) + + def test_banner_contains_combined_percentage_from_lcov_totals(self): + with tempfile.TemporaryDirectory() as ws: + src = Path(ws) / "untested.cpp" + src.write_text("int foo() { return 1; }\n") + + summary = "TOTAL 2 0 100.00% 10 0 100.00%\n" + # Combined: 8 lines hit out of (8 real + 2 synthetic) = 80.00%. + lcov_text = ( + "SF:/other.cpp\nDA:1,5\nLF:8\nLH:8\nend_of_record\n" + f"SF:{src}\nDA:1,0\nDA:2,0\nLF:2\nLH:0\nend_of_record\n" + ) + result = _augment_text_summary(summary, [str(src)], lcov_text) + self.assertIn("Estimated combined line coverage", result) + self.assertIn("~80.00%", result) + self.assertIn("(8/10 lines)", result) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/coverage/uncovered.cpp b/tests/coverage/uncovered.cpp new file mode 100644 index 0000000..0d36915 --- /dev/null +++ b/tests/coverage/uncovered.cpp @@ -0,0 +1,29 @@ +// ******************************************************************************* +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. 
+// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// https://www.apache.org/licenses/LICENSE-2.0 +// +// SPDX-License-Identifier: Apache-2.0 +// ******************************************************************************* + +#include "coverage/uncovered.h" + +namespace score::cpp_policies::tests { + +int never_called(int value) noexcept +{ + if (value > 0) { + return value + 1; + } + if (value == 0) { + return 42; + } + return -value; +} + +} // namespace score::cpp_policies::tests diff --git a/tests/coverage/uncovered.h b/tests/coverage/uncovered.h new file mode 100644 index 0000000..bfaa5bc --- /dev/null +++ b/tests/coverage/uncovered.h @@ -0,0 +1,26 @@ +// ******************************************************************************* +// Copyright (c) 2026 Contributors to the Eclipse Foundation +// +// See the NOTICE file(s) distributed with this work for additional +// information regarding copyright ownership. +// +// This program and the accompanying materials are made available under the +// terms of the Apache License Version 2.0 which is available at +// https://www.apache.org/licenses/LICENSE-2.0 +// +// SPDX-License-Identifier: Apache-2.0 +// ******************************************************************************* + +#ifndef SCORE_CPP_POLICIES_TESTS_COVERAGE_UNCOVERED_H_ +#define SCORE_CPP_POLICIES_TESTS_COVERAGE_UNCOVERED_H_ + +namespace score::cpp_policies::tests { + +// Intentionally not linked into any cc_test. Used to verify that the reporter +// surfaces source files that no test exercises as 0%-coverage entries instead +// of silently dropping them. +int never_called(int value) noexcept; + +} // namespace score::cpp_policies::tests + +#endif // SCORE_CPP_POLICIES_TESTS_COVERAGE_UNCOVERED_H_