pineforge-4pass · luisleo526 · Jun 4, 2026 · Jun 4, 2026 · Jun 4, 2026
diff --git a/scripts/cov_union.py b/scripts/cov_union.py
@@ -0,0 +1,143 @@
+#!/usr/bin/env python3
+"""Accurate line-coverage via per-binary union (clang/llvm-cov only).
+
+WHY THIS EXISTS
+---------------
+`llvm-cov report` over MANY test binaries that share inline/template code
+(our header-only Pine wrappers: series.hpp, generic_matrix.hpp, ta.hpp, the
+inline accessors in engine.hpp) emits "N functions have mismatched data" and
+UNDERCOUNTS those headers. Each binary instantiates the same inline/template
+function with its own coverage-mapping hash; the merged profile splinters per
+hash, so a single cross-binary report only matches one variant and drops the
+rest.
+
+This helper sidesteps that: it asks `llvm-cov show` for each binary
+individually (where every function matches its own binary's hash, so the
+per-line counts are authoritative), then UNIONS line hits across binaries — a
+line is executable if any binary maps it, covered if any binary executed it.
+For headers it recovers the undercount.
+
+METRIC NOTE: this counts PHYSICAL source lines (what `llvm-cov show` marks
+executable), which differs from `llvm-cov report`/`export` — those count
+region/expansion lines and yield larger absolute line totals. So this tool's
+absolute %/counts are NOT directly comparable to totals.txt; use it to check the
+RELATIVE truth for template-heavy headers (e.g. generic_matrix.hpp), where the
+multi-binary `report` undercounts. The canonical headline metric remains
+`llvm-cov report` (scripts/coverage.sh → totals.txt).
+
+USAGE
+-----
+  cov_union.py --profdata P --bindir D --bin-glob 'test_*' SOURCE [SOURCE...]
+
+Outputs a per-file line-coverage table + TOTAL to stdout. Exit 0 always
+(measurement tool). With --uncovered FILE, also writes a file sorted ascending
+by line coverage.
+"""
+from __future__ import annotations
+import argparse, glob, os, re, subprocess, sys
+
+LINE_RE = re.compile(r"^\s*(\d+)\|([^|]*)\|")
+BANNER_RE = re.compile(r"^(/.*):$")
+
+
+def find_llvm_cov() -> list[str]:
+    if sys.platform == "darwin":
+        from shutil import which
+        if which("xcrun"):
+            return ["xcrun", "llvm-cov"]
+    return ["llvm-cov"]
+
+
+def show_one(llvm_cov, binary, profdata, src):
+    """Run `llvm-cov show` for ONE source file and return {lineno: covered_bool}
+    for executable lines, deduped across instantiation groups (covered if ANY
+    instantiation executed the line).
+
+    Must be ONE file per invocation: passing many files to a single `show`
+    suppresses template instantiation groups, which silently drops most of a
+    template header's covered lines (e.g. generic_matrix.hpp).
+    """
+    r = subprocess.run(
+        llvm_cov + ["show", binary, "-instr-profile", profdata, "--format=text", src],
+        capture_output=True, text=True)
+    out: dict[int, bool] = {}
+    for ln in r.stdout.splitlines():
+        m = LINE_RE.match(ln)
+        if not m:
+            continue
+        cnt = m.group(2).strip()
+        if cnt == "":
+            continue  # non-executable line
+        lineno = int(m.group(1))
+        covered = cnt != "0"  # any non-zero (incl. human "1.2k") => covered
+        prev = out.get(lineno)
+        out[lineno] = covered if prev is None else (prev or covered)
+    return out
+
+
+def main() -> int:
+    ap = argparse.ArgumentParser(description=__doc__,
+                                 formatter_class=argparse.RawDescriptionHelpFormatter)
+    ap.add_argument("--profdata", required=True)
+    ap.add_argument("--bindir", required=True, help="dir containing test binaries")
+    ap.add_argument("--bin-glob", default="test_*")
+    ap.add_argument("--uncovered", help="write file sorted ascending by line %%")
+    ap.add_argument("sources", nargs="+")
+    args = ap.parse_args()
+
+    llvm_cov = find_llvm_cov()
+    bins = sorted(b for b in glob.glob(os.path.join(args.bindir, args.bin_glob))
+                  if os.path.isfile(b) and os.access(b, os.X_OK))
+    if not bins:
+        print(f"cov_union: no binaries matched {args.bindir}/{args.bin_glob}", file=sys.stderr)
+        return 0
+
+    # canonical source list (realpath -> display path)
+    disp = {}
+    for s in args.sources:
+        if os.path.isfile(s):
+            disp[os.path.realpath(s)] = s
+    if not disp:
+        print("cov_union: no source files found", file=sys.stderr)
+        return 0
+
+    union_exec: dict[str, set] = {ap_: set() for ap_ in disp}
+    union_cov: dict[str, set] = {ap_: set() for ap_ in disp}
+    # One `show` per (binary, file): multi-file show suppresses template
+    # instantiation groups and undercounts template headers.
+    for binary in bins:
+        for ap_, d in disp.items():
+            lines = show_one(llvm_cov, binary, args.profdata, d)
+            for lineno, cov in lines.items():
+                union_exec[ap_].add(lineno)
+                if cov:
+                    union_cov[ap_].add(lineno)
+
+    rows = []
+    tot_c = tot_n = 0
+    for ap_, d in disp.items():
+        n = len(union_exec[ap_]); c = len(union_cov[ap_])
+        tot_c += c; tot_n += n
+        rows.append((d, c, n, (100.0 * c / n) if n else 0.0))
+    rows.sort(key=lambda r: r[0])
+
+    w = max((len(r[0]) for r in rows), default=20)
+    print(f"{'Filename':<{w}}  {'Lines':>7}  {'Missed':>7}  {'Cover':>7}")
+    print("-" * (w + 26))
+    for d, c, n, pct in rows:
+        print(f"{d:<{w}}  {n:>7}  {n-c:>7}  {pct:>6.2f}%")
+    print("-" * (w + 26))
+    tot_pct = (100.0 * tot_c / tot_n) if tot_n else 0.0
+    print(f"{'TOTAL':<{w}}  {tot_n:>7}  {tot_n-tot_c:>7}  {tot_pct:>6.2f}%")
+    print(f"\n(union of {len(bins)} binaries — accurate header line coverage; "
+          f"see scripts/cov_union.py header for why)")
+
+    if args.uncovered:
+        with open(args.uncovered, "w") as f:
+            for d, c, n, pct in sorted(rows, key=lambda r: r[3]):
+                f.write(f"{d}\t{n}\t{n-c}\t{pct:.2f}%\n")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/scripts/coverage.sh b/scripts/coverage.sh
@@ -144,35 +144,62 @@ if [[ "$COMPILER" == "clang" ]]; then
         [[ -f "$f" ]] && SOURCES+=( "$f" )
     done
 
+    # llvm-cov prints "N functions have mismatched data" to stderr when the same
+    # inline/template function is instrumented in many test binaries with
+    # differing coverage-mapping hashes (our header-only Pine wrappers:
+    # series.hpp / generic_matrix.hpp / ta.hpp + engine.hpp inline accessors).
+    # The merged profile splinters per hash, so the single cross-binary report
+    # drops the unmatched variants. This is BENIGN for .cpp (compiled once into
+    # libpineforge.a → one hash) and only undercounts a few template-heavy
+    # headers. We capture that stderr, suppress the raw scary line, and emit a
+    # plain-language note instead. See scripts/cov_union.py to verify exact
+    # header line coverage on demand (per-binary union recovers the undercount;
+    # e.g. generic_matrix.hpp reads ~77% here but is ~86% true).
+    COV_STDERR="$COV_DIR/llvm-cov.stderr"
     {
         echo "PineForge coverage — $(date -u +%Y-%m-%dT%H:%M:%SZ)"
         echo "Compiler: $($CXX_BIN --version | head -n1)"
         echo
         "${LLVM_COV[@]}" report "${BINS[@]:1}" \
             -instr-profile="$PROFDATA" \
             "${SHARED_FILTER[@]}" \
-            "${SOURCES[@]}"
+            "${SOURCES[@]}" 2> "$COV_STDERR"
+        if grep -q 'mismatched data' "$COV_STDERR" 2>/dev/null; then
+            _mm="$(grep -oE '[0-9]+ functions? have mismatched data' "$COV_STDERR" | head -n1)"
+            echo
+            echo "NOTE: llvm-cov reported \"${_mm}\" — this is BENIGN, not a coverage gap."
+            echo "      Cause: header inline/template Pine code is instantiated in every test"
+            echo "      binary with differing mapping hashes; the multi-binary merge drops the"
+            echo "      unmatched variants. .cpp line coverage is UNAFFECTED (single hash via"
+            echo "      libpineforge.a). Only template-heavy headers are undercounted here —"
+            echo "      e.g. generic_matrix.hpp shows ~77% but is ~86% true. engine.hpp's lower"
+            echo "      figure is real (unexercised inline overloads), not an artifact."
+            echo "      Verify exact header coverage: scripts/cov_union.py --profdata \"$PROFDATA\" \\"
+            echo "                                    --bindir \"$BUILD_DIR/bin\" include/pineforge/*.hpp"
+        fi
     } | tee "$COV_DIR/totals.txt"
 
-    # Per-file annotated listings (text, fast to grep).
+    # Per-file annotated listings (text, fast to grep). stderr re-emits the same
+    # benign mismatch warning already explained above → discard it.
     "${LLVM_COV[@]}" show "${BINS[@]:1}" \
         -instr-profile="$PROFDATA" \
         "${SHARED_FILTER[@]}" \
         -format=text \
         -output-dir="$COV_DIR/per-file" \
         "${SOURCES[@]}" \
-        > /dev/null
+        > /dev/null 2>&1
 
-    # Sortable per-file totals (lowest-covered first → easy hole-spotter).
-    # The text report rows look like:
-    #   src/engine_orders.cpp                        87       12    86.21%   ...
-    # so column 4 is line-coverage percent.
+    # Sortable per-file totals (lowest line-covered first → easy hole-spotter).
+    # `llvm-cov report` rows carry stats in fixed columns:
+    #   $1 file  $2 Regions $3 MissedReg $4 Cover(region)
+    #   $5 Funcs $6 MissedFn $7 Exec%   $8 Lines $9 MissedLines $10 Cover(line)
+    # so column 10 is the line-coverage percent (column 4 is region coverage).
     "${LLVM_COV[@]}" report "${BINS[@]:1}" \
         -instr-profile="$PROFDATA" \
         "${SHARED_FILTER[@]}" \
-        "${SOURCES[@]}" \
+        "${SOURCES[@]}" 2>/dev/null \
         | awk '/^[A-Za-z0-9_\/\.\-]+\.(cpp|hpp|h|cc|c)[[:space:]]/ {print $0}' \
-        | sort -k4n \
+        | sort -k10n \
         > "$COV_DIR/uncovered.txt"
 
     if [[ "$FORMAT" == "html" ]]; then

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -51,6 +51,21 @@ set(TEST_SOURCES
     test_handle_reuse_reset
     test_calendar_aggregation_wm
     test_adversarial_ohlcv
+    test_report_trace
+    test_path_resolve_extra
+    test_security_validation_throws
+    test_session_calendar_extra
+    test_run_inputs_overrides
+    test_matrix_ops_extra
+    test_timeframe_aggregator_extra
+    test_c_abi_setters
+    test_fills_edge
+    test_strategy_commands_extra
+    test_lower_tf_parse_extra
+    test_ta_ma_warmup_extra
+    test_ta_osc_edge
+    test_ta_extremes_edge
+    test_ta_voltrend_edge
 )
 
 # When PINEFORGE_ENABLE_COVERAGE is ON we also instrument the test