|
#!/bin/bash
#
# merge-sarif-runs.sh
#
# Merges multiple SARIF runs into a single run to comply with GitHub's July 2025
# requirement that each SARIF upload must have a single run per category.
#
# See: https://github.blog/changelog/2025-07-21-code-scanning-will-stop-combining-multiple-sarif-runs-uploaded-in-the-same-sarif-file/
#
# Usage: ./merge-sarif-runs.sh <input.sarif> [output.sarif]
#   - If output is not specified, the input file is modified in place.
#
# Requires: jq
#
# Output:
#   Progress messages go to stdout; error messages go to stderr.
#
# Exit codes:
#   0 - Success (merged or no merge needed)
#   1 - Error (invalid input, jq missing/failure, or invalid output)

set -euo pipefail

# Path of the scratch file currently in use; removed by cleanup() on exit.
TEMP_FILE=""

#######################################
# Print an error message to stderr and exit with status 1.
# Arguments: $* - message text (printed after an "Error: " prefix)
#######################################
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

#######################################
# EXIT trap handler: remove the scratch file if one is still pending.
# Globals: TEMP_FILE (read)
#######################################
cleanup() {
  if [ -n "$TEMP_FILE" ]; then
    rm -f -- "$TEMP_FILE"
  fi
}

#######################################
# Print the number of runs in a SARIF file.
# Fails (non-zero) when the file is not valid JSON or has no top-level
# "runs" array, so callers never see a misleading count.
# Arguments: $1 - path to the SARIF file
# Outputs:   run count on stdout
#######################################
count_runs() {
  jq '.runs | if type == "array" then length else error("top-level runs array is missing") end' "$1"
}

#######################################
# Emit the SARIF document with partialFingerprints removed from every result.
# Used for files that already hold at most one run, so that
# github/codeql-action/upload-sarif can calculate its own consistent
# fingerprints from source-file line hashes without conflicting with
# pre-embedded values from the analysis tool.
# Arguments: $1 - path to the SARIF file
# Outputs:   transformed JSON on stdout
#######################################
strip_fingerprints() {
  jq 'del(.runs[].results[]?.partialFingerprints)' "$1"
}

#######################################
# Emit a SARIF document whose runs are merged into exactly one run.
#
# SARIF structure overview:
#   A SARIF file contains: { "$schema", "version", "runs": [...] }
#   Each run contains:     { "tool", "results", "artifacts", "invocations", ... }
#
# Merge strategy:
#   1. RULES:       collect rules from all runs, deduplicate by rule ID
#   2. RESULTS:     collect findings from all runs, deduplicate by
#                   rule + first location
#   3. ARTIFACTS:   combine file references from all runs, deduplicate by URI
#   4. INVOCATIONS: keep all tool execution records
#   5. METADATA:    take the first usable value for columnKind / conversion
#
# Arguments: $1 - path to the SARIF file
# Outputs:   merged JSON on stdout
#######################################
merge_runs() {
  # NOTE: the jq program below is single-quoted for the shell, so it must not
  # contain apostrophes (not even inside jq comments).
  jq '
# Backfill ruleId from the rules array of the run a result came from, for
# results that only identify their rule through ruleIndex. Negative indices
# are ignored because jq would resolve them from the end of the array.
def with_rule_id($rules):
  if .ruleId == null
     and ($rules | type) == "array"
     and (.ruleIndex | type) == "number"
     and .ruleIndex >= 0
  then
    ($rules[.ruleIndex].id // null) as $id
    | if $id == null then . else .ruleId = $id end
  else .
  end;

# Deduplication key: rule ID plus the first physical location.
# The key is an ARRAY of fields rather than a concatenated string, so that
# adjacent numbers cannot run together (line 1 / column 12 must not collide
# with line 11 / column 2). Missing fields default to "" or 0.
def dedup_key:
  ((.locations // [])[0].physicalLocation // {}) as $loc
  | [
      (.ruleId // ""),
      ($loc.artifactLocation.uri // ""),
      ($loc.region.startLine // 0 | tostring),
      ($loc.region.startColumn // 0 | tostring),
      ($loc.region.endLine // 0 | tostring)
    ];

{
  # Preserve top-level SARIF metadata
  "$schema": ."$schema",
  version: .version,

  # Create single merged run from all input runs
  runs: [
    {
      # -----------------------------------------------------------------------
      # TOOL SECTION
      # -----------------------------------------------------------------------
      tool: {
        driver: {
          # Use unified tool name since we are merging multiple Codacy tools
          name: "Codacy",
          informationUri: "https://www.codacy.com",
          version: "1.0.0",

          # RULES: flatten the rules of every run (runs without rules are
          # skipped) and keep the first occurrence of each rule ID.
          # unique_by also sorts by ID, so positions change.
          rules: ([.runs[].tool.driver.rules // [] | .[]] | unique_by(.id))
        }
      },

      # -----------------------------------------------------------------------
      # RESULTS SECTION
      # -----------------------------------------------------------------------
      # For every result of every run:
      #   - resolve ruleId from the source run while its rules array is still
      #     in scope, then drop ruleIndex: the merged rules array is re-sorted,
      #     so the old index would point at the wrong rule;
      #   - drop partialFingerprints entirely so that
      #     github/codeql-action/upload-sarif computes consistent fingerprints
      #     itself (this also removes legacy MD5-format values, so no separate
      #     filtering pass is needed).
      # Then deduplicate by dedup_key, keeping the first occurrence.
      results: (
        [
          .runs[] as $run
          | ($run.results // [])[]
          | with_rule_id($run.tool.driver.rules // [])
          | del(.ruleIndex, .partialFingerprints)
        ]
        | unique_by(dedup_key)
      ),

      # -----------------------------------------------------------------------
      # ADDITIONAL SARIF PROPERTIES
      # -----------------------------------------------------------------------

      # originalUriBaseIds: maps logical names to physical paths.
      # Merge strategy: combine all mappings, later runs override earlier ones.
      originalUriBaseIds: (reduce (.runs[].originalUriBaseIds // {}) as $m ({}; . * $m)),

      # artifacts: combine all artifact lists and deduplicate by URI.
      # Artifacts without a URI share the "" key, so only one of them is kept.
      # NOTE(review): artifactLocation.index values inside results are not
      # rewritten to match the merged list - confirm no consumer relies on them.
      artifacts: ([.runs[].artifacts // [] | .[]] | unique_by(.location.uri // "")),

      # invocations: keep all invocation records from all runs.
      invocations: [.runs[].invocations // [] | .[]]
    }
    # Add columnKind only if a usable value exists (never emit null)
    + (([.runs[].columnKind | select(. != null and . != "")][0]) as $ck |
       if $ck then { columnKind: $ck } else {} end)
    # Add conversion only if an object value exists (never emit null)
    + (([.runs[].conversion | select(. != null and type == "object")][0]) as $cv |
       if $cv then { conversion: $cv } else {} end)
  ]
}
' "$1"
}

#######################################
# Entry point: validate arguments, then strip or merge into the output file.
# Globals:   TEMP_FILE (written)
# Arguments: $1 - input SARIF path (required)
#            $2 - output SARIF path (optional, defaults to $1)
#######################################
main() {
  local input_file="${1:-}"
  local output_file="${2:-$input_file}"
  local run_count

  # --- Argument parsing ---
  if [ -z "$input_file" ]; then
    echo "Error: Input SARIF file path required" >&2
    echo "Usage: $0 <input.sarif> [output.sarif]" >&2
    exit 1
  fi

  if [ ! -f "$input_file" ]; then
    die "Input file not found: $input_file"
  fi

  if ! command -v jq >/dev/null 2>&1; then
    die "jq is required but was not found in PATH"
  fi

  # --- Check run count ---
  if ! run_count=$(count_runs "$input_file"); then
    die "Input is not a valid SARIF file (expected JSON with a top-level \"runs\" array): $input_file"
  fi
  echo "Found $run_count runs in SARIF file"

  # --- Create temporary file for the transformed output ---
  # Results are written to a scratch file first so a failed jq run never
  # truncates the input when operating in place.
  TEMP_FILE=$(mktemp) || die "Could not create temporary file"
  trap cleanup EXIT

  if [ "$run_count" -le 1 ]; then
    echo "Single run detected, no merging needed"
    if ! strip_fingerprints "$input_file" > "$TEMP_FILE"; then
      die "jq fingerprint-strip failed for single run"
    fi
    mv -- "$TEMP_FILE" "$output_file"
    TEMP_FILE=""
    exit 0
  fi

  echo "Merging $run_count runs into a single run..."

  # --- Merge all runs into a single run ---
  if ! merge_runs "$input_file" > "$TEMP_FILE"; then
    die "jq merge operation failed"
  fi

  # --- Validate merged file is valid JSON ---
  # stderr is silenced on purpose: only the pass/fail status matters here.
  if ! jq empty "$TEMP_FILE" 2>/dev/null; then
    die "Merged SARIF file is invalid JSON"
  fi

  # --- Move merged file to output ---
  mv -- "$TEMP_FILE" "$output_file"
  TEMP_FILE=""  # Nothing left for cleanup() to remove

  echo "Merged SARIF file created successfully: $output_file"

  # --- Report final structure ---
  echo "Final SARIF structure:"
  echo " Runs: $(jq '.runs | length' "$output_file")"
  echo " Results: $(jq '.runs[0].results // [] | length' "$output_file")"
}

main "$@"
0 commit comments