fix: attempt to fix multiple SARIF runs issue & update CodeQL workflow

milkshakeuk · milkshakeuk · commit 5a040f651833 · 2026-03-13T10:16:12.000Z
diff --git a/.github/workflows/codacy.yml b/.github/workflows/codacy.yml
@@ -30,16 +30,17 @@ jobs:
     permissions:
       contents: read # for actions/checkout to fetch code
       security-events: write # for github/codeql-action/upload-sarif to upload SARIF results
+
     name: Codacy Security Scan
     runs-on: ubuntu-latest
     steps:
       # Checkout the repository to the GitHub Actions runner
       - name: Checkout code
-        uses: actions/checkout@v6
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
 
       # Execute Codacy Analysis CLI and generate a SARIF output with the security issues identified during the analysis
       - name: Run Codacy Analysis CLI
-        uses: codacy/codacy-analysis-cli-action@v4
+        uses: codacy/codacy-analysis-cli-action@562ee3e92b8e92df8b67e0a5ff8aa8e261919c08 # v4.4.7
         with:
           # Check https://github.com/codacy/codacy-analysis-cli#project-token to get your project token from your Codacy repository
           # You can also omit the token and run the tools that support default configurations
@@ -53,9 +54,19 @@ jobs:
           # This will handover control about PR rejection to the GitHub side
           max-allowed-issues: 2147483647
 
+      # Merge multiple SARIF runs into a single run to comply with GitHub's July 2025
+      # requirement that each upload must have a single run per category.
+      # See: https://github.blog/changelog/2025-07-21-code-scanning-will-stop-combining-multiple-sarif-runs-uploaded-in-the-same-sarif-file/
+      - name: Merge SARIF runs into single run
+        if: hashFiles('results-codacy.sarif') != ''
+        run: |
+          chmod +x ./scripts/merge-sarif-runs.sh
+          ./scripts/merge-sarif-runs.sh results-codacy.sarif
+
       # Upload the SARIF file generated in the previous step
       - name: Upload SARIF results file
-        uses: github/codeql-action/upload-sarif@v4
+        uses: github/codeql-action/upload-sarif@0d579ffd059c29b07949a3cce3983f0780820c98 # v4.32.6
+        if: hashFiles('results-codacy.sarif') != ''
         with:
           sarif_file: results-codacy.sarif
           category: codacy
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
@@ -9,7 +9,7 @@
 # the `language` matrix defined below to confirm you have the correct set of
 # supported CodeQL languages.
 #
-name: "CodeQL"
+name: "Advanced CodeQL"
 
 on:
   push:
@@ -21,73 +21,84 @@ on:
 
 jobs:
   analyze:
-    name: Analyze
+    name: Analyze (${{ matrix.language }})
     # Runner size impacts CodeQL analysis time. To learn more, please see:
     #   - https://gh.io/recommended-hardware-resources-for-running-codeql
     #   - https://gh.io/supported-runners-and-hardware-resources
-    #   - https://gh.io/using-larger-runners
-    # Consider using larger runners for possible analysis time improvements.
+    #   - https://gh.io/using-larger-runners (GitHub.com only)
+    # Consider using larger runners or machines with greater resources for possible analysis time improvements.
     runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
-    timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }}
     permissions:
+      # required for all workflows
+      security-events: write
+
+      # required to fetch internal or private CodeQL packs
+      packages: read
+
+      # only required for workflows in private repositories
       actions: read
       contents: read
-      security-events: write
 
     strategy:
       fail-fast: false
       matrix:
-        language: [ 'csharp' ]
-        # CodeQL supports [ 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' ]
-        # Use only 'java-kotlin' to analyze code written in Java, Kotlin or both
-        # Use only 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both
-        # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
+        include:
+        - language: actions
+          build-mode: none
+        - language: csharp
+          build-mode: manual
+        # CodeQL supports the following values for 'language': 'actions', 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'rust', 'swift'
+        # Use 'c-cpp' to analyze code written in C, C++ or both
+        # Use 'java-kotlin' to analyze code written in Java, Kotlin or both
+        # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both
+        # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis,
+        # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning.
+        # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how
+        # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages
 
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v6
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      # Add any setup steps before running the `github/codeql-action/init` action.
+      # This includes steps like installing compilers or runtimes (`actions/setup-node`
+      # or others). This is typically only required for manual builds.
+      # - name: Setup runtime (example)
+      #   uses: actions/setup-example@v1
+
+      - name: Setup .NET Core
+        if: ${{ matrix.language == 'csharp' }}
+        uses: actions/setup-dotnet@c2fa09f4bde5ebb9d1777cf28262a3eb3db3ced7 # v5.2.0
+        with:
+          global-json-file: global.json
 
       # Initializes the CodeQL tools for scanning.
       - name: Initialize CodeQL
-        uses: github/codeql-action/init@v4
+        uses: github/codeql-action/init@0d579ffd059c29b07949a3cce3983f0780820c98 # v4.32.6
         with:
           languages: ${{ matrix.language }}
+          build-mode: ${{ matrix.build-mode }}
           # If you wish to specify custom queries, you can do so here or in a config file.
           # By default, queries listed here will override any specified in a config file.
           # Prefix the list here with "+" to use these queries and those in the config file.
-
+  
           # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
           # queries: security-extended,security-and-quality
-
-      # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift).
-      # If this step fails, then you should remove it and run the build manually (see below)
-      # - name: Autobuild
-      #   uses: github/codeql-action/autobuild@v3
-
-      - name: Setup .NET Core
-        uses: actions/setup-dotnet@v5
-        with:
-          global-json-file: global.json
-
-      - name: Restore nHapi
+  
+      # If the analyze step fails for one of the languages you are analyzing with
+      # "We were unable to automatically build your code", modify the matrix above
+      # to set the build mode to "manual" for that language. Then modify this step
+      # to build your code.
+      # ℹ️ Command-line programs to run using the OS shell.
+      # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
+      - name: Run manual build steps
+        if: ${{ matrix.language == 'csharp' }}
+        shell: bash
         run: |
           dotnet restore nHapi.sln --configfile build/.nuget/NuGet.config
-
-      - name: Build nHapi
-        run: |
           dotnet build nHapi.sln -c Release --no-restore
 
-      # ℹ️ Command-line programs to run using the OS shell.
-      # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
-
-      #   If the Autobuild fails above, remove it and uncomment the following three lines.
-      #   modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance.
-
-      # - run: |
-      #     echo "Run, Build Application using script"
-      #     ./location_of_script_within_repo/buildscript.sh
-
       - name: Perform CodeQL Analysis
-        uses: github/codeql-action/analyze@v4
+        uses: github/codeql-action/analyze@0d579ffd059c29b07949a3cce3983f0780820c98 # v4.32.6
         with:
           category: "/language:${{matrix.language}}"
diff --git a/scripts/merge-sarif-runs.sh b/scripts/merge-sarif-runs.sh
@@ -0,0 +1,194 @@
+#!/bin/bash
+#
+# merge-sarif-runs.sh
+#
+# Merges multiple SARIF runs into a single run to comply with GitHub's July 2025
+# requirement that each SARIF upload must have a single run per category.
+#
+# See: https://github.blog/changelog/2025-07-21-code-scanning-will-stop-combining-multiple-sarif-runs-uploaded-in-the-same-sarif-file/
+#
+# Usage: ./merge-sarif-runs.sh <input.sarif> [output.sarif]
+#   - If output is not specified, input file is modified in place
+#
+# Exit codes:
+#   0 - Success (merged or no merge needed)
+#   1 - Error (invalid input, jq failure, or invalid output)
+
+set -e
+
+# --- Argument parsing ---
+INPUT_FILE="${1:-}"
+OUTPUT_FILE="${2:-$INPUT_FILE}"
+
+if [ -z "$INPUT_FILE" ]; then
+  echo "Error: Input SARIF file path required"
+  echo "Usage: $0 <input.sarif> [output.sarif]"
+  exit 1
+fi
+
+if [ ! -f "$INPUT_FILE" ]; then
+  echo "Error: Input file not found: $INPUT_FILE"
+  exit 1
+fi
+
+# --- Check run count ---
+RUN_COUNT=$(jq '.runs | length' "$INPUT_FILE")
+echo "Found $RUN_COUNT runs in SARIF file"
+
+if [ "$RUN_COUNT" -le 1 ]; then
+  echo "Single run detected, no merging needed"
+  # Strip partialFingerprints even for single-run files so that
+  # github/codeql-action/upload-sarif can calculate its own consistent
+  # fingerprints from source-file line hashes without conflicting with
+  # pre-embedded values from the analysis tool.
+  SINGLE_RUN_TEMP=$(mktemp)
+  trap 'rm -f "$SINGLE_RUN_TEMP"' EXIT
+  if ! jq 'del(.runs[].results[]?.partialFingerprints)' "$INPUT_FILE" > "$SINGLE_RUN_TEMP"; then
+    echo "Error: jq fingerprint-strip failed for single run"
+    exit 1
+  fi
+  mv "$SINGLE_RUN_TEMP" "$OUTPUT_FILE"
+  trap - EXIT
+  exit 0
+fi
+
+echo "Merging $RUN_COUNT runs into a single run..."
+
+# --- Create temporary file for merge output ---
+TEMP_FILE=$(mktemp)
+trap 'rm -f "$TEMP_FILE"' EXIT
+
+# --- Merge all runs into a single run ---
+#
+# SARIF Structure Overview:
+#   A SARIF file contains: { "$schema", "version", "runs": [...] }
+#   Each run contains: { "tool", "results", "artifacts", "invocations", ... }
+#   GitHub requires exactly ONE run per category for code scanning.
+#
+# Merge Strategy:
+#   1. RULES: Collect all rules from all runs, deduplicate by rule ID
+#   2. RESULTS: Collect all findings from all runs, deduplicate by location
+#   3. ARTIFACTS: Combine all file references from all runs
+#   4. INVOCATIONS: Combine all tool execution records
+#   5. METADATA: Take first non-null value for scalar properties
+#
+
+if ! jq '
+# =============================================================================
+# SARIF MERGE TRANSFORMATION
+# =============================================================================
+{
+  # Preserve top-level SARIF metadata
+  "$schema": ."$schema",
+  version: .version,
+
+  # Create single merged run from all input runs
+  runs: [{
+
+    # -------------------------------------------------------------------------
+    # TOOL SECTION
+    # Defines the analysis tool and its rules
+    # -------------------------------------------------------------------------
+    tool: {
+      driver: {
+        # Use unified tool name since we are merging multiple Codacy tools
+        name: "Codacy",
+        informationUri: "https://www.codacy.com",
+        version: "1.0.0",
+
+        # RULES: Flatten all rules from all runs into single array
+        # - .runs[].tool.driver.rules: Get rules array from each run
+        # - // []: Default to empty array if rules is null
+        # - | .[]: Flatten nested arrays into single stream
+        # - unique_by(.id): Remove duplicates, keeping first occurrence of each rule ID
+        rules: [.runs[].tool.driver.rules // [] | .[]] | unique_by(.id)
+      }
+    },
+
+    # -------------------------------------------------------------------------
+    # RESULTS SECTION
+    # Contains all findings/alerts from the analysis
+    # -------------------------------------------------------------------------
+    # RESULTS: Flatten all results and deduplicate by unique location key
+    # Deduplication key = [ruleId, fileURI, startLine, startColumn, endLine]
+    #
+    # The key is a jq array rather than one concatenated string. Joining the
+    # parts without a separator lets distinct findings collide (line 1 with
+    # column 10 and line 11 with column 0 both serialise as "110"), and
+    # unique_by would then silently drop a genuine finding.
+    #
+    # Defensive null handling at each level:
+    # - .locations // []: Default to empty array if no locations
+    # - [0] // {}: Default to empty object if array is empty
+    # - .physicalLocation // {}: Default if no physical location
+    # - .region.* // 0: Default line/column numbers to 0
+    #
+    # Fingerprints: partialFingerprints are deleted from every result so that
+    # github/codeql-action/upload-sarif can calculate its own consistent
+    # values from source-file line hashes without conflicts. They are not
+    # part of the deduplication key, so they are dropped in one step with no
+    # per-format filtering beforehand (which would also abort the merge on a
+    # non-string fingerprint value).
+    results: (
+      [.runs[].results // [] | .[]]
+      | unique_by(
+          (((.locations // [])[0] // {}).physicalLocation // {}) as $loc |
+          [
+            (.ruleId // ""),
+            ($loc.artifactLocation.uri // ""),
+            ($loc.region.startLine // 0),
+            ($loc.region.startColumn // 0),
+            ($loc.region.endLine // 0)
+          ]
+        )
+      | map(del(.partialFingerprints))
+    ),
+
+    # -------------------------------------------------------------------------
+    # ADDITIONAL SARIF PROPERTIES
+    # Preserved to maintain full SARIF compliance
+    # -------------------------------------------------------------------------
+
+    # originalUriBaseIds: Maps logical names to physical paths (e.g., %SRCROOT%)
+    # Merge strategy: Combine all mappings, later values override earlier ones
+    originalUriBaseIds: (reduce (.runs[].originalUriBaseIds // {}) as $m ({}; . * $m)),
+
+    # artifacts: List of files analyzed
+    # Merge strategy: Combine all artifact lists and deduplicate by URI
+    # Note: Artifacts without URIs are grouped together; this is acceptable as
+    # SARIF artifacts without location.uri are typically redundant metadata
+    artifacts: [.runs[].artifacts // [] | .[]] | unique_by(.location.uri // ""),
+
+    # invocations: Records of tool executions (timing, exit codes, etc.)
+    # Merge strategy: Keep all invocation records from all runs
+    invocations: [.runs[].invocations // [] | .[]]
+  }
+  # Add columnKind only if a valid value exists (SARIF requires valid enum string, not null)
+  + (([.runs[].columnKind | select(. != null and . != "")][0]) as $ck |
+     if $ck then { columnKind: $ck } else {} end)
+  # Add conversion only if a valid object exists (SARIF requires object type, not null)
+  + (([.runs[].conversion | select(. != null and type == "object")][0]) as $cv |
+     if $cv then { conversion: $cv } else {} end)
+  ]
+}
+' "$INPUT_FILE" > "$TEMP_FILE"; then
+  echo "Error: jq merge operation failed"
+  exit 1
+fi
+
+# --- Validate merged file is valid JSON ---
+if ! jq empty "$TEMP_FILE" 2>/dev/null; then
+  echo "Error: Merged SARIF file is invalid JSON"
+  exit 1
+fi
+
+# --- Move merged file to output ---
+mv "$TEMP_FILE" "$OUTPUT_FILE"
+trap - EXIT  # Clear trap since we moved the file
+
+echo "Merged SARIF file created successfully: $OUTPUT_FILE"
+
+# --- Report final structure ---
+echo "Final SARIF structure:"
+echo "  Runs: $(jq '.runs | length' "$OUTPUT_FILE")"
+echo "  Results: $(jq '.runs[0].results // [] | length' "$OUTPUT_FILE")"