Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@ py_binary(

# Build & Test script template
exports_files(
["codechecker_script.py"],
[
"codechecker_script.py",
"metadata_merge.py",
],
)

# The following are flags and default values for clang_tidy_aspect
Expand Down
118 changes: 118 additions & 0 deletions src/metadata_merge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
#!{PythonPath}
# Copyright 2023 Ericsson AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os
import sys
from typing import List, Dict, Any


def merge_two_json(json1: Dict[str, Any], json2: Dict[str, Any]) -> Dict[str, Any]:
    """Merge two CodeChecker ``metadata.json`` dictionaries.

    Aggregates ``json2`` into ``json1`` (mutating ``json1``) and returns the
    result. An empty dict acts as the identity element, which lets callers
    fold a whole list of metadata files through this function.

    Both inputs are assumed to describe jobs of the same analysis
    invocation, so the metadata format version, the tool name and the
    CodeChecker version must agree. Command, working directory and output
    directory may differ between (remote) workers; for those fields the
    values already present in ``json1`` win. Individual analyzers carry
    their own version numbers, which we tolerate differing between jobs.

    Raises:
        ValueError: if the metadata format versions, tool names or tool
            versions of the two inputs differ.
    """
    if json1 == {}:
        return json2
    if json2 == {}:
        return json1
    # Explicit raises instead of assert: asserts are stripped when the
    # interpreter runs with -O, which would silently merge incompatible
    # metadata files.
    if json1["version"] != json2["version"]:
        raise ValueError(
            "Cannot merge metadata files with different format versions: "
            f"{json1['version']} != {json2['version']}"
        )
    json1_root = json1["tools"][0]
    json2_root = json2["tools"][0]
    # We expect the tool name and the CodeChecker version to be the same
    # in all metadata files of one invocation.
    if json1_root["name"] != json2_root["name"]:
        raise ValueError(
            "Cannot merge metadata files produced by different tools: "
            f"{json1_root['name']} != {json2_root['name']}"
        )
    if json1_root["version"] != json2_root["version"]:
        raise ValueError(
            "Cannot merge metadata files produced by different CodeChecker "
            f"versions: {json1_root['version']} != {json2_root['version']}"
        )
    # We assume that the list of enabled checkers hasn't changed between
    # runs. We append info from json2 to json1 from here on out.
    # action_num is aggregated so the merged file reflects the total number
    # of analysis jobs.
    json1_root["action_num"] += json2_root["action_num"]
    json1_root["result_source_files"].update(json2_root["result_source_files"])
    json1_root["skipped"] += json2_root["skipped"]
    # Merge time; we assume here both json files describe jobs in the same
    # analysis invocation, implying that the analysis start time is the
    # lowest timestamp, and the end is the highest.
    # Note: caching will break this assumption.
    timestamps = json1_root["timestamps"]
    timestamps["begin"] = min(
        float(timestamps["begin"]),
        float(json2_root["timestamps"]["begin"]),
    )
    timestamps["end"] = max(
        float(timestamps["end"]),
        float(json2_root["timestamps"]["end"]),
    )
    # Merge per-analyzer statistics. An analyzer that only appears in
    # json2 is adopted wholesale instead of raising KeyError.
    for name, analyzer in json2_root["analyzers"].items():
        if name not in json1_root["analyzers"]:
            json1_root["analyzers"][name] = analyzer
            continue
        dst = json1_root["analyzers"][name]["analyzer_statistics"]
        src = analyzer["analyzer_statistics"]
        dst["failed"] += src["failed"]
        dst["failed_sources"].extend(src["failed_sources"])
        dst["successful"] += src["successful"]
        dst["successful_sources"].extend(src["successful_sources"])
    return json1


def merge_json_files(file_paths: List[str]) -> Dict[str, Any]:
    """Fold every readable metadata JSON file into one merged dictionary.

    Files that are missing or unparsable are reported on stderr and
    skipped, so a partial merge is still produced from whatever could be
    read. Returns an empty dict when nothing was merged.
    """
    accumulated: Dict[str, Any] = {}
    for file_path in file_paths:
        # Missing inputs are tolerated: report and move on.
        if not os.path.exists(file_path):
            print(
                f"Error: File not found at '{file_path}'. Skipping.",
                file=sys.stderr,
            )
            continue
        try:
            with open(file_path, "r", encoding="utf-8") as handle:
                accumulated = merge_two_json(accumulated, json.load(handle))
        except json.JSONDecodeError:
            print(
                f"Error: Could not decode JSON from '{file_path}'. Skipping.",
                file=sys.stderr,
            )
        except Exception as e:
            print(
                f"An unexpected error occurred while processing '{file_path}': {e}",
                file=sys.stderr,
            )
    return accumulated


def save_json_file(data: Dict[str, Any], output_path: str) -> None:
try:
with open(output_path, "w", encoding="utf-8") as f:
json.dump(data, f, indent=4)
print(f"\nSuccessfully saved merged data to '{output_path}'")
except Exception as e:
print(f"An error occurred while saving the file: {e}", file=sys.stderr)


def main() -> None:
    """Entry point: ``metadata_merge.py OUTPUT_FILE INPUT_FILE...``.

    Merges all input metadata.json files and writes the result to the
    output path. Exits with status 1 when no output path is given
    (the original raised a bare IndexError here).
    """
    if len(sys.argv) < 2:
        print(
            f"Usage: {sys.argv[0]} OUTPUT_FILE [INPUT_FILE ...]",
            file=sys.stderr,
        )
        sys.exit(1)

    output_file = sys.argv[1]
    input_files = sys.argv[2:]

    # NOTE(review): metadata.json carries a format "version" field
    # (currently 2); merge_two_json rejects mismatched versions, but an
    # explicit "unsupported version" warning could be added here if
    # CodeChecker starts emitting newer formats — TODO confirm policy.
    merged_data = merge_json_files(input_files)
    if merged_data:
        save_json_file(merged_data, output_file)
    else:
        print("\nNo data was merged. Output file will not be created.")


if __name__ == "__main__":
    main()
43 changes: 41 additions & 2 deletions src/per_file.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ CLANGSA_PLIST=$1
shift
LOG_FILE=$1
shift
METADATA=$1
shift
COMPILE_COMMANDS_JSON=$1
shift
COMPILE_COMMANDS_ABS=$COMPILE_COMMANDS_JSON.abs
Expand All @@ -35,6 +37,7 @@ if [ $ret_code -eq 1 ] || [ $ret_code -ge 128 ]; then
fi
cp $DATA_DIR/*_clang-tidy_*.plist $CLANG_TIDY_PLIST
cp $DATA_DIR/*_clangsa_*.plist $CLANGSA_PLIST
cp $DATA_DIR/metadata.json $METADATA

# sed -i -e "s|<string>.*execroot/bazel_codechecker/|<string>|g" $CLANG_TIDY_PLIST
# sed -i -e "s|<string>.*execroot/bazel_codechecker/|<string>|g" $CLANGSA_PLIST
Expand All @@ -56,11 +59,13 @@ def _run_code_checker(
clang_tidy_plist_file_name = "{}/{}_clang-tidy.plist".format(*file_name_params)
clangsa_plist_file_name = "{}/{}_clangsa.plist".format(*file_name_params)
codechecker_log_file_name = "{}/{}_codechecker.log".format(*file_name_params)
codechecker_metadata_file_name = "{}/{}_metadata.json".format(*file_name_params)

# Declare output files
clang_tidy_plist = ctx.actions.declare_file(clang_tidy_plist_file_name)
clangsa_plist = ctx.actions.declare_file(clangsa_plist_file_name)
codechecker_log = ctx.actions.declare_file(codechecker_log_file_name)
codechecker_metadata = ctx.actions.declare_file(codechecker_metadata_file_name)

if "--ctu" in options:
inputs = [compile_commands_json] + sources_and_headers
Expand All @@ -69,7 +74,12 @@ def _run_code_checker(
headers = depset([src], transitive = [compilation_context.headers])
inputs = depset([compile_commands_json, src], transitive = [headers])

outputs = [clang_tidy_plist, clangsa_plist, codechecker_log]
outputs = [
clang_tidy_plist,
clangsa_plist,
codechecker_log,
codechecker_metadata,
]

# Create CodeChecker wrapper script
wrapper = ctx.actions.declare_file(ctx.attr.name + "/code_checker.sh")
Expand All @@ -87,6 +97,7 @@ def _run_code_checker(
args.add(clang_tidy_plist.path)
args.add(clangsa_plist.path)
args.add(codechecker_log.path)
args.add(codechecker_metadata.path)
args.add(compile_commands_json.path)
args.add("CodeChecker")
args.add("analyze")
Expand Down Expand Up @@ -125,7 +136,6 @@ def check_valid_file_type(src):
return False

def _rule_sources(ctx):

srcs = []
if hasattr(ctx.rule.attr, "srcs"):
for src in ctx.rule.attr.srcs:
Expand Down Expand Up @@ -340,6 +350,27 @@ def _per_file_impl(ctx):
sources_and_headers,
)
all_files += outputs

# merge metadata
ctx.actions.expand_template(
template = ctx.file._metadata_merge_template,
output = ctx.outputs._metadata_merge_script,
is_executable = True,
substitutions = {
"{PythonPath}": ctx.attr._python_runtime[PyRuntimeInfo].interpreter_path,
},
)
metadata = [file for file in all_files if file.path.endswith("metadata.json")]
metadata_json = ctx.actions.declare_file(ctx.attr.name + "/data/metadata.json")
ctx.actions.run(
inputs = metadata,
outputs = [metadata_json],
executable = ctx.outputs._metadata_merge_script,
arguments = [metadata_json.path] + [file.path for file in metadata],
mnemonic = "Metadata",
progress_message = "Merging metadata.json",
)
all_files.append(metadata_json)
ctx.actions.write(
output = ctx.outputs.test_script,
is_executable = True,
Expand Down Expand Up @@ -378,6 +409,13 @@ per_file_test = rule(
],
doc = "List of default CodeChecker analyze options",
),
"_metadata_merge_template": attr.label(
default = ":metadata_merge.py",
allow_single_file = True,
),
"_python_runtime": attr.label(
default = "@default_python_tools//:py3_runtime",
),
"targets": attr.label_list(
aspects = [
compile_info_aspect,
Expand All @@ -387,6 +425,7 @@ per_file_test = rule(
},
outputs = {
"test_script": "%{name}/test_script.sh",
"_metadata_merge_script": "%{name}/_metadata_merge_script.py"
},
test = True,
)
Expand Down