From 1f4025c84e8fd0dbb023806e3c339072f698f8a0 Mon Sep 17 00:00:00 2001 From: "F.Tibor" Date: Wed, 23 Jul 2025 08:55:29 +0200 Subject: [PATCH 1/9] Generate metadata for all targets --- src/per_file.bzl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/per_file.bzl b/src/per_file.bzl index cfb76621..c8b192f0 100644 --- a/src/per_file.bzl +++ b/src/per_file.bzl @@ -14,6 +14,8 @@ CLANGSA_PLIST=$1 shift LOG_FILE=$1 shift +METADATA=$1 +shift COMPILE_COMMANDS_JSON=$1 shift COMPILE_COMMANDS_ABS=$COMPILE_COMMANDS_JSON.abs @@ -35,6 +37,7 @@ if [ $ret_code -eq 1 ] || [ $ret_code -ge 128 ]; then fi cp $DATA_DIR/*_clang-tidy_*.plist $CLANG_TIDY_PLIST cp $DATA_DIR/*_clangsa_*.plist $CLANGSA_PLIST +cp $DATA_DIR/metadata.json $METADATA # sed -i -e "s|.*execroot/bazel_codechecker/||g" $CLANG_TIDY_PLIST # sed -i -e "s|.*execroot/bazel_codechecker/||g" $CLANGSA_PLIST @@ -56,11 +59,13 @@ def _run_code_checker( clang_tidy_plist_file_name = "{}/{}_clang-tidy.plist".format(*file_name_params) clangsa_plist_file_name = "{}/{}_clangsa.plist".format(*file_name_params) codechecker_log_file_name = "{}/{}_codechecker.log".format(*file_name_params) + codechecker_metadata_file_name = "{}/{}_metadata.json".format(*file_name_params) # Declare output files clang_tidy_plist = ctx.actions.declare_file(clang_tidy_plist_file_name) clangsa_plist = ctx.actions.declare_file(clangsa_plist_file_name) codechecker_log = ctx.actions.declare_file(codechecker_log_file_name) + codechecker_metadata = ctx.actions.declare_file(codechecker_metadata_file_name) if "--ctu" in options: inputs = [compile_commands_json] + sources_and_headers @@ -87,6 +92,7 @@ def _run_code_checker( args.add(clang_tidy_plist.path) args.add(clangsa_plist.path) args.add(codechecker_log.path) + args.add(codechecker_metadata.path) args.add(compile_commands_json.path) args.add("CodeChecker") args.add("analyze") From c196276ad707fe3588a07ae238de9628a51b7139 Mon Sep 17 00:00:00 2001 From: "F.Tibor" Date: Tue, 9 Sep 2025 17:16:22 +0200 
Subject: [PATCH 2/9] Initial implementation of metadata merger --- src/BUILD | 6 +++ src/metadata_merge.py | 86 +++++++++++++++++++++++++++++++++++++++++++ src/per_file.bzl | 20 ++++++++++ 3 files changed, 112 insertions(+) create mode 100644 src/metadata_merge.py diff --git a/src/BUILD b/src/BUILD index 8a2e5cdb..e6b24f42 100644 --- a/src/BUILD +++ b/src/BUILD @@ -5,6 +5,12 @@ py_binary( visibility = ["//visibility:public"], ) +py_binary( + name = "metadata_merge", + srcs = ["metadata_merge.py"], + visibility = ["//visibility:public"], +) + # Build & Test script template exports_files( ["codechecker_script.py"], diff --git a/src/metadata_merge.py b/src/metadata_merge.py new file mode 100644 index 00000000..34c1b5fa --- /dev/null +++ b/src/metadata_merge.py @@ -0,0 +1,86 @@ +import json +import os +import sys +from typing import List, Dict, Any + + +def merge_two_json(json1, json2): + if json1 == {}: + return json2 + if json2 == {}: + return json1 + json1_root = json1["tools"][0] + json2_root = json2["tools"][0] + # We append info from json2 to json1 from here on out + json1_root["result_source_files"].update(json2_root["result_source_files"]) + json1_root["skipped"] = json1_root["skipped"] + json2_root["skipped"] + # Merge time + json1_root["timestamps"]["begin"] = min( + float(json1_root["timestamps"]["begin"]), + float(json2_root["timestamps"]["begin"]), + ) + json1_root["timestamps"]["end"] = max( + float(json1_root["timestamps"]["end"]), + float(json2_root["timestamps"]["end"]), + ) + # Merge analyzers + for key, value in json2_root["analyzers"].items(): + json1_stat = json1_root["analyzers"][key]["analyzer_statistics"] + json2_stat = json2_root["analyzers"][key]["analyzer_statistics"] + json1_stat["failed"] = json1_stat["failed"] + json2_stat["failed"] + json1_stat["failed_sources"].extend(json2_stat["failed_sources"]) + json1_stat["successful"] = ( + json1_stat["successful"] + json2_stat["successful"] + ) + json1_stat["successful_sources"].extend( + 
json2_stat["successful_sources"] + ) + return json1 + + +def merge_json_files(file_paths: List[str]) -> Dict[str, Any]: + merged_data = {} + for file_path in file_paths: + if not os.path.exists(file_path): + print( + f"Error: File not found at '{file_path}'. Skipping.", + file=sys.stderr, + ) + continue + + try: + with open(file_path, "r", encoding="utf-8") as f: + data = json.load(f) + merged_data = merge_two_json(merged_data, data) + except json.JSONDecodeError: + print( + f"Error: Could not decode JSON from '{file_path}'. Skipping.", + file=sys.stderr, + ) + except Exception as e: + print( + f"An unexpected error occurred while processing '{file_path}': {e}", + file=sys.stderr, + ) + + return merged_data + + +def save_json_file(data: Dict[str, Any], output_path: str) -> None: + try: + with open(output_path, "w", encoding="utf-8") as f: + json.dump(data, f, indent=4) + print(f"\nSuccessfully saved merged data to '{output_path}'") + except Exception as e: + print(f"An error occurred while saving the file: {e}", file=sys.stderr) + + +if __name__ == "__main__": + output_file = sys.argv[1] + input_files = sys.argv[2:] + + merged_data = merge_json_files(input_files) + if merged_data: + save_json_file(merged_data, output_file) + else: + print("\nNo data was merged. 
Output file will not be created.") diff --git a/src/per_file.bzl b/src/per_file.bzl index c8b192f0..9cb8f3d7 100644 --- a/src/per_file.bzl +++ b/src/per_file.bzl @@ -346,6 +346,18 @@ def _per_file_impl(ctx): sources_and_headers, ) all_files += outputs + # merge metadata + metadata = [file for file in all_files if file.path.endswith("metadata.json")] + metadata_json = ctx.actions.declare_file(ctx.attr.name + "/data/metadata.json") + ctx.actions.run( + inputs = metadata, + outputs = [metadata_json], + executable = ctx.executable._metadata_merge_script, + arguments = [metadata_json.path] + [file.path for file in metadata], + mnemonic = "Metadata", + progress_message = "Merging metadata.json" + ) + all_files.append(metadata_json) ctx.actions.write( output = ctx.outputs.test_script, is_executable = True, @@ -384,6 +396,14 @@ per_file_test = rule( ], doc = "List of default CodeChecker analyze options", ), + "_metadata_merge_script": attr.label( + default = ":metadata_merge", + executable = True, + cfg = 'exec', + ), + "_python_runtime": attr.label( + default = "@default_python_tools//:py3_runtime", + ), "targets": attr.label_list( aspects = [ compile_info_aspect, From 6e276df61f8026bfb46ea22ca5368c6d2afd47ba Mon Sep 17 00:00:00 2001 From: "F.Tibor" Date: Mon, 15 Sep 2025 16:21:43 +0200 Subject: [PATCH 3/9] Fix messed up rebase --- src/per_file.bzl | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/per_file.bzl b/src/per_file.bzl index 9cb8f3d7..cd40abeb 100644 --- a/src/per_file.bzl +++ b/src/per_file.bzl @@ -74,7 +74,12 @@ def _run_code_checker( headers = depset([src], transitive = [compilation_context.headers]) inputs = depset([compile_commands_json, src], transitive = [headers]) - outputs = [clang_tidy_plist, clangsa_plist, codechecker_log] + outputs = [ + clang_tidy_plist, + clangsa_plist, + codechecker_log, + codechecker_metadata, + ] # Create CodeChecker wrapper script wrapper = ctx.actions.declare_file(ctx.attr.name + 
"/code_checker.sh") @@ -131,7 +136,6 @@ def check_valid_file_type(src): return False def _rule_sources(ctx): - srcs = [] if hasattr(ctx.rule.attr, "srcs"): for src in ctx.rule.attr.srcs: @@ -346,6 +350,7 @@ def _per_file_impl(ctx): sources_and_headers, ) all_files += outputs + # merge metadata metadata = [file for file in all_files if file.path.endswith("metadata.json")] metadata_json = ctx.actions.declare_file(ctx.attr.name + "/data/metadata.json") @@ -355,7 +360,7 @@ def _per_file_impl(ctx): executable = ctx.executable._metadata_merge_script, arguments = [metadata_json.path] + [file.path for file in metadata], mnemonic = "Metadata", - progress_message = "Merging metadata.json" + progress_message = "Merging metadata.json", ) all_files.append(metadata_json) ctx.actions.write( @@ -399,7 +404,7 @@ per_file_test = rule( "_metadata_merge_script": attr.label( default = ":metadata_merge", executable = True, - cfg = 'exec', + cfg = "exec", ), "_python_runtime": attr.label( default = "@default_python_tools//:py3_runtime", From 91cba9568d123ce4a82a9bf9c21f2d98b7723de6 Mon Sep 17 00:00:00 2001 From: "F.Tibor" Date: Mon, 6 Oct 2025 16:38:42 +0200 Subject: [PATCH 4/9] Create template from matadata_merge.py, and expand that --- src/BUILD | 11 ++++------- src/metadata_merge.py | 2 ++ src/per_file.bzl | 18 +++++++++++++----- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/BUILD b/src/BUILD index e6b24f42..a9364460 100644 --- a/src/BUILD +++ b/src/BUILD @@ -5,15 +5,12 @@ py_binary( visibility = ["//visibility:public"], ) -py_binary( - name = "metadata_merge", - srcs = ["metadata_merge.py"], - visibility = ["//visibility:public"], -) - # Build & Test script template exports_files( - ["codechecker_script.py"], + [ + "codechecker_script.py", + "metadata_merge.py", + ], ) # The following are flags and default values for clang_tidy_aspect diff --git a/src/metadata_merge.py b/src/metadata_merge.py index 34c1b5fa..6765a9ad 100644 --- a/src/metadata_merge.py +++ 
b/src/metadata_merge.py @@ -1,3 +1,5 @@ +#!{PythonPath} + import json import os import sys diff --git a/src/per_file.bzl b/src/per_file.bzl index cd40abeb..fb967e15 100644 --- a/src/per_file.bzl +++ b/src/per_file.bzl @@ -352,12 +352,20 @@ def _per_file_impl(ctx): all_files += outputs # merge metadata + ctx.actions.expand_template( + template = ctx.file._metadata_merge_template, + output = ctx.outputs._metadata_merge_script, + is_executable = True, + substitutions = { + "{PythonPath}": ctx.attr._python_runtime[PyRuntimeInfo].interpreter_path, + }, + ) metadata = [file for file in all_files if file.path.endswith("metadata.json")] metadata_json = ctx.actions.declare_file(ctx.attr.name + "/data/metadata.json") ctx.actions.run( inputs = metadata, outputs = [metadata_json], - executable = ctx.executable._metadata_merge_script, + executable = ctx.outputs._metadata_merge_script, arguments = [metadata_json.path] + [file.path for file in metadata], mnemonic = "Metadata", progress_message = "Merging metadata.json", @@ -401,10 +409,9 @@ per_file_test = rule( ], doc = "List of default CodeChecker analyze options", ), - "_metadata_merge_script": attr.label( - default = ":metadata_merge", - executable = True, - cfg = "exec", + "_metadata_merge_template": attr.label( + default = ":metadata_merge.py", + allow_single_file = True, ), "_python_runtime": attr.label( default = "@default_python_tools//:py3_runtime", @@ -418,6 +425,7 @@ per_file_test = rule( }, outputs = { "test_script": "%{name}/test_script.sh", + "_metadata_merge_script": "%{name}/_metadata_merge_script.py" }, test = True, ) From 20d93b282a8fb5b2f37457a64fc73ce22d7b35c0 Mon Sep 17 00:00:00 2001 From: "F.Tibor" Date: Mon, 6 Oct 2025 16:39:41 +0200 Subject: [PATCH 5/9] Add license --- src/metadata_merge.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/metadata_merge.py b/src/metadata_merge.py index 6765a9ad..c7fe1a6f 100644 --- a/src/metadata_merge.py +++ b/src/metadata_merge.py @@ -1,4 +1,17 @@ 
#!{PythonPath} +# Copyright 2023 Ericsson AB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import json import os From c62d86bc621d5ac5866f8a065289b1539ac40222 Mon Sep 17 00:00:00 2001 From: "F.Tibor" Date: Mon, 6 Oct 2025 16:40:21 +0200 Subject: [PATCH 6/9] Create main function --- src/metadata_merge.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/metadata_merge.py b/src/metadata_merge.py index c7fe1a6f..f36cb681 100644 --- a/src/metadata_merge.py +++ b/src/metadata_merge.py @@ -90,7 +90,7 @@ def save_json_file(data: Dict[str, Any], output_path: str) -> None: print(f"An error occurred while saving the file: {e}", file=sys.stderr) -if __name__ == "__main__": +def main(): output_file = sys.argv[1] input_files = sys.argv[2:] @@ -99,3 +99,6 @@ def save_json_file(data: Dict[str, Any], output_path: str) -> None: save_json_file(merged_data, output_file) else: print("\nNo data was merged. 
Output file will not be created.") + +if __name__ == "__main__": + main() From 95305d7c264333adcc34becbd495d7d94e4d0c19 Mon Sep 17 00:00:00 2001 From: "F.Tibor" Date: Mon, 6 Oct 2025 16:50:52 +0200 Subject: [PATCH 7/9] Fail if plist file version is not equivalent --- src/metadata_merge.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/metadata_merge.py b/src/metadata_merge.py index f36cb681..86485259 100644 --- a/src/metadata_merge.py +++ b/src/metadata_merge.py @@ -24,6 +24,8 @@ def merge_two_json(json1, json2): return json2 if json2 == {}: return json1 + # Fail if the plist file version is different + assert(json1["version"] == json2["version"]) json1_root = json1["tools"][0] json2_root = json2["tools"][0] # We append info from json2 to json1 from here on out From 0032bc5f3762659f14f71350d20742120f0b8f16 Mon Sep 17 00:00:00 2001 From: "F.Tibor" Date: Mon, 6 Oct 2025 16:51:02 +0200 Subject: [PATCH 8/9] Edit comment for time merging --- src/metadata_merge.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/metadata_merge.py b/src/metadata_merge.py index 86485259..4116014f 100644 --- a/src/metadata_merge.py +++ b/src/metadata_merge.py @@ -31,7 +31,10 @@ def merge_two_json(json1, json2): # We append info from json2 to json1 from here on out json1_root["result_source_files"].update(json2_root["result_source_files"]) json1_root["skipped"] = json1_root["skipped"] + json2_root["skipped"] - # Merge time + # Merge time; we assume here both json files describe jobs in + # the same analysis invocation, implying that the analysis start + # time is the lowest timestamp, and the end is the highest. 
+ # Note: caching will break this assumption json1_root["timestamps"]["begin"] = min( float(json1_root["timestamps"]["begin"]), float(json2_root["timestamps"]["begin"]), ) json1_root["timestamps"]["end"] = max( float(json1_root["timestamps"]["end"]), float(json2_root["timestamps"]["end"]), ) # Merge analyzers From 4a7a2beef8831eead84b9d723f25344aa272deec Mon Sep 17 00:00:00 2001 From: "F.Tibor" Date: Mon, 6 Oct 2025 16:58:59 +0200 Subject: [PATCH 9/9] Added asserts and comments --- src/metadata_merge.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/metadata_merge.py b/src/metadata_merge.py index 4116014f..43232733 100644 --- a/src/metadata_merge.py +++ b/src/metadata_merge.py @@ -28,7 +28,16 @@ def merge_two_json(json1, json2): assert(json1["version"] == json2["version"]) json1_root = json1["tools"][0] json2_root = json2["tools"][0] + # Command, working directory and output directory may be different + # from metadata to metadata, due to remote workers. + # Currently we choose the first of these values. + # We expect the following fields to be the same in all metadata files. + assert(json1_root["name"] == json2_root["name"]) + # same CodeChecker version + assert(json1_root["version"] == json2_root["version"]) + # We assume that the list of enabled checkers hasn't changed between runs. # We append info from json2 to json1 from here on out + json1_root["action_num"] += json2_root["action_num"] json1_root["result_source_files"].update(json2_root["result_source_files"]) json1_root["skipped"] = json1_root["skipped"] + json2_root["skipped"] # Merge time; we assume here both json files describe jobs in