#!{PythonPath}
# Copyright 2023 Ericsson AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Merge CodeChecker ``metadata.json`` files from per-file analysis jobs."""

import json
import os
import sys
from typing import Any, Dict, List


def merge_two_json(json1, json2):
    """Merge two CodeChecker metadata dicts and return the result.

    ``json1`` is mutated in place and returned (an empty dict acts as the
    identity element, so this can be used as a fold accumulator).

    Raises:
        ValueError: if the two files have incompatible versions or were
            produced by different tools.  NOTE: the original code used
            ``assert`` here, which is stripped under ``python -O`` and
            would silently merge incompatible metadata.
    """
    # Empty dict means "no data yet" (first iteration of the fold).
    if json1 == {}:
        return json2
    if json2 == {}:
        return json1
    # Fail if the plist file version is different.
    if json1["version"] != json2["version"]:
        raise ValueError("metadata version mismatch between input files")
    json1_root = json1["tools"][0]
    json2_root = json2["tools"][0]
    # Command, working directory and output directory may be different
    # from metadata to metadata, due to remote workers.
    # Currently we choose the first of these values.
    # We expect the following fields to be the same in all metadata files.
    if json1_root["name"] != json2_root["name"]:
        raise ValueError("tool name mismatch between metadata files")
    # Same CodeChecker version expected in every job of one invocation.
    if json1_root["version"] != json2_root["version"]:
        raise ValueError("tool version mismatch between metadata files")
    # We assume that the list of enabled checkers hasn't changed between runs.

    # We append info from json2 to json1 from here on out.
    json1_root["action_num"] += json2_root["action_num"]
    json1_root["result_source_files"].update(json2_root["result_source_files"])
    json1_root["skipped"] += json2_root["skipped"]
    # Merge time; we assume here both json files describe jobs in
    # the same analysis invocation, implying that the analysis start
    # time is the lowest timestamp, and the end is the highest.
    # Note: caching will break this assumption.
    json1_root["timestamps"]["begin"] = min(
        float(json1_root["timestamps"]["begin"]),
        float(json2_root["timestamps"]["begin"]),
    )
    json1_root["timestamps"]["end"] = max(
        float(json1_root["timestamps"]["end"]),
        float(json2_root["timestamps"]["end"]),
    )
    # Merge per-analyzer statistics (assumes both sides ran the same
    # analyzers; a missing key raises KeyError, caught by the caller).
    for key in json2_root["analyzers"]:
        json1_stat = json1_root["analyzers"][key]["analyzer_statistics"]
        json2_stat = json2_root["analyzers"][key]["analyzer_statistics"]
        json1_stat["failed"] += json2_stat["failed"]
        json1_stat["failed_sources"].extend(json2_stat["failed_sources"])
        json1_stat["successful"] += json2_stat["successful"]
        json1_stat["successful_sources"].extend(
            json2_stat["successful_sources"]
        )
    return json1


def merge_json_files(file_paths: List[str]) -> Dict[str, Any]:
    """Fold *file_paths* into a single merged metadata dict.

    Missing, unparsable, or incompatible files are reported on stderr and
    skipped (best-effort merge); an empty dict is returned when nothing
    could be merged.
    """
    merged_data: Dict[str, Any] = {}
    for file_path in file_paths:
        if not os.path.exists(file_path):
            print(
                f"Error: File not found at '{file_path}'. Skipping.",
                file=sys.stderr,
            )
            continue

        try:
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            merged_data = merge_two_json(merged_data, data)
        except json.JSONDecodeError:
            # Original message contained a hard line break from string
            # wrapping; reassembled into a single clean message.
            print(
                f"Error: Could not decode JSON from '{file_path}'. Skipping.",
                file=sys.stderr,
            )
        except Exception as e:
            # Deliberate best-effort: one bad metadata file must not abort
            # the whole merge.
            print(
                f"An unexpected error occurred while processing "
                f"'{file_path}': {e}",
                file=sys.stderr,
            )

    return merged_data
def save_json_file(data: Dict[str, Any], output_path: str) -> None:
    """Write *data* as pretty-printed JSON to *output_path*.

    Best-effort: failures are reported on stderr instead of raised, so a
    write error does not crash the merge step.
    """
    try:
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=4)
        print(f"\nSuccessfully saved merged data to '{output_path}'")
    except Exception as e:
        print(f"An error occurred while saving the file: {e}", file=sys.stderr)


def main():
    """CLI entry point: merge ``argv[2:]`` metadata files into ``argv[1]``.

    Exits with status 2 and a usage message when the output path is
    missing (previously this raised an opaque ``IndexError``).
    """
    if len(sys.argv) < 2:
        print(
            f"Usage: {sys.argv[0]} OUTPUT_FILE [INPUT_FILE...]",
            file=sys.stderr,
        )
        sys.exit(2)
    output_file = sys.argv[1]
    input_files = sys.argv[2:]

    merged_data = merge_json_files(input_files)
    if merged_data:
        save_json_file(merged_data, output_file)
    else:
        # Nothing merged (no inputs, or all inputs were skipped).
        print("\nNo data was merged. Output file will not be created.")


if __name__ == "__main__":
    main()
ctx.actions.declare_file(codechecker_log_file_name) + codechecker_metadata = ctx.actions.declare_file(codechecker_metadata_file_name) if "--ctu" in options: inputs = [compile_commands_json] + sources_and_headers @@ -69,7 +74,12 @@ def _run_code_checker( headers = depset([src], transitive = [compilation_context.headers]) inputs = depset([compile_commands_json, src], transitive = [headers]) - outputs = [clang_tidy_plist, clangsa_plist, codechecker_log] + outputs = [ + clang_tidy_plist, + clangsa_plist, + codechecker_log, + codechecker_metadata, + ] # Create CodeChecker wrapper script wrapper = ctx.actions.declare_file(ctx.attr.name + "/code_checker.sh") @@ -87,6 +97,7 @@ def _run_code_checker( args.add(clang_tidy_plist.path) args.add(clangsa_plist.path) args.add(codechecker_log.path) + args.add(codechecker_metadata.path) args.add(compile_commands_json.path) args.add("CodeChecker") args.add("analyze") @@ -125,7 +136,6 @@ def check_valid_file_type(src): return False def _rule_sources(ctx): - srcs = [] if hasattr(ctx.rule.attr, "srcs"): for src in ctx.rule.attr.srcs: @@ -340,6 +350,27 @@ def _per_file_impl(ctx): sources_and_headers, ) all_files += outputs + + # merge metadata + ctx.actions.expand_template( + template = ctx.file._metadata_merge_template, + output = ctx.outputs._metadata_merge_script, + is_executable = True, + substitutions = { + "{PythonPath}": ctx.attr._python_runtime[PyRuntimeInfo].interpreter_path, + }, + ) + metadata = [file for file in all_files if file.path.endswith("metadata.json")] + metadata_json = ctx.actions.declare_file(ctx.attr.name + "/data/metadata.json") + ctx.actions.run( + inputs = metadata, + outputs = [metadata_json], + executable = ctx.outputs._metadata_merge_script, + arguments = [metadata_json.path] + [file.path for file in metadata], + mnemonic = "Metadata", + progress_message = "Merging metadata.json", + ) + all_files.append(metadata_json) ctx.actions.write( output = ctx.outputs.test_script, is_executable = True, @@ -378,6 
+409,13 @@ per_file_test = rule( ], doc = "List of default CodeChecker analyze options", ), + "_metadata_merge_template": attr.label( + default = ":metadata_merge.py", + allow_single_file = True, + ), + "_python_runtime": attr.label( + default = "@default_python_tools//:py3_runtime", + ), "targets": attr.label_list( aspects = [ compile_info_aspect, @@ -387,6 +425,7 @@ per_file_test = rule( }, outputs = { "test_script": "%{name}/test_script.sh", + "_metadata_merge_script": "%{name}/_metadata_merge_script.py" }, test = True, )