From 5f363e35460395f331a2151183044aea1807c637 Mon Sep 17 00:00:00 2001 From: Nivedita Singh Date: Thu, 28 May 2026 05:44:56 +0000 Subject: [PATCH 1/7] modified few lines --- tools/import_validation/validator_goldens.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tools/import_validation/validator_goldens.py b/tools/import_validation/validator_goldens.py index 7b19b783fe..a4bdbbadbc 100644 --- a/tools/import_validation/validator_goldens.py +++ b/tools/import_validation/validator_goldens.py @@ -71,6 +71,7 @@ import os import sys import tempfile +import csv from absl import app from absl import flags @@ -379,7 +380,8 @@ def generate_goldens(input_files: str, for k, node in input_nodes.items(): match = False for col, vals in must_include_values.items(): - if node.get(col) in vals: + val = node.get(col) + if val in vals or mcf_file_util.strip_namespace(val) in vals: match = True break if match: @@ -440,9 +442,12 @@ def generate_goldens(input_files: str, if golden_nodes and output_file: logging.info(f'Writing {len(golden_nodes)} goldens to {output_file}') if file_util.file_is_csv(output_file): - file_util.file_write_csv_dict(golden_nodes, - output_file, - key_column_name=None) + with file_util.FileIO(output_file, mode='w') as csvfile: + columns = sorted(list(next(iter(golden_nodes.values())).keys())) + writer = csv.DictWriter(csvfile, fieldnames=columns, quoting=csv.QUOTE_NONNUMERIC) + writer.writeheader() + for node in golden_nodes.values(): + writer.writerow(node) else: mcf_file_util.write_mcf_nodes([golden_nodes], output_file) From 399873ed1ab1986feb0844b4d4de1bcc1ccd3d64 Mon Sep 17 00:00:00 2001 From: Nivedita Singh Date: Thu, 28 May 2026 06:01:37 +0000 Subject: [PATCH 2/7] modified few lines --- tools/import_validation/validator_goldens.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/import_validation/validator_goldens.py b/tools/import_validation/validator_goldens.py index a4bdbbadbc..da78583066 100644 --- a/tools/import_validation/validator_goldens.py +++ b/tools/import_validation/validator_goldens.py @@ -443,7 +443,7 @@ def generate_goldens(input_files: str, logging.info(f'Writing {len(golden_nodes)} goldens to {output_file}') if file_util.file_is_csv(output_file): with file_util.FileIO(output_file, mode='w') as csvfile: - columns = sorted(list(next(iter(golden_nodes.values())).keys())) + columns = sorted(list(set().union(*(node.keys() for node in golden_nodes.values())))) writer = csv.DictWriter(csvfile, fieldnames=columns, quoting=csv.QUOTE_NONNUMERIC) writer.writeheader() for node in golden_nodes.values(): From bd3f90d76ee8264ede4c09622943b32cf408089b Mon Sep 17 00:00:00 2001 From: Nivedita Singh Date: Thu, 28 May 2026 06:26:54 +0000 Subject: [PATCH 3/7] fixed test --- tools/import_validation/validator_goldens.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/import_validation/validator_goldens.py b/tools/import_validation/validator_goldens.py index da78583066..aa49378d45 100644 --- a/tools/import_validation/validator_goldens.py +++ b/tools/import_validation/validator_goldens.py @@ -61,6 +61,7 @@ --goldens_sampler_exhaustive \ --generate_goldens=goldens_data/generated_goldens.mcf + # To generate goldens ensuring prominent DCIDs are included if present: python3 validator_goldens.py \ --validate_goldens_input=output/observations.csv \ From 4f14df9bdb4ed84d0a572d9012a56e7518c811e5 Mon Sep 17 00:00:00 2001 From: Nivedita Singh Date: Thu, 28 May 2026 06:44:52 +0000 Subject: [PATCH 4/7] fixed test --- tools/import_validation/validator_goldens.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/import_validation/validator_goldens.py b/tools/import_validation/validator_goldens.py index aa49378d45..aa63e025b6 100644 --- a/tools/import_validation/validator_goldens.py +++ b/tools/import_validation/validator_goldens.py @@ -382,7 +382,8 @@ def generate_goldens(input_files: str, match = False for col, vals in must_include_values.items(): val = node.get(col) - if val in vals or mcf_file_util.strip_namespace(val) in vals: + if val in vals or mcf_file_util.strip_namespace( + val) in vals: match = True break if match: @@ -444,8 +445,12 @@ def generate_goldens(input_files: str, logging.info(f'Writing {len(golden_nodes)} goldens to {output_file}') if file_util.file_is_csv(output_file): with file_util.FileIO(output_file, mode='w') as csvfile: - columns = sorted(list(set().union(*(node.keys() for node in golden_nodes.values())))) - writer = csv.DictWriter(csvfile, fieldnames=columns, quoting=csv.QUOTE_NONNUMERIC) + columns = sorted( + list(set().union( + *(node.keys() for node in golden_nodes.values())))) + writer = csv.DictWriter(csvfile, + fieldnames=columns, + quoting=csv.QUOTE_NONNUMERIC) writer.writeheader() for node in golden_nodes.values(): writer.writerow(node) From 83b56478199d22f43802647beadc419c9d68448d Mon Sep 17 00:00:00 2001 From: Nivedita Singh Date: Thu, 28 May 2026 08:51:31 +0000 Subject: [PATCH 5/7] fixed test --- tools/import_validation/validator_goldens.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/import_validation/validator_goldens.py b/tools/import_validation/validator_goldens.py index aa63e025b6..12ec78c94f 100644 --- a/tools/import_validation/validator_goldens.py +++ b/tools/import_validation/validator_goldens.py @@ -449,8 +449,8 @@ def generate_goldens(input_files: str, list(set().union( *(node.keys() for node in golden_nodes.values())))) writer = csv.DictWriter(csvfile, - fieldnames=columns, - quoting=csv.QUOTE_NONNUMERIC) + fieldnames=columns, + quoting=csv.QUOTE_NONNUMERIC) writer.writeheader() for node in golden_nodes.values(): writer.writerow(node) From d88c10f4b4d760f806bbc1ec278b5bea9b296d11 Mon Sep 17 00:00:00 2001 From: Nivedita Singh Date: Thu, 28 May 2026 09:45:02 +0000 Subject: [PATCH 6/7] fixed test --- tools/import_validation/validator_goldens.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/import_validation/validator_goldens.py b/tools/import_validation/validator_goldens.py index 12ec78c94f..a84b654bd3 100644 --- a/tools/import_validation/validator_goldens.py +++ b/tools/import_validation/validator_goldens.py @@ -61,7 +61,6 @@ --goldens_sampler_exhaustive \ --generate_goldens=goldens_data/generated_goldens.mcf - # To generate goldens ensuring prominent DCIDs are included if present: python3 validator_goldens.py \ --validate_goldens_input=output/observations.csv \ From 416c58bc0c1b253413556a7eb6c243cbe810987f Mon Sep 17 00:00:00 2001 From: Nivedita Singh Date: Fri, 5 Jun 2026 06:10:58 +0000 Subject: [PATCH 7/7] modified code --- tools/import_validation/runner.py | 44 ++++++++++++++++++++ tools/import_validation/validator_goldens.py | 3 +- 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/tools/import_validation/runner.py b/tools/import_validation/runner.py index f1364518e6..9f9ddfaebc 100644 --- a/tools/import_validation/runner.py +++ b/tools/import_validation/runner.py @@ -41,6 +41,8 @@ class ValidationRunner: def __init__(self, validation_config_path: str, differ_output: str, stats_summary: str, lint_report: str, validation_output: str): + self.validation_config_path = validation_config_path + self.stats_summary = stats_summary self.config = ValidationConfig(validation_config_path) self.validation_output = validation_output self.validator = Validator() @@ -212,6 +214,48 @@ def run_validations(self) -> tuple[bool, list[ValidationResult]]: if output_dir: rule_params.setdefault('output_path', output_dir) + # Resolve paths relative to the directory of the validation config. + if 'summary_report' in rule.get('rule_id', ''): + # Helper to find a base directory containing target_sub_path by walking up + def find_base_dir(start_path: str, target_sub_path: str): + if not start_path: + return None + curr = os.path.abspath(start_path) + for _ in range(10): # limit to 10 levels up + if os.path.exists(os.path.join(curr, target_sub_path)): + return curr + parent = os.path.dirname(curr) + if parent == curr: + break + curr = parent + return None + + config_dir = None + # Walk up from validation_config_path, self.stats_summary, or CWD to find where 'golden_data' lives + for start in [self.validation_config_path, self.stats_summary, os.getcwd()]: + config_dir = find_base_dir(start, 'golden_data') + if config_dir: + break + + if not config_dir: + config_dir = os.path.dirname(os.path.abspath(self.validation_config_path)) + + print(f"DEBUG: Found summary_report rule: '{rule.get('rule_id')}'") + print(f"DEBUG: Config directory resolved to: '{config_dir}'") + for path_key in ['golden_files', 'input_files']: + if path_key in rule_params: + val = rule_params[path_key] + print(f"DEBUG: Before resolve '{path_key}': '{val}'") + if isinstance(val, str): + if val and not os.path.isabs(val) and not val.startswith('gs://') and not val.startswith('http://') and not val.startswith('https://'): + rule_params[path_key] = os.path.join(config_dir, val) + elif isinstance(val, list): + rule_params[path_key] = [ + os.path.join(config_dir, item) if isinstance(item, str) and item and not os.path.isabs(item) and not item.startswith('gs://') and not item.startswith('http://') and not item.startswith('https://') else item + for item in val + ] + print(f"DEBUG: After resolve '{path_key}': '{rule_params[path_key]}'") + if validator_name == 'SQL_VALIDATOR': result = validation_func(self.data_sources['stats'], self.data_sources['differ'], diff --git a/tools/import_validation/validator_goldens.py b/tools/import_validation/validator_goldens.py index a84b654bd3..916cd2f43c 100644 --- a/tools/import_validation/validator_goldens.py +++ b/tools/import_validation/validator_goldens.py @@ -299,7 +299,8 @@ def load_nodes_from_file(files: str) -> dict: file_nodes = file_util.file_load_csv_dict(input_file, key_index=True) for node in file_nodes.values(): - nodes[len(nodes)] = node + cleaned_node = {k.strip(): v for k, v in node.items() if k is not None and isinstance(k, str) and k.strip() != ''} + nodes[len(nodes)] = cleaned_node else: # For MCF or JSON, we assume nodes are already keyed by DCID. file_nodes = mcf_file_util.load_mcf_nodes(input_file)