datacommonsorg · niveditasing · May 28, 2026 · May 28, 2026 · May 28, 2026 · May 28, 2026
diff --git a/tools/import_validation/runner.py b/tools/import_validation/runner.py
@@ -41,6 +41,8 @@ class ValidationRunner:
 
     def __init__(self, validation_config_path: str, differ_output: str,
                  stats_summary: str, lint_report: str, validation_output: str):
+        self.validation_config_path = validation_config_path
+        self.stats_summary = stats_summary
         self.config = ValidationConfig(validation_config_path)
         self.validation_output = validation_output
         self.validator = Validator()
@@ -212,6 +214,48 @@ def run_validations(self) -> tuple[bool, list[ValidationResult]]:
                 if output_dir:
                     rule_params.setdefault('output_path', output_dir)
 
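+                # For summary_report rules, resolve relative 'golden_files'/'input_files' against the directory that contains 'golden_data' (found by walking up from the config path, stats summary path, or CWD); fall back to the validation config's directory.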
+                if 'summary_report' in rule.get('rule_id', ''):
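+                    # Helper: check start_path and then its ancestors (at most 10 levels in total) and return the first one that contains target_sub_path; returns None if start_path is empty or no level matches.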
+                    def find_base_dir(start_path: str, target_sub_path: str):
+                        if not start_path:
+                            return None
+                        curr = os.path.abspath(start_path)
+                        for _ in range(10):  # limit to 10 levels up
+                            if os.path.exists(os.path.join(curr, target_sub_path)):
+                                return curr
+                            parent = os.path.dirname(curr)
+                            if parent == curr:
+                                break
+                            curr = parent
+                        return None
+
+                    config_dir = None
+                    # Walk up from validation_config_path, self.stats_summary, or CWD to find where 'golden_data' lives
+                    for start in [self.validation_config_path, self.stats_summary, os.getcwd()]:
+                        config_dir = find_base_dir(start, 'golden_data')
+                        if config_dir:
+                            break
+
+                    if not config_dir:
+                        config_dir = os.path.dirname(os.path.abspath(self.validation_config_path))
+
+                    print(f"DEBUG: Found summary_report rule: '{rule.get('rule_id')}'")
+                    print(f"DEBUG: Config directory resolved to: '{config_dir}'")
+                    for path_key in ['golden_files', 'input_files']:
+                        if path_key in rule_params:
+                            val = rule_params[path_key]
+                            print(f"DEBUG: Before resolve '{path_key}': '{val}'")
+                            if isinstance(val, str):
+                                if val and not os.path.isabs(val) and not val.startswith('gs://') and not val.startswith('http://') and not val.startswith('https://'):
+                                    rule_params[path_key] = os.path.join(config_dir, val)
+                            elif isinstance(val, list):
+                                rule_params[path_key] = [
+                                    os.path.join(config_dir, item) if isinstance(item, str) and item and not os.path.isabs(item) and not item.startswith('gs://') and not item.startswith('http://') and not item.startswith('https://') else item
+                                    for item in val
+                                ]
+                            print(f"DEBUG: After resolve '{path_key}': '{rule_params[path_key]}'")
+
             if validator_name == 'SQL_VALIDATOR':
                 result = validation_func(self.data_sources['stats'],
                                          self.data_sources['differ'],

diff --git a/tools/import_validation/validator_goldens.py b/tools/import_validation/validator_goldens.py
@@ -71,6 +71,7 @@
 import os
 import sys
 import tempfile
+import csv
 
 from absl import app
 from absl import flags
@@ -298,7 +299,8 @@ def load_nodes_from_file(files: str) -> dict:
             file_nodes = file_util.file_load_csv_dict(input_file,
                                                       key_index=True)
             for node in file_nodes.values():
-                nodes[len(nodes)] = node
+                cleaned_node = {k.strip(): v for k, v in node.items() if k is not None and isinstance(k, str) and k.strip() != ''}
+                nodes[len(nodes)] = cleaned_node
         else:
             # For MCF or JSON, we assume nodes are already keyed by DCID.
             file_nodes = mcf_file_util.load_mcf_nodes(input_file)
@@ -379,7 +381,9 @@ def generate_goldens(input_files: str,
             for k, node in input_nodes.items():
                 match = False
                 for col, vals in must_include_values.items():
-                    if node.get(col) in vals:
+                    val = node.get(col)
+                    if val in vals or mcf_file_util.strip_namespace(
+                            val) in vals:
                         match = True
                         break
                 if match:
@@ -440,9 +444,16 @@ def generate_goldens(input_files: str,
     if golden_nodes and output_file:
         logging.info(f'Writing {len(golden_nodes)} goldens to {output_file}')
         if file_util.file_is_csv(output_file):
-            file_util.file_write_csv_dict(golden_nodes,
-                                          output_file,
-                                          key_column_name=None)
+            with file_util.FileIO(output_file, mode='w') as csvfile:
+                columns = sorted(
+                    list(set().union(
+                        *(node.keys() for node in golden_nodes.values()))))
+                writer = csv.DictWriter(csvfile,
+                                        fieldnames=columns,
+                                        quoting=csv.QUOTE_NONNUMERIC)
+                writer.writeheader()
+                for node in golden_nodes.values():
+                    writer.writerow(node)
         else:
             mcf_file_util.write_mcf_nodes([golden_nodes], output_file)