diff --git a/scripts/world_bank/wdi/README.md b/scripts/world_bank/wdi/README.md index ef1f0f5dc6..239d0f7277 100644 --- a/scripts/world_bank/wdi/README.md +++ b/scripts/world_bank/wdi/README.md @@ -146,5 +146,24 @@ If you want to perform "only download", run the below command: python3 worldbank.py --mode=download ``` +### Golden files, golden checks, and thresholds in validation_config.json + +The `GOLDENS_CHECK` validator confirms that the import includes a specific set of expected records. This is useful for verifying that critical StatVars, Places, or specific metadata combinations are consistently present in the output. + +The validator compares the input data (usually from the stats data source) against one or more "golden" files (MCF or CSV). + +If any combination of values in a golden file row is missing from the input, the validation fails. The missing golden rows are then listed in the validation report JSON. + +To generate the golden files, run the commands below: +```bash +# Goldens from the output CSV +python3 validator_goldens.py --validate_goldens_input=../../scripts/world_bank/wdi/output/WorldBank.csv --generate_goldens=golden_data/golden_observations.csv --goldens_must_include="ISO3166Alpha3:gs://unresolved_mcf/import_validation/top_100k_places.csv" --generate_goldens_property_sets="ISO3166Alpha3" +``` + +```bash +# Goldens from the summary report +python3 validator_goldens.py --validate_goldens_input="summary_report.csv" --generate_goldens=golden_data/golden_summary_report.csv --generate_goldens_property_sets="StatVar|Units|MinDate|MeasurementMethods|observationPeriod" +``` + We highly recommend the use of the import validation tool for this import which you can find in https://github.com/datacommonsorg/tools/tree/master/import-validation-helper. 
diff --git a/scripts/world_bank/wdi/golden_data/golden_WorldBank.csv b/scripts/world_bank/wdi/golden_data/golden_WorldBank.csv new file mode 100644 index 0000000000..502e68e62d --- /dev/null +++ b/scripts/world_bank/wdi/golden_data/golden_WorldBank.csv @@ -0,0 +1,217 @@ +ISO3166Alpha3 +dcid:Earth +dcid:country/AGO +dcid:country/ALB +dcid:country/ARE +dcid:country/ARG +dcid:country/ARM +dcid:country/AUS +dcid:country/AUT +dcid:country/AZE +dcid:country/BEL +dcid:country/BEN +dcid:country/BFA +dcid:country/BGD +dcid:country/BGR +dcid:country/BHR +dcid:country/BIH +dcid:country/BLR +dcid:country/BOL +dcid:country/BRA +dcid:country/BRN +dcid:country/BWA +dcid:country/CAN +dcid:country/CHE +dcid:country/CHL +dcid:country/CHN +dcid:country/CIV +dcid:country/CMR +dcid:country/COD +dcid:country/COG +dcid:country/COL +dcid:country/CRI +dcid:country/CUB +dcid:country/CUW +dcid:country/CYP +dcid:country/CZE +dcid:country/DEU +dcid:country/DNK +dcid:country/DOM +dcid:country/DZA +dcid:country/ECU +dcid:country/EGY +dcid:country/ERI +dcid:country/ESP +dcid:country/EST +dcid:country/ETH +dcid:country/FIN +dcid:country/FRA +dcid:country/GAB +dcid:country/GBR +dcid:country/GEO +dcid:country/GHA +dcid:country/GIB +dcid:country/GNQ +dcid:country/GRC +dcid:country/GTM +dcid:country/HKG +dcid:country/HND +dcid:country/HRV +dcid:country/HTI +dcid:country/HUN +dcid:country/IDN +dcid:country/IND +dcid:country/IRL +dcid:country/IRN +dcid:country/IRQ +dcid:country/ISL +dcid:country/ISR +dcid:country/ITA +dcid:country/JAM +dcid:country/JOR +dcid:country/JPN +dcid:country/KAZ +dcid:country/KEN +dcid:country/KGZ +dcid:country/KHM +dcid:country/KOR +dcid:country/KWT +dcid:country/LAO +dcid:country/LBN +dcid:country/LBY +dcid:country/LKA +dcid:country/LTU +dcid:country/LUX +dcid:country/LVA +dcid:country/MAR +dcid:country/MDA +dcid:country/MDG +dcid:country/MEX +dcid:country/MKD +dcid:country/MLT +dcid:country/MMR +dcid:country/MNE +dcid:country/MNG +dcid:country/MOZ +dcid:country/MUS 
+dcid:country/MYS +dcid:country/NAM +dcid:country/NER +dcid:country/NGA +dcid:country/NIC +dcid:country/NLD +dcid:country/NOR +dcid:country/NPL +dcid:country/NZL +dcid:country/OMN +dcid:country/PAK +dcid:country/PAN +dcid:country/PER +dcid:country/PHL +dcid:country/POL +dcid:country/PRK +dcid:country/PRT +dcid:country/PRY +dcid:country/QAT +dcid:country/ROU +dcid:country/RUS +dcid:country/RWA +dcid:country/SAU +dcid:country/SDN +dcid:country/SEN +dcid:country/SGP +dcid:country/SLV +dcid:country/SRB +dcid:country/SSD +dcid:country/SUR +dcid:country/SVK +dcid:country/SVN +dcid:country/SWE +dcid:country/SWZ +dcid:country/SYR +dcid:country/TCD +dcid:country/TGO +dcid:country/THA +dcid:country/TJK +dcid:country/TKM +dcid:country/TTO +dcid:country/TUN +dcid:country/TUR +dcid:country/TZA +dcid:country/UGA +dcid:country/UKR +dcid:country/URY +dcid:country/USA +dcid:country/UZB +dcid:country/VEN +dcid:country/VNM +dcid:country/XKS +dcid:country/YEM +dcid:country/ZAF +dcid:country/ZMB +dcid:country/ZWE +dcid:country/ATG +dcid:country/BHS +dcid:country/BLZ +dcid:country/BRB +dcid:country/BTN +dcid:country/COM +dcid:country/CPV +dcid:country/DJI +dcid:country/DMA +dcid:country/FJI +dcid:country/GMB +dcid:country/GNB +dcid:country/GRD +dcid:country/GUY +dcid:country/KIR +dcid:country/KNA +dcid:country/LCA +dcid:country/LSO +dcid:country/MDV +dcid:country/MHL +dcid:country/PLW +dcid:country/SLB +dcid:country/STP +dcid:country/SYC +dcid:country/TLS +dcid:country/TON +dcid:country/VCT +dcid:country/VUT +dcid:country/WSM +dcid:ChannelIslands +dcid:country/ABW +dcid:country/AFG +dcid:country/AND +dcid:country/ASM +dcid:country/BDI +dcid:country/BMU +dcid:country/CAF +dcid:country/CYM +dcid:country/FRO +dcid:country/FSM +dcid:country/GIN +dcid:country/GRL +dcid:country/GUM +dcid:country/IMN +dcid:country/LBR +dcid:country/LIE +dcid:country/MAC +dcid:country/MAF +dcid:country/MCO +dcid:country/MLI +dcid:country/MNP +dcid:country/MRT +dcid:country/MWI +dcid:country/NCL 
+dcid:country/PNG +dcid:country/PRI +dcid:country/PSE +dcid:country/PYF +dcid:country/SLE +dcid:country/SMR +dcid:country/SOM +dcid:country/SXM +dcid:country/TCA +dcid:country/VIR +dcid:country/VGB diff --git a/scripts/world_bank/wdi/golden_data/golden_summary_report.csv b/scripts/world_bank/wdi/golden_data/golden_summary_report.csv new file mode 100644 index 0000000000..566ffb1385 --- /dev/null +++ b/scripts/world_bank/wdi/golden_data/golden_summary_report.csv @@ -0,0 +1,80 @@ +"NumPlaces","StatVar","ScalingFactors","MeasurementMethods","Units","observationPeriods","MinDate" +"186","Count_Death_IntentionalSelfHarm_Male_AsFractionOf_Count_Person_Male","[]","[]","[Per100000Males]","[P1Y]","2000" +"203","Amount_EconomicActivity_GrossNationalIncome_PurchasingPowerParity","[]","[]","[InternationalDollar]","[P1Y]","1990" +"165","Count_Person_Upto4Years_Wasting_AsFractionOf_Count_Person_Upto4Years","[100]","[JointChildMalnutritionEstimate]","[Percent]","[P1Y]","1983" +"144","Count_Person_25OrMoreYears_DoctorateDegree_AsFractionOf_Count_Person_25OrMoreYears","[]","[]","[]","[P1Y]","1994" +"204","Amount_Emissions_CarbonDioxide_PerCapita","[]","[]","[MetricTon]","[P1Y]","1970" +"184","Count_Person_25OrMoreYears_Male_TertiaryEducation_AsFractionOf_Count_Person_25OrMoreYears_Male","[]","[]","[]","[P1Y]","1970" +"218","LifeExpectancy_Person_Female","[]","[]","[Year]","[P1Y]","1960" +"139","Count_Person_25OrMoreYears_Male_DoctorateDegree_AsFractionOf_Count_Person_25OrMoreYears_Male","[]","[]","[]","[P1Y]","1994" +"197","Count_Death_0Years_Female_AsFractionOf_Count_BirthEvent_LiveBirth_Female","[]","[UnitedNationsIGMEEstimate]","[Per1000FemaleLiveBirths]","[P1Y]","1960" +"197","Count_CriminalActivities_MurderAndNonNegligentManslaughter_AsFractionOf_Count_Person","[]","[]","[Per100000Persons]","[P1Y]","1990" +"194","Amount_EconomicActivity_ExpenditureActivity_HealthcareExpenditure_AsFractionOf_Count_Person","[]","[]","[InternationalDollar, USDollar]","[P1Y]","2000" 
+"202","Amount_EconomicActivity_ExpenditureActivity_EducationExpenditure_Government_AsFractionOf_Amount_EconomicActivity_ExpenditureActivity_Government","[100]","[]","[Percent]","[P1Y]","1980" +"188","Count_Person_25OrMoreYears_Male_BachelorsDegreeOrHigher_AsFractionOf_Count_Person_25OrMoreYears_Male","[]","[]","[]","[P1Y]","1970" +"218","FertilityRate_Person_Female","[]","[]","[]","[]","1960" +"218","Count_Person_Rural","[]","[WorldBankEstimate]","[]","[P1Y]","1960" +"183","Count_Person_25OrMoreYears_Female_TertiaryEducation_AsFractionOf_Count_Person_25OrMoreYears_Female","[]","[]","[]","[P1Y]","1970" +"218","Count_Person_Urban","[]","[WorldBankEstimate]","[]","[P1Y]","1960" +"165","Count_Person_Upto4Years_Overweight_AsFractionOf_Count_Person_Upto4Years","[]","[]","[]","[P1Y]","1983" +"218","LifeExpectancy_Person_Male","[]","[]","[Year]","[P1Y]","1960" +"218","Count_BirthEvent_LiveBirth_AsFractionOf_Count_Person","[]","[]","[Per1000Persons]","[P1Y]","1960" +"197","MortalityRate_Person_Upto4Years_AsFractionOf_Count_BirthEvent_LiveBirth","[]","[]","[Per1000LiveBirths]","[P1Y]","1960" +"218","Count_Person","[]","[]","[]","[P1Y]","1960" +"100","Count_Person_7To14Years_Male_Employed_AsFractionOf_Count_Person_7To14Years_Male","[100]","[]","[Percent]","[P1Y]","1994" +"160","Count_Person_Upto4Years_Male_Wasting_AsFractionOf_Count_Person_Upto4Years_Male","[100]","[JointChildMalnutritionEstimate]","[Percent]","[P1Y]","1986" +"204","Amount_EconomicActivity_ExpenditureActivity_EducationExpenditure_Government_AsFractionOf_Amount_EconomicActivity_GrossDomesticProduction_Nominal","[100]","[]","[Percent]","[P1Y]","1970" +"188","Count_Person_25OrMoreYears_BachelorsDegreeOrHigher_AsFractionOf_Count_Person_25OrMoreYears","[]","[]","[]","[P1Y]","1970" +"165","Count_Person_15OrMoreYears_Female_Smoking_AsFractionOf_Count_Person_15OrMoreYears_Female","[]","[AgeAdjustedPrevalence]","[]","[P1Y]","2000" 
+"165","Count_Person_15OrMoreYears_Smoking_AsFractionOf_Count_Person_15OrMoreYears","[]","[AgeAdjustedPrevalence]","[]","[P1Y]","2000" +"203","Amount_EconomicActivity_GrossNationalIncome_PurchasingPowerParity_PerCapita","[]","[]","[InternationalDollar]","[P1Y]","1990" +"160","Count_Person_Upto4Years_Male_Overweight_AsFractionOf_Count_Person_Upto4Years_Male","[]","[]","[]","[P1Y]","1986" +"197","Count_Death_0Years","[]","[UnitedNationsIGMEEstimate]","[]","[P1Y]","1960" +"195","Amount_EconomicActivity_ExpenditureActivity_TertiaryEducationExpenditure_Government_AsFractionOf_Amount_EconomicActivity_ExpenditureActivity_EducationExpenditure_Government","[]","[]","[]","[P1Y]","1970" +"159","Count_Person_Upto4Years_Male_SevereWasting_AsFractionOf_Count_Person_Upto4Years_Male","[100]","[JointChildMalnutritionEstimate]","[Percent]","[P1Y]","1986" +"151","Amount_Consumption_Electricity_PerCapita","[]","[]","[KilowattHour]","[P1Y]","1990" +"197","Count_Death_0Years_Male_AsFractionOf_Count_BirthEvent_LiveBirth_Male","[]","[UnitedNationsIGMEEstimate]","[Per1000MaleLiveBirths]","[P1Y]","1960" +"180","Amount_Consumption_Energy_PerCapita","[]","[]","[KilogramOfOilEquivalent]","[P1Y]","1990" +"186","Count_Death_IntentionalSelfHarm_Female_AsFractionOf_Count_Person_Female","[]","[]","[Per100000Females]","[P1Y]","2000" +"165","Count_Person_15OrMoreYears_Male_Smoking_AsFractionOf_Count_Person_15OrMoreYears_Male","[]","[AgeAdjustedPrevalence]","[]","[P1Y]","2000" +"149","Count_CriminalActivities_MurderAndNonNegligentManslaughter_Male_AsFractionOf_Count_Person_Male","[]","[]","[Per100000Males]","[P1Y]","1990" +"200","Amount_Remittance_InwardRemittance_AsFractionOf_Amount_EconomicActivity_GrossDomesticProduction_Nominal","[100]","[WorldBankEstimate]","[Percent]","[P1Y]","1970" +"188","Count_Person_15To64Years_InLaborForce_AsFractionOf_Count_Person_15To64Years","[]","[]","[]","[P1Y]","1990" 
+"100","Count_Person_7To14Years_Employed_AsFractionOf_Count_Person_7To14Years","[100]","[]","[Percent]","[P1Y]","1994" +"171","GiniIndex_EconomicActivity","[]","[WorldBankEstimate]","[]","[P1Y]","1963" +"162","Count_Person_25OrMoreYears_Female_MastersDegreeOrHigher_AsFractionOf_Count_Person_25OrMoreYears_Female","[]","[]","[]","[P1Y]","1990" +"170","Count_Person_25OrMoreYears_MastersDegreeOrHigher_AsFractionOf_Count_Person_25OrMoreYears","[]","[]","[]","[P1Y]","1990" +"152","Count_CriminalActivities_MurderAndNonNegligentManslaughter_Female_AsFractionOf_Count_Person_Female","[]","[]","[Per100000Females]","[P1Y]","1990" +"188","Count_Person_15To64Years_Female_InLaborForce_AsFractionOf_Count_Person_15To64Years_Female","[]","[]","[]","[P1Y]","1990" +"104","Amount_Stock_AsFractionOf_Amount_EconomicActivity_GrossDomesticProduction_Nominal","[100]","[]","[Percent]","[P1Y]","1975" +"131","Count_Person_25OrMoreYears_Female_DoctorateDegree_AsFractionOf_Count_Person_25OrMoreYears_Female","[]","[]","[]","[P1Y]","1994" +"215","GrowthRate_Amount_EconomicActivity_GrossDomesticProduction","[]","[]","[]","[P1Y]","1961" +"218","Count_Death_AsAFractionOfCount_Person","[]","[WorldBankWeightedAverage]","[Per1000Persons]","[P1Y]","1960" +"215","Amount_EconomicActivity_GrossDomesticProduction_Nominal","[]","[]","[USDollar]","[P1Y]","1960" +"188","Count_Person_15To64Years_Male_InLaborForce_AsFractionOf_Count_Person_15To64Years_Male","[]","[]","[]","[P1Y]","1990" +"200","Amount_Remittance_InwardRemittance","[]","[WorldBankEstimate]","[USDollar]","[P1Y]","1970" +"161","Count_Person_Upto4Years_SevereWasting_AsFractionOf_Count_Person_Upto4Years","[100]","[JointChildMalnutritionEstimate]","[Percent]","[P1Y]","1983" +"188","Count_Person_25OrMoreYears_Female_BachelorsDegreeOrHigher_AsFractionOf_Count_Person_25OrMoreYears_Female","[]","[]","[]","[P1Y]","1970" +"100","Count_Person_7To14Years_Female_Employed_AsFractionOf_Count_Person_7To14Years_Female","[100]","[]","[Percent]","[P1Y]","1994" 
+"167","Count_Person_25OrMoreYears_Male_MastersDegreeOrHigher_AsFractionOf_Count_Person_25OrMoreYears_Male","[]","[]","[]","[P1Y]","1990" +"215","Amount_EconomicActivity_GrossDomesticProduction_Nominal_PerCapita","[]","[]","[USDollar]","[P1Y]","1960" +"188","Amount_Consumption_Alcohol_15OrMoreYears_AsFractionOf_Count_Person_15OrMoreYears","[]","[WorldHealthOrganizationEstimates]","[Liter]","[P1Y]","2000" +"188","Count_Person_15OrMoreYears_InLaborForce_Female_AsFractionOf_Count_Person_InLaborForce","[]","[]","[]","[P1Y]","1990" +"216","Count_Person_ResidingLessThan5MetersAboveSeaLevel_AsFractionOf_Count_Person","[]","[]","[]","[P1Y]","1990" +"215","Count_Product_MobileCellularSubscription_AsFractionOf_Count_Person","[]","[]","[]","[P1Y]","1960" +"188","Count_Person_InLaborForce","[]","[InternationalLaborOrganization]","[]","[P1Y]","1990" +"186","Count_Death_IntentionalSelfHarm_AsFractionOf_Count_Person","[]","[]","[Per100000Persons]","[P1Y]","2000" +"197","Count_Death_0Years_AsFractionOf_Count_BirthEvent_LiveBirth","[]","[UnitedNationsIGMEEstimate]","[Per1000LiveBirths]","[P1Y]","1960" +"160","Count_Person_Upto4Years_Female_Wasting_AsFractionOf_Count_Person_Upto4Years_Female","[100]","[JointChildMalnutritionEstimate]","[Percent]","[P1Y]","1986" +"203","Amount_Remittance_OutwardRemittance","[]","[WorldBankEstimate]","[USDollar]","[P1Y]","1970" +"160","Count_Person_Upto4Years_Female_Overweight_AsFractionOf_Count_Person_Upto4Years_Female","[]","[]","[]","[P1Y]","1986" +"214","Count_Person_IsInternetUser_PerCapita","[100]","[]","[]","[P1Y]","1990" +"210","Amount_Production_ElectricityFromNuclearSources_AsFractionOf_Amount_Production_Energy","[]","[]","[]","[P1Y]","1990" +"159","Count_Person_Upto4Years_Female_SevereWasting_AsFractionOf_Count_Person_Upto4Years_Female","[100]","[JointChildMalnutritionEstimate]","[Percent]","[P1Y]","1986" +"184","Count_Person_25OrMoreYears_TertiaryEducation_AsFractionOf_Count_Person_25OrMoreYears","[]","[]","[]","[P1Y]","1970" 
+"210","Amount_Production_ElectricityFromOilGasOrCoalSources_AsFractionOf_Amount_Production_Energy","[]","[]","[]","[P1Y]","1990" +"218","GrowthRate_Count_Person","[]","[]","[]","[P1Y]","1961" +"213","Amount_Consumption_RenewableEnergy_AsFractionOf_Amount_Consumption_Energy","[]","[]","[]","[P1Y]","1990" +"104","Amount_Stock","[]","[]","[USDollar]","[P1Y]","1975" +"218","LifeExpectancy_Person","[]","[]","[Year]","[]","1960" +"210","Count_Person_20To79Years_Diabetes_AsFractionOf_Count_Person_20To79Years","[]","[]","[]","[P1Y]","2000" diff --git a/scripts/world_bank/wdi/manifest.json b/scripts/world_bank/wdi/manifest.json index bc3927141e..eb427c0472 100644 --- a/scripts/world_bank/wdi/manifest.json +++ b/scripts/world_bank/wdi/manifest.json @@ -20,7 +20,8 @@ "WorldBankCountries.csv", "schema_csvs/WorldBankIndicators_prod.csv" ], - "cron_schedule": "0 11 * * 2" + "cron_schedule": "0 11 * * 2", + "validation_config_file": "validation_config.json" } ] } \ No newline at end of file diff --git a/scripts/world_bank/wdi/validation_config.json b/scripts/world_bank/wdi/validation_config.json new file mode 100644 index 0000000000..2cf7e60928 --- /dev/null +++ b/scripts/world_bank/wdi/validation_config.json @@ -0,0 +1,28 @@ +{ + "schema_version": "1.0", + "rules": [ + { + "rule_id": "check_deleted_records_percent", + "description": "Checks that the percentage of deleted points is within the threshold.", + "validator": "DELETED_RECORDS_PERCENT", + "params": { + "threshold": 0.61 + } + }, + { + "rule_id": "check_goldens_output_csv", + "validator": "GOLDENS_CHECK", + "params": { + "golden_files": "golden_data/golden_WorldBank.csv", + "input_files": "output/WorldBank.csv" + } + }, + { + "rule_id": "check_goldens_summary_report", + "validator": "GOLDENS_CHECK", + "params": { + "golden_files": "golden_data/golden_summary_report.csv" + } + } + ] +} \ No newline at end of file diff --git a/tools/import_validation/Validations.md b/tools/import_validation/Validations.md index 
4efebb3a55..d46ece74fc 100644 --- a/tools/import_validation/Validations.md +++ b/tools/import_validation/Validations.md @@ -72,6 +72,8 @@ To generate goldens for the summary_report.csv to verify that all the expected StatVars are generated with the corresponding number of places and dates, run the following: +This will compare the golden files using summary_report.csv as the default input: + ```shell python3 validator_goldens.py \ --validate_goldens_input=summary_report.csv \ diff --git a/tools/import_validation/runner.py b/tools/import_validation/runner.py index f1364518e6..9f9ddfaebc 100644 --- a/tools/import_validation/runner.py +++ b/tools/import_validation/runner.py @@ -41,6 +41,8 @@ class ValidationRunner: def __init__(self, validation_config_path: str, differ_output: str, stats_summary: str, lint_report: str, validation_output: str): + self.validation_config_path = validation_config_path + self.stats_summary = stats_summary self.config = ValidationConfig(validation_config_path) self.validation_output = validation_output self.validator = Validator() @@ -212,6 +214,48 @@ def run_validations(self) -> tuple[bool, list[ValidationResult]]: if output_dir: rule_params.setdefault('output_path', output_dir) + # Resolve paths relative to the directory of the validation config. 
+ if 'summary_report' in rule.get('rule_id', ''): + # Helper to find a base directory containing target_sub_path by walking up + def find_base_dir(start_path: str, target_sub_path: str): + if not start_path: + return None + curr = os.path.abspath(start_path) + for _ in range(10): # limit to 10 levels up + if os.path.exists(os.path.join(curr, target_sub_path)): + return curr + parent = os.path.dirname(curr) + if parent == curr: + break + curr = parent + return None + + config_dir = None + # Walk up from validation_config_path, self.stats_summary, or CWD to find where 'golden_data' lives + for start in [self.validation_config_path, self.stats_summary, os.getcwd()]: + config_dir = find_base_dir(start, 'golden_data') + if config_dir: + break + + if not config_dir: + config_dir = os.path.dirname(os.path.abspath(self.validation_config_path)) + + print(f"DEBUG: Found summary_report rule: '{rule.get('rule_id')}'") + print(f"DEBUG: Config directory resolved to: '{config_dir}'") + for path_key in ['golden_files', 'input_files']: + if path_key in rule_params: + val = rule_params[path_key] + print(f"DEBUG: Before resolve '{path_key}': '{val}'") + if isinstance(val, str): + if val and not os.path.isabs(val) and not val.startswith('gs://') and not val.startswith('http://') and not val.startswith('https://'): + rule_params[path_key] = os.path.join(config_dir, val) + elif isinstance(val, list): + rule_params[path_key] = [ + os.path.join(config_dir, item) if isinstance(item, str) and item and not os.path.isabs(item) and not item.startswith('gs://') and not item.startswith('http://') and not item.startswith('https://') else item + for item in val + ] + print(f"DEBUG: After resolve '{path_key}': '{rule_params[path_key]}'") + if validator_name == 'SQL_VALIDATOR': result = validation_func(self.data_sources['stats'], self.data_sources['differ'], diff --git a/tools/import_validation/validator_goldens.py b/tools/import_validation/validator_goldens.py index 7b19b783fe..2782671d99 100644 
--- a/tools/import_validation/validator_goldens.py +++ b/tools/import_validation/validator_goldens.py @@ -298,7 +298,9 @@ def load_nodes_from_file(files: str) -> dict: file_nodes = file_util.file_load_csv_dict(input_file, key_index=True) for node in file_nodes.values(): - nodes[len(nodes)] = node + # Clean up None/empty keys and strip whitespace from headers/keys to ensure robust parsing + cleaned_node = {k.strip(): v for k, v in node.items() if k is not None and isinstance(k, str) and k.strip() != ''} + nodes[len(nodes)] = cleaned_node else: # For MCF or JSON, we assume nodes are already keyed by DCID. file_nodes = mcf_file_util.load_mcf_nodes(input_file)