diff --git a/tools/agent_efficacy/Agent_Efficacy_Board.html b/tools/agent_efficacy/Agent_Efficacy_Board.html new file mode 100644 index 0000000000..3d758b0ab2 --- /dev/null +++ b/tools/agent_efficacy/Agent_Efficacy_Board.html @@ -0,0 +1,307 @@ + + + + + + Autoschematization Agent Evaluation + + + + +
+ + +
+
+
+

Autoschematization Agent Evaluation

+
+ +
+ + Run ID: DESA-GENDER_2025_OBS_ICT_SKILL_RT +
+
+ + Target Payload: 1M Observations +
+
+ + Pipeline Status: Audit Complete +
+
+
+
+
+ +
+ +
+
+ + +
+
+
+

Agent Efficacy (F1)

+

(Ultimate mapping reliability)

+
+

100.0%

+
+
+
+

+ Formula: 2*(P*R)/(P+R) +

+
+
+ + +
+
+

Precision

+

(Accuracy of generations)

+
+

100.0%

+
+
+
+

+ Formula: TP / (TP + FP) +

+
+
+ + +
+
+

Recall

+

(Capture rate of targets)

+
+

100.0%

+
+
+
+

+ Formula: TP / (TP + FN) +

+
+
+ + +
+
+

Time to Ingest

+

(Total processing speed)

+
+

14m 22s

+
+
+
+

+ Invocation to first query. +

+
+
+ + +
+
+

Completion Rate

+

(Pipeline execution health)

+
+

100%

+
+
+
+

+ Syntactical pipeline success. +

+
+
+ +
+
+ + +
+

Granular Mapping Analysis

+ +
+ + +
+
+ +
+
+ +
+ +

True Positives

+

(Mappings done by the Agent correctly)

+ +

384

+
+
+ + +
+
+ +
+
+ +
+ +

False Positives

+

(Wrong mappings / Hallucinations)

+ +

0

+
+
+ + +
+
+ +
+
+ +
+ +

False Negatives

+

(Mappings missed by the Agent)

+ +

0

+
+
+ +
+
+
+ +
+ +

Detailed File Comparisons

+
+

pvmap.csv Comparison

+
+
+ Precision + 100.0% +
+
+ Recall + 100.0% +
+
+ F1 Score + 100.0% +
+
+
+
mcf_diff.py output
+
+No differences found.
+                
+
+
+ +
+

stat_vars.mcf Comparison

+
+
+ Precision + 100.0% +
+
+ Recall + 100.0% +
+
+ F1 Score + 100.0% +
+
+
+
mcf_diff.py output
+
+No differences found.
+                
+
+
+ +
+

stat_vars_schema.mcf Comparison

+
+
+ Precision + 100.0% +
+
+ Recall + 100.0% +
+
+ F1 Score + 100.0% +
+
+
+
mcf_diff.py output
+
+No differences found.
+                
+
+
+ +
+

tmcf Comparison

+
+
+ Precision + 100.0% +
+
+ Recall + 100.0% +
+
+ F1 Score + 100.0% +
+
+
+
mcf_diff.py output
+
+No differences found.
+                
+
+
+ +
+

csv_data Comparison

+
+
+ Precision + 100.0% +
+
+ Recall + 100.0% +
+
+ F1 Score + 100.0% +
+
+
+
mcf_diff.py output
+
+No differences found.
+                
+
+
+
+ \ No newline at end of file diff --git a/tools/agent_efficacy/EFFICACY_GUIDE.md b/tools/agent_efficacy/EFFICACY_GUIDE.md new file mode 100644 index 0000000000..edd1c76ac0 --- /dev/null +++ b/tools/agent_efficacy/EFFICACY_GUIDE.md @@ -0,0 +1,71 @@ +# Agent Efficacy Testing Guide + +The efficacy calculation pipeline (`calculate_efficacy.py`) evaluates the quality of the autoschematization agent by comparing its output predictions against human-reviewed "gold standard" directories. It leverages semantic graph comparisons (`mcf_diff.py`) to accurately compute metrics like **Precision, Recall, and the F1 Score**. + +--- + +### 1. Understanding the Evaluated Files + +The script compares four primary output artifacts generated by the agent against their reviewed counterparts. It computes True Positives (TP), False Positives (FP), and False Negatives (FN) to determine the overall agent efficacy. + +* **`output_pvmap.csv` (Primary Efficacy Metric):** + * **What it is:** The Property-Value (PV) mapping file that links dataset column headers to Data Commons schema nodes. + * **How it's tested:** This is processed using the `PropertyValueMapper`. The script extracts a normalized semantic graph and calculates the "Hero Metrics" (Precision, Recall, F1) that define the overall run's success. +* **`output_stat_vars.mcf`:** + * **What it is:** Defines the statistical variables extracted from the dataset. + * **How it's tested:** The script uses **fingerprinting** (`use_fingerprint=True`). This means it evaluates if the core semantic definition (properties and values) of the StatVar is correct, even if the exact node ID (DCID) strings have slight, functionally irrelevant differences. +* **`output_stat_vars_schema.mcf` & `output.tmcf`:** + * **What it is:** Extends the schema definitions and dictates the tabular template bindings. + * **How it's tested:** Compared as standard MCF graphs to ensure the underlying structure matches the gold standard perfectly. + +--- + +### 2. Testing a Single Dataset + +The `calculate_efficacy.py` script uses command-line arguments to accept the required input and output paths dynamically. You should run this script within the project's virtual environment. + +**Command Line Arguments:** +* `--test`: Path to the test (prediction) directory. +* `--gold`: Path to the gold (reviewed) directory. +* `--output`: Path to the directory where the efficacy results dashboard should be saved. +* `--dataset_id` (Optional): The dataset ID for display purposes in the HTML dashboard. + +**Example execution:** +```bash +source venv/bin/activate +python tools/agent_efficacy/calculate_efficacy.py \ + --test test_DESA-GENDER_2025_OBS_ICT_SKILL_RT \ + --gold undata/DESA/output/reviewed_pvmap_harish/DESA-GENDER_2025_OBS_ICT_SKILL_RT \ + --output undata/DESA/efficacy_results/DESA-GENDER_2025_OBS_ICT_SKILL_RT \ + --dataset_id DESA-GENDER_2025_OBS_ICT_SKILL_RT +``` +**Results:** Open the generated dashboard (`Agent_Efficacy_Board.html` inside your `--output` directory) in your web browser. + +--- + +### 3. Testing in Bulk (Multiple Datasets) + +To evaluate the agent across multiple datasets simultaneously, you can use the built-in `--bulk` flag. When this flag is passed, the `--test` and `--gold` arguments are treated as the parent directories containing all the datasets. + +**Example Bulk Run Execution:** + +```bash +# Ensure virtual environment is active +source venv/bin/activate + +python tools/agent_efficacy/calculate_efficacy.py \ + --bulk \ + --test undata/DESA/output/reviewed \ + --gold undata/DESA/output/unreviewed \ + --output undata/DESA/efficacy_results/bulk_run +``` + +The script will automatically detect datasets and generate a unique run directory (e.g., `bulk_run_20260601_120000`) inside your `--output` path. This directory will contain a separate HTML dashboard folder for every evaluated dataset, along with an aggregated `summary.csv` file containing the F1, Precision, Recall, TP, FP, and FN scores for all datasets in that run. + +--- + +### 4. Interpreting the Output + +The HTML dashboard (`Agent_Efficacy_Board.html`) gives you a visual breakdown of the metrics: +* **Hero Metrics (Top Section):** Represents the accuracy of the `pvmap.csv`. A high F1 score here means the agent successfully mapped columns to the correct semantic properties. +* **Detailed Semantic Comparisons:** Look here to see the specific True Positives, False Positives (incorrect mappings), and False Negatives (missed mappings). The dashboard will output raw MCF node diffs to show you exactly *where* the agent deviated from the reviewed standard for `pvmap`, `stat_vars`, `schema`, and `tmcf`. \ No newline at end of file diff --git a/tools/agent_efficacy/USAGE_EXAMPLES.md b/tools/agent_efficacy/USAGE_EXAMPLES.md new file mode 100644 index 0000000000..916b9063c0 --- /dev/null +++ b/tools/agent_efficacy/USAGE_EXAMPLES.md @@ -0,0 +1,44 @@ +# Efficacy Tool Quick-Start Examples + +This guide provides sample commands for running the `calculate_efficacy.py` script in both single and bulk modes. + +## 1. Single Dataset Evaluation +Use this command to compare a single agent-generated folder against a reviewed gold standard. + +### Sample Command +```bash +python3 tools/agent_efficacy/calculate_efficacy.py \ + --test /usr/local/google/home/nehil/datacommons/import/git/data/test_DESA-GENDER_2025_OBS_ICT_SKILL_RT \ + --gold /usr/local/google/home/nehil/datacommons/import/git/data/undata/DESA/output/reviewed_pvmap_harish/DESA-GENDER_2025_OBS_ICT_SKILL_RT \ + --output /usr/local/google/home/nehil/datacommons/import/git/data/tmp/efficacy_results/single_run \ + --dataset_id ICT_SKILL_RT +``` + +### Result +- Dashboard: `/tmp/efficacy_results/single_run/Agent_Efficacy_Board.html` +- Updates: Precision, Recall, and F1 will be correctly populated in the HTML. + +--- + +## 2. Bulk Evaluation (Multiple Datasets) +Use this command to evaluate all datasets in a directory. It will create a unique, timestamped folder for the run. + +### Sample Command +```bash +python3 tools/agent_efficacy/calculate_efficacy.py \ + --bulk \ + --test /usr/local/google/home/nehil/datacommons/import/git/data/undata/DESA/output/agent_predictions \ + --gold /usr/local/google/home/nehil/datacommons/import/git/data/undata/DESA/output/reviewed_pvmap_harish \ + --output /tmp/efficacy_results/bulk_runs +``` + +### Result +- Output Directory: `/tmp/efficacy_results/bulk_runs/bulk_run_20260602_HHMMSS/` +- Summary File: `summary.csv` inside the new run folder. +- Dashboards: Individual `Agent_Efficacy_Board.html` files for every dataset found. + +--- + +## 3. How to Present Results +1. **HTML Dashboard:** Open the `Agent_Efficacy_Board.html` file in any browser to view the "Hero Metrics" and detailed semantic diffs. +2. **Summary CSV:** Use the `summary.csv` generated during bulk runs to create a high-level report or table of performance across all indicators. diff --git a/tools/agent_efficacy/calculate_efficacy.py b/tools/agent_efficacy/calculate_efficacy.py new file mode 100644 index 0000000000..f322624f0e --- /dev/null +++ b/tools/agent_efficacy/calculate_efficacy.py @@ -0,0 +1,213 @@ +import os +import sys +import re +import csv +import argparse +import datetime + +# Set up paths to import tools +PROJECT_ROOT = '/usr/local/google/home/nehil/datacommons/import/git/data' +sys.path.append(os.path.join(PROJECT_ROOT, 'tools/statvar_importer')) +sys.path.append(os.path.join(PROJECT_ROOT, 'util')) +sys.path.append(os.path.join(PROJECT_ROOT, 'tools/agentic_import/metrics')) + +import mcf_diff +from counters import Counters +from property_value_mapper import PropertyValueMapper +from pvmap_generator_metrics import PVMapGeneratorMetricsRunner + +def get_metrics_from_counters(counters_obj): + # Use metrics formula directly from pvmap_generator_metrics.py + diff_stats = {'counters': counters_obj.get_counters()} + stats = PVMapGeneratorMetricsRunner.get_stats_from_diff_counters(None, diff_stats) + + return { + 'tp': stats.get('true_positive', 0), + 'fp': stats.get('false_positive', 0), + 'fn': stats.get('false_negative', 0), + 'precision': stats.get('precision', 0), + 'recall': stats.get('recall', 0), + 'f1': stats.get('f1', 0) + } + +def load_pv_map_nodes(file_path): + """Loads PV map into MCF-like nodes using PropertyValueMapper normalization.""" + pv_mapper = PropertyValueMapper() + pv_mapper.load_pvs_from_file(file_path) + # Get the raw GLOBAL map + raw_map = pv_mapper.get_pv_map().get('GLOBAL', {}) + # Convert to standard node format: {dcid: {prop: val}} + nodes = {} + for key, pvs in raw_map.items(): + # Ensure DCID is clean + nodes[key] = pvs + return nodes + +def run_comparison(pred_path, gold_path, is_pvmap=False, use_fingerprint=False): + if not os.path.exists(pred_path) or not os.path.exists(gold_path): + print(f"Skipping: missing {pred_path} or {gold_path}") + return "", {'tp': 0, 'fp': 0, 'fn': 0, 'precision': 0, 'recall': 0, 'f1': 0} + + counters = Counters() + config = { + 'show_diff_nodes_only': True, + 'ignore_property': ['description', 'provenance', 'memberOf', 'member', 'name', 'constraintProperties', 'keyString', 'relevantVariable'], + 'fingerprint_dcid': use_fingerprint + } + + if is_pvmap: + # Specialized loading for PV Maps to handle wide vs narrow formats + nodes1 = load_pv_map_nodes(pred_path) + nodes2 = load_pv_map_nodes(gold_path) + print(f" [PVMap] Loaded {len(nodes1)} nodes from pred, {len(nodes2)} from gold") + diff_text = mcf_diff.diff_mcf_nodes(nodes1, nodes2, config, counters) + else: + # Standard MCF loading + diff_text = mcf_diff.diff_mcf_files(pred_path, gold_path, config, counters) + + metrics = get_metrics_from_counters(counters) + return diff_text, metrics + +def update_html(template_content, dataset_id, hero_metrics, detailed_results): + content = template_content + for key, label in [('f1', 'Agent Efficacy (F1)'), ('precision', 'Precision'), ('recall', 'Recall')]: + escaped_label = re.escape(label) + content = re.sub( + rf'({escaped_label}.*?tracking-tight(?:er)?">)([\d\.]+%)(

)', + r'\g<1>' + f"{hero_metrics[key]*100:.1f}%" + r'\g<3>', + content, flags=re.DOTALL + ) + + content = re.sub(r'(True Positives.*?tracking-tighter">)([\d,]+)(

)', r'\g<1>' + f"{hero_metrics['tp']:,}" + r'\g<3>', content, flags=re.DOTALL) + content = re.sub(r'(False Positives.*?tracking-tighter">)([\d,]+)(

)', r'\g<1>' + f"{hero_metrics['fp']:,}" + r'\g<3>', content, flags=re.DOTALL) + content = re.sub(r'(False Negatives.*?tracking-tighter">)([\d,]+)(

)', r'\g<1>' + f"{hero_metrics['fn']:,}" + r'\g<3>', content, flags=re.DOTALL) + content = re.sub(r'Run ID: .*?', f'Run ID: {dataset_id} (Rechecked)', content) + + if '
' in content: + content = content.split('
')[0] + + diff_sections = '

Detailed Semantic Comparisons

' + for label, data in detailed_results.items(): + diff_sections += f''' +
+

{label}

+
+
Precision{data['metrics']['precision']*100:.1f}%
+
Recall{data['metrics']['recall']*100:.1f}%
+
F1 Score{data['metrics']['f1']*100:.1f}%
+
+
Semantic Match (TP: {data['metrics']['tp']} | FP: {data['metrics']['fp']} | FN: {data['metrics']['fn']})
+
{data['diff'] if data['diff'] else 'No differences found.'}
+
+ ''' + diff_sections += '
' + return content.replace('', diff_sections + '') if '' in content else content + diff_sections + +def process_single_dataset(test_dir, gold_dir, output_dir, dataset_id, template_content): + if not os.path.exists(output_dir): os.makedirs(output_dir) + + file_mappings = [ + ('pvmap.csv', 'output_pvmap.csv', 'output_pvmap.csv', True, False), + ('stat_vars.mcf', 'output_stat_vars.mcf', 'output_stat_vars.mcf', False, True), + ('stat_vars_schema.mcf', 'output_stat_vars_schema.mcf', 'output_stat_vars_schema.mcf', False, False), + ('tmcf', 'output.tmcf', 'output.tmcf', False, False), + ] + + detailed_results = {} + total_tp = 0 + total_fp = 0 + total_fn = 0 + + print(f"\n--- Rechecking Efficacy for {dataset_id} ---") + for label, pred_name, gold_name, is_pv, use_fp in file_mappings: + print(f"Comparing {label}...") + diff_text, metrics = run_comparison(os.path.join(test_dir, pred_name), os.path.join(gold_dir, gold_name), is_pvmap=is_pv, use_fingerprint=use_fp) + detailed_results[label] = {'diff': diff_text, 'metrics': metrics} + print(f" Result: F1={metrics['f1']:.1%}, TP={metrics['tp']}, FP={metrics['fp']}, FN={metrics['fn']}") + + total_tp += metrics['tp'] + total_fp += metrics['fp'] + total_fn += metrics['fn'] + + precision = total_tp / (total_tp + total_fp) if (total_tp + total_fp) > 0 else 0 + recall = total_tp / (total_tp + total_fn) if (total_tp + total_fn) > 0 else 0 + f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0 + + hero_metrics = { + 'tp': total_tp, + 'fp': total_fp, + 'fn': total_fn, + 'precision': precision, + 'recall': recall, + 'f1': f1 + } + + final_html = update_html(template_content, dataset_id, hero_metrics, detailed_results) + + with open(os.path.join(output_dir, 'Agent_Efficacy_Board.html'), 'w') as f: f.write(final_html) + return hero_metrics + +def main(): + parser = argparse.ArgumentParser(description="Calculate efficacy metrics.") + parser.add_argument('--test', required=True, help="Path to the test (prediction) directory.") + parser.add_argument('--gold', required=True, help="Path to the gold (reviewed) directory.") + parser.add_argument('--output', required=True, help="Path to the output directory to save results.") + parser.add_argument('--dataset_id', default="Dataset", help="Optional dataset ID for display.") + parser.add_argument('--bulk', action='store_true', help="If set, treats test and gold as parent directories containing multiple dataset folders.") + args = parser.parse_args() + + template_path = os.path.join(os.path.dirname(__file__), 'Agent_Efficacy_Board.html') + with open(template_path, 'r') as f: + template_content = f.read() + + if args.bulk: + run_id = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + args.output = os.path.join(args.output, f"bulk_run_{run_id}") + print(f"Starting Bulk Efficacy Calculation natively... Output: {args.output}") + summary_data = [] + if not os.path.exists(args.output): + os.makedirs(args.output) + + for dataset_id in os.listdir(args.gold): + gold_ds_dir = os.path.join(args.gold, dataset_id) + if not os.path.isdir(gold_ds_dir): + continue + + # Allow test directories to either match exactly or be prefixed with test_ + test_ds_dir = os.path.join(args.test, dataset_id) + if not os.path.isdir(test_ds_dir): + test_ds_dir = os.path.join(args.test, f"test_{dataset_id}") + + if os.path.isdir(test_ds_dir): + out_ds_dir = os.path.join(args.output, dataset_id) + try: + hero_metrics = process_single_dataset(test_ds_dir, gold_ds_dir, out_ds_dir, dataset_id, template_content) + if hero_metrics: + summary_data.append({ + 'dataset_id': dataset_id, + 'f1': hero_metrics['f1'], + 'precision': hero_metrics['precision'], + 'recall': hero_metrics['recall'], + 'tp': hero_metrics['tp'], + 'fp': hero_metrics['fp'], + 'fn': hero_metrics['fn'] + }) + except Exception as e: + print(f"Error processing {dataset_id}: {e}") + else: + print(f"Skipped: {dataset_id} (Matching test directory not found)") + + # Write summary.csv + if summary_data: + summary_path = os.path.join(args.output, 'summary.csv') + with open(summary_path, 'w', newline='') as f: + writer = csv.DictWriter(f, fieldnames=['dataset_id', 'f1', 'precision', 'recall', 'tp', 'fp', 'fn']) + writer.writeheader() + writer.writerows(summary_data) + print(f"\nBulk run complete. Summary saved to {summary_path}") + else: + process_single_dataset(args.test, args.gold, args.output, args.dataset_id, template_content) + print(f"\nRecheck complete. Results saved to {args.output}") + +if __name__ == "__main__": + main() diff --git a/tools/agentic_import/metrics/pvmap_generator_metrics.py b/tools/agentic_import/metrics/pvmap_generator_metrics.py new file mode 100644 index 0000000000..8312cc71d1 --- /dev/null +++ b/tools/agentic_import/metrics/pvmap_generator_metrics.py @@ -0,0 +1,245 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Utilities to generate metrics for PV map generation for statvar imports. +""" + +import os +import sys +import subprocess +import tempfile +import time + +from absl import app +from absl import flags +from absl import logging + +_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(_SCRIPT_DIR) +sys.path.append(os.path.dirname(_SCRIPT_DIR)) +sys.path.append(os.path.dirname(os.path.dirname(_SCRIPT_DIR))) +sys.path.append( + os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(_SCRIPT_DIR))), 'util')) +sys.path.append( + os.path.join(os.path.dirname(os.path.dirname(_SCRIPT_DIR)), + 'statvar_importer')) + +import file_util +from counters import Counters +from mcf_diff import diff_mcf_files +from mcf_file_util import get_value_list +from stat_var_test_runner import StatVarProcessorTestRunner, run_script + + +class PVMapGeneratorMetricsRunner(StatVarProcessorTestRunner): + """Class to generate metrics for PV map generation for statvar imports. + + The metrics are generated by running the statvar processor with the test + config and comparing the output with the expected output. + The files for pvmap egenration are specified in the test config as follows: + "test_name": "", + ... + "pvmap_generator_test": { + "pvmap_generator_args": { + "input_data": "", + "output_data": "", + }, + "expected_output_files": [ + "", + "", + ... + ], + } + + """ + + def __init__(self, test_config_file: str = None, test_config: dict = None): + super().__init__(test_config_file, test_config) + + def run_pvmap_generator(self, test_config: dict) -> dict: + """Runs a single instance of pvmap generator.""" + test_name = test_config.get('test_name', 'pvmap_generator_test') + test_dir = os.path.join(self._temp_dir.name, test_name) + output_dir = os.path.join(test_dir, 'generated') + logging.info( + f'Running pvmap generator for test {test_name}, output in {output_dir}, config: {test_config}' + ) + env_dict = dict(self.get_env_dict()) + env_dict.update(get_env_dict(test_config.get("env_file"))) + # Build the pvmap generator commandline + cmd_args = dict() + # Add default arguments for output + + cmd_args = merged_args([ + { + '--output_path': output_dir + }, + test_config.get('pvmap_generator_args', {}), + ]) + output_arg = get_arg(cmd_args, '--output_path') + if output_arg: + output_dir = os.path.realpath(output_arg) + if not output_dir.endswith('/'): + output_dir = os.path.dirname(output_dir) + + cwd = self.get_local_dir() + script_status = run_script( + interpreter=sys.executable, + script=os.path.join(os.path.dirname(_SCRIPT_DIR), + 'pvmap_generator.py'), + args=cmd_args, + cwd=cwd, + env=env_dict, + output_dir=output_dir, + log_dir=os.path.join(output_dir, 'debug'), + ) + + # Check for generated files + generated_config = { + 'pv_map': os.path.join(output_dir, 'generated_pvmap.csv'), + 'config_file': os.path.join(output_dir, 'generated_metadata.csv'), + } + script_status['generated_config'] = generated_config + for arg, file in generated_config.items(): + if not os.path.exists(file): + logging.error( + f'Failed to generate {file} for test: {test_name}') + script_status['status'] = 'FAIL' + + return script_status + + def run_sample_statvar_processor(self, test_config: dict, + generated_config: dict) -> dict: + """Run statvar processor on the sample input using the generated configs.""" + test_name = test_config.get('test_name', 'sample_statvar_processor') + test_dir = os.path.join(self._temp_dir.name, test_name) + output_dir = os.path.join(test_dir, 'sample') + env_dict = dict(self.get_env_dict()) + env_dict.update(get_env_dict(test_config.get("env_file"))) + + # Get commandline args for stavtar processor + # to use generated pvmap files. + cmd_args = merged_args([ + generated_config, + { + '--output_path': + output_dir, + '--output_data_pvs': + os.path.join(output_dir, 'output_data_pvs.csv'), + }, + ]) + script_status = self.run_statvar_processor(test_config, cmd_args) + + return script_status + + def generate_metrics(self, test_config: dict, test_status: dict) -> dict: + """Generates metrics for a test config. + + Args: + test_config: Dictionary with the test config. + + Returns: + Dictionary with the metrics. + """ + test_name = test_config.get('test_name', 'sample_statvar_processor') + test_dir = os.path.join(self._temp_dir.name, test_name) + outputs = test_config.get('pvmap_generator_outputs') + output_stats = self.verify_outputs(outputs, test_config, + test_status.get('output_dir')) + stats = {} + for output_name, stats in output_stats: + output_stats = output.get('counters', {}) + output_stats = self.get_stats_from_diff_counters(output_stats) + file_util.file_write_csv_dict( + output_stats, + os.path.join(test_dir, output_name + '-diff-counters.csv')) + stats[output_name] = output_stats + logging.info(f'Diff stats for {output_name}: {output_stats}') + + return stats + + def get_stats_from_diff_counters(self, + diff_stats: dict, + stats: dict = None) -> dict: + """Returns precision and recall stats from diff counters. + + Args: + diff_stats: diff counters for diff between actual and expected output files. + stats: output stats dictionary into which stats are added. + + """ + if stats is None: + stats = dict({ + 'false_positive': 0, + 'false_negative': 0, + 'true_positive': 0, + }) + counters = diff_stats.get('counters', {}) + matched = counters.get('pvs-matched', 0) + false_negative = counters.get('pvs-added', 0) + false_positive = counters.get('pvs-deleted', 0) + # Treat modfied as deleted+added + modified = counters.get('pvs-modified', 0) + false_negative += modified + false_positive += modified + + stats['true_positive'] += matched + stats['false_positive'] += false_positive + stats['false_negative'] += false_negative + + # Compute precision and recall + true_positive = stats['true_positive'] + precision = true_positive / (max( + true_positive + stats['false_positive'], 1)) + recall = true_positive / (max(true_positive + stats['false_negative'], + 1)) + + stats['precision'] = precision + stats['recall'] = recall + stats['f1'] = 2 * (precision * recall) / max(precision + recall, 1) + + return stats + + def run_test(self, test_output: str = None) -> dict: + """Runs the pvmap generator and returns the comparison metrics. + + Args: + test_output: JSON file with the test output status + + Returns: + JSON dict with the test summary + """ + cwd = self.get_local_dir() + os.chdir(cwd) + return_status = {'status': 'PASS'} + for index, test_config in enumerate(self._test_config): + test_status = {} + test_name = test_config.get('test_name') + if not test_name: + test_name = f'pvmap-generator-test-{index}' + basedir = os.path.basename(self.get_local_dir()) + if basedir: + test_name += '-' + basedir + test_config['test_name'] = test_name + logging.info(f'Running pvmap generator test: {index}:{test_name}') + pvmap_generator_status = self.run_pvmap_generator(test_config) + test_status['pvmap_generator_status'] = pvmap_generator_status + + logging.info( + f'Running statvar processor for test: {index}:{test_name}') + statvar_status = self.run_sample_statvar_processor( + test_config, pvmap_generator_status.get('generated_config', {})) + + test_status['statvar_processor_status'] = statvar_status + diff --git a/tools/agentic_import/pvmap_generator.py b/tools/agentic_import/pvmap_generator.py index 4b3d457ae8..0ca9f67ff0 100644 --- a/tools/agentic_import/pvmap_generator.py +++ b/tools/agentic_import/pvmap_generator.py @@ -81,6 +81,10 @@ def _define_flags(): 'Output path prefix for all generated files (default: output/output)' ) + flags.DEFINE_string( + 'places_resolved_csv', None, + 'Path to a CSV file with resolved places (optional)') + flags.DEFINE_string( 'gemini_cli', 'gemini', 'Custom path or command to invoke Gemini CLI. ' @@ -118,6 +122,7 @@ class Config: skip_confirmation: bool = False enable_sandboxing: bool = False output_path: str = 'output/output' + places_resolved_csv: Optional[str] = None gemini_cli: Optional[str] = None working_dir: Optional[str] = None extra_instruction_files: List[str] = field(default_factory=list) @@ -478,6 +483,9 @@ def _generate_prompt(self) -> Path: 'extra_instruction_files_abs': [ str(path) for path in self._config.extra_instruction_files ] if self._config.extra_instruction_files else [], + 'places_resolved_csv_abs': + str(self._resolve_path(self._config.places_resolved_csv)) + if self._config.places_resolved_csv else "", } # Render template with these variables @@ -507,6 +515,7 @@ def prepare_config() -> Config: skip_confirmation=_FLAGS.skip_confirmation, enable_sandboxing=_FLAGS.enable_sandboxing, output_path=_FLAGS.output_path, + places_resolved_csv=_FLAGS.places_resolved_csv, gemini_cli=_FLAGS.gemini_cli, working_dir=_FLAGS.working_dir, extra_instruction_files=_FLAGS.extra_instruction_files or [], diff --git a/tools/agentic_import/run_statvar_processor.sh b/tools/agentic_import/run_statvar_processor.sh index 6cddf9224f..d42ef5f16b 100755 --- a/tools/agentic_import/run_statvar_processor.sh +++ b/tools/agentic_import/run_statvar_processor.sh @@ -26,6 +26,7 @@ WORKING_DIR="" INPUT_DATA="" GEMINI_RUN_ID="" OUTPUT_PATH="" +PLACES_RESOLVED_CSV="" while [[ $# -gt 0 ]]; do case $1 in @@ -33,26 +34,58 @@ while [[ $# -gt 0 ]]; do PYTHON_INTERPRETER="$2" shift 2 ;; + --python=*) + PYTHON_INTERPRETER="${1#*=}" + shift + ;; --script-dir) SCRIPT_DIR="$2" shift 2 ;; + --script-dir=*) + SCRIPT_DIR="${1#*=}" + shift + ;; --working-dir) WORKING_DIR="$2" shift 2 ;; + --working-dir=*) + WORKING_DIR="${1#*=}" + shift + ;; --input-data) INPUT_DATA="$2" shift 2 ;; + --input-data=*) + INPUT_DATA="${1#*=}" + shift + ;; --gemini-run-id) GEMINI_RUN_ID="$2" shift 2 ;; + --gemini-run-id=*) + GEMINI_RUN_ID="${1#*=}" + shift + ;; --output-path) OUTPUT_PATH="$2" shift 2 ;; + --output-path=*) + OUTPUT_PATH="${1#*=}" + shift + ;; + --places_resolved_csv) + PLACES_RESOLVED_CSV="$2" + shift 2 + ;; + --places_resolved_csv=*) + PLACES_RESOLVED_CSV="${1#*=}" + shift + ;; *) echo "Unknown option $1" exit 1 @@ -96,6 +129,7 @@ echo "Running statvar processor..." --skip_constant_csv_columns=False \ --output_columns="${OUTPUT_COLUMNS}" \ --output_counters="${OUTPUT_COUNTERS}" \ + --places_resolved_csv="${PLACES_RESOLVED_CSV}" \ --output_path="${OUTPUT_PREFIX}" > "${PROCESSOR_LOG}" 2>&1 # Capture the processor exit code diff --git a/tools/agentic_import/templates/generate_pvmap_prompt.j2 b/tools/agentic_import/templates/generate_pvmap_prompt.j2 index 81af2afc63..ae4bda42f9 100644 --- a/tools/agentic_import/templates/generate_pvmap_prompt.j2 +++ b/tools/agentic_import/templates/generate_pvmap_prompt.j2 @@ -1403,6 +1403,7 @@ For SDMX datasets, also ensure: --working-dir "{{working_dir_abs}}" \ --input-data "{{input_data_abs}}" \ --gemini-run-id "{{gemini_run_id}}" \ + --places_resolved_csv "{{places_resolved_csv_abs}}" \ --output-path "{{output_path_abs}}" ``` diff --git a/tools/statvar_importer/mcf_diff.py b/tools/statvar_importer/mcf_diff.py index e7eaa825f8..d0d7dd5831 100644 --- a/tools/statvar_importer/mcf_diff.py +++ b/tools/statvar_importer/mcf_diff.py @@ -190,10 +190,11 @@ def diff_mcf_node_pvs(node_1: dict, pvs_deleted = set() pvs_added = set() pvs_modified = set() + pvs_matched = set() for d in diff: diff_str.append(d) - if d[0] == ' ': - counters and counters.add_counter(f'PVs-matched', 1) + if d[0] == ' ' and len(d) > 1: + counters and counters.add_counter(f'pvs-matched', 1) else: prop = '' value = '' @@ -204,10 +205,16 @@ def diff_mcf_node_pvs(node_1: dict, if prop: pvs_deleted.add(prop) has_diff = True - if d[0] == '+': + elif d[0] == '+': if prop: pvs_added.add(prop) has_diff = True + elif d[0] == '?': + # Ignore modifications that already show as delete/add + has_diff = True + else: + if prop: + pvs_matched.add(prop) if has_diff: pvs_modified = pvs_deleted.intersection(pvs_added) pvs_added = pvs_added.difference(pvs_modified) @@ -217,6 +224,7 @@ def diff_mcf_node_pvs(node_1: dict, (pvs_modified, 'modified'), (pvs_added, 'added'), (pvs_deleted, 'deleted'), + (pvs_matched, 'matched'), ]: for prop in props: counters.add_counter(f'pvs-{diff_type}', 1) @@ -230,7 +238,9 @@ def diff_mcf_node_pvs(node_1: dict, else: counters and counters.add_counter(f'nodes-missing-in-mcf1', 1) else: - counters and counters.add_counter(f'nodes-matched', 1) + if counters: + counters.add_counter(f'nodes-matched', 1) + counters.add_counter(f'pvs-matched', len(pvs_matched)) return has_diff, '\n'.join(diff_str), pvs_added, pvs_deleted, pvs_modified @@ -389,4 +399,4 @@ def main(_): if __name__ == '__main__': - app.run(main) + app.run(main) \ No newline at end of file diff --git a/tools/statvar_importer/stat_var_processor.py b/tools/statvar_importer/stat_var_processor.py index 487cce556f..d0184e022c 100644 --- a/tools/statvar_importer/stat_var_processor.py +++ b/tools/statvar_importer/stat_var_processor.py @@ -83,7 +83,7 @@ from mcf_filter import drop_existing_mcf_nodes from mcf_diff import fingerprint_node, fingerprint_mcf_nodes, diff_mcf_node_pvs from place_resolver import PlaceResolver -from property_value_mapper import PropertyValueMapper +from property_value_mapper import PropertyValueMapper, write_pv_map from schema_resolver import SchemaResolver from json_to_csv import file_json_to_csv from schema_generator import generate_schema_nodes, generate_statvar_name @@ -1429,6 +1429,8 @@ def __init__( self._config.get('numeric_data_key', 'Number'), self._config.get('pv_lookup_key', 'Key'), ] + # Save per input cell PVs if required. + self._data_pvs = {} if self._config.get('output_data_pvs') else None def generate_pvmap(self): """Generate a PV Map from the input data.""" @@ -2272,6 +2274,8 @@ def process_row(self, row: list, row_index: int): row_index, col_index + 1) and self.process_stat_var_obs_pvs( merged_col_pvs, row_index, col_index): row_svobs += 1 + if self._data_pvs is not None: + self._data_pvs[self._file_context] = merged_col_pvs self.process_row_header_pvs(row, row_index, row_col_pvs, row_svobs, cols_with_pvs) # If row has no SVObs but has PVs, it must be a header. @@ -2818,6 +2822,11 @@ def write_outputs(self, output_path: str): columns=self._config.get('output_columns', []), output_tmcf_file=output_tmcf_file, ) + output_data_pvs = self._config.get('output_data_pvs') + if output_data_pvs and self._data_pvs: + write_pv_map(self._data_pvs, output_data_pvs) + self._counters.add_counter('output-data-pvs', len(self._data_pvs)) + self._counters.print_counters() counters_filename = self._config.get('output_counters', output_path + '_counters.txt') diff --git a/tools/statvar_importer/stat_var_test_runner.py b/tools/statvar_importer/stat_var_test_runner.py new file mode 100644 index 0000000000..ed9a6fe99b --- /dev/null +++ b/tools/statvar_importer/stat_var_test_runner.py @@ -0,0 +1,524 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Utilities to test statvar imports. + +StatVar imports use a JSON test config for regression tests that invoke +statvar processor with a specified set of configs on a test input and +verifies the expected output files. +The test config is a list of statvar processor command line invocation +settings with each invocation as a dictionary with the following sections:a + test_name: name of the test for logs + env_file: a file with environment variables such as API keys + statvar_processor_args: dictionary with the command line arguments + for statvar processor configs. + expected_outputs: + A dictionary of expected output files mapped to output files. + + All file references are releative to the import folder with the + test config. + +Example: +[ + # Parameters for a single invocation of statvar processor + { + "test_name": "Test_Import", + + # Enviroment file loaded with dictionary of env settings + # such as API keys + "env_file": "gs://datcom-prod-imports/config/test_env.csv" + + # Statvar processor command line arguments + "statvar_processor_args": { + "config_file": "metadata.csv", + "pv_map": "import_pvmap.csv", + "places_resolved_csv": "places.csv", + "input_data": "test_data/sample_input.csv", + }, + + # Statvar processor output files with expected outputs to compare with + "statvar_processor_outputs": [ + { + "output_name": "Test_Import_Observations", + "output_file": "", + "expected_output_file": "test_data/sample_output.csv", + }, + ] + } +] + +""" + +import os +import sys +import subprocess +import tempfile +import time + +from absl import app +from absl import flags +from absl import logging + +_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(_SCRIPT_DIR) +sys.path.append(os.path.dirname(_SCRIPT_DIR)) +sys.path.append(os.path.dirname(os.path.dirname(_SCRIPT_DIR))) +sys.path.append( + os.path.join(os.path.dirname(os.path.dirname(_SCRIPT_DIR)), 'util')) + +import file_util +from mcf_diff import diff_mcf_files +from mcf_file_util import get_value_list + +from counters import Counters + +flags.DEFINE_string('test_config', '', 'JSON test config file') +flags.DEFINE_string('test_output', '', 'Output JSON file with test summary') + +_FLAGS = flags.FLAGS + +_BASH_PATH = os.environ.get('BASH_PATH', 'bash') + + +class StatVarProcessorTestRunner: + """Class to run statvar processor for a test config + """ + + def __init__(self, test_config_file: str = None, test_config: dict = None): + self._temp_dir = tempfile.TemporaryDirectory() + self._test_config = [] + if test_config: + self._test_config.extend(test_config) + self._env_dict = {} + self.load_config(test_config_file) + self._counters = Counters() + + def __del__(self): + """Cleanup local files.""" + self._temp_dir.cleanup() + + def load_config(self, test_config_file: str): + """Loads a test config from a JSON file.""" + if test_config_file: + file_config = file_util.file_load_py_dict(test_config_file) + logging.info(f'Loading test config: {file_config}') + self._test_config.extend(file_config) + self._test_config_file = os.path.realpath(test_config_file) + self._env_dict.update( + get_env_dict(self._test_config[0].get('env_file', ''))) + + def get_config(self) -> dict: + """Returns the test config.""" + return self._test_config + + def get_local_dir(self) -> str: + """Returns the local directory for the test.""" + if self._test_config_file: + return os.path.dirname(self._test_config_file) + return os.getcwd() + + def get_env_dict(self) -> dict: + """Returns the environment variables for the test.""" + return self._env_dict + + def run_tests(self, test_output: str = None) -> dict: + """Runs all the statvar processor test and verified each output. + + Args: + test_output: JSON file with the test output status + + Returns: + JSON dict with the test summary + """ + cwd = self.get_local_dir() + os.chdir(cwd) + logging.info( + f'Running statvar processor for {len(self._test_config)} tests from {cwd}' + ) + return_status = {'status': 'PASS'} + for index, test_config in enumerate(self._test_config): + test_name = test_config.get('test_name') + if not test_name: + test_name = f'statvar-processor-test-{index}' + basedir = os.path.basename(self.get_local_dir()) + if basedir: + test_name += '-' + basedir + test_config['test_name'] = test_name + logging.info(f'Running statvar processor test: {index}:{test_name}') + test_status = self.run_statvar_processor(test_config) + if test_status.get('returncode', 1) != 0: + logging.error( + f'statvar processor test failed for {test_name}: {test_status}' + ) + return_status['status'] = 'FAIL' + else: + outputs = test_config.get('statvar_processor_outputs') + if outputs: + output_status = self.verify_outputs( + outputs, test_config, test_status.get('output_dir')) + if output_status.get('status', '') != 'PASS': + logging.error( + f'Failed to verify outputs for test: {index}:{test_name}:{output_status}' + ) + return_status['status'] = 'FAIL' + test_status['output_status'] = output_status + return_status[test_name] = test_status + logging.info( + f'statvar processor test: {index}:{test_name}: {return_status["status"]}' + ) + if test_output: + file_util.file_write_py_dict(return_status, test_output) + return return_status + + def run_statvar_processor(self, + config: dict, + override_args: dict = {}) -> dict: + """Runs a single instance of statvar processor.""" + test_name = config.get('test_name', 'statvar_processor_test') + test_dir = os.path.join(self._temp_dir.name, test_name) + output_dir = os.path.join(test_dir, 'output') + env_dict = dict(self.get_env_dict()) + env_dict.update(get_env_dict(config.get("env_file"))) + # Build the statvar processor commandline + cmd_args = merge_args([ + { + '--output_path': '"{output_dir}"', + '--output_counters': '"{output_dir}/statvar_counters.json"', + }, + config.get('statvar_processor_args', {}), + override_args, + ]) + output_arg = get_arg(cmd_args, '--output_path') + if output_arg: + output_dir = os.path.realpath(output_arg) + if not output_dir.endswith('/'): + output_dir = os.path.dirname(output_dir) + cwd = self.get_local_dir() + script_status = run_script( + interpreter=sys.executable, + script=os.path.join(_SCRIPT_DIR, 'stat_var_processor.py'), + args=cmd_args, + cwd=cwd, + env=env_dict, + output_dir=output_dir, + log_dir=os.path.join(output_dir, 'debug'), + ) + script_status['output_dir'] = output_dir + return script_status + + def verify_outputs(self, outputs: list[dict], config: dict, + output_path: dir) -> dict: + """Compare actual and expected outputs. + + Args: + outputs: list of expected and actual outputs to be compared. + each item in the list is a dictionary: + { + output_name: (optional) name of the output for debug and logs + output_file: actual output file to be compared. + this is a local file or path relative to output_path + expected_output_file: path to expected file to be compared with + diff_config: (optional) dictionary with diff configs such as: + ignore_property: 'name' + } + + Returns: + dictionary with the overall status as well as a list + of status per output. + { + 'status': 'PASS' if there are no diffs else 'FAIL' + + # diff summary per output + '': { + + 'diff_status': 'MATCHED' or 'DELETED' or 'MODIFIED' + 'deleted': count of deleted nodes + 'modified': count of modified nodes + }, + } + """ + logging.info(f'Comparing outputs: {outputs}') + return_status = {'status': 'PASS'} + for index, output in enumerate(outputs): + expected_files = file_util.file_get_matching( + output.get('expected_output_file')) + if not expected_files: + logging.warning( + f'Ignoring config without expected file: {output}') + continue + output_name = output.get('name') + if not output_name: + output_name = f'output_{index}_{os.path.basename(expected_files[0])}' + if output_name in return_status: + output_name = output_name + '-' + str(index) + actual_files = output.get('output_file') + actuals = [] + for file in get_value_list(actual_files): + matching_files = file_util.file_get_matching( + os.path.join(output_path, file)) + if not matching_files: + matching_files = file_util.file_get_matching(file) + if matching_files: + actuals.extend(matching_files) + if not actuals: + logging.error(f'Missing outputs for {output}') + return_status['status'] = 'ERROR' + diff_status = diff_files( + actuals, expected_files, output.get('diff_config', {}), + os.path.join(output_path, f'{output_name}.diff')) + if diff_status.get('diff_status') != 'MATCHED': + return_status['status'] = 'FAIL' + logging.error( + f'Failed to match {index}:{output}, diff: {diff_status}') + return_status[output_name] = diff_status + return_status[output_name]['name'] = output_name + logging.info( + f'Verify outputs: {return_status.get("status")} for {outputs}') + return return_status + + +def merge_args(self, args: list[dict]) -> dict: + """Returns a dictionary of command line args from a list of args. + + Args: + args: list of dictionary of command line argument and valies: + [ + { 'arg1': 'value1', ...}, + { ...}, + ... + } + + Returns: + Dictionary of command line args with the value from the last arg. + """ + + if not isinstance(args, list): + args = list(args) + + # Merge arguments keeping the last value + merged_args = dict() + for args_dict in args: + for arg, value in args_dict.items(): + if value is not None and not arg.startswith('--'): + arg = '--' + arg + merged_args[arg] = value + + return merged_args + + +def get_args_list(self, args: dict) -> list: + """Returns a list of command line args.""" + cmd_args = [] + if isinstance(args, dict): + for arg, val in args.items(): + if val: + cmd_args.append(f'{arg}={val}') + else: + cmd_args.append(f'{arg}') + elif isinstance(args, list): + cmd_args.extend(args) + else: + cmd_args.append(args) + return cmd_args + + +def get_arg(self, args_dict: dict, arg: str, default=None) -> str: + """Returns the value of a specific arg if present.""" + return args_dict.get(arg, default) + + +def get_env_dict(filename: str) -> dict: + """Returns a dictionary of env variable settings from a file. + + Args: + filename: file with the environemnt settings. + It can be a csv, json, txt or shell file. + + Returns: + dictionary of env variable to values mappings. + """ + env_dict = {} + if isinstance(filename, dict): + # arguent is a dictionary of env variables. Use it as is. + return filename + + env_files = file_util.file_get_matching(filename) + for env_file in env_files: + _, file_ext = os.path.splitext(env_file) + if file_ext == '.sh' or file_ext == ".txt" or file_ext == '.env': + # File has one variable per line + with file_util.FileIO(env_file) as fp: + for line in fp.readlines(): + line = line.strip() + if line.startswith('#'): + # Ignore commented lines + continue + if '=' in line: + env_var, value = line.split('=', 1) + env_var = env_var.strip().strip('"') + value = value.strip().strip('"') + env_dict[env_var] = value + else: + # Assume the file is a dictionary + env_dict.update(file_util.file_load_py_dict(env_file)) + + logging.info(f'Loaded {len(env_dict)} env vars from {env_files}') + return env_dict + + +def run_script(interpreter: str, script: str, args: list | dict, cwd: str, + env: dict, output_dir: str, log_dir: str) -> dict: + """Run a commandline script as a child process + + Args: + interpreter: interpreter for the script such as python, bash + script: local or full path to the script file. + args: command line arguments for the script as a list + cwd: current directory for the script + env: dictionary of environment variables. + output_dir: fodler for output files generated by the script, if any. + log_dir: directory to store logs. + + Returns: + dictionary of command and status + """ + cmd_args = [] + if interpreter: + cmd_args.append(interpreter) + else: + if script and script.endswith('.py'): + # For python scripts use the current script's python binary + interpreter = sys.executable + if script and script.endswith('.sh'): + # For shell scripts use bash + interpreter = _BASH_PATH + if script: + cmd_args.append(script) + if args: + cmd_args.extend(get_args_list(args)) + + logging.info(f'Running command: {cmd_args} in {cwd}, env: {env}') + env_dict = dict(os.environ) + if env: + env_dict.update(env) + start_time = time.perf_counter() + process = subprocess.Popen( + cmd_args, + cwd=cwd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=env_dict, + ) + + # Log output continuously until the command completes. + stderr_file = os.path.join(log_dir, 'stderr.log') + stdout_file = os.path.join(log_dir, 'stdout.log') + with file_util.FileIO(stderr_file, 'wb') as f_err: + for line in process.stderr: + f_err.write(line) + logging.info(f'stderr: {line}') + with file_util.FileIO(stdout_file, 'wb') as f_out: + for line in process.stdout: + f_out.write(line) + logging.info(f'stdout: {line}') + + # Wait for process to complete + process.wait() + end_time = time.perf_counter() + return_code = process.returncode + duration = end_time - start_time + + logging.info( + f'Completed command: {cmd_args}, return code: {return_code}, time: {duration} secs, logs in: {log_dir}' + ) + + return { + 'command': ' '.join(cmd_args), + 'args': args, + 'cwd': cwd, + 'returncode': return_code, + 'output_path': output_dir, + 'debug': log_dir, + 'duration': duration, + 'stdout': stdout_file, + 'stderr': stderr_file, + } + + +def diff_files(actual: str, + expected: str, + diff_config: dict = {}, + diff_output_file: str = None) -> dict: + """Compares actual and expected files and returns dictionary with results. + + Args: + actual: list of actual files as csv or mcf. + expected: list of expected output files as csv or mcf + diff_config: dictionary of diff settings (refer to mcf_diff:diff_mcf_files) + compare_dcids: list of dcids to be compared + compare_nodes_with_pv: only compare nodes that have a listed propeorty:value + ignore_nodes_with_pv: ignore nodes with any of the property:value listed + compare_property: only compare listed propeorties in a node + + Returns: + { + status: MATCH or DELETED or MODIFIED + diff_log: log file with all the text style diffs with +|- prefixes. + missing: count of expected nodes missing in actual + modified: count of nodes expected nodes with modified pvs in actual + sample: a sample of 100 lines of diff output + } + """ + + counters = Counters() + diff_str = diff_mcf_files(actual, expected, diff_config, counters) + matched = counters.get_counter('nodes-matched') + deleted = counters.get_counter('dcid-missing-in-nodes1') + added = counters.get_counter('dcid-missing-in-nodes2') + modified = counters.get_counter('nodes-with-diff') + status = 'MATCHED' + if modified: + status = 'MODIFIED' + if deleted: + status = 'DELETED' + return_status = { + 'actual': actual, + 'expected': expected, + 'diff_status': status, + 'missing': deleted, + 'modified': modified, + 'added': added, + 'matched': matched, + 'counters': counters.get_counters(), + } + logging.info(f'Diff summary: {return_status}') + + if diff_str: + if diff_output_file: + with file_util.FileIO(diff_output_file, 'w') as diff_file: + diff_file.write(diff_str) + return_status['diff_output'] = diff_output_file + return_status['diff_sample'] = diff_str[:1000] + + return return_status + + +def main(_): + statvar_processor_tester = StatVarProcessorTestRunner( + test_config_file=_FLAGS.test_config) + test_result = statvar_processor_tester.run_tests(_FLAGS.test_output) + logging.info(f'Test result: {test_result}') + + +if __name__ == '__main__': + app.run(main) diff --git a/tools/statvar_importer/words_allowlist.txt b/tools/statvar_importer/words_allowlist.txt index 1a8f28cfda..a255f5b45c 100644 --- a/tools/statvar_importer/words_allowlist.txt +++ b/tools/statvar_importer/words_allowlist.txt @@ -25,3 +25,5 @@ svpg url usc var +antiretroviral +antiretrovirals