From 8f159988ccfd6fa83eeeefaed14e1bef075a4328 Mon Sep 17 00:00:00 2001
From: Nehil Sood <nehil@google.com>
Date: Thu, 4 Jun 2026 12:08:13 +0000
Subject: [PATCH] Update tools for agent efficacy wrapper

---
 .../agent_efficacy/Agent_Efficacy_Board.html  | 307 ++++++++++
 tools/agent_efficacy/EFFICACY_GUIDE.md        |  71 +++
 tools/agent_efficacy/USAGE_EXAMPLES.md        |  44 ++
 tools/agent_efficacy/calculate_efficacy.py    | 213 +++++++
 .../metrics/pvmap_generator_metrics.py        | 245 ++++++++
 tools/agentic_import/pvmap_generator.py       |   9 +
 tools/agentic_import/run_statvar_processor.sh |  34 ++
 .../templates/generate_pvmap_prompt.j2        |   1 +
 tools/statvar_importer/mcf_diff.py            |  20 +-
 tools/statvar_importer/stat_var_processor.py  |  11 +-
 .../statvar_importer/stat_var_test_runner.py  | 524 ++++++++++++++++++
 tools/statvar_importer/words_allowlist.txt    |   2 +
 12 files changed, 1475 insertions(+), 6 deletions(-)
 create mode 100644 tools/agent_efficacy/Agent_Efficacy_Board.html
 create mode 100644 tools/agent_efficacy/EFFICACY_GUIDE.md
 create mode 100644 tools/agent_efficacy/USAGE_EXAMPLES.md
 create mode 100644 tools/agent_efficacy/calculate_efficacy.py
 create mode 100644 tools/agentic_import/metrics/pvmap_generator_metrics.py
 create mode 100644 tools/statvar_importer/stat_var_test_runner.py
diff --git a/tools/agent_efficacy/Agent_Efficacy_Board.html b/tools/agent_efficacy/Agent_Efficacy_Board.html
new file mode 100644
index 0000000000..3d758b0ab2
--- /dev/null
+++ b/tools/agent_efficacy/Agent_Efficacy_Board.html
@@ -0,0 +1,307 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Autoschematization Agent Evaluation</title>
+    <script src="https://cdn.tailwindcss.com"></script>
+</head>
+<body class="bg-slate-100 font-sans text-slate-900 antialiased selection:bg-indigo-200 selection:text-indigo-900 h-screen w-screen overflow-auto flex flex-col ">
+    
+    <div class="w-full max-w-[100rem] mx-auto px-4 sm:px-6 lg:px-8 flex flex-col">
+        
+        <!-- Section 1: Header and Metadata Console -->
+        <header class="mb-4">
+            <div class="flex flex-col xl:flex-row xl:items-end xl:justify-between gap-3 border-b-2 border-slate-300 pb-3">
+                <div>
+                    <h1 class="text-3xl lg:text-4xl font-extrabold tracking-tight text-slate-900 leading-tight">Autoschematization Agent Evaluation</h1>
+                    <div class="mt-3 flex flex-wrap items-center gap-2 lg:gap-3">
+                        <!-- Metadata Badges -->
+                        <div class="inline-flex items-center px-3 py-1.5 rounded-full text-xs lg:text-sm font-bold bg-white border-2 border-slate-300 text-slate-800 shadow-sm">
+                            <svg class="w-4 h-4 lg:w-5 lg:h-5 mr-1.5 text-slate-500" fill="none" stroke="currentColor" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2.5" d="M10 20l4-16m4 4l4 4-4 4M6 16l-4-4 4-4"></path></svg>
+                            Run ID: DESA-GENDER_2025_OBS_ICT_SKILL_RT
+                        </div>
+                        <div class="inline-flex items-center px-3 py-1.5 rounded-full text-xs lg:text-sm font-bold bg-white border-2 border-slate-300 text-slate-800 shadow-sm">
+                            <svg class="w-4 h-4 lg:w-5 lg:h-5 mr-1.5 text-slate-500" fill="none" stroke="currentColor" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2.5" d="M4 7v10c0 2.21 3.582 4 8 4s8-1.79 8-4V7M4 7c0 2.21 3.582 4 8 4s8-1.79 8-4M4 7c0-2.21 3.582-4 8-4s8 1.79 8 4m0 5c0 2.21-3.582 4-8 4s-8-1.79-8-4"></path></svg>
+                            Target Payload: 1M Observations
+                        </div>
+                        <div class="inline-flex items-center px-3 py-1.5 rounded-full text-xs lg:text-sm font-bold bg-emerald-100 border-2 border-emerald-300 text-emerald-800 shadow-sm">
+                            <span class="w-2 h-2 lg:w-2.5 lg:h-2.5 rounded-full bg-emerald-600 mr-2 animate-pulse"></span>
+                            Pipeline Status: Audit Complete
+                        </div>
+                    </div>
+                </div>
+            </div>
+        </header>
+
+        <main class="flex flex-col gap-4 lg:gap-5">
+            <!-- Section 2: The Executive Hero Scorecard -->
+            <section>
+                <div class="grid grid-cols-1 md:grid-cols-2 xl:grid-cols-5 gap-3 lg:gap-4">
+                    
+                    <!-- Card 1: North Star Metric -->
+                    <article class="bg-indigo-50 rounded-2xl shadow-md border-2 border-indigo-300 relative overflow-auto flex flex-col justify-between p-4 h-full">
+                        <div class="absolute top-0 left-0 w-full h-1.5 bg-indigo-600"></div>
+                        <div class="relative z-10 flex-grow">
+                            <h2 class="text-xs lg:text-sm font-black text-slate-800 uppercase tracking-widest">Agent Efficacy (F1)</h2>
+                            <p class="text-xs lg:text-sm font-bold text-indigo-700 mt-0.5">(Ultimate mapping reliability)</p>
+                            <div class="mt-2 lg:mt-3">
+                                <p class="text-5xl lg:text-6xl font-black text-indigo-700 tracking-tighter">100.0%</p>
+                            </div>
+                        </div>
+                        <div class="relative z-10 mt-3 bg-white rounded-xl p-2.5 border-2 border-indigo-200 shadow-sm flex items-center min-h-[3.5rem]">
+                            <p class="text-xs lg:text-sm leading-tight font-mono font-bold text-slate-800 break-words w-full">
+                                <span class="font-black text-indigo-800">Formula:</span> 2*(P*R)/(P+R)
+                            </p>
+                        </div>
+                    </article>
+
+                    <!-- Card 2: Precision -->
+                    <article class="bg-white rounded-2xl shadow-md border-2 border-slate-200 flex flex-col justify-between p-4 h-full">
+                        <div class="flex-grow">
+                            <h2 class="text-xs lg:text-sm font-black text-slate-800 uppercase tracking-widest">Precision</h2>
+                            <p class="text-xs lg:text-sm font-bold text-slate-600 mt-0.5">(Accuracy of generations)</p>
+                            <div class="mt-2 lg:mt-3">
+                                <p class="text-4xl lg:text-5xl font-extrabold text-slate-900 tracking-tight">100.0%</p>
+                            </div>
+                        </div>
+                        <div class="mt-3 bg-slate-50 rounded-xl p-2.5 border-2 border-slate-200 shadow-sm flex items-center min-h-[3.5rem]">
+                            <p class="text-xs lg:text-sm leading-tight font-mono font-bold text-slate-800 break-words w-full">
+                                <span class="font-black text-slate-900">Formula:</span> TP / (TP + FP)
+                            </p>
+                        </div>
+                    </article>
+
+                    <!-- Card 3: Recall -->
+                    <article class="bg-white rounded-2xl shadow-md border-2 border-slate-200 flex flex-col justify-between p-4 h-full">
+                        <div class="flex-grow">
+                            <h2 class="text-xs lg:text-sm font-black text-slate-800 uppercase tracking-widest">Recall</h2>
+                            <p class="text-xs lg:text-sm font-bold text-slate-600 mt-0.5">(Capture rate of targets)</p>
+                            <div class="mt-2 lg:mt-3">
+                                <p class="text-4xl lg:text-5xl font-extrabold text-slate-900 tracking-tight">100.0%</p>
+                            </div>
+                        </div>
+                        <div class="mt-3 bg-slate-50 rounded-xl p-2.5 border-2 border-slate-200 shadow-sm flex items-center min-h-[3.5rem]">
+                            <p class="text-xs lg:text-sm leading-tight font-mono font-bold text-slate-800 break-words w-full">
+                                <span class="font-black text-slate-900">Formula:</span> TP / (TP + FN)
+                            </p>
+                        </div>
+                    </article>
+
+                    <!-- Card 4: Time to Ingest -->
+                    <article class="bg-white rounded-2xl shadow-md border-2 border-slate-200 flex flex-col justify-between p-4 h-full">
+                        <div class="flex-grow">
+                            <h2 class="text-xs lg:text-sm font-black text-slate-800 uppercase tracking-widest">Time to Ingest</h2>
+                            <p class="text-xs lg:text-sm font-bold text-slate-600 mt-0.5">(Total processing speed)</p>
+                            <div class="mt-2 lg:mt-3">
+                                <p class="text-4xl lg:text-5xl font-extrabold text-slate-900 tracking-tight">14m 22s</p>
+                            </div>
+                        </div>
+                        <div class="mt-3 bg-slate-50 rounded-xl p-2.5 border-2 border-slate-200 shadow-sm flex items-center min-h-[3.5rem]">
+                            <p class="text-xs lg:text-sm font-bold leading-tight text-slate-800 w-full">
+                                Invocation to first query.
+                            </p>
+                        </div>
+                    </article>
+
+                    <!-- Card 5: Completion Rate -->
+                    <article class="bg-white rounded-2xl shadow-md border-2 border-slate-200 flex flex-col justify-between p-4 h-full">
+                        <div class="flex-grow">
+                            <h2 class="text-xs lg:text-sm font-black text-slate-800 uppercase tracking-widest">Completion Rate</h2>
+                            <p class="text-xs lg:text-sm font-bold text-slate-600 mt-0.5">(Pipeline execution health)</p>
+                            <div class="mt-2 lg:mt-3">
+                                <p class="text-4xl lg:text-5xl font-extrabold text-slate-900 tracking-tight">100%</p>
+                            </div>
+                        </div>
+                        <div class="mt-3 bg-emerald-50 rounded-xl p-2.5 border-2 border-emerald-200 shadow-sm flex items-center min-h-[3.5rem]">
+                            <p class="text-xs lg:text-sm font-bold leading-tight text-emerald-800 w-full">
+                                Syntactical pipeline success.
+                            </p>
+                        </div>
+                    </article>
+
+                </div>
+            </section>
+
+            <!-- Section 3: The Agent Mapping Profile -->
+            <section>
+                <h2 class="text-xl lg:text-2xl font-extrabold text-slate-900 mb-3">Granular Mapping Analysis</h2>
+                
+                <div class="grid grid-cols-1 xl:grid-cols-3 gap-4 lg:gap-5">
+                    
+                    <!-- True Positives -->
+                    <article class="bg-white rounded-2xl border-2 border-emerald-300 shadow-lg p-5 lg:p-6 relative overflow-auto flex flex-col h-full">
+                        <div class="absolute -right-8 -top-8 w-32 h-32 lg:w-40 lg:h-40 bg-emerald-100 rounded-full blur-3xl opacity-60 pointer-events-none"></div>
+                        
+                        <div class="relative z-10 flex flex-col flex-grow">
+                            <div class="w-12 h-12 lg:w-14 lg:h-14 bg-emerald-100 border-2 border-emerald-200 rounded-xl flex items-center  text-emerald-700 mb-3 lg:mb-4 shadow-sm">
+                                <svg class="w-7 h-7 lg:w-8 lg:h-8" fill="none" stroke="currentColor" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="3" d="M5 13l4 4L19 7"></path></svg>
+                            </div>
+                            
+                            <h3 class="text-xl lg:text-2xl font-black text-slate-900 leading-tight">True Positives</h3>
+                            <p class="text-sm lg:text-base font-bold text-emerald-700 mt-1 mb-4 lg:mb-5 flex-grow">(Mappings done by the Agent correctly)</p>
+                            
+                            <p class="text-5xl lg:text-6xl font-black text-emerald-700 tracking-tighter">384</p>
+                        </div>
+                    </article>
+
+                    <!-- False Positives -->
+                    <article class="bg-white rounded-2xl border-2 border-rose-300 shadow-lg p-5 lg:p-6 relative overflow-auto flex flex-col h-full">
+                        <div class="absolute -right-8 -top-8 w-32 h-32 lg:w-40 lg:h-40 bg-rose-100 rounded-full blur-3xl opacity-60 pointer-events-none"></div>
+                        
+                        <div class="relative z-10 flex flex-col flex-grow">
+                            <div class="w-12 h-12 lg:w-14 lg:h-14 bg-rose-100 border-2 border-rose-200 rounded-xl flex items-center  text-rose-700 mb-3 lg:mb-4 shadow-sm">
+                                <svg class="w-7 h-7 lg:w-8 lg:h-8" fill="none" stroke="currentColor" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="3" d="M6 18L18 6M6 6l12 12"></path></svg>
+                            </div>
+                            
+                            <h3 class="text-xl lg:text-2xl font-black text-slate-900 leading-tight">False Positives</h3>
+                            <p class="text-sm lg:text-base font-bold text-rose-700 mt-1 mb-4 lg:mb-5 flex-grow">(Wrong mappings / Hallucinations)</p>
+                            
+                            <p class="text-5xl lg:text-6xl font-black text-rose-700 tracking-tighter">0</p>
+                        </div>
+                    </article>
+
+                    <!-- False Negatives -->
+                    <article class="bg-white rounded-2xl border-2 border-amber-300 shadow-lg p-5 lg:p-6 relative overflow-auto flex flex-col h-full">
+                        <div class="absolute -right-8 -top-8 w-32 h-32 lg:w-40 lg:h-40 bg-amber-100 rounded-full blur-3xl opacity-60 pointer-events-none"></div>
+                        
+                        <div class="relative z-10 flex flex-col flex-grow">
+                            <div class="w-12 h-12 lg:w-14 lg:h-14 bg-amber-100 border-2 border-amber-200 rounded-xl flex items-center  text-amber-700 mb-3 lg:mb-4 shadow-sm">
+                                <svg class="w-7 h-7 lg:w-8 lg:h-8" fill="none" stroke="currentColor" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="3" d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z"></path></svg>
+                            </div>
+                            
+                            <h3 class="text-xl lg:text-2xl font-black text-slate-900 leading-tight">False Negatives</h3>
+                            <p class="text-sm lg:text-base font-bold text-amber-700 mt-1 mb-4 lg:mb-5 flex-grow">(Mappings missed by the Agent)</p>
+                            
+                            <p class="text-5xl lg:text-6xl font-black text-amber-700 tracking-tighter">0</p>
+                        </div>
+                    </article>
+
+                </div>
+            </section>
+        </main>
+        
+    </div>
+
+<section class="mt-8 px-4 sm:px-6 lg:px-8 pb-12"><h2 class="text-2xl lg:text-3xl font-extrabold text-slate-900 mb-6">Detailed File Comparisons</h2>
+        <div class="mb-10 bg-white p-6 lg:p-8 rounded-2xl shadow-lg border-2 border-slate-200">
+            <h3 class="text-xl lg:text-2xl font-black mb-4 uppercase tracking-wider text-indigo-900 border-b-2 border-indigo-100 pb-2">pvmap.csv Comparison</h3>
+            <div class="grid grid-cols-1 md:grid-cols-3 gap-4 mb-6">
+                <div class="bg-emerald-50 p-4 rounded-xl border-2 border-emerald-200 shadow-sm">
+                    <span class="block text-xs font-black text-emerald-800 uppercase tracking-widest">Precision</span>
+                    <span class="text-2xl font-black text-emerald-900">100.0%</span>
+                </div>
+                <div class="bg-amber-50 p-4 rounded-xl border-2 border-amber-200 shadow-sm">
+                    <span class="block text-xs font-black text-amber-800 uppercase tracking-widest">Recall</span>
+                    <span class="text-2xl font-black text-amber-900">100.0%</span>
+                </div>
+                <div class="bg-indigo-50 p-4 rounded-xl border-2 border-indigo-200 shadow-sm">
+                    <span class="block text-xs font-black text-indigo-800 uppercase tracking-widest">F1 Score</span>
+                    <span class="text-2xl font-black text-indigo-900">100.0%</span>
+                </div>
+            </div>
+            <div class="relative">
+                <div class="absolute top-3 right-3 text-xs font-bold text-slate-400 uppercase tracking-widest bg-slate-800 px-2 py-1 rounded">mcf_diff.py output</div>
+                <pre class="bg-slate-900 text-slate-100 p-6 rounded-xl overflow-auto max-h-[30rem] text-sm font-mono leading-relaxed border-2 border-slate-800 shadow-inner whitespace-pre-wrap">
+No differences found.
+                </pre>
+            </div>
+        </div>
+        
+        <div class="mb-10 bg-white p-6 lg:p-8 rounded-2xl shadow-lg border-2 border-slate-200">
+            <h3 class="text-xl lg:text-2xl font-black mb-4 uppercase tracking-wider text-indigo-900 border-b-2 border-indigo-100 pb-2">stat_vars.mcf Comparison</h3>
+            <div class="grid grid-cols-1 md:grid-cols-3 gap-4 mb-6">
+                <div class="bg-emerald-50 p-4 rounded-xl border-2 border-emerald-200 shadow-sm">
+                    <span class="block text-xs font-black text-emerald-800 uppercase tracking-widest">Precision</span>
+                    <span class="text-2xl font-black text-emerald-900">100.0%</span>
+                </div>
+                <div class="bg-amber-50 p-4 rounded-xl border-2 border-amber-200 shadow-sm">
+                    <span class="block text-xs font-black text-amber-800 uppercase tracking-widest">Recall</span>
+                    <span class="text-2xl font-black text-amber-900">100.0%</span>
+                </div>
+                <div class="bg-indigo-50 p-4 rounded-xl border-2 border-indigo-200 shadow-sm">
+                    <span class="block text-xs font-black text-indigo-800 uppercase tracking-widest">F1 Score</span>
+                    <span class="text-2xl font-black text-indigo-900">100.0%</span>
+                </div>
+            </div>
+            <div class="relative">
+                <div class="absolute top-3 right-3 text-xs font-bold text-slate-400 uppercase tracking-widest bg-slate-800 px-2 py-1 rounded">mcf_diff.py output</div>
+                <pre class="bg-slate-900 text-slate-100 p-6 rounded-xl overflow-auto max-h-[30rem] text-sm font-mono leading-relaxed border-2 border-slate-800 shadow-inner whitespace-pre-wrap">
+No differences found.
+                </pre>
+            </div>
+        </div>
+        
+        <div class="mb-10 bg-white p-6 lg:p-8 rounded-2xl shadow-lg border-2 border-slate-200">
+            <h3 class="text-xl lg:text-2xl font-black mb-4 uppercase tracking-wider text-indigo-900 border-b-2 border-indigo-100 pb-2">stat_vars_schema.mcf Comparison</h3>
+            <div class="grid grid-cols-1 md:grid-cols-3 gap-4 mb-6">
+                <div class="bg-emerald-50 p-4 rounded-xl border-2 border-emerald-200 shadow-sm">
+                    <span class="block text-xs font-black text-emerald-800 uppercase tracking-widest">Precision</span>
+                    <span class="text-2xl font-black text-emerald-900">100.0%</span>
+                </div>
+                <div class="bg-amber-50 p-4 rounded-xl border-2 border-amber-200 shadow-sm">
+                    <span class="block text-xs font-black text-amber-800 uppercase tracking-widest">Recall</span>
+                    <span class="text-2xl font-black text-amber-900">100.0%</span>
+                </div>
+                <div class="bg-indigo-50 p-4 rounded-xl border-2 border-indigo-200 shadow-sm">
+                    <span class="block text-xs font-black text-indigo-800 uppercase tracking-widest">F1 Score</span>
+                    <span class="text-2xl font-black text-indigo-900">100.0%</span>
+                </div>
+            </div>
+            <div class="relative">
+                <div class="absolute top-3 right-3 text-xs font-bold text-slate-400 uppercase tracking-widest bg-slate-800 px-2 py-1 rounded">mcf_diff.py output</div>
+                <pre class="bg-slate-900 text-slate-100 p-6 rounded-xl overflow-auto max-h-[30rem] text-sm font-mono leading-relaxed border-2 border-slate-800 shadow-inner whitespace-pre-wrap">
+No differences found.
+                </pre>
+            </div>
+        </div>
+        
+        <div class="mb-10 bg-white p-6 lg:p-8 rounded-2xl shadow-lg border-2 border-slate-200">
+            <h3 class="text-xl lg:text-2xl font-black mb-4 uppercase tracking-wider text-indigo-900 border-b-2 border-indigo-100 pb-2">tmcf Comparison</h3>
+            <div class="grid grid-cols-1 md:grid-cols-3 gap-4 mb-6">
+                <div class="bg-emerald-50 p-4 rounded-xl border-2 border-emerald-200 shadow-sm">
+                    <span class="block text-xs font-black text-emerald-800 uppercase tracking-widest">Precision</span>
+                    <span class="text-2xl font-black text-emerald-900">100.0%</span>
+                </div>
+                <div class="bg-amber-50 p-4 rounded-xl border-2 border-amber-200 shadow-sm">
+                    <span class="block text-xs font-black text-amber-800 uppercase tracking-widest">Recall</span>
+                    <span class="text-2xl font-black text-amber-900">100.0%</span>
+                </div>
+                <div class="bg-indigo-50 p-4 rounded-xl border-2 border-indigo-200 shadow-sm">
+                    <span class="block text-xs font-black text-indigo-800 uppercase tracking-widest">F1 Score</span>
+                    <span class="text-2xl font-black text-indigo-900">100.0%</span>
+                </div>
+            </div>
+            <div class="relative">
+                <div class="absolute top-3 right-3 text-xs font-bold text-slate-400 uppercase tracking-widest bg-slate-800 px-2 py-1 rounded">mcf_diff.py output</div>
+                <pre class="bg-slate-900 text-slate-100 p-6 rounded-xl overflow-auto max-h-[30rem] text-sm font-mono leading-relaxed border-2 border-slate-800 shadow-inner whitespace-pre-wrap">
+No differences found.
+                </pre>
+            </div>
+        </div>
+        
+        <div class="mb-10 bg-white p-6 lg:p-8 rounded-2xl shadow-lg border-2 border-slate-200">
+            <h3 class="text-xl lg:text-2xl font-black mb-4 uppercase tracking-wider text-indigo-900 border-b-2 border-indigo-100 pb-2">csv_data Comparison</h3>
+            <div class="grid grid-cols-1 md:grid-cols-3 gap-4 mb-6">
+                <div class="bg-emerald-50 p-4 rounded-xl border-2 border-emerald-200 shadow-sm">
+                    <span class="block text-xs font-black text-emerald-800 uppercase tracking-widest">Precision</span>
+                    <span class="text-2xl font-black text-emerald-900">100.0%</span>
+                </div>
+                <div class="bg-amber-50 p-4 rounded-xl border-2 border-amber-200 shadow-sm">
+                    <span class="block text-xs font-black text-amber-800 uppercase tracking-widest">Recall</span>
+                    <span class="text-2xl font-black text-amber-900">100.0%</span>
+                </div>
+                <div class="bg-indigo-50 p-4 rounded-xl border-2 border-indigo-200 shadow-sm">
+                    <span class="block text-xs font-black text-indigo-800 uppercase tracking-widest">F1 Score</span>
+                    <span class="text-2xl font-black text-indigo-900">100.0%</span>
+                </div>
+            </div>
+            <div class="relative">
+                <div class="absolute top-3 right-3 text-xs font-bold text-slate-400 uppercase tracking-widest bg-slate-800 px-2 py-1 rounded">mcf_diff.py output</div>
+                <pre class="bg-slate-900 text-slate-100 p-6 rounded-xl overflow-auto max-h-[30rem] text-sm font-mono leading-relaxed border-2 border-slate-800 shadow-inner whitespace-pre-wrap">
+No differences found.
+                </pre>
+            </div>
+        </div>
+        </section></body>
+</html>
\ No newline at end of file
diff --git a/tools/agent_efficacy/EFFICACY_GUIDE.md b/tools/agent_efficacy/EFFICACY_GUIDE.md
new file mode 100644
index 0000000000..edd1c76ac0
--- /dev/null
+++ b/tools/agent_efficacy/EFFICACY_GUIDE.md
@@ -0,0 +1,71 @@
+# Agent Efficacy Testing Guide
+
+The efficacy calculation pipeline (`calculate_efficacy.py`) evaluates the quality of the autoschematization agent by comparing its output predictions against human-reviewed "gold standard" directories. It leverages semantic graph comparisons (`mcf_diff.py`) to accurately compute metrics like **Precision, Recall, and the F1 Score**.
+
+---
+
+### 1. Understanding the Evaluated Files
+
+The script compares four primary output artifacts generated by the agent against their reviewed counterparts. It computes True Positives (TP), False Positives (FP), and False Negatives (FN) to determine the overall agent efficacy.
+
+*   **`output_pvmap.csv` (Primary Efficacy Metric):**
+    *   **What it is:** The Property-Value (PV) mapping file that links dataset column headers to Data Commons schema nodes.
+    *   **How it's tested:** This is processed using the `PropertyValueMapper`. The script extracts a normalized semantic graph and calculates the "Hero Metrics" (Precision, Recall, F1) that define the overall run's success. 
+*   **`output_stat_vars.mcf`:**
+    *   **What it is:** Defines the statistical variables extracted from the dataset.
+    *   **How it's tested:** The script uses **fingerprinting** (`use_fingerprint=True`). This means it evaluates if the core semantic definition (properties and values) of the StatVar is correct, even if the exact node ID (DCID) strings have slight, functionally irrelevant differences.
+*   **`output_stat_vars_schema.mcf` & `output.tmcf`:**
+    *   **What it is:** Extends the schema definitions and dictates the tabular template bindings.
+    *   **How it's tested:** Compared as standard MCF graphs to ensure the underlying structure matches the gold standard perfectly.
+
+---
+
+### 2. Testing a Single Dataset
+
+The `calculate_efficacy.py` script uses command-line arguments to accept the required input and output paths dynamically. You should run this script within the project's virtual environment.
+
+**Command Line Arguments:**
+*   `--test`: Path to the test (prediction) directory.
+*   `--gold`: Path to the gold (reviewed) directory.
+*   `--output`: Path to the directory where the efficacy results dashboard should be saved.
+*   `--dataset_id` (Optional): The dataset ID for display purposes in the HTML dashboard.
+
+**Example execution:**
+```bash
+source venv/bin/activate
+python tools/agent_efficacy/calculate_efficacy.py \
+  --test test_DESA-GENDER_2025_OBS_ICT_SKILL_RT \
+  --gold undata/DESA/output/reviewed_pvmap_harish/DESA-GENDER_2025_OBS_ICT_SKILL_RT \
+  --output undata/DESA/efficacy_results/DESA-GENDER_2025_OBS_ICT_SKILL_RT \
+  --dataset_id DESA-GENDER_2025_OBS_ICT_SKILL_RT
+```
+**Results:** Open the generated dashboard (`Agent_Efficacy_Board.html` inside your `--output` directory) in your web browser.
+
+---
+
+### 3. Testing in Bulk (Multiple Datasets)
+
+To evaluate the agent across multiple datasets simultaneously, you can use the built-in `--bulk` flag. When this flag is passed, the `--test` and `--gold` arguments are treated as the parent directories containing all the datasets.
+
+**Example Bulk Run Execution:**
+
+```bash
+# Ensure virtual environment is active
+source venv/bin/activate
+
+python tools/agent_efficacy/calculate_efficacy.py \
+  --bulk \
+  --test undata/DESA/output/reviewed \
+  --gold undata/DESA/output/unreviewed \
+  --output undata/DESA/efficacy_results/bulk_run
+```
+
+The script will automatically detect datasets and generate a unique run directory (e.g., `bulk_run_20260601_120000`) inside your `--output` path. This directory will contain a separate HTML dashboard folder for every evaluated dataset, along with an aggregated `summary.csv` file containing the F1, Precision, Recall, TP, FP, and FN scores for all datasets in that run.
+
+---
+
+### 4. Interpreting the Output
+
+The HTML dashboard (`Agent_Efficacy_Board.html`) gives you a visual breakdown of the metrics:
+*   **Hero Metrics (Top Section):** Represents the accuracy of the `pvmap.csv`. A high F1 score here means the agent successfully mapped columns to the correct semantic properties.
+*   **Detailed Semantic Comparisons:** Look here to see the specific True Positives, False Positives (incorrect mappings), and False Negatives (missed mappings). The dashboard will output raw MCF node diffs to show you exactly *where* the agent deviated from the reviewed standard for `pvmap`, `stat_vars`, `schema`, and `tmcf`.
\ No newline at end of file
diff --git a/tools/agent_efficacy/USAGE_EXAMPLES.md b/tools/agent_efficacy/USAGE_EXAMPLES.md
new file mode 100644
index 0000000000..916b9063c0
--- /dev/null
+++ b/tools/agent_efficacy/USAGE_EXAMPLES.md
@@ -0,0 +1,44 @@
+# Efficacy Tool Quick-Start Examples
+
+This guide provides sample commands for running the `calculate_efficacy.py` script in both single and bulk modes.
+
+## 1. Single Dataset Evaluation
+Use this command to compare a single agent-generated folder against a reviewed gold standard.
+
+### Sample Command
+```bash
+python3 tools/agent_efficacy/calculate_efficacy.py \
+  --test /usr/local/google/home/nehil/datacommons/import/git/data/test_DESA-GENDER_2025_OBS_ICT_SKILL_RT \
+  --gold /usr/local/google/home/nehil/datacommons/import/git/data/undata/DESA/output/reviewed_pvmap_harish/DESA-GENDER_2025_OBS_ICT_SKILL_RT \
+  --output /usr/local/google/home/nehil/datacommons/import/git/data/tmp/efficacy_results/single_run \
+  --dataset_id ICT_SKILL_RT
+```
+
+### Result
+- Dashboard: `/tmp/efficacy_results/single_run/Agent_Efficacy_Board.html`
+- Updates: Precision, Recall, and F1 will be correctly populated in the HTML.
+
+---
+
+## 2. Bulk Evaluation (Multiple Datasets)
+Use this command to evaluate all datasets in a directory. It will create a unique, timestamped folder for the run.
+
+### Sample Command
+```bash
+python3 tools/agent_efficacy/calculate_efficacy.py \
+  --bulk \
+  --test /usr/local/google/home/nehil/datacommons/import/git/data/undata/DESA/output/agent_predictions \
+  --gold /usr/local/google/home/nehil/datacommons/import/git/data/undata/DESA/output/reviewed_pvmap_harish \
+  --output /tmp/efficacy_results/bulk_runs
+```
+
+### Result
+- Output Directory: `/tmp/efficacy_results/bulk_runs/bulk_run_20260602_HHMMSS/`
+- Summary File: `summary.csv` inside the new run folder.
+- Dashboards: Individual `Agent_Efficacy_Board.html` files for every dataset found.
+
+---
+
+## 3. How to Present Results
+1. **HTML Dashboard:** Open the `Agent_Efficacy_Board.html` file in any browser to view the "Hero Metrics" and detailed semantic diffs.
+2. **Summary CSV:** Use the `summary.csv` generated during bulk runs to create a high-level report or table of performance across all indicators.
diff --git a/tools/agent_efficacy/calculate_efficacy.py b/tools/agent_efficacy/calculate_efficacy.py
new file mode 100644
index 0000000000..f322624f0e
--- /dev/null
+++ b/tools/agent_efficacy/calculate_efficacy.py
@@ -0,0 +1,213 @@
+import os
+import sys
+import re
+import csv
+import argparse
+import datetime
+
+# Set up paths to import tools
+PROJECT_ROOT = '/usr/local/google/home/nehil/datacommons/import/git/data'
+sys.path.append(os.path.join(PROJECT_ROOT, 'tools/statvar_importer'))
+sys.path.append(os.path.join(PROJECT_ROOT, 'util'))
+sys.path.append(os.path.join(PROJECT_ROOT, 'tools/agentic_import/metrics'))
+
+import mcf_diff
+from counters import Counters
+from property_value_mapper import PropertyValueMapper
+from pvmap_generator_metrics import PVMapGeneratorMetricsRunner
+
+def get_metrics_from_counters(counters_obj):
+    # Use metrics formula directly from pvmap_generator_metrics.py
+    diff_stats = {'counters': counters_obj.get_counters()}
+    stats = PVMapGeneratorMetricsRunner.get_stats_from_diff_counters(None, diff_stats)
+    
+    return {
+        'tp': stats.get('true_positive', 0), 
+        'fp': stats.get('false_positive', 0), 
+        'fn': stats.get('false_negative', 0),
+        'precision': stats.get('precision', 0), 
+        'recall': stats.get('recall', 0), 
+        'f1': stats.get('f1', 0)
+    }
+
+def load_pv_map_nodes(file_path):
+    """Loads PV map into MCF-like nodes using PropertyValueMapper normalization."""
+    pv_mapper = PropertyValueMapper()
+    pv_mapper.load_pvs_from_file(file_path)
+    # Get the raw GLOBAL map
+    raw_map = pv_mapper.get_pv_map().get('GLOBAL', {})
+    # Convert to standard node format: {dcid: {prop: val}}
+    nodes = {}
+    for key, pvs in raw_map.items():
+        # Ensure DCID is clean
+        nodes[key] = pvs
+    return nodes
+
+def run_comparison(pred_path, gold_path, is_pvmap=False, use_fingerprint=False):
+    if not os.path.exists(pred_path) or not os.path.exists(gold_path):
+        print(f"Skipping: missing {pred_path} or {gold_path}")
+        return "", {'tp': 0, 'fp': 0, 'fn': 0, 'precision': 0, 'recall': 0, 'f1': 0}
+
+    counters = Counters()
+    config = {
+        'show_diff_nodes_only': True,
+        'ignore_property': ['description', 'provenance', 'memberOf', 'member', 'name', 'constraintProperties', 'keyString', 'relevantVariable'],
+        'fingerprint_dcid': use_fingerprint
+    }
+
+    if is_pvmap:
+        # Specialized loading for PV Maps to handle wide vs narrow formats
+        nodes1 = load_pv_map_nodes(pred_path)
+        nodes2 = load_pv_map_nodes(gold_path)
+        print(f"  [PVMap] Loaded {len(nodes1)} nodes from pred, {len(nodes2)} from gold")
+        diff_text = mcf_diff.diff_mcf_nodes(nodes1, nodes2, config, counters)
+    else:
+        # Standard MCF loading
+        diff_text = mcf_diff.diff_mcf_files(pred_path, gold_path, config, counters)
+    
+    metrics = get_metrics_from_counters(counters)
+    return diff_text, metrics
+
+def update_html(template_content, dataset_id, hero_metrics, detailed_results):
+    content = template_content
+    for key, label in [('f1', 'Agent Efficacy (F1)'), ('precision', 'Precision'), ('recall', 'Recall')]:
+        escaped_label = re.escape(label)
+        content = re.sub(
+            rf'({escaped_label}.*?tracking-tight(?:er)?">)([\d\.]+%)(</p>)',
+            r'\g<1>' + f"{hero_metrics[key]*100:.1f}%" + r'\g<3>',
+            content, flags=re.DOTALL
+        )
+    
+    content = re.sub(r'(True Positives.*?tracking-tighter">)([\d,]+)(</p>)', r'\g<1>' + f"{hero_metrics['tp']:,}" + r'\g<3>', content, flags=re.DOTALL)
+    content = re.sub(r'(False Positives.*?tracking-tighter">)([\d,]+)(</p>)', r'\g<1>' + f"{hero_metrics['fp']:,}" + r'\g<3>', content, flags=re.DOTALL)
+    content = re.sub(r'(False Negatives.*?tracking-tighter">)([\d,]+)(</p>)', r'\g<1>' + f"{hero_metrics['fn']:,}" + r'\g<3>', content, flags=re.DOTALL)
+    content = re.sub(r'Run ID: .*?</div>', f'Run ID: {dataset_id} (Rechecked)</div>', content)
+
+    if '<section class="mt-8 px-4 sm:px-6 lg:px-8 pb-12">' in content:
+        content = content.split('<section class="mt-8 px-4 sm:px-6 lg:px-8 pb-12">')[0]
+
+    diff_sections = '<section class="mt-8 px-4 sm:px-6 lg:px-8 pb-12"><h2 class="text-2xl lg:text-3xl font-extrabold text-slate-900 mb-6">Detailed Semantic Comparisons</h2>'
+    for label, data in detailed_results.items():
+        diff_sections += f'''
+        <div class="mb-10 bg-white p-6 lg:p-8 rounded-2xl shadow-lg border-2 border-slate-200">
+            <h3 class="text-xl lg:text-2xl font-black mb-4 uppercase tracking-wider text-indigo-900 border-b-2 border-indigo-100 pb-2">{label}</h3>
+            <div class="grid grid-cols-3 gap-4 mb-6">
+                <div class="bg-emerald-50 p-4 rounded-xl border-2 border-emerald-200"><span class="block text-xs font-black uppercase tracking-widest">Precision</span><span class="text-2xl font-black">{data['metrics']['precision']*100:.1f}%</span></div>
+                <div class="bg-amber-50 p-4 rounded-xl border-2 border-amber-200"><span class="block text-xs font-black uppercase tracking-widest">Recall</span><span class="text-2xl font-black">{data['metrics']['recall']*100:.1f}%</span></div>
+                <div class="bg-indigo-50 p-4 rounded-xl border-2 border-indigo-200"><span class="block text-xs font-black uppercase tracking-widest">F1 Score</span><span class="text-2xl font-black">{data['metrics']['f1']*100:.1f}%</span></div>
+            </div>
+            <div class="text-xs font-bold text-slate-500 mb-2 font-mono uppercase tracking-widest">Semantic Match (TP: {data['metrics']['tp']} | FP: {data['metrics']['fp']} | FN: {data['metrics']['fn']})</div>
+            <pre class="bg-slate-900 text-slate-100 p-6 rounded-xl overflow-auto max-h-[30rem] text-sm font-mono whitespace-pre-wrap">{data['diff'] if data['diff'] else 'No differences found.'}</pre>
+        </div>
+        '''
+    diff_sections += '</section>'
+    return content.replace('</body>', diff_sections + '</body>') if '</body>' in content else content + diff_sections
+
+def process_single_dataset(test_dir, gold_dir, output_dir, dataset_id, template_content):
+    if not os.path.exists(output_dir): os.makedirs(output_dir)
+
+    file_mappings = [
+        ('pvmap.csv', 'output_pvmap.csv', 'output_pvmap.csv', True, False),
+        ('stat_vars.mcf', 'output_stat_vars.mcf', 'output_stat_vars.mcf', False, True),
+        ('stat_vars_schema.mcf', 'output_stat_vars_schema.mcf', 'output_stat_vars_schema.mcf', False, False),
+        ('tmcf', 'output.tmcf', 'output.tmcf', False, False),
+    ]
+
+    detailed_results = {}
+    total_tp = 0
+    total_fp = 0
+    total_fn = 0
+
+    print(f"\n--- Rechecking Efficacy for {dataset_id} ---")
+    for label, pred_name, gold_name, is_pv, use_fp in file_mappings:
+        print(f"Comparing {label}...")
+        diff_text, metrics = run_comparison(os.path.join(test_dir, pred_name), os.path.join(gold_dir, gold_name), is_pvmap=is_pv, use_fingerprint=use_fp)
+        detailed_results[label] = {'diff': diff_text, 'metrics': metrics}
+        print(f"  Result: F1={metrics['f1']:.1%}, TP={metrics['tp']}, FP={metrics['fp']}, FN={metrics['fn']}")
+        
+        total_tp += metrics['tp']
+        total_fp += metrics['fp']
+        total_fn += metrics['fn']
+
+    precision = total_tp / (total_tp + total_fp) if (total_tp + total_fp) > 0 else 0
+    recall = total_tp / (total_tp + total_fn) if (total_tp + total_fn) > 0 else 0
+    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
+
+    hero_metrics = {
+        'tp': total_tp,
+        'fp': total_fp,
+        'fn': total_fn,
+        'precision': precision,
+        'recall': recall,
+        'f1': f1
+    }
+
+    final_html = update_html(template_content, dataset_id, hero_metrics, detailed_results)
+    
+    with open(os.path.join(output_dir, 'Agent_Efficacy_Board.html'), 'w') as f: f.write(final_html)
+    return hero_metrics
+
+def main():
+    parser = argparse.ArgumentParser(description="Calculate efficacy metrics.")
+    parser.add_argument('--test', required=True, help="Path to the test (prediction) directory.")
+    parser.add_argument('--gold', required=True, help="Path to the gold (reviewed) directory.")
+    parser.add_argument('--output', required=True, help="Path to the output directory to save results.")
+    parser.add_argument('--dataset_id', default="Dataset", help="Optional dataset ID for display.")
+    parser.add_argument('--bulk', action='store_true', help="If set, treats test and gold as parent directories containing multiple dataset folders.")
+    args = parser.parse_args()
+
+    template_path = os.path.join(os.path.dirname(__file__), 'Agent_Efficacy_Board.html')
+    with open(template_path, 'r') as f: 
+        template_content = f.read()
+
+    if args.bulk:
+        run_id = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+        args.output = os.path.join(args.output, f"bulk_run_{run_id}")
+        print(f"Starting Bulk Efficacy Calculation natively... Output: {args.output}")
+        summary_data = []
+        if not os.path.exists(args.output):
+            os.makedirs(args.output)
+            
+        for dataset_id in os.listdir(args.gold):
+            gold_ds_dir = os.path.join(args.gold, dataset_id)
+            if not os.path.isdir(gold_ds_dir):
+                continue
+            
+            # Allow test directories to either match exactly or be prefixed with test_
+            test_ds_dir = os.path.join(args.test, dataset_id)
+            if not os.path.isdir(test_ds_dir):
+                test_ds_dir = os.path.join(args.test, f"test_{dataset_id}")
+            
+            if os.path.isdir(test_ds_dir):
+                out_ds_dir = os.path.join(args.output, dataset_id)
+                try:
+                    hero_metrics = process_single_dataset(test_ds_dir, gold_ds_dir, out_ds_dir, dataset_id, template_content)
+                    if hero_metrics:
+                        summary_data.append({
+                            'dataset_id': dataset_id,
+                            'f1': hero_metrics['f1'],
+                            'precision': hero_metrics['precision'],
+                            'recall': hero_metrics['recall'],
+                            'tp': hero_metrics['tp'],
+                            'fp': hero_metrics['fp'],
+                            'fn': hero_metrics['fn']
+                        })
+                except Exception as e:
+                    print(f"Error processing {dataset_id}: {e}")
+            else:
+                print(f"Skipped: {dataset_id} (Matching test directory not found)")
+        
+        # Write summary.csv
+        if summary_data:
+            summary_path = os.path.join(args.output, 'summary.csv')
+            with open(summary_path, 'w', newline='') as f:
+                writer = csv.DictWriter(f, fieldnames=['dataset_id', 'f1', 'precision', 'recall', 'tp', 'fp', 'fn'])
+                writer.writeheader()
+                writer.writerows(summary_data)
+            print(f"\nBulk run complete. Summary saved to {summary_path}")
+    else:
+        process_single_dataset(args.test, args.gold, args.output, args.dataset_id, template_content)
+        print(f"\nRecheck complete. Results saved to {args.output}")
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/agentic_import/metrics/pvmap_generator_metrics.py b/tools/agentic_import/metrics/pvmap_generator_metrics.py
new file mode 100644
index 0000000000..8312cc71d1
--- /dev/null
+++ b/tools/agentic_import/metrics/pvmap_generator_metrics.py
@@ -0,0 +1,245 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#         https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Utilities to generate metrics for PV map generation for statvar imports.
+"""
+
+import os
+import sys
+import subprocess
+import tempfile
+import time
+
+from absl import app
+from absl import flags
+from absl import logging
+
+_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(_SCRIPT_DIR)
+sys.path.append(os.path.dirname(_SCRIPT_DIR))
+sys.path.append(os.path.dirname(os.path.dirname(_SCRIPT_DIR)))
+sys.path.append(
+    os.path.join(
+        os.path.dirname(os.path.dirname(os.path.dirname(_SCRIPT_DIR))), 'util'))
+sys.path.append(
+    os.path.join(os.path.dirname(os.path.dirname(_SCRIPT_DIR)),
+                 'statvar_importer'))
+
+import file_util
+from counters import Counters
+from mcf_diff import diff_mcf_files
+from mcf_file_util import get_value_list
+from stat_var_test_runner import StatVarProcessorTestRunner, run_script
+
+
+class PVMapGeneratorMetricsRunner(StatVarProcessorTestRunner):
+    """Class to generate metrics for PV map generation for statvar imports.
+
+    The metrics are generated by running the statvar processor with the test
+    config and comparing the output with the expected output.
+    The files for pvmap egenration are specified in the test config as follows:
+        "test_name": "<test name>",
+        ...
+        "pvmap_generator_test": {
+            "pvmap_generator_args": {
+                "input_data": "<path to input file>",
+                "output_data": "<path to output file>",
+            },
+            "expected_output_files": [
+                "<path to expected output file>",
+                "<path to expected output file>",
+                ...
+            ],
+        }
+
+    """
+
+    def __init__(self, test_config_file: str = None, test_config: dict = None):
+        super().__init__(test_config_file, test_config)
+
+    def run_pvmap_generator(self, test_config: dict) -> dict:
+        """Runs a single instance of pvmap generator."""
+        test_name = test_config.get('test_name', 'pvmap_generator_test')
+        test_dir = os.path.join(self._temp_dir.name, test_name)
+        output_dir = os.path.join(test_dir, 'generated')
+        logging.info(
+            f'Running pvmap generator for test {test_name}, output in {output_dir}, config: {test_config}'
+        )
+        env_dict = dict(self.get_env_dict())
+        env_dict.update(get_env_dict(test_config.get("env_file")))
+        # Build the pvmap generator commandline
+        cmd_args = dict()
+        # Add default arguments for output
+
+        cmd_args = merged_args([
+            {
+                '--output_path': output_dir
+            },
+            test_config.get('pvmap_generator_args', {}),
+        ])
+        output_arg = get_arg(cmd_args, '--output_path')
+        if output_arg:
+            output_dir = os.path.realpath(output_arg)
+            if not output_dir.endswith('/'):
+                output_dir = os.path.dirname(output_dir)
+
+        cwd = self.get_local_dir()
+        script_status = run_script(
+            interpreter=sys.executable,
+            script=os.path.join(os.path.dirname(_SCRIPT_DIR),
+                                'pvmap_generator.py'),
+            args=cmd_args,
+            cwd=cwd,
+            env=env_dict,
+            output_dir=output_dir,
+            log_dir=os.path.join(output_dir, 'debug'),
+        )
+
+        # Check for generated files
+        generated_config = {
+            'pv_map': os.path.join(output_dir, 'generated_pvmap.csv'),
+            'config_file': os.path.join(output_dir, 'generated_metadata.csv'),
+        }
+        script_status['generated_config'] = generated_config
+        for arg, file in generated_config.items():
+            if not os.path.exists(file):
+                logging.error(
+                    f'Failed to generate {file} for test: {test_name}')
+                script_status['status'] = 'FAIL'
+
+        return script_status
+
+    def run_sample_statvar_processor(self, test_config: dict,
+                                     generated_config: dict) -> dict:
+        """Run statvar processor on the sample input using the generated configs."""
+        test_name = test_config.get('test_name', 'sample_statvar_processor')
+        test_dir = os.path.join(self._temp_dir.name, test_name)
+        output_dir = os.path.join(test_dir, 'sample')
+        env_dict = dict(self.get_env_dict())
+        env_dict.update(get_env_dict(test_config.get("env_file")))
+
+        # Get commandline args for stavtar processor
+        # to use generated pvmap files.
+        cmd_args = merged_args([
+            generated_config,
+            {
+                '--output_path':
+                    output_dir,
+                '--output_data_pvs':
+                    os.path.join(output_dir, 'output_data_pvs.csv'),
+            },
+        ])
+        script_status = self.run_statvar_processor(test_config, cmd_args)
+
+        return script_status
+
+    def generate_metrics(self, test_config: dict, test_status: dict) -> dict:
+        """Generates metrics for a test config.
+
+        Args:
+          test_config: Dictionary with the test config.
+
+        Returns:
+          Dictionary with the metrics.
+        """
+        test_name = test_config.get('test_name', 'sample_statvar_processor')
+        test_dir = os.path.join(self._temp_dir.name, test_name)
+        outputs = test_config.get('pvmap_generator_outputs')
+        output_stats = self.verify_outputs(outputs, test_config,
+                                           test_status.get('output_dir'))
+        stats = {}
+        for output_name, stats in output_stats:
+            output_stats = output.get('counters', {})
+            output_stats = self.get_stats_from_diff_counters(output_stats)
+            file_util.file_write_csv_dict(
+                output_stats,
+                os.path.join(test_dir, output_name + '-diff-counters.csv'))
+            stats[output_name] = output_stats
+            logging.info(f'Diff stats for {output_name}: {output_stats}')
+
+        return stats
+
+    def get_stats_from_diff_counters(self,
+                                     diff_stats: dict,
+                                     stats: dict = None) -> dict:
+        """Returns precision and recall stats from diff counters.
+
+        Args:
+          diff_stats: diff counters for diff between actual and expected output files.
+          stats: output stats dictionary into which stats are added.
+
+        """
+        if stats is None:
+            stats = dict({
+                'false_positive': 0,
+                'false_negative': 0,
+                'true_positive': 0,
+            })
+        counters = diff_stats.get('counters', {})
+        matched = counters.get('pvs-matched', 0)
+        false_negative = counters.get('pvs-added', 0)
+        false_positive = counters.get('pvs-deleted', 0)
+        # Treat modfied as deleted+added
+        modified = counters.get('pvs-modified', 0)
+        false_negative += modified
+        false_positive += modified
+
+        stats['true_positive'] += matched
+        stats['false_positive'] += false_positive
+        stats['false_negative'] += false_negative
+
+        # Compute precision and recall
+        true_positive = stats['true_positive']
+        precision = true_positive / (max(
+            true_positive + stats['false_positive'], 1))
+        recall = true_positive / (max(true_positive + stats['false_negative'],
+                                      1))
+
+        stats['precision'] = precision
+        stats['recall'] = recall
+        stats['f1'] = 2 * (precision * recall) / max(precision + recall, 1)
+
+        return stats
+
+    def run_test(self, test_output: str = None) -> dict:
+        """Runs the pvmap generator and returns the comparison metrics.
+
+        Args:
+          test_output: JSON file with the test output status
+
+        Returns:
+          JSON dict with the test summary
+        """
+        cwd = self.get_local_dir()
+        os.chdir(cwd)
+        return_status = {'status': 'PASS'}
+        for index, test_config in enumerate(self._test_config):
+            test_status = {}
+            test_name = test_config.get('test_name')
+            if not test_name:
+                test_name = f'pvmap-generator-test-{index}'
+                basedir = os.path.basename(self.get_local_dir())
+                if basedir:
+                    test_name += '-' + basedir
+                test_config['test_name'] = test_name
+            logging.info(f'Running pvmap generator test: {index}:{test_name}')
+            pvmap_generator_status = self.run_pvmap_generator(test_config)
+            test_status['pvmap_generator_status'] = pvmap_generator_status
+
+            logging.info(
+                f'Running statvar processor for test: {index}:{test_name}')
+            statvar_status = self.run_sample_statvar_processor(
+                test_config, pvmap_generator_status.get('generated_config', {}))
+
+            test_status['statvar_processor_status'] = statvar_status
+
diff --git a/tools/agentic_import/pvmap_generator.py b/tools/agentic_import/pvmap_generator.py
index 4b3d457ae8..0ca9f67ff0 100644
--- a/tools/agentic_import/pvmap_generator.py
+++ b/tools/agentic_import/pvmap_generator.py
@@ -81,6 +81,10 @@ def _define_flags():
             'Output path prefix for all generated files (default: output/output)'
         )
 
+        flags.DEFINE_string(
+            'places_resolved_csv', None,
+            'Path to a CSV file with resolved places (optional)')
+
         flags.DEFINE_string(
             'gemini_cli', 'gemini',
             'Custom path or command to invoke Gemini CLI. '
@@ -118,6 +122,7 @@ class Config:
     skip_confirmation: bool = False
     enable_sandboxing: bool = False
     output_path: str = 'output/output'
+    places_resolved_csv: Optional[str] = None
     gemini_cli: Optional[str] = None
     working_dir: Optional[str] = None
     extra_instruction_files: List[str] = field(default_factory=list)
@@ -478,6 +483,9 @@ def _generate_prompt(self) -> Path:
             'extra_instruction_files_abs': [
                 str(path) for path in self._config.extra_instruction_files
             ] if self._config.extra_instruction_files else [],
+            'places_resolved_csv_abs':
+                str(self._resolve_path(self._config.places_resolved_csv))
+                if self._config.places_resolved_csv else "",
         }
 
         # Render template with these variables
@@ -507,6 +515,7 @@ def prepare_config() -> Config:
         skip_confirmation=_FLAGS.skip_confirmation,
         enable_sandboxing=_FLAGS.enable_sandboxing,
         output_path=_FLAGS.output_path,
+        places_resolved_csv=_FLAGS.places_resolved_csv,
         gemini_cli=_FLAGS.gemini_cli,
         working_dir=_FLAGS.working_dir,
         extra_instruction_files=_FLAGS.extra_instruction_files or [],
diff --git a/tools/agentic_import/run_statvar_processor.sh b/tools/agentic_import/run_statvar_processor.sh
index 6cddf9224f..d42ef5f16b 100755
--- a/tools/agentic_import/run_statvar_processor.sh
+++ b/tools/agentic_import/run_statvar_processor.sh
@@ -26,6 +26,7 @@ WORKING_DIR=""
 INPUT_DATA=""
 GEMINI_RUN_ID=""
 OUTPUT_PATH=""
+PLACES_RESOLVED_CSV=""
 
 while [[ $# -gt 0 ]]; do
   case $1 in
@@ -33,26 +34,58 @@ while [[ $# -gt 0 ]]; do
       PYTHON_INTERPRETER="$2"
       shift 2
       ;;
+    --python=*)
+      PYTHON_INTERPRETER="${1#*=}"
+      shift
+      ;;
     --script-dir)
       SCRIPT_DIR="$2"
       shift 2
       ;;
+    --script-dir=*)
+      SCRIPT_DIR="${1#*=}"
+      shift
+      ;;
     --working-dir)
       WORKING_DIR="$2"
       shift 2
       ;;
+    --working-dir=*)
+      WORKING_DIR="${1#*=}"
+      shift
+      ;;
     --input-data)
       INPUT_DATA="$2"
       shift 2
       ;;
+    --input-data=*)
+      INPUT_DATA="${1#*=}"
+      shift
+      ;;
     --gemini-run-id)
       GEMINI_RUN_ID="$2"
       shift 2
       ;;
+    --gemini-run-id=*)
+      GEMINI_RUN_ID="${1#*=}"
+      shift
+      ;;
     --output-path)
       OUTPUT_PATH="$2"
       shift 2
       ;;
+    --output-path=*)
+      OUTPUT_PATH="${1#*=}"
+      shift
+      ;;
+    --places_resolved_csv)
+      PLACES_RESOLVED_CSV="$2"
+      shift 2
+      ;;
+    --places_resolved_csv=*)
+      PLACES_RESOLVED_CSV="${1#*=}"
+      shift
+      ;;
     *)
       echo "Unknown option $1"
       exit 1
@@ -96,6 +129,7 @@ echo "Running statvar processor..."
   --skip_constant_csv_columns=False \
   --output_columns="${OUTPUT_COLUMNS}" \
   --output_counters="${OUTPUT_COUNTERS}" \
+  --places_resolved_csv="${PLACES_RESOLVED_CSV}" \
   --output_path="${OUTPUT_PREFIX}" > "${PROCESSOR_LOG}" 2>&1
 
 # Capture the processor exit code
diff --git a/tools/agentic_import/templates/generate_pvmap_prompt.j2 b/tools/agentic_import/templates/generate_pvmap_prompt.j2
index 81af2afc63..ae4bda42f9 100644
--- a/tools/agentic_import/templates/generate_pvmap_prompt.j2
+++ b/tools/agentic_import/templates/generate_pvmap_prompt.j2
@@ -1403,6 +1403,7 @@ For SDMX datasets, also ensure:
   --working-dir "{{working_dir_abs}}" \
   --input-data "{{input_data_abs}}" \
   --gemini-run-id "{{gemini_run_id}}" \
+  --places_resolved_csv "{{places_resolved_csv_abs}}" \
   --output-path "{{output_path_abs}}"
 ```
 
diff --git a/tools/statvar_importer/mcf_diff.py b/tools/statvar_importer/mcf_diff.py
index e7eaa825f8..d0d7dd5831 100644
--- a/tools/statvar_importer/mcf_diff.py
+++ b/tools/statvar_importer/mcf_diff.py
@@ -190,10 +190,11 @@ def diff_mcf_node_pvs(node_1: dict,
     pvs_deleted = set()
     pvs_added = set()
     pvs_modified = set()
+    pvs_matched = set()
     for d in diff:
         diff_str.append(d)
-        if d[0] == ' ':
-            counters and counters.add_counter(f'PVs-matched', 1)
+        if d[0] == ' ' and len(d) > 1:
+            counters and counters.add_counter(f'pvs-matched', 1)
         else:
             prop = ''
             value = ''
@@ -204,10 +205,16 @@ def diff_mcf_node_pvs(node_1: dict,
                 if prop:
                     pvs_deleted.add(prop)
                 has_diff = True
-            if d[0] == '+':
+            elif d[0] == '+':
                 if prop:
                     pvs_added.add(prop)
                 has_diff = True
+            elif d[0] == '?':
+                # Ignore modifications that already show as delete/add
+                has_diff = True
+            else:
+                if prop:
+                  pvs_matched.add(prop)
     if has_diff:
         pvs_modified = pvs_deleted.intersection(pvs_added)
         pvs_added = pvs_added.difference(pvs_modified)
@@ -217,6 +224,7 @@ def diff_mcf_node_pvs(node_1: dict,
                 (pvs_modified, 'modified'),
                 (pvs_added, 'added'),
                 (pvs_deleted, 'deleted'),
+                (pvs_matched, 'matched'),
             ]:
                 for prop in props:
                     counters.add_counter(f'pvs-{diff_type}', 1)
@@ -230,7 +238,9 @@ def diff_mcf_node_pvs(node_1: dict,
         else:
             counters and counters.add_counter(f'nodes-missing-in-mcf1', 1)
     else:
-        counters and counters.add_counter(f'nodes-matched', 1)
+      if counters:
+        counters.add_counter(f'nodes-matched', 1)
+        counters.add_counter(f'pvs-matched', len(pvs_matched))
     return has_diff, '\n'.join(diff_str), pvs_added, pvs_deleted, pvs_modified
 
 
@@ -389,4 +399,4 @@ def main(_):
 
 
 if __name__ == '__main__':
-    app.run(main)
+    app.run(main)
\ No newline at end of file
diff --git a/tools/statvar_importer/stat_var_processor.py b/tools/statvar_importer/stat_var_processor.py
index 487cce556f..d0184e022c 100644
--- a/tools/statvar_importer/stat_var_processor.py
+++ b/tools/statvar_importer/stat_var_processor.py
@@ -83,7 +83,7 @@
 from mcf_filter import drop_existing_mcf_nodes
 from mcf_diff import fingerprint_node, fingerprint_mcf_nodes, diff_mcf_node_pvs
 from place_resolver import PlaceResolver
-from property_value_mapper import PropertyValueMapper
+from property_value_mapper import PropertyValueMapper, write_pv_map
 from schema_resolver import SchemaResolver
 from json_to_csv import file_json_to_csv
 from schema_generator import generate_schema_nodes, generate_statvar_name
@@ -1429,6 +1429,8 @@ def __init__(
             self._config.get('numeric_data_key', 'Number'),
             self._config.get('pv_lookup_key', 'Key'),
         ]
+        # Save per input cell PVs if required.
+        self._data_pvs = {} if self._config.get('output_data_pvs') else None
 
     def generate_pvmap(self):
         """Generate a PV Map from the input data."""
@@ -2272,6 +2274,8 @@ def process_row(self, row: list, row_index: int):
                     row_index, col_index + 1) and self.process_stat_var_obs_pvs(
                         merged_col_pvs, row_index, col_index):
                 row_svobs += 1
+            if self._data_pvs is not None:
+                self._data_pvs[self._file_context] = merged_col_pvs
         self.process_row_header_pvs(row, row_index, row_col_pvs, row_svobs,
                                     cols_with_pvs)
         # If row has no SVObs but has PVs, it must be a header.
@@ -2818,6 +2822,11 @@ def write_outputs(self, output_path: str):
                 columns=self._config.get('output_columns', []),
                 output_tmcf_file=output_tmcf_file,
             )
+        output_data_pvs = self._config.get('output_data_pvs')
+        if output_data_pvs and self._data_pvs:
+            write_pv_map(self._data_pvs, output_data_pvs)
+            self._counters.add_counter('output-data-pvs', len(self._data_pvs))
+
         self._counters.print_counters()
         counters_filename = self._config.get('output_counters',
                                              output_path + '_counters.txt')
diff --git a/tools/statvar_importer/stat_var_test_runner.py b/tools/statvar_importer/stat_var_test_runner.py
new file mode 100644
index 0000000000..ed9a6fe99b
--- /dev/null
+++ b/tools/statvar_importer/stat_var_test_runner.py
@@ -0,0 +1,524 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#         https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Utilities to test statvar imports.
+
+StatVar imports use a JSON test config for regression tests that invoke
+statvar processor with a specified set of configs on a test input and
+verifies the expected output files.
+The test config is a list of statvar processor command line invocation
+settings with each invocation as a dictionary with the following sections:a
+  test_name: name of the test for logs
+  env_file: a file with environment variables such as API keys
+  statvar_processor_args: dictionary with the command line arguments
+    for statvar processor configs.
+  expected_outputs:
+    A dictionary of expected output files mapped to output files.
+
+  All file references are releative to the import folder with the
+  test config.
+
+Example:
+[
+  # Parameters for a single invocation of statvar processor
+  {
+    "test_name": "Test_Import",
+
+    # Enviroment file loaded with dictionary of env settings
+    # such as API keys
+    "env_file": "gs://datcom-prod-imports/config/test_env.csv"
+
+    # Statvar processor command line arguments
+    "statvar_processor_args": {
+      "config_file": "metadata.csv",
+      "pv_map": "import_pvmap.csv",
+      "places_resolved_csv": "places.csv",
+      "input_data": "test_data/sample_input.csv",
+    },
+
+    # Statvar processor output files with expected outputs to compare with
+    "statvar_processor_outputs": [
+      {
+        "output_name": "Test_Import_Observations",
+        "output_file": "<path to output file for a local run>",
+        "expected_output_file": "test_data/sample_output.csv",
+      },
+    ]
+  }
+]
+
+"""
+
+import os
+import sys
+import subprocess
+import tempfile
+import time
+
+from absl import app
+from absl import flags
+from absl import logging
+
+_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(_SCRIPT_DIR)
+sys.path.append(os.path.dirname(_SCRIPT_DIR))
+sys.path.append(os.path.dirname(os.path.dirname(_SCRIPT_DIR)))
+sys.path.append(
+    os.path.join(os.path.dirname(os.path.dirname(_SCRIPT_DIR)), 'util'))
+
+import file_util
+from mcf_diff import diff_mcf_files
+from mcf_file_util import get_value_list
+
+from counters import Counters
+
+flags.DEFINE_string('test_config', '', 'JSON test config file')
+flags.DEFINE_string('test_output', '', 'Output JSON file with test summary')
+
+_FLAGS = flags.FLAGS
+
+_BASH_PATH = os.environ.get('BASH_PATH', 'bash')
+
+
+class StatVarProcessorTestRunner:
+    """Class to run statvar processor for a test config
+  """
+
+    def __init__(self, test_config_file: str = None, test_config: dict = None):
+        self._temp_dir = tempfile.TemporaryDirectory()
+        self._test_config = []
+        if test_config:
+            self._test_config.extend(test_config)
+        self._env_dict = {}
+        self.load_config(test_config_file)
+        self._counters = Counters()
+
+    def __del__(self):
+        """Cleanup local files."""
+        self._temp_dir.cleanup()
+
+    def load_config(self, test_config_file: str):
+        """Loads a test config from a JSON file."""
+        if test_config_file:
+            file_config = file_util.file_load_py_dict(test_config_file)
+            logging.info(f'Loading test config: {file_config}')
+            self._test_config.extend(file_config)
+            self._test_config_file = os.path.realpath(test_config_file)
+        self._env_dict.update(
+            get_env_dict(self._test_config[0].get('env_file', '')))
+
+    def get_config(self) -> dict:
+        """Returns the test config."""
+        return self._test_config
+
+    def get_local_dir(self) -> str:
+        """Returns the local directory for the test."""
+        if self._test_config_file:
+            return os.path.dirname(self._test_config_file)
+        return os.getcwd()
+
+    def get_env_dict(self) -> dict:
+        """Returns the environment variables for the test."""
+        return self._env_dict
+
+    def run_tests(self, test_output: str = None) -> dict:
+        """Runs all the statvar processor test and verified each output.
+
+        Args:
+          test_output: JSON file with the test output status
+
+        Returns:
+          JSON dict with the test summary
+        """
+        cwd = self.get_local_dir()
+        os.chdir(cwd)
+        logging.info(
+            f'Running statvar processor for {len(self._test_config)} tests from {cwd}'
+        )
+        return_status = {'status': 'PASS'}
+        for index, test_config in enumerate(self._test_config):
+            test_name = test_config.get('test_name')
+            if not test_name:
+                test_name = f'statvar-processor-test-{index}'
+                basedir = os.path.basename(self.get_local_dir())
+                if basedir:
+                    test_name += '-' + basedir
+                test_config['test_name'] = test_name
+            logging.info(f'Running statvar processor test: {index}:{test_name}')
+            test_status = self.run_statvar_processor(test_config)
+            if test_status.get('returncode', 1) != 0:
+                logging.error(
+                    f'statvar processor test failed for {test_name}: {test_status}'
+                )
+                return_status['status'] = 'FAIL'
+            else:
+                outputs = test_config.get('statvar_processor_outputs')
+                if outputs:
+                    output_status = self.verify_outputs(
+                        outputs, test_config, test_status.get('output_dir'))
+                    if output_status.get('status', '') != 'PASS':
+                        logging.error(
+                            f'Failed to verify outputs for test: {index}:{test_name}:{output_status}'
+                        )
+                        return_status['status'] = 'FAIL'
+                    test_status['output_status'] = output_status
+            return_status[test_name] = test_status
+            logging.info(
+                f'statvar processor test: {index}:{test_name}: {return_status["status"]}'
+            )
+        if test_output:
+            file_util.file_write_py_dict(return_status, test_output)
+        return return_status
+
+    def run_statvar_processor(self,
+                              config: dict,
+                              override_args: dict = {}) -> dict:
+        """Runs a single instance of statvar processor."""
+        test_name = config.get('test_name', 'statvar_processor_test')
+        test_dir = os.path.join(self._temp_dir.name, test_name)
+        output_dir = os.path.join(test_dir, 'output')
+        env_dict = dict(self.get_env_dict())
+        env_dict.update(get_env_dict(config.get("env_file")))
+        # Build the statvar processor commandline
+        cmd_args = merge_args([
+            {
+                '--output_path': '"{output_dir}"',
+                '--output_counters': '"{output_dir}/statvar_counters.json"',
+            },
+            config.get('statvar_processor_args', {}),
+            override_args,
+        ])
+        output_arg = get_arg(cmd_args, '--output_path')
+        if output_arg:
+            output_dir = os.path.realpath(output_arg)
+            if not output_dir.endswith('/'):
+                output_dir = os.path.dirname(output_dir)
+        cwd = self.get_local_dir()
+        script_status = run_script(
+            interpreter=sys.executable,
+            script=os.path.join(_SCRIPT_DIR, 'stat_var_processor.py'),
+            args=cmd_args,
+            cwd=cwd,
+            env=env_dict,
+            output_dir=output_dir,
+            log_dir=os.path.join(output_dir, 'debug'),
+        )
+        script_status['output_dir'] = output_dir
+        return script_status
+
+    def verify_outputs(self, outputs: list[dict], config: dict,
+                       output_path: dir) -> dict:
+        """Compare actual and expected outputs.
+
+        Args:
+          outputs: list of expected and actual outputs to be compared.
+            each item in the list is a dictionary:
+            {
+              output_name: (optional) name of the output for debug and logs
+              output_file: actual output file to be compared.
+                this is a local file or path relative to output_path
+              expected_output_file: path to expected file to be compared with
+              diff_config: (optional) dictionary with diff configs such as:
+                ignore_property: 'name'
+            }
+
+        Returns:
+          dictionary with the overall status as well as a list
+          of status per output.
+          {
+            'status': 'PASS' if there are no diffs else 'FAIL'
+
+            # diff summary per output
+            '<output_name>': {
+              <summary of diffs for output>
+              'diff_status': 'MATCHED' or 'DELETED' or 'MODIFIED'
+              'deleted': count of deleted nodes
+              'modified': count of modified nodes
+            },
+          }
+        """
+        logging.info(f'Comparing outputs: {outputs}')
+        return_status = {'status': 'PASS'}
+        for index, output in enumerate(outputs):
+            expected_files = file_util.file_get_matching(
+                output.get('expected_output_file'))
+            if not expected_files:
+                logging.warning(
+                    f'Ignoring config without expected file: {output}')
+                continue
+            output_name = output.get('name')
+            if not output_name:
+                output_name = f'output_{index}_{os.path.basename(expected_files[0])}'
+            if output_name in return_status:
+                output_name = output_name + '-' + str(index)
+            actual_files = output.get('output_file')
+            actuals = []
+            for file in get_value_list(actual_files):
+                matching_files = file_util.file_get_matching(
+                    os.path.join(output_path, file))
+                if not matching_files:
+                    matching_files = file_util.file_get_matching(file)
+                if matching_files:
+                    actuals.extend(matching_files)
+            if not actuals:
+                logging.error(f'Missing outputs for {output}')
+                return_status['status'] = 'ERROR'
+            diff_status = diff_files(
+                actuals, expected_files, output.get('diff_config', {}),
+                os.path.join(output_path, f'{output_name}.diff'))
+            if diff_status.get('diff_status') != 'MATCHED':
+                return_status['status'] = 'FAIL'
+                logging.error(
+                    f'Failed to match {index}:{output}, diff: {diff_status}')
+            return_status[output_name] = diff_status
+            return_status[output_name]['name'] = output_name
+        logging.info(
+            f'Verify outputs: {return_status.get("status")} for {outputs}')
+        return return_status
+
+
+def merge_args(self, args: list[dict]) -> dict:
+    """Returns a dictionary of command line args from a list of args.
+
+  Args:
+    args: list of dictionary of command line argument and valies:
+      [
+        { 'arg1': 'value1', ...},
+        { ...},
+        ...
+      }
+
+  Returns:
+    Dictionary of command line args with the value from the last arg.
+  """
+
+    if not isinstance(args, list):
+        args = list(args)
+
+    # Merge arguments keeping the last value
+    merged_args = dict()
+    for args_dict in args:
+        for arg, value in args_dict.items():
+            if value is not None and not arg.startswith('--'):
+                arg = '--' + arg
+            merged_args[arg] = value
+
+    return merged_args
+
+
+def get_args_list(self, args: dict) -> list:
+    """Returns a list of command line args."""
+    cmd_args = []
+    if isinstance(args, dict):
+        for arg, val in args.items():
+            if val:
+                cmd_args.append(f'{arg}={val}')
+            else:
+                cmd_args.append(f'{arg}')
+    elif isinstance(args, list):
+        cmd_args.extend(args)
+    else:
+        cmd_args.append(args)
+    return cmd_args
+
+
+def get_arg(self, args_dict: dict, arg: str, default=None) -> str:
+    """Returns the value of a specific arg if present."""
+    return args_dict.get(arg, default)
+
+
+def get_env_dict(filename: str) -> dict:
+    """Returns a dictionary of env variable settings from a file.
+
+  Args:
+    filename: file with the environemnt settings.
+      It can be a csv, json, txt or shell file.
+
+  Returns:
+    dictionary of env variable to values mappings.
+  """
+    env_dict = {}
+    if isinstance(filename, dict):
+        # arguent is a dictionary of env variables. Use it as is.
+        return filename
+
+    env_files = file_util.file_get_matching(filename)
+    for env_file in env_files:
+        _, file_ext = os.path.splitext(env_file)
+        if file_ext == '.sh' or file_ext == ".txt" or file_ext == '.env':
+            # File has one variable per line
+            with file_util.FileIO(env_file) as fp:
+                for line in fp.readlines():
+                    line = line.strip()
+                    if line.startswith('#'):
+                        # Ignore commented lines
+                        continue
+                    if '=' in line:
+                        env_var, value = line.split('=', 1)
+                        env_var = env_var.strip().strip('"')
+                        value = value.strip().strip('"')
+                        env_dict[env_var] = value
+        else:
+            # Assume the file is a dictionary
+            env_dict.update(file_util.file_load_py_dict(env_file))
+
+    logging.info(f'Loaded {len(env_dict)} env vars from {env_files}')
+    return env_dict
+
+
+def run_script(interpreter: str, script: str, args: list | dict, cwd: str,
+               env: dict, output_dir: str, log_dir: str) -> dict:
+    """Run a commandline script as a child process
+
+        Args:
+          interpreter: interpreter for the script such as python, bash
+          script: local or full path to the script file.
+          args: command line arguments for the script as a list
+          cwd: current directory for the script
+          env: dictionary of environment variables.
+          output_dir: fodler for output files generated by the script, if any.
+          log_dir: directory to store logs.
+
+        Returns:
+          dictionary of command and status
+        """
+    cmd_args = []
+    if interpreter:
+        cmd_args.append(interpreter)
+    else:
+        if script and script.endswith('.py'):
+            # For python scripts use the current script's python binary
+            interpreter = sys.executable
+        if script and script.endswith('.sh'):
+            # For shell scripts use bash
+            interpreter = _BASH_PATH
+    if script:
+        cmd_args.append(script)
+    if args:
+        cmd_args.extend(get_args_list(args))
+
+    logging.info(f'Running command: {cmd_args} in {cwd}, env: {env}')
+    env_dict = dict(os.environ)
+    if env:
+        env_dict.update(env)
+    start_time = time.perf_counter()
+    process = subprocess.Popen(
+        cmd_args,
+        cwd=cwd,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        env=env_dict,
+    )
+
+    # Log output continuously until the command completes.
+    stderr_file = os.path.join(log_dir, 'stderr.log')
+    stdout_file = os.path.join(log_dir, 'stdout.log')
+    with file_util.FileIO(stderr_file, 'wb') as f_err:
+        for line in process.stderr:
+            f_err.write(line)
+            logging.info(f'stderr: {line}')
+    with file_util.FileIO(stdout_file, 'wb') as f_out:
+        for line in process.stdout:
+            f_out.write(line)
+            logging.info(f'stdout: {line}')
+
+    # Wait for process to complete
+    process.wait()
+    end_time = time.perf_counter()
+    return_code = process.returncode
+    duration = end_time - start_time
+
+    logging.info(
+        f'Completed command: {cmd_args}, return code: {return_code}, time: {duration} secs, logs in: {log_dir}'
+    )
+
+    return {
+        'command': ' '.join(cmd_args),
+        'args': args,
+        'cwd': cwd,
+        'returncode': return_code,
+        'output_path': output_dir,
+        'debug': log_dir,
+        'duration': duration,
+        'stdout': stdout_file,
+        'stderr': stderr_file,
+    }
+
+
+def diff_files(actual: str,
+               expected: str,
+               diff_config: dict = {},
+               diff_output_file: str = None) -> dict:
+    """Compares actual and expected files and returns dictionary with results.
+
+  Args:
+    actual: list of actual files as csv or mcf.
+    expected: list of expected output files as csv or mcf
+    diff_config: dictionary of diff settings (refer to mcf_diff:diff_mcf_files)
+      compare_dcids: list of dcids to be compared
+      compare_nodes_with_pv: only compare nodes that have a listed propeorty:value
+      ignore_nodes_with_pv: ignore nodes with any of the property:value listed
+      compare_property: only compare listed propeorties in a node
+
+  Returns:
+      {
+        status: MATCH or DELETED or MODIFIED
+        diff_log: log file with all the text style diffs with +|- prefixes.
+        missing: count of expected nodes missing in actual
+        modified: count of nodes expected nodes with modified pvs in actual
+        sample: a sample of 100 lines of diff output
+      }
+  """
+
+    counters = Counters()
+    diff_str = diff_mcf_files(actual, expected, diff_config, counters)
+    matched = counters.get_counter('nodes-matched')
+    deleted = counters.get_counter('dcid-missing-in-nodes1')
+    added = counters.get_counter('dcid-missing-in-nodes2')
+    modified = counters.get_counter('nodes-with-diff')
+    status = 'MATCHED'
+    if modified:
+        status = 'MODIFIED'
+    if deleted:
+        status = 'DELETED'
+    return_status = {
+        'actual': actual,
+        'expected': expected,
+        'diff_status': status,
+        'missing': deleted,
+        'modified': modified,
+        'added': added,
+        'matched': matched,
+        'counters': counters.get_counters(),
+    }
+    logging.info(f'Diff summary: {return_status}')
+
+    if diff_str:
+        if diff_output_file:
+            with file_util.FileIO(diff_output_file, 'w') as diff_file:
+                diff_file.write(diff_str)
+            return_status['diff_output'] = diff_output_file
+        return_status['diff_sample'] = diff_str[:1000]
+
+    return return_status
+
+
+def main(_):
+    statvar_processor_tester = StatVarProcessorTestRunner(
+        test_config_file=_FLAGS.test_config)
+    test_result = statvar_processor_tester.run_tests(_FLAGS.test_output)
+    logging.info(f'Test result: {test_result}')
+
+
+if __name__ == '__main__':
+    app.run(main)
diff --git a/tools/statvar_importer/words_allowlist.txt b/tools/statvar_importer/words_allowlist.txt
index 1a8f28cfda..a255f5b45c 100644
--- a/tools/statvar_importer/words_allowlist.txt
+++ b/tools/statvar_importer/words_allowlist.txt
@@ -25,3 +25,5 @@ svpg
 url
 usc
 var
+antiretroviral
+antiretrovirals