
Commit 6b44365

changes to metrics output dir
1 parent 1248486 commit 6b44365

8 files changed: 182 additions & 46 deletions

.dockerignore

Lines changed: 4 additions & 4 deletions
@@ -2,10 +2,10 @@
 run_output/
 run_metrics/
 tsv_vcf_files/
-test_vcf_files/
-!test_vcf_files/test-100.vcf
-!test_vcf_files/test-1k.vcf
-!test_vcf_files/test-10k.vcf
+test/test_vcf_files/*
+!test/test_vcf_files/test-100.vcf
+!test/test_vcf_files/test-1k.vcf
+!test/test_vcf_files/test-10k.vcf
 out/
 tsv/
 *.jar

.gitignore

Lines changed: 4 additions & 4 deletions
@@ -4,10 +4,10 @@ scripts/node_modules
 !time-*
 
 run_output/
-test_vcf_files/*
-!test_vcf_files/test-100.vcf
-!test_vcf_files/test-1k.vcf
-!test_vcf_files/test-10k.vcf
+test/test_vcf_files/*
+!test/test_vcf_files/test-100.vcf
+!test/test_vcf_files/test-1k.vcf
+!test/test_vcf_files/test-10k.vcf
 RMLStreamer-v2.5.0-standalone.jar
 out/
 tsv/

README.md

Lines changed: 16 additions & 14 deletions
@@ -139,30 +139,32 @@ Outputs:
 - decompressed outputs (decompression mode default):
   - `./out/<sample>/<sample>.nt`
 - `./run_metrics` for logs and metrics
-  - `run_metrics/metrics.csv` includes both conversion and compression metrics per run
+  - each wrapper invocation creates a run-specific subdirectory: `run_metrics/<RUN_ID>/`
+    - example: `run_metrics/20260225T120434/`
+  - `run_metrics/<RUN_ID>/metrics.csv` includes both conversion and compression metrics for that run
   - compound-compression fields are explicit and separate from raw-RDF compression:
     - `gzip_on_hdt_*` (gzip applied to `.hdt`)
     - `brotli_on_hdt_*` (brotli applied to `.hdt`)
     - `hdt_source` (`generated` vs `existing` when reused)
   - conversion step artifacts:
-    - `run_metrics/conversion-time-<output_name>-<run_id>.txt`
-    - `run_metrics/conversion-metrics-<output_name>-<run_id>.json`
+    - `run_metrics/<RUN_ID>/conversion-time-<output_name>-<run_id>.txt`
+    - `run_metrics/<RUN_ID>/conversion-metrics-<output_name>-<run_id>.json`
   - compression step artifacts:
-    - `run_metrics/compression-time-<method>-<output_name>-<run_id>.txt`
-    - `run_metrics/compression-metrics-<output_name>-<run_id>.json`
+    - `run_metrics/<RUN_ID>/compression-time-<method>-<output_name>-<run_id>.txt`
+    - `run_metrics/<RUN_ID>/compression-metrics-<output_name>-<run_id>.json`
   - wrapper runtime artifacts:
-    - `run_metrics/wrapper_execution_times.csv` (one row per wrapper run with mode, elapsed time, status, and full-mode triple totals when available)
-    - `run_metrics/.wrapper_logs/wrapper-<timestamp>.log` stores detailed Docker/stdout/stderr command output
+    - `run_metrics/<RUN_ID>/wrapper_execution_times.csv` (one row for that run with mode, elapsed time, status, and full-mode triple totals when available)
+    - `run_metrics/<RUN_ID>/.wrapper_logs/wrapper-<run_id>.log` stores detailed Docker/stdout/stderr command output
 
 Small VCF fixtures for RDF size/inflation test runs:
-- `test_vcf_files/infl100.vcf` (100 total lines)
-- `test_vcf_files/infl1k.vcf` (1000 total lines)
-- `test_vcf_files/infl10k.vcf` (10000 total lines)
+- `test/test_vcf_files/test-100.vcf` (100 total lines)
+- `test/test_vcf_files/test-1k.vcf` (1000 total lines)
+- `test/test_vcf_files/test-10k.vcf` (10000 total lines)
 
 Example inflation check:
 ```bash
-python3 vcf_rdfizer.py --mode full --input test_vcf_files/infl1k.vcf --rdf-layout aggregate --compression none --keep-tsv --keep-rdf
-wc -l out/infl1k/infl1k.nt
+python3 vcf_rdfizer.py --mode full --input test/test_vcf_files/test-1k.vcf --rdf-layout aggregate --compression none --keep-tsv --keep-rdf
+wc -l out/test-1k/test-1k.nt
 ```
 
 ## How Dependencies Are Handled
@@ -195,7 +197,7 @@ The wrapper validates:
 - Docker runs as the host UID/GID by default to prevent root-owned output files on mounted volumes
 - If mounted output/metrics paths are not writable (e.g., stale root-owned files), the wrapper automatically attempts a one-time in-container permission repair before running
 - Raw command output is written to a hidden wrapper log file instead of printed directly to the terminal
-- A concise elapsed-time summary is printed at the end of each mode run and appended to `run_metrics/wrapper_execution_times.csv`
+- A concise elapsed-time summary is printed at the end of each mode run and written to `run_metrics/<RUN_ID>/wrapper_execution_times.csv`
 - Full mode prints triples produced per input (and total) when conversion metrics are available
 - Optional preflight storage estimate (`--estimate-size`) with a disk-space warning if the upper-bound estimate exceeds free space
@@ -242,7 +244,7 @@ Options:
 - `-b, --build`: force docker build
 - `-B, --no-build`: fail if image missing
 - `-n, --out-name` (default `rdf`): fallback output basename in full mode
-- `-M, --metrics` (default `./run_metrics`): metrics/log directory
+- `-M, --metrics` (default `./run_metrics`): metrics root directory (a `<RUN_ID>/` subdirectory is created per run)
 - `-c, --compression` (default `gzip,brotli,hdt`): compression methods (`gzip,brotli,hdt,hdt_gzip,hdt_brotli,none`)
 - `-k, --keep-tsv`: keep TSV intermediates (full mode)
 - `-R, --keep-rdf`: keep raw `.nt/.nq` RDF outputs after compression (full mode; default is delete)

test/test_vcf_rdfizer_unit.py

Lines changed: 57 additions & 7 deletions
@@ -61,7 +61,53 @@ def output_name_from_command(cmd):
     return None
 
 
+def latest_metrics_run_dir(metrics_root: Path) -> Path:
+    """Return the single/latest per-run metrics directory."""
+    run_dirs = sorted(
+        (
+            path
+            for path in metrics_root.iterdir()
+            if path.is_dir() and re.match(r"^\d{8}T\d{6}$", path.name)
+        ),
+        key=lambda path: path.name,
+    )
+    if not run_dirs:
+        raise AssertionError(f"No per-run metrics directories found under {metrics_root}")
+    return run_dirs[-1]
+
+
 class WrapperUnitTests(VerboseTestCase):
+    def test_print_summary_lists_all_selected_compression_sizes(self):
+        """Summary printer includes one size line per requested compression method."""
+        with tempfile.TemporaryDirectory() as td:
+            tmp_path = Path(td)
+            out_root = tmp_path / "out" / "sample"
+            out_root.mkdir(parents=True, exist_ok=True)
+            nt_path = tmp_path / "sample.nt"
+            nt_path.write_text("<s> <p> <o> .\n")
+            (out_root / "sample.hdt").write_text("hdt\n")
+            (out_root / "sample.nt.gz").write_text("gz\n")
+
+            out_buf = StringIO()
+            with redirect_stdout(out_buf):
+                vcf_rdfizer.print_nt_hdt_summary(
+                    output_root=out_root,
+                    nt_path=nt_path,
+                    hdt_path=out_root / "sample.hdt",
+                    selected_methods=["hdt", "gzip"],
+                    method_results={
+                        "hdt": {"output_size_bytes": 4, "exit_code": 0},
+                        "gzip": {"output_size_bytes": 3, "exit_code": 0},
+                    },
+                    indent=" ",
+                )
+
+            text = out_buf.getvalue()
+            self.assertIn("- HDT (.hdt):", text)
+            self.assertIn("- gzip (.nt.gz):", text)
+            self.assertIn(str(out_root / "sample.hdt"), text)
+            self.assertIn(str(out_root / "sample.nt.gz"), text)
+
     def test_update_metrics_csv_keeps_raw_and_hdt_compound_metrics_separate(self):
         """Metrics CSV keeps raw RDF gzip/brotli fields separate from gzip/brotli-on-HDT fields."""
         with tempfile.TemporaryDirectory() as td:
@@ -393,7 +439,8 @@ def fake_run(cmd, cwd=None, env=None):
 
             self.assertEqual(rc, 0)
             self.assertIn("Run time (compress mode):", out_buf.getvalue())
-            timings_csv = metrics_dir / "wrapper_execution_times.csv"
+            run_metrics_dir = latest_metrics_run_dir(metrics_dir)
+            timings_csv = run_metrics_dir / "wrapper_execution_times.csv"
             self.assertTrue(timings_csv.exists())
             with timings_csv.open() as handle:
                 rows = list(csv.DictReader(handle))
@@ -422,8 +469,9 @@ def fake_run(cmd, cwd=None, env=None):
             sample_dir.mkdir(parents=True, exist_ok=True)
             (sample_dir / f"{out_name}.nt").write_text("<s> <p> <o> .\n")
             payload = {"artifacts": {"output_triples": {"TOTAL": 17}}}
-            metrics_dir.mkdir(parents=True, exist_ok=True)
-            (metrics_dir / f"conversion-metrics-{out_name}-{run_id}.json").write_text(
+            run_metrics_dir = metrics_dir / run_id
+            run_metrics_dir.mkdir(parents=True, exist_ok=True)
+            (run_metrics_dir / f"conversion-metrics-{out_name}-{run_id}.json").write_text(
                 json.dumps(payload),
                 encoding="utf-8",
             )
@@ -469,7 +517,8 @@ def fake_run(cmd, cwd=None, env=None):
             self.assertIn("Total triples produced (full run): 17", output)
             self.assertIn("Run time (full mode):", output)
 
-            timings_csv = metrics_dir / "wrapper_execution_times.csv"
+            run_metrics_dir = latest_metrics_run_dir(metrics_dir)
+            timings_csv = run_metrics_dir / "wrapper_execution_times.csv"
             self.assertTrue(timings_csv.exists())
             with timings_csv.open() as handle:
                 rows = list(csv.DictReader(handle))
@@ -1035,15 +1084,16 @@ def fake_run(cmd, cwd=None, env=None):
                os.chdir(old_cwd)
 
            self.assertEqual(rc, 0)
-           metrics_csv = metrics_dir / "metrics.csv"
+           run_metrics_dir = latest_metrics_run_dir(metrics_dir)
+           metrics_csv = run_metrics_dir / "metrics.csv"
            self.assertTrue(metrics_csv.exists())
            csv_text = metrics_csv.read_text()
            self.assertIn("compression_methods", csv_text)
            self.assertIn("sample", csv_text)
            self.assertIn("hdt", csv_text)
 
-           json_files = list(metrics_dir.glob("compression-metrics-sample-*.json"))
-           time_files = list(metrics_dir.glob("compression-time-hdt-sample-*.txt"))
+           json_files = list(run_metrics_dir.glob("compression-metrics-sample-*.json"))
+           time_files = list(run_metrics_dir.glob("compression-time-hdt-sample-*.txt"))
            self.assertTrue(json_files)
            self.assertTrue(time_files)
