From ec14860d29e6e6f4d1655f67e6d0c89608696050 Mon Sep 17 00:00:00 2001
From: Kevin Turcios <turcioskevinr@gmail.com>
Date: Fri, 10 Apr 2026 07:56:02 -0500
Subject: [PATCH 1/2] Move benchmarks to .codeflash/benchmarks/ and
 auto-discover

Move codeflash's own benchmarks to .codeflash/benchmarks/. Add
auto-discovery of .codeflash/benchmarks/ to the compare command and to
benchmark mode (--benchmark): when benchmarks-root is not explicitly
configured, the CLI checks for .codeflash/benchmarks/ before erroring.
The compare command looks under the project root; --benchmark looks
under the current working directory.

Backwards compatible: users with existing benchmarks-root config
are unaffected. Docs continue to show tests/benchmarks as the
example path.
---
 {tests => .codeflash}/benchmarks/__init__.py           |  0
 .../test_benchmark_code_extract_code_context.py        |  0
 .../benchmarks/test_benchmark_comparator.py            |  0
 .../benchmarks/test_benchmark_discover_unit_tests.py   |  0
 .../benchmarks/test_benchmark_libcst_multi_file.py     |  0
 .../benchmarks/test_benchmark_libcst_pipeline.py       |  0
 .../benchmarks/test_benchmark_merge_test_results.py    |  0
 codeflash.code-workspace                               |  2 +-
 codeflash/cli_cmds/cli.py                              |  9 ++++++++-
 codeflash/cli_cmds/cmd_compare.py                      | 10 ++++++++--
 pyproject.toml                                         |  2 +-
 11 files changed, 18 insertions(+), 5 deletions(-)
 rename {tests => .codeflash}/benchmarks/__init__.py (100%)
 rename {tests => .codeflash}/benchmarks/test_benchmark_code_extract_code_context.py (100%)
 rename {tests => .codeflash}/benchmarks/test_benchmark_comparator.py (100%)
 rename {tests => .codeflash}/benchmarks/test_benchmark_discover_unit_tests.py (100%)
 rename {tests => .codeflash}/benchmarks/test_benchmark_libcst_multi_file.py (100%)
 rename {tests => .codeflash}/benchmarks/test_benchmark_libcst_pipeline.py (100%)
 rename {tests => .codeflash}/benchmarks/test_benchmark_merge_test_results.py (100%)

diff --git a/tests/benchmarks/__init__.py b/.codeflash/benchmarks/__init__.py
similarity index 100%
rename from tests/benchmarks/__init__.py
rename to .codeflash/benchmarks/__init__.py
diff --git a/tests/benchmarks/test_benchmark_code_extract_code_context.py b/.codeflash/benchmarks/test_benchmark_code_extract_code_context.py
similarity index 100%
rename from tests/benchmarks/test_benchmark_code_extract_code_context.py
rename to .codeflash/benchmarks/test_benchmark_code_extract_code_context.py
diff --git a/tests/benchmarks/test_benchmark_comparator.py b/.codeflash/benchmarks/test_benchmark_comparator.py
similarity index 100%
rename from tests/benchmarks/test_benchmark_comparator.py
rename to .codeflash/benchmarks/test_benchmark_comparator.py
diff --git a/tests/benchmarks/test_benchmark_discover_unit_tests.py b/.codeflash/benchmarks/test_benchmark_discover_unit_tests.py
similarity index 100%
rename from tests/benchmarks/test_benchmark_discover_unit_tests.py
rename to .codeflash/benchmarks/test_benchmark_discover_unit_tests.py
diff --git a/tests/benchmarks/test_benchmark_libcst_multi_file.py b/.codeflash/benchmarks/test_benchmark_libcst_multi_file.py
similarity index 100%
rename from tests/benchmarks/test_benchmark_libcst_multi_file.py
rename to .codeflash/benchmarks/test_benchmark_libcst_multi_file.py
diff --git a/tests/benchmarks/test_benchmark_libcst_pipeline.py b/.codeflash/benchmarks/test_benchmark_libcst_pipeline.py
similarity index 100%
rename from tests/benchmarks/test_benchmark_libcst_pipeline.py
rename to .codeflash/benchmarks/test_benchmark_libcst_pipeline.py
diff --git a/tests/benchmarks/test_benchmark_merge_test_results.py b/.codeflash/benchmarks/test_benchmark_merge_test_results.py
similarity index 100%
rename from tests/benchmarks/test_benchmark_merge_test_results.py
rename to .codeflash/benchmarks/test_benchmark_merge_test_results.py
diff --git a/codeflash.code-workspace b/codeflash.code-workspace
index 67f000d35..2c9a31e22 100644
--- a/codeflash.code-workspace
+++ b/codeflash.code-workspace
@@ -16,7 +16,7 @@
             "tests/",
             "-vv",
             "--ignore",
-            "tests/benchmarks/"
+            ".codeflash/benchmarks/"
         ],
     },
     "launch": {
diff --git a/codeflash/cli_cmds/cli.py b/codeflash/cli_cmds/cli.py
index 400403843..2db13efe8 100644
--- a/codeflash/cli_cmds/cli.py
+++ b/codeflash/cli_cmds/cli.py
@@ -156,7 +156,14 @@ def process_pyproject_config(args: Namespace) -> Namespace:
             raise AssertionError("--tests-root must be specified")
     assert Path(args.tests_root).is_dir(), f"--tests-root {args.tests_root} must be a valid directory"
     if args.benchmark:
-        assert args.benchmarks_root is not None, "--benchmarks-root must be specified when running with --benchmark"
+        if args.benchmarks_root is None:
+            # Auto-discover .codeflash/benchmarks/ convention
+            candidate = Path.cwd() / ".codeflash" / "benchmarks"
+            if candidate.is_dir():
+                args.benchmarks_root = str(candidate)
+            else:
+                msg = "--benchmarks-root must be specified when running with --benchmark, or .codeflash/benchmarks/ must exist"
+                raise AssertionError(msg)
         assert Path(args.benchmarks_root).is_dir(), (
             f"--benchmarks-root {args.benchmarks_root} must be a valid directory"
         )
diff --git a/codeflash/cli_cmds/cmd_compare.py b/codeflash/cli_cmds/cmd_compare.py
index 87d659fdb..fab917502 100644
--- a/codeflash/cli_cmds/cmd_compare.py
+++ b/codeflash/cli_cmds/cmd_compare.py
@@ -87,8 +87,14 @@ def run_compare(args: Namespace) -> None:
     benchmarks_root_str = pyproject_config.get("benchmarks_root")
 
     if not benchmarks_root_str:
-        logger.error("benchmarks-root must be configured in [tool.codeflash] to use compare")
-        sys.exit(1)
+        # Auto-discover .codeflash/benchmarks/ if it exists
+        candidate = project_root / ".codeflash" / "benchmarks"
+        if candidate.is_dir():
+            benchmarks_root_str = str(candidate)
+            logger.info(f"Auto-discovered benchmarks at {candidate}")
+        else:
+            logger.error("benchmarks-root must be configured in [tool.codeflash] or .codeflash/benchmarks/ must exist")
+            sys.exit(1)
 
     benchmarks_root = Path(benchmarks_root_str).resolve()
     if not benchmarks_root.is_dir():
diff --git a/pyproject.toml b/pyproject.toml
index 38256ebfb..7701725ea 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -354,7 +354,7 @@ __version__ = "{version}"
 # All paths are relative to this pyproject.toml's directory.
 module-root = "codeflash"
 tests-root = "tests"
-benchmarks-root = "tests/benchmarks"
+benchmarks-root = ".codeflash/benchmarks"
 ignore-paths = []
 formatter-cmds = [
     "uvx ruff check --exit-zero --fix $file",

From 8959ead2f9b87de633cf1fd16c4b23abdd00c96f Mon Sep 17 00:00:00 2001
From: Kevin Turcios <turcioskevinr@gmail.com>
Date: Fri, 10 Apr 2026 08:51:10 -0500
Subject: [PATCH 2/2] fix: resolve Windows 8.3 short paths in get_run_tmp_file
 and fix ruff lint errors

Add .resolve() to the TemporaryDirectory path in get_run_tmp_file to
expand Windows 8.3 short paths (e.g. RUNNER~1) to their canonical long
form, fixing test_pickle_patcher failures on Windows CI. Also add the
missing parameter and return type annotations and noqa (S108)
suppressions to the benchmark test file
test_benchmark_merge_test_results.py.
---
 .../benchmarks/test_benchmark_merge_test_results.py    | 10 +++++-----
 codeflash/code_utils/code_utils.py                     |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.codeflash/benchmarks/test_benchmark_merge_test_results.py b/.codeflash/benchmarks/test_benchmark_merge_test_results.py
index 9b4aaf2ca..355d1c2e8 100644
--- a/.codeflash/benchmarks/test_benchmark_merge_test_results.py
+++ b/.codeflash/benchmarks/test_benchmark_merge_test_results.py
@@ -2,7 +2,7 @@
 from codeflash.verification.parse_test_output import merge_test_results
 
 
-def generate_test_invocations(count=100):
+def generate_test_invocations(count: int = 100) -> tuple[TestResults, TestResults]:
     """Generate a set number of test invocations for benchmarking."""
     test_results_xml = TestResults()
     test_results_bin = TestResults()
@@ -21,7 +21,7 @@ def generate_test_invocations(count=100):
                     function_getting_tested="sorter",
                     iteration_id=iteration_id,
                 ),
-                file_name="/tmp/tests/unittest/test_bubble_sort__perfinstrumented.py",
+                file_name="/tmp/tests/unittest/test_bubble_sort__perfinstrumented.py",  # noqa: S108
                 did_pass=True,
                 runtime=None if i % 3 == 0 else i * 100,  # Vary runtime values
                 test_framework="unittest",
@@ -42,7 +42,7 @@ def generate_test_invocations(count=100):
                     function_getting_tested="sorter",
                     iteration_id=iteration_id,
                 ),
-                file_name="/tmp/tests/unittest/test_bubble_sort__perfinstrumented.py",
+                file_name="/tmp/tests/unittest/test_bubble_sort__perfinstrumented.py",  # noqa: S108
                 did_pass=True,
                 runtime=500 + i * 20,  # Generate varying runtime values
                 test_framework="unittest",
@@ -56,12 +56,12 @@ def generate_test_invocations(count=100):
     return test_results_xml, test_results_bin
 
 
-def run_merge_benchmark(count=100):
+def run_merge_benchmark(count: int = 100) -> None:
     test_results_xml, test_results_bin = generate_test_invocations(count)
 
     # Perform the merge operation that will be benchmarked
     merge_test_results(xml_test_results=test_results_xml, bin_test_results=test_results_bin, test_framework="unittest")
 
 
-def test_benchmark_merge_test_results(benchmark):
+def test_benchmark_merge_test_results(benchmark) -> None:
     benchmark(run_merge_benchmark, 1000)  # Default to 100 test invocations
diff --git a/codeflash/code_utils/code_utils.py b/codeflash/code_utils/code_utils.py
index 0e374f16f..6f8b1bd85 100644
--- a/codeflash/code_utils/code_utils.py
+++ b/codeflash/code_utils/code_utils.py
@@ -423,7 +423,7 @@ def get_run_tmp_file(file_path: Path | str) -> Path:
         file_path = Path(file_path)
     if not hasattr(get_run_tmp_file, "tmpdir_path"):
         get_run_tmp_file.tmpdir = TemporaryDirectory(prefix="codeflash_")
-        get_run_tmp_file.tmpdir_path = Path(get_run_tmp_file.tmpdir.name)
+        get_run_tmp_file.tmpdir_path = Path(get_run_tmp_file.tmpdir.name).resolve()
     return get_run_tmp_file.tmpdir_path / file_path