codeflash-ai · KRRT7 · Apr 10, 2026 · Apr 10, 2026
diff --git a/tests/benchmarks/__init__.py → .codeflash/benchmarks/__init__.py b/tests/benchmarks/__init__.py → .codeflash/benchmarks/__init__.py
diff --git a/...st_benchmark_code_extract_code_context.py → ...st_benchmark_code_extract_code_context.py b/...st_benchmark_code_extract_code_context.py → ...st_benchmark_code_extract_code_context.py
diff --git a/...s/benchmarks/test_benchmark_comparator.py → ...h/benchmarks/test_benchmark_comparator.py b/...s/benchmarks/test_benchmark_comparator.py → ...h/benchmarks/test_benchmark_comparator.py
diff --git a/...rks/test_benchmark_discover_unit_tests.py → ...rks/test_benchmark_discover_unit_tests.py b/...rks/test_benchmark_discover_unit_tests.py → ...rks/test_benchmark_discover_unit_tests.py
diff --git a/...marks/test_benchmark_libcst_multi_file.py → ...marks/test_benchmark_libcst_multi_file.py b/...marks/test_benchmark_libcst_multi_file.py → ...marks/test_benchmark_libcst_multi_file.py
diff --git a/...chmarks/test_benchmark_libcst_pipeline.py → ...chmarks/test_benchmark_libcst_pipeline.py b/...chmarks/test_benchmark_libcst_pipeline.py → ...chmarks/test_benchmark_libcst_pipeline.py
diff --git a/...arks/test_benchmark_merge_test_results.py → ...arks/test_benchmark_merge_test_results.py b/...arks/test_benchmark_merge_test_results.py → ...arks/test_benchmark_merge_test_results.py
@@ -2,7 +2,7 @@
 from codeflash.verification.parse_test_output import merge_test_results
 
 
-def generate_test_invocations(count=100):
+def generate_test_invocations(count: int = 100) -> tuple[TestResults, TestResults]:
     """Generate a set number of test invocations for benchmarking."""
     test_results_xml = TestResults()
     test_results_bin = TestResults()
@@ -21,7 +21,7 @@ def generate_test_invocations(count=100):
                     function_getting_tested="sorter",
                     iteration_id=iteration_id,
                 ),
-                file_name="/tmp/tests/unittest/test_bubble_sort__perfinstrumented.py",
+                file_name="/tmp/tests/unittest/test_bubble_sort__perfinstrumented.py",  # noqa: S108
                 did_pass=True,
                 runtime=None if i % 3 == 0 else i * 100,  # Vary runtime values
                 test_framework="unittest",
@@ -42,7 +42,7 @@ def generate_test_invocations(count=100):
                     function_getting_tested="sorter",
                     iteration_id=iteration_id,
                 ),
-                file_name="/tmp/tests/unittest/test_bubble_sort__perfinstrumented.py",
+                file_name="/tmp/tests/unittest/test_bubble_sort__perfinstrumented.py",  # noqa: S108
                 did_pass=True,
                 runtime=500 + i * 20,  # Generate varying runtime values
                 test_framework="unittest",
@@ -56,12 +56,15 @@ def generate_test_invocations(count=100):
     return test_results_xml, test_results_bin
 
 
-def run_merge_benchmark(count=100):
+def run_merge_benchmark(count: int = 100) -> None:
+    """Generate ``count`` test invocations and merge the XML and binary result sets."""
     test_results_xml, test_results_bin = generate_test_invocations(count)
 
     # Perform the merge operation that will be benchmarked
     merge_test_results(xml_test_results=test_results_xml, bin_test_results=test_results_bin, test_framework="unittest")
 
 
-def test_benchmark_merge_test_results(benchmark):
-    benchmark(run_merge_benchmark, 1000)  # Default to 100 test invocations
+def test_benchmark_merge_test_results(benchmark) -> None:
+    """Run ``run_merge_benchmark`` through the ``benchmark`` fixture."""
+    # Pass 1000 explicitly; run_merge_benchmark's own default count is 100.
+    benchmark(run_merge_benchmark, 1000)
diff --git a/codeflash.code-workspace b/codeflash.code-workspace
@@ -16,7 +16,7 @@
             "tests/",
             "-vv",
             "--ignore",
-            "tests/benchmarks/"
+            ".codeflash/benchmarks/"
         ],
     },
     "launch": {

diff --git a/codeflash/cli_cmds/cli.py b/codeflash/cli_cmds/cli.py
@@ -156,7 +156,14 @@ def process_pyproject_config(args: Namespace) -> Namespace:
             raise AssertionError("--tests-root must be specified")
     assert Path(args.tests_root).is_dir(), f"--tests-root {args.tests_root} must be a valid directory"
     if args.benchmark:
-        assert args.benchmarks_root is not None, "--benchmarks-root must be specified when running with --benchmark"
+        if args.benchmarks_root is None:
+            # No --benchmarks-root given: fall back to the .codeflash/benchmarks/ convention under the current directory
+            candidate = Path.cwd() / ".codeflash" / "benchmarks"
+            if candidate.is_dir():
+                args.benchmarks_root = str(candidate)
+            else:
+                msg = "--benchmarks-root must be specified when running with --benchmark, or .codeflash/benchmarks/ must exist"
+                raise AssertionError(msg)
         assert Path(args.benchmarks_root).is_dir(), (
             f"--benchmarks-root {args.benchmarks_root} must be a valid directory"
         )

diff --git a/codeflash/cli_cmds/cmd_compare.py b/codeflash/cli_cmds/cmd_compare.py
@@ -87,8 +87,14 @@ def run_compare(args: Namespace) -> None:
     benchmarks_root_str = pyproject_config.get("benchmarks_root")
 
     if not benchmarks_root_str:
-        logger.error("benchmarks-root must be configured in [tool.codeflash] to use compare")
-        sys.exit(1)
+        # Not configured: fall back to <project_root>/.codeflash/benchmarks/ when that directory exists
+        candidate = project_root / ".codeflash" / "benchmarks"
+        if candidate.is_dir():
+            benchmarks_root_str = str(candidate)
+            logger.info(f"Auto-discovered benchmarks at {candidate}")
+        else:
+            logger.error("benchmarks-root must be configured in [tool.codeflash] or .codeflash/benchmarks/ must exist")
+            sys.exit(1)
 
     benchmarks_root = Path(benchmarks_root_str).resolve()
     if not benchmarks_root.is_dir():

diff --git a/codeflash/code_utils/code_utils.py b/codeflash/code_utils/code_utils.py
@@ -423,7 +423,7 @@ def get_run_tmp_file(file_path: Path | str) -> Path:
         file_path = Path(file_path)
     if not hasattr(get_run_tmp_file, "tmpdir_path"):
         get_run_tmp_file.tmpdir = TemporaryDirectory(prefix="codeflash_")
-        get_run_tmp_file.tmpdir_path = Path(get_run_tmp_file.tmpdir.name)
+        get_run_tmp_file.tmpdir_path = Path(get_run_tmp_file.tmpdir.name).resolve()
     return get_run_tmp_file.tmpdir_path / file_path
 
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -354,7 +354,7 @@ __version__ = "{version}"
 # All paths are relative to this pyproject.toml's directory.
 module-root = "codeflash"
 tests-root = "tests"
-benchmarks-root = "tests/benchmarks"
+benchmarks-root = ".codeflash/benchmarks"
 ignore-paths = []
 formatter-cmds = [
     "uvx ruff check --exit-zero --fix $file",