From ec14860d29e6e6f4d1655f67e6d0c89608696050 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 07:56:02 -0500 Subject: [PATCH 1/2] Move benchmarks to .codeflash/benchmarks/ and auto-discover Move codeflash's own benchmarks to .codeflash/benchmarks/. Add auto-discovery of .codeflash/benchmarks/ in codeflash compare and benchmark mode -- when benchmarks-root is not explicitly configured, the CLI checks for .codeflash/benchmarks/ before erroring. Backwards compatible: users with existing benchmarks-root config are unaffected. Docs continue to show tests/benchmarks as the example path. --- {tests => .codeflash}/benchmarks/__init__.py | 0 .../test_benchmark_code_extract_code_context.py | 0 .../benchmarks/test_benchmark_comparator.py | 0 .../benchmarks/test_benchmark_discover_unit_tests.py | 0 .../benchmarks/test_benchmark_libcst_multi_file.py | 0 .../benchmarks/test_benchmark_libcst_pipeline.py | 0 .../benchmarks/test_benchmark_merge_test_results.py | 0 codeflash.code-workspace | 2 +- codeflash/cli_cmds/cli.py | 9 ++++++++- codeflash/cli_cmds/cmd_compare.py | 10 ++++++++-- pyproject.toml | 2 +- 11 files changed, 18 insertions(+), 5 deletions(-) rename {tests => .codeflash}/benchmarks/__init__.py (100%) rename {tests => .codeflash}/benchmarks/test_benchmark_code_extract_code_context.py (100%) rename {tests => .codeflash}/benchmarks/test_benchmark_comparator.py (100%) rename {tests => .codeflash}/benchmarks/test_benchmark_discover_unit_tests.py (100%) rename {tests => .codeflash}/benchmarks/test_benchmark_libcst_multi_file.py (100%) rename {tests => .codeflash}/benchmarks/test_benchmark_libcst_pipeline.py (100%) rename {tests => .codeflash}/benchmarks/test_benchmark_merge_test_results.py (100%) diff --git a/tests/benchmarks/__init__.py b/.codeflash/benchmarks/__init__.py similarity index 100% rename from tests/benchmarks/__init__.py rename to .codeflash/benchmarks/__init__.py diff --git a/tests/benchmarks/test_benchmark_code_extract_code_context.py b/.codeflash/benchmarks/test_benchmark_code_extract_code_context.py similarity index 100% rename from tests/benchmarks/test_benchmark_code_extract_code_context.py rename to .codeflash/benchmarks/test_benchmark_code_extract_code_context.py diff --git a/tests/benchmarks/test_benchmark_comparator.py b/.codeflash/benchmarks/test_benchmark_comparator.py similarity index 100% rename from tests/benchmarks/test_benchmark_comparator.py rename to .codeflash/benchmarks/test_benchmark_comparator.py diff --git a/tests/benchmarks/test_benchmark_discover_unit_tests.py b/.codeflash/benchmarks/test_benchmark_discover_unit_tests.py similarity index 100% rename from tests/benchmarks/test_benchmark_discover_unit_tests.py rename to .codeflash/benchmarks/test_benchmark_discover_unit_tests.py diff --git a/tests/benchmarks/test_benchmark_libcst_multi_file.py b/.codeflash/benchmarks/test_benchmark_libcst_multi_file.py similarity index 100% rename from tests/benchmarks/test_benchmark_libcst_multi_file.py rename to .codeflash/benchmarks/test_benchmark_libcst_multi_file.py diff --git a/tests/benchmarks/test_benchmark_libcst_pipeline.py b/.codeflash/benchmarks/test_benchmark_libcst_pipeline.py similarity index 100% rename from tests/benchmarks/test_benchmark_libcst_pipeline.py rename to .codeflash/benchmarks/test_benchmark_libcst_pipeline.py diff --git a/tests/benchmarks/test_benchmark_merge_test_results.py b/.codeflash/benchmarks/test_benchmark_merge_test_results.py similarity index 100% rename from tests/benchmarks/test_benchmark_merge_test_results.py rename to .codeflash/benchmarks/test_benchmark_merge_test_results.py diff --git a/codeflash.code-workspace b/codeflash.code-workspace index 67f000d35..2c9a31e22 100644 --- a/codeflash.code-workspace +++ b/codeflash.code-workspace @@ -16,7 +16,7 @@ "tests/", "-vv", "--ignore", - "tests/benchmarks/" + ".codeflash/benchmarks/" ], }, "launch": { diff --git a/codeflash/cli_cmds/cli.py b/codeflash/cli_cmds/cli.py index 400403843..2db13efe8 100644 --- a/codeflash/cli_cmds/cli.py +++ b/codeflash/cli_cmds/cli.py @@ -156,7 +156,14 @@ def process_pyproject_config(args: Namespace) -> Namespace: raise AssertionError("--tests-root must be specified") assert Path(args.tests_root).is_dir(), f"--tests-root {args.tests_root} must be a valid directory" if args.benchmark: - assert args.benchmarks_root is not None, "--benchmarks-root must be specified when running with --benchmark" + if args.benchmarks_root is None: + # Auto-discover .codeflash/benchmarks/ convention + candidate = Path.cwd() / ".codeflash" / "benchmarks" + if candidate.is_dir(): + args.benchmarks_root = str(candidate) + else: + msg = "--benchmarks-root must be specified when running with --benchmark, or .codeflash/benchmarks/ must exist" + raise AssertionError(msg) assert Path(args.benchmarks_root).is_dir(), ( f"--benchmarks-root {args.benchmarks_root} must be a valid directory" ) diff --git a/codeflash/cli_cmds/cmd_compare.py b/codeflash/cli_cmds/cmd_compare.py index 87d659fdb..fab917502 100644 --- a/codeflash/cli_cmds/cmd_compare.py +++ b/codeflash/cli_cmds/cmd_compare.py @@ -87,8 +87,14 @@ def run_compare(args: Namespace) -> None: benchmarks_root_str = pyproject_config.get("benchmarks_root") if not benchmarks_root_str: - logger.error("benchmarks-root must be configured in [tool.codeflash] to use compare") - sys.exit(1) + # Auto-discover .codeflash/benchmarks/ if it exists + candidate = project_root / ".codeflash" / "benchmarks" + if candidate.is_dir(): + benchmarks_root_str = str(candidate) + logger.info(f"Auto-discovered benchmarks at {candidate}") + else: + logger.error("benchmarks-root must be configured in [tool.codeflash] or .codeflash/benchmarks/ must exist") + sys.exit(1) benchmarks_root = Path(benchmarks_root_str).resolve() if not benchmarks_root.is_dir(): diff --git a/pyproject.toml b/pyproject.toml index 38256ebfb..7701725ea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -354,7 +354,7 @@ __version__ = "{version}" # All paths are relative to this pyproject.toml's directory. module-root = "codeflash" tests-root = "tests" -benchmarks-root = "tests/benchmarks" +benchmarks-root = ".codeflash/benchmarks" ignore-paths = [] formatter-cmds = [ "uvx ruff check --exit-zero --fix $file", From 8959ead2f9b87de633cf1fd16c4b23abdd00c96f Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 08:51:10 -0500 Subject: [PATCH 2/2] fix: resolve Windows 8.3 short paths in get_run_tmp_file and fix ruff lint errors Add .resolve() to TemporaryDirectory path to expand Windows 8.3 short paths (e.g. RUNNER~1) to canonical long form, fixing test_pickle_patcher failures on Windows CI. Also add missing return type annotations and noqa suppressions for benchmark test file. --- .../benchmarks/test_benchmark_merge_test_results.py | 10 +++++----- codeflash/code_utils/code_utils.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.codeflash/benchmarks/test_benchmark_merge_test_results.py b/.codeflash/benchmarks/test_benchmark_merge_test_results.py index 9b4aaf2ca..355d1c2e8 100644 --- a/.codeflash/benchmarks/test_benchmark_merge_test_results.py +++ b/.codeflash/benchmarks/test_benchmark_merge_test_results.py @@ -2,7 +2,7 @@ from codeflash.verification.parse_test_output import merge_test_results -def generate_test_invocations(count=100): +def generate_test_invocations(count: int = 100) -> tuple[TestResults, TestResults]: """Generate a set number of test invocations for benchmarking.""" test_results_xml = TestResults() test_results_bin = TestResults() @@ -21,7 +21,7 @@ def generate_test_invocations(count=100): function_getting_tested="sorter", iteration_id=iteration_id, ), - file_name="/tmp/tests/unittest/test_bubble_sort__perfinstrumented.py", + file_name="/tmp/tests/unittest/test_bubble_sort__perfinstrumented.py", # noqa: S108 did_pass=True, runtime=None if i % 3 == 0 else i * 100, # Vary runtime values test_framework="unittest", @@ -42,7 +42,7 @@ def generate_test_invocations(count=100): function_getting_tested="sorter", iteration_id=iteration_id, ), - file_name="/tmp/tests/unittest/test_bubble_sort__perfinstrumented.py", + file_name="/tmp/tests/unittest/test_bubble_sort__perfinstrumented.py", # noqa: S108 did_pass=True, runtime=500 + i * 20, # Generate varying runtime values test_framework="unittest", @@ -56,12 +56,12 @@ def generate_test_invocations(count=100): return test_results_xml, test_results_bin -def run_merge_benchmark(count=100): +def run_merge_benchmark(count: int = 100) -> None: test_results_xml, test_results_bin = generate_test_invocations(count) # Perform the merge operation that will be benchmarked merge_test_results(xml_test_results=test_results_xml, bin_test_results=test_results_bin, test_framework="unittest") -def test_benchmark_merge_test_results(benchmark): +def test_benchmark_merge_test_results(benchmark) -> None: benchmark(run_merge_benchmark, 1000) # Default to 100 test invocations diff --git a/codeflash/code_utils/code_utils.py b/codeflash/code_utils/code_utils.py index 0e374f16f..6f8b1bd85 100644 --- a/codeflash/code_utils/code_utils.py +++ b/codeflash/code_utils/code_utils.py @@ -423,7 +423,7 @@ def get_run_tmp_file(file_path: Path | str) -> Path: file_path = Path(file_path) if not hasattr(get_run_tmp_file, "tmpdir_path"): get_run_tmp_file.tmpdir = TemporaryDirectory(prefix="codeflash_") - get_run_tmp_file.tmpdir_path = Path(get_run_tmp_file.tmpdir.name) + get_run_tmp_file.tmpdir_path = Path(get_run_tmp_file.tmpdir.name).resolve() return get_run_tmp_file.tmpdir_path / file_path