diff --git a/codeflash/languages/javascript/support.py b/codeflash/languages/javascript/support.py index 500c02839..ed1ed80e2 100644 --- a/codeflash/languages/javascript/support.py +++ b/codeflash/languages/javascript/support.py @@ -2068,9 +2068,9 @@ def process_generated_test_strings( generated_test_source, project_module_system, test_cfg.tests_project_rootdir ) - # Add .js extensions to relative imports for ESM projects - # TypeScript + ESM requires explicit .js extensions even for .ts source files - if project_module_system == ModuleSystem.ES_MODULE: + # Add .js extensions to relative imports for ESM projects — but NOT for Jest, + # which resolves .ts imports without .js extensions via its transform/resolver. + if project_module_system == ModuleSystem.ES_MODULE and test_cfg.test_framework != "jest": from codeflash.languages.javascript.module_system import add_js_extensions_to_relative_imports generated_test_source = add_js_extensions_to_relative_imports(generated_test_source) @@ -2270,10 +2270,13 @@ def get_module_path(self, source_file: Path, project_root: Path, tests_root: Pat # Use os.path.relpath to compute relative path from tests_root to source file rel_path = os.path.relpath(str(source_without_ext), str(tests_root_abs)) - # For ESM, add .js extension (TypeScript convention) - # TypeScript requires imports to reference the OUTPUT file extension (.js), - # even when the source file is .ts. This is required for Node.js ESM resolution. - if module_system == ModuleSystem.ES_MODULE: + # For ESM, add .js extension (TypeScript convention) — but only for Vitest/native ESM. + # Jest resolves .ts imports without .js extensions via its transform/resolver config, + # so adding .js breaks Jest module resolution (Cannot find module '../foo.js'). + from codeflash.languages.test_framework import get_js_test_framework_or_default + + test_framework = get_js_test_framework_or_default() + if module_system == ModuleSystem.ES_MODULE and test_framework != "jest": rel_path = rel_path + ".js" logger.debug( f"!lsp|Module path (ESM): source={source_file_abs}, tests_root={tests_root_abs}, " @@ -2292,7 +2295,7 @@ def get_module_path(self, source_file: Path, project_root: Path, tests_root: Pat # For fallback, also check module system module_system = detect_module_system(project_root, source_file) path_without_ext = "../" + rel_path.with_suffix("").as_posix() - if module_system == ModuleSystem.ES_MODULE: + if module_system == ModuleSystem.ES_MODULE and test_framework != "jest": return path_without_ext + ".js" return path_without_ext diff --git a/codeflash/languages/javascript/test_runner.py b/codeflash/languages/javascript/test_runner.py index c5e029455..4a34e3f5d 100644 --- a/codeflash/languages/javascript/test_runner.py +++ b/codeflash/languages/javascript/test_runner.py @@ -1104,6 +1104,9 @@ def run_jest_benchmarking_tests( jest_env["CODEFLASH_PERF_TARGET_DURATION_MS"] = str(target_duration_ms) jest_env["CODEFLASH_PERF_STABILITY_CHECK"] = "true" if stability_check else "false" jest_env["CODEFLASH_LOOP_INDEX"] = "1" # Initial value for compatibility + # Warmup and calibration for accurate benchmarking + jest_env["CODEFLASH_PERF_WARMUP_ITERATIONS"] = "3" + jest_env["CODEFLASH_PERF_MIN_TIME_NS"] = "5000" # 5us minimum time for calibration # Enable console output for timing markers # Some projects mock console.log in test setup (e.g., based on LOG_LEVEL or DEBUG) @@ -1119,10 +1122,13 @@ def run_jest_benchmarking_tests( # Configure ESM support if project uses ES Modules _configure_esm_environment(jest_env, effective_cwd) - # Increase Node.js heap size for large TypeScript projects + # Increase Node.js heap size and expose GC for accurate benchmarking existing_node_options = jest_env.get("NODE_OPTIONS", "") if "--max-old-space-size" not in existing_node_options: - jest_env["NODE_OPTIONS"] = f"{existing_node_options} --max-old-space-size=4096".strip() + existing_node_options = f"{existing_node_options} --max-old-space-size=4096".strip() + if "--expose-gc" not in existing_node_options: + existing_node_options = f"{existing_node_options} --expose-gc".strip() + jest_env["NODE_OPTIONS"] = existing_node_options # Subprocess timeout: target_duration + 120s headroom for Jest startup # and TS compilation. capturePerf's time budget governs actual looping. diff --git a/codeflash/languages/javascript/vitest_runner.py b/codeflash/languages/javascript/vitest_runner.py index be577a136..ac35bb3cc 100644 --- a/codeflash/languages/javascript/vitest_runner.py +++ b/codeflash/languages/javascript/vitest_runner.py @@ -681,6 +681,17 @@ def run_vitest_benchmarking_tests( vitest_env["CODEFLASH_PERF_TARGET_DURATION_MS"] = str(target_duration_ms) vitest_env["CODEFLASH_PERF_STABILITY_CHECK"] = "true" if stability_check else "false" vitest_env["CODEFLASH_LOOP_INDEX"] = "1" + # Warmup and calibration for accurate benchmarking + vitest_env["CODEFLASH_PERF_WARMUP_ITERATIONS"] = "3" + vitest_env["CODEFLASH_PERF_MIN_TIME_NS"] = "5000" # 5us minimum time for calibration + + # Expose GC for accurate benchmarking (allows capturePerf to force GC before timing) + existing_node_options = vitest_env.get("NODE_OPTIONS", "") + if "--expose-gc" not in existing_node_options: + existing_node_options = f"{existing_node_options} --expose-gc".strip() + if "--max-old-space-size" not in existing_node_options: + existing_node_options = f"{existing_node_options} --max-old-space-size=4096".strip() + vitest_env["NODE_OPTIONS"] = existing_node_options # Set test module for marker identification (use first test file as reference) if test_files: diff --git a/packages/codeflash/runtime/capture.js b/packages/codeflash/runtime/capture.js index 764e2ec26..49a41371a 100644 --- a/packages/codeflash/runtime/capture.js +++ b/packages/codeflash/runtime/capture.js @@ -71,6 +71,16 @@ function getPerfStabilityCheck() { function getPerfCurrentBatch() { return parseInt(process.env.CODEFLASH_PERF_CURRENT_BATCH || '0', 10); } +// Warmup iterations to trigger V8 JIT compilation before timing +function getPerfWarmupIterations() { + return parseInt(process.env.CODEFLASH_PERF_WARMUP_ITERATIONS || '3', 10); +} +// Minimum time in nanoseconds for calibration (5us matches Python's MIN_TIME) +function getPerfMinTimeNs() { + return parseInt(process.env.CODEFLASH_PERF_MIN_TIME_NS || '5000', 10); +} +// Maximum iterations per round to prevent infinite loops on no-op functions +const MAX_CALIBRATION_ITERATIONS = 1_000_000; // Stability constants (matching Python's config_consts.py) const STABILITY_WINDOW_SIZE = 0.35; @@ -89,6 +99,7 @@ if (!process[PERF_STATE_KEY]) { invocationLoopCounts: {}, // Track loops per invocation: {invocationKey: loopCount} invocationRuntimes: {}, // Track runtimes per invocation for stability: {invocationKey: [runtimes]} stableInvocations: {}, // Invocations that have reached stability: {invocationKey: true} + calibrationCache: {}, // Cached calibration results per invocation: {invocationKey: {iterations, isAsync}} }; } const sharedPerfState = process[PERF_STATE_KEY]; @@ -405,6 +416,147 @@ function shouldStopStability(runtimes, window, minWindowSize) { return (rMax - rMin) / rMin <= STABILITY_SPREAD_TOLERANCE; } +/** + * Calibrate the number of iterations needed for accurate timing. + * Mirrors Python's calibrate() in benchmarking/plugin/plugin.py. + * + * Runs the function in increasing iteration counts until total time + * exceeds MIN_TIME_NS. This amortizes timer overhead for fast functions. + * + * @param {Function} fn - The function to calibrate + * @param {Array} args - Arguments to pass to fn + * @returns {number} - Number of iterations per round + */ +function calibrate(fn, args) { + const minTimeNs = getPerfMinTimeNs(); + const minTimeEstimate = minTimeNs / 5; + let iterations = 1; + + while (iterations <= MAX_CALIBRATION_ITERATIONS) { + const start = process.hrtime.bigint(); + for (let i = 0; i < iterations; i++) { + fn(...args); + } + const duration = Number(process.hrtime.bigint() - start); + + if (duration >= minTimeNs) { + break; + } + + if (duration >= minTimeEstimate) { + iterations = Math.min( + Math.ceil(minTimeNs * iterations / duration), + MAX_CALIBRATION_ITERATIONS + ); + } else { + iterations = Math.min(iterations * 10, MAX_CALIBRATION_ITERATIONS); + } + } + + return iterations; +} + +/** + * Async version of calibrate for Promise-returning functions. + * Async functions typically have higher per-call overhead, so iterations + * will usually be 1 (effectively single-call timing, which is correct). + * + * @param {Function} fn - The async function to calibrate + * @param {Array} args - Arguments to pass to fn + * @returns {Promise} - Number of iterations per round + */ +async function calibrateAsync(fn, args) { + const minTimeNs = getPerfMinTimeNs(); + const minTimeEstimate = minTimeNs / 5; + let iterations = 1; + + while (iterations <= MAX_CALIBRATION_ITERATIONS) { + const start = process.hrtime.bigint(); + for (let i = 0; i < iterations; i++) { + await fn(...args); + } + const duration = Number(process.hrtime.bigint() - start); + + if (duration >= minTimeNs) { + break; + } + + if (duration >= minTimeEstimate) { + iterations = Math.min( + Math.ceil(minTimeNs * iterations / duration), + MAX_CALIBRATION_ITERATIONS + ); + } else { + iterations = Math.min(iterations * 10, MAX_CALIBRATION_ITERATIONS); + } + } + + return iterations; +} + +/** + * Perform warmup and calibration for a call site. + * Runs warmup iterations to trigger V8 JIT compilation, then calibrates + * the number of iterations needed for accurate timing. + * + * Results are cached in sharedPerfState.calibrationCache per invocationKey. + * + * @param {string} invocationKey - Unique key for this call site + * @param {Function} fn - The function to benchmark + * @param {Array} args - Arguments to pass to fn + * @returns {{iterations: number, isAsync: boolean}} - Calibration result + */ +function warmupAndCalibrate(invocationKey, fn, args) { + const cached = sharedPerfState.calibrationCache[invocationKey]; + if (cached) return cached; + + const warmupCount = getPerfWarmupIterations(); + + // Warmup: run function to trigger V8 JIT compilation + for (let i = 0; i < warmupCount; i++) { + const result = fn(...args); + // If function returns a Promise, fall through to async calibration + if (result instanceof Promise) { + // Can't do sync warmup for async functions - mark for async path + const asyncResult = { iterations: 1, isAsync: true }; + sharedPerfState.calibrationCache[invocationKey] = asyncResult; + return asyncResult; + } + } + + // Calibrate: find iteration count that exceeds MIN_TIME_NS + const iterations = calibrate(fn, args); + const result = { iterations, isAsync: false }; + sharedPerfState.calibrationCache[invocationKey] = result; + return result; +} + +/** + * Async version of warmupAndCalibrate. + * + * @param {string} invocationKey - Unique key for this call site + * @param {Function} fn - The async function to benchmark + * @param {Array} args - Arguments to pass to fn + * @returns {Promise<{iterations: number, isAsync: boolean}>} - Calibration result + */ +async function warmupAndCalibrateAsync(invocationKey, fn, args) { + const cached = sharedPerfState.calibrationCache[invocationKey]; + if (cached) return cached; + + const warmupCount = getPerfWarmupIterations(); + + // Warmup: run async function to trigger V8 JIT compilation + for (let i = 0; i < warmupCount; i++) { + await fn(...args); + } + + // Calibrate + const iterations = await calibrateAsync(fn, args); + const result = { iterations, isAsync: true }; + sharedPerfState.calibrationCache[invocationKey] = result; + return result; +} + /** * Get high-resolution time in nanoseconds. * Prefers process.hrtime.bigint() for nanosecond precision, @@ -791,6 +943,18 @@ function capturePerf(funcName, lineId, fn, ...args) { // For Vitest (no loop-runner), do all loops internally in a single call const batchSize = hasExternalLoopRunner ? 1 : (shouldLoop ? getPerfLoopCount() : 1); + // Warmup + calibration: run once per call site to trigger JIT and determine iteration count + const calibrationResult = warmupAndCalibrate(invocationKey, fn, args); + if (calibrationResult.isAsync) { + // Function returns Promises - delegate entirely to async path + return _capturePerfAsyncFull( + funcName, lineId, fn, args, + safeModulePath, testClassName, safeTestFunctionName, + invocationKey, shouldLoop, batchSize, hasExternalLoopRunner + ); + } + const iterationsPerRound = calibrationResult.iterations; + // Initialize runtime tracking for this invocation if needed if (!sharedPerfState.invocationRuntimes[invocationKey]) { sharedPerfState.invocationRuntimes[invocationKey] = []; @@ -828,26 +992,29 @@ function capturePerf(funcName, lineId, fn, ...args) { // Format stdout tag with current loop index const testStdoutTag = `${safeModulePath}:${testClassName ? testClassName + '.' : ''}${safeTestFunctionName}:${funcName}:${loopIndex}:${invocationId}`; - // Timing with nanosecond precision + // Print start tag + process.stdout.write(`!$######${testStdoutTag}######$!\n`); + + // Force GC before timing to avoid GC pauses during measurement + if (typeof global.gc === 'function') global.gc(); + + // Timing with nanosecond precision and multi-iteration inner loop let durationNs; try { - const startTime = getTimeNs(); - lastReturnValue = fn(...args); - const endTime = getTimeNs(); - durationNs = getDurationNs(startTime, endTime); - - // Handle promises - for async functions, we need to handle looping differently - // Since we can't use await in the sync loop, delegate to async helper - if (lastReturnValue instanceof Promise) { - // For async functions, delegate to the async looping helper - // Pass along all the context needed for continued looping - return _capturePerfAsync( - funcName, lineId, fn, args, - lastReturnValue, startTime, testStdoutTag, - safeModulePath, testClassName, safeTestFunctionName, - invocationKey, runtimes, batchSize, batchIndex, - shouldLoop, getStabilityWindow - ); + if (iterationsPerRound > 1) { + // Multi-iteration: amortize timer overhead for fast functions + const startTime = getTimeNs(); + for (let i = 0; i < iterationsPerRound; i++) { + lastReturnValue = fn(...args); + } + const endTime = getTimeNs(); + durationNs = Math.round(getDurationNs(startTime, endTime) / iterationsPerRound); + } else { + // Single iteration + const startTime = getTimeNs(); + lastReturnValue = fn(...args); + const endTime = getTimeNs(); + durationNs = getDurationNs(startTime, endTime); } lastError = null; @@ -856,7 +1023,7 @@ function capturePerf(funcName, lineId, fn, ...args) { lastError = e; } - // Print end tag with timing (use process.stdout.write to bypass test framework console interception) + // Print end tag with per-call timing process.stdout.write(`!######${testStdoutTag}:${durationNs}######!\n`); // Update shared loop counter @@ -896,7 +1063,7 @@ function capturePerf(funcName, lineId, fn, ...args) { * Helper to record async timing and update state. * @private */ -function _recordAsyncTiming(startTime, testStdoutTag, durationNs, runtimes) { +function _recordAsyncTiming(testStdoutTag, durationNs, runtimes) { process.stdout.write(`!######${testStdoutTag}:${durationNs}######!\n`); sharedPerfState.totalLoopsCompleted++; if (durationNs > 0) { @@ -905,107 +1072,99 @@ function _recordAsyncTiming(startTime, testStdoutTag, durationNs, runtimes) { } /** - * Async helper for capturePerf to handle async function looping. - * This function awaits promises and continues the benchmark loop properly. + * Full async capturePerf path with warmup, calibration, GC control, and start markers. + * Called when warmupAndCalibrate detects the function returns Promises. * * @private - * @param {string} funcName - Name of the function being benchmarked - * @param {string} lineId - Line identifier for this capture point - * @param {Function} fn - The async function to benchmark - * @param {Array} args - Arguments to pass to fn - * @param {Promise} firstPromise - The first promise that was already started - * @param {number} firstStartTime - Start time of the first execution - * @param {string} firstTestStdoutTag - Timing marker tag for the first execution - * @param {string} safeModulePath - Sanitized module path - * @param {string|null} testClassName - Test class name (if any) - * @param {string} safeTestFunctionName - Sanitized test function name - * @param {string} invocationKey - Unique key for this invocation - * @param {Array} runtimes - Array to collect runtimes for stability checking - * @param {number} batchSize - Number of iterations per batch - * @param {number} startBatchIndex - Index where async looping started - * @param {boolean} shouldLoop - Whether to continue looping - * @param {Function} getStabilityWindow - Function to get stability window size - * @returns {Promise} The last return value from fn */ -async function _capturePerfAsync( +async function _capturePerfAsyncFull( funcName, lineId, fn, args, - firstPromise, firstStartTime, firstTestStdoutTag, safeModulePath, testClassName, safeTestFunctionName, - invocationKey, runtimes, batchSize, startBatchIndex, - shouldLoop, getStabilityWindow + invocationKey, shouldLoop, batchSize, hasExternalLoopRunner ) { - let lastReturnValue; - let lastError = null; + // Async warmup + calibration + const calibrationResult = await warmupAndCalibrateAsync(invocationKey, fn, args); + const iterationsPerRound = calibrationResult.iterations; - // Handle the first promise that was already started - try { - lastReturnValue = await firstPromise; - const asyncEndTime = getTimeNs(); - const asyncDurationNs = getDurationNs(firstStartTime, asyncEndTime); - _recordAsyncTiming(firstStartTime, firstTestStdoutTag, asyncDurationNs, runtimes); - } catch (err) { - const asyncEndTime = getTimeNs(); - const asyncDurationNs = getDurationNs(firstStartTime, asyncEndTime); - _recordAsyncTiming(firstStartTime, firstTestStdoutTag, asyncDurationNs, runtimes); - lastError = err; - // Don't throw yet - we want to record the timing first + // Initialize runtime tracking + if (!sharedPerfState.invocationRuntimes[invocationKey]) { + sharedPerfState.invocationRuntimes[invocationKey] = []; } + const runtimes = sharedPerfState.invocationRuntimes[invocationKey]; + const getStabilityWindow = () => Math.max(getPerfMinLoops(), Math.ceil(runtimes.length * STABILITY_WINDOW_SIZE)); - // If first iteration failed, stop and throw - if (lastError) { - throw lastError; - } + let lastReturnValue; + let lastError = null; - // Continue looping for remaining iterations - for (let batchIndex = startBatchIndex + 1; batchIndex < batchSize; batchIndex++) { - // Check exit conditions before starting next iteration - if (shouldLoop && checkSharedTimeLimit()) { + for (let batchIndex = 0; batchIndex < batchSize; batchIndex++) { + if (!hasExternalLoopRunner && shouldLoop && checkSharedTimeLimit()) { break; } - if (getPerfStabilityCheck() && sharedPerfState.stableInvocations[invocationKey]) { + if (!hasExternalLoopRunner && getPerfStabilityCheck() && sharedPerfState.stableInvocations[invocationKey]) { break; } - // Get the loop index (batch number) for timing markers const loopIndex = getInvocationLoopIndex(invocationKey); - // Check if we've exceeded max loops for this invocation - const totalIterations = getTotalIterations(invocationKey); - if (totalIterations > getPerfLoopCount()) { + const totalIters = getTotalIterations(invocationKey); + if (!hasExternalLoopRunner && totalIters > getPerfLoopCount()) { break; } - // Generate timing marker identifiers const testId = `${safeModulePath}:${testClassName}:${safeTestFunctionName}:${lineId}:${loopIndex}`; const invocationIndex = getInvocationIndex(testId); const invocationId = `${lineId}_${invocationIndex}`; const testStdoutTag = `${safeModulePath}:${testClassName ? testClassName + '.' : ''}${safeTestFunctionName}:${funcName}:${loopIndex}:${invocationId}`; - // Execute and time the function + // Print start tag + process.stdout.write(`!$######${testStdoutTag}######$!\n`); + + // Force GC before timing + if (typeof global.gc === 'function') global.gc(); + + let durationNs; try { - const startTime = getTimeNs(); - lastReturnValue = await fn(...args); - const endTime = getTimeNs(); - const durationNs = getDurationNs(startTime, endTime); - - _recordAsyncTiming(startTime, testStdoutTag, durationNs, runtimes); - - // Check if we've reached performance stability - if (getPerfStabilityCheck() && runtimes.length >= getPerfMinLoops()) { - const window = getStabilityWindow(); - if (shouldStopStability(runtimes, window, getPerfMinLoops())) { - sharedPerfState.stableInvocations[invocationKey] = true; - break; + if (iterationsPerRound > 1) { + const startTime = getTimeNs(); + for (let i = 0; i < iterationsPerRound; i++) { + lastReturnValue = await fn(...args); } + const endTime = getTimeNs(); + durationNs = Math.round(getDurationNs(startTime, endTime) / iterationsPerRound); + } else { + const startTime = getTimeNs(); + lastReturnValue = await fn(...args); + const endTime = getTimeNs(); + durationNs = getDurationNs(startTime, endTime); } + lastError = null; } catch (e) { + durationNs = 0; lastError = e; + } + + _recordAsyncTiming(testStdoutTag, durationNs, runtimes); + + if (!hasExternalLoopRunner && getPerfStabilityCheck() && runtimes.length >= getPerfMinLoops()) { + const window = getStabilityWindow(); + if (shouldStopStability(runtimes, window, getPerfMinLoops())) { + sharedPerfState.stableInvocations[invocationKey] = true; + break; + } + } + + if (!hasExternalLoopRunner && lastError) { break; } } if (lastError) throw lastError; + + if (lastReturnValue === undefined && !lastError) { + return await fn(...args); + } + return lastReturnValue; } @@ -1066,6 +1225,7 @@ function resetPerfState() { sharedPerfState.shouldStop = false; sharedPerfState.invocationRuntimes = {}; sharedPerfState.stableInvocations = {}; + sharedPerfState.calibrationCache = {}; } /** @@ -1166,4 +1326,6 @@ module.exports = { getPerfTargetDurationMs, getPerfStabilityCheck, getPerfCurrentBatch, + getPerfWarmupIterations, + getPerfMinTimeNs, };