diff --git a/codeflash/languages/javascript/support.py b/codeflash/languages/javascript/support.py
index 500c02839..ed1ed80e2 100644
--- a/codeflash/languages/javascript/support.py
+++ b/codeflash/languages/javascript/support.py
@@ -2068,9 +2068,9 @@ def process_generated_test_strings(
             generated_test_source, project_module_system, test_cfg.tests_project_rootdir
         )
 
-        # Add .js extensions to relative imports for ESM projects
-        # TypeScript + ESM requires explicit .js extensions even for .ts source files
-        if project_module_system == ModuleSystem.ES_MODULE:
+        # Add .js extensions to relative imports for ESM projects — but NOT for Jest,
+        # which resolves .ts imports without .js extensions via its transform/resolver.
+        if project_module_system == ModuleSystem.ES_MODULE and test_cfg.test_framework != "jest":
             from codeflash.languages.javascript.module_system import add_js_extensions_to_relative_imports
 
             generated_test_source = add_js_extensions_to_relative_imports(generated_test_source)
@@ -2270,10 +2270,13 @@ def get_module_path(self, source_file: Path, project_root: Path, tests_root: Pat
             # Use os.path.relpath to compute relative path from tests_root to source file
             rel_path = os.path.relpath(str(source_without_ext), str(tests_root_abs))
 
-            # For ESM, add .js extension (TypeScript convention)
-            # TypeScript requires imports to reference the OUTPUT file extension (.js),
-            # even when the source file is .ts. This is required for Node.js ESM resolution.
-            if module_system == ModuleSystem.ES_MODULE:
+            # For ESM, add .js extension (TypeScript convention) — but only for Vitest/native ESM.
+            # Jest resolves .ts imports without .js extensions via its transform/resolver config,
+            # so adding .js breaks Jest module resolution (Cannot find module '../foo.js').
+            from codeflash.languages.test_framework import get_js_test_framework_or_default
+
+            test_framework = get_js_test_framework_or_default()
+            if module_system == ModuleSystem.ES_MODULE and test_framework != "jest":
                 rel_path = rel_path + ".js"
                 logger.debug(
                     f"!lsp|Module path (ESM): source={source_file_abs}, tests_root={tests_root_abs}, "
@@ -2292,7 +2295,7 @@ def get_module_path(self, source_file: Path, project_root: Path, tests_root: Pat
             # For fallback, also check module system
             module_system = detect_module_system(project_root, source_file)
             path_without_ext = "../" + rel_path.with_suffix("").as_posix()
-            if module_system == ModuleSystem.ES_MODULE:
+            if module_system == ModuleSystem.ES_MODULE and test_framework != "jest":
                 return path_without_ext + ".js"
             return path_without_ext
 
diff --git a/codeflash/languages/javascript/test_runner.py b/codeflash/languages/javascript/test_runner.py
index c5e029455..4a34e3f5d 100644
--- a/codeflash/languages/javascript/test_runner.py
+++ b/codeflash/languages/javascript/test_runner.py
@@ -1104,6 +1104,9 @@ def run_jest_benchmarking_tests(
     jest_env["CODEFLASH_PERF_TARGET_DURATION_MS"] = str(target_duration_ms)
     jest_env["CODEFLASH_PERF_STABILITY_CHECK"] = "true" if stability_check else "false"
     jest_env["CODEFLASH_LOOP_INDEX"] = "1"  # Initial value for compatibility
+    # Warmup and calibration for accurate benchmarking
+    jest_env["CODEFLASH_PERF_WARMUP_ITERATIONS"] = "3"
+    jest_env["CODEFLASH_PERF_MIN_TIME_NS"] = "5000"  # 5us minimum time for calibration
 
     # Enable console output for timing markers
     # Some projects mock console.log in test setup (e.g., based on LOG_LEVEL or DEBUG)
@@ -1119,10 +1122,13 @@ def run_jest_benchmarking_tests(
     # Configure ESM support if project uses ES Modules
     _configure_esm_environment(jest_env, effective_cwd)
 
-    # Increase Node.js heap size for large TypeScript projects
+    # Increase Node.js heap size and expose GC for accurate benchmarking
     existing_node_options = jest_env.get("NODE_OPTIONS", "")
     if "--max-old-space-size" not in existing_node_options:
-        jest_env["NODE_OPTIONS"] = f"{existing_node_options} --max-old-space-size=4096".strip()
+        existing_node_options = f"{existing_node_options} --max-old-space-size=4096".strip()
+    if "--expose-gc" not in existing_node_options:
+        existing_node_options = f"{existing_node_options} --expose-gc".strip()
+    jest_env["NODE_OPTIONS"] = existing_node_options
 
     # Subprocess timeout: target_duration + 120s headroom for Jest startup
     # and TS compilation.  capturePerf's time budget governs actual looping.
diff --git a/codeflash/languages/javascript/vitest_runner.py b/codeflash/languages/javascript/vitest_runner.py
index be577a136..ac35bb3cc 100644
--- a/codeflash/languages/javascript/vitest_runner.py
+++ b/codeflash/languages/javascript/vitest_runner.py
@@ -681,6 +681,17 @@ def run_vitest_benchmarking_tests(
     vitest_env["CODEFLASH_PERF_TARGET_DURATION_MS"] = str(target_duration_ms)
     vitest_env["CODEFLASH_PERF_STABILITY_CHECK"] = "true" if stability_check else "false"
     vitest_env["CODEFLASH_LOOP_INDEX"] = "1"
+    # Warmup and calibration for accurate benchmarking
+    vitest_env["CODEFLASH_PERF_WARMUP_ITERATIONS"] = "3"
+    vitest_env["CODEFLASH_PERF_MIN_TIME_NS"] = "5000"  # 5us minimum time for calibration
+
+    # Expose GC for accurate benchmarking (allows capturePerf to force GC before timing)
+    existing_node_options = vitest_env.get("NODE_OPTIONS", "")
+    if "--expose-gc" not in existing_node_options:
+        existing_node_options = f"{existing_node_options} --expose-gc".strip()
+    if "--max-old-space-size" not in existing_node_options:
+        existing_node_options = f"{existing_node_options} --max-old-space-size=4096".strip()
+    vitest_env["NODE_OPTIONS"] = existing_node_options
 
     # Set test module for marker identification (use first test file as reference)
     if test_files:
diff --git a/packages/codeflash/runtime/capture.js b/packages/codeflash/runtime/capture.js
index 764e2ec26..49a41371a 100644
--- a/packages/codeflash/runtime/capture.js
+++ b/packages/codeflash/runtime/capture.js
@@ -71,6 +71,16 @@ function getPerfStabilityCheck() {
 function getPerfCurrentBatch() {
     return parseInt(process.env.CODEFLASH_PERF_CURRENT_BATCH || '0', 10);
 }
+// Warmup iterations to trigger V8 JIT compilation before timing
+function getPerfWarmupIterations() {
+    return parseInt(process.env.CODEFLASH_PERF_WARMUP_ITERATIONS || '3', 10);
+}
+// Minimum time in nanoseconds for calibration (5us matches Python's MIN_TIME)
+function getPerfMinTimeNs() {
+    return parseInt(process.env.CODEFLASH_PERF_MIN_TIME_NS || '5000', 10);
+}
+// Maximum iterations per round to prevent infinite loops on no-op functions
+const MAX_CALIBRATION_ITERATIONS = 1_000_000;
 
 // Stability constants (matching Python's config_consts.py)
 const STABILITY_WINDOW_SIZE = 0.35;
@@ -89,6 +99,7 @@ if (!process[PERF_STATE_KEY]) {
         invocationLoopCounts: {},  // Track loops per invocation: {invocationKey: loopCount}
         invocationRuntimes: {},    // Track runtimes per invocation for stability: {invocationKey: [runtimes]}
         stableInvocations: {},     // Invocations that have reached stability: {invocationKey: true}
+        calibrationCache: {},      // Cached calibration results per invocation: {invocationKey: {iterations, isAsync}}
     };
 }
 const sharedPerfState = process[PERF_STATE_KEY];
@@ -405,6 +416,147 @@ function shouldStopStability(runtimes, window, minWindowSize) {
     return (rMax - rMin) / rMin <= STABILITY_SPREAD_TOLERANCE;
 }
 
+/**
+ * Calibrate the number of iterations needed for accurate timing.
+ * Mirrors Python's calibrate() in benchmarking/plugin/plugin.py.
+ *
+ * Runs the function in increasing iteration counts until total time
+ * exceeds MIN_TIME_NS. This amortizes timer overhead for fast functions.
+ *
+ * @param {Function} fn - The function to calibrate
+ * @param {Array} args - Arguments to pass to fn
+ * @returns {number} - Number of iterations per round
+ */
+function calibrate(fn, args) {
+    const minTimeNs = getPerfMinTimeNs();
+    const minTimeEstimate = minTimeNs / 5;
+    let iterations = 1;
+
+    while (iterations <= MAX_CALIBRATION_ITERATIONS) {
+        const start = process.hrtime.bigint();
+        for (let i = 0; i < iterations; i++) {
+            fn(...args);
+        }
+        const duration = Number(process.hrtime.bigint() - start);
+
+        if (duration >= minTimeNs) {
+            break;
+        }
+
+        if (duration >= minTimeEstimate) {
+            iterations = Math.min(
+                Math.ceil(minTimeNs * iterations / duration),
+                MAX_CALIBRATION_ITERATIONS
+            );
+        } else {
+            iterations = Math.min(iterations * 10, MAX_CALIBRATION_ITERATIONS);
+        }
+    }
+
+    return iterations;
+}
+
+/**
+ * Async version of calibrate for Promise-returning functions.
+ * Async functions typically have higher per-call overhead, so iterations
+ * will usually be 1 (effectively single-call timing, which is correct).
+ *
+ * @param {Function} fn - The async function to calibrate
+ * @param {Array} args - Arguments to pass to fn
+ * @returns {Promise<number>} - Number of iterations per round
+ */
+async function calibrateAsync(fn, args) {
+    const minTimeNs = getPerfMinTimeNs();
+    const minTimeEstimate = minTimeNs / 5;
+    let iterations = 1;
+
+    while (iterations <= MAX_CALIBRATION_ITERATIONS) {
+        const start = process.hrtime.bigint();
+        for (let i = 0; i < iterations; i++) {
+            await fn(...args);
+        }
+        const duration = Number(process.hrtime.bigint() - start);
+
+        if (duration >= minTimeNs) {
+            break;
+        }
+
+        if (duration >= minTimeEstimate) {
+            iterations = Math.min(
+                Math.ceil(minTimeNs * iterations / duration),
+                MAX_CALIBRATION_ITERATIONS
+            );
+        } else {
+            iterations = Math.min(iterations * 10, MAX_CALIBRATION_ITERATIONS);
+        }
+    }
+
+    return iterations;
+}
+
+/**
+ * Perform warmup and calibration for a call site.
+ * Runs warmup iterations to trigger V8 JIT compilation, then calibrates
+ * the number of iterations needed for accurate timing.
+ *
+ * Results are cached in sharedPerfState.calibrationCache per invocationKey.
+ *
+ * @param {string} invocationKey - Unique key for this call site
+ * @param {Function} fn - The function to benchmark
+ * @param {Array} args - Arguments to pass to fn
+ * @returns {{iterations: number, isAsync: boolean}} - Calibration result
+ */
+function warmupAndCalibrate(invocationKey, fn, args) {
+    const cached = sharedPerfState.calibrationCache[invocationKey];
+    if (cached) return cached;
+
+    const warmupCount = getPerfWarmupIterations();
+
+    // Warmup: run function to trigger V8 JIT compilation
+    for (let i = 0; i < warmupCount; i++) {
+        const result = fn(...args);
+        // If function returns a Promise, fall through to async calibration
+        if (result instanceof Promise) {
+            // Can't do sync warmup for async functions - mark for async path
+            const asyncResult = { iterations: 1, isAsync: true };
+            sharedPerfState.calibrationCache[invocationKey] = asyncResult;
+            return asyncResult;
+        }
+    }
+
+    // Calibrate: find iteration count that exceeds MIN_TIME_NS
+    const iterations = calibrate(fn, args);
+    const result = { iterations, isAsync: false };
+    sharedPerfState.calibrationCache[invocationKey] = result;
+    return result;
+}
+
+/**
+ * Async version of warmupAndCalibrate.
+ *
+ * @param {string} invocationKey - Unique key for this call site
+ * @param {Function} fn - The async function to benchmark
+ * @param {Array} args - Arguments to pass to fn
+ * @returns {Promise<{iterations: number, isAsync: boolean}>} - Calibration result
+ */
+async function warmupAndCalibrateAsync(invocationKey, fn, args) {
+    const cached = sharedPerfState.calibrationCache[invocationKey];
+    if (cached) return cached;
+
+    const warmupCount = getPerfWarmupIterations();
+
+    // Warmup: run async function to trigger V8 JIT compilation
+    for (let i = 0; i < warmupCount; i++) {
+        await fn(...args);
+    }
+
+    // Calibrate
+    const iterations = await calibrateAsync(fn, args);
+    const result = { iterations, isAsync: true };
+    sharedPerfState.calibrationCache[invocationKey] = result;
+    return result;
+}
+
 /**
  * Get high-resolution time in nanoseconds.
  * Prefers process.hrtime.bigint() for nanosecond precision,
@@ -791,6 +943,18 @@ function capturePerf(funcName, lineId, fn, ...args) {
     // For Vitest (no loop-runner), do all loops internally in a single call
     const batchSize = hasExternalLoopRunner ? 1 : (shouldLoop ? getPerfLoopCount() : 1);
 
+    // Warmup + calibration: run once per call site to trigger JIT and determine iteration count
+    const calibrationResult = warmupAndCalibrate(invocationKey, fn, args);
+    if (calibrationResult.isAsync) {
+        // Function returns Promises - delegate entirely to async path
+        return _capturePerfAsyncFull(
+            funcName, lineId, fn, args,
+            safeModulePath, testClassName, safeTestFunctionName,
+            invocationKey, shouldLoop, batchSize, hasExternalLoopRunner
+        );
+    }
+    const iterationsPerRound = calibrationResult.iterations;
+
     // Initialize runtime tracking for this invocation if needed
     if (!sharedPerfState.invocationRuntimes[invocationKey]) {
         sharedPerfState.invocationRuntimes[invocationKey] = [];
@@ -828,26 +992,29 @@ function capturePerf(funcName, lineId, fn, ...args) {
         // Format stdout tag with current loop index
         const testStdoutTag = `${safeModulePath}:${testClassName ? testClassName + '.' : ''}${safeTestFunctionName}:${funcName}:${loopIndex}:${invocationId}`;
 
-        // Timing with nanosecond precision
+        // Print start tag
+        process.stdout.write(`!$######${testStdoutTag}######$!\n`);
+
+        // Force GC before timing to avoid GC pauses during measurement
+        if (typeof global.gc === 'function') global.gc();
+
+        // Timing with nanosecond precision and multi-iteration inner loop
         let durationNs;
         try {
-            const startTime = getTimeNs();
-            lastReturnValue = fn(...args);
-            const endTime = getTimeNs();
-            durationNs = getDurationNs(startTime, endTime);
-
-            // Handle promises - for async functions, we need to handle looping differently
-            // Since we can't use await in the sync loop, delegate to async helper
-            if (lastReturnValue instanceof Promise) {
-                // For async functions, delegate to the async looping helper
-                // Pass along all the context needed for continued looping
-                return _capturePerfAsync(
-                    funcName, lineId, fn, args,
-                    lastReturnValue, startTime, testStdoutTag,
-                    safeModulePath, testClassName, safeTestFunctionName,
-                    invocationKey, runtimes, batchSize, batchIndex,
-                    shouldLoop, getStabilityWindow
-                );
+            if (iterationsPerRound > 1) {
+                // Multi-iteration: amortize timer overhead for fast functions
+                const startTime = getTimeNs();
+                for (let i = 0; i < iterationsPerRound; i++) {
+                    lastReturnValue = fn(...args);
+                }
+                const endTime = getTimeNs();
+                durationNs = Math.round(getDurationNs(startTime, endTime) / iterationsPerRound);
+            } else {
+                // Single iteration
+                const startTime = getTimeNs();
+                lastReturnValue = fn(...args);
+                const endTime = getTimeNs();
+                durationNs = getDurationNs(startTime, endTime);
             }
 
             lastError = null;
@@ -856,7 +1023,7 @@ function capturePerf(funcName, lineId, fn, ...args) {
             lastError = e;
         }
 
-        // Print end tag with timing (use process.stdout.write to bypass test framework console interception)
+        // Print end tag with per-call timing
         process.stdout.write(`!######${testStdoutTag}:${durationNs}######!\n`);
 
         // Update shared loop counter
@@ -896,7 +1063,7 @@ function capturePerf(funcName, lineId, fn, ...args) {
  * Helper to record async timing and update state.
  * @private
  */
-function _recordAsyncTiming(startTime, testStdoutTag, durationNs, runtimes) {
+function _recordAsyncTiming(testStdoutTag, durationNs, runtimes) {
     process.stdout.write(`!######${testStdoutTag}:${durationNs}######!\n`);
     sharedPerfState.totalLoopsCompleted++;
     if (durationNs > 0) {
@@ -905,107 +1072,99 @@ function _recordAsyncTiming(startTime, testStdoutTag, durationNs, runtimes) {
 }
 
 /**
- * Async helper for capturePerf to handle async function looping.
- * This function awaits promises and continues the benchmark loop properly.
+ * Full async capturePerf path with warmup, calibration, GC control, and start markers.
+ * Called when warmupAndCalibrate detects the function returns Promises.
  *
  * @private
- * @param {string} funcName - Name of the function being benchmarked
- * @param {string} lineId - Line identifier for this capture point
- * @param {Function} fn - The async function to benchmark
- * @param {Array} args - Arguments to pass to fn
- * @param {Promise} firstPromise - The first promise that was already started
- * @param {number} firstStartTime - Start time of the first execution
- * @param {string} firstTestStdoutTag - Timing marker tag for the first execution
- * @param {string} safeModulePath - Sanitized module path
- * @param {string|null} testClassName - Test class name (if any)
- * @param {string} safeTestFunctionName - Sanitized test function name
- * @param {string} invocationKey - Unique key for this invocation
- * @param {Array<number>} runtimes - Array to collect runtimes for stability checking
- * @param {number} batchSize - Number of iterations per batch
- * @param {number} startBatchIndex - Index where async looping started
- * @param {boolean} shouldLoop - Whether to continue looping
- * @param {Function} getStabilityWindow - Function to get stability window size
- * @returns {Promise} The last return value from fn
  */
-async function _capturePerfAsync(
+async function _capturePerfAsyncFull(
     funcName, lineId, fn, args,
-    firstPromise, firstStartTime, firstTestStdoutTag,
     safeModulePath, testClassName, safeTestFunctionName,
-    invocationKey, runtimes, batchSize, startBatchIndex,
-    shouldLoop, getStabilityWindow
+    invocationKey, shouldLoop, batchSize, hasExternalLoopRunner
 ) {
-    let lastReturnValue;
-    let lastError = null;
+    // Async warmup + calibration
+    const calibrationResult = await warmupAndCalibrateAsync(invocationKey, fn, args);
+    const iterationsPerRound = calibrationResult.iterations;
 
-    // Handle the first promise that was already started
-    try {
-        lastReturnValue = await firstPromise;
-        const asyncEndTime = getTimeNs();
-        const asyncDurationNs = getDurationNs(firstStartTime, asyncEndTime);
-        _recordAsyncTiming(firstStartTime, firstTestStdoutTag, asyncDurationNs, runtimes);
-    } catch (err) {
-        const asyncEndTime = getTimeNs();
-        const asyncDurationNs = getDurationNs(firstStartTime, asyncEndTime);
-        _recordAsyncTiming(firstStartTime, firstTestStdoutTag, asyncDurationNs, runtimes);
-        lastError = err;
-        // Don't throw yet - we want to record the timing first
+    // Initialize runtime tracking
+    if (!sharedPerfState.invocationRuntimes[invocationKey]) {
+        sharedPerfState.invocationRuntimes[invocationKey] = [];
     }
+    const runtimes = sharedPerfState.invocationRuntimes[invocationKey];
+    const getStabilityWindow = () => Math.max(getPerfMinLoops(), Math.ceil(runtimes.length * STABILITY_WINDOW_SIZE));
 
-    // If first iteration failed, stop and throw
-    if (lastError) {
-        throw lastError;
-    }
+    let lastReturnValue;
+    let lastError = null;
 
-    // Continue looping for remaining iterations
-    for (let batchIndex = startBatchIndex + 1; batchIndex < batchSize; batchIndex++) {
-        // Check exit conditions before starting next iteration
-        if (shouldLoop && checkSharedTimeLimit()) {
+    for (let batchIndex = 0; batchIndex < batchSize; batchIndex++) {
+        if (!hasExternalLoopRunner && shouldLoop && checkSharedTimeLimit()) {
             break;
         }
 
-        if (getPerfStabilityCheck() && sharedPerfState.stableInvocations[invocationKey]) {
+        if (!hasExternalLoopRunner && getPerfStabilityCheck() && sharedPerfState.stableInvocations[invocationKey]) {
             break;
         }
 
-        // Get the loop index (batch number) for timing markers
         const loopIndex = getInvocationLoopIndex(invocationKey);
 
-        // Check if we've exceeded max loops for this invocation
-        const totalIterations = getTotalIterations(invocationKey);
-        if (totalIterations > getPerfLoopCount()) {
+        const totalIters = getTotalIterations(invocationKey);
+        if (!hasExternalLoopRunner && totalIters > getPerfLoopCount()) {
             break;
         }
 
-        // Generate timing marker identifiers
         const testId = `${safeModulePath}:${testClassName}:${safeTestFunctionName}:${lineId}:${loopIndex}`;
         const invocationIndex = getInvocationIndex(testId);
         const invocationId = `${lineId}_${invocationIndex}`;
         const testStdoutTag = `${safeModulePath}:${testClassName ? testClassName + '.' : ''}${safeTestFunctionName}:${funcName}:${loopIndex}:${invocationId}`;
 
-        // Execute and time the function
+        // Print start tag
+        process.stdout.write(`!$######${testStdoutTag}######$!\n`);
+
+        // Force GC before timing
+        if (typeof global.gc === 'function') global.gc();
+
+        let durationNs;
         try {
-            const startTime = getTimeNs();
-            lastReturnValue = await fn(...args);
-            const endTime = getTimeNs();
-            const durationNs = getDurationNs(startTime, endTime);
-
-            _recordAsyncTiming(startTime, testStdoutTag, durationNs, runtimes);
-
-            // Check if we've reached performance stability
-            if (getPerfStabilityCheck() && runtimes.length >= getPerfMinLoops()) {
-                const window = getStabilityWindow();
-                if (shouldStopStability(runtimes, window, getPerfMinLoops())) {
-                    sharedPerfState.stableInvocations[invocationKey] = true;
-                    break;
+            if (iterationsPerRound > 1) {
+                const startTime = getTimeNs();
+                for (let i = 0; i < iterationsPerRound; i++) {
+                    lastReturnValue = await fn(...args);
                 }
+                const endTime = getTimeNs();
+                durationNs = Math.round(getDurationNs(startTime, endTime) / iterationsPerRound);
+            } else {
+                const startTime = getTimeNs();
+                lastReturnValue = await fn(...args);
+                const endTime = getTimeNs();
+                durationNs = getDurationNs(startTime, endTime);
             }
+            lastError = null;
         } catch (e) {
+            durationNs = 0;
             lastError = e;
+        }
+
+        _recordAsyncTiming(testStdoutTag, durationNs, runtimes);
+
+        if (!hasExternalLoopRunner && getPerfStabilityCheck() && runtimes.length >= getPerfMinLoops()) {
+            const window = getStabilityWindow();
+            if (shouldStopStability(runtimes, window, getPerfMinLoops())) {
+                sharedPerfState.stableInvocations[invocationKey] = true;
+                break;
+            }
+        }
+
+        if (!hasExternalLoopRunner && lastError) {
             break;
         }
     }
 
     if (lastError) throw lastError;
+
+    if (lastReturnValue === undefined && !lastError) {
+        return await fn(...args);
+    }
+
     return lastReturnValue;
 }
 
@@ -1066,6 +1225,7 @@ function resetPerfState() {
     sharedPerfState.shouldStop = false;
     sharedPerfState.invocationRuntimes = {};
     sharedPerfState.stableInvocations = {};
+    sharedPerfState.calibrationCache = {};
 }
 
 /**
@@ -1166,4 +1326,6 @@ module.exports = {
     getPerfTargetDurationMs,
     getPerfStabilityCheck,
     getPerfCurrentBatch,
+    getPerfWarmupIterations,
+    getPerfMinTimeNs,
 };