From 2b0f633c0fc936b3338f21c8228648eabe03884e Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 03:04:29 -0500 Subject: [PATCH 01/23] perf: reduce java-tracer E2E from ~75 min to ~15 min MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove filterEvens and instanceMethod from the Workload fixture (4→2 functions) and reduce main() loop from 1000→100 rounds. The E2E test only needs to verify the tracer→optimizer pipeline works end-to-end; it doesn't need 4 functions or 1604 replay tests to prove that. Expected impact: ~2 functions × ~8 candidates × fewer replay tests should bring the job from ~75 min down to ~10-15 min. --- .../src/main/java/com/example/Workload.java | 34 ++----------------- 1 file changed, 2 insertions(+), 32 deletions(-) diff --git a/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java b/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java index 7beb2a4ea..7c46668d5 100644 --- a/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java +++ b/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java @@ -1,8 +1,5 @@ package com.example; -import java.util.ArrayList; -import java.util.List; - public class Workload { public static int computeSum(int n) { @@ -21,46 +18,19 @@ public static String repeatString(String s, int count) { return result; } - public static List filterEvens(List numbers) { - List result = new ArrayList<>(); - for (int n : numbers) { - if (n % 2 == 0) { - result.add(n); - } - } - return result; - } - - public int instanceMethod(int x, int y) { - return x * y + computeSum(x); - } - public static void main(String[] args) { // Run methods with large inputs so JFR can capture CPU samples. // Small inputs finish too fast (<1ms) for JFR's 10ms sampling interval. 
- for (int round = 0; round < 1000; round++) { + // 100 rounds is enough for JFR to collect ~10 samples per function. + for (int round = 0; round < 100; round++) { computeSum(100_000); repeatString("hello world ", 1000); - - List nums = new ArrayList<>(); - for (int i = 1; i <= 10_000; i++) nums.add(i); - filterEvens(nums); - - Workload w = new Workload(); - w.instanceMethod(100_000, 42); } // Also call with small inputs for variety in traced args System.out.println("computeSum(100) = " + computeSum(100)); System.out.println("repeatString(\"ab\", 3) = " + repeatString("ab", 3)); - List small = new ArrayList<>(); - for (int i = 1; i <= 10; i++) small.add(i); - System.out.println("filterEvens(1..10) = " + filterEvens(small)); - - Workload w = new Workload(); - System.out.println("instanceMethod(5, 3) = " + w.instanceMethod(5, 3)); - System.out.println("Workload complete."); } } From 21f61ec93d837f6c919e0d008cf037c8989047a7 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 03:08:03 -0500 Subject: [PATCH 02/23] ci: add java_tracer_e2e fixture path to e2e_java change detection The fixture directory wasn't in the path filter, so changes to Workload.java didn't trigger the java E2E tests. 
--- .github/workflows/ci.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 63c83149f..368459608 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -95,7 +95,8 @@ jobs: 'codeflash/languages/java/' 'codeflash/languages/base.py' \ 'codeflash/languages/registry.py' 'codeflash/optimization/' \ 'codeflash/verification/' 'codeflash-java-runtime/' \ - 'code_to_optimize/java/' 'tests/scripts/end_to_end_test_java*' + 'code_to_optimize/java/' 'tests/scripts/end_to_end_test_java*' \ + 'tests/test_languages/fixtures/java_tracer_e2e/' env: MERGE_BASE: ${{ steps.merge_base.outputs.sha }} From 46957e190f0490e1480dcd9a2d4985835f7c27ec Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 03:17:46 -0500 Subject: [PATCH 03/23] fix: update java tracer unit tests for reduced Workload fixture Remove assertions for filterEvens and instanceMethod which were removed from the Workload fixture. Adjust expected invocation counts accordingly. 
--- tests/test_languages/test_java/test_java_tracer_e2e.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/test_languages/test_java/test_java_tracer_e2e.py b/tests/test_languages/test_java/test_java_tracer_e2e.py index 157f23eb6..054b934f7 100644 --- a/tests/test_languages/test_java/test_java_tracer_e2e.py +++ b/tests/test_languages/test_java/test_java_tracer_e2e.py @@ -81,14 +81,12 @@ def test_agent_captures_invocations(self, compiled_workload: Path, trace_db: Pat conn = sqlite3.connect(str(trace_db)) try: rows = conn.execute("SELECT function, classname, descriptor, length(args) FROM function_calls").fetchall() - assert len(rows) >= 5, f"Expected at least 5 captured invocations, got {len(rows)}" + assert len(rows) >= 3, f"Expected at least 3 captured invocations, got {len(rows)}" # Check that specific methods were captured functions = {row[0] for row in rows} assert "computeSum" in functions assert "repeatString" in functions - assert "filterEvens" in functions - assert "instanceMethod" in functions # Verify all rows have non-empty args blobs for row in rows: @@ -97,7 +95,7 @@ def test_agent_captures_invocations(self, compiled_workload: Path, trace_db: Pat # Verify metadata metadata = dict(conn.execute("SELECT key, value FROM metadata").fetchall()) assert "totalCaptures" in metadata - assert int(metadata["totalCaptures"]) >= 5 + assert int(metadata["totalCaptures"]) >= 3 finally: conn.close() @@ -136,7 +134,7 @@ def test_max_function_count_limit(self, compiled_workload: Path, trace_db: Path) conn = sqlite3.connect(str(trace_db)) try: - # computeSum is called 4 times (2 direct + 2 from instanceMethod) + # computeSum is called 2 times (direct calls in main) compute_count = conn.execute( "SELECT COUNT(*) FROM function_calls WHERE function = 'computeSum'" ).fetchone()[0] @@ -296,7 +294,7 @@ def test_full_trace_and_replay_generation(self, compiled_workload: Path, tmp_pat assert len(workload_files) == 1 content = 
workload_files[0].read_text(encoding="utf-8") assert "replay_computeSum" in content - assert "replay_instanceMethod" in content + assert "replay_repeatString" in content def test_package_detection(self) -> None: """Test that package detection finds Java packages from source files.""" From 08aa94c54ac74d07a5265c0871da5b0da4dd67ca Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 03:44:54 -0500 Subject: [PATCH 04/23] perf: reduce java-tracer E2E to single function for ~11 min target MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop repeatString from the Workload fixture (2→1 function). computeSum alone exercises the full tracer→optimizer pipeline (trace → replay tests → optimize → evaluate → rank → explain → review). The second function added no additional pipeline coverage. --- .../src/main/java/com/example/Workload.java | 13 +------------ .../test_java/test_java_tracer_e2e.py | 6 ++---- 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java b/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java index 7c46668d5..ff0ae4d8a 100644 --- a/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java +++ b/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java @@ -10,26 +10,15 @@ public static int computeSum(int n) { return sum; } - public static String repeatString(String s, int count) { - String result = ""; - for (int i = 0; i < count; i++) { - result = result + s; - } - return result; - } - public static void main(String[] args) { - // Run methods with large inputs so JFR can capture CPU samples. + // Run with large inputs so JFR can capture CPU samples. // Small inputs finish too fast (<1ms) for JFR's 10ms sampling interval. - // 100 rounds is enough for JFR to collect ~10 samples per function. 
for (int round = 0; round < 100; round++) { computeSum(100_000); - repeatString("hello world ", 1000); } // Also call with small inputs for variety in traced args System.out.println("computeSum(100) = " + computeSum(100)); - System.out.println("repeatString(\"ab\", 3) = " + repeatString("ab", 3)); System.out.println("Workload complete."); } diff --git a/tests/test_languages/test_java/test_java_tracer_e2e.py b/tests/test_languages/test_java/test_java_tracer_e2e.py index 054b934f7..c7dce2379 100644 --- a/tests/test_languages/test_java/test_java_tracer_e2e.py +++ b/tests/test_languages/test_java/test_java_tracer_e2e.py @@ -81,12 +81,11 @@ def test_agent_captures_invocations(self, compiled_workload: Path, trace_db: Pat conn = sqlite3.connect(str(trace_db)) try: rows = conn.execute("SELECT function, classname, descriptor, length(args) FROM function_calls").fetchall() - assert len(rows) >= 3, f"Expected at least 3 captured invocations, got {len(rows)}" + assert len(rows) >= 2, f"Expected at least 2 captured invocations, got {len(rows)}" # Check that specific methods were captured functions = {row[0] for row in rows} assert "computeSum" in functions - assert "repeatString" in functions # Verify all rows have non-empty args blobs for row in rows: @@ -95,7 +94,7 @@ def test_agent_captures_invocations(self, compiled_workload: Path, trace_db: Pat # Verify metadata metadata = dict(conn.execute("SELECT key, value FROM metadata").fetchall()) assert "totalCaptures" in metadata - assert int(metadata["totalCaptures"]) >= 3 + assert int(metadata["totalCaptures"]) >= 2 finally: conn.close() @@ -294,7 +293,6 @@ def test_full_trace_and_replay_generation(self, compiled_workload: Path, tmp_pat assert len(workload_files) == 1 content = workload_files[0].read_text(encoding="utf-8") assert "replay_computeSum" in content - assert "replay_repeatString" in content def test_package_detection(self) -> None: """Test that package detection finds Java packages from source files.""" From 
0772398c59840d20202b6cb8c2d2fda18f709538 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 04:55:36 -0500 Subject: [PATCH 05/23] perf: optimize Java tracing agent serialization and writes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Reuse ThreadLocal Kryo Output buffers (eliminates #1 allocation hotspot) - Fast-path inline serialization for safe arg types (bypasses executor) - Skip verification roundtrip for known-safe containers (ArrayList, HashMap, etc.) - Batch SQLite inserts (256/txn) with permanent autocommit-off - Switch to ArrayBlockingQueue (no per-element Node allocation) - Add opt-in in-memory SQLite mode (VACUUM INTO at shutdown), enabled in CI - Add timing instrumentation (onEntry, serialization, writes, dump) - Add ProfilingWorkload fixture for benchmarking Benchmark (50k captures): onEntry 5200ms→1200ms (4.3x), avg/capture 0.43ms→0.02ms (21x), writes 3200ms→900ms (3.5x) with in-memory mode. --- .../main/java/com/codeflash/Serializer.java | 131 +++++++++++++---- .../com/codeflash/tracer/TraceRecorder.java | 58 +++++--- .../com/codeflash/tracer/TraceWriter.java | 132 +++++++++++++++--- .../com/codeflash/tracer/TracerConfig.java | 7 + codeflash/languages/java/tracer.py | 2 + .../java/com/example/ProfilingWorkload.java | 91 ++++++++++++ .../test_java/test_java_tracer_e2e.py | 1 - 7 files changed, 355 insertions(+), 67 deletions(-) create mode 100644 tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/ProfilingWorkload.java diff --git a/codeflash-java-runtime/src/main/java/com/codeflash/Serializer.java b/codeflash-java-runtime/src/main/java/com/codeflash/Serializer.java index 80d400935..e1c177ac9 100644 --- a/codeflash-java-runtime/src/main/java/com/codeflash/Serializer.java +++ b/codeflash-java-runtime/src/main/java/com/codeflash/Serializer.java @@ -6,7 +6,6 @@ import com.esotericsoftware.kryo.util.DefaultInstantiatorStrategy; import 
org.objenesis.strategy.StdInstantiatorStrategy; -import java.io.ByteArrayOutputStream; import java.io.InputStream; import java.io.OutputStream; import java.lang.reflect.Field; @@ -36,7 +35,11 @@ public final class Serializer { private static final int MAX_COLLECTION_SIZE = 1000; private static final int BUFFER_SIZE = 4096; - // Thread-local Kryo instances (Kryo is not thread-safe) + // Thread-local Kryo, Output, and IdentityHashMap instances for reuse + private static final ThreadLocal OUTPUT = ThreadLocal.withInitial(() -> new Output(BUFFER_SIZE, -1)); + private static final ThreadLocal> SEEN = + ThreadLocal.withInitial(IdentityHashMap::new); + private static final ThreadLocal KRYO = ThreadLocal.withInitial(() -> { Kryo kryo = new Kryo(); kryo.setRegistrationRequired(false); @@ -89,10 +92,78 @@ private Serializer() { * @return Serialized bytes (may contain KryoPlaceholder for unserializable parts) */ public static byte[] serialize(Object obj) { - Object processed = recursiveProcess(obj, new IdentityHashMap<>(), 0, ""); + // Fast path: if args are all safe types, skip recursive processing entirely + if (obj instanceof Object[] && isSafeArgs((Object[]) obj)) { + return directSerialize(obj); + } + + IdentityHashMap seen = SEEN.get(); + seen.clear(); + Object processed = recursiveProcess(obj, seen, 0, ""); return directSerialize(processed); } + /** + * Attempt fast-path serialization for args that are all known-safe types. + * Returns serialized bytes if all args are safe, or null if the slow path is needed. + * Callers can use this to avoid executor submission overhead for simple arguments. + */ + public static byte[] serializeFast(Object obj) { + if (obj instanceof Object[] && isSafeArgs((Object[]) obj)) { + return directSerialize(obj); + } + return null; + } + + /** + * Check if all elements of an args array can be serialized directly without recursive processing. 
+ */ + private static boolean isSafeArgs(Object[] args) { + for (Object arg : args) { + if (!isSafeForDirectSerialization(arg)) { + return false; + } + } + return true; + } + + /** + * Check if an object is safe to serialize directly without recursive processing. + * Covers: null, simple types, primitive arrays, and safe containers (up to 3 levels deep). + */ + private static boolean isSafeForDirectSerialization(Object obj) { + return isSafeForDirectSerialization(obj, 3); + } + + private static boolean isSafeForDirectSerialization(Object obj, int depthLeft) { + if (obj == null || isSimpleType(obj)) { + return true; + } + if (depthLeft <= 0) { + return false; + } + Class clazz = obj.getClass(); + if (clazz.isArray() && clazz.getComponentType().isPrimitive()) { + return true; + } + if (isSafeContainerType(clazz)) { + if (obj instanceof Collection) { + for (Object item : (Collection) obj) { + if (!isSafeForDirectSerialization(item, depthLeft - 1)) return false; + } + return true; + } + if (obj instanceof Map) { + for (Map.Entry e : ((Map) obj).entrySet()) { + if (!isSafeForDirectSerialization(e.getKey(), depthLeft - 1) || + !isSafeForDirectSerialization(e.getValue(), depthLeft - 1)) return false; + } + return true; + } + } + return false; + } + /** * Deserialize bytes back to an object. * The returned object may contain KryoPlaceholder instances for parts @@ -141,14 +212,15 @@ public static byte[] serializeException(Throwable error) { /** * Direct serialization without recursive processing. + * Reuses a ThreadLocal Output buffer to avoid per-call allocation. 
*/ private static byte[] directSerialize(Object obj) { Kryo kryo = KRYO.get(); - ByteArrayOutputStream baos = new ByteArrayOutputStream(BUFFER_SIZE); - try (Output output = new Output(baos)) { - kryo.writeClassAndObject(output, obj); - } - return baos.toByteArray(); + Output output = OUTPUT.get(); + output.reset(); + kryo.writeClassAndObject(output, obj); + output.flush(); + return output.toBytes(); } /** @@ -201,37 +273,23 @@ private static Object recursiveProcess(Object obj, IdentityHashMap map = (Map) obj; - if (containsOnlySimpleTypes(map)) { - // Simple map - try direct serialization to preserve full size - byte[] serialized = tryDirectSerialize(obj); - if (serialized != null) { - try { - deserialize(serialized); - return obj; // Success - return original - } catch (Exception e) { - // Fall through to recursive handling - } - } + if (isSafeContainerType(clazz) && containsOnlySimpleTypes(map)) { + return obj; } return handleMap(map, seen, depth, path); } if (obj instanceof Collection) { Collection collection = (Collection) obj; - if (containsOnlySimpleTypes(collection)) { - // Simple collection - try direct serialization to preserve full size - byte[] serialized = tryDirectSerialize(obj); - if (serialized != null) { - try { - deserialize(serialized); - return obj; // Success - return original - } catch (Exception e) { - // Fall through to recursive handling - } - } + if (isSafeContainerType(clazz) && containsOnlySimpleTypes(collection)) { + return obj; } return handleCollection(collection, seen, depth, path); } if (clazz.isArray()) { + // Primitive arrays (int[], double[], etc.) 
are directly serializable by Kryo + if (clazz.getComponentType().isPrimitive()) { + return obj; + } return handleArray(obj, seen, depth, path); } @@ -255,6 +313,19 @@ private static Object recursiveProcess(Object obj, IdentityHashMap clazz) { + return clazz == ArrayList.class || + clazz == LinkedList.class || + clazz == HashMap.class || + clazz == LinkedHashMap.class || + clazz == HashSet.class || + clazz == LinkedHashSet.class; + } + /** * Check if a class is known to be unserializable. */ diff --git a/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceRecorder.java b/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceRecorder.java index 28c2d2998..a9acfe855 100644 --- a/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceRecorder.java +++ b/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceRecorder.java @@ -12,6 +12,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; public final class TraceRecorder { @@ -23,6 +24,8 @@ public final class TraceRecorder { private final TraceWriter writer; private final ConcurrentHashMap functionCounts = new ConcurrentHashMap<>(); private final AtomicInteger droppedCaptures = new AtomicInteger(0); + private final AtomicLong totalOnEntryNs = new AtomicLong(0); + private final AtomicLong totalSerializationNs = new AtomicLong(0); private final int maxFunctionCount; private final ExecutorService serializerExecutor; @@ -31,7 +34,7 @@ public final class TraceRecorder { private TraceRecorder(TracerConfig config) { this.config = config; - this.writer = new TraceWriter(config.getDbPath()); + this.writer = new TraceWriter(config.getDbPath(), config.isInMemoryDb()); this.maxFunctionCount = config.getMaxFunctionCount(); this.serializerExecutor = Executors.newCachedThreadPool(r -> { Thread t = new Thread(r, "codeflash-serializer"); @@ -68,6 +71,8 @@ public void 
onEntry(String className, String methodName, String descriptor, private void onEntryImpl(String className, String methodName, String descriptor, int lineNumber, String sourceFile, Object[] args) { + long entryStart = System.nanoTime(); + String qualifiedName = className + "." + methodName + descriptor; // Check per-method count limit @@ -76,30 +81,38 @@ private void onEntryImpl(String className, String methodName, String descriptor, return; } - // Serialize args with timeout to prevent deep object graph traversal from blocking + // Serialize args — try inline fast path first, fall back to async with timeout byte[] argsBlob; - Future future = serializerExecutor.submit(() -> Serializer.serialize(args)); - try { - argsBlob = future.get(SERIALIZATION_TIMEOUT_MS, TimeUnit.MILLISECONDS); - } catch (TimeoutException e) { - future.cancel(true); - droppedCaptures.incrementAndGet(); - System.err.println("[codeflash-tracer] Serialization timed out for " + className + "." - + methodName); - return; - } catch (Exception e) { - Throwable cause = e.getCause() != null ? e.getCause() : e; - droppedCaptures.incrementAndGet(); - System.err.println("[codeflash-tracer] Serialization failed for " + className + "." - + methodName + ": " + cause.getClass().getSimpleName() + ": " + cause.getMessage()); - return; + long serStart = System.nanoTime(); + argsBlob = Serializer.serializeFast(args); + if (argsBlob == null) { + // Slow path: async serialization with timeout for complex/unknown types + Future future = serializerExecutor.submit(() -> Serializer.serialize(args)); + try { + argsBlob = future.get(SERIALIZATION_TIMEOUT_MS, TimeUnit.MILLISECONDS); + } catch (TimeoutException e) { + future.cancel(true); + droppedCaptures.incrementAndGet(); + System.err.println("[codeflash-tracer] Serialization timed out for " + className + "." + + methodName); + return; + } catch (Exception e) { + Throwable cause = e.getCause() != null ? 
e.getCause() : e; + droppedCaptures.incrementAndGet(); + System.err.println("[codeflash-tracer] Serialization failed for " + className + "." + + methodName + ": " + cause.getClass().getSimpleName() + ": " + cause.getMessage()); + return; + } } + totalSerializationNs.addAndGet(System.nanoTime() - serStart); long timeNs = System.nanoTime(); count.incrementAndGet(); writer.recordFunctionCall("call", methodName, className, sourceFile, lineNumber, descriptor, timeNs, argsBlob); + + totalOnEntryNs.addAndGet(System.nanoTime() - entryStart); } public void flush() { @@ -126,5 +139,16 @@ public void flush() { System.err.println("[codeflash-tracer] Captured " + totalCaptures + " invocations across " + functionCounts.size() + " methods" + (dropped > 0 ? " (" + dropped + " dropped due to serialization timeout/failure)" : "")); + + // Timing summary + long onEntryMs = totalOnEntryNs.get() / 1_000_000; + long serMs = totalSerializationNs.get() / 1_000_000; + String writerSummary = writer.getTimingSummary(); + System.err.println("[codeflash-tracer] Timing: onEntry=" + onEntryMs + "ms" + + " (serialization=" + serMs + "ms)" + + (totalCaptures > 0 + ? 
" avg=" + String.format("%.2f", (double) onEntryMs / totalCaptures) + "ms/capture" + : "") + + " " + writerSummary); } } diff --git a/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceWriter.java b/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceWriter.java index a9eeabf60..7bc5032cb 100644 --- a/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceWriter.java +++ b/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceWriter.java @@ -7,30 +7,49 @@ import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Statement; +import java.util.ArrayList; +import java.util.List; import java.util.Map; import java.util.concurrent.BlockingQueue; -import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; public final class TraceWriter { + private static final int BATCH_SIZE = 256; + private static final int QUEUE_CAPACITY = 65536; + private final Connection connection; + private final Path diskPath; + private final boolean inMemory; private final BlockingQueue writeQueue; private final Thread writerThread; private final AtomicBoolean running; + private final AtomicLong totalWriteNs = new AtomicLong(0); + private final AtomicInteger batchCount = new AtomicInteger(0); + private final AtomicInteger taskCount = new AtomicInteger(0); + private volatile long dumpToFileMs = 0; private PreparedStatement insertFunctionCall; private PreparedStatement insertMetadata; - public TraceWriter(String dbPath) { - this.writeQueue = new LinkedBlockingQueue<>(); + public TraceWriter(String dbPath, boolean inMemory) { + this.diskPath = Paths.get(dbPath).toAbsolutePath(); + this.diskPath.getParent().toFile().mkdirs(); + this.inMemory = inMemory; + this.writeQueue = new ArrayBlockingQueue<>(QUEUE_CAPACITY); 
this.running = new AtomicBoolean(true); try { - Path path = Paths.get(dbPath).toAbsolutePath(); - path.getParent().toFile().mkdirs(); - this.connection = DriverManager.getConnection("jdbc:sqlite:" + path); + if (inMemory) { + // In-memory database for maximum write performance; flushed to disk via VACUUM INTO at close() + this.connection = DriverManager.getConnection("jdbc:sqlite::memory:"); + } else { + this.connection = DriverManager.getConnection("jdbc:sqlite:" + this.diskPath); + } initializeSchema(); prepareStatements(); @@ -45,8 +64,12 @@ public TraceWriter(String dbPath) { private void initializeSchema() throws SQLException { try (Statement stmt = connection.createStatement()) { - stmt.execute("PRAGMA journal_mode=WAL"); - stmt.execute("PRAGMA synchronous=NORMAL"); + if (!inMemory) { + stmt.execute("PRAGMA journal_mode=WAL"); + stmt.execute("PRAGMA synchronous=NORMAL"); + stmt.execute("PRAGMA cache_size=-16000"); + stmt.execute("PRAGMA temp_store=MEMORY"); + } stmt.execute( "CREATE TABLE IF NOT EXISTS function_calls(" + @@ -69,6 +92,8 @@ private void initializeSchema() throws SQLException { stmt.execute("CREATE INDEX IF NOT EXISTS idx_fc_class_func ON function_calls(classname, function)"); } + // Keep autocommit off for writer performance — commit explicitly per batch + connection.setAutoCommit(false); } private void prepareStatements() throws SQLException { @@ -95,29 +120,65 @@ public void writeMetadata(Map metadata) { } private void writerLoop() { + List batch = new ArrayList<>(BATCH_SIZE); + while (running.get() || !writeQueue.isEmpty()) { try { WriteTask task = writeQueue.poll(100, TimeUnit.MILLISECONDS); - if (task != null) { - task.execute(this); + if (task == null) { + continue; } + batch.add(task); + writeQueue.drainTo(batch, BATCH_SIZE - 1); + executeBatch(batch); + batch.clear(); } catch (InterruptedException e) { Thread.currentThread().interrupt(); break; - } catch (SQLException e) { - System.err.println("[codeflash-tracer] Write error: " + 
e.getMessage()); } } // Drain remaining - WriteTask task; - while ((task = writeQueue.poll()) != null) { + writeQueue.drainTo(batch); + if (!batch.isEmpty()) { + executeBatch(batch); + } + } + + private void executeBatch(List batch) { + if (batch.isEmpty()) { + return; + } + + long writeStart = System.nanoTime(); + boolean hasFunctionCalls = false; + try { + for (WriteTask task : batch) { + if (task instanceof FunctionCallTask) { + ((FunctionCallTask) task).bindParameters(this); + insertFunctionCall.addBatch(); + hasFunctionCalls = true; + } else { + task.execute(this); + } + } + + if (hasFunctionCalls) { + insertFunctionCall.executeBatch(); + } + + connection.commit(); + } catch (SQLException e) { + System.err.println("[codeflash-tracer] Batch write error (" + batch.size() + " tasks): " + e.getMessage()); try { - task.execute(this); - } catch (SQLException e) { - System.err.println("[codeflash-tracer] Write error: " + e.getMessage()); + connection.rollback(); + } catch (SQLException re) { + System.err.println("[codeflash-tracer] Rollback failed: " + re.getMessage()); } } + totalWriteNs.addAndGet(System.nanoTime() - writeStart); + batchCount.incrementAndGet(); + taskCount.addAndGet(batch.size()); } public void flush() { @@ -131,6 +192,15 @@ public void flush() { } } + public String getTimingSummary() { + long writeMs = totalWriteNs.get() / 1_000_000; + int batches = batchCount.get(); + int tasks = taskCount.get(); + return "writes=" + writeMs + "ms (" + tasks + " tasks in " + batches + " batches" + + (batches > 0 ? 
", avg=" + String.format("%.1f", (double) tasks / batches) + " tasks/batch" : "") + + ") dump=" + dumpToFileMs + "ms"; + } + public void close() { running.set(false); try { @@ -139,9 +209,29 @@ public void close() { Thread.currentThread().interrupt(); } + // Close prepared statements first — required before VACUUM try { if (insertFunctionCall != null) insertFunctionCall.close(); if (insertMetadata != null) insertMetadata.close(); + } catch (SQLException e) { + System.err.println("[codeflash-tracer] Error closing statements: " + e.getMessage()); + } + + if (inMemory) { + long dumpStart = System.nanoTime(); + try { + connection.commit(); + connection.setAutoCommit(true); + try (Statement stmt = connection.createStatement()) { + stmt.execute("VACUUM INTO '" + diskPath.toString().replace("'", "''") + "'"); + } + } catch (SQLException e) { + System.err.println("[codeflash-tracer] Failed to write trace DB to disk: " + e.getMessage()); + } + dumpToFileMs = (System.nanoTime() - dumpStart) / 1_000_000; + } + + try { if (connection != null) connection.close(); } catch (SQLException e) { System.err.println("[codeflash-tracer] Error closing TraceWriter: " + e.getMessage()); @@ -177,8 +267,7 @@ private static class FunctionCallTask implements WriteTask { this.argsBlob = argsBlob; } - @Override - public void execute(TraceWriter writer) throws SQLException { + void bindParameters(TraceWriter writer) throws SQLException { writer.insertFunctionCall.setString(1, type); writer.insertFunctionCall.setString(2, function); writer.insertFunctionCall.setString(3, classname); @@ -187,6 +276,11 @@ public void execute(TraceWriter writer) throws SQLException { writer.insertFunctionCall.setString(6, descriptor); writer.insertFunctionCall.setLong(7, timeNs); writer.insertFunctionCall.setBytes(8, argsBlob); + } + + @Override + public void execute(TraceWriter writer) throws SQLException { + bindParameters(writer); writer.insertFunctionCall.executeUpdate(); } } diff --git 
a/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TracerConfig.java b/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TracerConfig.java index 8fe799d2f..9e2675c00 100644 --- a/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TracerConfig.java +++ b/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TracerConfig.java @@ -30,6 +30,9 @@ public final class TracerConfig { @SerializedName("projectRoot") private String projectRoot = ""; + @SerializedName("inMemoryDb") + private boolean inMemoryDb = false; + private static final Gson GSON = new Gson(); public static TracerConfig parse(String agentArgs) { @@ -89,6 +92,10 @@ public String getProjectRoot() { return projectRoot; } + public boolean isInMemoryDb() { + return inMemoryDb; + } + public boolean shouldInstrumentClass(String internalClassName) { String dotName = internalClassName.replace('/', '.'); diff --git a/codeflash/languages/java/tracer.py b/codeflash/languages/java/tracer.py index 50506797e..8e8348681 100644 --- a/codeflash/languages/java/tracer.py +++ b/codeflash/languages/java/tracer.py @@ -6,6 +6,7 @@ import subprocess from typing import TYPE_CHECKING +from codeflash.code_utils.env_utils import is_ci from codeflash.languages.java.line_profiler import find_agent_jar from codeflash.languages.java.replay_test import generate_replay_tests @@ -114,6 +115,7 @@ def create_tracer_config( "maxFunctionCount": max_function_count, "timeout": timeout, "projectRoot": str(project_root.resolve()) if project_root else "", + "inMemoryDb": is_ci(), } config_path = trace_db_path.with_suffix(".config.json") diff --git a/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/ProfilingWorkload.java b/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/ProfilingWorkload.java new file mode 100644 index 000000000..b7c48c625 --- /dev/null +++ b/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/ProfilingWorkload.java @@ -0,0 +1,91 @@ +package 
com.example; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Profiling workload for benchmarking the codeflash tracing agent. + * Exercises different argument types to stress serialization paths. + */ +public class ProfilingWorkload { + + // 1. Primitives only — cheapest to serialize + public static int addInts(int a, int b) { + return a + b; + } + + // 2. String arguments — moderate serialization cost + public static String concatStrings(String a, String b) { + return a + b; + } + + // 3. Array argument — requires element-by-element serialization + public static int sumArray(int[] values) { + int sum = 0; + for (int v : values) { + sum += v; + } + return sum; + } + + // 4. Collection argument — triggers recursive Kryo processing + public static int sumList(List values) { + int sum = 0; + for (int v : values) { + sum += v; + } + return sum; + } + + // 5. Nested map — deep object graph, expensive serialization + public static int countMapEntries(Map> data) { + int count = 0; + for (List list : data.values()) { + count += list.size(); + } + return count; + } + + public static void main(String[] args) { + int iterations = 1000; + + // 1. Primitives + for (int i = 0; i < iterations; i++) { + addInts(i, i + 1); + } + + // 2. Strings + for (int i = 0; i < iterations; i++) { + concatStrings("hello-" + i, "-world"); + } + + // 3. Arrays + int[] arr = new int[100]; + for (int i = 0; i < arr.length; i++) arr[i] = i; + for (int i = 0; i < iterations; i++) { + sumArray(arr); + } + + // 4. Lists + List list = new ArrayList<>(100); + for (int i = 0; i < 100; i++) list.add(i); + for (int i = 0; i < iterations; i++) { + sumList(list); + } + + // 5. 
Nested maps + Map<String, List<Integer>> map = new HashMap<>(); + for (int i = 0; i < 10; i++) { + List<Integer> vals = new ArrayList<>(); + for (int j = 0; j < 10; j++) vals.add(j); + map.put("key-" + i, vals); + } + for (int i = 0; i < iterations; i++) { + countMapEntries(map); + } + + System.out.println("ProfilingWorkload complete."); + } +} diff --git a/tests/test_languages/test_java/test_java_tracer_e2e.py b/tests/test_languages/test_java/test_java_tracer_e2e.py index c7dce2379..2ea87de9c 100644 --- a/tests/test_languages/test_java/test_java_tracer_e2e.py +++ b/tests/test_languages/test_java/test_java_tracer_e2e.py @@ -196,7 +196,6 @@ def test_generates_test_files(self, compiled_workload: Path, trace_db: Path, tmp assert "import org.junit.jupiter.api.Test;" in content assert "ReplayHelper" in content assert "replay_computeSum_0" in content - assert "replay_repeatString_0" in content def test_metadata_parsing(self, compiled_workload: Path, trace_db: Path, tmp_path: Path) -> None: """Test that metadata comments are correctly parsed from generated tests.""" From e81f25f8256078acf657c124e587682fe39e6c66 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 05:05:17 -0500 Subject: [PATCH 06/23] fix: remove stale repeatString assertions from integration tests repeatString was removed from Workload.java in the E2E reduction. 
--- tests/test_languages/test_java/test_java_tracer_integration.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_languages/test_java/test_java_tracer_integration.py b/tests/test_languages/test_java/test_java_tracer_integration.py index f6ffefdf2..a8dbc5118 100644 --- a/tests/test_languages/test_java/test_java_tracer_integration.py +++ b/tests/test_languages/test_java/test_java_tracer_integration.py @@ -88,7 +88,6 @@ def test_discover_functions_from_replay_tests(self, traced_workload: tuple) -> N assert func.file_path == file_path assert "computeSum" in all_func_names - assert "repeatString" in all_func_names def test_discover_tests_for_replay_tests(self, traced_workload: tuple) -> None: """Test that test discovery maps replay tests to source functions.""" @@ -112,7 +111,6 @@ def test_discover_tests_for_replay_tests(self, traced_workload: tuple) -> None: matched_func_names.add(func_name) assert "computeSum" in matched_func_names, f"computeSum not found in: {result.keys()}" - assert "repeatString" in matched_func_names, f"repeatString not found in: {result.keys()}" # Each function should have at least one test for func_name, test_infos in result.items(): From 01e22152c70500724b3f578877e47344f3083870 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 05:07:53 -0500 Subject: [PATCH 07/23] flexing --- .../src/main/java/com/example/Workload.java | 45 ++++++++++++++++++- .../test_java/test_java_tracer_e2e.py | 11 +++-- .../test_java/test_java_tracer_integration.py | 2 + 3 files changed, 53 insertions(+), 5 deletions(-) diff --git a/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java b/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java index ff0ae4d8a..7beb2a4ea 100644 --- a/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java +++ b/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java @@ -1,5 +1,8 @@ package 
com.example; +import java.util.ArrayList; +import java.util.List; + public class Workload { public static int computeSum(int n) { @@ -10,15 +13,53 @@ public static int computeSum(int n) { return sum; } + public static String repeatString(String s, int count) { + String result = ""; + for (int i = 0; i < count; i++) { + result = result + s; + } + return result; + } + + public static List<Integer> filterEvens(List<Integer> numbers) { + List<Integer> result = new ArrayList<>(); + for (int n : numbers) { + if (n % 2 == 0) { + result.add(n); + } + } + return result; + } + + public int instanceMethod(int x, int y) { + return x * y + computeSum(x); + } + public static void main(String[] args) { - // Run with large inputs so JFR can capture CPU samples. + // Run methods with large inputs so JFR can capture CPU samples. // Small inputs finish too fast (<1ms) for JFR's 10ms sampling interval. - for (int round = 0; round < 100; round++) { + for (int round = 0; round < 1000; round++) { computeSum(100_000); + repeatString("hello world ", 1000); + + List<Integer> nums = new ArrayList<>(); + for (int i = 1; i <= 10_000; i++) nums.add(i); + filterEvens(nums); + + Workload w = new Workload(); + w.instanceMethod(100_000, 42); } // Also call with small inputs for variety in traced args System.out.println("computeSum(100) = " + computeSum(100)); + System.out.println("repeatString(\"ab\", 3) = " + repeatString("ab", 3)); + + List<Integer> small = new ArrayList<>(); + for (int i = 1; i <= 10; i++) small.add(i); + System.out.println("filterEvens(1..10) = " + filterEvens(small)); + + Workload w = new Workload(); + System.out.println("instanceMethod(5, 3) = " + w.instanceMethod(5, 3)); System.out.println("Workload complete."); } diff --git a/tests/test_languages/test_java/test_java_tracer_e2e.py b/tests/test_languages/test_java/test_java_tracer_e2e.py index 2ea87de9c..157f23eb6 100644 --- a/tests/test_languages/test_java/test_java_tracer_e2e.py +++ b/tests/test_languages/test_java/test_java_tracer_e2e.py @@ -81,11 +81,14 @@ def 
test_agent_captures_invocations(self, compiled_workload: Path, trace_db: Pat conn = sqlite3.connect(str(trace_db)) try: rows = conn.execute("SELECT function, classname, descriptor, length(args) FROM function_calls").fetchall() - assert len(rows) >= 2, f"Expected at least 2 captured invocations, got {len(rows)}" + assert len(rows) >= 5, f"Expected at least 5 captured invocations, got {len(rows)}" # Check that specific methods were captured functions = {row[0] for row in rows} assert "computeSum" in functions + assert "repeatString" in functions + assert "filterEvens" in functions + assert "instanceMethod" in functions # Verify all rows have non-empty args blobs for row in rows: @@ -94,7 +97,7 @@ def test_agent_captures_invocations(self, compiled_workload: Path, trace_db: Pat # Verify metadata metadata = dict(conn.execute("SELECT key, value FROM metadata").fetchall()) assert "totalCaptures" in metadata - assert int(metadata["totalCaptures"]) >= 2 + assert int(metadata["totalCaptures"]) >= 5 finally: conn.close() @@ -133,7 +136,7 @@ def test_max_function_count_limit(self, compiled_workload: Path, trace_db: Path) conn = sqlite3.connect(str(trace_db)) try: - # computeSum is called 2 times (direct calls in main) + # computeSum is called 4 times (2 direct + 2 from instanceMethod) compute_count = conn.execute( "SELECT COUNT(*) FROM function_calls WHERE function = 'computeSum'" ).fetchone()[0] @@ -196,6 +199,7 @@ def test_generates_test_files(self, compiled_workload: Path, trace_db: Path, tmp assert "import org.junit.jupiter.api.Test;" in content assert "ReplayHelper" in content assert "replay_computeSum_0" in content + assert "replay_repeatString_0" in content def test_metadata_parsing(self, compiled_workload: Path, trace_db: Path, tmp_path: Path) -> None: """Test that metadata comments are correctly parsed from generated tests.""" @@ -292,6 +296,7 @@ def test_full_trace_and_replay_generation(self, compiled_workload: Path, tmp_pat assert len(workload_files) == 1 content = 
workload_files[0].read_text(encoding="utf-8") assert "replay_computeSum" in content + assert "replay_instanceMethod" in content def test_package_detection(self) -> None: """Test that package detection finds Java packages from source files.""" diff --git a/tests/test_languages/test_java/test_java_tracer_integration.py b/tests/test_languages/test_java/test_java_tracer_integration.py index a8dbc5118..f6ffefdf2 100644 --- a/tests/test_languages/test_java/test_java_tracer_integration.py +++ b/tests/test_languages/test_java/test_java_tracer_integration.py @@ -88,6 +88,7 @@ def test_discover_functions_from_replay_tests(self, traced_workload: tuple) -> N assert func.file_path == file_path assert "computeSum" in all_func_names + assert "repeatString" in all_func_names def test_discover_tests_for_replay_tests(self, traced_workload: tuple) -> None: """Test that test discovery maps replay tests to source functions.""" @@ -111,6 +112,7 @@ def test_discover_tests_for_replay_tests(self, traced_workload: tuple) -> None: matched_func_names.add(func_name) assert "computeSum" in matched_func_names, f"computeSum not found in: {result.keys()}" + assert "repeatString" in matched_func_names, f"repeatString not found in: {result.keys()}" # Each function should have at least one test for func_name, test_infos in result.items(): From bfe6f3a828c8ca0c53272ce1d528c064d81b425a Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 05:16:49 -0500 Subject: [PATCH 08/23] Remove debug timing instrumentation from tracer Strip AtomicLong accumulators, System.nanoTime() timing, and getTimingSummary() that were added for profiling. No functional change. 
--- .../com/codeflash/tracer/TraceRecorder.java | 20 ------------------ .../com/codeflash/tracer/TraceWriter.java | 21 ------------------- 2 files changed, 41 deletions(-) diff --git a/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceRecorder.java b/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceRecorder.java index a9acfe855..8596d3ee8 100644 --- a/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceRecorder.java +++ b/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceRecorder.java @@ -12,7 +12,6 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; public final class TraceRecorder { @@ -24,8 +23,6 @@ public final class TraceRecorder { private final TraceWriter writer; private final ConcurrentHashMap<String, AtomicInteger> functionCounts = new ConcurrentHashMap<>(); private final AtomicInteger droppedCaptures = new AtomicInteger(0); - private final AtomicLong totalOnEntryNs = new AtomicLong(0); - private final AtomicLong totalSerializationNs = new AtomicLong(0); private final int maxFunctionCount; private final ExecutorService serializerExecutor; @@ -71,8 +68,6 @@ public void onEntry(String className, String methodName, String descriptor, private void onEntryImpl(String className, String methodName, String descriptor, int lineNumber, String sourceFile, Object[] args) { - long entryStart = System.nanoTime(); - String qualifiedName = className + "." 
+ methodName + descriptor; // Check per-method count limit @@ -83,7 +78,6 @@ private void onEntryImpl(String className, String methodName, String descriptor, // Serialize args — try inline fast path first, fall back to async with timeout byte[] argsBlob; - long serStart = System.nanoTime(); argsBlob = Serializer.serializeFast(args); if (argsBlob == null) { // Slow path: async serialization with timeout for complex/unknown types @@ -104,15 +98,12 @@ private void onEntryImpl(String className, String methodName, String descriptor, return; } } - totalSerializationNs.addAndGet(System.nanoTime() - serStart); long timeNs = System.nanoTime(); count.incrementAndGet(); writer.recordFunctionCall("call", methodName, className, sourceFile, lineNumber, descriptor, timeNs, argsBlob); - - totalOnEntryNs.addAndGet(System.nanoTime() - entryStart); } public void flush() { @@ -139,16 +130,5 @@ public void flush() { System.err.println("[codeflash-tracer] Captured " + totalCaptures + " invocations across " + functionCounts.size() + " methods" + (dropped > 0 ? " (" + dropped + " dropped due to serialization timeout/failure)" : "")); - - // Timing summary - long onEntryMs = totalOnEntryNs.get() / 1_000_000; - long serMs = totalSerializationNs.get() / 1_000_000; - String writerSummary = writer.getTimingSummary(); - System.err.println("[codeflash-tracer] Timing: onEntry=" + onEntryMs + "ms" - + " (serialization=" + serMs + "ms)" - + (totalCaptures > 0 - ? 
" avg=" + String.format("%.2f", (double) onEntryMs / totalCaptures) + "ms/capture" - : "") - + " " + writerSummary); } } diff --git a/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceWriter.java b/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceWriter.java index 7bc5032cb..a75872089 100644 --- a/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceWriter.java +++ b/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceWriter.java @@ -14,8 +14,6 @@ import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; public final class TraceWriter { @@ -28,10 +26,6 @@ public final class TraceWriter { private final BlockingQueue writeQueue; private final Thread writerThread; private final AtomicBoolean running; - private final AtomicLong totalWriteNs = new AtomicLong(0); - private final AtomicInteger batchCount = new AtomicInteger(0); - private final AtomicInteger taskCount = new AtomicInteger(0); - private volatile long dumpToFileMs = 0; private PreparedStatement insertFunctionCall; private PreparedStatement insertMetadata; @@ -150,7 +144,6 @@ private void executeBatch(List batch) { return; } - long writeStart = System.nanoTime(); boolean hasFunctionCalls = false; try { for (WriteTask task : batch) { @@ -176,9 +169,6 @@ private void executeBatch(List batch) { System.err.println("[codeflash-tracer] Rollback failed: " + re.getMessage()); } } - totalWriteNs.addAndGet(System.nanoTime() - writeStart); - batchCount.incrementAndGet(); - taskCount.addAndGet(batch.size()); } public void flush() { @@ -192,15 +182,6 @@ public void flush() { } } - public String getTimingSummary() { - long writeMs = totalWriteNs.get() / 1_000_000; - int batches = batchCount.get(); - int tasks = taskCount.get(); - return "writes=" + writeMs + "ms (" + tasks + " tasks in " + batches + " 
batches" - + (batches > 0 ? ", avg=" + String.format("%.1f", (double) tasks / batches) + " tasks/batch" : "") - + ") dump=" + dumpToFileMs + "ms"; - } - public void close() { running.set(false); try { @@ -218,7 +199,6 @@ public void close() { } if (inMemory) { - long dumpStart = System.nanoTime(); try { connection.commit(); connection.setAutoCommit(true); @@ -228,7 +208,6 @@ public void close() { } catch (SQLException e) { System.err.println("[codeflash-tracer] Failed to write trace DB to disk: " + e.getMessage()); } - dumpToFileMs = (System.nanoTime() - dumpStart) / 1_000_000; } try { From fefccd5935ac4e8d758f999ea807733b3a0ffbcd Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 05:28:34 -0500 Subject: [PATCH 09/23] fix: drop JFR inline event config that breaks JDK 11 The jdk.ExecutionSample#period=1ms syntax in -XX:StartFlightRecording is only supported on JDK 13+. On JDK 11 (CI), it causes "Failure when starting JFR on_create_vm_2" and no JFR file is created. The settings=profile preset still provides 10ms CPU sampling. 
--- codeflash/languages/java/tracer.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/codeflash/languages/java/tracer.py b/codeflash/languages/java/tracer.py index 8e8348681..b971e5526 100644 --- a/codeflash/languages/java/tracer.py +++ b/codeflash/languages/java/tracer.py @@ -124,12 +124,7 @@ def create_tracer_config( def build_jfr_env(self, jfr_file: Path) -> dict[str, str]: env = os.environ.copy() - # Use profile settings with increased sampling frequency (1ms instead of default 10ms) - # This captures more samples for short-running programs - jfr_opts = ( - f"-XX:StartFlightRecording=filename={jfr_file.resolve()},settings=profile,dumponexit=true" - ",jdk.ExecutionSample#period=1ms" - ) + jfr_opts = f"-XX:StartFlightRecording=filename={jfr_file.resolve()},settings=profile,dumponexit=true" existing = env.get("JAVA_TOOL_OPTIONS", "") env["JAVA_TOOL_OPTIONS"] = f"{existing} {jfr_opts}".strip() return env From e191f74aa6e4d0c133452325ff102e1a1ff64b90 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 06:16:39 -0500 Subject: [PATCH 10/23] chore: add diagnostic logging to compare_test_results Temporary instrumentation to debug flaky futurehouse E2E test. Logs matched/skipped/timed-out counts and did_all_timeout state. 
--- codeflash/verification/equivalence.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/codeflash/verification/equivalence.py b/codeflash/verification/equivalence.py index f660e35ea..630cec8b6 100644 --- a/codeflash/verification/equivalence.py +++ b/codeflash/verification/equivalence.py @@ -41,11 +41,17 @@ def compare_test_results( ) test_diffs: list[TestDiff] = [] did_all_timeout: bool = True + _matched_count = 0 + _skipped_cdd_only = 0 + _skipped_init_state = 0 + _skipped_none = 0 + _timed_out_count = 0 for test_id in test_ids_superset: original_test_result = original_results.get_by_unique_invocation_loop_id(test_id) cdd_test_result = candidate_results.get_by_unique_invocation_loop_id(test_id) if cdd_test_result is not None and original_test_result is None: + _skipped_cdd_only += 1 continue # If helper function instance_state verification is not present, that's ok. continue if ( @@ -53,11 +59,15 @@ def compare_test_results( and original_test_result.verification_type == VerificationType.INIT_STATE_HELPER and cdd_test_result is None ): + _skipped_init_state += 1 continue if original_test_result is None or cdd_test_result is None: + _skipped_none += 1 continue + _matched_count += 1 did_all_timeout = did_all_timeout and original_test_result.timed_out if original_test_result.timed_out: + _timed_out_count += 1 continue superset_obj = False if original_test_result.verification_type and ( @@ -148,6 +158,23 @@ def compare_test_results( ) sys.setrecursionlimit(original_recursion_limit) + logger.info( + f"[compare_test_results] superset={len(test_ids_superset)} matched={_matched_count} " + f"skipped(cdd_only={_skipped_cdd_only} init_state={_skipped_init_state} none={_skipped_none}) " + f"timed_out={_timed_out_count} did_all_timeout={did_all_timeout} diffs={len(test_diffs)} " + f"pass_fail_only={pass_fail_only} orig_len={len(original_results)} cand_len={len(candidate_results)}" + ) + if did_all_timeout and _matched_count > 0 and 
_matched_count <= 3: + # Log a few sample matched IDs for debugging + _sample_ids = [] + for test_id in test_ids_superset: + orig = original_results.get_by_unique_invocation_loop_id(test_id) + cand = candidate_results.get_by_unique_invocation_loop_id(test_id) + if orig is not None and cand is not None: + _sample_ids.append(f" id={test_id} orig_timed_out={orig.timed_out} orig_pass={orig.did_pass}") + if len(_sample_ids) >= 3: + break + logger.info(f"[compare_test_results] sample matched: {_sample_ids}") if did_all_timeout: return False, test_diffs return len(test_diffs) == 0, test_diffs From 986654b7e67d4b004af3cc98328251383cefd19a Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 06:38:08 -0500 Subject: [PATCH 11/23] fix: pin PYTHONHASHSEED=0 in test env and enhance diff diagnostics Set PYTHONHASHSEED=0 in test subprocess environments so original and candidate runs use identical hash behavior, eliminating a source of non-deterministic return-value comparisons. Also upgrade diff logging from debug to info level with actual types and repr values for DID_PASS, RETURN_VALUE, and STDOUT diffs. --- codeflash/languages/function_optimizer.py | 5 +++++ codeflash/verification/equivalence.py | 26 +++++++++++++++++------ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/codeflash/languages/function_optimizer.py b/codeflash/languages/function_optimizer.py index d9b4918fd..9c42070ef 100644 --- a/codeflash/languages/function_optimizer.py +++ b/codeflash/languages/function_optimizer.py @@ -3253,6 +3253,11 @@ def get_test_env( test_env["CODEFLASH_TEST_ITERATION"] = str(codeflash_test_iteration) test_env["CODEFLASH_TRACER_DISABLE"] = str(codeflash_tracer_disable) test_env["CODEFLASH_LOOP_INDEX"] = str(codeflash_loop_index) + # Pin PYTHONHASHSEED so original and candidate test processes use the same hash seed. 
+ # Without this, each subprocess gets a random seed, which can cause non-deterministic + # iteration order in sets/dicts and lead to flaky return-value comparisons. + if "PYTHONHASHSEED" not in test_env: + test_env["PYTHONHASHSEED"] = "0" return test_env def line_profiler_step( diff --git a/codeflash/verification/equivalence.py b/codeflash/verification/equivalence.py index 630cec8b6..68cf216de 100644 --- a/codeflash/verification/equivalence.py +++ b/codeflash/verification/equivalence.py @@ -111,6 +111,11 @@ def compare_test_results( original_pytest_error=original_pytest_error, ) ) + logger.info( + f"[DIFF] scope=DID_PASS test_id={test_id} " + f"orig_pass={original_test_result.did_pass} cand_pass={cdd_test_result.did_pass} " + f"test_type={original_test_result.test_type} cand_error={cdd_pytest_error[:200] if cdd_pytest_error else 'none'}" + ) elif not pass_fail_only and not comparator( original_test_result.return_value, cdd_test_result.return_value, superset_obj=superset_obj @@ -129,13 +134,15 @@ def compare_test_results( ) try: - logger.debug( - f"File Name: {original_test_result.file_name}\n" - f"Test Type: {original_test_result.test_type}\n" - f"Verification Type: {original_test_result.verification_type}\n" - f"Invocation ID: {original_test_result.id}\n" - f"Original return value: {original_test_result.return_value}\n" - f"Candidate return value: {cdd_test_result.return_value}\n" + _orig_rv = original_test_result.return_value + _cand_rv = cdd_test_result.return_value + logger.info( + f"[DIFF] scope=RETURN_VALUE test_id={test_id} " + f"orig_type={type(_orig_rv).__name__} cand_type={type(_cand_rv).__name__} " + f"orig_pass={original_test_result.did_pass} cand_pass={cdd_test_result.did_pass} " + f"test_type={original_test_result.test_type} " + f"orig_repr={safe_repr(_orig_rv)[:200]} " + f"cand_repr={safe_repr(_cand_rv)[:200]}" ) except Exception as e: logger.error(e) @@ -156,6 +163,11 @@ def compare_test_results( original_pytest_error=original_pytest_error, ) ) + 
logger.info( + f"[DIFF] scope=STDOUT test_id={test_id} " + f"orig_stdout={str(original_test_result.stdout)[:200]} " + f"cand_stdout={str(cdd_test_result.stdout)[:200]}" + ) sys.setrecursionlimit(original_recursion_limit) logger.info( From 82ec301fad6f78921dee84b9a98fcd22e76427c7 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 06:49:43 -0500 Subject: [PATCH 12/23] chore: remove diagnostic logging from compare_test_results --- codeflash/verification/equivalence.py | 53 ++++----------------------- 1 file changed, 7 insertions(+), 46 deletions(-) diff --git a/codeflash/verification/equivalence.py b/codeflash/verification/equivalence.py index 68cf216de..f660e35ea 100644 --- a/codeflash/verification/equivalence.py +++ b/codeflash/verification/equivalence.py @@ -41,17 +41,11 @@ def compare_test_results( ) test_diffs: list[TestDiff] = [] did_all_timeout: bool = True - _matched_count = 0 - _skipped_cdd_only = 0 - _skipped_init_state = 0 - _skipped_none = 0 - _timed_out_count = 0 for test_id in test_ids_superset: original_test_result = original_results.get_by_unique_invocation_loop_id(test_id) cdd_test_result = candidate_results.get_by_unique_invocation_loop_id(test_id) if cdd_test_result is not None and original_test_result is None: - _skipped_cdd_only += 1 continue # If helper function instance_state verification is not present, that's ok. 
continue if ( @@ -59,15 +53,11 @@ def compare_test_results( and original_test_result.verification_type == VerificationType.INIT_STATE_HELPER and cdd_test_result is None ): - _skipped_init_state += 1 continue if original_test_result is None or cdd_test_result is None: - _skipped_none += 1 continue - _matched_count += 1 did_all_timeout = did_all_timeout and original_test_result.timed_out if original_test_result.timed_out: - _timed_out_count += 1 continue superset_obj = False if original_test_result.verification_type and ( @@ -111,11 +101,6 @@ def compare_test_results( original_pytest_error=original_pytest_error, ) ) - logger.info( - f"[DIFF] scope=DID_PASS test_id={test_id} " - f"orig_pass={original_test_result.did_pass} cand_pass={cdd_test_result.did_pass} " - f"test_type={original_test_result.test_type} cand_error={cdd_pytest_error[:200] if cdd_pytest_error else 'none'}" - ) elif not pass_fail_only and not comparator( original_test_result.return_value, cdd_test_result.return_value, superset_obj=superset_obj @@ -134,15 +119,13 @@ def compare_test_results( ) try: - _orig_rv = original_test_result.return_value - _cand_rv = cdd_test_result.return_value - logger.info( - f"[DIFF] scope=RETURN_VALUE test_id={test_id} " - f"orig_type={type(_orig_rv).__name__} cand_type={type(_cand_rv).__name__} " - f"orig_pass={original_test_result.did_pass} cand_pass={cdd_test_result.did_pass} " - f"test_type={original_test_result.test_type} " - f"orig_repr={safe_repr(_orig_rv)[:200]} " - f"cand_repr={safe_repr(_cand_rv)[:200]}" + logger.debug( + f"File Name: {original_test_result.file_name}\n" + f"Test Type: {original_test_result.test_type}\n" + f"Verification Type: {original_test_result.verification_type}\n" + f"Invocation ID: {original_test_result.id}\n" + f"Original return value: {original_test_result.return_value}\n" + f"Candidate return value: {cdd_test_result.return_value}\n" ) except Exception as e: logger.error(e) @@ -163,30 +146,8 @@ def compare_test_results( 
original_pytest_error=original_pytest_error, ) ) - logger.info( - f"[DIFF] scope=STDOUT test_id={test_id} " - f"orig_stdout={str(original_test_result.stdout)[:200]} " - f"cand_stdout={str(cdd_test_result.stdout)[:200]}" - ) sys.setrecursionlimit(original_recursion_limit) - logger.info( - f"[compare_test_results] superset={len(test_ids_superset)} matched={_matched_count} " - f"skipped(cdd_only={_skipped_cdd_only} init_state={_skipped_init_state} none={_skipped_none}) " - f"timed_out={_timed_out_count} did_all_timeout={did_all_timeout} diffs={len(test_diffs)} " - f"pass_fail_only={pass_fail_only} orig_len={len(original_results)} cand_len={len(candidate_results)}" - ) - if did_all_timeout and _matched_count > 0 and _matched_count <= 3: - # Log a few sample matched IDs for debugging - _sample_ids = [] - for test_id in test_ids_superset: - orig = original_results.get_by_unique_invocation_loop_id(test_id) - cand = candidate_results.get_by_unique_invocation_loop_id(test_id) - if orig is not None and cand is not None: - _sample_ids.append(f" id={test_id} orig_timed_out={orig.timed_out} orig_pass={orig.did_pass}") - if len(_sample_ids) >= 3: - break - logger.info(f"[compare_test_results] sample matched: {_sample_ids}") if did_all_timeout: return False, test_diffs return len(test_diffs) == 0, test_diffs From 70260f22b351474110e5a6fa46b36f545c8adfa7 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 07:39:49 -0500 Subject: [PATCH 13/23] fix: ensure language_version is detected before optimization API calls JavaSupport.ensure_runtime_environment() was never called during the optimization flow, so _language_version stayed None and the backend received language_version=null. The LLM had no Java version constraint, causing it to generate Java 16+ APIs (e.g. Stream.toList()) for Java 11 projects. 
--- codeflash/languages/function_optimizer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/codeflash/languages/function_optimizer.py b/codeflash/languages/function_optimizer.py index 9c42070ef..71ad03b18 100644 --- a/codeflash/languages/function_optimizer.py +++ b/codeflash/languages/function_optimizer.py @@ -489,6 +489,7 @@ def __init__( else function_to_optimize.file_path.read_text(encoding="utf8") ) self.language_support = current_language_support() + self.language_support.ensure_runtime_environment(self.project_root) if not function_to_optimize_ast: self.function_to_optimize_ast = self._resolve_function_ast( self.function_to_optimize_source_code, function_to_optimize.function_name, function_to_optimize.parents From b05561ef9ecbcb55a46f6256be0eec2c6c198484 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 07:51:08 -0500 Subject: [PATCH 14/23] chore: replace console.print with logger.info for Java project detection --- codeflash/tracer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/codeflash/tracer.py b/codeflash/tracer.py index 48920be8c..4a7d24585 100644 --- a/codeflash/tracer.py +++ b/codeflash/tracer.py @@ -349,10 +349,10 @@ def _run_java_tracer(existing_args: Namespace | None = None) -> ArgumentParser: max_function_count = getattr(config, "max_function_count", 256) timeout = int(getattr(config, "timeout", None) or getattr(config, "tracer_timeout", 0) or 0) - console.print("[bold]Java project detected[/]") - console.print(f" Project root: {project_root}") - console.print(f" Module root: {getattr(config, 'module_root', '?')}") - console.print(f" Tests root: {getattr(config, 'tests_root', '?')}") + logger.info("Java project detected") + logger.info(" Project root: %s", project_root) + logger.info(" Module root: %s", getattr(config, "module_root", "?")) + logger.info(" Tests root: %s", getattr(config, "tests_root", "?")) from codeflash.code_utils.code_utils import get_run_tmp_file from 
codeflash.languages.java.tracer import JavaTracer, run_java_tracer From 151df774a4ca2764b2bad28475e61413614402fe Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 08:29:46 -0500 Subject: [PATCH 15/23] perf: use --effort low for java-tracer E2E to reduce CI time --- tests/scripts/end_to_end_test_java_tracer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/scripts/end_to_end_test_java_tracer.py b/tests/scripts/end_to_end_test_java_tracer.py index 0f9f8a2ff..5d92662ec 100644 --- a/tests/scripts/end_to_end_test_java_tracer.py +++ b/tests/scripts/end_to_end_test_java_tracer.py @@ -51,6 +51,8 @@ def run_test(expected_improvement_pct: int) -> bool: "-m", "codeflash.main", "--no-pr", + "--effort", + "low", "optimize", "java", "-cp", From ecf4e63eca4032217ea2db4eafd027ef8f0f66e1 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 09:02:45 -0500 Subject: [PATCH 16/23] perf: reduce Java E2E looping time to 5s and cache runtime JAR build Make TOTAL_LOOPING_TIME configurable via CODEFLASH_LOOPING_TIME env var (defaults to 10s). Set to 5s in Java E2E CI jobs to cut verification time per candidate. Also cache the codeflash-runtime JAR keyed on source hash to skip mvn install when unchanged. 
--- .github/workflows/ci.yaml | 9 +++++++++ codeflash/code_utils/config_consts.py | 3 ++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 368459608..e6e55298b 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -436,6 +436,7 @@ jobs: RETRY_DELAY: 5 EXPECTED_IMPROVEMENT_PCT: ${{ matrix.expected_improvement }} CODEFLASH_END_TO_END: 1 + CODEFLASH_LOOPING_TIME: 5 steps: - uses: actions/checkout@v6 with: @@ -469,7 +470,15 @@ jobs: - name: Install dependencies run: uv sync + - name: Cache codeflash-runtime JAR + id: runtime-jar-cache + uses: actions/cache@v4 + with: + path: ~/.m2/repository/io/codeflash + key: codeflash-runtime-${{ hashFiles('codeflash-java-runtime/pom.xml', 'codeflash-java-runtime/src/**') }} + - name: Build and install codeflash-runtime JAR + if: steps.runtime-jar-cache.outputs.cache-hit != 'true' run: | cd codeflash-java-runtime mvn install -q -DskipTests diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py index ff6494d73..c8cb8d884 100644 --- a/codeflash/code_utils/config_consts.py +++ b/codeflash/code_utils/config_consts.py @@ -1,5 +1,6 @@ from __future__ import annotations +import os from enum import Enum from typing import Any, Union @@ -17,7 +18,7 @@ CONCURRENCY_FACTOR = 10 # Number of concurrent executions for concurrency benchmark MAX_TEST_FUNCTION_RUNS = 50 MAX_CUMULATIVE_TEST_RUNTIME_NANOSECONDS = 100e6 # 100ms -TOTAL_LOOPING_TIME = 10.0 # 10 second candidate benchmarking budget +TOTAL_LOOPING_TIME = float(os.getenv("CODEFLASH_LOOPING_TIME", "10.0")) # candidate benchmarking budget (seconds) COVERAGE_THRESHOLD = 60.0 MIN_TESTCASE_PASSED_THRESHOLD = 6 REPEAT_OPTIMIZATION_PROBABILITY = 0.1 From 0d928f2b49c7dccf18e3f28e43fd5d4616c7bb99 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 09:05:30 -0500 Subject: [PATCH 17/23] perf: merge Java tracer into single-pass JVM invocation Combine JFR 
profiling and argument capture agent into one JAVA_TOOL_OPTIONS string, running the target program once instead of twice. JFR and javaagent are orthogonal JVM features that coexist without conflict. Keeps build_jfr_env/build_agent_env for standalone use. --- codeflash/languages/java/tracer.py | 38 +++++++++++++++++++----------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/codeflash/languages/java/tracer.py b/codeflash/languages/java/tracer.py index b971e5526..bdeec34e0 100644 --- a/codeflash/languages/java/tracer.py +++ b/codeflash/languages/java/tracer.py @@ -61,7 +61,7 @@ def _run_java_with_graceful_timeout( class JavaTracer: - """Orchestrates two-stage Java tracing: JFR profiling + argument capture.""" + """Orchestrates Java tracing: combined JFR profiling + argument capture in a single JVM invocation.""" def trace( self, @@ -72,29 +72,23 @@ def trace( max_function_count: int = 256, timeout: int = 0, ) -> tuple[Path, Path]: - """Run the Java program twice: once for profiling, once for arg capture. + """Run the Java program once with both JFR profiling and argument capture. Returns (trace_db_path, jfr_file_path). 
""" jfr_file = trace_db_path.with_suffix(".jfr") trace_db_path.parent.mkdir(parents=True, exist_ok=True) - # Stage 1: JFR Profiling - logger.info("Stage 1: Running JFR profiling...") - jfr_env = self.build_jfr_env(jfr_file) - _run_java_with_graceful_timeout(java_command, jfr_env, timeout, "JFR profiling") - - if not jfr_file.exists(): - logger.warning("JFR file was not created at %s", jfr_file) - - # Stage 2: Argument Capture via Tracing Agent - logger.info("Stage 2: Running argument capture...") config_path = self.create_tracer_config( trace_db_path, packages, project_root=project_root, max_function_count=max_function_count, timeout=timeout ) - agent_env = self.build_agent_env(config_path) - _run_java_with_graceful_timeout(java_command, agent_env, timeout, "Argument capture") + combined_env = self.build_combined_env(jfr_file, config_path) + + logger.info("Running combined JFR profiling + argument capture...") + _run_java_with_graceful_timeout(java_command, combined_env, timeout, "Combined tracing") + if not jfr_file.exists(): + logger.warning("JFR file was not created at %s", jfr_file) if not trace_db_path.exists(): logger.error("Trace database was not created at %s", trace_db_path) @@ -141,6 +135,22 @@ def build_agent_env(self, config_path: Path, classpath: str | None = None) -> di env["JAVA_TOOL_OPTIONS"] = f"{existing} {agent_opts}".strip() return env + def build_combined_env(self, jfr_file: Path, config_path: Path, classpath: str | None = None) -> dict[str, str]: + """Build env with both JFR recording and tracing agent in a single JAVA_TOOL_OPTIONS.""" + env = os.environ.copy() + jfr_opts = ( + f"-XX:StartFlightRecording=filename={jfr_file.resolve()},settings=profile,dumponexit=true" + ",jdk.ExecutionSample#period=1ms" + ) + agent_jar = find_agent_jar(classpath=classpath) + if agent_jar is None: + msg = "codeflash-runtime JAR not found, cannot run tracing agent" + raise FileNotFoundError(msg) + agent_opts = f"{ADD_OPENS_FLAGS} 
-javaagent:{agent_jar}=trace={config_path.resolve()}" + existing = env.get("JAVA_TOOL_OPTIONS", "") + env["JAVA_TOOL_OPTIONS"] = f"{existing} {jfr_opts} {agent_opts}".strip() + return env + @staticmethod def detect_packages_from_source(module_root: Path) -> list[str]: """Scan Java files for package declarations and return unique package prefixes.""" From 013c83f5e49659d8232a3cac512516b6fad14919 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 09:11:02 -0500 Subject: [PATCH 18/23] fix: drop jdk.ExecutionSample#period from combined JFR opts (unsupported on Java 11) --- codeflash/languages/java/tracer.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/codeflash/languages/java/tracer.py b/codeflash/languages/java/tracer.py index bdeec34e0..649369d97 100644 --- a/codeflash/languages/java/tracer.py +++ b/codeflash/languages/java/tracer.py @@ -138,10 +138,7 @@ def build_agent_env(self, config_path: Path, classpath: str | None = None) -> di def build_combined_env(self, jfr_file: Path, config_path: Path, classpath: str | None = None) -> dict[str, str]: """Build env with both JFR recording and tracing agent in a single JAVA_TOOL_OPTIONS.""" env = os.environ.copy() - jfr_opts = ( - f"-XX:StartFlightRecording=filename={jfr_file.resolve()},settings=profile,dumponexit=true" - ",jdk.ExecutionSample#period=1ms" - ) + jfr_opts = f"-XX:StartFlightRecording=filename={jfr_file.resolve()},settings=profile,dumponexit=true" agent_jar = find_agent_jar(classpath=classpath) if agent_jar is None: msg = "codeflash-runtime JAR not found, cannot run tracing agent" From cb87763a2d508b14bd9444ed570fbc63450d5b41 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 12:58:54 -0500 Subject: [PATCH 19/23] fix: skip environment approval gate for trusted users on workflow_dispatch --- .github/workflows/ci.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 
e6e55298b..3b5b1c74c 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -258,7 +258,7 @@ jobs: - name: init-optimization script: end_to_end_test_init_optimization.py expected_improvement: 10 - environment: ${{ (github.event_name == 'workflow_dispatch' || (contains(toJSON(github.event.pull_request.files.*.filename), '.github/workflows/') && github.event.pull_request.user.login != 'misrasaurabh1' && github.event.pull_request.user.login != 'KRRT7')) && 'external-trusted-contributors' || '' }} + environment: ${{ ((github.event_name == 'workflow_dispatch' && github.actor != 'misrasaurabh1' && github.actor != 'KRRT7') || (contains(toJSON(github.event.pull_request.files.*.filename), '.github/workflows/') && github.event.pull_request.user.login != 'misrasaurabh1' && github.event.pull_request.user.login != 'KRRT7')) && 'external-trusted-contributors' || '' }} runs-on: ubuntu-latest env: CODEFLASH_AIS_SERVER: prod @@ -345,7 +345,7 @@ jobs: script: end_to_end_test_js_ts_class.py js_project_dir: code_to_optimize/js/code_to_optimize_ts expected_improvement: 30 - environment: ${{ (github.event_name == 'workflow_dispatch' || (contains(toJSON(github.event.pull_request.files.*.filename), '.github/workflows/') && github.event.pull_request.user.login != 'misrasaurabh1' && github.event.pull_request.user.login != 'KRRT7')) && 'external-trusted-contributors' || '' }} + environment: ${{ ((github.event_name == 'workflow_dispatch' && github.actor != 'misrasaurabh1' && github.actor != 'KRRT7') || (contains(toJSON(github.event.pull_request.files.*.filename), '.github/workflows/') && github.event.pull_request.user.login != 'misrasaurabh1' && github.event.pull_request.user.login != 'KRRT7')) && 'external-trusted-contributors' || '' }} runs-on: ubuntu-latest env: CODEFLASH_AIS_SERVER: prod @@ -425,7 +425,7 @@ jobs: script: end_to_end_test_java_void_optimization.py expected_improvement: 70 remove_git: true - environment: ${{ (github.event_name == 'workflow_dispatch' || 
(contains(toJSON(github.event.pull_request.files.*.filename), '.github/workflows/') && github.event.pull_request.user.login != 'misrasaurabh1' && github.event.pull_request.user.login != 'KRRT7')) && 'external-trusted-contributors' || '' }} + environment: ${{ ((github.event_name == 'workflow_dispatch' && github.actor != 'misrasaurabh1' && github.actor != 'KRRT7') || (contains(toJSON(github.event.pull_request.files.*.filename), '.github/workflows/') && github.event.pull_request.user.login != 'misrasaurabh1' && github.event.pull_request.user.login != 'KRRT7')) && 'external-trusted-contributors' || '' }} runs-on: ubuntu-latest env: CODEFLASH_AIS_SERVER: prod From 40f16b565ab768578e53ae6e6be5d536312a0237 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 13:09:36 -0500 Subject: [PATCH 20/23] ci: add standalone Java E2E workflow for isolated testing --- .github/workflows/java-e2e.yaml | 77 +++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 .github/workflows/java-e2e.yaml diff --git a/.github/workflows/java-e2e.yaml b/.github/workflows/java-e2e.yaml new file mode 100644 index 000000000..0bfc979b6 --- /dev/null +++ b/.github/workflows/java-e2e.yaml @@ -0,0 +1,77 @@ +name: Java E2E Tests +on: + workflow_dispatch: + +jobs: + e2e-java: + strategy: + fail-fast: false + matrix: + include: + - name: java-fibonacci-nogit + script: end_to_end_test_java_fibonacci.py + expected_improvement: 70 + remove_git: true + - name: java-tracer + script: end_to_end_test_java_tracer.py + expected_improvement: 10 + - name: java-void-optimization-nogit + script: end_to_end_test_java_void_optimization.py + expected_improvement: 70 + remove_git: true + runs-on: ubuntu-latest + env: + CODEFLASH_AIS_SERVER: prod + POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }} + CODEFLASH_API_KEY: ${{ secrets.CODEFLASH_API_KEY }} + COLUMNS: 110 + MAX_RETRIES: 3 + RETRY_DELAY: 5 + EXPECTED_IMPROVEMENT_PCT: ${{ matrix.expected_improvement }} + CODEFLASH_END_TO_END: 1 
+ CODEFLASH_LOOPING_TIME: 5 + steps: + - uses: actions/checkout@v6 + + - name: Set up JDK 11 + uses: actions/setup-java@v5 + with: + java-version: '11' + distribution: 'temurin' + cache: maven + + - name: Install uv + uses: astral-sh/setup-uv@v8.0.0 + with: + python-version: 3.11.6 + enable-cache: true + + - name: Install dependencies + run: uv sync + + - name: Cache codeflash-runtime JAR + id: runtime-jar-cache + uses: actions/cache@v4 + with: + path: ~/.m2/repository/io/codeflash + key: codeflash-runtime-${{ hashFiles('codeflash-java-runtime/pom.xml', 'codeflash-java-runtime/src/**') }} + + - name: Build and install codeflash-runtime JAR + if: steps.runtime-jar-cache.outputs.cache-hit != 'true' + run: | + cd codeflash-java-runtime + mvn install -q -DskipTests + + - name: Remove .git + if: matrix.remove_git + run: | + if [ -d ".git" ]; then + sudo rm -rf .git + echo ".git directory removed." + else + echo ".git directory does not exist." + exit 1 + fi + + - name: Run E2E test + run: uv run python tests/scripts/${{ matrix.script }} From 5c778dfad40d6473024ec6824b5d2f7d5a634887 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 15:08:03 -0500 Subject: [PATCH 21/23] perf: trim tracer E2E workload to single function (repeatString) Keep only repeatString which reliably produces 284% improvement. Drop computeSum (marginal 16%), filterEvens and instanceMethod (no optimization found). Reduces tracer E2E from ~1h27m to ~21m. 
--- .../src/main/java/com/example/Workload.java | 44 +------------------ 1 file changed, 1 insertion(+), 43 deletions(-) diff --git a/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java b/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java index 7beb2a4ea..7dfdad95f 100644 --- a/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java +++ b/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java @@ -1,18 +1,7 @@ package com.example; -import java.util.ArrayList; -import java.util.List; - public class Workload { - public static int computeSum(int n) { - int sum = 0; - for (int i = 0; i < n; i++) { - sum += i; - } - return sum; - } - public static String repeatString(String s, int count) { String result = ""; for (int i = 0; i < count; i++) { @@ -21,46 +10,15 @@ public static String repeatString(String s, int count) { return result; } - public static List filterEvens(List numbers) { - List result = new ArrayList<>(); - for (int n : numbers) { - if (n % 2 == 0) { - result.add(n); - } - } - return result; - } - - public int instanceMethod(int x, int y) { - return x * y + computeSum(x); - } - public static void main(String[] args) { - // Run methods with large inputs so JFR can capture CPU samples. - // Small inputs finish too fast (<1ms) for JFR's 10ms sampling interval. + // Run with large inputs so JFR can capture CPU samples. 
for (int round = 0; round < 1000; round++) { - computeSum(100_000); repeatString("hello world ", 1000); - - List nums = new ArrayList<>(); - for (int i = 1; i <= 10_000; i++) nums.add(i); - filterEvens(nums); - - Workload w = new Workload(); - w.instanceMethod(100_000, 42); } // Also call with small inputs for variety in traced args - System.out.println("computeSum(100) = " + computeSum(100)); System.out.println("repeatString(\"ab\", 3) = " + repeatString("ab", 3)); - List small = new ArrayList<>(); - for (int i = 1; i <= 10; i++) small.add(i); - System.out.println("filterEvens(1..10) = " + filterEvens(small)); - - Workload w = new Workload(); - System.out.println("instanceMethod(5, 3) = " + w.instanceMethod(5, 3)); - System.out.println("Workload complete."); } } From 0cb67c1a17af4b846f3c7810179046206c8f14b4 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 15:12:48 -0500 Subject: [PATCH 22/23] fix: add --no-pr to codeflash optimize workflow to prevent CI-opened PRs --- .github/workflows/codeflash-optimize.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/codeflash-optimize.yaml b/.github/workflows/codeflash-optimize.yaml index 9884665da..ab08aa1f8 100644 --- a/.github/workflows/codeflash-optimize.yaml +++ b/.github/workflows/codeflash-optimize.yaml @@ -43,4 +43,4 @@ jobs: - name: ⚡️Codeflash Optimization id: optimize_code run: | - uv run codeflash --benchmark --testgen-review \ No newline at end of file + uv run codeflash --benchmark --testgen-review --no-pr \ No newline at end of file From b737f71e46a5c21f518ecb11f66532d3bbb5766a Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 16:05:27 -0500 Subject: [PATCH 23/23] fix: update test assertions to match simplified Workload fixture The Workload.java fixture was trimmed to only repeatString but test files still asserted computeSum, filterEvens, and instanceMethod. 
--- .../test_java/test_java_tracer_e2e.py | 23 ++++++++----------- .../test_java/test_java_tracer_integration.py | 8 +++---- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/tests/test_languages/test_java/test_java_tracer_e2e.py b/tests/test_languages/test_java/test_java_tracer_e2e.py index 157f23eb6..f16f19aa2 100644 --- a/tests/test_languages/test_java/test_java_tracer_e2e.py +++ b/tests/test_languages/test_java/test_java_tracer_e2e.py @@ -81,14 +81,11 @@ def test_agent_captures_invocations(self, compiled_workload: Path, trace_db: Pat conn = sqlite3.connect(str(trace_db)) try: rows = conn.execute("SELECT function, classname, descriptor, length(args) FROM function_calls").fetchall() - assert len(rows) >= 5, f"Expected at least 5 captured invocations, got {len(rows)}" + assert len(rows) >= 2, f"Expected at least 2 captured invocations, got {len(rows)}" # Check that specific methods were captured functions = {row[0] for row in rows} - assert "computeSum" in functions assert "repeatString" in functions - assert "filterEvens" in functions - assert "instanceMethod" in functions # Verify all rows have non-empty args blobs for row in rows: @@ -97,7 +94,7 @@ def test_agent_captures_invocations(self, compiled_workload: Path, trace_db: Pat # Verify metadata metadata = dict(conn.execute("SELECT key, value FROM metadata").fetchall()) assert "totalCaptures" in metadata - assert int(metadata["totalCaptures"]) >= 5 + assert int(metadata["totalCaptures"]) >= 2 finally: conn.close() @@ -136,11 +133,11 @@ def test_max_function_count_limit(self, compiled_workload: Path, trace_db: Path) conn = sqlite3.connect(str(trace_db)) try: - # computeSum is called 4 times (2 direct + 2 from instanceMethod) - compute_count = conn.execute( - "SELECT COUNT(*) FROM function_calls WHERE function = 'computeSum'" + # repeatString is called 1000+ times; with maxFunctionCount=2, at most 2 should be captured + repeat_count = conn.execute( + "SELECT COUNT(*) FROM function_calls WHERE 
function = 'repeatString'" ).fetchone()[0] - assert compute_count <= 2, f"Expected at most 2 computeSum captures, got {compute_count}" + assert repeat_count <= 2, f"Expected at most 2 repeatString captures, got {repeat_count}" finally: conn.close() @@ -198,7 +195,6 @@ def test_generates_test_files(self, compiled_workload: Path, trace_db: Path, tmp assert "package codeflash.replay;" in content assert "import org.junit.jupiter.api.Test;" in content assert "ReplayHelper" in content - assert "replay_computeSum_0" in content assert "replay_repeatString_0" in content def test_metadata_parsing(self, compiled_workload: Path, trace_db: Path, tmp_path: Path) -> None: @@ -243,7 +239,7 @@ def test_metadata_parsing(self, compiled_workload: Path, trace_db: Path, tmp_pat assert "functions" in metadata assert "trace_file" in metadata assert "classname" in metadata - assert "computeSum" in metadata["functions"] + assert "repeatString" in metadata["functions"] assert metadata["classname"] == "com.example.Workload" assert metadata["trace_file"] == trace_db.as_posix() @@ -267,7 +263,7 @@ def test_two_stage_trace(self, compiled_workload: Path, tmp_path: Path) -> None: conn = sqlite3.connect(str(trace_db)) try: count = conn.execute("SELECT COUNT(*) FROM function_calls").fetchone()[0] - assert count >= 5, f"Expected at least 5 captured invocations, got {count}" + assert count >= 2, f"Expected at least 2 captured invocations, got {count}" finally: conn.close() @@ -295,8 +291,7 @@ def test_full_trace_and_replay_generation(self, compiled_workload: Path, tmp_pat workload_files = [f for f in test_files if "Workload" in f.name and "ConstructorAccess" not in f.name] assert len(workload_files) == 1 content = workload_files[0].read_text(encoding="utf-8") - assert "replay_computeSum" in content - assert "replay_instanceMethod" in content + assert "replay_repeatString" in content def test_package_detection(self) -> None: """Test that package detection finds Java packages from source files.""" diff 
--git a/tests/test_languages/test_java/test_java_tracer_integration.py b/tests/test_languages/test_java/test_java_tracer_integration.py index f6ffefdf2..6927faba4 100644 --- a/tests/test_languages/test_java/test_java_tracer_integration.py +++ b/tests/test_languages/test_java/test_java_tracer_integration.py @@ -87,7 +87,6 @@ def test_discover_functions_from_replay_tests(self, traced_workload: tuple) -> N assert func.language == "java", f"Expected language='java', got '{func.language}'" assert func.file_path == file_path - assert "computeSum" in all_func_names assert "repeatString" in all_func_names def test_discover_tests_for_replay_tests(self, traced_workload: tuple) -> None: @@ -111,7 +110,6 @@ def test_discover_tests_for_replay_tests(self, traced_workload: tuple) -> None: func_name = qualified_name.split(".")[-1] if "." in qualified_name else qualified_name matched_func_names.add(func_name) - assert "computeSum" in matched_func_names, f"computeSum not found in: {result.keys()}" assert "repeatString" in matched_func_names, f"repeatString not found in: {result.keys()}" # Each function should have at least one test @@ -222,8 +220,8 @@ def test_full_pipeline(self, compiled_workload: Path, tmp_path: Path) -> None: assert len(function_to_tests) > 0, "No function-to-test mappings" # Verify function_to_tests has entries for our traced functions - has_compute_sum = any("computeSum" in key for key in function_to_tests) - assert has_compute_sum, f"computeSum not in function_to_tests keys: {list(function_to_tests.keys())}" + has_repeat_string = any("repeatString" in key for key in function_to_tests) + assert has_repeat_string, f"repeatString not in function_to_tests keys: {list(function_to_tests.keys())}" # Step 4: Rank functions (like optimizer.rank_all_functions_globally) if jfr_file.exists(): @@ -280,7 +278,7 @@ def test_instrument_and_compile_replay_tests(self, compiled_workload: Path, tmp_ source_code = WORKLOAD_SOURCE.read_text(encoding="utf-8") source_functions = 
discover_functions_from_source(source_code, file_path=WORKLOAD_SOURCE) # Pick the first function with a return type for instrumentation - target_func = next(f for f in source_functions if f.function_name == "computeSum") + target_func = next(f for f in source_functions if f.function_name == "repeatString") replay_test_file = replay_test_paths[0] test_source = replay_test_file.read_text(encoding="utf-8")