From 2b0f633c0fc936b3338f21c8228648eabe03884e Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 03:04:29 -0500 Subject: [PATCH 01/23] perf: reduce java-tracer E2E from ~75 min to ~15 min MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove filterEvens and instanceMethod from the Workload fixture (4→2 functions) and reduce main() loop from 1000→100 rounds. The E2E test only needs to verify the tracer→optimizer pipeline works end-to-end; it doesn't need 4 functions or 1604 replay tests to prove that. Expected impact: ~2 functions × ~8 candidates × fewer replay tests should bring the job from ~75 min down to ~10-15 min. --- .../src/main/java/com/example/Workload.java | 34 ++----------------- 1 file changed, 2 insertions(+), 32 deletions(-) diff --git a/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java b/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java index 7beb2a4ea..7c46668d5 100644 --- a/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java +++ b/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java @@ -1,8 +1,5 @@ package com.example; -import java.util.ArrayList; -import java.util.List; - public class Workload { public static int computeSum(int n) { @@ -21,46 +18,19 @@ public static String repeatString(String s, int count) { return result; } - public static List filterEvens(List numbers) { - List result = new ArrayList<>(); - for (int n : numbers) { - if (n % 2 == 0) { - result.add(n); - } - } - return result; - } - - public int instanceMethod(int x, int y) { - return x * y + computeSum(x); - } - public static void main(String[] args) { // Run methods with large inputs so JFR can capture CPU samples. // Small inputs finish too fast (<1ms) for JFR's 10ms sampling interval. 
- for (int round = 0; round < 1000; round++) { + // 100 rounds is enough for JFR to collect ~10 samples per function. + for (int round = 0; round < 100; round++) { computeSum(100_000); repeatString("hello world ", 1000); - - List nums = new ArrayList<>(); - for (int i = 1; i <= 10_000; i++) nums.add(i); - filterEvens(nums); - - Workload w = new Workload(); - w.instanceMethod(100_000, 42); } // Also call with small inputs for variety in traced args System.out.println("computeSum(100) = " + computeSum(100)); System.out.println("repeatString(\"ab\", 3) = " + repeatString("ab", 3)); - List small = new ArrayList<>(); - for (int i = 1; i <= 10; i++) small.add(i); - System.out.println("filterEvens(1..10) = " + filterEvens(small)); - - Workload w = new Workload(); - System.out.println("instanceMethod(5, 3) = " + w.instanceMethod(5, 3)); - System.out.println("Workload complete."); } } From 21f61ec93d837f6c919e0d008cf037c8989047a7 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 03:08:03 -0500 Subject: [PATCH 02/23] ci: add java_tracer_e2e fixture path to e2e_java change detection The fixture directory wasn't in the path filter, so changes to Workload.java didn't trigger the java E2E tests. 
--- .github/workflows/ci.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 63c83149f..368459608 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -95,7 +95,8 @@ jobs: 'codeflash/languages/java/' 'codeflash/languages/base.py' \ 'codeflash/languages/registry.py' 'codeflash/optimization/' \ 'codeflash/verification/' 'codeflash-java-runtime/' \ - 'code_to_optimize/java/' 'tests/scripts/end_to_end_test_java*' + 'code_to_optimize/java/' 'tests/scripts/end_to_end_test_java*' \ + 'tests/test_languages/fixtures/java_tracer_e2e/' env: MERGE_BASE: ${{ steps.merge_base.outputs.sha }} From 46957e190f0490e1480dcd9a2d4985835f7c27ec Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 03:17:46 -0500 Subject: [PATCH 03/23] fix: update java tracer unit tests for reduced Workload fixture Remove assertions for filterEvens and instanceMethod which were removed from the Workload fixture. Adjust expected invocation counts accordingly. 
--- tests/test_languages/test_java/test_java_tracer_e2e.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/test_languages/test_java/test_java_tracer_e2e.py b/tests/test_languages/test_java/test_java_tracer_e2e.py index 157f23eb6..054b934f7 100644 --- a/tests/test_languages/test_java/test_java_tracer_e2e.py +++ b/tests/test_languages/test_java/test_java_tracer_e2e.py @@ -81,14 +81,12 @@ def test_agent_captures_invocations(self, compiled_workload: Path, trace_db: Pat conn = sqlite3.connect(str(trace_db)) try: rows = conn.execute("SELECT function, classname, descriptor, length(args) FROM function_calls").fetchall() - assert len(rows) >= 5, f"Expected at least 5 captured invocations, got {len(rows)}" + assert len(rows) >= 3, f"Expected at least 3 captured invocations, got {len(rows)}" # Check that specific methods were captured functions = {row[0] for row in rows} assert "computeSum" in functions assert "repeatString" in functions - assert "filterEvens" in functions - assert "instanceMethod" in functions # Verify all rows have non-empty args blobs for row in rows: @@ -97,7 +95,7 @@ def test_agent_captures_invocations(self, compiled_workload: Path, trace_db: Pat # Verify metadata metadata = dict(conn.execute("SELECT key, value FROM metadata").fetchall()) assert "totalCaptures" in metadata - assert int(metadata["totalCaptures"]) >= 5 + assert int(metadata["totalCaptures"]) >= 3 finally: conn.close() @@ -136,7 +134,7 @@ def test_max_function_count_limit(self, compiled_workload: Path, trace_db: Path) conn = sqlite3.connect(str(trace_db)) try: - # computeSum is called 4 times (2 direct + 2 from instanceMethod) + # computeSum is called 2 times (direct calls in main) compute_count = conn.execute( "SELECT COUNT(*) FROM function_calls WHERE function = 'computeSum'" ).fetchone()[0] @@ -296,7 +294,7 @@ def test_full_trace_and_replay_generation(self, compiled_workload: Path, tmp_pat assert len(workload_files) == 1 content = 
workload_files[0].read_text(encoding="utf-8") assert "replay_computeSum" in content - assert "replay_instanceMethod" in content + assert "replay_repeatString" in content def test_package_detection(self) -> None: """Test that package detection finds Java packages from source files.""" From 08aa94c54ac74d07a5265c0871da5b0da4dd67ca Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 03:44:54 -0500 Subject: [PATCH 04/23] perf: reduce java-tracer E2E to single function for ~11 min target MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop repeatString from the Workload fixture (2→1 function). computeSum alone exercises the full tracer→optimizer pipeline (trace → replay tests → optimize → evaluate → rank → explain → review). The second function added no additional pipeline coverage. --- .../src/main/java/com/example/Workload.java | 13 +------------ .../test_java/test_java_tracer_e2e.py | 6 ++---- 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java b/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java index 7c46668d5..ff0ae4d8a 100644 --- a/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java +++ b/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java @@ -10,26 +10,15 @@ public static int computeSum(int n) { return sum; } - public static String repeatString(String s, int count) { - String result = ""; - for (int i = 0; i < count; i++) { - result = result + s; - } - return result; - } - public static void main(String[] args) { - // Run methods with large inputs so JFR can capture CPU samples. + // Run with large inputs so JFR can capture CPU samples. // Small inputs finish too fast (<1ms) for JFR's 10ms sampling interval. - // 100 rounds is enough for JFR to collect ~10 samples per function. 
for (int round = 0; round < 100; round++) { computeSum(100_000); - repeatString("hello world ", 1000); } // Also call with small inputs for variety in traced args System.out.println("computeSum(100) = " + computeSum(100)); - System.out.println("repeatString(\"ab\", 3) = " + repeatString("ab", 3)); System.out.println("Workload complete."); } diff --git a/tests/test_languages/test_java/test_java_tracer_e2e.py b/tests/test_languages/test_java/test_java_tracer_e2e.py index 054b934f7..c7dce2379 100644 --- a/tests/test_languages/test_java/test_java_tracer_e2e.py +++ b/tests/test_languages/test_java/test_java_tracer_e2e.py @@ -81,12 +81,11 @@ def test_agent_captures_invocations(self, compiled_workload: Path, trace_db: Pat conn = sqlite3.connect(str(trace_db)) try: rows = conn.execute("SELECT function, classname, descriptor, length(args) FROM function_calls").fetchall() - assert len(rows) >= 3, f"Expected at least 3 captured invocations, got {len(rows)}" + assert len(rows) >= 2, f"Expected at least 2 captured invocations, got {len(rows)}" # Check that specific methods were captured functions = {row[0] for row in rows} assert "computeSum" in functions - assert "repeatString" in functions # Verify all rows have non-empty args blobs for row in rows: @@ -95,7 +94,7 @@ def test_agent_captures_invocations(self, compiled_workload: Path, trace_db: Pat # Verify metadata metadata = dict(conn.execute("SELECT key, value FROM metadata").fetchall()) assert "totalCaptures" in metadata - assert int(metadata["totalCaptures"]) >= 3 + assert int(metadata["totalCaptures"]) >= 2 finally: conn.close() @@ -294,7 +293,6 @@ def test_full_trace_and_replay_generation(self, compiled_workload: Path, tmp_pat assert len(workload_files) == 1 content = workload_files[0].read_text(encoding="utf-8") assert "replay_computeSum" in content - assert "replay_repeatString" in content def test_package_detection(self) -> None: """Test that package detection finds Java packages from source files.""" From 
0772398c59840d20202b6cb8c2d2fda18f709538 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 04:55:36 -0500 Subject: [PATCH 05/23] perf: optimize Java tracing agent serialization and writes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Reuse ThreadLocal Kryo Output buffers (eliminates #1 allocation hotspot) - Fast-path inline serialization for safe arg types (bypasses executor) - Skip verification roundtrip for known-safe containers (ArrayList, HashMap, etc.) - Batch SQLite inserts (256/txn) with permanent autocommit-off - Switch to ArrayBlockingQueue (no per-element Node allocation) - Add opt-in in-memory SQLite mode (VACUUM INTO at shutdown), enabled in CI - Add timing instrumentation (onEntry, serialization, writes, dump) - Add ProfilingWorkload fixture for benchmarking Benchmark (50k captures): onEntry 5200ms→1200ms (4.3x), avg/capture 0.43ms→0.02ms (21x), writes 3200ms→900ms (3.5x) with in-memory mode. --- .../main/java/com/codeflash/Serializer.java | 131 +++++++++++++---- .../com/codeflash/tracer/TraceRecorder.java | 58 +++++--- .../com/codeflash/tracer/TraceWriter.java | 132 +++++++++++++++--- .../com/codeflash/tracer/TracerConfig.java | 7 + codeflash/languages/java/tracer.py | 2 + .../java/com/example/ProfilingWorkload.java | 91 ++++++++++++ .../test_java/test_java_tracer_e2e.py | 1 - 7 files changed, 355 insertions(+), 67 deletions(-) create mode 100644 tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/ProfilingWorkload.java diff --git a/codeflash-java-runtime/src/main/java/com/codeflash/Serializer.java b/codeflash-java-runtime/src/main/java/com/codeflash/Serializer.java index 80d400935..e1c177ac9 100644 --- a/codeflash-java-runtime/src/main/java/com/codeflash/Serializer.java +++ b/codeflash-java-runtime/src/main/java/com/codeflash/Serializer.java @@ -6,7 +6,6 @@ import com.esotericsoftware.kryo.util.DefaultInstantiatorStrategy; import 
org.objenesis.strategy.StdInstantiatorStrategy; -import java.io.ByteArrayOutputStream; import java.io.InputStream; import java.io.OutputStream; import java.lang.reflect.Field; @@ -36,7 +35,11 @@ public final class Serializer { private static final int MAX_COLLECTION_SIZE = 1000; private static final int BUFFER_SIZE = 4096; - // Thread-local Kryo instances (Kryo is not thread-safe) + // Thread-local Kryo, Output, and IdentityHashMap instances for reuse + private static final ThreadLocal OUTPUT = ThreadLocal.withInitial(() -> new Output(BUFFER_SIZE, -1)); + private static final ThreadLocal> SEEN = + ThreadLocal.withInitial(IdentityHashMap::new); + private static final ThreadLocal KRYO = ThreadLocal.withInitial(() -> { Kryo kryo = new Kryo(); kryo.setRegistrationRequired(false); @@ -89,10 +92,78 @@ private Serializer() { * @return Serialized bytes (may contain KryoPlaceholder for unserializable parts) */ public static byte[] serialize(Object obj) { - Object processed = recursiveProcess(obj, new IdentityHashMap<>(), 0, ""); + // Fast path: if args are all safe types, skip recursive processing entirely + if (obj instanceof Object[] && isSafeArgs((Object[]) obj)) { + return directSerialize(obj); + } + + IdentityHashMap seen = SEEN.get(); + seen.clear(); + Object processed = recursiveProcess(obj, seen, 0, ""); return directSerialize(processed); } + /** + * Attempt fast-path serialization for args that are all known-safe types. + * Returns serialized bytes if all args are safe, or null if the slow path is needed. + * Callers can use this to avoid executor submission overhead for simple arguments. + */ + public static byte[] serializeFast(Object obj) { + if (obj instanceof Object[] && isSafeArgs((Object[]) obj)) { + return directSerialize(obj); + } + return null; + } + + /** + * Check if all elements of an args array can be serialized directly without recursive processing. 
+ */ + private static boolean isSafeArgs(Object[] args) { + for (Object arg : args) { + if (!isSafeForDirectSerialization(arg)) { + return false; + } + } + return true; + } + + /** + * Check if an object is safe to serialize directly without recursive processing. + * Covers: null, simple types, primitive arrays, and safe containers (up to 3 levels deep). + */ + private static boolean isSafeForDirectSerialization(Object obj) { + return isSafeForDirectSerialization(obj, 3); + } + + private static boolean isSafeForDirectSerialization(Object obj, int depthLeft) { + if (obj == null || isSimpleType(obj)) { + return true; + } + if (depthLeft <= 0) { + return false; + } + Class clazz = obj.getClass(); + if (clazz.isArray() && clazz.getComponentType().isPrimitive()) { + return true; + } + if (isSafeContainerType(clazz)) { + if (obj instanceof Collection) { + for (Object item : (Collection) obj) { + if (!isSafeForDirectSerialization(item, depthLeft - 1)) return false; + } + return true; + } + if (obj instanceof Map) { + for (Map.Entry e : ((Map) obj).entrySet()) { + if (!isSafeForDirectSerialization(e.getKey(), depthLeft - 1) || + !isSafeForDirectSerialization(e.getValue(), depthLeft - 1)) return false; + } + return true; + } + } + return false; + } + /** * Deserialize bytes back to an object. * The returned object may contain KryoPlaceholder instances for parts @@ -141,14 +212,15 @@ public static byte[] serializeException(Throwable error) { /** * Direct serialization without recursive processing. + * Reuses a ThreadLocal Output buffer to avoid per-call allocation. 
*/ private static byte[] directSerialize(Object obj) { Kryo kryo = KRYO.get(); - ByteArrayOutputStream baos = new ByteArrayOutputStream(BUFFER_SIZE); - try (Output output = new Output(baos)) { - kryo.writeClassAndObject(output, obj); - } - return baos.toByteArray(); + Output output = OUTPUT.get(); + output.reset(); + kryo.writeClassAndObject(output, obj); + output.flush(); + return output.toBytes(); } /** @@ -201,37 +273,23 @@ private static Object recursiveProcess(Object obj, IdentityHashMap map = (Map) obj; - if (containsOnlySimpleTypes(map)) { - // Simple map - try direct serialization to preserve full size - byte[] serialized = tryDirectSerialize(obj); - if (serialized != null) { - try { - deserialize(serialized); - return obj; // Success - return original - } catch (Exception e) { - // Fall through to recursive handling - } - } + if (isSafeContainerType(clazz) && containsOnlySimpleTypes(map)) { + return obj; } return handleMap(map, seen, depth, path); } if (obj instanceof Collection) { Collection collection = (Collection) obj; - if (containsOnlySimpleTypes(collection)) { - // Simple collection - try direct serialization to preserve full size - byte[] serialized = tryDirectSerialize(obj); - if (serialized != null) { - try { - deserialize(serialized); - return obj; // Success - return original - } catch (Exception e) { - // Fall through to recursive handling - } - } + if (isSafeContainerType(clazz) && containsOnlySimpleTypes(collection)) { + return obj; } return handleCollection(collection, seen, depth, path); } if (clazz.isArray()) { + // Primitive arrays (int[], double[], etc.) 
are directly serializable by Kryo + if (clazz.getComponentType().isPrimitive()) { + return obj; + } return handleArray(obj, seen, depth, path); } @@ -255,6 +313,19 @@ private static Object recursiveProcess(Object obj, IdentityHashMap clazz) { + return clazz == ArrayList.class || + clazz == LinkedList.class || + clazz == HashMap.class || + clazz == LinkedHashMap.class || + clazz == HashSet.class || + clazz == LinkedHashSet.class; + } + /** * Check if a class is known to be unserializable. */ diff --git a/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceRecorder.java b/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceRecorder.java index 28c2d2998..a9acfe855 100644 --- a/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceRecorder.java +++ b/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceRecorder.java @@ -12,6 +12,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; public final class TraceRecorder { @@ -23,6 +24,8 @@ public final class TraceRecorder { private final TraceWriter writer; private final ConcurrentHashMap functionCounts = new ConcurrentHashMap<>(); private final AtomicInteger droppedCaptures = new AtomicInteger(0); + private final AtomicLong totalOnEntryNs = new AtomicLong(0); + private final AtomicLong totalSerializationNs = new AtomicLong(0); private final int maxFunctionCount; private final ExecutorService serializerExecutor; @@ -31,7 +34,7 @@ public final class TraceRecorder { private TraceRecorder(TracerConfig config) { this.config = config; - this.writer = new TraceWriter(config.getDbPath()); + this.writer = new TraceWriter(config.getDbPath(), config.isInMemoryDb()); this.maxFunctionCount = config.getMaxFunctionCount(); this.serializerExecutor = Executors.newCachedThreadPool(r -> { Thread t = new Thread(r, "codeflash-serializer"); @@ -68,6 +71,8 @@ public void 
onEntry(String className, String methodName, String descriptor, private void onEntryImpl(String className, String methodName, String descriptor, int lineNumber, String sourceFile, Object[] args) { + long entryStart = System.nanoTime(); + String qualifiedName = className + "." + methodName + descriptor; // Check per-method count limit @@ -76,30 +81,38 @@ private void onEntryImpl(String className, String methodName, String descriptor, return; } - // Serialize args with timeout to prevent deep object graph traversal from blocking + // Serialize args — try inline fast path first, fall back to async with timeout byte[] argsBlob; - Future future = serializerExecutor.submit(() -> Serializer.serialize(args)); - try { - argsBlob = future.get(SERIALIZATION_TIMEOUT_MS, TimeUnit.MILLISECONDS); - } catch (TimeoutException e) { - future.cancel(true); - droppedCaptures.incrementAndGet(); - System.err.println("[codeflash-tracer] Serialization timed out for " + className + "." - + methodName); - return; - } catch (Exception e) { - Throwable cause = e.getCause() != null ? e.getCause() : e; - droppedCaptures.incrementAndGet(); - System.err.println("[codeflash-tracer] Serialization failed for " + className + "." - + methodName + ": " + cause.getClass().getSimpleName() + ": " + cause.getMessage()); - return; + long serStart = System.nanoTime(); + argsBlob = Serializer.serializeFast(args); + if (argsBlob == null) { + // Slow path: async serialization with timeout for complex/unknown types + Future future = serializerExecutor.submit(() -> Serializer.serialize(args)); + try { + argsBlob = future.get(SERIALIZATION_TIMEOUT_MS, TimeUnit.MILLISECONDS); + } catch (TimeoutException e) { + future.cancel(true); + droppedCaptures.incrementAndGet(); + System.err.println("[codeflash-tracer] Serialization timed out for " + className + "." + + methodName); + return; + } catch (Exception e) { + Throwable cause = e.getCause() != null ? 
e.getCause() : e; + droppedCaptures.incrementAndGet(); + System.err.println("[codeflash-tracer] Serialization failed for " + className + "." + + methodName + ": " + cause.getClass().getSimpleName() + ": " + cause.getMessage()); + return; + } } + totalSerializationNs.addAndGet(System.nanoTime() - serStart); long timeNs = System.nanoTime(); count.incrementAndGet(); writer.recordFunctionCall("call", methodName, className, sourceFile, lineNumber, descriptor, timeNs, argsBlob); + + totalOnEntryNs.addAndGet(System.nanoTime() - entryStart); } public void flush() { @@ -126,5 +139,16 @@ public void flush() { System.err.println("[codeflash-tracer] Captured " + totalCaptures + " invocations across " + functionCounts.size() + " methods" + (dropped > 0 ? " (" + dropped + " dropped due to serialization timeout/failure)" : "")); + + // Timing summary + long onEntryMs = totalOnEntryNs.get() / 1_000_000; + long serMs = totalSerializationNs.get() / 1_000_000; + String writerSummary = writer.getTimingSummary(); + System.err.println("[codeflash-tracer] Timing: onEntry=" + onEntryMs + "ms" + + " (serialization=" + serMs + "ms)" + + (totalCaptures > 0 + ? 
" avg=" + String.format("%.2f", (double) onEntryMs / totalCaptures) + "ms/capture" + : "") + + " " + writerSummary); } } diff --git a/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceWriter.java b/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceWriter.java index a9eeabf60..7bc5032cb 100644 --- a/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceWriter.java +++ b/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceWriter.java @@ -7,30 +7,49 @@ import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Statement; +import java.util.ArrayList; +import java.util.List; import java.util.Map; import java.util.concurrent.BlockingQueue; -import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; public final class TraceWriter { + private static final int BATCH_SIZE = 256; + private static final int QUEUE_CAPACITY = 65536; + private final Connection connection; + private final Path diskPath; + private final boolean inMemory; private final BlockingQueue writeQueue; private final Thread writerThread; private final AtomicBoolean running; + private final AtomicLong totalWriteNs = new AtomicLong(0); + private final AtomicInteger batchCount = new AtomicInteger(0); + private final AtomicInteger taskCount = new AtomicInteger(0); + private volatile long dumpToFileMs = 0; private PreparedStatement insertFunctionCall; private PreparedStatement insertMetadata; - public TraceWriter(String dbPath) { - this.writeQueue = new LinkedBlockingQueue<>(); + public TraceWriter(String dbPath, boolean inMemory) { + this.diskPath = Paths.get(dbPath).toAbsolutePath(); + this.diskPath.getParent().toFile().mkdirs(); + this.inMemory = inMemory; + this.writeQueue = new ArrayBlockingQueue<>(QUEUE_CAPACITY); 
this.running = new AtomicBoolean(true); try { - Path path = Paths.get(dbPath).toAbsolutePath(); - path.getParent().toFile().mkdirs(); - this.connection = DriverManager.getConnection("jdbc:sqlite:" + path); + if (inMemory) { + // In-memory database for maximum write performance; flushed to disk via VACUUM INTO at close() + this.connection = DriverManager.getConnection("jdbc:sqlite::memory:"); + } else { + this.connection = DriverManager.getConnection("jdbc:sqlite:" + this.diskPath); + } initializeSchema(); prepareStatements(); @@ -45,8 +64,12 @@ public TraceWriter(String dbPath) { private void initializeSchema() throws SQLException { try (Statement stmt = connection.createStatement()) { - stmt.execute("PRAGMA journal_mode=WAL"); - stmt.execute("PRAGMA synchronous=NORMAL"); + if (!inMemory) { + stmt.execute("PRAGMA journal_mode=WAL"); + stmt.execute("PRAGMA synchronous=NORMAL"); + stmt.execute("PRAGMA cache_size=-16000"); + stmt.execute("PRAGMA temp_store=MEMORY"); + } stmt.execute( "CREATE TABLE IF NOT EXISTS function_calls(" + @@ -69,6 +92,8 @@ private void initializeSchema() throws SQLException { stmt.execute("CREATE INDEX IF NOT EXISTS idx_fc_class_func ON function_calls(classname, function)"); } + // Keep autocommit off for writer performance — commit explicitly per batch + connection.setAutoCommit(false); } private void prepareStatements() throws SQLException { @@ -95,29 +120,65 @@ public void writeMetadata(Map metadata) { } private void writerLoop() { + List batch = new ArrayList<>(BATCH_SIZE); + while (running.get() || !writeQueue.isEmpty()) { try { WriteTask task = writeQueue.poll(100, TimeUnit.MILLISECONDS); - if (task != null) { - task.execute(this); + if (task == null) { + continue; } + batch.add(task); + writeQueue.drainTo(batch, BATCH_SIZE - 1); + executeBatch(batch); + batch.clear(); } catch (InterruptedException e) { Thread.currentThread().interrupt(); break; - } catch (SQLException e) { - System.err.println("[codeflash-tracer] Write error: " + 
e.getMessage()); } } // Drain remaining - WriteTask task; - while ((task = writeQueue.poll()) != null) { + writeQueue.drainTo(batch); + if (!batch.isEmpty()) { + executeBatch(batch); + } + } + + private void executeBatch(List batch) { + if (batch.isEmpty()) { + return; + } + + long writeStart = System.nanoTime(); + boolean hasFunctionCalls = false; + try { + for (WriteTask task : batch) { + if (task instanceof FunctionCallTask) { + ((FunctionCallTask) task).bindParameters(this); + insertFunctionCall.addBatch(); + hasFunctionCalls = true; + } else { + task.execute(this); + } + } + + if (hasFunctionCalls) { + insertFunctionCall.executeBatch(); + } + + connection.commit(); + } catch (SQLException e) { + System.err.println("[codeflash-tracer] Batch write error (" + batch.size() + " tasks): " + e.getMessage()); try { - task.execute(this); - } catch (SQLException e) { - System.err.println("[codeflash-tracer] Write error: " + e.getMessage()); + connection.rollback(); + } catch (SQLException re) { + System.err.println("[codeflash-tracer] Rollback failed: " + re.getMessage()); } } + totalWriteNs.addAndGet(System.nanoTime() - writeStart); + batchCount.incrementAndGet(); + taskCount.addAndGet(batch.size()); } public void flush() { @@ -131,6 +192,15 @@ public void flush() { } } + public String getTimingSummary() { + long writeMs = totalWriteNs.get() / 1_000_000; + int batches = batchCount.get(); + int tasks = taskCount.get(); + return "writes=" + writeMs + "ms (" + tasks + " tasks in " + batches + " batches" + + (batches > 0 ? 
", avg=" + String.format("%.1f", (double) tasks / batches) + " tasks/batch" : "") + + ") dump=" + dumpToFileMs + "ms"; + } + public void close() { running.set(false); try { @@ -139,9 +209,29 @@ public void close() { Thread.currentThread().interrupt(); } + // Close prepared statements first — required before VACUUM try { if (insertFunctionCall != null) insertFunctionCall.close(); if (insertMetadata != null) insertMetadata.close(); + } catch (SQLException e) { + System.err.println("[codeflash-tracer] Error closing statements: " + e.getMessage()); + } + + if (inMemory) { + long dumpStart = System.nanoTime(); + try { + connection.commit(); + connection.setAutoCommit(true); + try (Statement stmt = connection.createStatement()) { + stmt.execute("VACUUM INTO '" + diskPath.toString().replace("'", "''") + "'"); + } + } catch (SQLException e) { + System.err.println("[codeflash-tracer] Failed to write trace DB to disk: " + e.getMessage()); + } + dumpToFileMs = (System.nanoTime() - dumpStart) / 1_000_000; + } + + try { if (connection != null) connection.close(); } catch (SQLException e) { System.err.println("[codeflash-tracer] Error closing TraceWriter: " + e.getMessage()); @@ -177,8 +267,7 @@ private static class FunctionCallTask implements WriteTask { this.argsBlob = argsBlob; } - @Override - public void execute(TraceWriter writer) throws SQLException { + void bindParameters(TraceWriter writer) throws SQLException { writer.insertFunctionCall.setString(1, type); writer.insertFunctionCall.setString(2, function); writer.insertFunctionCall.setString(3, classname); @@ -187,6 +276,11 @@ public void execute(TraceWriter writer) throws SQLException { writer.insertFunctionCall.setString(6, descriptor); writer.insertFunctionCall.setLong(7, timeNs); writer.insertFunctionCall.setBytes(8, argsBlob); + } + + @Override + public void execute(TraceWriter writer) throws SQLException { + bindParameters(writer); writer.insertFunctionCall.executeUpdate(); } } diff --git 
a/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TracerConfig.java b/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TracerConfig.java index 8fe799d2f..9e2675c00 100644 --- a/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TracerConfig.java +++ b/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TracerConfig.java @@ -30,6 +30,9 @@ public final class TracerConfig { @SerializedName("projectRoot") private String projectRoot = ""; + @SerializedName("inMemoryDb") + private boolean inMemoryDb = false; + private static final Gson GSON = new Gson(); public static TracerConfig parse(String agentArgs) { @@ -89,6 +92,10 @@ public String getProjectRoot() { return projectRoot; } + public boolean isInMemoryDb() { + return inMemoryDb; + } + public boolean shouldInstrumentClass(String internalClassName) { String dotName = internalClassName.replace('/', '.'); diff --git a/codeflash/languages/java/tracer.py b/codeflash/languages/java/tracer.py index 50506797e..8e8348681 100644 --- a/codeflash/languages/java/tracer.py +++ b/codeflash/languages/java/tracer.py @@ -6,6 +6,7 @@ import subprocess from typing import TYPE_CHECKING +from codeflash.code_utils.env_utils import is_ci from codeflash.languages.java.line_profiler import find_agent_jar from codeflash.languages.java.replay_test import generate_replay_tests @@ -114,6 +115,7 @@ def create_tracer_config( "maxFunctionCount": max_function_count, "timeout": timeout, "projectRoot": str(project_root.resolve()) if project_root else "", + "inMemoryDb": is_ci(), } config_path = trace_db_path.with_suffix(".config.json") diff --git a/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/ProfilingWorkload.java b/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/ProfilingWorkload.java new file mode 100644 index 000000000..b7c48c625 --- /dev/null +++ b/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/ProfilingWorkload.java @@ -0,0 +1,91 @@ +package 
com.example; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Profiling workload for benchmarking the codeflash tracing agent. + * Exercises different argument types to stress serialization paths. + */ +public class ProfilingWorkload { + + // 1. Primitives only — cheapest to serialize + public static int addInts(int a, int b) { + return a + b; + } + + // 2. String arguments — moderate serialization cost + public static String concatStrings(String a, String b) { + return a + b; + } + + // 3. Array argument — requires element-by-element serialization + public static int sumArray(int[] values) { + int sum = 0; + for (int v : values) { + sum += v; + } + return sum; + } + + // 4. Collection argument — triggers recursive Kryo processing + public static int sumList(List values) { + int sum = 0; + for (int v : values) { + sum += v; + } + return sum; + } + + // 5. Nested map — deep object graph, expensive serialization + public static int countMapEntries(Map> data) { + int count = 0; + for (List list : data.values()) { + count += list.size(); + } + return count; + } + + public static void main(String[] args) { + int iterations = 1000; + + // 1. Primitives + for (int i = 0; i < iterations; i++) { + addInts(i, i + 1); + } + + // 2. Strings + for (int i = 0; i < iterations; i++) { + concatStrings("hello-" + i, "-world"); + } + + // 3. Arrays + int[] arr = new int[100]; + for (int i = 0; i < arr.length; i++) arr[i] = i; + for (int i = 0; i < iterations; i++) { + sumArray(arr); + } + + // 4. Lists + List list = new ArrayList<>(100); + for (int i = 0; i < 100; i++) list.add(i); + for (int i = 0; i < iterations; i++) { + sumList(list); + } + + // 5. 
Nested maps + Map<String, List<Integer>> map = new HashMap<>(); + for (int i = 0; i < 10; i++) { + List<Integer> vals = new ArrayList<>(); + for (int j = 0; j < 10; j++) vals.add(j); + map.put("key-" + i, vals); + } + for (int i = 0; i < iterations; i++) { + countMapEntries(map); + } + + System.out.println("ProfilingWorkload complete."); + } +} diff --git a/tests/test_languages/test_java/test_java_tracer_e2e.py b/tests/test_languages/test_java/test_java_tracer_e2e.py index c7dce2379..2ea87de9c 100644 --- a/tests/test_languages/test_java/test_java_tracer_e2e.py +++ b/tests/test_languages/test_java/test_java_tracer_e2e.py @@ -196,7 +196,6 @@ def test_generates_test_files(self, compiled_workload: Path, trace_db: Path, tmp assert "import org.junit.jupiter.api.Test;" in content assert "ReplayHelper" in content assert "replay_computeSum_0" in content - assert "replay_repeatString_0" in content def test_metadata_parsing(self, compiled_workload: Path, trace_db: Path, tmp_path: Path) -> None: """Test that metadata comments are correctly parsed from generated tests.""" From e81f25f8256078acf657c124e587682fe39e6c66 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 05:05:17 -0500 Subject: [PATCH 06/23] fix: remove stale repeatString assertions from integration tests repeatString was removed from Workload.java in the E2E reduction. 
--- tests/test_languages/test_java/test_java_tracer_integration.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_languages/test_java/test_java_tracer_integration.py b/tests/test_languages/test_java/test_java_tracer_integration.py index f6ffefdf2..a8dbc5118 100644 --- a/tests/test_languages/test_java/test_java_tracer_integration.py +++ b/tests/test_languages/test_java/test_java_tracer_integration.py @@ -88,7 +88,6 @@ def test_discover_functions_from_replay_tests(self, traced_workload: tuple) -> N assert func.file_path == file_path assert "computeSum" in all_func_names - assert "repeatString" in all_func_names def test_discover_tests_for_replay_tests(self, traced_workload: tuple) -> None: """Test that test discovery maps replay tests to source functions.""" @@ -112,7 +111,6 @@ def test_discover_tests_for_replay_tests(self, traced_workload: tuple) -> None: matched_func_names.add(func_name) assert "computeSum" in matched_func_names, f"computeSum not found in: {result.keys()}" - assert "repeatString" in matched_func_names, f"repeatString not found in: {result.keys()}" # Each function should have at least one test for func_name, test_infos in result.items(): From 01e22152c70500724b3f578877e47344f3083870 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 05:07:53 -0500 Subject: [PATCH 07/23] flexing --- .../src/main/java/com/example/Workload.java | 45 ++++++++++++++++++- .../test_java/test_java_tracer_e2e.py | 11 +++-- .../test_java/test_java_tracer_integration.py | 2 + 3 files changed, 53 insertions(+), 5 deletions(-) diff --git a/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java b/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java index ff0ae4d8a..7beb2a4ea 100644 --- a/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java +++ b/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java @@ -1,5 +1,8 @@ package 
com.example; +import java.util.ArrayList; +import java.util.List; + public class Workload { public static int computeSum(int n) { @@ -10,15 +13,53 @@ public static int computeSum(int n) { return sum; } + public static String repeatString(String s, int count) { + String result = ""; + for (int i = 0; i < count; i++) { + result = result + s; + } + return result; + } + + public static List<Integer> filterEvens(List<Integer> numbers) { + List<Integer> result = new ArrayList<>(); + for (int n : numbers) { + if (n % 2 == 0) { + result.add(n); + } + } + return result; + } + + public int instanceMethod(int x, int y) { + return x * y + computeSum(x); + } + public static void main(String[] args) { - // Run with large inputs so JFR can capture CPU samples. + // Run methods with large inputs so JFR can capture CPU samples. // Small inputs finish too fast (<1ms) for JFR's 10ms sampling interval. - for (int round = 0; round < 100; round++) { + for (int round = 0; round < 1000; round++) { computeSum(100_000); + repeatString("hello world ", 1000); + + List<Integer> nums = new ArrayList<>(); + for (int i = 1; i <= 10_000; i++) nums.add(i); + filterEvens(nums); + + Workload w = new Workload(); + w.instanceMethod(100_000, 42); } // Also call with small inputs for variety in traced args System.out.println("computeSum(100) = " + computeSum(100)); + System.out.println("repeatString(\"ab\", 3) = " + repeatString("ab", 3)); + + List<Integer> small = new ArrayList<>(); + for (int i = 1; i <= 10; i++) small.add(i); + System.out.println("filterEvens(1..10) = " + filterEvens(small)); + + Workload w = new Workload(); + System.out.println("instanceMethod(5, 3) = " + w.instanceMethod(5, 3)); System.out.println("Workload complete."); } diff --git a/tests/test_languages/test_java/test_java_tracer_e2e.py b/tests/test_languages/test_java/test_java_tracer_e2e.py index 2ea87de9c..157f23eb6 100644 --- a/tests/test_languages/test_java/test_java_tracer_e2e.py +++ b/tests/test_languages/test_java/test_java_tracer_e2e.py @@ -81,11 +81,14 @@ def 
test_agent_captures_invocations(self, compiled_workload: Path, trace_db: Pat conn = sqlite3.connect(str(trace_db)) try: rows = conn.execute("SELECT function, classname, descriptor, length(args) FROM function_calls").fetchall() - assert len(rows) >= 2, f"Expected at least 2 captured invocations, got {len(rows)}" + assert len(rows) >= 5, f"Expected at least 5 captured invocations, got {len(rows)}" # Check that specific methods were captured functions = {row[0] for row in rows} assert "computeSum" in functions + assert "repeatString" in functions + assert "filterEvens" in functions + assert "instanceMethod" in functions # Verify all rows have non-empty args blobs for row in rows: @@ -94,7 +97,7 @@ def test_agent_captures_invocations(self, compiled_workload: Path, trace_db: Pat # Verify metadata metadata = dict(conn.execute("SELECT key, value FROM metadata").fetchall()) assert "totalCaptures" in metadata - assert int(metadata["totalCaptures"]) >= 2 + assert int(metadata["totalCaptures"]) >= 5 finally: conn.close() @@ -133,7 +136,7 @@ def test_max_function_count_limit(self, compiled_workload: Path, trace_db: Path) conn = sqlite3.connect(str(trace_db)) try: - # computeSum is called 2 times (direct calls in main) + # computeSum is called 4 times (2 direct + 2 from instanceMethod) compute_count = conn.execute( "SELECT COUNT(*) FROM function_calls WHERE function = 'computeSum'" ).fetchone()[0] @@ -196,6 +199,7 @@ def test_generates_test_files(self, compiled_workload: Path, trace_db: Path, tmp assert "import org.junit.jupiter.api.Test;" in content assert "ReplayHelper" in content assert "replay_computeSum_0" in content + assert "replay_repeatString_0" in content def test_metadata_parsing(self, compiled_workload: Path, trace_db: Path, tmp_path: Path) -> None: """Test that metadata comments are correctly parsed from generated tests.""" @@ -292,6 +296,7 @@ def test_full_trace_and_replay_generation(self, compiled_workload: Path, tmp_pat assert len(workload_files) == 1 content = 
workload_files[0].read_text(encoding="utf-8") assert "replay_computeSum" in content + assert "replay_instanceMethod" in content def test_package_detection(self) -> None: """Test that package detection finds Java packages from source files.""" diff --git a/tests/test_languages/test_java/test_java_tracer_integration.py b/tests/test_languages/test_java/test_java_tracer_integration.py index a8dbc5118..f6ffefdf2 100644 --- a/tests/test_languages/test_java/test_java_tracer_integration.py +++ b/tests/test_languages/test_java/test_java_tracer_integration.py @@ -88,6 +88,7 @@ def test_discover_functions_from_replay_tests(self, traced_workload: tuple) -> N assert func.file_path == file_path assert "computeSum" in all_func_names + assert "repeatString" in all_func_names def test_discover_tests_for_replay_tests(self, traced_workload: tuple) -> None: """Test that test discovery maps replay tests to source functions.""" @@ -111,6 +112,7 @@ def test_discover_tests_for_replay_tests(self, traced_workload: tuple) -> None: matched_func_names.add(func_name) assert "computeSum" in matched_func_names, f"computeSum not found in: {result.keys()}" + assert "repeatString" in matched_func_names, f"repeatString not found in: {result.keys()}" # Each function should have at least one test for func_name, test_infos in result.items(): From bfe6f3a828c8ca0c53272ce1d528c064d81b425a Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 05:16:49 -0500 Subject: [PATCH 08/23] Remove debug timing instrumentation from tracer Strip AtomicLong accumulators, System.nanoTime() timing, and getTimingSummary() that were added for profiling. No functional change. 
--- .../com/codeflash/tracer/TraceRecorder.java | 20 ------------------ .../com/codeflash/tracer/TraceWriter.java | 21 ------------------- 2 files changed, 41 deletions(-) diff --git a/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceRecorder.java b/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceRecorder.java index a9acfe855..8596d3ee8 100644 --- a/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceRecorder.java +++ b/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceRecorder.java @@ -12,7 +12,6 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; public final class TraceRecorder { @@ -24,8 +23,6 @@ public final class TraceRecorder { private final TraceWriter writer; private final ConcurrentHashMap<String, AtomicInteger> functionCounts = new ConcurrentHashMap<>(); private final AtomicInteger droppedCaptures = new AtomicInteger(0); - private final AtomicLong totalOnEntryNs = new AtomicLong(0); - private final AtomicLong totalSerializationNs = new AtomicLong(0); private final int maxFunctionCount; private final ExecutorService serializerExecutor; @@ -71,8 +68,6 @@ public void onEntry(String className, String methodName, String descriptor, private void onEntryImpl(String className, String methodName, String descriptor, int lineNumber, String sourceFile, Object[] args) { - long entryStart = System.nanoTime(); - String qualifiedName = className + "." 
+ methodName + descriptor; // Check per-method count limit @@ -83,7 +78,6 @@ private void onEntryImpl(String className, String methodName, String descriptor, // Serialize args — try inline fast path first, fall back to async with timeout byte[] argsBlob; - long serStart = System.nanoTime(); argsBlob = Serializer.serializeFast(args); if (argsBlob == null) { // Slow path: async serialization with timeout for complex/unknown types @@ -104,15 +98,12 @@ private void onEntryImpl(String className, String methodName, String descriptor, return; } } - totalSerializationNs.addAndGet(System.nanoTime() - serStart); long timeNs = System.nanoTime(); count.incrementAndGet(); writer.recordFunctionCall("call", methodName, className, sourceFile, lineNumber, descriptor, timeNs, argsBlob); - - totalOnEntryNs.addAndGet(System.nanoTime() - entryStart); } public void flush() { @@ -139,16 +130,5 @@ public void flush() { System.err.println("[codeflash-tracer] Captured " + totalCaptures + " invocations across " + functionCounts.size() + " methods" + (dropped > 0 ? " (" + dropped + " dropped due to serialization timeout/failure)" : "")); - - // Timing summary - long onEntryMs = totalOnEntryNs.get() / 1_000_000; - long serMs = totalSerializationNs.get() / 1_000_000; - String writerSummary = writer.getTimingSummary(); - System.err.println("[codeflash-tracer] Timing: onEntry=" + onEntryMs + "ms" - + " (serialization=" + serMs + "ms)" - + (totalCaptures > 0 - ? 
" avg=" + String.format("%.2f", (double) onEntryMs / totalCaptures) + "ms/capture" - : "") - + " " + writerSummary); } } diff --git a/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceWriter.java b/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceWriter.java index 7bc5032cb..a75872089 100644 --- a/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceWriter.java +++ b/codeflash-java-runtime/src/main/java/com/codeflash/tracer/TraceWriter.java @@ -14,8 +14,6 @@ import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; public final class TraceWriter { @@ -28,10 +26,6 @@ public final class TraceWriter { private final BlockingQueue writeQueue; private final Thread writerThread; private final AtomicBoolean running; - private final AtomicLong totalWriteNs = new AtomicLong(0); - private final AtomicInteger batchCount = new AtomicInteger(0); - private final AtomicInteger taskCount = new AtomicInteger(0); - private volatile long dumpToFileMs = 0; private PreparedStatement insertFunctionCall; private PreparedStatement insertMetadata; @@ -150,7 +144,6 @@ private void executeBatch(List batch) { return; } - long writeStart = System.nanoTime(); boolean hasFunctionCalls = false; try { for (WriteTask task : batch) { @@ -176,9 +169,6 @@ private void executeBatch(List batch) { System.err.println("[codeflash-tracer] Rollback failed: " + re.getMessage()); } } - totalWriteNs.addAndGet(System.nanoTime() - writeStart); - batchCount.incrementAndGet(); - taskCount.addAndGet(batch.size()); } public void flush() { @@ -192,15 +182,6 @@ public void flush() { } } - public String getTimingSummary() { - long writeMs = totalWriteNs.get() / 1_000_000; - int batches = batchCount.get(); - int tasks = taskCount.get(); - return "writes=" + writeMs + "ms (" + tasks + " tasks in " + batches + " 
batches" - + (batches > 0 ? ", avg=" + String.format("%.1f", (double) tasks / batches) + " tasks/batch" : "") - + ") dump=" + dumpToFileMs + "ms"; - } - public void close() { running.set(false); try { @@ -218,7 +199,6 @@ public void close() { } if (inMemory) { - long dumpStart = System.nanoTime(); try { connection.commit(); connection.setAutoCommit(true); @@ -228,7 +208,6 @@ public void close() { } catch (SQLException e) { System.err.println("[codeflash-tracer] Failed to write trace DB to disk: " + e.getMessage()); } - dumpToFileMs = (System.nanoTime() - dumpStart) / 1_000_000; } try { From fefccd5935ac4e8d758f999ea807733b3a0ffbcd Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 05:28:34 -0500 Subject: [PATCH 09/23] fix: drop JFR inline event config that breaks JDK 11 The jdk.ExecutionSample#period=1ms syntax in -XX:StartFlightRecording is only supported on JDK 13+. On JDK 11 (CI), it causes "Failure when starting JFR on_create_vm_2" and no JFR file is created. The settings=profile preset still provides 10ms CPU sampling. 
--- codeflash/languages/java/tracer.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/codeflash/languages/java/tracer.py b/codeflash/languages/java/tracer.py index 8e8348681..b971e5526 100644 --- a/codeflash/languages/java/tracer.py +++ b/codeflash/languages/java/tracer.py @@ -124,12 +124,7 @@ def create_tracer_config( def build_jfr_env(self, jfr_file: Path) -> dict[str, str]: env = os.environ.copy() - # Use profile settings with increased sampling frequency (1ms instead of default 10ms) - # This captures more samples for short-running programs - jfr_opts = ( - f"-XX:StartFlightRecording=filename={jfr_file.resolve()},settings=profile,dumponexit=true" - ",jdk.ExecutionSample#period=1ms" - ) + jfr_opts = f"-XX:StartFlightRecording=filename={jfr_file.resolve()},settings=profile,dumponexit=true" existing = env.get("JAVA_TOOL_OPTIONS", "") env["JAVA_TOOL_OPTIONS"] = f"{existing} {jfr_opts}".strip() return env From e191f74aa6e4d0c133452325ff102e1a1ff64b90 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 06:16:39 -0500 Subject: [PATCH 10/23] chore: add diagnostic logging to compare_test_results Temporary instrumentation to debug flaky futurehouse E2E test. Logs matched/skipped/timed-out counts and did_all_timeout state. 
--- codeflash/verification/equivalence.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/codeflash/verification/equivalence.py b/codeflash/verification/equivalence.py index f660e35ea..630cec8b6 100644 --- a/codeflash/verification/equivalence.py +++ b/codeflash/verification/equivalence.py @@ -41,11 +41,17 @@ def compare_test_results( ) test_diffs: list[TestDiff] = [] did_all_timeout: bool = True + _matched_count = 0 + _skipped_cdd_only = 0 + _skipped_init_state = 0 + _skipped_none = 0 + _timed_out_count = 0 for test_id in test_ids_superset: original_test_result = original_results.get_by_unique_invocation_loop_id(test_id) cdd_test_result = candidate_results.get_by_unique_invocation_loop_id(test_id) if cdd_test_result is not None and original_test_result is None: + _skipped_cdd_only += 1 continue # If helper function instance_state verification is not present, that's ok. continue if ( @@ -53,11 +59,15 @@ def compare_test_results( and original_test_result.verification_type == VerificationType.INIT_STATE_HELPER and cdd_test_result is None ): + _skipped_init_state += 1 continue if original_test_result is None or cdd_test_result is None: + _skipped_none += 1 continue + _matched_count += 1 did_all_timeout = did_all_timeout and original_test_result.timed_out if original_test_result.timed_out: + _timed_out_count += 1 continue superset_obj = False if original_test_result.verification_type and ( @@ -148,6 +158,23 @@ def compare_test_results( ) sys.setrecursionlimit(original_recursion_limit) + logger.info( + f"[compare_test_results] superset={len(test_ids_superset)} matched={_matched_count} " + f"skipped(cdd_only={_skipped_cdd_only} init_state={_skipped_init_state} none={_skipped_none}) " + f"timed_out={_timed_out_count} did_all_timeout={did_all_timeout} diffs={len(test_diffs)} " + f"pass_fail_only={pass_fail_only} orig_len={len(original_results)} cand_len={len(candidate_results)}" + ) + if did_all_timeout and _matched_count > 0 and 
_matched_count <= 3: + # Log a few sample matched IDs for debugging + _sample_ids = [] + for test_id in test_ids_superset: + orig = original_results.get_by_unique_invocation_loop_id(test_id) + cand = candidate_results.get_by_unique_invocation_loop_id(test_id) + if orig is not None and cand is not None: + _sample_ids.append(f" id={test_id} orig_timed_out={orig.timed_out} orig_pass={orig.did_pass}") + if len(_sample_ids) >= 3: + break + logger.info(f"[compare_test_results] sample matched: {_sample_ids}") if did_all_timeout: return False, test_diffs return len(test_diffs) == 0, test_diffs From 986654b7e67d4b004af3cc98328251383cefd19a Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 06:38:08 -0500 Subject: [PATCH 11/23] fix: pin PYTHONHASHSEED=0 in test env and enhance diff diagnostics Set PYTHONHASHSEED=0 in test subprocess environments so original and candidate runs use identical hash behavior, eliminating a source of non-deterministic return-value comparisons. Also upgrade diff logging from debug to info level with actual types and repr values for DID_PASS, RETURN_VALUE, and STDOUT diffs. --- codeflash/languages/function_optimizer.py | 5 +++++ codeflash/verification/equivalence.py | 26 +++++++++++++++++------ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/codeflash/languages/function_optimizer.py b/codeflash/languages/function_optimizer.py index d9b4918fd..9c42070ef 100644 --- a/codeflash/languages/function_optimizer.py +++ b/codeflash/languages/function_optimizer.py @@ -3253,6 +3253,11 @@ def get_test_env( test_env["CODEFLASH_TEST_ITERATION"] = str(codeflash_test_iteration) test_env["CODEFLASH_TRACER_DISABLE"] = str(codeflash_tracer_disable) test_env["CODEFLASH_LOOP_INDEX"] = str(codeflash_loop_index) + # Pin PYTHONHASHSEED so original and candidate test processes use the same hash seed. 
+ # Without this, each subprocess gets a random seed, which can cause non-deterministic + # iteration order in sets/dicts and lead to flaky return-value comparisons. + if "PYTHONHASHSEED" not in test_env: + test_env["PYTHONHASHSEED"] = "0" return test_env def line_profiler_step( diff --git a/codeflash/verification/equivalence.py b/codeflash/verification/equivalence.py index 630cec8b6..68cf216de 100644 --- a/codeflash/verification/equivalence.py +++ b/codeflash/verification/equivalence.py @@ -111,6 +111,11 @@ def compare_test_results( original_pytest_error=original_pytest_error, ) ) + logger.info( + f"[DIFF] scope=DID_PASS test_id={test_id} " + f"orig_pass={original_test_result.did_pass} cand_pass={cdd_test_result.did_pass} " + f"test_type={original_test_result.test_type} cand_error={cdd_pytest_error[:200] if cdd_pytest_error else 'none'}" + ) elif not pass_fail_only and not comparator( original_test_result.return_value, cdd_test_result.return_value, superset_obj=superset_obj @@ -129,13 +134,15 @@ def compare_test_results( ) try: - logger.debug( - f"File Name: {original_test_result.file_name}\n" - f"Test Type: {original_test_result.test_type}\n" - f"Verification Type: {original_test_result.verification_type}\n" - f"Invocation ID: {original_test_result.id}\n" - f"Original return value: {original_test_result.return_value}\n" - f"Candidate return value: {cdd_test_result.return_value}\n" + _orig_rv = original_test_result.return_value + _cand_rv = cdd_test_result.return_value + logger.info( + f"[DIFF] scope=RETURN_VALUE test_id={test_id} " + f"orig_type={type(_orig_rv).__name__} cand_type={type(_cand_rv).__name__} " + f"orig_pass={original_test_result.did_pass} cand_pass={cdd_test_result.did_pass} " + f"test_type={original_test_result.test_type} " + f"orig_repr={safe_repr(_orig_rv)[:200]} " + f"cand_repr={safe_repr(_cand_rv)[:200]}" ) except Exception as e: logger.error(e) @@ -156,6 +163,11 @@ def compare_test_results( original_pytest_error=original_pytest_error, ) ) + 
logger.info( + f"[DIFF] scope=STDOUT test_id={test_id} " + f"orig_stdout={str(original_test_result.stdout)[:200]} " + f"cand_stdout={str(cdd_test_result.stdout)[:200]}" + ) sys.setrecursionlimit(original_recursion_limit) logger.info( From 82ec301fad6f78921dee84b9a98fcd22e76427c7 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 06:49:43 -0500 Subject: [PATCH 12/23] chore: remove diagnostic logging from compare_test_results --- codeflash/verification/equivalence.py | 53 ++++----------------------- 1 file changed, 7 insertions(+), 46 deletions(-) diff --git a/codeflash/verification/equivalence.py b/codeflash/verification/equivalence.py index 68cf216de..f660e35ea 100644 --- a/codeflash/verification/equivalence.py +++ b/codeflash/verification/equivalence.py @@ -41,17 +41,11 @@ def compare_test_results( ) test_diffs: list[TestDiff] = [] did_all_timeout: bool = True - _matched_count = 0 - _skipped_cdd_only = 0 - _skipped_init_state = 0 - _skipped_none = 0 - _timed_out_count = 0 for test_id in test_ids_superset: original_test_result = original_results.get_by_unique_invocation_loop_id(test_id) cdd_test_result = candidate_results.get_by_unique_invocation_loop_id(test_id) if cdd_test_result is not None and original_test_result is None: - _skipped_cdd_only += 1 continue # If helper function instance_state verification is not present, that's ok. 
continue if ( @@ -59,15 +53,11 @@ def compare_test_results( and original_test_result.verification_type == VerificationType.INIT_STATE_HELPER and cdd_test_result is None ): - _skipped_init_state += 1 continue if original_test_result is None or cdd_test_result is None: - _skipped_none += 1 continue - _matched_count += 1 did_all_timeout = did_all_timeout and original_test_result.timed_out if original_test_result.timed_out: - _timed_out_count += 1 continue superset_obj = False if original_test_result.verification_type and ( @@ -111,11 +101,6 @@ def compare_test_results( original_pytest_error=original_pytest_error, ) ) - logger.info( - f"[DIFF] scope=DID_PASS test_id={test_id} " - f"orig_pass={original_test_result.did_pass} cand_pass={cdd_test_result.did_pass} " - f"test_type={original_test_result.test_type} cand_error={cdd_pytest_error[:200] if cdd_pytest_error else 'none'}" - ) elif not pass_fail_only and not comparator( original_test_result.return_value, cdd_test_result.return_value, superset_obj=superset_obj @@ -134,15 +119,13 @@ def compare_test_results( ) try: - _orig_rv = original_test_result.return_value - _cand_rv = cdd_test_result.return_value - logger.info( - f"[DIFF] scope=RETURN_VALUE test_id={test_id} " - f"orig_type={type(_orig_rv).__name__} cand_type={type(_cand_rv).__name__} " - f"orig_pass={original_test_result.did_pass} cand_pass={cdd_test_result.did_pass} " - f"test_type={original_test_result.test_type} " - f"orig_repr={safe_repr(_orig_rv)[:200]} " - f"cand_repr={safe_repr(_cand_rv)[:200]}" + logger.debug( + f"File Name: {original_test_result.file_name}\n" + f"Test Type: {original_test_result.test_type}\n" + f"Verification Type: {original_test_result.verification_type}\n" + f"Invocation ID: {original_test_result.id}\n" + f"Original return value: {original_test_result.return_value}\n" + f"Candidate return value: {cdd_test_result.return_value}\n" ) except Exception as e: logger.error(e) @@ -163,30 +146,8 @@ def compare_test_results( 
original_pytest_error=original_pytest_error, ) ) - logger.info( - f"[DIFF] scope=STDOUT test_id={test_id} " - f"orig_stdout={str(original_test_result.stdout)[:200]} " - f"cand_stdout={str(cdd_test_result.stdout)[:200]}" - ) sys.setrecursionlimit(original_recursion_limit) - logger.info( - f"[compare_test_results] superset={len(test_ids_superset)} matched={_matched_count} " - f"skipped(cdd_only={_skipped_cdd_only} init_state={_skipped_init_state} none={_skipped_none}) " - f"timed_out={_timed_out_count} did_all_timeout={did_all_timeout} diffs={len(test_diffs)} " - f"pass_fail_only={pass_fail_only} orig_len={len(original_results)} cand_len={len(candidate_results)}" - ) - if did_all_timeout and _matched_count > 0 and _matched_count <= 3: - # Log a few sample matched IDs for debugging - _sample_ids = [] - for test_id in test_ids_superset: - orig = original_results.get_by_unique_invocation_loop_id(test_id) - cand = candidate_results.get_by_unique_invocation_loop_id(test_id) - if orig is not None and cand is not None: - _sample_ids.append(f" id={test_id} orig_timed_out={orig.timed_out} orig_pass={orig.did_pass}") - if len(_sample_ids) >= 3: - break - logger.info(f"[compare_test_results] sample matched: {_sample_ids}") if did_all_timeout: return False, test_diffs return len(test_diffs) == 0, test_diffs From 70260f22b351474110e5a6fa46b36f545c8adfa7 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 07:39:49 -0500 Subject: [PATCH 13/23] fix: ensure language_version is detected before optimization API calls JavaSupport.ensure_runtime_environment() was never called during the optimization flow, so _language_version stayed None and the backend received language_version=null. The LLM had no Java version constraint, causing it to generate Java 16+ APIs (e.g. Stream.toList()) for Java 11 projects. 
--- codeflash/languages/function_optimizer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/codeflash/languages/function_optimizer.py b/codeflash/languages/function_optimizer.py index 9c42070ef..71ad03b18 100644 --- a/codeflash/languages/function_optimizer.py +++ b/codeflash/languages/function_optimizer.py @@ -489,6 +489,7 @@ def __init__( else function_to_optimize.file_path.read_text(encoding="utf8") ) self.language_support = current_language_support() + self.language_support.ensure_runtime_environment(self.project_root) if not function_to_optimize_ast: self.function_to_optimize_ast = self._resolve_function_ast( self.function_to_optimize_source_code, function_to_optimize.function_name, function_to_optimize.parents From b05561ef9ecbcb55a46f6256be0eec2c6c198484 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 07:51:08 -0500 Subject: [PATCH 14/23] chore: replace console.print with logger.info for Java project detection --- codeflash/tracer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/codeflash/tracer.py b/codeflash/tracer.py index 48920be8c..4a7d24585 100644 --- a/codeflash/tracer.py +++ b/codeflash/tracer.py @@ -349,10 +349,10 @@ def _run_java_tracer(existing_args: Namespace | None = None) -> ArgumentParser: max_function_count = getattr(config, "max_function_count", 256) timeout = int(getattr(config, "timeout", None) or getattr(config, "tracer_timeout", 0) or 0) - console.print("[bold]Java project detected[/]") - console.print(f" Project root: {project_root}") - console.print(f" Module root: {getattr(config, 'module_root', '?')}") - console.print(f" Tests root: {getattr(config, 'tests_root', '?')}") + logger.info("Java project detected") + logger.info(" Project root: %s", project_root) + logger.info(" Module root: %s", getattr(config, "module_root", "?")) + logger.info(" Tests root: %s", getattr(config, "tests_root", "?")) from codeflash.code_utils.code_utils import get_run_tmp_file from 
codeflash.languages.java.tracer import JavaTracer, run_java_tracer From 151df774a4ca2764b2bad28475e61413614402fe Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 08:29:46 -0500 Subject: [PATCH 15/23] perf: use --effort low for java-tracer E2E to reduce CI time --- tests/scripts/end_to_end_test_java_tracer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/scripts/end_to_end_test_java_tracer.py b/tests/scripts/end_to_end_test_java_tracer.py index 0f9f8a2ff..5d92662ec 100644 --- a/tests/scripts/end_to_end_test_java_tracer.py +++ b/tests/scripts/end_to_end_test_java_tracer.py @@ -51,6 +51,8 @@ def run_test(expected_improvement_pct: int) -> bool: "-m", "codeflash.main", "--no-pr", + "--effort", + "low", "optimize", "java", "-cp", From ecf4e63eca4032217ea2db4eafd027ef8f0f66e1 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 09:02:45 -0500 Subject: [PATCH 16/23] perf: reduce Java E2E looping time to 5s and cache runtime JAR build Make TOTAL_LOOPING_TIME configurable via CODEFLASH_LOOPING_TIME env var (defaults to 10s). Set to 5s in Java E2E CI jobs to cut verification time per candidate. Also cache the codeflash-runtime JAR keyed on source hash to skip mvn install when unchanged. 
--- .github/workflows/ci.yaml | 9 +++++++++ codeflash/code_utils/config_consts.py | 3 ++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 368459608..e6e55298b 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -436,6 +436,7 @@ jobs: RETRY_DELAY: 5 EXPECTED_IMPROVEMENT_PCT: ${{ matrix.expected_improvement }} CODEFLASH_END_TO_END: 1 + CODEFLASH_LOOPING_TIME: 5 steps: - uses: actions/checkout@v6 with: @@ -469,7 +470,15 @@ jobs: - name: Install dependencies run: uv sync + - name: Cache codeflash-runtime JAR + id: runtime-jar-cache + uses: actions/cache@v4 + with: + path: ~/.m2/repository/io/codeflash + key: codeflash-runtime-${{ hashFiles('codeflash-java-runtime/pom.xml', 'codeflash-java-runtime/src/**') }} + - name: Build and install codeflash-runtime JAR + if: steps.runtime-jar-cache.outputs.cache-hit != 'true' run: | cd codeflash-java-runtime mvn install -q -DskipTests diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py index ff6494d73..c8cb8d884 100644 --- a/codeflash/code_utils/config_consts.py +++ b/codeflash/code_utils/config_consts.py @@ -1,5 +1,6 @@ from __future__ import annotations +import os from enum import Enum from typing import Any, Union @@ -17,7 +18,7 @@ CONCURRENCY_FACTOR = 10 # Number of concurrent executions for concurrency benchmark MAX_TEST_FUNCTION_RUNS = 50 MAX_CUMULATIVE_TEST_RUNTIME_NANOSECONDS = 100e6 # 100ms -TOTAL_LOOPING_TIME = 10.0 # 10 second candidate benchmarking budget +TOTAL_LOOPING_TIME = float(os.getenv("CODEFLASH_LOOPING_TIME", "10.0")) # candidate benchmarking budget (seconds) COVERAGE_THRESHOLD = 60.0 MIN_TESTCASE_PASSED_THRESHOLD = 6 REPEAT_OPTIMIZATION_PROBABILITY = 0.1 From 0d928f2b49c7dccf18e3f28e43fd5d4616c7bb99 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 09:05:30 -0500 Subject: [PATCH 17/23] perf: merge Java tracer into single-pass JVM invocation Combine JFR 
profiling and argument capture agent into one JAVA_TOOL_OPTIONS string, running the target program once instead of twice. JFR and javaagent are orthogonal JVM features that coexist without conflict. Keeps build_jfr_env/build_agent_env for standalone use. --- codeflash/languages/java/tracer.py | 38 +++++++++++++++++++----------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/codeflash/languages/java/tracer.py b/codeflash/languages/java/tracer.py index b971e5526..bdeec34e0 100644 --- a/codeflash/languages/java/tracer.py +++ b/codeflash/languages/java/tracer.py @@ -61,7 +61,7 @@ def _run_java_with_graceful_timeout( class JavaTracer: - """Orchestrates two-stage Java tracing: JFR profiling + argument capture.""" + """Orchestrates Java tracing: combined JFR profiling + argument capture in a single JVM invocation.""" def trace( self, @@ -72,29 +72,23 @@ def trace( max_function_count: int = 256, timeout: int = 0, ) -> tuple[Path, Path]: - """Run the Java program twice: once for profiling, once for arg capture. + """Run the Java program once with both JFR profiling and argument capture. Returns (trace_db_path, jfr_file_path). 
""" jfr_file = trace_db_path.with_suffix(".jfr") trace_db_path.parent.mkdir(parents=True, exist_ok=True) - # Stage 1: JFR Profiling - logger.info("Stage 1: Running JFR profiling...") - jfr_env = self.build_jfr_env(jfr_file) - _run_java_with_graceful_timeout(java_command, jfr_env, timeout, "JFR profiling") - - if not jfr_file.exists(): - logger.warning("JFR file was not created at %s", jfr_file) - - # Stage 2: Argument Capture via Tracing Agent - logger.info("Stage 2: Running argument capture...") config_path = self.create_tracer_config( trace_db_path, packages, project_root=project_root, max_function_count=max_function_count, timeout=timeout ) - agent_env = self.build_agent_env(config_path) - _run_java_with_graceful_timeout(java_command, agent_env, timeout, "Argument capture") + combined_env = self.build_combined_env(jfr_file, config_path) + + logger.info("Running combined JFR profiling + argument capture...") + _run_java_with_graceful_timeout(java_command, combined_env, timeout, "Combined tracing") + if not jfr_file.exists(): + logger.warning("JFR file was not created at %s", jfr_file) if not trace_db_path.exists(): logger.error("Trace database was not created at %s", trace_db_path) @@ -141,6 +135,22 @@ def build_agent_env(self, config_path: Path, classpath: str | None = None) -> di env["JAVA_TOOL_OPTIONS"] = f"{existing} {agent_opts}".strip() return env + def build_combined_env(self, jfr_file: Path, config_path: Path, classpath: str | None = None) -> dict[str, str]: + """Build env with both JFR recording and tracing agent in a single JAVA_TOOL_OPTIONS.""" + env = os.environ.copy() + jfr_opts = ( + f"-XX:StartFlightRecording=filename={jfr_file.resolve()},settings=profile,dumponexit=true" + ",jdk.ExecutionSample#period=1ms" + ) + agent_jar = find_agent_jar(classpath=classpath) + if agent_jar is None: + msg = "codeflash-runtime JAR not found, cannot run tracing agent" + raise FileNotFoundError(msg) + agent_opts = f"{ADD_OPENS_FLAGS} 
-javaagent:{agent_jar}=trace={config_path.resolve()}" + existing = env.get("JAVA_TOOL_OPTIONS", "") + env["JAVA_TOOL_OPTIONS"] = f"{existing} {jfr_opts} {agent_opts}".strip() + return env + @staticmethod def detect_packages_from_source(module_root: Path) -> list[str]: """Scan Java files for package declarations and return unique package prefixes.""" From 013c83f5e49659d8232a3cac512516b6fad14919 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 09:11:02 -0500 Subject: [PATCH 18/23] fix: drop jdk.ExecutionSample#period from combined JFR opts (unsupported on Java 11) --- codeflash/languages/java/tracer.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/codeflash/languages/java/tracer.py b/codeflash/languages/java/tracer.py index bdeec34e0..649369d97 100644 --- a/codeflash/languages/java/tracer.py +++ b/codeflash/languages/java/tracer.py @@ -138,10 +138,7 @@ def build_agent_env(self, config_path: Path, classpath: str | None = None) -> di def build_combined_env(self, jfr_file: Path, config_path: Path, classpath: str | None = None) -> dict[str, str]: """Build env with both JFR recording and tracing agent in a single JAVA_TOOL_OPTIONS.""" env = os.environ.copy() - jfr_opts = ( - f"-XX:StartFlightRecording=filename={jfr_file.resolve()},settings=profile,dumponexit=true" - ",jdk.ExecutionSample#period=1ms" - ) + jfr_opts = f"-XX:StartFlightRecording=filename={jfr_file.resolve()},settings=profile,dumponexit=true" agent_jar = find_agent_jar(classpath=classpath) if agent_jar is None: msg = "codeflash-runtime JAR not found, cannot run tracing agent" From cb87763a2d508b14bd9444ed570fbc63450d5b41 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 12:58:54 -0500 Subject: [PATCH 19/23] fix: skip environment approval gate for trusted users on workflow_dispatch --- .github/workflows/ci.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 
e6e55298b..3b5b1c74c 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -258,7 +258,7 @@ jobs: - name: init-optimization script: end_to_end_test_init_optimization.py expected_improvement: 10 - environment: ${{ (github.event_name == 'workflow_dispatch' || (contains(toJSON(github.event.pull_request.files.*.filename), '.github/workflows/') && github.event.pull_request.user.login != 'misrasaurabh1' && github.event.pull_request.user.login != 'KRRT7')) && 'external-trusted-contributors' || '' }} + environment: ${{ ((github.event_name == 'workflow_dispatch' && github.actor != 'misrasaurabh1' && github.actor != 'KRRT7') || (contains(toJSON(github.event.pull_request.files.*.filename), '.github/workflows/') && github.event.pull_request.user.login != 'misrasaurabh1' && github.event.pull_request.user.login != 'KRRT7')) && 'external-trusted-contributors' || '' }} runs-on: ubuntu-latest env: CODEFLASH_AIS_SERVER: prod @@ -345,7 +345,7 @@ jobs: script: end_to_end_test_js_ts_class.py js_project_dir: code_to_optimize/js/code_to_optimize_ts expected_improvement: 30 - environment: ${{ (github.event_name == 'workflow_dispatch' || (contains(toJSON(github.event.pull_request.files.*.filename), '.github/workflows/') && github.event.pull_request.user.login != 'misrasaurabh1' && github.event.pull_request.user.login != 'KRRT7')) && 'external-trusted-contributors' || '' }} + environment: ${{ ((github.event_name == 'workflow_dispatch' && github.actor != 'misrasaurabh1' && github.actor != 'KRRT7') || (contains(toJSON(github.event.pull_request.files.*.filename), '.github/workflows/') && github.event.pull_request.user.login != 'misrasaurabh1' && github.event.pull_request.user.login != 'KRRT7')) && 'external-trusted-contributors' || '' }} runs-on: ubuntu-latest env: CODEFLASH_AIS_SERVER: prod @@ -425,7 +425,7 @@ jobs: script: end_to_end_test_java_void_optimization.py expected_improvement: 70 remove_git: true - environment: ${{ (github.event_name == 'workflow_dispatch' || 
(contains(toJSON(github.event.pull_request.files.*.filename), '.github/workflows/') && github.event.pull_request.user.login != 'misrasaurabh1' && github.event.pull_request.user.login != 'KRRT7')) && 'external-trusted-contributors' || '' }} + environment: ${{ ((github.event_name == 'workflow_dispatch' && github.actor != 'misrasaurabh1' && github.actor != 'KRRT7') || (contains(toJSON(github.event.pull_request.files.*.filename), '.github/workflows/') && github.event.pull_request.user.login != 'misrasaurabh1' && github.event.pull_request.user.login != 'KRRT7')) && 'external-trusted-contributors' || '' }} runs-on: ubuntu-latest env: CODEFLASH_AIS_SERVER: prod From 40f16b565ab768578e53ae6e6be5d536312a0237 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 13:09:36 -0500 Subject: [PATCH 20/23] ci: add standalone Java E2E workflow for isolated testing --- .github/workflows/java-e2e.yaml | 77 +++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 .github/workflows/java-e2e.yaml diff --git a/.github/workflows/java-e2e.yaml b/.github/workflows/java-e2e.yaml new file mode 100644 index 000000000..0bfc979b6 --- /dev/null +++ b/.github/workflows/java-e2e.yaml @@ -0,0 +1,77 @@ +name: Java E2E Tests +on: + workflow_dispatch: + +jobs: + e2e-java: + strategy: + fail-fast: false + matrix: + include: + - name: java-fibonacci-nogit + script: end_to_end_test_java_fibonacci.py + expected_improvement: 70 + remove_git: true + - name: java-tracer + script: end_to_end_test_java_tracer.py + expected_improvement: 10 + - name: java-void-optimization-nogit + script: end_to_end_test_java_void_optimization.py + expected_improvement: 70 + remove_git: true + runs-on: ubuntu-latest + env: + CODEFLASH_AIS_SERVER: prod + POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }} + CODEFLASH_API_KEY: ${{ secrets.CODEFLASH_API_KEY }} + COLUMNS: 110 + MAX_RETRIES: 3 + RETRY_DELAY: 5 + EXPECTED_IMPROVEMENT_PCT: ${{ matrix.expected_improvement }} + CODEFLASH_END_TO_END: 1 
+ CODEFLASH_LOOPING_TIME: 5 + steps: + - uses: actions/checkout@v6 + + - name: Set up JDK 11 + uses: actions/setup-java@v5 + with: + java-version: '11' + distribution: 'temurin' + cache: maven + + - name: Install uv + uses: astral-sh/setup-uv@v8.0.0 + with: + python-version: 3.11.6 + enable-cache: true + + - name: Install dependencies + run: uv sync + + - name: Cache codeflash-runtime JAR + id: runtime-jar-cache + uses: actions/cache@v4 + with: + path: ~/.m2/repository/io/codeflash + key: codeflash-runtime-${{ hashFiles('codeflash-java-runtime/pom.xml', 'codeflash-java-runtime/src/**') }} + + - name: Build and install codeflash-runtime JAR + if: steps.runtime-jar-cache.outputs.cache-hit != 'true' + run: | + cd codeflash-java-runtime + mvn install -q -DskipTests + + - name: Remove .git + if: matrix.remove_git + run: | + if [ -d ".git" ]; then + sudo rm -rf .git + echo ".git directory removed." + else + echo ".git directory does not exist." + exit 1 + fi + + - name: Run E2E test + run: uv run python tests/scripts/${{ matrix.script }} From 5c778dfad40d6473024ec6824b5d2f7d5a634887 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 15:08:03 -0500 Subject: [PATCH 21/23] perf: trim tracer E2E workload to single function (repeatString) Keep only repeatString which reliably produces 284% improvement. Drop computeSum (marginal 16%), filterEvens and instanceMethod (no optimization found). Reduces tracer E2E from ~1h27m to ~21m. 
--- .../src/main/java/com/example/Workload.java | 44 +------------------ 1 file changed, 1 insertion(+), 43 deletions(-) diff --git a/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java b/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java index 7beb2a4ea..7dfdad95f 100644 --- a/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java +++ b/tests/test_languages/fixtures/java_tracer_e2e/src/main/java/com/example/Workload.java @@ -1,18 +1,7 @@ package com.example; -import java.util.ArrayList; -import java.util.List; - public class Workload { - public static int computeSum(int n) { - int sum = 0; - for (int i = 0; i < n; i++) { - sum += i; - } - return sum; - } - public static String repeatString(String s, int count) { String result = ""; for (int i = 0; i < count; i++) { @@ -21,46 +10,15 @@ public static String repeatString(String s, int count) { return result; } - public static List filterEvens(List numbers) { - List result = new ArrayList<>(); - for (int n : numbers) { - if (n % 2 == 0) { - result.add(n); - } - } - return result; - } - - public int instanceMethod(int x, int y) { - return x * y + computeSum(x); - } - public static void main(String[] args) { - // Run methods with large inputs so JFR can capture CPU samples. - // Small inputs finish too fast (<1ms) for JFR's 10ms sampling interval. + // Run with large inputs so JFR can capture CPU samples. 
for (int round = 0; round < 1000; round++) { - computeSum(100_000); repeatString("hello world ", 1000); - - List nums = new ArrayList<>(); - for (int i = 1; i <= 10_000; i++) nums.add(i); - filterEvens(nums); - - Workload w = new Workload(); - w.instanceMethod(100_000, 42); } // Also call with small inputs for variety in traced args - System.out.println("computeSum(100) = " + computeSum(100)); System.out.println("repeatString(\"ab\", 3) = " + repeatString("ab", 3)); - List small = new ArrayList<>(); - for (int i = 1; i <= 10; i++) small.add(i); - System.out.println("filterEvens(1..10) = " + filterEvens(small)); - - Workload w = new Workload(); - System.out.println("instanceMethod(5, 3) = " + w.instanceMethod(5, 3)); - System.out.println("Workload complete."); } } From 0cb67c1a17af4b846f3c7810179046206c8f14b4 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 15:12:48 -0500 Subject: [PATCH 22/23] fix: add --no-pr to codeflash optimize workflow to prevent CI-opened PRs --- .github/workflows/codeflash-optimize.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/codeflash-optimize.yaml b/.github/workflows/codeflash-optimize.yaml index 9884665da..ab08aa1f8 100644 --- a/.github/workflows/codeflash-optimize.yaml +++ b/.github/workflows/codeflash-optimize.yaml @@ -43,4 +43,4 @@ jobs: - name: ⚡️Codeflash Optimization id: optimize_code run: | - uv run codeflash --benchmark --testgen-review \ No newline at end of file + uv run codeflash --benchmark --testgen-review --no-pr \ No newline at end of file From b737f71e46a5c21f518ecb11f66532d3bbb5766a Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 16:05:27 -0500 Subject: [PATCH 23/23] fix: update test assertions to match simplified Workload fixture The Workload.java fixture was trimmed to only repeatString but test files still asserted computeSum, filterEvens, and instanceMethod. 
--- .../test_java/test_java_tracer_e2e.py | 23 ++++++++----------- .../test_java/test_java_tracer_integration.py | 8 +++---- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/tests/test_languages/test_java/test_java_tracer_e2e.py b/tests/test_languages/test_java/test_java_tracer_e2e.py index 157f23eb6..f16f19aa2 100644 --- a/tests/test_languages/test_java/test_java_tracer_e2e.py +++ b/tests/test_languages/test_java/test_java_tracer_e2e.py @@ -81,14 +81,11 @@ def test_agent_captures_invocations(self, compiled_workload: Path, trace_db: Pat conn = sqlite3.connect(str(trace_db)) try: rows = conn.execute("SELECT function, classname, descriptor, length(args) FROM function_calls").fetchall() - assert len(rows) >= 5, f"Expected at least 5 captured invocations, got {len(rows)}" + assert len(rows) >= 2, f"Expected at least 2 captured invocations, got {len(rows)}" # Check that specific methods were captured functions = {row[0] for row in rows} - assert "computeSum" in functions assert "repeatString" in functions - assert "filterEvens" in functions - assert "instanceMethod" in functions # Verify all rows have non-empty args blobs for row in rows: @@ -97,7 +94,7 @@ def test_agent_captures_invocations(self, compiled_workload: Path, trace_db: Pat # Verify metadata metadata = dict(conn.execute("SELECT key, value FROM metadata").fetchall()) assert "totalCaptures" in metadata - assert int(metadata["totalCaptures"]) >= 5 + assert int(metadata["totalCaptures"]) >= 2 finally: conn.close() @@ -136,11 +133,11 @@ def test_max_function_count_limit(self, compiled_workload: Path, trace_db: Path) conn = sqlite3.connect(str(trace_db)) try: - # computeSum is called 4 times (2 direct + 2 from instanceMethod) - compute_count = conn.execute( - "SELECT COUNT(*) FROM function_calls WHERE function = 'computeSum'" + # repeatString is called 1000+ times; with maxFunctionCount=2, at most 2 should be captured + repeat_count = conn.execute( + "SELECT COUNT(*) FROM function_calls WHERE 
function = 'repeatString'" ).fetchone()[0] - assert compute_count <= 2, f"Expected at most 2 computeSum captures, got {compute_count}" + assert repeat_count <= 2, f"Expected at most 2 repeatString captures, got {repeat_count}" finally: conn.close() @@ -198,7 +195,6 @@ def test_generates_test_files(self, compiled_workload: Path, trace_db: Path, tmp assert "package codeflash.replay;" in content assert "import org.junit.jupiter.api.Test;" in content assert "ReplayHelper" in content - assert "replay_computeSum_0" in content assert "replay_repeatString_0" in content def test_metadata_parsing(self, compiled_workload: Path, trace_db: Path, tmp_path: Path) -> None: @@ -243,7 +239,7 @@ def test_metadata_parsing(self, compiled_workload: Path, trace_db: Path, tmp_pat assert "functions" in metadata assert "trace_file" in metadata assert "classname" in metadata - assert "computeSum" in metadata["functions"] + assert "repeatString" in metadata["functions"] assert metadata["classname"] == "com.example.Workload" assert metadata["trace_file"] == trace_db.as_posix() @@ -267,7 +263,7 @@ def test_two_stage_trace(self, compiled_workload: Path, tmp_path: Path) -> None: conn = sqlite3.connect(str(trace_db)) try: count = conn.execute("SELECT COUNT(*) FROM function_calls").fetchone()[0] - assert count >= 5, f"Expected at least 5 captured invocations, got {count}" + assert count >= 2, f"Expected at least 2 captured invocations, got {count}" finally: conn.close() @@ -295,8 +291,7 @@ def test_full_trace_and_replay_generation(self, compiled_workload: Path, tmp_pat workload_files = [f for f in test_files if "Workload" in f.name and "ConstructorAccess" not in f.name] assert len(workload_files) == 1 content = workload_files[0].read_text(encoding="utf-8") - assert "replay_computeSum" in content - assert "replay_instanceMethod" in content + assert "replay_repeatString" in content def test_package_detection(self) -> None: """Test that package detection finds Java packages from source files.""" diff 
--git a/tests/test_languages/test_java/test_java_tracer_integration.py b/tests/test_languages/test_java/test_java_tracer_integration.py index f6ffefdf2..6927faba4 100644 --- a/tests/test_languages/test_java/test_java_tracer_integration.py +++ b/tests/test_languages/test_java/test_java_tracer_integration.py @@ -87,7 +87,6 @@ def test_discover_functions_from_replay_tests(self, traced_workload: tuple) -> N assert func.language == "java", f"Expected language='java', got '{func.language}'" assert func.file_path == file_path - assert "computeSum" in all_func_names assert "repeatString" in all_func_names def test_discover_tests_for_replay_tests(self, traced_workload: tuple) -> None: @@ -111,7 +110,6 @@ def test_discover_tests_for_replay_tests(self, traced_workload: tuple) -> None: func_name = qualified_name.split(".")[-1] if "." in qualified_name else qualified_name matched_func_names.add(func_name) - assert "computeSum" in matched_func_names, f"computeSum not found in: {result.keys()}" assert "repeatString" in matched_func_names, f"repeatString not found in: {result.keys()}" # Each function should have at least one test @@ -222,8 +220,8 @@ def test_full_pipeline(self, compiled_workload: Path, tmp_path: Path) -> None: assert len(function_to_tests) > 0, "No function-to-test mappings" # Verify function_to_tests has entries for our traced functions - has_compute_sum = any("computeSum" in key for key in function_to_tests) - assert has_compute_sum, f"computeSum not in function_to_tests keys: {list(function_to_tests.keys())}" + has_repeat_string = any("repeatString" in key for key in function_to_tests) + assert has_repeat_string, f"repeatString not in function_to_tests keys: {list(function_to_tests.keys())}" # Step 4: Rank functions (like optimizer.rank_all_functions_globally) if jfr_file.exists(): @@ -280,7 +278,7 @@ def test_instrument_and_compile_replay_tests(self, compiled_workload: Path, tmp_ source_code = WORKLOAD_SOURCE.read_text(encoding="utf-8") source_functions = 
discover_functions_from_source(source_code, file_path=WORKLOAD_SOURCE) # Pick the first function with a return type for instrumentation - target_func = next(f for f in source_functions if f.function_name == "computeSum") + target_func = next(f for f in source_functions if f.function_name == "repeatString") replay_test_file = replay_test_paths[0] test_source = replay_test_file.read_text(encoding="utf-8")