AztecProtocol · federicobarbacovi · Mar 25, 2026 · Mar 25, 2026 · Mar 25, 2026 · Mar 26, 2026
diff --git a/BENCHMARKS.md b/BENCHMARKS.md
@@ -0,0 +1,48 @@
+# Chonk Remote Benchmarks
+
+## Summary (branch)
+
+| # | Flow | Circuits | Total (s) | Accumulate (s) | Prove (s) | Load (ms) | Peak mem (MiB) |
+|---|---|---|---|---|---|---|---|
+| 1 | deploy_ecdsar1+sponsored_fpc | 13 | 7.68 | 5.14 | 2.36 | 158 | 310.87 |
+| 2 | deploy_schnorr+sponsored_fpc | 13 | 7.37 | 4.81 | 2.37 | 160 | 311.40 |
+| 3 | ecdsar1+amm_add_liquidity_1_recursions+sponsored_fpc | 19 | 12.61 | 9.59 | 2.62 | 342 | 523.71 |
+| 4 | ecdsar1+deploy_tokenContract_with_registration+sponsored_fpc | 11 | 8.16 | 5.66 | 2.27 | 194 | 459.11 |
+| 5 | ecdsar1+token_bridge_claim_private+sponsored_fpc | 11 | 7.06 | 4.60 | 2.27 | 158 | 313.44 |
+| 6 | ecdsar1+transfer_0_recursions+private_fpc | 15 | 9.96 | 7.23 | 2.44 | 259 | 433.19 |
+| 7 | ecdsar1+transfer_0_recursions+sponsored_fpc | 9 | 6.11 | 3.79 | 2.17 | 114 | 294.02 |
+| 8 | ecdsar1+transfer_1_recursions+private_fpc | 17 | 11.26 | 8.36 | 2.55 | 301 | 509.51 |
+| 9 | ecdsar1+transfer_1_recursions+sponsored_fpc | 11 | 7.11 | 4.67 | 2.26 | 156 | 308.41 |
+| 10 | schnorr+deploy_tokenContract_with_registration+sponsored_fpc | 11 | 7.87 | 5.36 | 2.28 | 196 | 462.34 |
+
+## Baseline
+
+| # | Flow | Circuits | Total (s) | Accumulate (s) | Prove (s) | Load (ms) | Peak mem (MiB) |
+|---|---|---|---|---|---|---|---|
+| 1 | deploy_ecdsar1+sponsored_fpc | 13 | 7.63 | 4.99 | 2.46 | 155 | 311.31 |
+| 2 | deploy_schnorr+sponsored_fpc | 13 | 7.31 | 4.66 | 2.47 | 156 | 312.44 |
+| 3 | ecdsar1+amm_add_liquidity_1_recursions+sponsored_fpc | 19 | 12.82 | 9.76 | 2.67 | 343 | 513.35 |
+| 4 | ecdsar1+deploy_tokenContract_with_registration+sponsored_fpc | 11 | 8.07 | 5.57 | 2.28 | 192 | 458.79 |
+| 5 | ecdsar1+token_bridge_claim_private+sponsored_fpc | 11 | 6.97 | 4.41 | 2.38 | 160 | 316.43 |
+| 6 | ecdsar1+transfer_0_recursions+private_fpc | 15 | 10.03 | 7.27 | 2.47 | 258 | 427.96 |
+| 7 | ecdsar1+transfer_0_recursions+sponsored_fpc | 9 | 5.92 | 3.52 | 2.26 | 112 | 294.53 |
+| 8 | ecdsar1+transfer_1_recursions+private_fpc | 17 | 11.32 | 8.42 | 2.57 | 301 | 500.86 |
+| 9 | ecdsar1+transfer_1_recursions+sponsored_fpc | 11 | 7.02 | 4.49 | 2.36 | 153 | 309.69 |
+| 10 | schnorr+deploy_tokenContract_with_registration+sponsored_fpc | 11 | 7.81 | 5.30 | 2.28 | 195 | 458.34 |
+
+## Branch vs Baseline (Total time)
+
+| # | Flow | Branch Total (s) | Baseline Total (s) | Δ (s) | Δ (%) |
+|---|---|---|---|---|---|
+| 1 | deploy_ecdsar1+sponsored_fpc | 7.68 | 7.63 | +0.05 | +0.66% |
+| 2 | deploy_schnorr+sponsored_fpc | 7.37 | 7.31 | +0.06 | +0.82% |
+| 3 | ecdsar1+amm_add_liquidity_1_recursions+sponsored_fpc | 12.61 | 12.82 | −0.21 | −1.64% |
+| 4 | ecdsar1+deploy_tokenContract_with_registration+sponsored_fpc | 8.16 | 8.07 | +0.09 | +1.12% |
+| 5 | ecdsar1+token_bridge_claim_private+sponsored_fpc | 7.06 | 6.97 | +0.09 | +1.29% |
+| 6 | ecdsar1+transfer_0_recursions+private_fpc | 9.96 | 10.03 | −0.07 | −0.70% |
+| 7 | ecdsar1+transfer_0_recursions+sponsored_fpc | 6.11 | 5.92 | +0.19 | +3.21% |
+| 8 | ecdsar1+transfer_1_recursions+private_fpc | 11.26 | 11.32 | −0.06 | −0.53% |
+| 9 | ecdsar1+transfer_1_recursions+sponsored_fpc | 7.11 | 7.02 | +0.09 | +1.28% |
+| 10 | schnorr+deploy_tokenContract_with_registration+sponsored_fpc | 7.87 | 7.81 | +0.06 | +0.77% |
+
+Mean Δ: +0.029 s (+0.63%). The largest per-flow change is +3.21% (flow 7), so every flow stays within an assumed ±5% run-to-run noise band.
diff --git a/WASM_FINDINGS_IVC_INTEGRATION.md b/WASM_FINDINGS_IVC_INTEGRATION.md
@@ -0,0 +1,64 @@
+# WASM Findings: Goblin Flush in IVC Integration
+
+## Summary
+
+The goblin flush test (`generateTestingIVCStack(1, 0, true)`) was crashing in WASM but working correctly with NativeUnixSocket. Root cause: WASM stack overflow due to large recursive verifier types on the stack. Fixed by increasing the WASM stack from 1 MB to 2 MB.
+
+## Root Cause: WASM Stack Overflow
+
+The crash was **not** a heap memory issue — WASM heap usage was only 338 MiB at crash time, well below the 4 GiB maximum. The problem was the 1 MB WASM stack being exhausted by the ECCVM recursive verifier's large stack-allocated types.
+
+### Why recursive types are so large
+
+The ECCVM recursive verifier operates on `bigfield` elements (`stdlib::bigfield<UltraCircuitBuilder, Bn254FqParams>`) instead of native field elements. Each `bigfield` contains 4 limbs (each a `field_t` + `uint256_t` max value) plus a `prime_basis_limb`, making it an estimated ~492 bytes in WASM release (1,280 B measured in native debug ÷ ~2.6, see the table below) vs 32 bytes for a native field element — roughly a **15x** blowup.
+
+### Stack size breakdown (native debug build, measured via sizeof)
+
+| Type | sizeof | Notes |
+|------|--------|-------|
+| `bigfield` (recursive FF) | 1,280 B | 4 limbs × (field_t + uint256_t) + prime_basis_limb |
+| `field_t<Builder>` | 224 B | ptr + 2×fr + witness_index + OriginTag (debug only) |
+| `ECCVMSumcheckVerifier` | **343,200 B** (335 KB) | Dominates the stack |
+| `ECCVMSumcheckRound` | 172,896 B (169 KB) | Contains `TupleOfArraysOfValues relation_evaluations` |
+| `AllValues` (118 × FF) | 151,040 B (147 KB) | 118 entities × 1,280 bytes each |
+| `TranslatorSumcheckVerifier` | 66,400 B (65 KB) | Smaller but still significant |
+| `TranslatorSumcheckRound` | 33,408 B (33 KB) | |
+| `ECCVMRecursiveVerifier` | 31,616 B (31 KB) | |
+| `TranslatorRecursiveVerifier` | 23,680 B (23 KB) | |
+| `Builder` | 4,864 B | |
+
+In WASM release builds (no OriginTag, 4-byte pointers), sizes are roughly **2.6x smaller** than native debug. Estimated peak stack usage during ECCVM recursive verification: **~300-500 KB**.
+
+### Why the stack overflows
+
+The main culprits are value-type members allocated on the stack:
+
+1. **`SumcheckVerifierRound::relation_evaluations`** (`TupleOfArraysOfValues`) — a flat tuple of arrays of `bigfield`, one per subrelation across ~134 ECCVM subrelations. This is a **member** of `SumcheckVerifierRound`, which is a **member** of `SumcheckVerifier`, which is a **local** in `reduce_to_ipa_opening()`.
+
+2. **`SumcheckVerifier::alphas`** (`std::array<FF, NUM_SUBRELATIONS - 1>`) — another large array of `bigfield` elements.
+
+3. **`ClaimedEvaluations`** (`AllEntities<FF>`) — 118 `bigfield` elements, allocated as a local in `SumcheckVerifier::verify()`.
+
+These all live on the stack simultaneously during ECCVM sumcheck verification.
+
+### Call stack during peak usage
+
+```
+build_goblin_flush_circuit          ~15 KB (builder, verifiers, proof)
+  └─ reduce_to_ipa_opening         ~130 KB (SumcheckVerifier as local)
+       └─ verify()                 ~70 KB (ClaimedEvaluations, gate separators)
+```
+
+The Translator verification runs sequentially after ECCVM, so their frames don't overlap.
+
+## Fix Applied
+
+1. **`barretenberg/cpp/src/CMakeLists.txt`**: WASM stack size 1 MB → **2 MB** (2097152)
+2. **`barretenberg/ts/src/barretenberg_wasm/barretenberg_wasm_main/index.ts`**: initial memory pages 35 → **49** (to match WASM module's declared minimum with larger stack)
+3. **`yarn-project/ivc-integration/src/chonk_integration.test.ts`**: goblin flush test now runs on both WASM and NativeUnixSocket
+
+Note: WASM test binaries already use 8 MB stack (`barretenberg/cpp/cmake/module.cmake`), which is why C++ tests never hit this issue.
+
+## Potential Future Optimization
+
+The `relation_evaluations` tuple in `SumcheckVerifierRound` could be heap-allocated (e.g. via `std::unique_ptr`) to reduce stack pressure. This would allow reverting the stack size increase, but 2 MB is a reasonable default with ~4-6x headroom over estimated peak usage.
diff --git a/barretenberg/cpp/scripts/test_chonk_standalone_vks_havent_changed.sh b/barretenberg/cpp/scripts/test_chonk_standalone_vks_havent_changed.sh
@@ -21,7 +21,7 @@ script_path="$root/barretenberg/cpp/scripts/test_chonk_standalone_vks_havent_cha
 # - Generate a hash for versioning: sha256sum bb-chonk-inputs.tar.gz
 # - Upload the compressed results: aws s3 cp bb-chonk-inputs.tar.gz s3://aztec-ci-artifacts/protocol/bb-chonk-inputs-[hash(0:8)].tar.gz
 # Note: In case of the "Test suite failed to run ... Unexpected token 'with' " error, need to run: docker pull aztecprotocol/build:3.0
-pinned_short_hash="50947760"
+pinned_short_hash="23853a1b"
 pinned_chonk_inputs_url="https://aztec-ci-artifacts.s3.us-east-2.amazonaws.com/protocol/bb-chonk-inputs-${pinned_short_hash}.tar.gz"
 
 function update_pinned_hash_in_script {

diff --git a/barretenberg/cpp/src/CMakeLists.txt b/barretenberg/cpp/src/CMakeLists.txt
@@ -49,7 +49,7 @@ if(WASM)
     set(CMAKE_C_FLAGS_DEBUG "-O1 -g")
     set(CMAKE_CXX_FLAGS_RELEASE "-Oz -DNDEBUG")
     set(CMAKE_C_FLAGS_RELEASE "-Oz -DNDEBUG")
-    add_link_options(-Wl,--export-memory,--import-memory,--stack-first,-z,stack-size=1048576,--max-memory=4294967296)
+    add_link_options(-Wl,--export-memory,--import-memory,--stack-first,-z,stack-size=2097152,--max-memory=4294967296)
 endif()
 
 include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${MSGPACK_INCLUDE} ${TRACY_INCLUDE} ${LMDB_INCLUDE} ${LIBDEFLATE_INCLUDE} ${HTTPLIB_INCLUDE} ${BACKWARD_INCLUDE} ${NLOHMANN_JSON_INCLUDE})
@@ -96,7 +96,7 @@ add_subdirectory(barretenberg/ext/starknet/crypto)
 add_subdirectory(barretenberg/ext/starknet/transcript)
 add_subdirectory(barretenberg/flavor)
 add_subdirectory(barretenberg/goblin)
-add_subdirectory(barretenberg/goblin_avm)
+add_subdirectory(barretenberg/goblin_without_merge)
 add_subdirectory(barretenberg/grumpkin_srs_gen)
 add_subdirectory(barretenberg/multilinear_batching)
 add_subdirectory(barretenberg/numeric)
@@ -172,7 +172,7 @@ set(BARRETENBERG_TARGET_OBJECTS
     $<TARGET_OBJECTS:trace_to_polynomials_objects>
     $<TARGET_OBJECTS:flavor_objects>
     $<TARGET_OBJECTS:goblin_objects>
-    $<TARGET_OBJECTS:goblin_avm_objects>
+    $<TARGET_OBJECTS:goblin_without_merge_objects>
     $<TARGET_OBJECTS:hypernova_objects>
     $<TARGET_OBJECTS:honk>
     $<TARGET_OBJECTS:multilinear_batching_objects>

diff --git a/barretenberg/cpp/src/barretenberg/bbapi/bbapi_chonk.cpp b/barretenberg/cpp/src/barretenberg/bbapi/bbapi_chonk.cpp
@@ -198,8 +198,8 @@ ChonkBatchVerify::Response ChonkBatchVerify::execute(const BBApiRequest& /*reque
     // Phase 1: Run all non-IPA verification for each proof, collecting IPA claims
     std::vector<OpeningClaim<curve::Grumpkin>> ipa_claims;
     std::vector<std::shared_ptr<NativeTranscript>> ipa_transcripts;
-    ipa_claims.reserve(proofs.size());
-    ipa_transcripts.reserve(proofs.size());
+    ipa_claims.reserve(proofs.size() * 2);
+    ipa_transcripts.reserve(proofs.size() * 2);
 
     for (size_t i = 0; i < proofs.size(); ++i) {
         validate_vk_size<VerificationKey>(vks[i]);
@@ -218,8 +218,10 @@ ChonkBatchVerify::Response ChonkBatchVerify::execute(const BBApiRequest& /*reque
         if (!result.all_checks_passed) {
             return { .valid = false };
         }
-        ipa_claims.push_back(std::move(result.ipa_claim));
-        ipa_transcripts.push_back(std::make_shared<NativeTranscript>(std::move(result.ipa_proof)));
+        ipa_claims.push_back(std::move(result.eccvm_ipa_claim));
+        ipa_transcripts.push_back(std::make_shared<NativeTranscript>(std::move(result.eccvm_ipa_proof)));
+        ipa_claims.push_back(std::move(result.kernel_ipa_claim));
+        ipa_transcripts.push_back(std::make_shared<NativeTranscript>(std::move(result.kernel_ipa_proof)));
     }
 
     // Phase 2: Batch IPA verification with single SRS MSM

diff --git a/barretenberg/cpp/src/barretenberg/benchmark/CMakeLists.txt b/barretenberg/cpp/src/barretenberg/benchmark/CMakeLists.txt
@@ -11,3 +11,4 @@ add_subdirectory(indexed_tree_bench)
 add_subdirectory(append_only_tree_bench)
 add_subdirectory(ultra_bench)
 add_subdirectory(circuit_construction_bench)
+add_subdirectory(goblin_flush_bench)
diff --git a/barretenberg/cpp/src/barretenberg/benchmark/goblin_flush_bench/CMakeLists.txt b/barretenberg/cpp/src/barretenberg/benchmark/goblin_flush_bench/CMakeLists.txt
@@ -0,0 +1 @@
+barretenberg_module(goblin_flush_bench vm2_stub dsl chonk goblin_without_merge stdlib_honk_verifier stdlib_sha256 stdlib_primitives)
diff --git a/barretenberg/cpp/src/barretenberg/benchmark/goblin_flush_bench/goblin_flush.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/goblin_flush_bench/goblin_flush.bench.cpp
@@ -0,0 +1,88 @@
+/**
+ * @brief Benchmarks for the Goblin flush pipeline.
+ *
+ * Phases of a Goblin flush (only phase 2 has a benchmark registered in this file so far):
+ *   1. Prove Goblin (ECCVM + Translator, non-ZK)
+ *   2. Build + prove the flush verification circuit (Circuit C, Ultra Honk)
+ *   3. Accumulate the Goblin app (which recursively verifies C's proof)
+ *   4. Accumulate the Goblin kernel
+ */
+
+#include <benchmark/benchmark.h>
+
+#include "barretenberg/chonk/chonk.hpp"
+#include "barretenberg/chonk/mock_circuit_producer.hpp"
+#include "barretenberg/common/bb_bench.hpp"
+#include "barretenberg/dsl/acir_format/goblin_flush_recursion_constraint.hpp"
+#include "barretenberg/goblin/mock_circuits.hpp"
+#include "barretenberg/goblin_without_merge/goblin_flush_circuit.hpp"
+#include "barretenberg/goblin_without_merge/goblin_without_merge.hpp"
+#include "barretenberg/srs/global_crs.hpp"
+#include "barretenberg/ultra_honk/ultra_prover.hpp"
+
+using namespace benchmark;
+using namespace bb;
+
+namespace {
+
+/**
+ * @brief Populate @p op_queue in place to near-Translator capacity, mimicking a real flush scenario.
+ * @details The tighter constraint is the Translator's op queue table (2^CONST_OP_QUEUE_LOG_SIZE = 4096 entries),
+ *          not the ECCVM (2^CONST_ECCVM_LOG_N = 32768 rows). Fills the current subtable, then calls merge().
+ */
+void create_populated_op_queue(std::shared_ptr<ECCOpQueue>& op_queue)
+{
+    static constexpr size_t OP_QUEUE_TABLE_CAPACITY = 1UL << CONST_OP_QUEUE_LOG_SIZE;
+    // Leave 128 entries of headroom for structural ops (eq_and_reset, no-ops) that chonk adds per circuit
+    static constexpr size_t TARGET_OPS = OP_QUEUE_TABLE_CAPACITY - 128;
+
+    // Structural ops required by the chonk flush table structure: four ultra-only no-ops, then eq_and_reset
+    op_queue->no_op_ultra_only();
+    op_queue->no_op_ultra_only();
+    op_queue->no_op_ultra_only();
+    op_queue->no_op_ultra_only();
+    op_queue->eq_and_reset();
+
+    // Add add_accumulate ops of one fixed point until the current subtable size reaches TARGET_OPS - 1
+    auto point = bb::g1::affine_element::one();
+    while (op_queue->get_current_subtable_size() < TARGET_OPS - 1) { // "- 1": presumably room for the closing eq_and_reset — TODO confirm
+        op_queue->add_accumulate(point);
+    }
+    op_queue->eq_and_reset();
+
+    op_queue->merge();
+}
+
+class GoblinFlushBench : public benchmark::Fixture {
+  public:
+    void SetUp([[maybe_unused]] const ::benchmark::State& state) override
+    { // Initialise the CRS factory from bb::srs::bb_crs_path(); `state` is not used here.
+        bb::srs::init_file_crs_factory(bb::srs::bb_crs_path());
+    }
+};
+
+/**
+ * @brief Benchmark Phase 2 (build + prove Circuit C with Ultra Honk), timed via create_goblin_flush_recursion_constraints
+ */
+BENCHMARK_DEFINE_F(GoblinFlushBench, ProveFlushCircuit)(benchmark::State& state)
+{
+    // Untimed setup: a Chonk instance, its op queue filled near capacity, and the ULTRA_GOBLIN recursion constraint
+    auto ivc = std::make_shared<Chonk>(/*num_circuits=*/4);
+    create_populated_op_queue(ivc->get_goblin().op_queue);
+    acir_format::RecursionConstraint recursion_constraint = {
+        {}, {}, {}, 0, acir_format::ULTRA_GOBLIN, acir_format::WitnessOrConstant<bb::fr>::from_constant(0)
+    };
+    // NOTE(review): no Goblin proof is computed above, so any proving done by the call below is inside the timed loop.
+    for (auto _ : state) { // timed: op-queue copy, builder construction, constraint creation
+        auto op_queue_copy = std::make_shared<ECCOpQueue>(*ivc->get_goblin().op_queue);
+        MegaCircuitBuilder builder(op_queue_copy);
+        benchmark::DoNotOptimize(
+            acir_format::create_goblin_flush_recursion_constraints(builder, recursion_constraint, ivc));
+    }
+}
+
+BENCHMARK_REGISTER_F(GoblinFlushBench, ProveFlushCircuit)->Unit(benchmark::kMillisecond)->Iterations(1);
+
+} // namespace
+
+BENCHMARK_MAIN();
diff --git a/barretenberg/cpp/src/barretenberg/boomerang_value_detection/CMakeLists.txt b/barretenberg/cpp/src/barretenberg/boomerang_value_detection/CMakeLists.txt
@@ -2,5 +2,5 @@ barretenberg_module(boomerang_value_detection stdlib_circuit_builders circuit_ch
                     stdlib_primitives numeric stdlib_aes128 stdlib_sha256 stdlib_blake2s
                     stdlib_blake3s stdlib_poseidon2
                     goblin
-                    goblin_avm
+                    goblin_without_merge
                     commitment_schemes)
diff --git a/...berg/cpp/src/barretenberg/boomerang_value_detection/graph_description_goblin_avm.test.cpp b/...berg/cpp/src/barretenberg/boomerang_value_detection/graph_description_goblin_avm.test.cpp
@@ -3,8 +3,8 @@
 #include "barretenberg/common/test.hpp"
 
 #include "barretenberg/goblin/mock_circuits.hpp"
-#include "barretenberg/goblin_avm/goblin_avm.hpp"
-#include "barretenberg/goblin_avm/goblin_avm_verifier.hpp"
+#include "barretenberg/goblin_without_merge/goblin_without_merge.hpp"
+#include "barretenberg/goblin_without_merge/goblin_without_merge_verifier.hpp"
 #include "barretenberg/srs/global_crs.hpp"
 #include "barretenberg/stdlib/honk_verifier/ultra_verification_keys_comparator.hpp"
 #include "barretenberg/ultra_honk/ultra_prover.hpp"
@@ -50,10 +50,11 @@ class BoomerangGoblinAvmRecursiveVerifierTests : public testing::Test {
     {
         auto op_queue = std::make_shared<ECCOpQueue>();
         InnerBuilder inner_builder(op_queue);
-        GoblinAvm goblin(inner_builder);
+        GoblinAvm goblin(inner_builder, /*is_zk=*/false);
         MockCircuits::construct_arithmetic_circuit(inner_builder);
 
         // Merge the ecc ops from the newly constructed circuit
+        op_queue->merge();
         auto goblin_proof = goblin.prove();
 
         // Subtable values and commitments - needed for (Recursive)MergeVerifier
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		barretenberg_module(goblin_flush_bench vm2_stub dsl chonk goblin_without_merge stdlib_honk_verifier stdlib_sha256 stdlib_primitives)