-
Notifications
You must be signed in to change notification settings - Fork 26
Reduce peak memory during prove by releasing witness shared memory early #204
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: hc/export_device_memories_unchecked
Are you sure you want to change the base?
Changes from all commits
639a18b
95038f4
2465d52
dc557cb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -140,7 +140,11 @@ where | |||||
|
|
||||||
| setup_timer.stop(); | ||||||
|
|
||||||
| SharedMemoryEngine::read_pcs_setup_from_shared_memory() | ||||||
| // Prover setup not needed on client side (server does the proving). | ||||||
| // Verifier setup is required for verification, so read it from shared memory. | ||||||
| let (_prover_setup, verifier_setup) = | ||||||
| SharedMemoryEngine::read_pcs_setup_from_shared_memory::<C::FieldConfig, C::PCSConfig>(); | ||||||
| (ExpanderProverSetup::default(), verifier_setup) | ||||||
| } | ||||||
|
|
||||||
| pub fn client_send_witness_and_prove<C, ECCConfig>( | ||||||
|
|
@@ -152,8 +156,39 @@ where | |||||
| { | ||||||
| let timer = Timer::new("prove", true); | ||||||
|
|
||||||
| // Reset ack signal, then write witness | ||||||
| SharedMemoryEngine::reset_witness_ack(); | ||||||
| SharedMemoryEngine::write_witness_to_shared_memory::<C::FieldConfig>(device_memories); | ||||||
| wait_async(ClientHttpHelper::request_prove()); | ||||||
|
|
||||||
| #[cfg(all(target_os = "linux", target_env = "gnu"))] | ||||||
| { | ||||||
| extern "C" { | ||||||
| fn malloc_trim(pad: usize) -> i32; | ||||||
| } | ||||||
| unsafe { | ||||||
| malloc_trim(0); | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| // Async: send prove request + poll for witness ack to release shared memory early | ||||||
| let rt = tokio::runtime::Runtime::new().unwrap(); | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Creating a new Tokio runtime with Consider using Example: use once_cell::sync::Lazy;
static RUNTIME: Lazy<tokio::runtime::Runtime> = Lazy::new(|| tokio::runtime::Runtime::new().expect("Failed to create Tokio runtime"));
// Then in your function:
// RUNTIME.block_on(async { ... });Additionally,
Suggested change
|
||||||
| rt.block_on(async { | ||||||
| let prove_handle = tokio::spawn(async { | ||||||
| ClientHttpHelper::request_prove().await; | ||||||
| }); | ||||||
|
|
||||||
| // Poll witness_ack; once server confirms read, release witness shared memory | ||||||
| tokio::task::spawn_blocking(|| { | ||||||
| SharedMemoryEngine::wait_for_witness_read_complete(); | ||||||
| unsafe { | ||||||
| super::shared_memory_utils::SHARED_MEMORY.witness = None; | ||||||
| } | ||||||
| }) | ||||||
|
Comment on lines
+181
to
+186
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The tokio::task::spawn_blocking(|| {
SharedMemoryEngine::wait_for_witness_read_complete();
unsafe {
super::shared_memory_utils::SHARED_MEMORY.witness = None;
}
#[cfg(all(target_os = "linux", target_env = "gnu"))]
{
extern "C" {
fn malloc_trim(pad: usize) -> i32;
}
unsafe {
malloc_trim(0);
}
}
}) |
||||||
| .await | ||||||
| .expect("Witness cleanup task failed"); | ||||||
|
|
||||||
| prove_handle.await.expect("Prove task failed"); | ||||||
| }); | ||||||
|
|
||||||
| let proof = SharedMemoryEngine::read_proof_from_shared_memory(); | ||||||
|
|
||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,12 +18,15 @@ pub struct SharedMemory { | |
| pub pcs_setup: Option<Shmem>, | ||
| pub witness: Option<Shmem>, | ||
| pub proof: Option<Shmem>, | ||
| /// 1-byte signal: 0 = witness not read, 1 = server finished reading witness | ||
| pub witness_ack: Option<Shmem>, | ||
| } | ||
|
|
||
| pub static mut SHARED_MEMORY: SharedMemory = SharedMemory { | ||
| pcs_setup: None, | ||
| witness: None, | ||
| proof: None, | ||
| witness_ack: None, | ||
| }; | ||
|
|
||
| pub struct SharedMemoryEngine {} | ||
|
|
@@ -106,6 +109,56 @@ impl SharedMemoryEngine { | |
| Self::read_object_from_shared_memory("pcs_setup", 0) | ||
| } | ||
|
|
||
| /// Client: reset witness_ack to 0 (call before writing witness) | ||
| pub fn reset_witness_ack() { | ||
| unsafe { | ||
| Self::allocate_shared_memory_if_necessary( | ||
| &mut SHARED_MEMORY.witness_ack, | ||
| "witness_ack", | ||
| 1, | ||
| ); | ||
| let ptr = SHARED_MEMORY.witness_ack.as_mut().unwrap().as_ptr(); | ||
| std::ptr::write_volatile(ptr, 0u8); | ||
| } | ||
| } | ||
|
|
||
| /// Server: set witness_ack to 1 (call after reading witness) | ||
| pub fn signal_witness_read_complete() { | ||
| let shmem = ShmemConf::new() | ||
| .flink("witness_ack") | ||
| .open() | ||
| .expect("Failed to open witness_ack shared memory"); | ||
| unsafe { | ||
| std::ptr::write_volatile(shmem.as_ptr(), 1u8); | ||
| } | ||
| } | ||
|
|
||
| /// Client: poll until witness_ack becomes 1, with a timeout to avoid hanging | ||
| /// if the server crashes. | ||
| pub fn wait_for_witness_read_complete() { | ||
| const TIMEOUT: std::time::Duration = std::time::Duration::from_secs(300); | ||
| let start = std::time::Instant::now(); | ||
| unsafe { | ||
| let ptr = SHARED_MEMORY | ||
| .witness_ack | ||
| .as_ref() | ||
| .expect("witness_ack not initialized, call reset_witness_ack first") | ||
| .as_ptr() as *const u8; | ||
| loop { | ||
| if std::ptr::read_volatile(ptr) != 0 { | ||
| break; | ||
| } | ||
| if start.elapsed() > TIMEOUT { | ||
| panic!( | ||
| "Timed out waiting for server to read witness ({}s)", | ||
| TIMEOUT.as_secs() | ||
| ); | ||
| } | ||
| std::thread::sleep(std::time::Duration::from_millis(10)); | ||
| } | ||
|
Comment on lines
147
to
158
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The polling loop lacks a timeout. If the server process crashes or fails to signal the let start = std::time::Instant::now();
loop {
if std::ptr::read_volatile(ptr) != 0 {
break;
}
if start.elapsed().as_secs() > 300 {
panic!("Timeout waiting for witness ack from server");
}
std::thread::sleep(std::time::Duration::from_millis(10));
} |
||
| } | ||
| } | ||
|
Comment on lines
+138
to
+160
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This function panics on timeout. It's more idiomatic in Rust to return a pub fn wait_for_witness_read_complete() -> Result<(), String> {
const TIMEOUT: std::time::Duration = std::time::Duration::from_secs(300);
let start = std::time::Instant::now();
unsafe {
let ptr = SHARED_MEMORY
.witness_ack
.as_ref()
.expect("witness_ack not initialized, call reset_witness_ack first")
.as_ptr() as *const u8;
loop {
if std::ptr::read_volatile(ptr) != 0 {
return Ok(());
}
if start.elapsed() > TIMEOUT {
return Err(format!(
"Timed out waiting for server to read witness ({}s)",
TIMEOUT.as_secs()
));
}
std::thread::sleep(std::time::Duration::from_millis(10));
}
}
} |
||
|
|
||
| pub fn write_witness_to_shared_memory<F: FieldEngine>(values: Vec<Vec<F::SimdCircuitField>>) { | ||
| let total_size = std::mem::size_of::<usize>() | ||
| + values | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The
extern "C"declaration formalloc_trimis repeated twice in this function (here and at line 188). It is better to declare it once at the beginning of the function scope. Additionally, the second call tomalloc_trim(after dropping the shared memory handle) may have negligible impact compared to the first one, as shared memory is typically managed viammaprather than themallocheap.