Skip to content

Commit 1aa3f72

Browse files
fix(DSM): Fix flaky sandbox eviction tests (#8903)
These tests seemed to sometimes get killed for using too many resources, so we modify them to spin up ~40 sandboxes each instead of ~400.
1 parent 5baabdc commit 1aa3f72

1 file changed

Lines changed: 66 additions & 22 deletions

File tree

rs/canister_sandbox/src/replica_controller/sandboxed_execution_controller.rs

Lines changed: 66 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1503,14 +1503,16 @@ impl SandboxedExecutionController {
15031503
&self,
15041504
backends: &mut HashMap<CanisterId, Backend>,
15051505
available_memory: F,
1506+
sandbox_processes_to_evict: usize,
1507+
sandbox_processes_rss_to_evict: NumBytes,
15061508
) where
15071509
F: Fn() -> Option<NumBytes>,
15081510
{
15091511
let active_sandboxes = total_active_sandboxes(backends);
15101512
if active_sandboxes > self.max_sandbox_count {
15111513
// The number of sandboxes is exceeded.
15121514
// Reduce the number of active sandboxes regardless of their RSS.
1513-
let max_active_sandboxes = active_sandboxes.saturating_sub(SANDBOX_PROCESSES_TO_EVICT);
1515+
let max_active_sandboxes = active_sandboxes.saturating_sub(sandbox_processes_to_evict);
15141516
let max_sandboxes_rss = u64::MAX.into();
15151517

15161518
evict_sandbox_processes(
@@ -1533,7 +1535,7 @@ impl SandboxedExecutionController {
15331535
// Reduce the RSS of sandboxes, regardless of their number.
15341536
let max_active_sandboxes = backends.len();
15351537
let max_sandboxes_rss =
1536-
total_sandboxes_rss.saturating_sub(&SANDBOX_PROCESSES_RSS_TO_EVICT);
1538+
total_sandboxes_rss.saturating_sub(&sandbox_processes_rss_to_evict);
15371539

15381540
evict_sandbox_processes(
15391541
backends,
@@ -1591,13 +1593,23 @@ impl SandboxedExecutionController {
15911593
};
15921594
}
15931595
// The number of active sandboxes is increasing, so trigger the eviction.
1594-
self.trigger_sandbox_eviction(&mut guard, Self::available_memory_wrapper);
1596+
self.trigger_sandbox_eviction(
1597+
&mut guard,
1598+
Self::available_memory_wrapper,
1599+
SANDBOX_PROCESSES_TO_EVICT,
1600+
SANDBOX_PROCESSES_RSS_TO_EVICT,
1601+
);
15951602
return sandbox_process;
15961603
}
15971604
}
15981605

15991606
let _timer = self.metrics.sandboxed_execution_spawn_process.start_timer();
1600-
self.trigger_sandbox_eviction(&mut guard, Self::available_memory_wrapper);
1607+
self.trigger_sandbox_eviction(
1608+
&mut guard,
1609+
Self::available_memory_wrapper,
1610+
SANDBOX_PROCESSES_TO_EVICT,
1611+
SANDBOX_PROCESSES_RSS_TO_EVICT,
1612+
);
16011613

16021614
// No sandbox process found for this canister. Start a new one and register it.
16031615
let reg = Arc::new(ActiveExecutionStateRegistry::new());
@@ -2381,7 +2393,8 @@ mod tests {
23812393

23822394
#[test]
23832395
fn sandbox_eviction_is_triggered_by_count() {
2384-
let active = SANDBOX_PROCESSES_TO_EVICT * 2;
2396+
let processes_to_evict = 10;
2397+
let active = processes_to_evict * 2;
23852398
let evicted = 3;
23862399
let empty = 2;
23872400
let (mut controller, _dir, _path) =
@@ -2398,7 +2411,12 @@ mod tests {
23982411
controller.max_sandboxes_rss = NumBytes::from(u64::MAX);
23992412
{
24002413
let mut guard = controller.backends.lock().unwrap();
2401-
controller.trigger_sandbox_eviction(&mut guard, || None);
2414+
controller.trigger_sandbox_eviction(
2415+
&mut guard,
2416+
|| None,
2417+
processes_to_evict,
2418+
SANDBOX_PROCESSES_RSS_TO_EVICT,
2419+
);
24022420
}
24032421
let partitioned_backends = get_active_evicted_empty_backends(&controller);
24042422
// No eviction should be triggered.
@@ -2410,21 +2428,25 @@ mod tests {
24102428
controller.max_sandbox_count = active - 1;
24112429
{
24122430
let mut guard = controller.backends.lock().unwrap();
2413-
controller.trigger_sandbox_eviction(&mut guard, || None);
2431+
controller.trigger_sandbox_eviction(
2432+
&mut guard,
2433+
|| None,
2434+
processes_to_evict,
2435+
SANDBOX_PROCESSES_RSS_TO_EVICT,
2436+
);
24142437
}
24152438
let partitioned_backends = get_active_evicted_empty_backends(&controller);
24162439
// A batch of active sandboxes should be evicted.
2417-
assert_eq!(
2418-
active - SANDBOX_PROCESSES_TO_EVICT,
2419-
partitioned_backends.0.len()
2420-
);
2421-
assert_eq!(SANDBOX_PROCESSES_TO_EVICT, partitioned_backends.1.len());
2440+
assert_eq!(active - processes_to_evict, partitioned_backends.0.len());
2441+
assert_eq!(processes_to_evict, partitioned_backends.1.len());
24222442
assert_eq!(0, partitioned_backends.2.len());
24232443
}
24242444

24252445
#[test]
24262446
fn sandbox_eviction_is_triggered_by_rss() {
2427-
let active = SANDBOX_PROCESSES_TO_EVICT * 2;
2447+
let processes_to_evict = 20;
2448+
let rss_to_evict = NumBytes::new(50 * 1024 * 1024);
2449+
let active = processes_to_evict * 2;
24282450
let evicted = 3;
24292451
let empty = 2;
24302452
let (mut controller, _dir, _path) =
@@ -2442,7 +2464,12 @@ mod tests {
24422464
NumBytes::from(active as u64 * DEFAULT_SANDBOX_PROCESS_RSS.get());
24432465
{
24442466
let mut guard = controller.backends.lock().unwrap();
2445-
controller.trigger_sandbox_eviction(&mut guard, || None);
2467+
controller.trigger_sandbox_eviction(
2468+
&mut guard,
2469+
|| None,
2470+
processes_to_evict,
2471+
rss_to_evict,
2472+
);
24462473
}
24472474
let partitioned_backends = get_active_evicted_empty_backends(&controller);
24482475
// No eviction should be triggered.
@@ -2455,24 +2482,31 @@ mod tests {
24552482
NumBytes::from((active as u64 - 1) * DEFAULT_SANDBOX_PROCESS_RSS.get());
24562483
{
24572484
let mut guard = controller.backends.lock().unwrap();
2458-
controller.trigger_sandbox_eviction(&mut guard, || None);
2485+
controller.trigger_sandbox_eviction(
2486+
&mut guard,
2487+
|| None,
2488+
processes_to_evict,
2489+
rss_to_evict,
2490+
);
24592491
}
24602492
let partitioned_backends = get_active_evicted_empty_backends(&controller);
24612493
// A batch of active sandboxes should be evicted.
24622494
assert_eq!(
2463-
active - 1 - (SANDBOX_PROCESSES_RSS_TO_EVICT / DEFAULT_SANDBOX_PROCESS_RSS) as usize,
2495+
active - (rss_to_evict / DEFAULT_SANDBOX_PROCESS_RSS) as usize,
24642496
partitioned_backends.0.len()
24652497
);
24662498
assert_eq!(
2467-
1 + (SANDBOX_PROCESSES_RSS_TO_EVICT / DEFAULT_SANDBOX_PROCESS_RSS) as usize,
2499+
(rss_to_evict / DEFAULT_SANDBOX_PROCESS_RSS) as usize,
24682500
partitioned_backends.1.len()
24692501
);
24702502
assert_eq!(0, partitioned_backends.2.len());
24712503
}
24722504

24732505
#[test]
24742506
fn sandbox_eviction_is_triggered_by_available_memory() {
2475-
let active = SANDBOX_PROCESSES_TO_EVICT * 2;
2507+
let processes_to_evict = 20;
2508+
let rss_to_evict = NumBytes::new(50 * 1024 * 1024);
2509+
let active = processes_to_evict * 2;
24762510
let evicted = 3;
24772511
let empty = 2;
24782512
let (mut controller, _dir, _path) =
@@ -2492,7 +2526,12 @@ mod tests {
24922526
let available_memory = || Some(DEFAULT_MIN_MEM_AVAILABLE_TO_EVICT_SANDBOXES);
24932527
{
24942528
let mut guard = controller.backends.lock().unwrap();
2495-
controller.trigger_sandbox_eviction(&mut guard, available_memory);
2529+
controller.trigger_sandbox_eviction(
2530+
&mut guard,
2531+
available_memory,
2532+
processes_to_evict,
2533+
rss_to_evict,
2534+
);
24962535
}
24972536
let partitioned_backends = get_active_evicted_empty_backends(&controller);
24982537
// No eviction should be triggered.
@@ -2504,16 +2543,21 @@ mod tests {
25042543
let available_memory = || Some(DEFAULT_MIN_MEM_AVAILABLE_TO_EVICT_SANDBOXES - 1.into());
25052544
{
25062545
let mut guard = controller.backends.lock().unwrap();
2507-
controller.trigger_sandbox_eviction(&mut guard, available_memory);
2546+
controller.trigger_sandbox_eviction(
2547+
&mut guard,
2548+
available_memory,
2549+
processes_to_evict,
2550+
rss_to_evict,
2551+
);
25082552
}
25092553
let partitioned_backends = get_active_evicted_empty_backends(&controller);
25102554
// A batch of active sandboxes should be evicted.
25112555
assert_eq!(
2512-
active - 1 - (SANDBOX_PROCESSES_RSS_TO_EVICT / DEFAULT_SANDBOX_PROCESS_RSS) as usize,
2556+
active - (rss_to_evict / DEFAULT_SANDBOX_PROCESS_RSS) as usize,
25132557
partitioned_backends.0.len()
25142558
);
25152559
assert_eq!(
2516-
1 + (SANDBOX_PROCESSES_RSS_TO_EVICT / DEFAULT_SANDBOX_PROCESS_RSS) as usize,
2560+
(rss_to_evict / DEFAULT_SANDBOX_PROCESS_RSS) as usize,
25172561
partitioned_backends.1.len()
25182562
);
25192563
assert_eq!(0, partitioned_backends.2.len());

0 commit comments

Comments
 (0)