Skip to content

Commit c912058

Browse files
committed
Perf: improve delete merges via pre-reserve before bulk append; add large Instruments/benchmark helpers; document no-feature-flag policy in agent.md
1 parent 026254c commit c912058

7 files changed

Lines changed: 288 additions & 15 deletions

File tree

agent.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Engineering Conventions for BPlusTree3
2+
3+
- No feature flags for internal experiments. We have no external users, so avoid `#[cfg(feature = ...)]` branches. Implement improvements directly (or in short‑lived local branches) and remove experimental code before merging.
4+
5+
- Performance work
6+
- Validate with existing Criterion benches and the large delete runner (`rust/src/bin/large_delete_benchmark.rs`).
7+
- For line‑level CPU hotspots, use the Instruments workload (`rust/src/bin/instruments_delete_target.rs`) and store traces under `rust/delete_profile.trace` (not committed).
8+
- Prefer targeted, localized changes that don’t regress insert/get/range performance.
9+
10+
- Coding style
11+
- Keep changes minimal and focused on the stated goal.
12+
- Reduce repeated arena lookups and redundant separator/key reads in hot paths.
13+
- Favor bulk moves and pre‑allocation over per‑element operations.
14+
15+
- Benchmarks to run for delete changes
16+
- `cd rust && cargo bench --bench comparison deletion`
17+
- `cd rust && cargo run --release --bin large_delete_benchmark`
18+
- Optional: record Instruments trace for confirmation of hotspot reductions.
19+
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
use bplustree::BPlusTreeMap;
2+
use std::time::{Duration, Instant};
3+
4+
// A long-running delete-focused workload for Instruments Time Profiler.
5+
// It builds a large tree at a specified capacity, then repeatedly deletes a
6+
// pseudo-random batch of keys and reinserts them to keep the workload steady.
7+
// Configure via env vars: CAPACITY, TREE_SIZE, BATCH_SIZE, DURATION_SEC.
8+
fn main() {
9+
let capacity: usize = std::env::var("CAPACITY")
10+
.ok()
11+
.and_then(|v| v.parse().ok())
12+
.unwrap_or(256);
13+
let tree_size: usize = std::env::var("TREE_SIZE")
14+
.ok()
15+
.and_then(|v| v.parse().ok())
16+
.unwrap_or(2_000_000);
17+
let batch_size: usize = std::env::var("BATCH_SIZE")
18+
.ok()
19+
.and_then(|v| v.parse().ok())
20+
.unwrap_or(500_000);
21+
let duration_sec: u64 = std::env::var("DURATION_SEC")
22+
.ok()
23+
.and_then(|v| v.parse().ok())
24+
.unwrap_or(15);
25+
26+
eprintln!(
27+
"instruments_delete_target: cap={}, size={}, batch={}, duration={}s",
28+
capacity, tree_size, batch_size, duration_sec
29+
);
30+
31+
// Build initial tree
32+
let mut tree = BPlusTreeMap::new(capacity).expect("init B+tree");
33+
for i in 0..tree_size {
34+
// small values to reduce memory
35+
tree.insert(i as i32, i as i32);
36+
}
37+
38+
// Prepare a pseudo-random but deterministic batch of keys
39+
let mut keys: Vec<i32> = Vec::with_capacity(batch_size);
40+
let mut seed = 42_u64;
41+
for _ in 0..batch_size {
42+
seed = seed.wrapping_mul(1103515245).wrapping_add(12345);
43+
let k = (seed as usize) % tree_size;
44+
keys.push(k as i32);
45+
}
46+
47+
// Run mixed cycles of deletes and reinserts until duration elapses
48+
let deadline = Instant::now() + Duration::from_secs(duration_sec);
49+
let mut cycles: u64 = 0;
50+
while Instant::now() < deadline {
51+
// Delete phase
52+
for &k in &keys {
53+
let _ = tree.remove(&k);
54+
}
55+
// Reinsert phase to keep tree size stable
56+
for &k in &keys {
57+
tree.insert(k, k);
58+
}
59+
cycles += 1;
60+
}
61+
62+
eprintln!("completed cycles: {} (cap={}, size={})", cycles, capacity, tree_size);
63+
}
64+
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
use bplustree::BPlusTreeMap;
2+
use std::collections::BTreeMap;
3+
use std::time::Instant;
4+
5+
// Large-scale delete benchmark comparing BPlusTreeMap vs BTreeMap
6+
// Focus: delete performance with large trees (1M+) and capacity 256
7+
// Note: Run in release mode for meaningful results.
8+
fn main() {
9+
// Configurable via env vars if needed
10+
let tree_size: usize = std::env::var("TREE_SIZE")
11+
.ok()
12+
.and_then(|v| v.parse().ok())
13+
.unwrap_or(1_000_000);
14+
let capacity: usize = std::env::var("CAPACITY")
15+
.ok()
16+
.and_then(|v| v.parse().ok())
17+
.unwrap_or(256);
18+
let delete_sample: usize = std::env::var("DELETE_SAMPLE")
19+
.ok()
20+
.and_then(|v| v.parse().ok())
21+
.unwrap_or(100_000);
22+
23+
println!("=== Large Delete Benchmark ===");
24+
println!("Size: {} elements, Capacity: {} keys/node", tree_size, capacity);
25+
println!("Delete sample: {} keys (pseudo-random)", delete_sample);
26+
27+
// Prepare delete keys (pseudo-random deterministic sequence across range [0, tree_size))
28+
let delete_keys: Vec<usize> = (0..delete_sample)
29+
.scan(42_u64, |seed, _| {
30+
*seed = seed.wrapping_mul(1103515245).wrapping_add(12345);
31+
Some((*seed as usize) % tree_size)
32+
})
33+
.collect();
34+
35+
// Build maps
36+
println!("\nBuilding maps...");
37+
let mut bplus = BPlusTreeMap::new(capacity).expect("init bplus");
38+
let mut btree = BTreeMap::new();
39+
40+
let start = Instant::now();
41+
for i in 0..tree_size {
42+
bplus.insert(i, i);
43+
}
44+
let bplus_build = start.elapsed();
45+
46+
let start = Instant::now();
47+
for i in 0..tree_size {
48+
btree.insert(i, i);
49+
}
50+
let btree_build = start.elapsed();
51+
52+
println!(
53+
"Build times: BPlusTreeMap={:?}, BTreeMap={:?}",
54+
bplus_build, btree_build
55+
);
56+
57+
// Clone maps to avoid interaction between runs
58+
println!("\nDeleting ({} keys)...", delete_sample);
59+
60+
// BPlusTreeMap delete timing
61+
let mut bplus_copy = bplus; // move
62+
let start = Instant::now();
63+
for &k in &delete_keys {
64+
let _ = bplus_copy.remove(&k);
65+
}
66+
let bplus_delete = start.elapsed();
67+
68+
// BTreeMap delete timing
69+
let mut btree_copy = btree; // move
70+
let start = Instant::now();
71+
for &k in &delete_keys {
72+
let _ = btree_copy.remove(&k);
73+
}
74+
let btree_delete = start.elapsed();
75+
76+
let bplus_per_op = (bplus_delete.as_nanos() as f64) / (delete_sample as f64);
77+
let btree_per_op = (btree_delete.as_nanos() as f64) / (delete_sample as f64);
78+
let ratio = btree_per_op / bplus_per_op;
79+
80+
println!("\nDelete times:");
81+
println!(" BPlusTreeMap: {:?} total ({:.1} ns/op)", bplus_delete, bplus_per_op);
82+
println!(" BTreeMap: {:?} total ({:.1} ns/op)", btree_delete, btree_per_op);
83+
println!(
84+
" Ratio: {:.2}x {}",
85+
ratio,
86+
if ratio > 1.0 { "(BPlusTreeMap faster)" } else { "(BTreeMap faster)" }
87+
);
88+
}
89+

rust/src/delete_operations.rs

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -176,10 +176,8 @@ impl<K: Ord + Clone, V: Clone> BPlusTreeMap<K, V> {
176176

177177
/// Rebalance an underfull child in an arena branch
178178
#[inline]
179-
/// Rebalance an underfull child node using optimized sibling information gathering.
180-
/// This version minimizes arena access by batching all sibling checks.
181179
fn rebalance_child(&mut self, parent_id: NodeId, child_index: usize) -> bool {
182-
// OPTIMIZATION: Gather all rebalancing information in minimal arena accesses
180+
// Gather rebalancing information in minimal arena accesses
183181
let rebalance_info = {
184182
let parent_branch = match self.get_branch(parent_id) {
185183
Some(branch) => branch,
@@ -188,7 +186,6 @@ impl<K: Ord + Clone, V: Clone> BPlusTreeMap<K, V> {
188186

189187
let child_is_leaf = matches!(parent_branch.children[child_index], NodeRef::Leaf(_, _));
190188

191-
// OPTIMIZATION: Batch sibling information gathering with direct node access
192189
let left_sibling_info = if child_index > 0 {
193190
let sibling_ref = parent_branch.children[child_index - 1];
194191
let can_donate = match &sibling_ref {
@@ -228,7 +225,6 @@ impl<K: Ord + Clone, V: Clone> BPlusTreeMap<K, V> {
228225

229226
let (child_is_leaf, left_sibling_info, right_sibling_info) = rebalance_info;
230227

231-
// Dispatch to appropriate rebalancing strategy
232228
if child_is_leaf {
233229
self.rebalance_leaf(
234230
parent_id,
@@ -245,6 +241,8 @@ impl<K: Ord + Clone, V: Clone> BPlusTreeMap<K, V> {
245241
)
246242
}
247243
}
244+
245+
// (Experimental ID-based helpers removed)
248246
}
249247

250248
#[cfg(test)]
@@ -460,10 +458,13 @@ impl<K: Ord + Clone, V: Clone> BPlusTreeMap<K, V> {
460458
None => return false,
461459
};
462460

463-
// Then merge into left
461+
// Then merge into left, reserving capacity to avoid reallocations
464462
let Some(left_branch) = self.get_branch_mut(left_id) else {
465463
return false;
466464
};
465+
let add_keys = 1 + child_keys.len(); // separator + child keys
466+
let add_children = child_children.len();
467+
left_branch.reserve_for_merge(add_keys, add_children);
467468
left_branch.keys.push(separator_key);
468469
left_branch.keys.append(&mut child_keys);
469470
left_branch.children.append(&mut child_children);
@@ -513,10 +514,13 @@ impl<K: Ord + Clone, V: Clone> BPlusTreeMap<K, V> {
513514
None => return false,
514515
};
515516

516-
// Then merge into child
517+
// Then merge into child, reserving capacity to avoid reallocations
517518
let Some(child_branch) = self.get_branch_mut(child_id) else {
518519
return false;
519520
};
521+
let add_keys = 1 + right_keys.len(); // separator + right keys
522+
let add_children = right_children.len();
523+
child_branch.reserve_for_merge(add_keys, add_children);
520524
child_branch.keys.push(separator_key);
521525
child_branch.keys.append(&mut right_keys);
522526
child_branch.children.append(&mut right_children);
@@ -729,10 +733,10 @@ impl<K: Ord + Clone, V: Clone> BPlusTreeMap<K, V> {
729733
None => return false,
730734
};
731735

732-
// Merge into left leaf and update linked list - use early return for cleaner flow
733-
let Some(left_leaf) = self.get_leaf_mut(left_id) else {
734-
return false;
735-
};
736+
// Merge into left leaf and update linked list - reserve to avoid reallocations
737+
let Some(left_leaf) = self.get_leaf_mut(left_id) else { return false; };
738+
let add = child_keys.len();
739+
left_leaf.reserve_for_merge(add);
736740
left_leaf.append_keys(&mut child_keys);
737741
left_leaf.append_values(&mut child_values);
738742
left_leaf.next = child_next;
@@ -782,10 +786,10 @@ impl<K: Ord + Clone, V: Clone> BPlusTreeMap<K, V> {
782786
None => return false,
783787
};
784788

785-
// Then merge into child
786-
let Some(child_leaf) = self.get_leaf_mut(child_id) else {
787-
return false;
788-
};
789+
// Then merge into child, reserving capacity to avoid reallocations
790+
let Some(child_leaf) = self.get_leaf_mut(child_id) else { return false; };
791+
let add = right_keys.len();
792+
child_leaf.reserve_for_merge(add);
789793
child_leaf.append_keys(&mut right_keys);
790794
child_leaf.append_values(&mut right_values);
791795
child_leaf.next = right_next;

rust/src/node.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,14 @@ impl<K: Ord + Clone, V: Clone> LeafNode<K, V> {
226226
self.values.append(other);
227227
}
228228

229+
/// Reserve capacity for an incoming merge of `additional` items.
230+
#[inline]
231+
pub fn reserve_for_merge(&mut self, additional: usize) {
232+
// Reserve for both keys and values (kept in lockstep)
233+
self.keys.reserve(additional);
234+
self.values.reserve(additional);
235+
}
236+
229237
/// Take all keys, leaving an empty vector.
230238
#[inline]
231239
pub fn take_keys(&mut self) -> Vec<K> {
@@ -474,6 +482,7 @@ impl<K: Ord + Clone, V: Clone> LeafNode<K, V> {
474482
Some((self.keys.remove(0), self.values.remove(0)))
475483
}
476484

485+
477486
/// Accept a borrowed key-value pair at the beginning (from left sibling)
478487
pub fn accept_from_left(&mut self, key: K, value: V) {
479488
self.keys.insert(0, key);
@@ -713,4 +722,11 @@ impl<K: Ord + Clone, V: Clone> BranchNode<K, V> {
713722
self.keys.append(&mut other.keys);
714723
self.children.append(&mut other.children);
715724
}
725+
726+
/// Reserve capacity for an incoming merge.
727+
#[inline]
728+
pub fn reserve_for_merge(&mut self, additional_keys: usize, additional_children: usize) {
729+
self.keys.reserve(additional_keys);
730+
self.children.reserve(additional_children);
731+
}
716732
}

rust/tools/parse_time_profile.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#!/usr/bin/env python3
2+
import sys
3+
import xml.etree.ElementTree as ET
4+
from collections import Counter
5+
6+
"""
7+
Best-effort parser for Instruments xctrace XML exports to list top functions/frames.
8+
Usage:
9+
python3 rust/tools/parse_time_profile.py rust/delete_export/time_profile.xml
10+
11+
Notes:
12+
- XML schema varies across Xcode versions; this script attempts to be robust.
13+
- If time_profile.xml is empty or missing, try time_sample.xml instead:
14+
python3 rust/tools/parse_time_profile.py rust/delete_export/time_sample.xml
15+
"""
16+
17+
def main(path: str) -> int:
18+
try:
19+
tree = ET.parse(path)
20+
except Exception as e:
21+
print(f"Failed to parse {path}: {e}")
22+
return 1
23+
24+
root = tree.getroot()
25+
# Find all leaf text that look like function symbols; Instruments usually
26+
# includes stacks as text content or attributes in nested elements. We will
27+
# count any text nodes that look like code symbols (contain '::' or '['file:line']').
28+
counter = Counter()
29+
for elem in root.iter():
30+
text = (elem.text or '').strip()
31+
if not text:
32+
continue
33+
if '::' in text or ' - [' in text or ' + ' in text:
34+
# Normalize long frames by splitting on ' + ' (address offsets)
35+
frame = text.split(' + ')[0]
36+
counter[frame] += 1
37+
38+
print("Top frames by sample count (heuristic):")
39+
for frame, count in counter.most_common(50):
40+
print(f"{count:>8} {frame}")
41+
42+
return 0
43+
44+
if __name__ == '__main__':
45+
if len(sys.argv) != 2:
46+
print("Usage: parse_time_profile.py <exported_xml>")
47+
sys.exit(2)
48+
sys.exit(main(sys.argv[1]))
49+

scripts/instruments_export.sh

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#!/usr/bin/env bash
2+
set -euo pipefail
3+
4+
TRACE_PATH=${1:-rust/delete_profile.trace}
5+
OUT_DIR=${2:-rust/delete_export}
6+
7+
mkdir -p "$OUT_DIR"
8+
9+
echo "Exporting TOC to $OUT_DIR/toc.xml"
10+
xcrun xctrace export --input "$TRACE_PATH" --toc > "$OUT_DIR/toc.xml"
11+
12+
echo "Exporting time-profile table to $OUT_DIR/time_profile.xml (if available)"
13+
if ! xcrun xctrace export --input "$TRACE_PATH" --xpath '/trace-toc/run[@number="1"]/data/table[@schema="time-profile"]' > "$OUT_DIR/time_profile.xml"; then
14+
echo "time-profile export failed; continuing"
15+
fi
16+
17+
echo "Exporting time-sample table to $OUT_DIR/time_sample.xml (if available)"
18+
if ! xcrun xctrace export --input "$TRACE_PATH" --xpath '/trace-toc/run[@number="1"]/data/table[@schema="time-sample"]' > "$OUT_DIR/time_sample.xml"; then
19+
echo "time-sample export failed; continuing"
20+
fi
21+
22+
echo "Exporting thread-info to $OUT_DIR/thread_info.xml"
23+
xcrun xctrace export --input "$TRACE_PATH" --xpath '/trace-toc/run[@number="1"]/data/table[@schema="thread-info"]' > "$OUT_DIR/thread_info.xml"
24+
25+
echo "Exporting process-info to $OUT_DIR/process_info.xml"
26+
xcrun xctrace export --input "$TRACE_PATH" --xpath '/trace-toc/run[@number="1"]/data/table[@schema="process-info"]' > "$OUT_DIR/process_info.xml"
27+
28+
echo "Exporting dyld-library-load to $OUT_DIR/dyld_library_load.xml"
29+
xcrun xctrace export --input "$TRACE_PATH" --xpath '/trace-toc/run[@number="1"]/data/table[@schema="dyld-library-load"]' > "$OUT_DIR/dyld_library_load.xml"
30+
31+
echo "Done. Inspect XML files under $OUT_DIR"
32+

0 commit comments

Comments
 (0)