1 change: 1 addition & 0 deletions .envrc
@@ -0,0 +1 @@
use flake
2 changes: 1 addition & 1 deletion .reuse/dep5
@@ -7,6 +7,6 @@ Files: docs/*.md *.md
Copyright: 2024
License: CC-BY-4.0

Files: scripts/* test_data/* *.toml .git* fuzz/Cargo.lock fuzz/.gitignore resources/linux-config-* vmm/src/api/openapi/cloud-hypervisor.yaml CODEOWNERS Cargo.lock flake.nix flake.lock chv.nix
Files: scripts/* test_data/* *.toml .git* fuzz/Cargo.lock fuzz/.gitignore resources/linux-config-* vmm/src/api/openapi/cloud-hypervisor.yaml CODEOWNERS Cargo.lock flake.nix flake.lock chv.nix .envrc
Copyright: 2024
License: Apache-2.0
1 change: 1 addition & 0 deletions flake.nix
@@ -44,6 +44,7 @@
inputsFrom = builtins.attrValues self.packages;
packages = with pkgs; [
gitlint
rustup
];
};
packages =
258 changes: 174 additions & 84 deletions vmm/src/lib.rs
@@ -688,41 +688,81 @@ impl VmmVersionInfo {
}
}

/// Holds internal metrics about the ongoing migration.
///
/// These metrics are updated on the fly as the migration progresses.
#[derive(Debug, Clone)]
struct MigrationState {
current_dirty_pages: u64,
downtime: Duration,
downtime_start: Instant,
/* ---------------------------------------------- */
/* Properties that are updated before the first iteration */
/// The instant where the actual downtime of the VM began.
downtime_start_time: Instant,
/// The instant where the migration began.
migration_start_time: Instant,

/* ---------------------------------------------- */
/* Properties that are updated in every iteration */
/// The iteration number. It is strictly monotonically increasing.
iteration: u64,
iteration_cost_time: Duration,
/// The instant where the current iteration began.
iteration_start_time: Instant,
mb_per_sec: f64,
pages_per_second: u64,
pending_size: u64,
start_time: Instant,
threshold_size: u64,
total_time: Duration,
/// The duration of the previous iteration.
iteration_duration: Duration,
/// The number of bytes that are to be transmitted in the current iteration.
bytes_to_transmit: u64,
/// `bytes_to_transmit` but as 4K pages.
pages_to_transmit: u64,
/// The instant where the transmission began.
/// This is after `iteration_start_time`; the transmission itself is always
/// shorter than `iteration_duration`.
transmit_start_time: Instant,
/// The duration of the transmission.
transmit_duration: Duration,
/// The measured throughput in bytes per sec.
bytes_per_sec: f64,
/// The calculated downtime with respect to `bytes_to_transmit` and
/// `bytes_per_sec`.
calculated_downtime_duration: Option<Duration>,
/// Total amount of transferred bytes across all iterations.
total_transferred_bytes: u64,
total_transferred_dirty_pages: u64,
/// `total_transferred_bytes` but as 4K pages.
total_transferred_pages: u64,
/// The dirty rate in pages per second (pps).
dirty_rate_pps: u64,

/* ---------------------------------------------- */
/* Properties that are updated after the last iteration */
/// The actual measured downtime from the sender VMM perspective.
downtime_duration: Duration,
/// Total duration of the migration.
migration_duration: Duration,
}

impl MigrationState {
pub fn new() -> Self {
Self {
current_dirty_pages: 0,
downtime: Duration::default(),
downtime_start: Instant::now(),
// Field will be overwritten later.
downtime_start_time: Instant::now(),
// Field will be overwritten later.
migration_start_time: Instant::now(),
iteration: 0,
iteration_cost_time: Duration::default(),
// Field will be overwritten later.
iteration_start_time: Instant::now(),
mb_per_sec: 0.0,
pages_per_second: 0,
pending_size: 0,
start_time: Instant::now(),
threshold_size: 0,
total_time: Duration::default(),
iteration_duration: Duration::default(),
bytes_to_transmit: 0,
pages_to_transmit: 0,
// Field will be overwritten later.
transmit_start_time: Instant::now(),
transmit_duration: Duration::default(),
bytes_per_sec: 0.0,
calculated_downtime_duration: None,
total_transferred_bytes: 0,
total_transferred_dirty_pages: 0,
total_transferred_pages: 0,
// Field will be overwritten later.
dirty_rate_pps: 0,
downtime_duration: Duration::default(),
// Field will be overwritten later.
migration_duration: Duration::default(),
}
}
}
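
To see how these fields relate, here is a minimal standalone sketch (not part of the patch; all input values are invented) that derives the per-iteration metrics the same way the code below does:

use std::time::Duration;

fn main() {
    // Invented inputs: 256 MiB of dirty memory, an 800 ms iteration,
    // of which 700 ms were spent transmitting.
    let bytes_to_transmit: u64 = 256 * 1024 * 1024;
    let page_size: u64 = 4096;
    let iteration_duration = Duration::from_millis(800);
    let transmit_duration = Duration::from_millis(700);

    // `bytes_to_transmit` as 4K pages, rounded up.
    let pages_to_transmit = bytes_to_transmit.div_ceil(page_size);

    // Dirty rate: pages dirtied per second since the last dirty-log fetch.
    let dirty_rate_pps =
        (pages_to_transmit as f64 / iteration_duration.as_secs_f64()).ceil() as u64;

    // Measured throughput of the last transmission.
    let bytes_per_sec = bytes_to_transmit as f64 / transmit_duration.as_secs_f64();

    // Next round, suppose only 64 MiB are still dirty (invented figure); the
    // estimated downtime is the time to send that delta at the measured rate.
    let remaining: u64 = 64 * 1024 * 1024;
    let calculated_downtime =
        Duration::from_secs_f64(remaining as f64 / bytes_per_sec);

    println!("pages: {pages_to_transmit}, dirty rate: {dirty_rate_pps} pps");
    println!("throughput: {:.1} MiB/s", bytes_per_sec / 1024.0 / 1024.0);
    println!("estimated downtime: {} ms", calculated_downtime.as_millis());
}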
@@ -2019,6 +2059,10 @@ impl Vmm {
}
}

/// Performs memory copy iterations in pre-copy fashion.
///
/// This transmits the initial VM memory as well as all subsequent VM memory
/// deltas while the VM keeps running.
fn memory_copy_iterations(
vm: &mut Vm,
mem_send: &SendAdditionalConnections,
@@ -2027,31 +2071,17 @@
migration_timeout: Duration,
migrate_downtime_limit: Duration,
) -> result::Result<MemoryRangeTable, MigratableError> {
let mut bandwidth = 0.0;
let mut iteration_table;

// We loop until we converge (target downtime is achievable).
loop {
// todo: check if auto-converge is enabled at all?
if Self::can_increase_autoconverge_step(s) && vm.throttle_percent() < AUTO_CONVERGE_MAX
{
let current_throttle = vm.throttle_percent();
let new_throttle = current_throttle + AUTO_CONVERGE_STEP_SIZE;
let new_throttle = std::cmp::min(new_throttle, AUTO_CONVERGE_MAX);
log::info!("Increasing auto-converge: {new_throttle}%");
if new_throttle != current_throttle {
vm.set_throttle_percent(new_throttle);
}
}

// Update the start time of the iteration
s.iteration_start_time = Instant::now();

// Increment iteration counter
s.iteration += 1;

// Check if migration has timed out
// A migration_timeout greater than zero enables the timeout check; zero disables it
if !migration_timeout.is_zero() && s.start_time.elapsed() > migration_timeout {
if !migration_timeout.is_zero() && s.migration_start_time.elapsed() > migration_timeout
{
warn!("Migration timed out after {migration_timeout:?}");
Request::abandon().write_to(socket)?;
Response::read_from(socket)?.ok_or_abandon(
@@ -2060,55 +2090,107 @@
)?;
}

// Get the dirty page table
iteration_table = vm.dirty_log()?;
// We always autoconverge.
if Self::can_increase_autoconverge_step(s) && vm.throttle_percent() < AUTO_CONVERGE_MAX
{
let current_throttle = vm.throttle_percent();
let new_throttle = current_throttle + AUTO_CONVERGE_STEP_SIZE;
let new_throttle = std::cmp::min(new_throttle, AUTO_CONVERGE_MAX);
info!("Increasing auto-converge: {new_throttle}%");
if new_throttle != current_throttle {
vm.set_throttle_percent(new_throttle);
}
}

// In the first iteration (`0`), we transmit the whole memory. Starting with the
// second iteration (`1`), we start the delta transmission.
iteration_table = if s.iteration == 0 {
vm.memory_range_table()?
} else {
vm.dirty_log()?
};

// Update the pending size (amount of data to transfer)
s.pending_size = iteration_table
s.bytes_to_transmit = iteration_table
.regions()
.iter()
.map(|range| range.length)
.sum();
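// Round up to whole 4 KiB pages: e.g. 10_000 bytes become 3 pages.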
s.pages_to_transmit = s.bytes_to_transmit.div_ceil(PAGE_SIZE as u64);

// Update thresholds
if bandwidth > 0.0 {
s.threshold_size = bandwidth as u64 * migrate_downtime_limit.as_millis() as u64;
}

// Enter the final stage of migration when the suspension conditions are met
if s.iteration > 1 && s.pending_size <= s.threshold_size {
// Unlikely happy path: nothing left to transmit.
if s.bytes_to_transmit == 0 {
break;
}

// Update the number of dirty pages
s.total_transferred_bytes += s.pending_size;
s.current_dirty_pages = s.pending_size.div_ceil(PAGE_SIZE as u64);
s.total_transferred_dirty_pages += s.current_dirty_pages;
// Update metrics and exit loop, if conditions are met.
if s.iteration > 0 {
// Refresh dirty rate: How many pages have been dirtied since the last time we
// fetched the dirty log.
if s.iteration_duration > Duration::ZERO {
let dirty_rate_pps_f64 =
s.pages_to_transmit as f64 / (s.iteration_duration.as_secs_f64());
s.dirty_rate_pps = dirty_rate_pps_f64.ceil() as u64;
} else {
s.dirty_rate_pps = 0;
}
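// Illustration (invented numbers): 65,536 dirty pages over a 0.5 s
// iteration yields a dirty rate of 131,072 pps.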

// Update expected downtime:
// Strictly speaking, this is the time to transmit the last
// memory chunk, not the actual downtime, which will be higher.
let transmission_time_s = if s.bytes_per_sec > 0.0 {
s.bytes_to_transmit as f64 / s.bytes_per_sec
} else {
0.0
};
s.calculated_downtime_duration = Some(Duration::from_secs_f64(transmission_time_s));
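// Illustration (invented numbers): 512 MiB remaining at 1 GiB/s of
// measured throughput gives an estimated downtime of ~0.5 s.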

// Exit the loop when the handover conditions are met
if let Some(downtime) = s.calculated_downtime_duration
&& downtime <= migrate_downtime_limit
{
info!("Memory delta transmission stopping - cutoff condition reached!");
info!(
"iteration:{},remaining:{}MiB,downtime(calc):{}ms,mebibyte/s:{:.2},throttle:{}%,dirty_rate:{}pps",
s.iteration,
s.bytes_to_transmit / 1024 / 1024,
s.calculated_downtime_duration
.expect("should have calculated downtime by now")
.as_millis(),
s.bytes_per_sec / 1024.0 / 1024.0,
vm.throttle_percent(),
s.dirty_rate_pps
);
break;
}
}

// Send the current dirty pages
let transfer_start = Instant::now();
s.transmit_start_time = Instant::now();
mem_send.send_memory(&iteration_table, socket)?;
let transfer_time = transfer_start.elapsed().as_millis() as f64;
s.transmit_duration = s.transmit_start_time.elapsed();

s.total_transferred_bytes += s.bytes_to_transmit;
s.total_transferred_pages += s.pages_to_transmit;

// Update bandwidth
if transfer_time > 0.0 && s.pending_size > 0 {
bandwidth = s.pending_size as f64 / transfer_time;
// Convert bandwidth to MB/s
s.mb_per_sec = (bandwidth * 1000.0) / (1024.0 * 1024.0);
if s.transmit_duration > Duration::ZERO && s.bytes_to_transmit > 0 {
s.bytes_per_sec = s.bytes_to_transmit as f64 / s.transmit_duration.as_secs_f64();
}

// Update iteration cost time
s.iteration_cost_time = s.iteration_start_time.elapsed();
if s.iteration_cost_time.as_millis() > 0 {
s.pages_per_second =
s.current_dirty_pages * 1000 / s.iteration_cost_time.as_millis() as u64;
}
debug!(
"iteration {}: cost={}ms, throttle={}%",
s.iteration_duration = s.iteration_start_time.elapsed();
info!(
"iteration:{},cost={}ms,throttle={}%,transmitted={}MiB,dirty_rate={}pps,Mebibyte/s={:.2}",
s.iteration,
s.iteration_cost_time.as_millis(),
vm.throttle_percent()
s.iteration_duration.as_millis(),
vm.throttle_percent(),
s.bytes_to_transmit / 1024 / 1024,
s.dirty_rate_pps,
s.bytes_per_sec / 1024.0 / 1024.0
);

// Increment iteration counter
s.iteration += 1;
}

Ok(iteration_table)
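
In outline, the loop above behaves like the following simplified, self-contained sketch (the helper type, throttle step, and all numbers are invented for illustration; this is not the vmm API):

use std::time::Duration;

// Toy stand-in for the VM, invented for this sketch.
struct DemoVm {
    throttle: u32,
    dirty_per_round: u64,    // bytes dirtied per round while the guest runs
    link_bytes_per_sec: f64, // measured transmission throughput
}

impl DemoVm {
    // Auto-converge: throttle the guest harder so it dirties fewer pages.
    fn increase_throttle(&mut self) {
        self.throttle = (self.throttle + 10).min(99);
        self.dirty_per_round = self.dirty_per_round * 9 / 10;
    }
    // Time to send `bytes` at the measured throughput.
    fn estimate_downtime(&self, bytes: u64) -> Duration {
        Duration::from_secs_f64(bytes as f64 / self.link_bytes_per_sec)
    }
}

fn main() {
    let mut vm = DemoVm {
        throttle: 0,
        dirty_per_round: 1 << 30, // 1 GiB dirtied per round (invented)
        link_bytes_per_sec: 1e9,  // ~1 GB/s link (invented)
    };
    let downtime_limit = Duration::from_millis(300);
    let full_memory: u64 = 8 << 30; // 8 GiB guest (invented)

    let mut iteration = 0u64;
    loop {
        vm.increase_throttle();
        // Iteration 0 sends all memory; later rounds send only the dirty delta.
        let bytes = if iteration == 0 { full_memory } else { vm.dirty_per_round };
        if iteration > 0 && vm.estimate_downtime(bytes) <= downtime_limit {
            println!("converged after {iteration} iterations at {}% throttle", vm.throttle);
            break;
        }
        println!("iteration {iteration}: sending {} MiB", bytes / 1024 / 1024);
        iteration += 1;
    }
    // The caller then pauses the VM and transmits the final delta; that final
    // transmission is the actual downtime.
}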
@@ -2122,11 +2204,6 @@
) -> result::Result<(), MigratableError> {
let mem_send = SendAdditionalConnections::new(send_data_migration, &vm.guest_memory())?;

// Start logging dirty pages
vm.start_dirty_log()?;

mem_send.send_memory(&vm.memory_range_table()?, socket)?;

// Define the maximum allowed downtime: 2000 seconds (2,000,000 milliseconds)
const MAX_MIGRATE_DOWNTIME: u64 = 2000000;

@@ -2150,6 +2227,8 @@
)));
}

// Start logging dirty pages
vm.start_dirty_log()?;
let iteration_table = Self::memory_copy_iterations(
vm,
&mem_send,
@@ -2160,11 +2239,11 @@
)?;

info!("Entering downtime phase");
s.downtime_start = Instant::now();
s.downtime_start_time = Instant::now();
// End throttle thread
info!("stopping vcpu thread");
info!("stopping vcpu throttling thread");
vm.stop_vcpu_throttling();
info!("stopped vcpu thread");
info!("stopped vcpu throttling thread");
info!("pausing VM");
vm.pause()?;
info!("paused VM");
@@ -2173,11 +2252,17 @@
let mut final_table = vm.dirty_log()?;
final_table.extend(iteration_table.clone());
mem_send.send_memory(&final_table, socket)?;

// Update statistics
s.pending_size = final_table.regions().iter().map(|range| range.length).sum();
s.total_transferred_bytes += s.pending_size;
s.current_dirty_pages = s.pending_size.div_ceil(PAGE_SIZE as u64);
s.total_transferred_dirty_pages += s.current_dirty_pages;
s.bytes_to_transmit = final_table.regions().iter().map(|range| range.length).sum();
s.pages_to_transmit = s.bytes_to_transmit.div_ceil(PAGE_SIZE as u64);
s.total_transferred_bytes += s.bytes_to_transmit;
s.total_transferred_pages += s.pages_to_transmit;

info!(
"Memory Migration finished: transmitted {} bytes in total",
s.total_transferred_bytes
);

// Stop logging dirty pages
vm.stop_dirty_log()?;
@@ -2326,17 +2411,22 @@
)?;

// Record downtime
s.downtime = s.downtime_start.elapsed();
s.downtime_duration = s.downtime_start_time.elapsed();

// Stop logging dirty pages
if !send_data_migration.local {
vm.stop_dirty_log()?;
}

// Record total migration time
s.total_time = s.start_time.elapsed();
s.migration_duration = s.migration_start_time.elapsed();

info!("Migration complete");
info!(
"Migration complete: downtime: {:.3}s, total: {:1}s, iterations: {}",
s.downtime_duration.as_secs_f64(),
s.migration_duration.as_secs_f64(),
s.iteration,
);

// Let every Migratable object know about the migration being complete
vm.complete_migration()