From 762edccee5a148d3595b73d73fa15b08a896a99a Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Fri, 20 Feb 2026 20:59:52 +0000 Subject: [PATCH] Split datastore::volume file into several files Split volume.rs into several files, all within a new volume module. No code changes here, just moving stuff around! Unfortunately the diff is still large. --- dev-tools/omdb/src/bin/omdb/db.rs | 6 +- nexus/db-queries/src/db/datastore/disk.rs | 6 +- nexus/db-queries/src/db/datastore/mod.rs | 3 +- .../{volume.rs => volume/datastore.rs} | 4010 ++++------------- .../db-queries/src/db/datastore/volume/mod.rs | 10 + .../src/db/datastore/volume/replacement.rs | 969 ++++ .../src/db/datastore/volume/test.rs | 1584 +++++++ nexus/src/app/instance_platform/mod.rs | 2 +- nexus/src/app/sagas/common_storage.rs | 2 +- nexus/src/app/sagas/disk_create.rs | 5 +- nexus/src/app/sagas/image_create.rs | 8 +- .../src/app/sagas/region_replacement_drive.rs | 5 +- .../src/app/sagas/region_replacement_start.rs | 10 +- .../region_snapshot_replacement_start.rs | 10 +- .../sagas/region_snapshot_replacement_step.rs | 10 +- nexus/src/app/sagas/snapshot_create.rs | 2 +- nexus/src/app/sagas/volume_delete.rs | 4 +- .../integration_tests/volume_management.rs | 54 +- 18 files changed, 3383 insertions(+), 3317 deletions(-) rename nexus/db-queries/src/db/datastore/{volume.rs => volume/datastore.rs} (58%) create mode 100644 nexus/db-queries/src/db/datastore/volume/mod.rs create mode 100644 nexus/db-queries/src/db/datastore/volume/replacement.rs create mode 100644 nexus/db-queries/src/db/datastore/volume/test.rs diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index fc770fbcc1f..b41306da534 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -117,15 +117,15 @@ use nexus_db_queries::context::OpContext; use nexus_db_queries::db; use nexus_db_queries::db::DataStore; use nexus_db_queries::db::datastore::CrucibleDisk; -use 
nexus_db_queries::db::datastore::CrucibleTargets; use nexus_db_queries::db::datastore::Disk; use nexus_db_queries::db::datastore::InstanceAndActiveVmm; use nexus_db_queries::db::datastore::InstanceStateComputer; use nexus_db_queries::db::datastore::LocalStorageAllocation; use nexus_db_queries::db::datastore::LocalStorageDisk; use nexus_db_queries::db::datastore::SQL_BATCH_SIZE; -use nexus_db_queries::db::datastore::VolumeCookedResult; -use nexus_db_queries::db::datastore::read_only_resources_associated_with_volume; +use nexus_db_queries::db::datastore::volume::CrucibleTargets; +use nexus_db_queries::db::datastore::volume::VolumeCookedResult; +use nexus_db_queries::db::datastore::volume::read_only_resources_associated_with_volume; use nexus_db_queries::db::identity::Asset; use nexus_db_queries::db::model::ServiceKind; use nexus_db_queries::db::pagination::Paginator; diff --git a/nexus/db-queries/src/db/datastore/disk.rs b/nexus/db-queries/src/db/datastore/disk.rs index 201b4aee241..cf446b844d3 100644 --- a/nexus/db-queries/src/db/datastore/disk.rs +++ b/nexus/db-queries/src/db/datastore/disk.rs @@ -1886,9 +1886,9 @@ impl DataStore { params: &disk_types::DiskCreate, src: &ReadOnlyDiskSource, ) -> Result { - use crate::db::datastore::CrucibleTargets; - use crate::db::datastore::VolumeCheckoutReason; - use crate::db::datastore::read_only_resources_associated_with_volume; + use crate::db::datastore::volume::CrucibleTargets; + use crate::db::datastore::volume::VolumeCheckoutReason; + use crate::db::datastore::volume::read_only_resources_associated_with_volume; use sled_agent_client::VolumeConstructionRequest; // For idempotency, first check if the disk already exists, and if it diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 8350472e638..9f6cc31781b 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -147,7 +147,7 @@ mod utilization; mod v2p_mapping; mod 
virtual_provisioning_collection; mod vmm; -mod volume; +pub mod volume; mod volume_repair; mod vpc; pub mod webhook_delivery; @@ -197,7 +197,6 @@ pub use switch_port::SwitchPortSettingsCombinedResult; pub use user_data_export::*; pub use virtual_provisioning_collection::StorageType; pub use vmm::VmmStateUpdateResult; -pub use volume::*; // Number of unique datasets required to back a region. // TODO: This should likely turn into a configuration option. diff --git a/nexus/db-queries/src/db/datastore/volume.rs b/nexus/db-queries/src/db/datastore/volume/datastore.rs similarity index 58% rename from nexus/db-queries/src/db/datastore/volume.rs rename to nexus/db-queries/src/db/datastore/volume/datastore.rs index a3192f69f31..07c8eb19119 100644 --- a/nexus/db-queries/src/db/datastore/volume.rs +++ b/nexus/db-queries/src/db/datastore/volume/datastore.rs @@ -4,13 +4,14 @@ //! [`DataStore`] methods on [`Volume`]s. -use super::DataStore; use crate::db; +use crate::db::DataStore; use crate::db::datastore::OpContext; use crate::db::datastore::REGION_REDUNDANCY_THRESHOLD; use crate::db::datastore::RunnableQuery; use crate::db::datastore::SQL_BATCH_SIZE; use crate::db::identity::Asset; +use crate::db::model; use crate::db::model::CrucibleDataset; use crate::db::model::Disk; use crate::db::model::DownstairsClientStopRequestNotification; @@ -21,14 +22,12 @@ use crate::db::model::RegionSnapshot; use crate::db::model::UpstairsRepairNotification; use crate::db::model::UpstairsRepairNotificationType; use crate::db::model::UpstairsRepairProgress; -use crate::db::model::Volume; use crate::db::model::VolumeResourceUsage; use crate::db::model::VolumeResourceUsageRecord; use crate::db::model::VolumeResourceUsageType; use crate::db::model::to_db_typed_uuid; use crate::db::pagination::Paginator; use crate::db::pagination::paginated; -use anyhow::anyhow; use anyhow::bail; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; @@ -87,7 +86,7 @@ pub enum VolumeCheckoutReason { } 
#[derive(Debug, thiserror::Error)] -pub(super) enum VolumeGetError { +pub enum VolumeGetError { #[error("Serde error during volume_checkout: {0}")] SerdeError(#[from] serde_json::Error), @@ -116,7 +115,7 @@ impl From for Error { } #[derive(Debug, thiserror::Error)] -pub(super) enum VolumeCreationError { +pub enum VolumeCreationError { #[error("Error from Volume creation: {0}")] Public(Error), @@ -179,48 +178,6 @@ enum RemoveRopError { CouldNotFindResource(String), } -#[derive(Debug, thiserror::Error)] -enum ReplaceRegionError { - #[error("Error from Volume region replacement: {0}")] - Public(Error), - - #[error("Serde error during Volume region replacement: {0}")] - SerdeError(#[from] serde_json::Error), - - #[error("Region replacement error: {0}")] - RegionReplacementError(#[from] anyhow::Error), -} - -#[derive(Debug, thiserror::Error)] -enum ReplaceSnapshotError { - #[error("Error from Volume snapshot replacement: {0}")] - Public(Error), - - #[error("Serde error during Volume snapshot replacement: {0}")] - SerdeError(#[from] serde_json::Error), - - #[error("Snapshot replacement error: {0}")] - SnapshotReplacementError(#[from] anyhow::Error), - - #[error("Replaced {0} targets, expected {1}")] - UnexpectedReplacedTargets(usize, usize), - - #[error("Updated {0} database rows, expected {1}")] - UnexpectedDatabaseUpdate(usize, usize), - - #[error( - "Address parsing error during Volume snapshot \ - replacement: {0}" - )] - AddressParseError(#[from] AddrParseError), - - #[error("Could not match read-only resource to {0}")] - CouldNotFindResource(String), - - #[error("Multiple volume resource usage records for {0}")] - MultipleResourceUsageRecords(String), -} - /// Crucible resources freed by previous volume deletes #[derive(Debug, Serialize, Deserialize)] pub struct FreedCrucibleResources { @@ -242,18 +199,18 @@ pub struct SourceVolume(pub VolumeUuid); pub struct DestVolume(pub VolumeUuid); impl DataStore { - pub(super) async fn volume_create_in_txn( + pub async fn 
volume_create_in_txn( conn: &async_bb8_diesel::Connection, err: OptionalError, volume_id: VolumeUuid, vcr: VolumeConstructionRequest, crucible_targets: CrucibleTargets, - ) -> Result { + ) -> Result { use nexus_db_schema::schema::volume::dsl; - let maybe_volume: Option = dsl::volume + let maybe_volume: Option = dsl::volume .filter(dsl::id.eq(to_db_typed_uuid(volume_id))) - .select(Volume::as_select()) + .select(model::Volume::as_select()) .first_async(conn) .await .optional()?; @@ -267,11 +224,11 @@ impl DataStore { let vcr_string = serde_json::to_string(&vcr) .map_err(|e| err.bail(VolumeCreationError::SerdeError(e)))?; - let volume = Volume::new(volume_id, vcr_string); + let volume = model::Volume::new(volume_id, vcr_string); - let volume: Volume = diesel::insert_into(dsl::volume) + let volume: model::Volume = diesel::insert_into(dsl::volume) .values(volume.clone()) - .returning(Volume::as_returning()) + .returning(model::Volume::as_returning()) .get_result_async(conn) .await .map_err(|e| { @@ -365,7 +322,7 @@ impl DataStore { .optional() } - async fn read_only_target_to_volume_resource_usage( + pub async fn read_only_target_to_volume_resource_usage( conn: &async_bb8_diesel::Connection, read_only_target: &SocketAddrV6, ) -> Result, diesel::result::Error> { @@ -416,7 +373,7 @@ impl DataStore { &self, volume_id: VolumeUuid, vcr: VolumeConstructionRequest, - ) -> CreateResult { + ) -> CreateResult { // Grab all the targets that the volume construction request references. // Do this outside the transaction, as the data inside volume doesn't // change and this would simply add to the transaction time. 
@@ -461,21 +418,21 @@ impl DataStore { pub(super) async fn volume_get_impl( conn: &async_bb8_diesel::Connection, volume_id: VolumeUuid, - ) -> Result, diesel::result::Error> { + ) -> Result, diesel::result::Error> { use nexus_db_schema::schema::volume::dsl; dsl::volume .filter(dsl::id.eq(to_db_typed_uuid(volume_id))) - .select(Volume::as_select()) - .first_async::(conn) + .select(model::Volume::as_select()) + .first_async::(conn) .await .optional() } - /// Return a `Option` based on id, even if it's soft deleted. + /// Return a `Option` based on id, even if it's soft deleted. pub async fn volume_get( &self, volume_id: VolumeUuid, - ) -> LookupResult> { + ) -> LookupResult> { let conn = self.pool_connection_unauthorized().await?; Self::volume_get_impl(&conn, volume_id) .await @@ -498,7 +455,7 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } - fn volume_usage_records_for_resource_query( + pub fn volume_usage_records_for_resource_query( resource: VolumeResourceUsage, ) -> impl RunnableQuery { use nexus_db_schema::schema::volume_resource_usage::dsl; @@ -854,18 +811,18 @@ impl DataStore { } } - pub(super) async fn volume_checkout_in_txn( + pub async fn volume_checkout_in_txn( conn: &async_bb8_diesel::Connection, err: OptionalError, volume_id: VolumeUuid, reason: VolumeCheckoutReason, - ) -> Result { + ) -> Result { use nexus_db_schema::schema::volume::dsl; // Grab the volume in question. 
let volume = dsl::volume .filter(dsl::id.eq(to_db_typed_uuid(volume_id))) - .select(Volume::as_select()) + .select(model::Volume::as_select()) .get_result_async(conn) .await?; @@ -1047,7 +1004,7 @@ impl DataStore { &self, volume_id: VolumeUuid, reason: VolumeCheckoutReason, - ) -> LookupResult { + ) -> LookupResult { // We perform a transaction here, to be sure that on completion // of this, the database contains an updated version of the // volume with the generation number incremented (for the volume @@ -1144,7 +1101,7 @@ impl DataStore { source_volume_id: SourceVolume, dest_volume_id: DestVolume, reason: VolumeCheckoutReason, - ) -> CreateResult { + ) -> CreateResult { let volume = self.volume_checkout(source_volume_id.0, reason).await?; let vcr: sled_agent_client::VolumeConstructionRequest = @@ -1211,7 +1168,7 @@ impl DataStore { // b) the volume record does not exist (null due to left join) // // so return an Option and check below - Option::::as_select(), + Option::::as_select(), )) .load_async(conn) .await?; @@ -1371,7 +1328,7 @@ impl DataStore { let volume = dsl::volume .filter(dsl::id.eq(to_db_typed_uuid(volume_id))) - .select(Volume::as_select()) + .select(model::Volume::as_select()) .get_result_async(conn) .await .optional()?; @@ -1778,7 +1735,7 @@ impl DataStore { let volume = dsl::volume .filter(dsl::id.eq(to_db_typed_uuid(volume_id))) - .select(Volume::as_select()) + .select(model::Volume::as_select()) .get_result_async(conn) .await .optional()?; @@ -2586,7 +2543,7 @@ impl DataStore { } /// Check if a region is present in a Volume Construction Request -fn region_in_vcr( +pub fn region_in_vcr( vcr: &VolumeConstructionRequest, region: &SocketAddrV6, ) -> anyhow::Result { @@ -2636,7 +2593,7 @@ fn region_in_vcr( /// Check if a read-only target is present anywhere in a Volume Construction /// Request -fn read_only_target_in_vcr( +pub fn read_only_target_in_vcr( vcr: &VolumeConstructionRequest, read_only_target: &SocketAddrV6, ) -> anyhow::Result { @@ 
-2705,768 +2662,260 @@ fn read_only_target_in_vcr( Ok(false) } -#[derive(Clone)] -pub struct VolumeReplacementParams { - pub volume_id: VolumeUuid, - pub region_id: Uuid, - pub region_addr: SocketAddrV6, -} - -// types for volume_replace_snapshot and replace_read_only_target_in_vcr -// parameters - -#[derive(Debug, Clone, Copy)] -pub struct VolumeWithTarget(pub VolumeUuid); - -#[derive(Debug, Clone, Copy)] -pub struct ExistingTarget(pub SocketAddrV6); - -#[derive(Debug, Clone, Copy)] -pub struct ReplacementTarget(pub SocketAddrV6); +/// Return the read-only targets from a VolumeConstructionRequest. +/// +/// The targets of a volume construction request map to resources. +pub fn read_only_resources_associated_with_volume( + vcr: &VolumeConstructionRequest, + crucible_targets: &mut CrucibleTargets, +) { + let mut parts: VecDeque<&VolumeConstructionRequest> = VecDeque::new(); + parts.push_back(&vcr); -#[derive(Debug, Clone, Copy)] -pub struct VolumeToDelete(pub VolumeUuid); + while let Some(vcr_part) = parts.pop_front() { + match vcr_part { + VolumeConstructionRequest::Volume { + sub_volumes, + read_only_parent, + .. + } => { + for sub_volume in sub_volumes { + parts.push_back(sub_volume); + } -// The result type returned from both `volume_replace_region` and -// `volume_replace_snapshot` -#[must_use] -#[derive(Debug, Serialize, Deserialize, PartialEq)] -pub enum VolumeReplaceResult { - // based on the VCRs, seems like the replacement already happened - AlreadyHappened, + if let Some(read_only_parent) = read_only_parent { + parts.push_back(read_only_parent); + } + } - // this call performed the replacement - Done, + VolumeConstructionRequest::Url { .. } => { + // no action required + } - // the "existing" volume was soft deleted - ExistingVolumeSoftDeleted, + VolumeConstructionRequest::Region { opts, .. 
} => { + for target in &opts.target { + if opts.read_only { + crucible_targets + .read_only_targets + .push(target.to_string()); + } + } + } - // the "existing" volume was hard deleted - ExistingVolumeHardDeleted, + VolumeConstructionRequest::File { .. } => { + // no action required + } + } + } } -impl DataStore { - async fn volume_replace_region_in_txn( - conn: &async_bb8_diesel::Connection, - err: OptionalError, - existing: VolumeReplacementParams, - replacement: VolumeReplacementParams, - ) -> Result { - // In a single transaction: - // - // - set the existing region's volume id to the replacement's volume id - // - set the replacement region's volume id to the existing's volume id - // - update the existing volume's construction request to replace the - // existing region's SocketAddrV6 with the replacement region's - // - // This function's effects can be undone by calling it with swapped - // parameters. - // - // # Example # - // - // Imagine `volume_replace_region` is called with the following, - // pretending that UUIDs are just eight uppercase letters: - // - // let existing = VolumeReplacementParams { - // volume_id: TARGET_VOL, - // region_id: TARGET_REG, - // region_addr: "[fd00:1122:3344:145::10]:40001", - // } - // - // let replace = VolumeReplacementParams { - // volume_id: NEW_VOL, - // region_id: NEW_REG, - // region_addr: "[fd00:1122:3344:322::4]:3956", - // } - // - // In the database, the relevant records (and columns) of the region - // table look like this prior to the transaction: - // - // id | volume_id - // -------------| --------- - // TARGET_REG | TARGET_VOL - // NEW_REG | NEW_VOL - // - // TARGET_VOL has a volume construction request where one of the targets - // list will contain TARGET_REG's address: - // - // { - // "type": "volume", - // "block_size": 512, - // "id": "TARGET_VOL", - // "read_only_parent": { - // ... - // }, - // "sub_volumes": [ - // { - // ... - // "opts": { - // ... 
- // "target": [ - // "[fd00:1122:3344:103::3]:19004", - // "[fd00:1122:3344:79::12]:27015", - // "[fd00:1122:3344:145::10]:40001" <----- - // ] - // } - // } - // ] - // } - // - // Note it is not required for the replacement volume to exist as a - // database record for this transaction. - // - // The first part of the transaction will swap the volume IDs of the - // existing and replacement region records: - // - // id | volume_id - // ------------| --------- - // TARGET_REG | NEW_VOL - // NEW_REG | TARGET_VOL - // - // The second part of the transaction will update the volume - // construction request of TARGET_VOL by finding and replacing - // TARGET_REG's address (in the appropriate targets array) with - // NEW_REG's address: - // - // { - // ... - // "target": [ - // "[fd00:1122:3344:103::3]:19004", - // "[fd00:1122:3344:79::12]:27015", - // "[fd00:1122:3344:322::4]:3956" <----- - // ] - // ... - // } - // - // After the transaction, the caller should ensure that TARGET_REG is - // referenced (via its socket address) in NEW_VOL. For an example, this - // is done as part of the region replacement start saga. - - // Grab the old volume first - let maybe_old_volume = { - volume_dsl::volume - .filter(volume_dsl::id.eq(to_db_typed_uuid(existing.volume_id))) - .select(Volume::as_select()) - .first_async::(conn) - .await - .optional() - .map_err(|e| { - err.bail_retryable_or_else(e, |e| { - ReplaceRegionError::Public(public_error_from_diesel( - e, - ErrorHandler::Server, - )) - }) - })? - }; +/// Return the read-write targets from a VolumeConstructionRequest. +/// +/// The targets of a volume construction request map to resources. 
+pub fn read_write_resources_associated_with_volume( + vcr: &VolumeConstructionRequest, + targets: &mut Vec, +) { + let mut parts: VecDeque<&VolumeConstructionRequest> = VecDeque::new(); + parts.push_back(&vcr); - let old_volume = if let Some(old_volume) = maybe_old_volume { - old_volume - } else { - // Existing volume was hard-deleted, so return here. We can't - // perform the region replacement now, and this will short-circuit - // the rest of the process. + while let Some(vcr_part) = parts.pop_front() { + match vcr_part { + VolumeConstructionRequest::Volume { sub_volumes, .. } => { + for sub_volume in sub_volumes { + parts.push_back(sub_volume); + } - return Ok(VolumeReplaceResult::ExistingVolumeHardDeleted); - }; + // No need to look under read-only parent + } - if old_volume.time_deleted.is_some() { - // Existing volume was soft-deleted, so return here for the same - // reason: the region replacement process should be short-circuited - // now. - return Ok(VolumeReplaceResult::ExistingVolumeSoftDeleted); - } + VolumeConstructionRequest::Url { .. } => { + // no action required + } - let old_vcr: VolumeConstructionRequest = - match serde_json::from_str(&old_volume.data()) { - Ok(vcr) => vcr, - Err(e) => { - return Err(err.bail(ReplaceRegionError::SerdeError(e))); + VolumeConstructionRequest::Region { opts, .. } => { + if !opts.read_only { + for target in &opts.target { + targets.push(target.to_string()); + } } - }; + } - // Does it look like this replacement already happened? - let old_region_in_vcr = - match region_in_vcr(&old_vcr, &existing.region_addr) { - Ok(v) => v, - Err(e) => { - return Err( - err.bail(ReplaceRegionError::RegionReplacementError(e)) - ); - } - }; - let new_region_in_vcr = - match region_in_vcr(&old_vcr, &replacement.region_addr) { - Ok(v) => v, - Err(e) => { - return Err( - err.bail(ReplaceRegionError::RegionReplacementError(e)) - ); - } - }; + VolumeConstructionRequest::File { .. 
} => { + // no action required + } + } + } +} - if !old_region_in_vcr && new_region_in_vcr { - // It does seem like the replacement happened - if this function is - // called twice in a row then this can happen. - return Ok(VolumeReplaceResult::AlreadyHappened); - } else if old_region_in_vcr && !new_region_in_vcr { - // The replacement hasn't happened yet, but can proceed - } else if old_region_in_vcr && new_region_in_vcr { - // Both the old region and new region exist in this VCR. Regions are - // not reused, so this is an illegal state: if the replacement of - // the old region occurred, then the new region would be present - // multiple times in the volume. We have to bail out here. - // - // The guards against this happening are: - // - // - only one replacement can occur for a volume at a time (due to - // the volume repair lock), and - // - // - region replacement does not delete the old region until the - // "region replacement finish" saga, which happens at the very end - // of the process. If it eagerly deleted the region, the crucible - // agent would be free to reuse the port for another region - // allocation, and an identical target (read: ip and port) could - // be confusing. Most of the time, we assume that the dataset - // containing that agent has been expunged, so the agent is gone, - // so this port reuse cannot occur - return Err(err.bail(ReplaceRegionError::RegionReplacementError( - anyhow!("old_region_in_vcr && new_region_in_vcr"), - ))); - } else if !old_region_in_vcr && !new_region_in_vcr { - // Neither the region we've been asked to replace or the new region - // is in the VCR. This is an illegal state, as this function would - // be performing a no-op. We have to bail out here. 
- // - // The guard against this happening is again that only one - // replacement can occur for a volume at a time: if it was possible - // for multiple region replacements to occur, then both would be - // attempting to swap out the same old region for different new - // regions: - // - // region replacement one: - // - // volume_replace_region_in_txn( - // .., - // existing = [fd00:1122:3344:145::10]:40001, - // replacement = [fd00:1122:3344:322::4]:3956, - // ) - // - // region replacement two: - // - // volume_replace_region_in_txn( - // .., - // existing = [fd00:1122:3344:145::10]:40001, - // replacement = [fd00:1122:3344:fd1::123]:27001, - // ) - // - // The one that replaced second would always land in this branch. - return Err(err.bail(ReplaceRegionError::RegionReplacementError( - anyhow!("!old_region_in_vcr && !new_region_in_vcr"), - ))); +/// Return the number of read-write subvolumes in a VolumeConstructionRequest. +pub fn count_read_write_sub_volumes( + vcr: &VolumeConstructionRequest, +) -> anyhow::Result { + Ok(match vcr { + VolumeConstructionRequest::Volume { sub_volumes, .. } => { + sub_volumes.len() } - use nexus_db_schema::schema::region::dsl as region_dsl; - use nexus_db_schema::schema::volume::dsl as volume_dsl; + VolumeConstructionRequest::Url { .. } => 0, - // Set the existing region's volume id to the replacement's volume id - diesel::update(region_dsl::region) - .filter(region_dsl::id.eq(existing.region_id)) - .set( - region_dsl::volume_id - .eq(to_db_typed_uuid(replacement.volume_id)), - ) - .execute_async(conn) - .await - .map_err(|e| { - err.bail_retryable_or_else(e, |e| { - ReplaceRegionError::Public(public_error_from_diesel( - e, - ErrorHandler::Server, - )) - }) - })?; + VolumeConstructionRequest::Region { .. } => { + // We don't support a pure Region VCR at the volume + // level in the database, so this choice should + // never be encountered. 
+ bail!("Region not supported as a top level volume"); + } - // Set the replacement region's volume id to the existing's volume id - diesel::update(region_dsl::region) - .filter(region_dsl::id.eq(replacement.region_id)) - .set(region_dsl::volume_id.eq(to_db_typed_uuid(existing.volume_id))) - .execute_async(conn) - .await - .map_err(|e| { - err.bail_retryable_or_else(e, |e| { - ReplaceRegionError::Public(public_error_from_diesel( - e, - ErrorHandler::Server, - )) - }) - })?; + VolumeConstructionRequest::File { .. } => 0, + }) +} - // Update the existing volume's construction request to replace the - // existing region's SocketAddrV6 with the replacement region's +/// Returns true if the sub-volumes of a Volume are all read-only +pub fn volume_is_read_only( + vcr: &VolumeConstructionRequest, +) -> anyhow::Result { + match vcr { + VolumeConstructionRequest::Volume { sub_volumes, .. } => { + for sv in sub_volumes { + match sv { + VolumeConstructionRequest::Region { opts, .. } => { + if !opts.read_only { + return Ok(false); + } + } - // Copy the old volume's VCR, changing out the old region for the new. - let new_vcr = match replace_region_in_vcr( - &old_vcr, - existing.region_addr, - replacement.region_addr, - ) { - Ok(new_vcr) => new_vcr, - Err(e) => { - return Err( - err.bail(ReplaceRegionError::RegionReplacementError(e)) - ); + _ => { + bail!("Saw non-Region in sub-volume {:?}", sv); + } + } } - }; - let new_volume_data = serde_json::to_string(&new_vcr) - .map_err(|e| err.bail(ReplaceRegionError::SerdeError(e)))?; + Ok(true) + } - // Update the existing volume's data - diesel::update(volume_dsl::volume) - .filter(volume_dsl::id.eq(to_db_typed_uuid(existing.volume_id))) - .set(volume_dsl::data.eq(new_volume_data)) - .execute_async(conn) - .await - .map_err(|e| { - err.bail_retryable_or_else(e, |e| { - ReplaceRegionError::Public(public_error_from_diesel( - e, - ErrorHandler::Server, - )) - }) - })?; + VolumeConstructionRequest::Region { .. 
} => { + // We don't support a pure Region VCR at the volume + // level in the database, so this choice should + // never be encountered, but I want to know if it is. + bail!("Region not supported as a top level volume"); + } - // After region replacement, validate invariants for all volumes - #[cfg(any(test, feature = "testing"))] - Self::validate_volume_invariants(conn).await?; + VolumeConstructionRequest::File { .. } => { + // Effectively, this is read-only, as this BlockIO implementation + // does not have a `write` implementation. This will be hit if + // trying to make a snapshot or image out of a + // `YouCanBootAnythingAsLongAsItsAlpine` image source. + Ok(true) + } - Ok(VolumeReplaceResult::Done) + VolumeConstructionRequest::Url { .. } => { + // ImageSource::Url was deprecated + bail!("Saw VolumeConstructionRequest::Url"); + } } +} - /// Replace a read-write region in a Volume with a new region. - pub async fn volume_replace_region( - &self, - existing: VolumeReplacementParams, - replacement: VolumeReplacementParams, - ) -> Result { - let err = OptionalError::new(); +/// Find Regions in a Volume's subvolumes list whose target match the argument +/// IP, and add them to the supplied Vec. +fn find_matching_rw_regions_in_volume( + vcr: &VolumeConstructionRequest, + ip: &std::net::Ipv6Addr, + matched_targets: &mut Vec, +) -> anyhow::Result<()> { + let mut parts: VecDeque<&VolumeConstructionRequest> = VecDeque::new(); + parts.push_back(vcr); - let conn = self.pool_connection_unauthorized().await?; - self.transaction_retry_wrapper("volume_replace_region") - .transaction(&conn, |conn| { - let err = err.clone(); - let existing = existing.clone(); - let replacement = replacement.clone(); - async move { - Self::volume_replace_region_in_txn( - &conn, - err, - existing, - replacement, - ) - .await + while let Some(vcr_part) = parts.pop_front() { + match vcr_part { + VolumeConstructionRequest::Volume { sub_volumes, .. 
} => { + for sub_volume in sub_volumes { + parts.push_back(sub_volume); } - }) - .await - .map_err(|e| { - if let Some(err) = err.take() { - match err { - ReplaceRegionError::Public(e) => e, + } - ReplaceRegionError::SerdeError(_) => { - Error::internal_error(&err.to_string()) - } + VolumeConstructionRequest::Url { .. } => { + // nothing required + } - ReplaceRegionError::RegionReplacementError(_) => { - Error::internal_error(&err.to_string()) + VolumeConstructionRequest::Region { opts, .. } => { + if !opts.read_only { + for target in &opts.target { + if let SocketAddr::V6(target) = target { + if target.ip() == ip { + matched_targets.push(*target); + } } } - } else { - public_error_from_diesel(e, ErrorHandler::Server) } - }) - } - - async fn volume_replace_snapshot_in_txn( - conn: &async_bb8_diesel::Connection, - err: OptionalError, - volume_id: VolumeWithTarget, - existing: ExistingTarget, - replacement: ReplacementTarget, - volume_to_delete_id: VolumeToDelete, - ) -> Result { - use nexus_db_schema::schema::volume::dsl as volume_dsl; - use nexus_db_schema::schema::volume_resource_usage::dsl as ru_dsl; - - // Grab the old volume first - let maybe_old_volume = { - volume_dsl::volume - .filter(volume_dsl::id.eq(to_db_typed_uuid(volume_id.0))) - .select(Volume::as_select()) - .first_async::(conn) - .await - .optional() - .map_err(|e| { - err.bail_retryable_or_else(e, |e| { - ReplaceSnapshotError::Public(public_error_from_diesel( - e, - ErrorHandler::Server, - )) - }) - })? - }; - - let old_volume = if let Some(old_volume) = maybe_old_volume { - old_volume - } else { - // Existing volume was hard-deleted, so return here. We can't - // perform the region replacement now, and this will short-circuit - // the rest of the process. 
- - return Ok(VolumeReplaceResult::ExistingVolumeHardDeleted); - }; + } - if old_volume.time_deleted.is_some() { - // Existing volume was soft-deleted, so return here for the same - // reason: the region replacement process should be short-circuited - // now. - return Ok(VolumeReplaceResult::ExistingVolumeSoftDeleted); + VolumeConstructionRequest::File { .. } => { + // nothing required + } } + } - let old_vcr: VolumeConstructionRequest = - match serde_json::from_str(&old_volume.data()) { - Ok(vcr) => vcr, - Err(e) => { - return Err(err.bail(ReplaceSnapshotError::SerdeError(e))); - } - }; + Ok(()) +} - // Does it look like this replacement already happened? - let old_target_in_vcr = - match read_only_target_in_vcr(&old_vcr, &existing.0) { - Ok(v) => v, - Err(e) => { - return Err(err.bail( - ReplaceSnapshotError::SnapshotReplacementError(e), - )); - } - }; +fn region_sets( + vcr: &VolumeConstructionRequest, + region_sets: &mut Vec>, +) { + let mut parts: VecDeque<&VolumeConstructionRequest> = VecDeque::new(); + parts.push_back(vcr); - let new_target_in_vcr = - match read_only_target_in_vcr(&old_vcr, &replacement.0) { - Ok(v) => v, - Err(e) => { - return Err(err.bail( - ReplaceSnapshotError::SnapshotReplacementError(e), - )); + while let Some(work) = parts.pop_front() { + match work { + VolumeConstructionRequest::Volume { + sub_volumes, + read_only_parent, + .. + } => { + for sub_volume in sub_volumes { + parts.push_back(&sub_volume); } - }; - - if !old_target_in_vcr && new_target_in_vcr { - // It does seem like the replacement happened - return Ok(VolumeReplaceResult::AlreadyHappened); - } - // Update the existing volume's construction request to replace the - // existing target's SocketAddrV6 with the replacement target's - - // Copy the old volume's VCR, changing out the old target for the new. 
- let (new_vcr, replacements) = match replace_read_only_target_in_vcr( - &old_vcr, - existing, - replacement, - ) { - Ok(new_vcr) => new_vcr, - Err(e) => { - return Err( - err.bail(ReplaceSnapshotError::SnapshotReplacementError(e)) - ); + if let Some(read_only_parent) = read_only_parent { + parts.push_back(&read_only_parent); + } } - }; - - // Expect that this only happened once. If it happened multiple times, - // question everything: how would a snapshot be used twice?! - if replacements != 1 { - return Err(err.bail( - ReplaceSnapshotError::UnexpectedReplacedTargets( - replacements, - 1, - ), - )); - } - - let new_volume_data = serde_json::to_string(&new_vcr) - .map_err(|e| err.bail(ReplaceSnapshotError::SerdeError(e)))?; - - // Update the existing volume's data - diesel::update(volume_dsl::volume) - .filter(volume_dsl::id.eq(to_db_typed_uuid(volume_id.0))) - .set(volume_dsl::data.eq(new_volume_data)) - .execute_async(conn) - .await - .map_err(|e| { - err.bail_retryable_or_else(e, |e| { - ReplaceSnapshotError::Public(public_error_from_diesel( - e, - ErrorHandler::Server, - )) - }) - })?; - - // Make a new VCR that will stash the target to delete. The values here - // don't matter, just that it gets fed into the volume_delete machinery - // later. 
- let vcr = VolumeConstructionRequest::Volume { - id: *volume_to_delete_id.0.as_untyped_uuid(), - block_size: 512, - sub_volumes: vec![VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 1, - extent_count: 1, - generation: 1, - opts: sled_agent_client::CrucibleOpts { - id: *volume_to_delete_id.0.as_untyped_uuid(), - target: vec![existing.0.into()], - lossy: false, - flush_timeout: None, - key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: true, - }, - }], - read_only_parent: None, - }; - - let volume_data = serde_json::to_string(&vcr) - .map_err(|e| err.bail(ReplaceSnapshotError::SerdeError(e)))?; - - // Update the volume to delete data - let num_updated = diesel::update(volume_dsl::volume) - .filter(volume_dsl::id.eq(to_db_typed_uuid(volume_to_delete_id.0))) - .filter(volume_dsl::time_deleted.is_null()) - .set(volume_dsl::data.eq(volume_data)) - .execute_async(conn) - .await?; - - if num_updated != 1 { - return Err(err.bail( - ReplaceSnapshotError::UnexpectedDatabaseUpdate(num_updated, 1), - )); - } - - // Update the appropriate volume resource usage records - it could - // either be a read-only region or a region snapshot, so determine what - // it is first - - let maybe_existing_usage = - Self::read_only_target_to_volume_resource_usage(conn, &existing.0) - .await?; - - let Some(existing_usage) = maybe_existing_usage else { - return Err(err.bail(ReplaceSnapshotError::CouldNotFindResource( - format!("could not find resource for {}", existing.0,), - ))); - }; - - // The "existing" target moved into the volume to delete - - Self::swap_volume_usage_records_for_resources( - conn, - existing_usage, - volume_id.0, - volume_to_delete_id.0, - ) - .await - .map_err(|e| { - err.bail_retryable_or_else(e, |e| { - ReplaceSnapshotError::Public(public_error_from_diesel( - e, - ErrorHandler::Server, - )) - }) - })?; - - let maybe_replacement_usage = - Self::read_only_target_to_volume_resource_usage( - conn, - 
&replacement.0, - ) - .await?; - - let Some(replacement_usage) = maybe_replacement_usage else { - return Err(err.bail(ReplaceSnapshotError::CouldNotFindResource( - format!("could not find resource for {}", existing.0,), - ))); - }; + VolumeConstructionRequest::Url { .. } => { + // nothing required + } - // The intention leaving this transaction is that the correct volume - // resource usage records exist, so: - // - // - if no usage record existed for the replacement usage, then create a - // new record that points to the volume id (this can happen if the - // volume to delete was blank when coming into this function) - // - // - if records exist for the "replacement" usage, then one of those - // will match the volume to delete id, so perform a swap instead to - // the volume id - - let existing_replacement_volume_usage_records = - Self::volume_usage_records_for_resource_query( - replacement_usage.clone(), - ) - .load_async(conn) - .await - .map_err(|e| { - err.bail_retryable_or_else(e, |e| { - ReplaceSnapshotError::Public(public_error_from_diesel( - e, - ErrorHandler::Server, - )) - }) - })? - // TODO be smart enough to .filter the above query - .into_iter() - .filter(|record| record.volume_id == volume_to_delete_id.0.into()) - .count(); + VolumeConstructionRequest::Region { opts, .. 
} => { + let mut targets = vec![]; - // The "replacement" target moved into the volume + for target in &opts.target { + match target { + SocketAddr::V6(v6) => { + targets.push(*v6); + } + SocketAddr::V4(_) => {} + } + } - if existing_replacement_volume_usage_records == 0 { - // No matching record - let new_record = - VolumeResourceUsageRecord::new(volume_id.0, replacement_usage); + if targets.len() == opts.target.len() { + region_sets.push(targets); + } + } - diesel::insert_into(ru_dsl::volume_resource_usage) - .values(new_record) - .execute_async(conn) - .await - .map_err(|e| { - err.bail_retryable_or_else(e, |e| { - ReplaceSnapshotError::Public(public_error_from_diesel( - e, - ErrorHandler::Server, - )) - }) - })?; - } else if existing_replacement_volume_usage_records == 1 { - // One matching record: perform swap - Self::swap_volume_usage_records_for_resources( - conn, - replacement_usage, - volume_to_delete_id.0, - volume_id.0, - ) - .await - .map_err(|e| { - err.bail_retryable_or_else(e, |e| { - ReplaceSnapshotError::Public(public_error_from_diesel( - e, - ErrorHandler::Server, - )) - }) - })?; - } else { - // More than one matching record! - return Err(err.bail( - ReplaceSnapshotError::MultipleResourceUsageRecords(format!( - "{replacement_usage:?}" - )), - )); + VolumeConstructionRequest::File { .. } => { + // nothing required + } } - - // After region snapshot replacement, validate invariants for all - // volumes - #[cfg(any(test, feature = "testing"))] - Self::validate_volume_invariants(conn).await?; - - Ok(VolumeReplaceResult::Done) - } - - /// Replace a read-only target in a Volume with a new region - /// - /// In a single transaction: - /// - /// - update a volume's serialized construction request by replacing a - /// single target. - /// - /// - stash the replaced target in a "volume to delete"'s serialized - /// construction request - /// - /// Note that this transaction does _not_ update a region snapshot's volume - /// references table! 
This is legal because the existing target reference is - /// written into the volume to delete's construction request. - /// - /// This function's effects can be undone by calling it with swapped - /// `existing` and `replacement` parameters. - pub async fn volume_replace_snapshot( - &self, - volume_id: VolumeWithTarget, - existing: ExistingTarget, - replacement: ReplacementTarget, - volume_to_delete_id: VolumeToDelete, - ) -> Result { - let err = OptionalError::new(); - - let conn = self.pool_connection_unauthorized().await?; - self.transaction_retry_wrapper("volume_replace_snapshot") - .transaction(&conn, |conn| { - let err = err.clone(); - - async move { - Self::volume_replace_snapshot_in_txn( - &conn, - err, - volume_id, - existing, - replacement, - volume_to_delete_id, - ) - .await - } - }) - .await - .map_err(|e| { - if let Some(err) = err.take() { - match err { - ReplaceSnapshotError::Public(e) => e, - - ReplaceSnapshotError::SerdeError(_) - | ReplaceSnapshotError::SnapshotReplacementError(_) - | ReplaceSnapshotError::UnexpectedReplacedTargets( - _, - _, - ) - | ReplaceSnapshotError::UnexpectedDatabaseUpdate( - _, - _, - ) - | ReplaceSnapshotError::AddressParseError(_) - | ReplaceSnapshotError::CouldNotFindResource(_) - | ReplaceSnapshotError::MultipleResourceUsageRecords( - _, - ) => Error::internal_error(&err.to_string()), - } - } else { - public_error_from_diesel(e, ErrorHandler::Server) - } - }) } } -/// Return the read-only targets from a VolumeConstructionRequest. -/// -/// The targets of a volume construction request map to resources. 
-pub fn read_only_resources_associated_with_volume( +/// Check if an ipv6 address is referenced in a Volume Construction Request +fn ipv6_addr_referenced_in_vcr( vcr: &VolumeConstructionRequest, - crucible_targets: &mut CrucibleTargets, -) { + ip: &std::net::Ipv6Addr, +) -> bool { let mut parts: VecDeque<&VolumeConstructionRequest> = VecDeque::new(); - parts.push_back(&vcr); + parts.push_back(vcr); while let Some(vcr_part) = parts.pop_front() { match vcr_part { @@ -3485,516 +2934,232 @@ pub fn read_only_resources_associated_with_volume( } VolumeConstructionRequest::Url { .. } => { - // no action required + // nothing required } VolumeConstructionRequest::Region { opts, .. } => { for target in &opts.target { - if opts.read_only { - crucible_targets - .read_only_targets - .push(target.to_string()); + match target { + SocketAddr::V6(t) => { + if t.ip() == ip { + return true; + } + } + + SocketAddr::V4(_) => {} } } } VolumeConstructionRequest::File { .. } => { - // no action required + // nothing required } } } + + false } -/// Return the read-write targets from a VolumeConstructionRequest. -/// -/// The targets of a volume construction request map to resources. -pub fn read_write_resources_associated_with_volume( +/// Check if an ipv6 net is referenced in a Volume Construction Request +fn ipv6_net_referenced_in_vcr( vcr: &VolumeConstructionRequest, - targets: &mut Vec, -) { + net: &oxnet::Ipv6Net, +) -> bool { let mut parts: VecDeque<&VolumeConstructionRequest> = VecDeque::new(); - parts.push_back(&vcr); + parts.push_back(vcr); while let Some(vcr_part) = parts.pop_front() { match vcr_part { - VolumeConstructionRequest::Volume { sub_volumes, .. } => { + VolumeConstructionRequest::Volume { + sub_volumes, + read_only_parent, + .. 
+ } => { for sub_volume in sub_volumes { parts.push_back(sub_volume); } - // No need to look under read-only parent + if let Some(read_only_parent) = read_only_parent { + parts.push_back(read_only_parent); + } } VolumeConstructionRequest::Url { .. } => { - // no action required + // nothing required } VolumeConstructionRequest::Region { opts, .. } => { - if !opts.read_only { - for target in &opts.target { - targets.push(target.to_string()); + for target in &opts.target { + match target { + SocketAddr::V6(t) => { + if net.contains(*t.ip()) { + return true; + } + } + + SocketAddr::V4(_) => {} } } } VolumeConstructionRequest::File { .. } => { - // no action required + // nothing required } } } + + false } -/// Return the number of read-write subvolumes in a VolumeConstructionRequest. -pub fn count_read_write_sub_volumes( - vcr: &VolumeConstructionRequest, -) -> anyhow::Result { - Ok(match vcr { - VolumeConstructionRequest::Volume { sub_volumes, .. } => { - sub_volumes.len() - } - - VolumeConstructionRequest::Url { .. } => 0, - - VolumeConstructionRequest::Region { .. } => { - // We don't support a pure Region VCR at the volume - // level in the database, so this choice should - // never be encountered. - bail!("Region not supported as a top level volume"); - } - - VolumeConstructionRequest::File { .. } => 0, - }) +pub enum VolumeCookedResult { + HardDeleted, + Ok, + RegionSetWithAllExpungedMembers { region_set: Vec }, + MultipleSomeReturned { target: SocketAddrV6 }, + TargetNotFound { target: SocketAddrV6 }, } -/// Returns true if the sub-volumes of a Volume are all read-only -pub fn volume_is_read_only( - vcr: &VolumeConstructionRequest, -) -> anyhow::Result { - match vcr { - VolumeConstructionRequest::Volume { sub_volumes, .. } => { - for sv in sub_volumes { - match sv { - VolumeConstructionRequest::Region { opts, .. 
} => { - if !opts.read_only { - return Ok(false); - } - } - - _ => { - bail!("Saw non-Region in sub-volume {:?}", sv); - } - } - } - - Ok(true) - } - - VolumeConstructionRequest::Region { .. } => { - // We don't support a pure Region VCR at the volume - // level in the database, so this choice should - // never be encountered, but I want to know if it is. - bail!("Region not supported as a top level volume"); - } - - VolumeConstructionRequest::File { .. } => { - // Effectively, this is read-only, as this BlockIO implementation - // does not have a `write` implementation. This will be hit if - // trying to make a snapshot or image out of a - // `YouCanBootAnythingAsLongAsItsAlpine` image source. - Ok(true) - } - - VolumeConstructionRequest::Url { .. } => { - // ImageSource::Url was deprecated - bail!("Saw VolumeConstructionRequest::Url"); - } - } -} +impl DataStore { + pub async fn find_volumes_referencing_socket_addr( + &self, + opctx: &OpContext, + address: SocketAddr, + ) -> ListResultVec { + opctx.check_complex_operations_allowed()?; -/// Replace a Region in a VolumeConstructionRequest -/// -/// Note that UUIDs are not randomized by this step: Crucible will reject a -/// `target_replace` call if the replacement VolumeConstructionRequest does not -/// exactly match the original, except for a single Region difference. -/// -/// Note that the generation number _is_ bumped in this step, otherwise -/// `compare_vcr_for_update` will reject the update. 
-fn replace_region_in_vcr( - vcr: &VolumeConstructionRequest, - old_region: SocketAddrV6, - new_region: SocketAddrV6, -) -> anyhow::Result { - let mut new_vcr = vcr.clone(); + let mut volumes = Vec::new(); + let mut paginator = Paginator::new( + SQL_BATCH_SIZE, + dropshot::PaginationOrder::Ascending, + ); + let conn = self.pool_connection_authorized(opctx).await?; - let mut parts: VecDeque<&mut VolumeConstructionRequest> = VecDeque::new(); - parts.push_back(&mut new_vcr); + let needle = match address { + SocketAddr::V4(_) => { + return Err(Error::internal_error(&format!( + "find_volumes_referencing_socket_addr not ipv6: {address}" + ))); + } - let mut old_region_found = false; + SocketAddr::V6(addr) => addr, + }; - while let Some(vcr_part) = parts.pop_front() { - match vcr_part { - VolumeConstructionRequest::Volume { sub_volumes, .. } => { - for sub_volume in sub_volumes { - parts.push_back(sub_volume); - } + while let Some(p) = paginator.next() { + use nexus_db_schema::schema::volume::dsl; - // Skip looking at read-only parent, this function only replaces - // R/W regions - } + let haystack = + paginated(dsl::volume, dsl::id, &p.current_pagparams()) + .select(model::Volume::as_select()) + .get_results_async::(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; - VolumeConstructionRequest::Url { .. } => { - // nothing required - } + paginator = + p.found_batch(&haystack, &|r| *r.id().as_untyped_uuid()); - VolumeConstructionRequest::Region { opts, generation, .. 
} => { - for target in &mut opts.target { - if let SocketAddr::V6(target) = target { - if *target == old_region { - *target = new_region; - old_region_found = true; + for volume in haystack { + let vcr: VolumeConstructionRequest = + match serde_json::from_str(&volume.data()) { + Ok(vcr) => vcr, + Err(e) => { + return Err(Error::internal_error(&format!( + "cannot deserialize volume data for {}: {e}", + volume.id(), + ))); } - } - } + }; - // Bump generation number, otherwise update will be rejected - *generation = *generation + 1; - } + let rw_reference = region_in_vcr(&vcr, &needle) + .map_err(|e| Error::internal_error(&e.to_string()))?; + let ro_reference = read_only_target_in_vcr(&vcr, &needle) + .map_err(|e| Error::internal_error(&e.to_string()))?; - VolumeConstructionRequest::File { .. } => { - // nothing required + if rw_reference || ro_reference { + volumes.push(volume); + } } } - } - - if !old_region_found { - bail!("old region {old_region} not found!"); - } - - Ok(new_vcr) -} -/// Replace a read-only target in a VolumeConstructionRequest -/// -/// Note that UUIDs are not randomized by this step: Crucible will reject a -/// `target_replace` call if the replacement VolumeConstructionRequest does not -/// exactly match the original, except for a single Region difference. -/// -/// Note that the generation number _is not_ bumped in this step. 
-fn replace_read_only_target_in_vcr( - vcr: &VolumeConstructionRequest, - old_target: ExistingTarget, - new_target: ReplacementTarget, -) -> anyhow::Result<(VolumeConstructionRequest, usize)> { - struct Work<'a> { - vcr_part: &'a mut VolumeConstructionRequest, - under_read_only_parent: bool, + Ok(volumes) } - let mut new_vcr = vcr.clone(); - - let mut parts: VecDeque = VecDeque::new(); - parts.push_back(Work { - vcr_part: &mut new_vcr, - under_read_only_parent: false, - }); - let mut replacements = 0; + pub async fn find_volumes_referencing_ipv6_addr( + &self, + opctx: &OpContext, + needle: std::net::Ipv6Addr, + ) -> ListResultVec { + opctx.check_complex_operations_allowed()?; - while let Some(work) = parts.pop_front() { - match work.vcr_part { - VolumeConstructionRequest::Volume { - sub_volumes, - read_only_parent, - .. - } => { - for sub_volume in sub_volumes { - parts.push_back(Work { - vcr_part: sub_volume, - under_read_only_parent: work.under_read_only_parent, - }); - } + let mut volumes = Vec::new(); + let mut paginator = Paginator::new( + SQL_BATCH_SIZE, + dropshot::PaginationOrder::Ascending, + ); + let conn = self.pool_connection_authorized(opctx).await?; - if let Some(read_only_parent) = read_only_parent { - parts.push_back(Work { - vcr_part: read_only_parent, - under_read_only_parent: true, - }); - } - } + while let Some(p) = paginator.next() { + use nexus_db_schema::schema::volume::dsl; - VolumeConstructionRequest::Url { .. } => { - // nothing required - } + let haystack = + paginated(dsl::volume, dsl::id, &p.current_pagparams()) + .select(model::Volume::as_select()) + .get_results_async::(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; - VolumeConstructionRequest::Region { opts, .. 
} => { - if work.under_read_only_parent && !opts.read_only { - // This VCR isn't constructed properly, there's a read/write - // region under a read-only parent - bail!("read-write region under read-only parent"); - } + paginator = + p.found_batch(&haystack, &|r| *r.id().as_untyped_uuid()); - for target in &mut opts.target { - if let SocketAddr::V6(target) = target { - if *target == old_target.0 && opts.read_only { - *target = new_target.0; - replacements += 1; + for volume in haystack { + let vcr: VolumeConstructionRequest = + match serde_json::from_str(&volume.data()) { + Ok(vcr) => vcr, + Err(e) => { + return Err(Error::internal_error(&format!( + "cannot deserialize volume data for {}: {e}", + volume.id(), + ))); } - } - } - } + }; - VolumeConstructionRequest::File { .. } => { - // nothing required + if ipv6_addr_referenced_in_vcr(&vcr, &needle) { + volumes.push(volume); + } } } - } - if replacements == 0 { - bail!("target {old_target:?} not found!"); + Ok(volumes) } - Ok((new_vcr, replacements)) -} + pub async fn find_volumes_referencing_ipv6_net( + &self, + opctx: &OpContext, + needle: oxnet::Ipv6Net, + ) -> ListResultVec { + opctx.check_complex_operations_allowed()?; -/// Find Regions in a Volume's subvolumes list whose target match the argument -/// IP, and add them to the supplied Vec. -fn find_matching_rw_regions_in_volume( - vcr: &VolumeConstructionRequest, - ip: &std::net::Ipv6Addr, - matched_targets: &mut Vec, -) -> anyhow::Result<()> { - let mut parts: VecDeque<&VolumeConstructionRequest> = VecDeque::new(); - parts.push_back(vcr); + let mut volumes = Vec::new(); + let mut paginator = Paginator::new( + SQL_BATCH_SIZE, + dropshot::PaginationOrder::Ascending, + ); + let conn = self.pool_connection_authorized(opctx).await?; - while let Some(vcr_part) = parts.pop_front() { - match vcr_part { - VolumeConstructionRequest::Volume { sub_volumes, .. 
} => { - for sub_volume in sub_volumes { - parts.push_back(sub_volume); - } - } + while let Some(p) = paginator.next() { + use nexus_db_schema::schema::volume::dsl; - VolumeConstructionRequest::Url { .. } => { - // nothing required - } - - VolumeConstructionRequest::Region { opts, .. } => { - if !opts.read_only { - for target in &opts.target { - if let SocketAddr::V6(target) = target { - if target.ip() == ip { - matched_targets.push(*target); - } - } - } - } - } - - VolumeConstructionRequest::File { .. } => { - // nothing required - } - } - } - - Ok(()) -} - -fn region_sets( - vcr: &VolumeConstructionRequest, - region_sets: &mut Vec>, -) { - let mut parts: VecDeque<&VolumeConstructionRequest> = VecDeque::new(); - parts.push_back(vcr); - - while let Some(work) = parts.pop_front() { - match work { - VolumeConstructionRequest::Volume { - sub_volumes, - read_only_parent, - .. - } => { - for sub_volume in sub_volumes { - parts.push_back(&sub_volume); - } - - if let Some(read_only_parent) = read_only_parent { - parts.push_back(&read_only_parent); - } - } - - VolumeConstructionRequest::Url { .. } => { - // nothing required - } - - VolumeConstructionRequest::Region { opts, .. } => { - let mut targets = vec![]; - - for target in &opts.target { - match target { - SocketAddr::V6(v6) => { - targets.push(*v6); - } - SocketAddr::V4(_) => {} - } - } - - if targets.len() == opts.target.len() { - region_sets.push(targets); - } - } - - VolumeConstructionRequest::File { .. } => { - // nothing required - } - } - } -} - -/// Check if an ipv6 address is referenced in a Volume Construction Request -fn ipv6_addr_referenced_in_vcr( - vcr: &VolumeConstructionRequest, - ip: &std::net::Ipv6Addr, -) -> bool { - let mut parts: VecDeque<&VolumeConstructionRequest> = VecDeque::new(); - parts.push_back(vcr); - - while let Some(vcr_part) = parts.pop_front() { - match vcr_part { - VolumeConstructionRequest::Volume { - sub_volumes, - read_only_parent, - .. 
- } => { - for sub_volume in sub_volumes { - parts.push_back(sub_volume); - } - - if let Some(read_only_parent) = read_only_parent { - parts.push_back(read_only_parent); - } - } - - VolumeConstructionRequest::Url { .. } => { - // nothing required - } - - VolumeConstructionRequest::Region { opts, .. } => { - for target in &opts.target { - match target { - SocketAddr::V6(t) => { - if t.ip() == ip { - return true; - } - } - - SocketAddr::V4(_) => {} - } - } - } - - VolumeConstructionRequest::File { .. } => { - // nothing required - } - } - } - - false -} - -/// Check if an ipv6 net is referenced in a Volume Construction Request -fn ipv6_net_referenced_in_vcr( - vcr: &VolumeConstructionRequest, - net: &oxnet::Ipv6Net, -) -> bool { - let mut parts: VecDeque<&VolumeConstructionRequest> = VecDeque::new(); - parts.push_back(vcr); - - while let Some(vcr_part) = parts.pop_front() { - match vcr_part { - VolumeConstructionRequest::Volume { - sub_volumes, - read_only_parent, - .. - } => { - for sub_volume in sub_volumes { - parts.push_back(sub_volume); - } - - if let Some(read_only_parent) = read_only_parent { - parts.push_back(read_only_parent); - } - } - - VolumeConstructionRequest::Url { .. } => { - // nothing required - } - - VolumeConstructionRequest::Region { opts, .. } => { - for target in &opts.target { - match target { - SocketAddr::V6(t) => { - if net.contains(*t.ip()) { - return true; - } - } - - SocketAddr::V4(_) => {} - } - } - } - - VolumeConstructionRequest::File { .. 
} => { - // nothing required - } - } - } - - false -} - -pub enum VolumeCookedResult { - HardDeleted, - Ok, - RegionSetWithAllExpungedMembers { region_set: Vec }, - MultipleSomeReturned { target: SocketAddrV6 }, - TargetNotFound { target: SocketAddrV6 }, -} - -impl DataStore { - pub async fn find_volumes_referencing_socket_addr( - &self, - opctx: &OpContext, - address: SocketAddr, - ) -> ListResultVec { - opctx.check_complex_operations_allowed()?; - - let mut volumes = Vec::new(); - let mut paginator = Paginator::new( - SQL_BATCH_SIZE, - dropshot::PaginationOrder::Ascending, - ); - let conn = self.pool_connection_authorized(opctx).await?; - - let needle = match address { - SocketAddr::V4(_) => { - return Err(Error::internal_error(&format!( - "find_volumes_referencing_socket_addr not ipv6: {address}" - ))); - } - - SocketAddr::V6(addr) => addr, - }; - - while let Some(p) = paginator.next() { - use nexus_db_schema::schema::volume::dsl; - - let haystack = - paginated(dsl::volume, dsl::id, &p.current_pagparams()) - .select(Volume::as_select()) - .get_results_async::(&*conn) - .await - .map_err(|e| { - public_error_from_diesel(e, ErrorHandler::Server) - })?; + let haystack = + paginated(dsl::volume, dsl::id, &p.current_pagparams()) + .select(model::Volume::as_select()) + .get_results_async::(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; paginator = p.found_batch(&haystack, &|r| *r.id().as_untyped_uuid()); @@ -4009,2120 +3174,453 @@ impl DataStore { volume.id(), ))); } - }; - - let rw_reference = region_in_vcr(&vcr, &needle) - .map_err(|e| Error::internal_error(&e.to_string()))?; - let ro_reference = read_only_target_in_vcr(&vcr, &needle) - .map_err(|e| Error::internal_error(&e.to_string()))?; - - if rw_reference || ro_reference { - volumes.push(volume); - } - } - } - - Ok(volumes) - } - - pub async fn find_volumes_referencing_ipv6_addr( - &self, - opctx: &OpContext, - needle: std::net::Ipv6Addr, - ) -> ListResultVec { - 
opctx.check_complex_operations_allowed()?; - - let mut volumes = Vec::new(); - let mut paginator = Paginator::new( - SQL_BATCH_SIZE, - dropshot::PaginationOrder::Ascending, - ); - let conn = self.pool_connection_authorized(opctx).await?; - - while let Some(p) = paginator.next() { - use nexus_db_schema::schema::volume::dsl; - - let haystack = - paginated(dsl::volume, dsl::id, &p.current_pagparams()) - .select(Volume::as_select()) - .get_results_async::(&*conn) - .await - .map_err(|e| { - public_error_from_diesel(e, ErrorHandler::Server) - })?; - - paginator = - p.found_batch(&haystack, &|r| *r.id().as_untyped_uuid()); - - for volume in haystack { - let vcr: VolumeConstructionRequest = - match serde_json::from_str(&volume.data()) { - Ok(vcr) => vcr, - Err(e) => { - return Err(Error::internal_error(&format!( - "cannot deserialize volume data for {}: {e}", - volume.id(), - ))); - } - }; - - if ipv6_addr_referenced_in_vcr(&vcr, &needle) { - volumes.push(volume); - } - } - } - - Ok(volumes) - } - - pub async fn find_volumes_referencing_ipv6_net( - &self, - opctx: &OpContext, - needle: oxnet::Ipv6Net, - ) -> ListResultVec { - opctx.check_complex_operations_allowed()?; - - let mut volumes = Vec::new(); - let mut paginator = Paginator::new( - SQL_BATCH_SIZE, - dropshot::PaginationOrder::Ascending, - ); - let conn = self.pool_connection_authorized(opctx).await?; - - while let Some(p) = paginator.next() { - use nexus_db_schema::schema::volume::dsl; - - let haystack = - paginated(dsl::volume, dsl::id, &p.current_pagparams()) - .select(Volume::as_select()) - .get_results_async::(&*conn) - .await - .map_err(|e| { - public_error_from_diesel(e, ErrorHandler::Server) - })?; - - paginator = - p.found_batch(&haystack, &|r| *r.id().as_untyped_uuid()); - - for volume in haystack { - let vcr: VolumeConstructionRequest = - match serde_json::from_str(&volume.data()) { - Ok(vcr) => vcr, - Err(e) => { - return Err(Error::internal_error(&format!( - "cannot deserialize volume data for {}: 
{e}", - volume.id(), - ))); - } - }; - - if ipv6_net_referenced_in_vcr(&vcr, &needle) { - volumes.push(volume); - } - } - } - - Ok(volumes) - } - - /// Returns Some(bool) depending on if a read-only target exists in a - /// volume, None if the volume was deleted, or an error otherwise. - pub async fn volume_references_read_only_target( - &self, - volume_id: VolumeUuid, - address: SocketAddrV6, - ) -> LookupResult> { - let Some(volume) = self.volume_get(volume_id).await? else { - return Ok(None); - }; - - let vcr: VolumeConstructionRequest = - match serde_json::from_str(&volume.data()) { - Ok(vcr) => vcr, - - Err(e) => { - return Err(Error::internal_error(&format!( - "cannot deserialize volume data for {}: {e}", - volume.id(), - ))); - } - }; - - let reference = - read_only_target_in_vcr(&vcr, &address).map_err(|e| { - Error::internal_error(&format!( - "cannot deserialize volume data for {}: {e}", - volume.id(), - )) - })?; - - Ok(Some(reference)) - } - - pub async fn volume_cooked( - &self, - opctx: &OpContext, - volume_id: VolumeUuid, - ) -> LookupResult { - let Some(volume) = self.volume_get(volume_id).await? 
else { - return Ok(VolumeCookedResult::HardDeleted); - }; - - let vcr: VolumeConstructionRequest = - match serde_json::from_str(&volume.data()) { - Ok(vcr) => vcr, - - Err(e) => { - return Err(Error::internal_error(&format!( - "cannot deserialize volume data for {}: {e}", - volume.id(), - ))); - } - }; - - let expunged_regions: Vec = vec![ - self.find_read_only_regions_on_expunged_physical_disks(opctx) - .await?, - self.find_read_write_regions_on_expunged_physical_disks(opctx) - .await?, - ] - .into_iter() - .flatten() - .collect(); - - let expunged_region_snapshots: Vec = self - .find_region_snapshots_on_expunged_physical_disks(opctx) - .await?; - - let region_sets = { - let mut result = vec![]; - region_sets(&vcr, &mut result); - result - }; - - let conn = self.pool_connection_authorized(opctx).await?; - - #[derive(PartialEq)] - enum Checked { - Expunged, - Ok, - } - - for region_set in region_sets { - let mut checked_region_set = Vec::with_capacity(region_set.len()); - - for target in ®ion_set { - let maybe_ro_usage = - Self::read_only_target_to_volume_resource_usage( - &conn, &target, - ) - .await - .map_err(|e| { - public_error_from_diesel(e, ErrorHandler::Server) - })?; - - let maybe_region = Self::target_to_region( - &conn, - &target, - RegionType::ReadWrite, - ) - .await - .map_err(|e| { - public_error_from_diesel(e, ErrorHandler::Server) - })?; - - let check = match (maybe_ro_usage, maybe_region) { - (Some(usage), None) => match usage { - VolumeResourceUsage::ReadOnlyRegion { region_id } => { - if expunged_regions - .iter() - .any(|region| region.id() == region_id) - { - Checked::Expunged - } else { - Checked::Ok - } - } - - VolumeResourceUsage::RegionSnapshot { - dataset_id, - region_id, - snapshot_id, - } => { - if expunged_region_snapshots.iter().any( - |region_snapshot| { - region_snapshot.dataset_id - == dataset_id.into() - && region_snapshot.region_id - == region_id - && region_snapshot.snapshot_id - == snapshot_id - }, - ) { - Checked::Expunged - } 
else { - Checked::Ok - } - } - }, - - (None, Some(region)) => { - let region_id = region.id(); - if expunged_regions - .iter() - .any(|region| region.id() == region_id) - { - Checked::Expunged - } else { - Checked::Ok - } - } - - (Some(_), Some(_)) => { - // This is an error: multiple resources (read/write - // region, read-only region, and/or a region snapshot) - // share the same target addr. - return Ok(VolumeCookedResult::MultipleSomeReturned { - target: *target, - }); - } - - // volume may have been deleted after `volume_get` at - // beginning of function, and before grabbing the expunged - // resources - (None, None) => { - return Ok(VolumeCookedResult::TargetNotFound { - target: *target, - }); - } - }; - - checked_region_set.push(check); - } - - if checked_region_set.iter().all(|x| *x == Checked::Expunged) { - return Ok( - VolumeCookedResult::RegionSetWithAllExpungedMembers { - region_set, - }, - ); - } - } - - Ok(VolumeCookedResult::Ok) - } -} - -// Add some validation that runs only for tests -#[cfg(any(test, feature = "testing"))] -impl DataStore { - fn volume_invariant_violated(msg: String) -> diesel::result::Error { - diesel::result::Error::DatabaseError( - diesel::result::DatabaseErrorKind::CheckViolation, - Box::new(msg), - ) - } - - /// Tests each Volume to see if invariants hold - /// - /// If an invariant is violated, this function returns a `CheckViolation` - /// with the text of what invariant was violated. 
- pub(crate) async fn validate_volume_invariants( - conn: &async_bb8_diesel::Connection, - ) -> Result<(), diesel::result::Error> { - let mut paginator = Paginator::new( - SQL_BATCH_SIZE, - dropshot::PaginationOrder::Ascending, - ); - - while let Some(p) = paginator.next() { - use nexus_db_schema::schema::volume::dsl; - let haystack = - paginated(dsl::volume, dsl::id, &p.current_pagparams()) - .select(Volume::as_select()) - .get_results_async::(conn) - .await?; - - paginator = - p.found_batch(&haystack, &|v| *v.id().as_untyped_uuid()); - - for volume in haystack { - Self::validate_volume_has_all_resources(&conn, &volume).await?; - Self::validate_volume_region_sets_have_unique_targets(&volume) - .await?; - } - } - - let mut paginator = Paginator::new( - SQL_BATCH_SIZE, - dropshot::PaginationOrder::Ascending, - ); - - while let Some(p) = paginator.next() { - use nexus_db_schema::schema::region::dsl; - let haystack = - paginated(dsl::region, dsl::id, &p.current_pagparams()) - .select(Region::as_select()) - .get_results_async::(conn) - .await?; - - paginator = p.found_batch(&haystack, &|r| r.id()); - - for region in haystack { - Self::validate_read_only_region_has_no_snapshots(&conn, region) - .await?; - } - } - - Ok(()) - } - - /// Assert that the resources that comprise non-deleted volumes have not - /// been prematurely deleted. 
- async fn validate_volume_has_all_resources( - conn: &async_bb8_diesel::Connection, - volume: &Volume, - ) -> Result<(), diesel::result::Error> { - if volume.time_deleted.is_some() { - // Do not need to validate resources for soft-deleted volumes - return Ok(()); - } - - let vcr: VolumeConstructionRequest = - serde_json::from_str(&volume.data()).unwrap(); - - // validate all read/write resources still exist - - let num_read_write_subvolumes = match count_read_write_sub_volumes(&vcr) - { - Ok(v) => v, - Err(e) => { - return Err(Self::volume_invariant_violated(format!( - "volume {} had error: {e}", - volume.id(), - ))); - } - }; - - let mut read_write_targets = Vec::with_capacity( - REGION_REDUNDANCY_THRESHOLD * num_read_write_subvolumes, - ); - - read_write_resources_associated_with_volume( - &vcr, - &mut read_write_targets, - ); - - for target in read_write_targets { - let target = match target.parse() { - Ok(t) => t, - Err(e) => { - return Err(Self::volume_invariant_violated(format!( - "could not parse {target}: {e}" - ))); - } - }; - - let maybe_region = DataStore::target_to_region( - conn, - &target, - RegionType::ReadWrite, - ) - .await?; - - let Some(_region) = maybe_region else { - return Err(Self::volume_invariant_violated(format!( - "could not find resource for {target}" - ))); - }; - } - - // validate all read-only resources still exist - - let crucible_targets = { - let mut crucible_targets = CrucibleTargets::default(); - read_only_resources_associated_with_volume( - &vcr, - &mut crucible_targets, - ); - crucible_targets - }; - - for read_only_target in &crucible_targets.read_only_targets { - let read_only_target = read_only_target.parse().map_err(|e| { - Self::volume_invariant_violated(format!( - "could not parse {read_only_target}: {e}" - )) - })?; - - let maybe_usage = - DataStore::read_only_target_to_volume_resource_usage( - conn, - &read_only_target, - ) - .await?; - - let Some(_usage) = maybe_usage else { - return 
Err(Self::volume_invariant_violated(format!( - "could not find resource for {read_only_target}" - ))); - }; - } - - Ok(()) - } - - /// Assert that all the region sets have three distinct targets - async fn validate_volume_region_sets_have_unique_targets( - volume: &Volume, - ) -> Result<(), diesel::result::Error> { - let vcr: VolumeConstructionRequest = - serde_json::from_str(&volume.data()).unwrap(); - - let mut parts = VecDeque::new(); - parts.push_back(&vcr); - - while let Some(part) = parts.pop_front() { - match part { - VolumeConstructionRequest::Volume { - sub_volumes, - read_only_parent, - .. - } => { - for sub_volume in sub_volumes { - parts.push_back(sub_volume); - } - if let Some(read_only_parent) = read_only_parent { - parts.push_back(read_only_parent); - } - } - - VolumeConstructionRequest::Url { .. } => { - // nothing required - } - - VolumeConstructionRequest::Region { opts, .. } => { - let mut set = HashSet::new(); - let mut count = 0; - - for target in &opts.target { - set.insert(target); - count += 1; - } - - if set.len() != count { - return Err(Self::volume_invariant_violated(format!( - "volume {} has a region set with {} unique targets", - volume.id(), - set.len(), - ))); - } - } - - VolumeConstructionRequest::File { .. 
} => { - // nothing required - } - } - } - - Ok(()) - } - - /// Assert that read-only regions do not have any associated region - /// snapshots (see associated comment in `soft_delete_volume_in_txn`) - async fn validate_read_only_region_has_no_snapshots( - conn: &async_bb8_diesel::Connection, - region: Region, - ) -> Result<(), diesel::result::Error> { - if !region.read_only() { - return Ok(()); - } - - use nexus_db_schema::schema::volume_resource_usage::dsl; - - let matching_usage_records: Vec = - dsl::volume_resource_usage - .filter( - dsl::usage_type.eq(VolumeResourceUsageType::RegionSnapshot), - ) - .filter(dsl::region_snapshot_region_id.eq(region.id())) - .select(VolumeResourceUsageRecord::as_select()) - .get_results_async(conn) - .await? - .into_iter() - .map(|r| r.try_into().unwrap()) - .collect(); - - if !matching_usage_records.is_empty() { - return Err(Self::volume_invariant_violated(format!( - "read-only region {} has matching usage records: {:?}", - region.id(), - matching_usage_records, - ))); - } - - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - use crate::db::datastore::REGION_REDUNDANCY_THRESHOLD; - use crate::db::datastore::test::TestDatasets; - use crate::db::pub_test_utils::TestDatabase; - use nexus_config::RegionAllocationStrategy; - use nexus_db_model::SqlU16; - use nexus_types::external_api::disk::DiskSource; - use omicron_common::api::external::ByteCount; - use omicron_test_utils::dev; - use omicron_uuid_kinds::VolumeUuid; - use sled_agent_client::CrucibleOpts; - - // Assert that Nexus will not fail to deserialize an old version of - // CrucibleResources that was serialized before schema update 6.0.0. 
- #[tokio::test] - async fn test_deserialize_old_crucible_resources() { - let logctx = - dev::test_setup_log("test_deserialize_old_crucible_resources"); - let log = logctx.log.new(o!()); - let db = TestDatabase::new_with_datastore(&log).await; - let datastore = db.datastore(); - - // Start with a fake volume, doesn't matter if it's empty - - let volume_id = VolumeUuid::new_v4(); - let _volume = datastore - .volume_create( - volume_id, - VolumeConstructionRequest::Volume { - id: *volume_id.as_untyped_uuid(), - block_size: 512, - sub_volumes: vec![], - read_only_parent: None, - }, - ) - .await - .unwrap(); - - // Add old CrucibleResources json in the `resources_to_clean_up` column - // - this was before the `deleting` column / field was added to - // ResourceSnapshot. - - { - use nexus_db_schema::schema::volume::dsl; - - let conn = datastore.pool_connection_unauthorized().await.unwrap(); - - let resources_to_clean_up = r#"{ - "V1": { - "datasets_and_regions": [], - "datasets_and_snapshots": [ - [ - { - "identity": { - "id": "844ee8d5-7641-4b04-bca8-7521e258028a", - "time_created": "2023-12-19T21:38:34.000000Z", - "time_modified": "2023-12-19T21:38:34.000000Z" - }, - "time_deleted": null, - "rcgen": 1, - "pool_id": "81a98506-4a97-4d92-8de5-c21f6fc71649", - "ip": "fd00:1122:3344:101::1", - "port": 32345, - "kind": "Crucible", - "size_used": 10737418240 - }, - { - "dataset_id": "b69edd77-1b3e-4f11-978c-194a0a0137d0", - "region_id": "8d668bf9-68cc-4387-8bc0-b4de7ef9744f", - "snapshot_id": "f548332c-6026-4eff-8c1c-ba202cd5c834", - "snapshot_addr": "[fd00:1122:3344:101::2]:19001", - "volume_references": 0 - } - ] - ] - } -} -"#; - - diesel::update(dsl::volume) - .filter(dsl::id.eq(to_db_typed_uuid(volume_id))) - .set(( - dsl::resources_to_clean_up.eq(resources_to_clean_up), - dsl::time_deleted.eq(Utc::now()), - )) - .execute_async(&*conn) - .await - .unwrap(); - } - - // Soft delete the volume - - let cr = datastore.soft_delete_volume(volume_id).await.unwrap(); - - // 
Assert the contents of the returned CrucibleResources - - let datasets_and_regions = - datastore.regions_to_delete(&cr).await.unwrap(); - let datasets_and_snapshots = - datastore.snapshots_to_delete(&cr).await.unwrap(); - - assert!(datasets_and_regions.is_empty()); - assert_eq!(datasets_and_snapshots.len(), 1); - - let region_snapshot = &datasets_and_snapshots[0].1; - - assert_eq!( - region_snapshot.snapshot_id, - "f548332c-6026-4eff-8c1c-ba202cd5c834".parse::().unwrap() - ); - assert_eq!(region_snapshot.deleting, false); - - db.terminate().await; - logctx.cleanup_successful(); - } - - #[tokio::test] - async fn test_volume_replace_region() { - let logctx = dev::test_setup_log("test_volume_replace_region"); - let log = logctx.log.new(o!()); - let db = TestDatabase::new_with_datastore(&log).await; - let opctx = db.opctx(); - let datastore = db.datastore(); - let conn = datastore.pool_connection_for_tests().await.unwrap(); - - let _test_datasets = TestDatasets::create( - &opctx, - datastore.clone(), - REGION_REDUNDANCY_THRESHOLD, - ) - .await; - - let volume_id = VolumeUuid::new_v4(); - let volume_to_delete_id = VolumeUuid::new_v4(); - - let datasets_and_regions = datastore - .disk_region_allocate( - &opctx, - volume_id, - &DiskSource::Blank { block_size: 512.try_into().unwrap() }, - ByteCount::from_gibibytes_u32(1), - &&RegionAllocationStrategy::RandomWithDistinctSleds { - seed: None, - }, - ) - .await - .unwrap(); - - let mut region_addresses: Vec = - Vec::with_capacity(datasets_and_regions.len()); - - for (i, (_, region)) in datasets_and_regions.iter().enumerate() { - // `disk_region_allocate` won't put any ports in, so add fake ones - // here - use nexus_db_schema::schema::region::dsl; - diesel::update(dsl::region) - .filter(dsl::id.eq(region.id())) - .set(dsl::port.eq(Some::((100 + i as u16).into()))) - .execute_async(&*conn) - .await - .unwrap(); - - let address: SocketAddrV6 = - datastore.region_addr(region.id()).await.unwrap().unwrap(); - - 
region_addresses.push(address); - } - - // Manually create a replacement region at the first dataset - let replacement_region = { - let (dataset, region) = &datasets_and_regions[0]; - let region = Region::new( - dataset.id(), - volume_to_delete_id, - region.block_size().try_into().unwrap(), - region.blocks_per_extent(), - region.extent_count(), - 111, - false, // read-write - ); - - use nexus_db_schema::schema::region::dsl; - diesel::insert_into(dsl::region) - .values(region.clone()) - .execute_async(&*conn) - .await - .unwrap(); - - region - }; - - let replacement_region_addr: SocketAddrV6 = datastore - .region_addr(replacement_region.id()) - .await - .unwrap() - .unwrap(); - - let _volume = datastore - .volume_create( - volume_id, - VolumeConstructionRequest::Volume { - id: *volume_id.as_untyped_uuid(), - block_size: 512, - sub_volumes: vec![VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 10, - extent_count: 10, - generation: 1, - opts: CrucibleOpts { - id: *volume_id.as_untyped_uuid(), - target: vec![ - // target to replace - region_addresses[0].into(), - region_addresses[1].into(), - region_addresses[2].into(), - ], - lossy: false, - flush_timeout: None, - key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: false, - }, - }], - read_only_parent: None, - }, - ) - .await - .unwrap(); - - // Replace one - - let volume_replace_region_result = datastore - .volume_replace_region( - /* target */ - db::datastore::VolumeReplacementParams { - volume_id, - region_id: datasets_and_regions[0].1.id(), - region_addr: region_addresses[0], - }, - /* replacement */ - db::datastore::VolumeReplacementParams { - volume_id: volume_to_delete_id, - region_id: replacement_region.id(), - region_addr: replacement_region_addr, - }, - ) - .await - .unwrap(); - - assert_eq!(volume_replace_region_result, VolumeReplaceResult::Done); + }; - let vcr: VolumeConstructionRequest = serde_json::from_str( - 
datastore.volume_get(volume_id).await.unwrap().unwrap().data(), - ) - .unwrap(); + if ipv6_net_referenced_in_vcr(&vcr, &needle) { + volumes.push(volume); + } + } + } - // Ensure the shape of the resulting VCR - assert_eq!( - &vcr, - &VolumeConstructionRequest::Volume { - id: *volume_id.as_untyped_uuid(), - block_size: 512, - sub_volumes: vec![VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 10, - extent_count: 10, - generation: 2, // generation number bumped - opts: CrucibleOpts { - id: *volume_id.as_untyped_uuid(), - target: vec![ - replacement_region_addr.into(), // replaced - region_addresses[1].into(), - region_addresses[2].into(), - ], - lossy: false, - flush_timeout: None, - key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: false, - }, - }], - read_only_parent: None, - }, - ); + Ok(volumes) + } - // Now undo the replacement. Note volume ID is not swapped. - let volume_replace_region_result = datastore - .volume_replace_region( - /* target */ - db::datastore::VolumeReplacementParams { - volume_id, - region_id: replacement_region.id(), - region_addr: replacement_region_addr, - }, - /* replacement */ - db::datastore::VolumeReplacementParams { - volume_id: volume_to_delete_id, - region_id: datasets_and_regions[0].1.id(), - region_addr: region_addresses[0], - }, - ) - .await - .unwrap(); + /// Returns Some(bool) depending on if a read-only target exists in a + /// volume, None if the volume was deleted, or an error otherwise. + pub async fn volume_references_read_only_target( + &self, + volume_id: VolumeUuid, + address: SocketAddrV6, + ) -> LookupResult> { + let Some(volume) = self.volume_get(volume_id).await? 
else { + return Ok(None); + }; - assert_eq!(volume_replace_region_result, VolumeReplaceResult::Done); + let vcr: VolumeConstructionRequest = + match serde_json::from_str(&volume.data()) { + Ok(vcr) => vcr, - let vcr: VolumeConstructionRequest = serde_json::from_str( - datastore.volume_get(volume_id).await.unwrap().unwrap().data(), - ) - .unwrap(); + Err(e) => { + return Err(Error::internal_error(&format!( + "cannot deserialize volume data for {}: {e}", + volume.id(), + ))); + } + }; - // Ensure the shape of the resulting VCR - assert_eq!( - &vcr, - &VolumeConstructionRequest::Volume { - id: *volume_id.as_untyped_uuid(), - block_size: 512, - sub_volumes: vec![VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 10, - extent_count: 10, - generation: 3, // generation number bumped - opts: CrucibleOpts { - id: *volume_id.as_untyped_uuid(), - target: vec![ - region_addresses[0].into(), // back to what it was - region_addresses[1].into(), - region_addresses[2].into(), - ], - lossy: false, - flush_timeout: None, - key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: false, - }, - }], - read_only_parent: None, - }, - ); + let reference = + read_only_target_in_vcr(&vcr, &address).map_err(|e| { + Error::internal_error(&format!( + "cannot deserialize volume data for {}: {e}", + volume.id(), + )) + })?; - db.terminate().await; - logctx.cleanup_successful(); + Ok(Some(reference)) } - #[tokio::test] - async fn test_volume_replace_snapshot() { - let logctx = dev::test_setup_log("test_volume_replace_snapshot"); - let log = logctx.log.new(o!()); - let db = TestDatabase::new_with_datastore(&log).await; - let opctx = db.opctx(); - let datastore = db.datastore(); - let conn = datastore.pool_connection_for_tests().await.unwrap(); - - let _test_datasets = TestDatasets::create( - &opctx, - datastore.clone(), - REGION_REDUNDANCY_THRESHOLD, - ) - .await; - - let volume_id = VolumeUuid::new_v4(); - let volume_to_delete_id 
= VolumeUuid::new_v4(); - - let datasets_and_regions = datastore - .disk_region_allocate( - &opctx, - volume_id, - &DiskSource::Blank { block_size: 512.try_into().unwrap() }, - ByteCount::from_gibibytes_u32(1), - &&RegionAllocationStrategy::RandomWithDistinctSleds { - seed: None, - }, - ) - .await - .unwrap(); - - let mut region_addresses: Vec = - Vec::with_capacity(datasets_and_regions.len()); - - for (i, (_, region)) in datasets_and_regions.iter().enumerate() { - // `disk_region_allocate` won't put any ports in, so add fake ones - // here - use nexus_db_schema::schema::region::dsl; - diesel::update(dsl::region) - .filter(dsl::id.eq(region.id())) - .set(dsl::port.eq(Some::((100 + i as u16).into()))) - .execute_async(&*conn) - .await - .unwrap(); + pub async fn volume_cooked( + &self, + opctx: &OpContext, + volume_id: VolumeUuid, + ) -> LookupResult { + let Some(volume) = self.volume_get(volume_id).await? else { + return Ok(VolumeCookedResult::HardDeleted); + }; - let address: SocketAddrV6 = - datastore.region_addr(region.id()).await.unwrap().unwrap(); + let vcr: VolumeConstructionRequest = + match serde_json::from_str(&volume.data()) { + Ok(vcr) => vcr, - region_addresses.push(address); - } + Err(e) => { + return Err(Error::internal_error(&format!( + "cannot deserialize volume data for {}: {e}", + volume.id(), + ))); + } + }; - // Manually create a replacement region at the first dataset - let replacement_region = { - let (dataset, region) = &datasets_and_regions[0]; - let region = Region::new( - dataset.id(), - volume_to_delete_id, - region.block_size().try_into().unwrap(), - region.blocks_per_extent(), - region.extent_count(), - 111, - true, // read-only - ); + let expunged_regions: Vec = vec![ + self.find_read_only_regions_on_expunged_physical_disks(opctx) + .await?, + self.find_read_write_regions_on_expunged_physical_disks(opctx) + .await?, + ] + .into_iter() + .flatten() + .collect(); - use nexus_db_schema::schema::region::dsl; - 
diesel::insert_into(dsl::region) - .values(region.clone()) - .execute_async(&*conn) - .await - .unwrap(); + let expunged_region_snapshots: Vec = self + .find_region_snapshots_on_expunged_physical_disks(opctx) + .await?; - region + let region_sets = { + let mut result = vec![]; + region_sets(&vcr, &mut result); + result }; - let replacement_region_addr: SocketAddrV6 = datastore - .region_addr(replacement_region.id()) - .await - .unwrap() - .unwrap(); - - // need to add region snapshot objects to satisfy volume create - // transaction's search for resources - - let address_1: SocketAddrV6 = - "[fd00:1122:3344:104::1]:400".parse().unwrap(); - let address_2: SocketAddrV6 = - "[fd00:1122:3344:105::1]:401".parse().unwrap(); - let address_3: SocketAddrV6 = - "[fd00:1122:3344:106::1]:402".parse().unwrap(); - - let region_snapshots = [ - RegionSnapshot::new( - DatasetUuid::new_v4(), - Uuid::new_v4(), - Uuid::new_v4(), - address_1.to_string(), - ), - RegionSnapshot::new( - DatasetUuid::new_v4(), - Uuid::new_v4(), - Uuid::new_v4(), - address_2.to_string(), - ), - RegionSnapshot::new( - DatasetUuid::new_v4(), - Uuid::new_v4(), - Uuid::new_v4(), - address_3.to_string(), - ), - ]; - - datastore - .region_snapshot_create(region_snapshots[0].clone()) - .await - .unwrap(); - datastore - .region_snapshot_create(region_snapshots[1].clone()) - .await - .unwrap(); - datastore - .region_snapshot_create(region_snapshots[2].clone()) - .await - .unwrap(); + let conn = self.pool_connection_authorized(opctx).await?; - // Insert two volumes: one with the target to replace, and one temporary - // "volume to delete" that's blank. Validate the pre-replacement volume - // resource usage records. 
+ #[derive(PartialEq)] + enum Checked { + Expunged, + Ok, + } - let rop_id = Uuid::new_v4(); + for region_set in region_sets { + let mut checked_region_set = Vec::with_capacity(region_set.len()); - datastore - .volume_create( - volume_id, - VolumeConstructionRequest::Volume { - id: *volume_id.as_untyped_uuid(), - block_size: 512, - sub_volumes: vec![VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 10, - extent_count: 10, - generation: 1, - opts: CrucibleOpts { - id: *volume_id.as_untyped_uuid(), - target: vec![ - region_addresses[0].into(), - region_addresses[1].into(), - region_addresses[2].into(), - ], - lossy: false, - flush_timeout: None, - key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: false, - }, - }], - read_only_parent: Some(Box::new( - VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 10, - extent_count: 10, - generation: 1, - opts: CrucibleOpts { - id: rop_id, - target: vec![ - // target to replace - address_1.into(), - address_2.into(), - address_3.into(), - ], - lossy: false, - flush_timeout: None, - key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: true, - }, - }, - )), - }, - ) - .await - .unwrap(); - - for region_snapshot in ®ion_snapshots { - let usage = datastore - .volume_usage_records_for_resource( - VolumeResourceUsage::RegionSnapshot { - dataset_id: region_snapshot.dataset_id(), - region_id: region_snapshot.region_id, - snapshot_id: region_snapshot.snapshot_id, - }, + for target in ®ion_set { + let maybe_ro_usage = + Self::read_only_target_to_volume_resource_usage( + &conn, &target, + ) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + + let maybe_region = Self::target_to_region( + &conn, + &target, + RegionType::ReadWrite, ) .await - .unwrap(); - - assert_eq!(usage.len(), 1); - assert_eq!(usage[0].volume_id(), volume_id); - } + .map_err(|e| { + 
public_error_from_diesel(e, ErrorHandler::Server) + })?; - datastore - .volume_create( - volume_to_delete_id, - VolumeConstructionRequest::Volume { - id: *volume_to_delete_id.as_untyped_uuid(), - block_size: 512, - sub_volumes: vec![], - read_only_parent: None, - }, - ) - .await - .unwrap(); + let check = match (maybe_ro_usage, maybe_region) { + (Some(usage), None) => match usage { + VolumeResourceUsage::ReadOnlyRegion { region_id } => { + if expunged_regions + .iter() + .any(|region| region.id() == region_id) + { + Checked::Expunged + } else { + Checked::Ok + } + } - // `volume_create` above was called with a blank volume, so no usage - // record will have been created for the read-only region + VolumeResourceUsage::RegionSnapshot { + dataset_id, + region_id, + snapshot_id, + } => { + if expunged_region_snapshots.iter().any( + |region_snapshot| { + region_snapshot.dataset_id + == dataset_id.into() + && region_snapshot.region_id + == region_id + && region_snapshot.snapshot_id + == snapshot_id + }, + ) { + Checked::Expunged + } else { + Checked::Ok + } + } + }, - let usage = datastore - .volume_usage_records_for_resource( - VolumeResourceUsage::ReadOnlyRegion { - region_id: replacement_region.id(), - }, - ) - .await - .unwrap(); + (None, Some(region)) => { + let region_id = region.id(); + if expunged_regions + .iter() + .any(|region| region.id() == region_id) + { + Checked::Expunged + } else { + Checked::Ok + } + } - assert!(usage.is_empty()); + (Some(_), Some(_)) => { + // This is an error: multiple resources (read/write + // region, read-only region, and/or a region snapshot) + // share the same target addr. 
+ return Ok(VolumeCookedResult::MultipleSomeReturned { + target: *target, + }); + } - // Do the replacement + // volume may have been deleted after `volume_get` at + // beginning of function, and before grabbing the expunged + // resources + (None, None) => { + return Ok(VolumeCookedResult::TargetNotFound { + target: *target, + }); + } + }; - let volume_replace_snapshot_result = datastore - .volume_replace_snapshot( - VolumeWithTarget(volume_id), - ExistingTarget(address_1), - ReplacementTarget(replacement_region_addr), - VolumeToDelete(volume_to_delete_id), - ) - .await - .unwrap(); + checked_region_set.push(check); + } - assert_eq!(volume_replace_snapshot_result, VolumeReplaceResult::Done); + if checked_region_set.iter().all(|x| *x == Checked::Expunged) { + return Ok( + VolumeCookedResult::RegionSetWithAllExpungedMembers { + region_set, + }, + ); + } + } - // Ensure the shape of the resulting VCRs + Ok(VolumeCookedResult::Ok) + } +} - let vcr: VolumeConstructionRequest = serde_json::from_str( - datastore.volume_get(volume_id).await.unwrap().unwrap().data(), +// Add some validation that runs only for tests +#[cfg(any(test, feature = "testing"))] +impl DataStore { + fn volume_invariant_violated(msg: String) -> diesel::result::Error { + diesel::result::Error::DatabaseError( + diesel::result::DatabaseErrorKind::CheckViolation, + Box::new(msg), ) - .unwrap(); + } - assert_eq!( - &vcr, - &VolumeConstructionRequest::Volume { - id: *volume_id.as_untyped_uuid(), - block_size: 512, - sub_volumes: vec![VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 10, - extent_count: 10, - generation: 1, - opts: CrucibleOpts { - id: *volume_id.as_untyped_uuid(), - target: vec![ - region_addresses[0].into(), - region_addresses[1].into(), - region_addresses[2].into(), - ], - lossy: false, - flush_timeout: None, - key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: false, - }, - }], - read_only_parent: Some(Box::new( 
- VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 10, - extent_count: 10, - generation: 1, - opts: CrucibleOpts { - id: rop_id, - target: vec![ - // target replaced - replacement_region_addr.into(), - address_2.into(), - address_3.into(), - ], - lossy: false, - flush_timeout: None, - key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: true, - }, - } - )), - }, + /// Tests each Volume to see if invariants hold + /// + /// If an invariant is violated, this function returns a `CheckViolation` + /// with the text of what invariant was violated. + pub(crate) async fn validate_volume_invariants( + conn: &async_bb8_diesel::Connection, + ) -> Result<(), diesel::result::Error> { + let mut paginator = Paginator::new( + SQL_BATCH_SIZE, + dropshot::PaginationOrder::Ascending, ); - let vcr: VolumeConstructionRequest = serde_json::from_str( - datastore - .volume_get(volume_to_delete_id) - .await - .unwrap() - .unwrap() - .data(), - ) - .unwrap(); - - assert_eq!( - &vcr, - &VolumeConstructionRequest::Volume { - id: *volume_to_delete_id.as_untyped_uuid(), - block_size: 512, - sub_volumes: vec![VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 1, - extent_count: 1, - generation: 1, - opts: CrucibleOpts { - id: *volume_to_delete_id.as_untyped_uuid(), - target: vec![ - // replaced target stashed here - address_1.into(), - ], - lossy: false, - flush_timeout: None, - key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: true, - }, - }], - read_only_parent: None, - }, - ); + while let Some(p) = paginator.next() { + use nexus_db_schema::schema::volume::dsl; + let haystack = + paginated(dsl::volume, dsl::id, &p.current_pagparams()) + .select(model::Volume::as_select()) + .get_results_async::(conn) + .await?; - // Validate the post-replacement volume resource usage records + paginator = + p.found_batch(&haystack, &|v| *v.id().as_untyped_uuid()); 
- for (i, region_snapshot) in region_snapshots.iter().enumerate() { - let usage = datastore - .volume_usage_records_for_resource( - VolumeResourceUsage::RegionSnapshot { - dataset_id: region_snapshot.dataset_id(), - region_id: region_snapshot.region_id, - snapshot_id: region_snapshot.snapshot_id, - }, - ) - .await - .unwrap(); + for volume in haystack { + Self::validate_volume_has_all_resources(&conn, &volume).await?; + Self::validate_volume_region_sets_have_unique_targets(&volume) + .await?; + } + } - assert_eq!(usage.len(), 1); + let mut paginator = Paginator::new( + SQL_BATCH_SIZE, + dropshot::PaginationOrder::Ascending, + ); - match i { - 0 => { - assert_eq!(usage[0].volume_id(), volume_to_delete_id); - } + while let Some(p) = paginator.next() { + use nexus_db_schema::schema::region::dsl; + let haystack = + paginated(dsl::region, dsl::id, &p.current_pagparams()) + .select(Region::as_select()) + .get_results_async::(conn) + .await?; - 1 | 2 => { - assert_eq!(usage[0].volume_id(), volume_id); - } + paginator = p.found_batch(&haystack, &|r| r.id()); - _ => panic!("out of range"), + for region in haystack { + Self::validate_read_only_region_has_no_snapshots(&conn, region) + .await?; } } - let usage = datastore - .volume_usage_records_for_resource( - VolumeResourceUsage::ReadOnlyRegion { - region_id: replacement_region.id(), - }, - ) - .await - .unwrap(); - - assert_eq!(usage.len(), 1); - assert_eq!(usage[0].volume_id(), volume_id); + Ok(()) + } - // Now undo the replacement. Note volume ID is not swapped. + /// Assert that the resources that comprise non-deleted volumes have not + /// been prematurely deleted. 
+ async fn validate_volume_has_all_resources( + conn: &async_bb8_diesel::Connection, + volume: &model::Volume, + ) -> Result<(), diesel::result::Error> { + if volume.time_deleted.is_some() { + // Do not need to validate resources for soft-deleted volumes + return Ok(()); + } - let volume_replace_snapshot_result = datastore - .volume_replace_snapshot( - VolumeWithTarget(volume_id), - ExistingTarget(replacement_region_addr), - ReplacementTarget(address_1), - VolumeToDelete(volume_to_delete_id), - ) - .await - .unwrap(); + let vcr: VolumeConstructionRequest = + serde_json::from_str(&volume.data()).unwrap(); - assert_eq!(volume_replace_snapshot_result, VolumeReplaceResult::Done,); + // validate all read/write resources still exist - let vcr: VolumeConstructionRequest = serde_json::from_str( - datastore.volume_get(volume_id).await.unwrap().unwrap().data(), - ) - .unwrap(); + let num_read_write_subvolumes = match count_read_write_sub_volumes(&vcr) + { + Ok(v) => v, + Err(e) => { + return Err(Self::volume_invariant_violated(format!( + "volume {} had error: {e}", + volume.id(), + ))); + } + }; - // Ensure the shape of the resulting VCR - assert_eq!( - &vcr, - &VolumeConstructionRequest::Volume { - id: *volume_id.as_untyped_uuid(), - block_size: 512, - sub_volumes: vec![VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 10, - extent_count: 10, - generation: 1, - opts: CrucibleOpts { - id: *volume_id.as_untyped_uuid(), - target: vec![ - region_addresses[0].into(), - region_addresses[1].into(), - region_addresses[2].into(), - ], - lossy: false, - flush_timeout: None, - key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: false, - }, - }], - read_only_parent: Some(Box::new( - VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 10, - extent_count: 10, - generation: 1, - opts: CrucibleOpts { - id: *rop_id.as_untyped_uuid(), - target: vec![ - // back to what it was - address_1.into(), - 
address_2.into(), - address_3.into(), - ], - lossy: false, - flush_timeout: None, - key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: true, - }, - } - )), - }, + let mut read_write_targets = Vec::with_capacity( + REGION_REDUNDANCY_THRESHOLD * num_read_write_subvolumes, ); - let vcr: VolumeConstructionRequest = serde_json::from_str( - datastore - .volume_get(volume_to_delete_id) - .await - .unwrap() - .unwrap() - .data(), - ) - .unwrap(); - - assert_eq!( + read_write_resources_associated_with_volume( &vcr, - &VolumeConstructionRequest::Volume { - id: *volume_to_delete_id.as_untyped_uuid(), - block_size: 512, - sub_volumes: vec![VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 1, - extent_count: 1, - generation: 1, - opts: CrucibleOpts { - id: *volume_to_delete_id.as_untyped_uuid(), - target: vec![ - // replacement stashed here - replacement_region_addr.into(), - ], - lossy: false, - flush_timeout: None, - key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: true, - }, - }], - read_only_parent: None, - }, + &mut read_write_targets, ); - // Validate the post-post-replacement volume resource usage records - - for region_snapshot in ®ion_snapshots { - let usage = datastore - .volume_usage_records_for_resource( - VolumeResourceUsage::RegionSnapshot { - dataset_id: region_snapshot.dataset_id(), - region_id: region_snapshot.region_id, - snapshot_id: region_snapshot.snapshot_id, - }, - ) - .await - .unwrap(); - - assert_eq!(usage.len(), 1); - assert_eq!(usage[0].volume_id(), volume_id); - } + for target in read_write_targets { + let target = match target.parse() { + Ok(t) => t, + Err(e) => { + return Err(Self::volume_invariant_violated(format!( + "could not parse {target}: {e}" + ))); + } + }; - let usage = datastore - .volume_usage_records_for_resource( - VolumeResourceUsage::ReadOnlyRegion { - region_id: replacement_region.id(), - }, + let maybe_region 
= DataStore::target_to_region( + conn, + &target, + RegionType::ReadWrite, ) - .await - .unwrap(); - - assert_eq!(usage.len(), 1); - assert_eq!(usage[0].volume_id(), volume_to_delete_id); - - db.terminate().await; - logctx.cleanup_successful(); - } - - #[tokio::test] - async fn test_find_volumes_referencing_socket_addr() { - let logctx = - dev::test_setup_log("test_find_volumes_referencing_socket_addr"); - let log = logctx.log.new(o!()); - let db = TestDatabase::new_with_datastore(&log).await; - let (opctx, datastore) = (db.opctx(), db.datastore()); - - let volume_id = VolumeUuid::new_v4(); - - // need to add region snapshot objects to satisfy volume create - // transaction's search for resources - - let address_1: SocketAddrV6 = - "[fd00:1122:3344:104::1]:400".parse().unwrap(); - let address_2: SocketAddrV6 = - "[fd00:1122:3344:105::1]:401".parse().unwrap(); - let address_3: SocketAddrV6 = - "[fd00:1122:3344:106::1]:402".parse().unwrap(); - - datastore - .region_snapshot_create(RegionSnapshot::new( - DatasetUuid::new_v4(), - Uuid::new_v4(), - Uuid::new_v4(), - address_1.to_string(), - )) - .await - .unwrap(); - datastore - .region_snapshot_create(RegionSnapshot::new( - DatasetUuid::new_v4(), - Uuid::new_v4(), - Uuid::new_v4(), - address_2.to_string(), - )) - .await - .unwrap(); - datastore - .region_snapshot_create(RegionSnapshot::new( - DatasetUuid::new_v4(), - Uuid::new_v4(), - Uuid::new_v4(), - address_3.to_string(), - )) - .await - .unwrap(); - - // case where the needle is found + .await?; - datastore - .volume_create( - volume_id, - VolumeConstructionRequest::Volume { - id: *volume_id.as_untyped_uuid(), - block_size: 512, - sub_volumes: vec![], - read_only_parent: Some(Box::new( - VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 10, - extent_count: 10, - generation: 1, - opts: CrucibleOpts { - id: Uuid::new_v4(), - target: vec![ - address_1.into(), - address_2.into(), - address_3.into(), - ], - lossy: false, - flush_timeout: None, 
- key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: true, - }, - }, - )), - }, - ) - .await - .unwrap(); + let Some(_region) = maybe_region else { + return Err(Self::volume_invariant_violated(format!( + "could not find resource for {target}" + ))); + }; + } - let volumes = datastore - .find_volumes_referencing_socket_addr(&opctx, address_1.into()) - .await - .unwrap(); + // validate all read-only resources still exist - assert_eq!(volumes.len(), 1); - assert_eq!(volumes[0].id(), volume_id); + let crucible_targets = { + let mut crucible_targets = CrucibleTargets::default(); + read_only_resources_associated_with_volume( + &vcr, + &mut crucible_targets, + ); + crucible_targets + }; - // case where the needle is missing + for read_only_target in &crucible_targets.read_only_targets { + let read_only_target = read_only_target.parse().map_err(|e| { + Self::volume_invariant_violated(format!( + "could not parse {read_only_target}: {e}" + )) + })?; - let volumes = datastore - .find_volumes_referencing_socket_addr( - &opctx, - "[fd55:1122:3344:104::1]:400".parse().unwrap(), - ) - .await - .unwrap(); + let maybe_usage = + DataStore::read_only_target_to_volume_resource_usage( + conn, + &read_only_target, + ) + .await?; - assert!(volumes.is_empty()); + let Some(_usage) = maybe_usage else { + return Err(Self::volume_invariant_violated(format!( + "could not find resource for {read_only_target}" + ))); + }; + } - db.terminate().await; - logctx.cleanup_successful(); + Ok(()) } - #[test] - fn test_read_only_target_in_vcr() { - // read_only_target_in_vcr should find read-only targets - - let vcr = VolumeConstructionRequest::Volume { - id: Uuid::new_v4(), - block_size: 512, - sub_volumes: vec![], - read_only_parent: Some(Box::new( - VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 10, - extent_count: 10, - generation: 1, - opts: CrucibleOpts { - id: Uuid::new_v4(), - target: vec![ - 
"[fd00:1122:3344:104::1]:400".parse().unwrap(), - "[fd00:1122:3344:105::1]:401".parse().unwrap(), - "[fd00:1122:3344:106::1]:402".parse().unwrap(), - ], - lossy: false, - flush_timeout: None, - key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: true, - }, - }, - )), - }; - - assert!( - read_only_target_in_vcr( - &vcr, - &"[fd00:1122:3344:104::1]:400".parse().unwrap(), - ) - .unwrap() - ); - - // read_only_target_in_vcr should _not_ find read-write targets - - let vcr = VolumeConstructionRequest::Volume { - id: Uuid::new_v4(), - block_size: 512, - sub_volumes: vec![VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 10, - extent_count: 10, - generation: 1, - opts: CrucibleOpts { - id: Uuid::new_v4(), - target: vec![ - "[fd00:1122:3344:104::1]:400".parse().unwrap(), - "[fd00:1122:3344:105::1]:401".parse().unwrap(), - "[fd00:1122:3344:106::1]:402".parse().unwrap(), - ], - lossy: false, - flush_timeout: None, - key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: false, - }, - }], - read_only_parent: None, - }; + /// Assert that all the region sets have three distinct targets + async fn validate_volume_region_sets_have_unique_targets( + volume: &model::Volume, + ) -> Result<(), diesel::result::Error> { + let vcr: VolumeConstructionRequest = + serde_json::from_str(&volume.data()).unwrap(); - assert!( - !read_only_target_in_vcr( - &vcr, - &"[fd00:1122:3344:104::1]:400".parse().unwrap(), - ) - .unwrap() - ); + let mut parts = VecDeque::new(); + parts.push_back(&vcr); - // read_only_target_in_vcr should bail on incorrect VCRs (currently it - // only detects a read/write region under a read-only parent) - - let vcr = VolumeConstructionRequest::Volume { - id: Uuid::new_v4(), - block_size: 512, - sub_volumes: vec![], - read_only_parent: Some(Box::new( - VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 10, - extent_count: 10, - 
generation: 1, - opts: CrucibleOpts { - id: Uuid::new_v4(), - target: vec![ - "[fd00:1122:3344:104::1]:400".parse().unwrap(), - "[fd00:1122:3344:105::1]:401".parse().unwrap(), - "[fd00:1122:3344:106::1]:402".parse().unwrap(), - ], - lossy: false, - flush_timeout: None, - key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: false, // invalid! - }, - }, - )), - }; + while let Some(part) = parts.pop_front() { + match part { + VolumeConstructionRequest::Volume { + sub_volumes, + read_only_parent, + .. + } => { + for sub_volume in sub_volumes { + parts.push_back(sub_volume); + } + if let Some(read_only_parent) = read_only_parent { + parts.push_back(read_only_parent); + } + } - read_only_target_in_vcr( - &vcr, - &"[fd00:1122:3344:104::1]:400".parse().unwrap(), - ) - .unwrap_err(); - } + VolumeConstructionRequest::Url { .. } => { + // nothing required + } - #[test] - fn test_replace_read_only_target_in_vcr() { - // replace_read_only_target_in_vcr should perform a replacement in a - // read-only parent - - let volume_id = Uuid::new_v4(); - - let vcr = VolumeConstructionRequest::Volume { - id: volume_id, - block_size: 512, - sub_volumes: vec![], - read_only_parent: Some(Box::new( - VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 10, - extent_count: 10, - generation: 1, - opts: CrucibleOpts { - id: volume_id, - target: vec![ - "[fd00:1122:3344:104::1]:400".parse().unwrap(), - "[fd00:1122:3344:105::1]:401".parse().unwrap(), - "[fd00:1122:3344:106::1]:402".parse().unwrap(), - ], - lossy: false, - flush_timeout: None, - key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: true, - }, - }, - )), - }; + VolumeConstructionRequest::Region { opts, .. 
} => { + let mut set = HashSet::new(); + let mut count = 0; - let old_target = - ExistingTarget("[fd00:1122:3344:105::1]:401".parse().unwrap()); - let new_target = - ReplacementTarget("[fd99:1122:3344:105::1]:12345".parse().unwrap()); - - let (new_vcr, replacements) = - replace_read_only_target_in_vcr(&vcr, old_target, new_target) - .unwrap(); - - assert_eq!(replacements, 1); - assert_eq!( - &new_vcr, - &VolumeConstructionRequest::Volume { - id: volume_id, - block_size: 512, - sub_volumes: vec![], - read_only_parent: Some(Box::new( - VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 10, - extent_count: 10, - generation: 1, - opts: CrucibleOpts { - id: volume_id, - target: vec![ - "[fd00:1122:3344:104::1]:400".parse().unwrap(), - new_target.0.into(), - "[fd00:1122:3344:106::1]:402".parse().unwrap(), - ], - lossy: false, - flush_timeout: None, - key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: true, - } + for target in &opts.target { + set.insert(target); + count += 1; } - )) - } - ); - // replace_read_only_target_in_vcr should perform a replacement in a - // read-only parent in a sub-volume - - let vcr = VolumeConstructionRequest::Volume { - id: volume_id, - block_size: 512, - sub_volumes: vec![VolumeConstructionRequest::Volume { - id: volume_id, - block_size: 512, - sub_volumes: vec![VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 10, - extent_count: 10, - generation: 1, - opts: CrucibleOpts { - id: volume_id, - target: vec![ - "[fd55:1122:3344:204::1]:1000".parse().unwrap(), - "[fd55:1122:3344:205::1]:1001".parse().unwrap(), - "[fd55:1122:3344:206::1]:1002".parse().unwrap(), - ], - lossy: false, - flush_timeout: None, - key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: false, - }, - }], - read_only_parent: Some(Box::new( - VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 10, - 
extent_count: 10, - generation: 1, - opts: CrucibleOpts { - id: volume_id, - target: vec![ - "[fd33:1122:3344:304::1]:2000".parse().unwrap(), - "[fd33:1122:3344:305::1]:2001".parse().unwrap(), - "[fd33:1122:3344:306::1]:2002".parse().unwrap(), - ], - lossy: false, - flush_timeout: None, - key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: true, - }, - }, - )), - }], - read_only_parent: Some(Box::new( - VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 10, - extent_count: 10, - generation: 1, - opts: CrucibleOpts { - id: volume_id, - target: vec![ - "[fd00:1122:3344:104::1]:400".parse().unwrap(), - "[fd00:1122:3344:105::1]:401".parse().unwrap(), - "[fd00:1122:3344:106::1]:402".parse().unwrap(), - ], - lossy: false, - flush_timeout: None, - key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: true, - }, - }, - )), - }; - - let old_target = - ExistingTarget("[fd33:1122:3344:306::1]:2002".parse().unwrap()); - let new_target = - ReplacementTarget("[fd99:1122:3344:105::1]:12345".parse().unwrap()); - - let (new_vcr, replacements) = - replace_read_only_target_in_vcr(&vcr, old_target, new_target) - .unwrap(); - - assert_eq!(replacements, 1); - assert_eq!( - &new_vcr, - &VolumeConstructionRequest::Volume { - id: volume_id, - block_size: 512, - sub_volumes: vec![VolumeConstructionRequest::Volume { - id: volume_id, - block_size: 512, - sub_volumes: vec![VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 10, - extent_count: 10, - generation: 1, - opts: CrucibleOpts { - id: volume_id, - target: vec![ - "[fd55:1122:3344:204::1]:1000".parse().unwrap(), - "[fd55:1122:3344:205::1]:1001".parse().unwrap(), - "[fd55:1122:3344:206::1]:1002".parse().unwrap(), - ], - lossy: false, - flush_timeout: None, - key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: false, - } - }], - read_only_parent: 
Some(Box::new( - VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 10, - extent_count: 10, - generation: 1, - opts: CrucibleOpts { - id: volume_id, - target: vec![ - "[fd33:1122:3344:304::1]:2000" - .parse() - .unwrap(), - "[fd33:1122:3344:305::1]:2001" - .parse() - .unwrap(), - new_target.0.into(), - ], - lossy: false, - flush_timeout: None, - key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: true, - } - } - )), - }], - read_only_parent: Some(Box::new( - VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 10, - extent_count: 10, - generation: 1, - opts: CrucibleOpts { - id: volume_id, - target: vec![ - "[fd00:1122:3344:104::1]:400".parse().unwrap(), - "[fd00:1122:3344:105::1]:401".parse().unwrap(), - "[fd00:1122:3344:106::1]:402".parse().unwrap(), - ], - lossy: false, - flush_timeout: None, - key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: true, - } + if set.len() != count { + return Err(Self::volume_invariant_violated(format!( + "volume {} has a region set with {} unique targets", + volume.id(), + set.len(), + ))); } - )) - } - ); - - // replace_read_only_target_in_vcr should perform multiple replacements - // if necessary (even if this is dubious!) 
- the caller will decide if - // this should be legal or not - - let rop = VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 10, - extent_count: 10, - generation: 1, - opts: CrucibleOpts { - id: volume_id, - target: vec![ - "[fd33:1122:3344:304::1]:2000".parse().unwrap(), - "[fd33:1122:3344:305::1]:2001".parse().unwrap(), - "[fd33:1122:3344:306::1]:2002".parse().unwrap(), - ], - lossy: false, - flush_timeout: None, - key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: true, - }, - }; - - let vcr = VolumeConstructionRequest::Volume { - id: volume_id, - block_size: 512, - sub_volumes: vec![VolumeConstructionRequest::Volume { - id: volume_id, - block_size: 512, - sub_volumes: vec![VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 10, - extent_count: 10, - generation: 1, - opts: CrucibleOpts { - id: volume_id, - target: vec![ - "[fd55:1122:3344:204::1]:1000".parse().unwrap(), - "[fd55:1122:3344:205::1]:1001".parse().unwrap(), - "[fd55:1122:3344:206::1]:1002".parse().unwrap(), - ], - lossy: false, - flush_timeout: None, - key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: false, - }, - }], - read_only_parent: Some(Box::new(rop.clone())), - }], - read_only_parent: Some(Box::new(rop)), - }; - - let old_target = - ExistingTarget("[fd33:1122:3344:304::1]:2000".parse().unwrap()); - let new_target = - ReplacementTarget("[fd99:1122:3344:105::1]:12345".parse().unwrap()); - - let (new_vcr, replacements) = - replace_read_only_target_in_vcr(&vcr, old_target, new_target) - .unwrap(); - - assert_eq!(replacements, 2); - - let rop = VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 10, - extent_count: 10, - generation: 1, - opts: CrucibleOpts { - id: volume_id, - target: vec![ - new_target.0.into(), - "[fd33:1122:3344:305::1]:2001".parse().unwrap(), - "[fd33:1122:3344:306::1]:2002".parse().unwrap(), - ], - lossy: 
false, - flush_timeout: None, - key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: true, - }, - }; + } - assert_eq!( - &new_vcr, - &VolumeConstructionRequest::Volume { - id: volume_id, - block_size: 512, - sub_volumes: vec![VolumeConstructionRequest::Volume { - id: volume_id, - block_size: 512, - sub_volumes: vec![VolumeConstructionRequest::Region { - block_size: 512, - blocks_per_extent: 10, - extent_count: 10, - generation: 1, - opts: CrucibleOpts { - id: volume_id, - target: vec![ - "[fd55:1122:3344:204::1]:1000".parse().unwrap(), - "[fd55:1122:3344:205::1]:1001".parse().unwrap(), - "[fd55:1122:3344:206::1]:1002".parse().unwrap(), - ], - lossy: false, - flush_timeout: None, - key: None, - cert_pem: None, - key_pem: None, - root_cert_pem: None, - control: None, - read_only: false, - } - }], - read_only_parent: Some(Box::new(rop.clone())), - }], - read_only_parent: Some(Box::new(rop)), + VolumeConstructionRequest::File { .. } => { + // nothing required + } } - ); + } + + Ok(()) } - /// Assert that there are no "deleted" r/w regions found when the associated - /// volume hasn't been created yet. - #[tokio::test] - async fn test_no_find_deleted_region_for_no_volume() { - let logctx = - dev::test_setup_log("test_no_find_deleted_region_for_no_volume"); - let log = logctx.log.new(o!()); - let db = TestDatabase::new_with_datastore(&log).await; - let (opctx, datastore) = (db.opctx(), db.datastore()); - - let _test_datasets = TestDatasets::create( - &opctx, - datastore.clone(), - REGION_REDUNDANCY_THRESHOLD, - ) - .await; - - let volume_id = VolumeUuid::new_v4(); - - // Assert that allocating regions without creating the volume does not - // cause them to be returned as "deleted" regions, as this can cause - // sagas that allocate regions to race with the volume delete saga and - // cause premature region deletion. 
- - let _datasets_and_regions = datastore - .disk_region_allocate( - &opctx, - volume_id, - &DiskSource::Blank { block_size: 512.try_into().unwrap() }, - ByteCount::from_gibibytes_u32(1), - &&RegionAllocationStrategy::RandomWithDistinctSleds { - seed: None, - }, - ) - .await - .unwrap(); + /// Assert that read-only regions do not have any associated region + /// snapshots (see associated comment in `soft_delete_volume_in_txn`) + async fn validate_read_only_region_has_no_snapshots( + conn: &async_bb8_diesel::Connection, + region: Region, + ) -> Result<(), diesel::result::Error> { + if !region.read_only() { + return Ok(()); + } - let deleted_regions = datastore - .find_deleted_volume_regions() - .await - .expect("find_deleted_volume_regions"); + use nexus_db_schema::schema::volume_resource_usage::dsl; + + let matching_usage_records: Vec = + dsl::volume_resource_usage + .filter( + dsl::usage_type.eq(VolumeResourceUsageType::RegionSnapshot), + ) + .filter(dsl::region_snapshot_region_id.eq(region.id())) + .select(VolumeResourceUsageRecord::as_select()) + .get_results_async(conn) + .await? + .into_iter() + .map(|r| r.try_into().unwrap()) + .collect(); - assert!(deleted_regions.is_empty()); + if !matching_usage_records.is_empty() { + return Err(Self::volume_invariant_violated(format!( + "read-only region {} has matching usage records: {:?}", + region.id(), + matching_usage_records, + ))); + } - db.terminate().await; - logctx.cleanup_successful(); + Ok(()) } } diff --git a/nexus/db-queries/src/db/datastore/volume/mod.rs b/nexus/db-queries/src/db/datastore/volume/mod.rs new file mode 100644 index 00000000000..ecacf5e8862 --- /dev/null +++ b/nexus/db-queries/src/db/datastore/volume/mod.rs @@ -0,0 +1,10 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +mod datastore; +mod replacement; +mod test; + +pub use datastore::*; +pub use replacement::*; diff --git a/nexus/db-queries/src/db/datastore/volume/replacement.rs b/nexus/db-queries/src/db/datastore/volume/replacement.rs new file mode 100644 index 00000000000..106501ca67e --- /dev/null +++ b/nexus/db-queries/src/db/datastore/volume/replacement.rs @@ -0,0 +1,969 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::db::DataStore; +use crate::db::datastore::volume::read_only_target_in_vcr; +use crate::db::datastore::volume::region_in_vcr; +use crate::db::model; +use crate::db::model::VolumeResourceUsageRecord; +use crate::db::model::to_db_typed_uuid; +use anyhow::anyhow; +use anyhow::bail; +use async_bb8_diesel::AsyncRunQueryDsl; +use diesel::OptionalExtension; +use diesel::prelude::*; +use nexus_db_errors::ErrorHandler; +use nexus_db_errors::OptionalError; +use nexus_db_errors::public_error_from_diesel; +use nexus_db_lookup::DbConnection; +use omicron_common::api::external::Error; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::VolumeUuid; +use serde::Deserialize; +use serde::Serialize; +use sled_agent_client::VolumeConstructionRequest; +use std::collections::VecDeque; +use std::net::AddrParseError; +use std::net::SocketAddr; +use std::net::SocketAddrV6; +use uuid::Uuid; + +#[derive(Clone)] +pub struct VolumeReplacementParams { + pub volume_id: VolumeUuid, + pub region_id: Uuid, + pub region_addr: SocketAddrV6, +} + +// types for volume_replace_snapshot and replace_read_only_target_in_vcr +// parameters + +#[derive(Debug, Clone, Copy)] +pub struct VolumeWithTarget(pub VolumeUuid); + +#[derive(Debug, Clone, Copy)] +pub struct ExistingTarget(pub SocketAddrV6); + +#[derive(Debug, Clone, Copy)] +pub struct ReplacementTarget(pub SocketAddrV6); + +#[derive(Debug, Clone, Copy)] +pub struct 
VolumeToDelete(pub VolumeUuid); + +// The result type returned from both `volume_replace_region` and +// `volume_replace_snapshot` +#[must_use] +#[derive(Debug, Serialize, Deserialize, PartialEq)] +pub enum VolumeReplaceResult { + // based on the VCRs, seems like the replacement already happened + AlreadyHappened, + + // this call performed the replacement + Done, + + // the "existing" volume was soft deleted + ExistingVolumeSoftDeleted, + + // the "existing" volume was hard deleted + ExistingVolumeHardDeleted, +} + +#[derive(Debug, thiserror::Error)] +enum ReplaceRegionError { + #[error("Error from Volume region replacement: {0}")] + Public(Error), + + #[error("Serde error during Volume region replacement: {0}")] + SerdeError(#[from] serde_json::Error), + + #[error("Region replacement error: {0}")] + RegionReplacementError(#[from] anyhow::Error), +} + +#[derive(Debug, thiserror::Error)] +enum ReplaceSnapshotError { + #[error("Error from Volume snapshot replacement: {0}")] + Public(Error), + + #[error("Serde error during Volume snapshot replacement: {0}")] + SerdeError(#[from] serde_json::Error), + + #[error("Snapshot replacement error: {0}")] + SnapshotReplacementError(#[from] anyhow::Error), + + #[error("Replaced {0} targets, expected {1}")] + UnexpectedReplacedTargets(usize, usize), + + #[error("Updated {0} database rows, expected {1}")] + UnexpectedDatabaseUpdate(usize, usize), + + #[error( + "Address parsing error during Volume snapshot \ + replacement: {0}" + )] + AddressParseError(#[from] AddrParseError), + + #[error("Could not match read-only resource to {0}")] + CouldNotFindResource(String), + + #[error("Multiple volume resource usage records for {0}")] + MultipleResourceUsageRecords(String), +} + +impl DataStore { + async fn volume_replace_region_in_txn( + conn: &async_bb8_diesel::Connection, + err: OptionalError, + existing: VolumeReplacementParams, + replacement: VolumeReplacementParams, + ) -> Result { + // In a single transaction: + // + // - set the 
existing region's volume id to the replacement's volume id + // - set the replacement region's volume id to the existing's volume id + // - update the existing volume's construction request to replace the + // existing region's SocketAddrV6 with the replacement region's + // + // This function's effects can be undone by calling it with swapped + // parameters. + // + // # Example # + // + // Imagine `volume_replace_region` is called with the following, + // pretending that UUIDs are just eight uppercase letters: + // + // let existing = VolumeReplacementParams { + // volume_id: TARGET_VOL, + // region_id: TARGET_REG, + // region_addr: "[fd00:1122:3344:145::10]:40001", + // } + // + // let replace = VolumeReplacementParams { + // volume_id: NEW_VOL, + // region_id: NEW_REG, + // region_addr: "[fd00:1122:3344:322::4]:3956", + // } + // + // In the database, the relevant records (and columns) of the region + // table look like this prior to the transaction: + // + // id | volume_id + // -------------| --------- + // TARGET_REG | TARGET_VOL + // NEW_REG | NEW_VOL + // + // TARGET_VOL has a volume construction request where one of the targets + // list will contain TARGET_REG's address: + // + // { + // "type": "volume", + // "block_size": 512, + // "id": "TARGET_VOL", + // "read_only_parent": { + // ... + // }, + // "sub_volumes": [ + // { + // ... + // "opts": { + // ... + // "target": [ + // "[fd00:1122:3344:103::3]:19004", + // "[fd00:1122:3344:79::12]:27015", + // "[fd00:1122:3344:145::10]:40001" <----- + // ] + // } + // } + // ] + // } + // + // Note it is not required for the replacement volume to exist as a + // database record for this transaction. 
+ // + // The first part of the transaction will swap the volume IDs of the + // existing and replacement region records: + // + // id | volume_id + // ------------| --------- + // TARGET_REG | NEW_VOL + // NEW_REG | TARGET_VOL + // + // The second part of the transaction will update the volume + // construction request of TARGET_VOL by finding and replacing + // TARGET_REG's address (in the appropriate targets array) with + // NEW_REG's address: + // + // { + // ... + // "target": [ + // "[fd00:1122:3344:103::3]:19004", + // "[fd00:1122:3344:79::12]:27015", + // "[fd00:1122:3344:322::4]:3956" <----- + // ] + // ... + // } + // + // After the transaction, the caller should ensure that TARGET_REG is + // referenced (via its socket address) in NEW_VOL. For an example, this + // is done as part of the region replacement start saga. + + // Grab the old volume first + let maybe_old_volume = { + volume_dsl::volume + .filter(volume_dsl::id.eq(to_db_typed_uuid(existing.volume_id))) + .select(model::Volume::as_select()) + .first_async::(conn) + .await + .optional() + .map_err(|e| { + err.bail_retryable_or_else(e, |e| { + ReplaceRegionError::Public(public_error_from_diesel( + e, + ErrorHandler::Server, + )) + }) + })? + }; + + let old_volume = if let Some(old_volume) = maybe_old_volume { + old_volume + } else { + // Existing volume was hard-deleted, so return here. We can't + // perform the region replacement now, and this will short-circuit + // the rest of the process. + + return Ok(VolumeReplaceResult::ExistingVolumeHardDeleted); + }; + + if old_volume.time_deleted.is_some() { + // Existing volume was soft-deleted, so return here for the same + // reason: the region replacement process should be short-circuited + // now. 
+ return Ok(VolumeReplaceResult::ExistingVolumeSoftDeleted); + } + + let old_vcr: VolumeConstructionRequest = + match serde_json::from_str(&old_volume.data()) { + Ok(vcr) => vcr, + Err(e) => { + return Err(err.bail(ReplaceRegionError::SerdeError(e))); + } + }; + + // Does it look like this replacement already happened? + let old_region_in_vcr = + match region_in_vcr(&old_vcr, &existing.region_addr) { + Ok(v) => v, + Err(e) => { + return Err( + err.bail(ReplaceRegionError::RegionReplacementError(e)) + ); + } + }; + let new_region_in_vcr = + match region_in_vcr(&old_vcr, &replacement.region_addr) { + Ok(v) => v, + Err(e) => { + return Err( + err.bail(ReplaceRegionError::RegionReplacementError(e)) + ); + } + }; + + if !old_region_in_vcr && new_region_in_vcr { + // It does seem like the replacement happened - if this function is + // called twice in a row then this can happen. + return Ok(VolumeReplaceResult::AlreadyHappened); + } else if old_region_in_vcr && !new_region_in_vcr { + // The replacement hasn't happened yet, but can proceed + } else if old_region_in_vcr && new_region_in_vcr { + // Both the old region and new region exist in this VCR. Regions are + // not reused, so this is an illegal state: if the replacement of + // the old region occurred, then the new region would be present + // multiple times in the volume. We have to bail out here. + // + // The guards against this happening are: + // + // - only one replacement can occur for a volume at a time (due to + // the volume repair lock), and + // + // - region replacement does not delete the old region until the + // "region replacement finish" saga, which happens at the very end + // of the process. If it eagerly deleted the region, the crucible + // agent would be free to reuse the port for another region + // allocation, and an identical target (read: ip and port) could + // be confusing. 
Most of the time, we assume that the dataset + // containing that agent has been expunged, so the agent is gone, + // so this port reuse cannot occur + return Err(err.bail(ReplaceRegionError::RegionReplacementError( + anyhow!("old_region_in_vcr && new_region_in_vcr"), + ))); + } else if !old_region_in_vcr && !new_region_in_vcr { + // Neither the region we've been asked to replace or the new region + // is in the VCR. This is an illegal state, as this function would + // be performing a no-op. We have to bail out here. + // + // The guard against this happening is again that only one + // replacement can occur for a volume at a time: if it was possible + // for multiple region replacements to occur, then both would be + // attempting to swap out the same old region for different new + // regions: + // + // region replacement one: + // + // volume_replace_region_in_txn( + // .., + // existing = [fd00:1122:3344:145::10]:40001, + // replacement = [fd00:1122:3344:322::4]:3956, + // ) + // + // region replacement two: + // + // volume_replace_region_in_txn( + // .., + // existing = [fd00:1122:3344:145::10]:40001, + // replacement = [fd00:1122:3344:fd1::123]:27001, + // ) + // + // The one that replaced second would always land in this branch. 
+ return Err(err.bail(ReplaceRegionError::RegionReplacementError( + anyhow!("!old_region_in_vcr && !new_region_in_vcr"), + ))); + } + + use nexus_db_schema::schema::region::dsl as region_dsl; + use nexus_db_schema::schema::volume::dsl as volume_dsl; + + // Set the existing region's volume id to the replacement's volume id + diesel::update(region_dsl::region) + .filter(region_dsl::id.eq(existing.region_id)) + .set( + region_dsl::volume_id + .eq(to_db_typed_uuid(replacement.volume_id)), + ) + .execute_async(conn) + .await + .map_err(|e| { + err.bail_retryable_or_else(e, |e| { + ReplaceRegionError::Public(public_error_from_diesel( + e, + ErrorHandler::Server, + )) + }) + })?; + + // Set the replacement region's volume id to the existing's volume id + diesel::update(region_dsl::region) + .filter(region_dsl::id.eq(replacement.region_id)) + .set(region_dsl::volume_id.eq(to_db_typed_uuid(existing.volume_id))) + .execute_async(conn) + .await + .map_err(|e| { + err.bail_retryable_or_else(e, |e| { + ReplaceRegionError::Public(public_error_from_diesel( + e, + ErrorHandler::Server, + )) + }) + })?; + + // Update the existing volume's construction request to replace the + // existing region's SocketAddrV6 with the replacement region's + + // Copy the old volume's VCR, changing out the old region for the new. 
+ let new_vcr = match replace_region_in_vcr( + &old_vcr, + existing.region_addr, + replacement.region_addr, + ) { + Ok(new_vcr) => new_vcr, + Err(e) => { + return Err( + err.bail(ReplaceRegionError::RegionReplacementError(e)) + ); + } + }; + + let new_volume_data = serde_json::to_string(&new_vcr) + .map_err(|e| err.bail(ReplaceRegionError::SerdeError(e)))?; + + // Update the existing volume's data + diesel::update(volume_dsl::volume) + .filter(volume_dsl::id.eq(to_db_typed_uuid(existing.volume_id))) + .set(volume_dsl::data.eq(new_volume_data)) + .execute_async(conn) + .await + .map_err(|e| { + err.bail_retryable_or_else(e, |e| { + ReplaceRegionError::Public(public_error_from_diesel( + e, + ErrorHandler::Server, + )) + }) + })?; + + // After region replacement, validate invariants for all volumes + #[cfg(any(test, feature = "testing"))] + Self::validate_volume_invariants(conn).await?; + + Ok(VolumeReplaceResult::Done) + } + + /// Replace a read-write region in a Volume with a new region. 
+ pub async fn volume_replace_region( + &self, + existing: VolumeReplacementParams, + replacement: VolumeReplacementParams, + ) -> Result { + let err = OptionalError::new(); + + let conn = self.pool_connection_unauthorized().await?; + self.transaction_retry_wrapper("volume_replace_region") + .transaction(&conn, |conn| { + let err = err.clone(); + let existing = existing.clone(); + let replacement = replacement.clone(); + async move { + Self::volume_replace_region_in_txn( + &conn, + err, + existing, + replacement, + ) + .await + } + }) + .await + .map_err(|e| { + if let Some(err) = err.take() { + match err { + ReplaceRegionError::Public(e) => e, + + ReplaceRegionError::SerdeError(_) => { + Error::internal_error(&err.to_string()) + } + + ReplaceRegionError::RegionReplacementError(_) => { + Error::internal_error(&err.to_string()) + } + } + } else { + public_error_from_diesel(e, ErrorHandler::Server) + } + }) + } + + async fn volume_replace_snapshot_in_txn( + conn: &async_bb8_diesel::Connection, + err: OptionalError, + volume_id: VolumeWithTarget, + existing: ExistingTarget, + replacement: ReplacementTarget, + volume_to_delete_id: VolumeToDelete, + ) -> Result { + use nexus_db_schema::schema::volume::dsl as volume_dsl; + use nexus_db_schema::schema::volume_resource_usage::dsl as ru_dsl; + + // Grab the old volume first + let maybe_old_volume = { + volume_dsl::volume + .filter(volume_dsl::id.eq(to_db_typed_uuid(volume_id.0))) + .select(model::Volume::as_select()) + .first_async::(conn) + .await + .optional() + .map_err(|e| { + err.bail_retryable_or_else(e, |e| { + ReplaceSnapshotError::Public(public_error_from_diesel( + e, + ErrorHandler::Server, + )) + }) + })? + }; + + let old_volume = if let Some(old_volume) = maybe_old_volume { + old_volume + } else { + // Existing volume was hard-deleted, so return here. We can't + // perform the region replacement now, and this will short-circuit + // the rest of the process. 
+ + return Ok(VolumeReplaceResult::ExistingVolumeHardDeleted); + }; + + if old_volume.time_deleted.is_some() { + // Existing volume was soft-deleted, so return here for the same + // reason: the region replacement process should be short-circuited + // now. + return Ok(VolumeReplaceResult::ExistingVolumeSoftDeleted); + } + + let old_vcr: VolumeConstructionRequest = + match serde_json::from_str(&old_volume.data()) { + Ok(vcr) => vcr, + Err(e) => { + return Err(err.bail(ReplaceSnapshotError::SerdeError(e))); + } + }; + + // Does it look like this replacement already happened? + let old_target_in_vcr = + match read_only_target_in_vcr(&old_vcr, &existing.0) { + Ok(v) => v, + Err(e) => { + return Err(err.bail( + ReplaceSnapshotError::SnapshotReplacementError(e), + )); + } + }; + + let new_target_in_vcr = + match read_only_target_in_vcr(&old_vcr, &replacement.0) { + Ok(v) => v, + Err(e) => { + return Err(err.bail( + ReplaceSnapshotError::SnapshotReplacementError(e), + )); + } + }; + + if !old_target_in_vcr && new_target_in_vcr { + // It does seem like the replacement happened + return Ok(VolumeReplaceResult::AlreadyHappened); + } + + // Update the existing volume's construction request to replace the + // existing target's SocketAddrV6 with the replacement target's + + // Copy the old volume's VCR, changing out the old target for the new. + let (new_vcr, replacements) = match replace_read_only_target_in_vcr( + &old_vcr, + existing, + replacement, + ) { + Ok(new_vcr) => new_vcr, + Err(e) => { + return Err( + err.bail(ReplaceSnapshotError::SnapshotReplacementError(e)) + ); + } + }; + + // Expect that this only happened once. If it happened multiple times, + // question everything: how would a snapshot be used twice?! 
+ + if replacements != 1 { + return Err(err.bail( + ReplaceSnapshotError::UnexpectedReplacedTargets( + replacements, + 1, + ), + )); + } + + let new_volume_data = serde_json::to_string(&new_vcr) + .map_err(|e| err.bail(ReplaceSnapshotError::SerdeError(e)))?; + + // Update the existing volume's data + diesel::update(volume_dsl::volume) + .filter(volume_dsl::id.eq(to_db_typed_uuid(volume_id.0))) + .set(volume_dsl::data.eq(new_volume_data)) + .execute_async(conn) + .await + .map_err(|e| { + err.bail_retryable_or_else(e, |e| { + ReplaceSnapshotError::Public(public_error_from_diesel( + e, + ErrorHandler::Server, + )) + }) + })?; + + // Make a new VCR that will stash the target to delete. The values here + // don't matter, just that it gets fed into the volume_delete machinery + // later. + let vcr = VolumeConstructionRequest::Volume { + id: *volume_to_delete_id.0.as_untyped_uuid(), + block_size: 512, + sub_volumes: vec![VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 1, + extent_count: 1, + generation: 1, + opts: sled_agent_client::CrucibleOpts { + id: *volume_to_delete_id.0.as_untyped_uuid(), + target: vec![existing.0.into()], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + }, + }], + read_only_parent: None, + }; + + let volume_data = serde_json::to_string(&vcr) + .map_err(|e| err.bail(ReplaceSnapshotError::SerdeError(e)))?; + + // Update the volume to delete data + let num_updated = diesel::update(volume_dsl::volume) + .filter(volume_dsl::id.eq(to_db_typed_uuid(volume_to_delete_id.0))) + .filter(volume_dsl::time_deleted.is_null()) + .set(volume_dsl::data.eq(volume_data)) + .execute_async(conn) + .await?; + + if num_updated != 1 { + return Err(err.bail( + ReplaceSnapshotError::UnexpectedDatabaseUpdate(num_updated, 1), + )); + } + + // Update the appropriate volume resource usage records - it could + // either be a read-only region or a region 
snapshot, so determine what + // it is first + + let maybe_existing_usage = + Self::read_only_target_to_volume_resource_usage(conn, &existing.0) + .await?; + + let Some(existing_usage) = maybe_existing_usage else { + return Err(err.bail(ReplaceSnapshotError::CouldNotFindResource( + format!("could not find resource for {}", existing.0,), + ))); + }; + + // The "existing" target moved into the volume to delete + + Self::swap_volume_usage_records_for_resources( + conn, + existing_usage, + volume_id.0, + volume_to_delete_id.0, + ) + .await + .map_err(|e| { + err.bail_retryable_or_else(e, |e| { + ReplaceSnapshotError::Public(public_error_from_diesel( + e, + ErrorHandler::Server, + )) + }) + })?; + + let maybe_replacement_usage = + Self::read_only_target_to_volume_resource_usage( + conn, + &replacement.0, + ) + .await?; + + let Some(replacement_usage) = maybe_replacement_usage else { + return Err(err.bail(ReplaceSnapshotError::CouldNotFindResource( + format!("could not find resource for {}", existing.0,), + ))); + }; + + // The intention leaving this transaction is that the correct volume + // resource usage records exist, so: + // + // - if no usage record existed for the replacement usage, then create a + // new record that points to the volume id (this can happen if the + // volume to delete was blank when coming into this function) + // + // - if records exist for the "replacement" usage, then one of those + // will match the volume to delete id, so perform a swap instead to + // the volume id + + let existing_replacement_volume_usage_records = + Self::volume_usage_records_for_resource_query( + replacement_usage.clone(), + ) + .load_async(conn) + .await + .map_err(|e| { + err.bail_retryable_or_else(e, |e| { + ReplaceSnapshotError::Public(public_error_from_diesel( + e, + ErrorHandler::Server, + )) + }) + })? 
+ // TODO be smart enough to .filter the above query + .into_iter() + .filter(|record| record.volume_id == volume_to_delete_id.0.into()) + .count(); + + // The "replacement" target moved into the volume + + if existing_replacement_volume_usage_records == 0 { + // No matching record + let new_record = + VolumeResourceUsageRecord::new(volume_id.0, replacement_usage); + + diesel::insert_into(ru_dsl::volume_resource_usage) + .values(new_record) + .execute_async(conn) + .await + .map_err(|e| { + err.bail_retryable_or_else(e, |e| { + ReplaceSnapshotError::Public(public_error_from_diesel( + e, + ErrorHandler::Server, + )) + }) + })?; + } else if existing_replacement_volume_usage_records == 1 { + // One matching record: perform swap + Self::swap_volume_usage_records_for_resources( + conn, + replacement_usage, + volume_to_delete_id.0, + volume_id.0, + ) + .await + .map_err(|e| { + err.bail_retryable_or_else(e, |e| { + ReplaceSnapshotError::Public(public_error_from_diesel( + e, + ErrorHandler::Server, + )) + }) + })?; + } else { + // More than one matching record! + return Err(err.bail( + ReplaceSnapshotError::MultipleResourceUsageRecords(format!( + "{replacement_usage:?}" + )), + )); + } + + // After region snapshot replacement, validate invariants for all + // volumes + #[cfg(any(test, feature = "testing"))] + Self::validate_volume_invariants(conn).await?; + + Ok(VolumeReplaceResult::Done) + } + + /// Replace a read-only target in a Volume with a new region + /// + /// In a single transaction: + /// + /// - update a volume's serialized construction request by replacing a + /// single target. + /// + /// - stash the replaced target in a "volume to delete"'s serialized + /// construction request + /// + /// Note that this transaction does _not_ update a region snapshot's volume + /// references table! This is legal because the existing target reference is + /// written into the volume to delete's construction request. 
+    ///
+    /// This function's effects can be undone by calling it with swapped
+    /// `existing` and `replacement` parameters.
+    pub async fn volume_replace_snapshot(
+        &self,
+        volume_id: VolumeWithTarget,
+        existing: ExistingTarget,
+        replacement: ReplacementTarget,
+        volume_to_delete_id: VolumeToDelete,
+    ) -> Result<VolumeReplaceResult, Error> {
+        let err = OptionalError::new();
+
+        let conn = self.pool_connection_unauthorized().await?;
+        self.transaction_retry_wrapper("volume_replace_snapshot")
+            .transaction(&conn, |conn| {
+                let err = err.clone();
+
+                async move {
+                    Self::volume_replace_snapshot_in_txn(
+                        &conn,
+                        err,
+                        volume_id,
+                        existing,
+                        replacement,
+                        volume_to_delete_id,
+                    )
+                    .await
+                }
+            })
+            .await
+            .map_err(|e| {
+                if let Some(err) = err.take() {
+                    match err {
+                        ReplaceSnapshotError::Public(e) => e,
+
+                        ReplaceSnapshotError::SerdeError(_)
+                        | ReplaceSnapshotError::SnapshotReplacementError(_)
+                        | ReplaceSnapshotError::UnexpectedReplacedTargets(
+                            _,
+                            _,
+                        )
+                        | ReplaceSnapshotError::UnexpectedDatabaseUpdate(
+                            _,
+                            _,
+                        )
+                        | ReplaceSnapshotError::AddressParseError(_)
+                        | ReplaceSnapshotError::CouldNotFindResource(_)
+                        | ReplaceSnapshotError::MultipleResourceUsageRecords(
+                            _,
+                        ) => Error::internal_error(&err.to_string()),
+                    }
+                } else {
+                    public_error_from_diesel(e, ErrorHandler::Server)
+                }
+            })
+    }
+}
+
+/// Replace a Region in a VolumeConstructionRequest
+///
+/// Note that UUIDs are not randomized by this step: Crucible will reject a
+/// `target_replace` call if the replacement VolumeConstructionRequest does not
+/// exactly match the original, except for a single Region difference.
+///
+/// Note that the generation number _is_ bumped in this step, otherwise
+/// `compare_vcr_for_update` will reject the update.
+pub fn replace_region_in_vcr(
+    vcr: &VolumeConstructionRequest,
+    old_region: SocketAddrV6,
+    new_region: SocketAddrV6,
+) -> anyhow::Result<VolumeConstructionRequest> {
+    let mut new_vcr = vcr.clone();
+
+    let mut parts: VecDeque<&mut VolumeConstructionRequest> = VecDeque::new();
+    parts.push_back(&mut new_vcr);
+
+    let mut old_region_found = false;
+
+    while let Some(vcr_part) = parts.pop_front() {
+        match vcr_part {
+            VolumeConstructionRequest::Volume { sub_volumes, .. } => {
+                for sub_volume in sub_volumes {
+                    parts.push_back(sub_volume);
+                }
+
+                // Skip looking at read-only parent, this function only replaces
+                // R/W regions
+            }
+
+            VolumeConstructionRequest::Url { .. } => {
+                // nothing required
+            }
+
+            VolumeConstructionRequest::Region { opts, generation, .. } => {
+                for target in &mut opts.target {
+                    if let SocketAddr::V6(target) = target {
+                        if *target == old_region {
+                            *target = new_region;
+                            old_region_found = true;
+                        }
+                    }
+                }
+
+                // Bump generation number, otherwise update will be rejected
+                *generation = *generation + 1;
+            }
+
+            VolumeConstructionRequest::File { .. } => {
+                // nothing required
+            }
+        }
+    }
+
+    if !old_region_found {
+        bail!("old region {old_region} not found!");
+    }
+
+    Ok(new_vcr)
+}
+
+/// Replace a read-only target in a VolumeConstructionRequest
+///
+/// Note that UUIDs are not randomized by this step: Crucible will reject a
+/// `target_replace` call if the replacement VolumeConstructionRequest does not
+/// exactly match the original, except for a single Region difference.
+///
+/// Note that the generation number _is not_ bumped in this step.
+pub fn replace_read_only_target_in_vcr(
+    vcr: &VolumeConstructionRequest,
+    old_target: ExistingTarget,
+    new_target: ReplacementTarget,
+) -> anyhow::Result<(VolumeConstructionRequest, usize)> {
+    struct Work<'a> {
+        vcr_part: &'a mut VolumeConstructionRequest,
+        under_read_only_parent: bool,
+    }
+    let mut new_vcr = vcr.clone();
+
+    let mut parts: VecDeque<Work> = VecDeque::new();
+    parts.push_back(Work {
+        vcr_part: &mut new_vcr,
+        under_read_only_parent: false,
+    });
+
+    let mut replacements = 0;
+
+    while let Some(work) = parts.pop_front() {
+        match work.vcr_part {
+            VolumeConstructionRequest::Volume {
+                sub_volumes,
+                read_only_parent,
+                ..
+            } => {
+                for sub_volume in sub_volumes {
+                    parts.push_back(Work {
+                        vcr_part: sub_volume,
+                        under_read_only_parent: work.under_read_only_parent,
+                    });
+                }
+
+                if let Some(read_only_parent) = read_only_parent {
+                    parts.push_back(Work {
+                        vcr_part: read_only_parent,
+                        under_read_only_parent: true,
+                    });
+                }
+            }
+
+            VolumeConstructionRequest::Url { .. } => {
+                // nothing required
+            }
+
+            VolumeConstructionRequest::Region { opts, .. } => {
+                if work.under_read_only_parent && !opts.read_only {
+                    // This VCR isn't constructed properly, there's a read/write
+                    // region under a read-only parent
+                    bail!("read-write region under read-only parent");
+                }
+
+                for target in &mut opts.target {
+                    if let SocketAddr::V6(target) = target {
+                        if *target == old_target.0 && opts.read_only {
+                            *target = new_target.0;
+                            replacements += 1;
+                        }
+                    }
+                }
+            }
+
+            VolumeConstructionRequest::File { ..
} => { + // nothing required + } + } + } + + if replacements == 0 { + bail!("target {old_target:?} not found!"); + } + + Ok((new_vcr, replacements)) +} diff --git a/nexus/db-queries/src/db/datastore/volume/test.rs b/nexus/db-queries/src/db/datastore/volume/test.rs new file mode 100644 index 00000000000..70a44651ead --- /dev/null +++ b/nexus/db-queries/src/db/datastore/volume/test.rs @@ -0,0 +1,1584 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +#[cfg(test)] +mod tests { + use crate::db; + use crate::db::datastore::REGION_REDUNDANCY_THRESHOLD; + use crate::db::datastore::test::TestDatasets; + use crate::db::datastore::volume::ExistingTarget; + use crate::db::datastore::volume::ReplacementTarget; + use crate::db::datastore::volume::VolumeReplaceResult; + use crate::db::datastore::volume::VolumeToDelete; + use crate::db::datastore::volume::VolumeWithTarget; + use crate::db::datastore::volume::read_only_target_in_vcr; + use crate::db::datastore::volume::replace_read_only_target_in_vcr; + use crate::db::pub_test_utils::TestDatabase; + use crate::diesel::ExpressionMethods; + use async_bb8_diesel::AsyncRunQueryDsl; + use chrono::Utc; + use nexus_config::RegionAllocationStrategy; + use nexus_db_model::Region; + use nexus_db_model::RegionSnapshot; + use nexus_db_model::SqlU16; + use nexus_db_model::VolumeResourceUsage; + use nexus_db_model::to_db_typed_uuid; + use nexus_types::external_api::disk::DiskSource; + use nexus_types::identity::Asset; + use omicron_common::api::external::ByteCount; + use omicron_test_utils::dev; + use omicron_uuid_kinds::DatasetUuid; + use omicron_uuid_kinds::GenericUuid; + use omicron_uuid_kinds::VolumeUuid; + use sled_agent_client::CrucibleOpts; + use sled_agent_client::VolumeConstructionRequest; + use std::net::SocketAddrV6; + use uuid::Uuid; + + // Assert that Nexus will not fail to 
deserialize an old version of + // CrucibleResources that was serialized before schema update 6.0.0. + #[tokio::test] + async fn test_deserialize_old_crucible_resources() { + let logctx = + dev::test_setup_log("test_deserialize_old_crucible_resources"); + let log = logctx.log.new(o!()); + let db = TestDatabase::new_with_datastore(&log).await; + let datastore = db.datastore(); + + // Start with a fake volume, doesn't matter if it's empty + + let volume_id = VolumeUuid::new_v4(); + let _volume = datastore + .volume_create( + volume_id, + VolumeConstructionRequest::Volume { + id: *volume_id.as_untyped_uuid(), + block_size: 512, + sub_volumes: vec![], + read_only_parent: None, + }, + ) + .await + .unwrap(); + + // Add old CrucibleResources json in the `resources_to_clean_up` column + // - this was before the `deleting` column / field was added to + // ResourceSnapshot. + + { + use nexus_db_schema::schema::volume::dsl; + + let conn = datastore.pool_connection_unauthorized().await.unwrap(); + + let resources_to_clean_up = r#"{ + "V1": { + "datasets_and_regions": [], + "datasets_and_snapshots": [ + [ + { + "identity": { + "id": "844ee8d5-7641-4b04-bca8-7521e258028a", + "time_created": "2023-12-19T21:38:34.000000Z", + "time_modified": "2023-12-19T21:38:34.000000Z" + }, + "time_deleted": null, + "rcgen": 1, + "pool_id": "81a98506-4a97-4d92-8de5-c21f6fc71649", + "ip": "fd00:1122:3344:101::1", + "port": 32345, + "kind": "Crucible", + "size_used": 10737418240 + }, + { + "dataset_id": "b69edd77-1b3e-4f11-978c-194a0a0137d0", + "region_id": "8d668bf9-68cc-4387-8bc0-b4de7ef9744f", + "snapshot_id": "f548332c-6026-4eff-8c1c-ba202cd5c834", + "snapshot_addr": "[fd00:1122:3344:101::2]:19001", + "volume_references": 0 + } + ] + ] + } +} +"#; + + diesel::update(dsl::volume) + .filter(dsl::id.eq(to_db_typed_uuid(volume_id))) + .set(( + dsl::resources_to_clean_up.eq(resources_to_clean_up), + dsl::time_deleted.eq(Utc::now()), + )) + .execute_async(&*conn) + .await + .unwrap(); + } + + // 
Soft delete the volume + + let cr = datastore.soft_delete_volume(volume_id).await.unwrap(); + + // Assert the contents of the returned CrucibleResources + + let datasets_and_regions = + datastore.regions_to_delete(&cr).await.unwrap(); + let datasets_and_snapshots = + datastore.snapshots_to_delete(&cr).await.unwrap(); + + assert!(datasets_and_regions.is_empty()); + assert_eq!(datasets_and_snapshots.len(), 1); + + let region_snapshot = &datasets_and_snapshots[0].1; + + assert_eq!( + region_snapshot.snapshot_id, + "f548332c-6026-4eff-8c1c-ba202cd5c834".parse::().unwrap() + ); + assert_eq!(region_snapshot.deleting, false); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_volume_replace_region() { + let logctx = dev::test_setup_log("test_volume_replace_region"); + let log = logctx.log.new(o!()); + let db = TestDatabase::new_with_datastore(&log).await; + let opctx = db.opctx(); + let datastore = db.datastore(); + let conn = datastore.pool_connection_for_tests().await.unwrap(); + + let _test_datasets = TestDatasets::create( + &opctx, + datastore.clone(), + REGION_REDUNDANCY_THRESHOLD, + ) + .await; + + let volume_id = VolumeUuid::new_v4(); + let volume_to_delete_id = VolumeUuid::new_v4(); + + let datasets_and_regions = datastore + .disk_region_allocate( + &opctx, + volume_id, + &DiskSource::Blank { block_size: 512.try_into().unwrap() }, + ByteCount::from_gibibytes_u32(1), + &&RegionAllocationStrategy::RandomWithDistinctSleds { + seed: None, + }, + ) + .await + .unwrap(); + + let mut region_addresses: Vec = + Vec::with_capacity(datasets_and_regions.len()); + + for (i, (_, region)) in datasets_and_regions.iter().enumerate() { + // `disk_region_allocate` won't put any ports in, so add fake ones + // here + use nexus_db_schema::schema::region::dsl; + diesel::update(dsl::region) + .filter(dsl::id.eq(region.id())) + .set(dsl::port.eq(Some::((100 + i as u16).into()))) + .execute_async(&*conn) + .await + .unwrap(); + + let address: 
SocketAddrV6 = + datastore.region_addr(region.id()).await.unwrap().unwrap(); + + region_addresses.push(address); + } + + // Manually create a replacement region at the first dataset + let replacement_region = { + let (dataset, region) = &datasets_and_regions[0]; + let region = Region::new( + dataset.id(), + volume_to_delete_id, + region.block_size().try_into().unwrap(), + region.blocks_per_extent(), + region.extent_count(), + 111, + false, // read-write + ); + + use nexus_db_schema::schema::region::dsl; + diesel::insert_into(dsl::region) + .values(region.clone()) + .execute_async(&*conn) + .await + .unwrap(); + + region + }; + + let replacement_region_addr: SocketAddrV6 = datastore + .region_addr(replacement_region.id()) + .await + .unwrap() + .unwrap(); + + let _volume = datastore + .volume_create( + volume_id, + VolumeConstructionRequest::Volume { + id: *volume_id.as_untyped_uuid(), + block_size: 512, + sub_volumes: vec![VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 10, + extent_count: 10, + generation: 1, + opts: CrucibleOpts { + id: *volume_id.as_untyped_uuid(), + target: vec![ + // target to replace + region_addresses[0].into(), + region_addresses[1].into(), + region_addresses[2].into(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: false, + }, + }], + read_only_parent: None, + }, + ) + .await + .unwrap(); + + // Replace one + + let volume_replace_region_result = datastore + .volume_replace_region( + /* target */ + db::datastore::volume::VolumeReplacementParams { + volume_id, + region_id: datasets_and_regions[0].1.id(), + region_addr: region_addresses[0], + }, + /* replacement */ + db::datastore::volume::VolumeReplacementParams { + volume_id: volume_to_delete_id, + region_id: replacement_region.id(), + region_addr: replacement_region_addr, + }, + ) + .await + .unwrap(); + + assert_eq!(volume_replace_region_result, 
VolumeReplaceResult::Done); + + let vcr: VolumeConstructionRequest = serde_json::from_str( + datastore.volume_get(volume_id).await.unwrap().unwrap().data(), + ) + .unwrap(); + + // Ensure the shape of the resulting VCR + assert_eq!( + &vcr, + &VolumeConstructionRequest::Volume { + id: *volume_id.as_untyped_uuid(), + block_size: 512, + sub_volumes: vec![VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 10, + extent_count: 10, + generation: 2, // generation number bumped + opts: CrucibleOpts { + id: *volume_id.as_untyped_uuid(), + target: vec![ + replacement_region_addr.into(), // replaced + region_addresses[1].into(), + region_addresses[2].into(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: false, + }, + }], + read_only_parent: None, + }, + ); + + // Now undo the replacement. Note volume ID is not swapped. + let volume_replace_region_result = datastore + .volume_replace_region( + /* target */ + db::datastore::volume::VolumeReplacementParams { + volume_id, + region_id: replacement_region.id(), + region_addr: replacement_region_addr, + }, + /* replacement */ + db::datastore::volume::VolumeReplacementParams { + volume_id: volume_to_delete_id, + region_id: datasets_and_regions[0].1.id(), + region_addr: region_addresses[0], + }, + ) + .await + .unwrap(); + + assert_eq!(volume_replace_region_result, VolumeReplaceResult::Done); + + let vcr: VolumeConstructionRequest = serde_json::from_str( + datastore.volume_get(volume_id).await.unwrap().unwrap().data(), + ) + .unwrap(); + + // Ensure the shape of the resulting VCR + assert_eq!( + &vcr, + &VolumeConstructionRequest::Volume { + id: *volume_id.as_untyped_uuid(), + block_size: 512, + sub_volumes: vec![VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 10, + extent_count: 10, + generation: 3, // generation number bumped + opts: CrucibleOpts { + id: *volume_id.as_untyped_uuid(), 
+ target: vec![ + region_addresses[0].into(), // back to what it was + region_addresses[1].into(), + region_addresses[2].into(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: false, + }, + }], + read_only_parent: None, + }, + ); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_volume_replace_snapshot() { + let logctx = dev::test_setup_log("test_volume_replace_snapshot"); + let log = logctx.log.new(o!()); + let db = TestDatabase::new_with_datastore(&log).await; + let opctx = db.opctx(); + let datastore = db.datastore(); + let conn = datastore.pool_connection_for_tests().await.unwrap(); + + let _test_datasets = TestDatasets::create( + &opctx, + datastore.clone(), + REGION_REDUNDANCY_THRESHOLD, + ) + .await; + + let volume_id = VolumeUuid::new_v4(); + let volume_to_delete_id = VolumeUuid::new_v4(); + + let datasets_and_regions = datastore + .disk_region_allocate( + &opctx, + volume_id, + &DiskSource::Blank { block_size: 512.try_into().unwrap() }, + ByteCount::from_gibibytes_u32(1), + &&RegionAllocationStrategy::RandomWithDistinctSleds { + seed: None, + }, + ) + .await + .unwrap(); + + let mut region_addresses: Vec = + Vec::with_capacity(datasets_and_regions.len()); + + for (i, (_, region)) in datasets_and_regions.iter().enumerate() { + // `disk_region_allocate` won't put any ports in, so add fake ones + // here + use nexus_db_schema::schema::region::dsl; + diesel::update(dsl::region) + .filter(dsl::id.eq(region.id())) + .set(dsl::port.eq(Some::((100 + i as u16).into()))) + .execute_async(&*conn) + .await + .unwrap(); + + let address: SocketAddrV6 = + datastore.region_addr(region.id()).await.unwrap().unwrap(); + + region_addresses.push(address); + } + + // Manually create a replacement region at the first dataset + let replacement_region = { + let (dataset, region) = &datasets_and_regions[0]; + let region = Region::new( + 
dataset.id(), + volume_to_delete_id, + region.block_size().try_into().unwrap(), + region.blocks_per_extent(), + region.extent_count(), + 111, + true, // read-only + ); + + use nexus_db_schema::schema::region::dsl; + diesel::insert_into(dsl::region) + .values(region.clone()) + .execute_async(&*conn) + .await + .unwrap(); + + region + }; + + let replacement_region_addr: SocketAddrV6 = datastore + .region_addr(replacement_region.id()) + .await + .unwrap() + .unwrap(); + + // need to add region snapshot objects to satisfy volume create + // transaction's search for resources + + let address_1: SocketAddrV6 = + "[fd00:1122:3344:104::1]:400".parse().unwrap(); + let address_2: SocketAddrV6 = + "[fd00:1122:3344:105::1]:401".parse().unwrap(); + let address_3: SocketAddrV6 = + "[fd00:1122:3344:106::1]:402".parse().unwrap(); + + let region_snapshots = [ + RegionSnapshot::new( + DatasetUuid::new_v4(), + Uuid::new_v4(), + Uuid::new_v4(), + address_1.to_string(), + ), + RegionSnapshot::new( + DatasetUuid::new_v4(), + Uuid::new_v4(), + Uuid::new_v4(), + address_2.to_string(), + ), + RegionSnapshot::new( + DatasetUuid::new_v4(), + Uuid::new_v4(), + Uuid::new_v4(), + address_3.to_string(), + ), + ]; + + datastore + .region_snapshot_create(region_snapshots[0].clone()) + .await + .unwrap(); + datastore + .region_snapshot_create(region_snapshots[1].clone()) + .await + .unwrap(); + datastore + .region_snapshot_create(region_snapshots[2].clone()) + .await + .unwrap(); + + // Insert two volumes: one with the target to replace, and one temporary + // "volume to delete" that's blank. Validate the pre-replacement volume + // resource usage records. 
+ + let rop_id = Uuid::new_v4(); + + datastore + .volume_create( + volume_id, + VolumeConstructionRequest::Volume { + id: *volume_id.as_untyped_uuid(), + block_size: 512, + sub_volumes: vec![VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 10, + extent_count: 10, + generation: 1, + opts: CrucibleOpts { + id: *volume_id.as_untyped_uuid(), + target: vec![ + region_addresses[0].into(), + region_addresses[1].into(), + region_addresses[2].into(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: false, + }, + }], + read_only_parent: Some(Box::new( + VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 10, + extent_count: 10, + generation: 1, + opts: CrucibleOpts { + id: rop_id, + target: vec![ + // target to replace + address_1.into(), + address_2.into(), + address_3.into(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + }, + }, + )), + }, + ) + .await + .unwrap(); + + for region_snapshot in ®ion_snapshots { + let usage = datastore + .volume_usage_records_for_resource( + VolumeResourceUsage::RegionSnapshot { + dataset_id: region_snapshot.dataset_id(), + region_id: region_snapshot.region_id, + snapshot_id: region_snapshot.snapshot_id, + }, + ) + .await + .unwrap(); + + assert_eq!(usage.len(), 1); + assert_eq!(usage[0].volume_id(), volume_id); + } + + datastore + .volume_create( + volume_to_delete_id, + VolumeConstructionRequest::Volume { + id: *volume_to_delete_id.as_untyped_uuid(), + block_size: 512, + sub_volumes: vec![], + read_only_parent: None, + }, + ) + .await + .unwrap(); + + // `volume_create` above was called with a blank volume, so no usage + // record will have been created for the read-only region + + let usage = datastore + .volume_usage_records_for_resource( + VolumeResourceUsage::ReadOnlyRegion { + region_id: 
replacement_region.id(), + }, + ) + .await + .unwrap(); + + assert!(usage.is_empty()); + + // Do the replacement + + let volume_replace_snapshot_result = datastore + .volume_replace_snapshot( + VolumeWithTarget(volume_id), + ExistingTarget(address_1), + ReplacementTarget(replacement_region_addr), + VolumeToDelete(volume_to_delete_id), + ) + .await + .unwrap(); + + assert_eq!(volume_replace_snapshot_result, VolumeReplaceResult::Done); + + // Ensure the shape of the resulting VCRs + + let vcr: VolumeConstructionRequest = serde_json::from_str( + datastore.volume_get(volume_id).await.unwrap().unwrap().data(), + ) + .unwrap(); + + assert_eq!( + &vcr, + &VolumeConstructionRequest::Volume { + id: *volume_id.as_untyped_uuid(), + block_size: 512, + sub_volumes: vec![VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 10, + extent_count: 10, + generation: 1, + opts: CrucibleOpts { + id: *volume_id.as_untyped_uuid(), + target: vec![ + region_addresses[0].into(), + region_addresses[1].into(), + region_addresses[2].into(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: false, + }, + }], + read_only_parent: Some(Box::new( + VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 10, + extent_count: 10, + generation: 1, + opts: CrucibleOpts { + id: rop_id, + target: vec![ + // target replaced + replacement_region_addr.into(), + address_2.into(), + address_3.into(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + }, + } + )), + }, + ); + + let vcr: VolumeConstructionRequest = serde_json::from_str( + datastore + .volume_get(volume_to_delete_id) + .await + .unwrap() + .unwrap() + .data(), + ) + .unwrap(); + + assert_eq!( + &vcr, + &VolumeConstructionRequest::Volume { + id: *volume_to_delete_id.as_untyped_uuid(), + block_size: 512, + 
sub_volumes: vec![VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 1, + extent_count: 1, + generation: 1, + opts: CrucibleOpts { + id: *volume_to_delete_id.as_untyped_uuid(), + target: vec![ + // replaced target stashed here + address_1.into(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + }, + }], + read_only_parent: None, + }, + ); + + // Validate the post-replacement volume resource usage records + + for (i, region_snapshot) in region_snapshots.iter().enumerate() { + let usage = datastore + .volume_usage_records_for_resource( + VolumeResourceUsage::RegionSnapshot { + dataset_id: region_snapshot.dataset_id(), + region_id: region_snapshot.region_id, + snapshot_id: region_snapshot.snapshot_id, + }, + ) + .await + .unwrap(); + + assert_eq!(usage.len(), 1); + + match i { + 0 => { + assert_eq!(usage[0].volume_id(), volume_to_delete_id); + } + + 1 | 2 => { + assert_eq!(usage[0].volume_id(), volume_id); + } + + _ => panic!("out of range"), + } + } + + let usage = datastore + .volume_usage_records_for_resource( + VolumeResourceUsage::ReadOnlyRegion { + region_id: replacement_region.id(), + }, + ) + .await + .unwrap(); + + assert_eq!(usage.len(), 1); + assert_eq!(usage[0].volume_id(), volume_id); + + // Now undo the replacement. Note volume ID is not swapped. 
+ + let volume_replace_snapshot_result = datastore + .volume_replace_snapshot( + VolumeWithTarget(volume_id), + ExistingTarget(replacement_region_addr), + ReplacementTarget(address_1), + VolumeToDelete(volume_to_delete_id), + ) + .await + .unwrap(); + + assert_eq!(volume_replace_snapshot_result, VolumeReplaceResult::Done,); + + let vcr: VolumeConstructionRequest = serde_json::from_str( + datastore.volume_get(volume_id).await.unwrap().unwrap().data(), + ) + .unwrap(); + + // Ensure the shape of the resulting VCR + assert_eq!( + &vcr, + &VolumeConstructionRequest::Volume { + id: *volume_id.as_untyped_uuid(), + block_size: 512, + sub_volumes: vec![VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 10, + extent_count: 10, + generation: 1, + opts: CrucibleOpts { + id: *volume_id.as_untyped_uuid(), + target: vec![ + region_addresses[0].into(), + region_addresses[1].into(), + region_addresses[2].into(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: false, + }, + }], + read_only_parent: Some(Box::new( + VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 10, + extent_count: 10, + generation: 1, + opts: CrucibleOpts { + id: *rop_id.as_untyped_uuid(), + target: vec![ + // back to what it was + address_1.into(), + address_2.into(), + address_3.into(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + }, + } + )), + }, + ); + + let vcr: VolumeConstructionRequest = serde_json::from_str( + datastore + .volume_get(volume_to_delete_id) + .await + .unwrap() + .unwrap() + .data(), + ) + .unwrap(); + + assert_eq!( + &vcr, + &VolumeConstructionRequest::Volume { + id: *volume_to_delete_id.as_untyped_uuid(), + block_size: 512, + sub_volumes: vec![VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 1, + extent_count: 
1, + generation: 1, + opts: CrucibleOpts { + id: *volume_to_delete_id.as_untyped_uuid(), + target: vec![ + // replacement stashed here + replacement_region_addr.into(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + }, + }], + read_only_parent: None, + }, + ); + + // Validate the post-post-replacement volume resource usage records + + for region_snapshot in ®ion_snapshots { + let usage = datastore + .volume_usage_records_for_resource( + VolumeResourceUsage::RegionSnapshot { + dataset_id: region_snapshot.dataset_id(), + region_id: region_snapshot.region_id, + snapshot_id: region_snapshot.snapshot_id, + }, + ) + .await + .unwrap(); + + assert_eq!(usage.len(), 1); + assert_eq!(usage[0].volume_id(), volume_id); + } + + let usage = datastore + .volume_usage_records_for_resource( + VolumeResourceUsage::ReadOnlyRegion { + region_id: replacement_region.id(), + }, + ) + .await + .unwrap(); + + assert_eq!(usage.len(), 1); + assert_eq!(usage[0].volume_id(), volume_to_delete_id); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_find_volumes_referencing_socket_addr() { + let logctx = + dev::test_setup_log("test_find_volumes_referencing_socket_addr"); + let log = logctx.log.new(o!()); + let db = TestDatabase::new_with_datastore(&log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let volume_id = VolumeUuid::new_v4(); + + // need to add region snapshot objects to satisfy volume create + // transaction's search for resources + + let address_1: SocketAddrV6 = + "[fd00:1122:3344:104::1]:400".parse().unwrap(); + let address_2: SocketAddrV6 = + "[fd00:1122:3344:105::1]:401".parse().unwrap(); + let address_3: SocketAddrV6 = + "[fd00:1122:3344:106::1]:402".parse().unwrap(); + + datastore + .region_snapshot_create(RegionSnapshot::new( + DatasetUuid::new_v4(), + Uuid::new_v4(), + Uuid::new_v4(), + 
address_1.to_string(), + )) + .await + .unwrap(); + datastore + .region_snapshot_create(RegionSnapshot::new( + DatasetUuid::new_v4(), + Uuid::new_v4(), + Uuid::new_v4(), + address_2.to_string(), + )) + .await + .unwrap(); + datastore + .region_snapshot_create(RegionSnapshot::new( + DatasetUuid::new_v4(), + Uuid::new_v4(), + Uuid::new_v4(), + address_3.to_string(), + )) + .await + .unwrap(); + + // case where the needle is found + + datastore + .volume_create( + volume_id, + VolumeConstructionRequest::Volume { + id: *volume_id.as_untyped_uuid(), + block_size: 512, + sub_volumes: vec![], + read_only_parent: Some(Box::new( + VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 10, + extent_count: 10, + generation: 1, + opts: CrucibleOpts { + id: Uuid::new_v4(), + target: vec![ + address_1.into(), + address_2.into(), + address_3.into(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + }, + }, + )), + }, + ) + .await + .unwrap(); + + let volumes = datastore + .find_volumes_referencing_socket_addr(&opctx, address_1.into()) + .await + .unwrap(); + + assert_eq!(volumes.len(), 1); + assert_eq!(volumes[0].id(), volume_id); + + // case where the needle is missing + + let volumes = datastore + .find_volumes_referencing_socket_addr( + &opctx, + "[fd55:1122:3344:104::1]:400".parse().unwrap(), + ) + .await + .unwrap(); + + assert!(volumes.is_empty()); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[test] + fn test_read_only_target_in_vcr() { + // read_only_target_in_vcr should find read-only targets + + let vcr = VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), + block_size: 512, + sub_volumes: vec![], + read_only_parent: Some(Box::new( + VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 10, + extent_count: 10, + generation: 1, + opts: CrucibleOpts { + id: Uuid::new_v4(), + target: vec![ + 
"[fd00:1122:3344:104::1]:400".parse().unwrap(), + "[fd00:1122:3344:105::1]:401".parse().unwrap(), + "[fd00:1122:3344:106::1]:402".parse().unwrap(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + }, + }, + )), + }; + + assert!( + read_only_target_in_vcr( + &vcr, + &"[fd00:1122:3344:104::1]:400".parse().unwrap(), + ) + .unwrap() + ); + + // read_only_target_in_vcr should _not_ find read-write targets + + let vcr = VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), + block_size: 512, + sub_volumes: vec![VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 10, + extent_count: 10, + generation: 1, + opts: CrucibleOpts { + id: Uuid::new_v4(), + target: vec![ + "[fd00:1122:3344:104::1]:400".parse().unwrap(), + "[fd00:1122:3344:105::1]:401".parse().unwrap(), + "[fd00:1122:3344:106::1]:402".parse().unwrap(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: false, + }, + }], + read_only_parent: None, + }; + + assert!( + !read_only_target_in_vcr( + &vcr, + &"[fd00:1122:3344:104::1]:400".parse().unwrap(), + ) + .unwrap() + ); + + // read_only_target_in_vcr should bail on incorrect VCRs (currently it + // only detects a read/write region under a read-only parent) + + let vcr = VolumeConstructionRequest::Volume { + id: Uuid::new_v4(), + block_size: 512, + sub_volumes: vec![], + read_only_parent: Some(Box::new( + VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 10, + extent_count: 10, + generation: 1, + opts: CrucibleOpts { + id: Uuid::new_v4(), + target: vec![ + "[fd00:1122:3344:104::1]:400".parse().unwrap(), + "[fd00:1122:3344:105::1]:401".parse().unwrap(), + "[fd00:1122:3344:106::1]:402".parse().unwrap(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + 
control: None, + read_only: false, // invalid! + }, + }, + )), + }; + + read_only_target_in_vcr( + &vcr, + &"[fd00:1122:3344:104::1]:400".parse().unwrap(), + ) + .unwrap_err(); + } + + #[test] + fn test_replace_read_only_target_in_vcr() { + // replace_read_only_target_in_vcr should perform a replacement in a + // read-only parent + + let volume_id = Uuid::new_v4(); + + let vcr = VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![], + read_only_parent: Some(Box::new( + VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 10, + extent_count: 10, + generation: 1, + opts: CrucibleOpts { + id: volume_id, + target: vec![ + "[fd00:1122:3344:104::1]:400".parse().unwrap(), + "[fd00:1122:3344:105::1]:401".parse().unwrap(), + "[fd00:1122:3344:106::1]:402".parse().unwrap(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + }, + }, + )), + }; + + let old_target = + ExistingTarget("[fd00:1122:3344:105::1]:401".parse().unwrap()); + let new_target = + ReplacementTarget("[fd99:1122:3344:105::1]:12345".parse().unwrap()); + + let (new_vcr, replacements) = + replace_read_only_target_in_vcr(&vcr, old_target, new_target) + .unwrap(); + + assert_eq!(replacements, 1); + assert_eq!( + &new_vcr, + &VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![], + read_only_parent: Some(Box::new( + VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 10, + extent_count: 10, + generation: 1, + opts: CrucibleOpts { + id: volume_id, + target: vec![ + "[fd00:1122:3344:104::1]:400".parse().unwrap(), + new_target.0.into(), + "[fd00:1122:3344:106::1]:402".parse().unwrap(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + } + } + )) + } + ); + + // replace_read_only_target_in_vcr 
should perform a replacement in a + // read-only parent in a sub-volume + + let vcr = VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 10, + extent_count: 10, + generation: 1, + opts: CrucibleOpts { + id: volume_id, + target: vec![ + "[fd55:1122:3344:204::1]:1000".parse().unwrap(), + "[fd55:1122:3344:205::1]:1001".parse().unwrap(), + "[fd55:1122:3344:206::1]:1002".parse().unwrap(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: false, + }, + }], + read_only_parent: Some(Box::new( + VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 10, + extent_count: 10, + generation: 1, + opts: CrucibleOpts { + id: volume_id, + target: vec![ + "[fd33:1122:3344:304::1]:2000".parse().unwrap(), + "[fd33:1122:3344:305::1]:2001".parse().unwrap(), + "[fd33:1122:3344:306::1]:2002".parse().unwrap(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + }, + }, + )), + }], + read_only_parent: Some(Box::new( + VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 10, + extent_count: 10, + generation: 1, + opts: CrucibleOpts { + id: volume_id, + target: vec![ + "[fd00:1122:3344:104::1]:400".parse().unwrap(), + "[fd00:1122:3344:105::1]:401".parse().unwrap(), + "[fd00:1122:3344:106::1]:402".parse().unwrap(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + }, + }, + )), + }; + + let old_target = + ExistingTarget("[fd33:1122:3344:306::1]:2002".parse().unwrap()); + let new_target = + 
ReplacementTarget("[fd99:1122:3344:105::1]:12345".parse().unwrap()); + + let (new_vcr, replacements) = + replace_read_only_target_in_vcr(&vcr, old_target, new_target) + .unwrap(); + + assert_eq!(replacements, 1); + assert_eq!( + &new_vcr, + &VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 10, + extent_count: 10, + generation: 1, + opts: CrucibleOpts { + id: volume_id, + target: vec![ + "[fd55:1122:3344:204::1]:1000".parse().unwrap(), + "[fd55:1122:3344:205::1]:1001".parse().unwrap(), + "[fd55:1122:3344:206::1]:1002".parse().unwrap(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: false, + } + }], + read_only_parent: Some(Box::new( + VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 10, + extent_count: 10, + generation: 1, + opts: CrucibleOpts { + id: volume_id, + target: vec![ + "[fd33:1122:3344:304::1]:2000" + .parse() + .unwrap(), + "[fd33:1122:3344:305::1]:2001" + .parse() + .unwrap(), + new_target.0.into(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + } + } + )), + }], + read_only_parent: Some(Box::new( + VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 10, + extent_count: 10, + generation: 1, + opts: CrucibleOpts { + id: volume_id, + target: vec![ + "[fd00:1122:3344:104::1]:400".parse().unwrap(), + "[fd00:1122:3344:105::1]:401".parse().unwrap(), + "[fd00:1122:3344:106::1]:402".parse().unwrap(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + } + } + )) + } + ); + + // 
replace_read_only_target_in_vcr should perform multiple replacements + // if necessary (even if this is dubious!) - the caller will decide if + // this should be legal or not + + let rop = VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 10, + extent_count: 10, + generation: 1, + opts: CrucibleOpts { + id: volume_id, + target: vec![ + "[fd33:1122:3344:304::1]:2000".parse().unwrap(), + "[fd33:1122:3344:305::1]:2001".parse().unwrap(), + "[fd33:1122:3344:306::1]:2002".parse().unwrap(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + }, + }; + + let vcr = VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 10, + extent_count: 10, + generation: 1, + opts: CrucibleOpts { + id: volume_id, + target: vec![ + "[fd55:1122:3344:204::1]:1000".parse().unwrap(), + "[fd55:1122:3344:205::1]:1001".parse().unwrap(), + "[fd55:1122:3344:206::1]:1002".parse().unwrap(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: false, + }, + }], + read_only_parent: Some(Box::new(rop.clone())), + }], + read_only_parent: Some(Box::new(rop)), + }; + + let old_target = + ExistingTarget("[fd33:1122:3344:304::1]:2000".parse().unwrap()); + let new_target = + ReplacementTarget("[fd99:1122:3344:105::1]:12345".parse().unwrap()); + + let (new_vcr, replacements) = + replace_read_only_target_in_vcr(&vcr, old_target, new_target) + .unwrap(); + + assert_eq!(replacements, 2); + + let rop = VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 10, + extent_count: 10, + generation: 1, + opts: CrucibleOpts { + id: volume_id, + target: vec![ + new_target.0.into(), + 
"[fd33:1122:3344:305::1]:2001".parse().unwrap(), + "[fd33:1122:3344:306::1]:2002".parse().unwrap(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + }, + }; + + assert_eq!( + &new_vcr, + &VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 10, + extent_count: 10, + generation: 1, + opts: CrucibleOpts { + id: volume_id, + target: vec![ + "[fd55:1122:3344:204::1]:1000".parse().unwrap(), + "[fd55:1122:3344:205::1]:1001".parse().unwrap(), + "[fd55:1122:3344:206::1]:1002".parse().unwrap(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: false, + } + }], + read_only_parent: Some(Box::new(rop.clone())), + }], + read_only_parent: Some(Box::new(rop)), + } + ); + } + + /// Assert that there are no "deleted" r/w regions found when the associated + /// volume hasn't been created yet. + #[tokio::test] + async fn test_no_find_deleted_region_for_no_volume() { + let logctx = + dev::test_setup_log("test_no_find_deleted_region_for_no_volume"); + let log = logctx.log.new(o!()); + let db = TestDatabase::new_with_datastore(&log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let _test_datasets = TestDatasets::create( + &opctx, + datastore.clone(), + REGION_REDUNDANCY_THRESHOLD, + ) + .await; + + let volume_id = VolumeUuid::new_v4(); + + // Assert that allocating regions without creating the volume does not + // cause them to be returned as "deleted" regions, as this can cause + // sagas that allocate regions to race with the volume delete saga and + // cause premature region deletion. 
+ + let _datasets_and_regions = datastore + .disk_region_allocate( + &opctx, + volume_id, + &DiskSource::Blank { block_size: 512.try_into().unwrap() }, + ByteCount::from_gibibytes_u32(1), + &&RegionAllocationStrategy::RandomWithDistinctSleds { + seed: None, + }, + ) + .await + .unwrap(); + + let deleted_regions = datastore + .find_deleted_volume_regions() + .await + .expect("find_deleted_volume_regions"); + + assert!(deleted_regions.is_empty()); + + db.terminate().await; + logctx.cleanup_successful(); + } +} diff --git a/nexus/src/app/instance_platform/mod.rs b/nexus/src/app/instance_platform/mod.rs index 6295bedbfbd..1238426714e 100644 --- a/nexus/src/app/instance_platform/mod.rs +++ b/nexus/src/app/instance_platform/mod.rs @@ -496,7 +496,7 @@ impl super::Nexus { // backends' volume construction requests. Calling // `volume_checkout` bumps the volumes' generation numbers. - use db::datastore::VolumeCheckoutReason; + use db::datastore::volume::VolumeCheckoutReason; let volume = self .db_datastore diff --git a/nexus/src/app/sagas/common_storage.rs b/nexus/src/app/sagas/common_storage.rs index b160c68e4c9..1a76994772b 100644 --- a/nexus/src/app/sagas/common_storage.rs +++ b/nexus/src/app/sagas/common_storage.rs @@ -92,7 +92,7 @@ pub(crate) async fn call_pantry_attach_for_disk( .datastore() .volume_checkout( disk.volume_id(), - db::datastore::VolumeCheckoutReason::Pantry, + db::datastore::volume::VolumeCheckoutReason::Pantry, ) .await .map_err(saga_action_failed)?; diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs index 307c6825d86..ae2cbfd3de7 100644 --- a/nexus/src/app/sagas/disk_create.rs +++ b/nexus/src/app/sagas/disk_create.rs @@ -9,6 +9,7 @@ use super::{ call_pantry_attach_for_disk, call_pantry_detach, get_pantry_address, }, }; +use crate::app::db::datastore::volume::VolumeCheckoutReason; use crate::app::sagas::declare_saga_actions; use crate::app::{authn, authz, db}; use nexus_db_lookup::LookupPath; @@ -563,7 +564,7 @@ async 
fn sdc_regions_ensure( .datastore() .volume_checkout( db_snapshot.volume_id(), - db::datastore::VolumeCheckoutReason::ReadOnlyCopy, + VolumeCheckoutReason::ReadOnlyCopy, ) .await .map_err(saga_action_failed)?; @@ -606,7 +607,7 @@ async fn sdc_regions_ensure( .datastore() .volume_checkout( image.volume_id(), - db::datastore::VolumeCheckoutReason::ReadOnlyCopy, + VolumeCheckoutReason::ReadOnlyCopy, ) .await .map_err(saga_action_failed)?; diff --git a/nexus/src/app/sagas/image_create.rs b/nexus/src/app/sagas/image_create.rs index 8e5a8a0ae40..dab642214de 100644 --- a/nexus/src/app/sagas/image_create.rs +++ b/nexus/src/app/sagas/image_create.rs @@ -153,9 +153,11 @@ async fn simc_get_source_volume( osagactx .datastore() .volume_checkout_randomize_ids( - db::datastore::SourceVolume(db_snapshot.volume_id()), - db::datastore::DestVolume(dest_volume_id), - db::datastore::VolumeCheckoutReason::ReadOnlyCopy, + db::datastore::volume::SourceVolume( + db_snapshot.volume_id(), + ), + db::datastore::volume::DestVolume(dest_volume_id), + db::datastore::volume::VolumeCheckoutReason::ReadOnlyCopy, ) .await .map_err(saga_action_failed)?; diff --git a/nexus/src/app/sagas/region_replacement_drive.rs b/nexus/src/app/sagas/region_replacement_drive.rs index b55a225d756..32a6a458d2f 100644 --- a/nexus/src/app/sagas/region_replacement_drive.rs +++ b/nexus/src/app/sagas/region_replacement_drive.rs @@ -1446,7 +1446,10 @@ async fn execute_pantry_drive_action( ); let disk_volume = datastore - .volume_checkout(volume_id, db::datastore::VolumeCheckoutReason::Pantry) + .volume_checkout( + volume_id, + db::datastore::volume::VolumeCheckoutReason::Pantry, + ) .await .map_err(saga_action_failed)?; diff --git a/nexus/src/app/sagas/region_replacement_start.rs b/nexus/src/app/sagas/region_replacement_start.rs index 3e3ff825578..456d94e7979 100644 --- a/nexus/src/app/sagas/region_replacement_start.rs +++ b/nexus/src/app/sagas/region_replacement_start.rs @@ -50,7 +50,7 @@ use super::{ SagaInitError, }; use 
crate::app::RegionAllocationStrategy; -use crate::app::db::datastore::VolumeReplaceResult; +use crate::app::db::datastore::volume::VolumeReplaceResult; use crate::app::sagas::common_storage::find_only_new_region; use crate::app::sagas::declare_saga_actions; use crate::app::{authn, db}; @@ -538,13 +538,13 @@ async fn srrs_replace_region_in_volume( .datastore() .volume_replace_region( /* target */ - db::datastore::VolumeReplacementParams { + db::datastore::volume::VolumeReplacementParams { volume_id: old_volume_id, region_id: db_region.id(), region_addr: old_region_address, }, /* replacement */ - db::datastore::VolumeReplacementParams { + db::datastore::volume::VolumeReplacementParams { volume_id: new_volume_id, region_id: ensured_region.id.0.parse().unwrap(), region_addr: new_region_address, @@ -637,13 +637,13 @@ async fn srrs_replace_region_in_volume_undo( .datastore() .volume_replace_region( /* target */ - db::datastore::VolumeReplacementParams { + db::datastore::volume::VolumeReplacementParams { volume_id: old_volume_id, region_id: ensured_region.id.0.parse().unwrap(), region_addr: new_region_address, }, /* replacement */ - db::datastore::VolumeReplacementParams { + db::datastore::volume::VolumeReplacementParams { volume_id: new_volume_id, region_id: db_region.id(), region_addr: old_region_address, diff --git a/nexus/src/app/sagas/region_snapshot_replacement_start.rs b/nexus/src/app/sagas/region_snapshot_replacement_start.rs index b4822abb3e1..675b419caba 100644 --- a/nexus/src/app/sagas/region_snapshot_replacement_start.rs +++ b/nexus/src/app/sagas/region_snapshot_replacement_start.rs @@ -52,13 +52,13 @@ use super::{ SagaInitError, }; use crate::app::RegionAllocationStrategy; -use crate::app::db::datastore::ExistingTarget; use crate::app::db::datastore::RegionAllocationFor; use crate::app::db::datastore::RegionAllocationParameters; -use crate::app::db::datastore::ReplacementTarget; -use crate::app::db::datastore::VolumeReplaceResult; -use 
crate::app::db::datastore::VolumeToDelete; -use crate::app::db::datastore::VolumeWithTarget; +use crate::app::db::datastore::volume::ExistingTarget; +use crate::app::db::datastore::volume::ReplacementTarget; +use crate::app::db::datastore::volume::VolumeReplaceResult; +use crate::app::db::datastore::volume::VolumeToDelete; +use crate::app::db::datastore::volume::VolumeWithTarget; use crate::app::sagas::common_storage::find_only_new_region; use crate::app::sagas::declare_saga_actions; use crate::app::{authn, db}; diff --git a/nexus/src/app/sagas/region_snapshot_replacement_step.rs b/nexus/src/app/sagas/region_snapshot_replacement_step.rs index aab52af4aea..97210da9c3e 100644 --- a/nexus/src/app/sagas/region_snapshot_replacement_step.rs +++ b/nexus/src/app/sagas/region_snapshot_replacement_step.rs @@ -47,11 +47,11 @@ use super::{ ACTION_GENERATE_ID, ActionRegistry, NexusActionContext, NexusSaga, SagaInitError, }; -use crate::app::db::datastore::ExistingTarget; -use crate::app::db::datastore::ReplacementTarget; -use crate::app::db::datastore::VolumeReplaceResult; -use crate::app::db::datastore::VolumeToDelete; -use crate::app::db::datastore::VolumeWithTarget; +use crate::app::db::datastore::volume::ExistingTarget; +use crate::app::db::datastore::volume::ReplacementTarget; +use crate::app::db::datastore::volume::VolumeReplaceResult; +use crate::app::db::datastore::volume::VolumeToDelete; +use crate::app::db::datastore::volume::VolumeWithTarget; use crate::app::sagas::declare_saga_actions; use crate::app::{authn, authz, db}; use nexus_db_lookup::LookupPath; diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index ce2e81600c9..fa004d3c94f 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -1521,7 +1521,7 @@ async fn ssc_create_volume_record( .datastore() .volume_checkout( params.disk.volume_id(), - db::datastore::VolumeCheckoutReason::CopyAndModify, + 
db::datastore::volume::VolumeCheckoutReason::CopyAndModify, ) .await .map_err(saga_action_failed)?; diff --git a/nexus/src/app/sagas/volume_delete.rs b/nexus/src/app/sagas/volume_delete.rs index b02e7184b8d..82dd8e7688c 100644 --- a/nexus/src/app/sagas/volume_delete.rs +++ b/nexus/src/app/sagas/volume_delete.rs @@ -28,8 +28,8 @@ use super::NexusActionContext; use super::NexusSaga; use crate::app::sagas::declare_saga_actions; use nexus_db_queries::authn; -use nexus_db_queries::db::datastore::CrucibleResources; -use nexus_db_queries::db::datastore::FreedCrucibleResources; +use nexus_db_queries::db::datastore::volume::CrucibleResources; +use nexus_db_queries::db::datastore::volume::FreedCrucibleResources; use nexus_types::saga::saga_action_failed; use omicron_common::api::external::Error; use omicron_uuid_kinds::VolumeUuid; diff --git a/nexus/tests/integration_tests/volume_management.rs b/nexus/tests/integration_tests/volume_management.rs index 5b728763597..0e0256d8311 100644 --- a/nexus/tests/integration_tests/volume_management.rs +++ b/nexus/tests/integration_tests/volume_management.rs @@ -27,18 +27,18 @@ use nexus_db_model::to_db_typed_uuid; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; use nexus_db_queries::db::DataStore; -use nexus_db_queries::db::datastore::CrucibleResources; -use nexus_db_queries::db::datastore::DestVolume; use nexus_db_queries::db::datastore::Disk; -use nexus_db_queries::db::datastore::ExistingTarget; use nexus_db_queries::db::datastore::RegionAllocationFor; use nexus_db_queries::db::datastore::RegionAllocationParameters; -use nexus_db_queries::db::datastore::ReplacementTarget; use nexus_db_queries::db::datastore::SQL_BATCH_SIZE; -use nexus_db_queries::db::datastore::SourceVolume; -use nexus_db_queries::db::datastore::VolumeReplaceResult; -use nexus_db_queries::db::datastore::VolumeToDelete; -use nexus_db_queries::db::datastore::VolumeWithTarget; +use nexus_db_queries::db::datastore::volume::CrucibleResources; +use 
nexus_db_queries::db::datastore::volume::DestVolume; +use nexus_db_queries::db::datastore::volume::ExistingTarget; +use nexus_db_queries::db::datastore::volume::ReplacementTarget; +use nexus_db_queries::db::datastore::volume::SourceVolume; +use nexus_db_queries::db::datastore::volume::VolumeReplaceResult; +use nexus_db_queries::db::datastore::volume::VolumeToDelete; +use nexus_db_queries::db::datastore::volume::VolumeWithTarget; use nexus_db_queries::db::pagination::Paginator; use nexus_db_queries::db::pagination::paginated; use nexus_test_utils::http_testing::AuthnMode; @@ -1445,7 +1445,7 @@ async fn test_volume_remove_read_only_parent_base( let new_vol = datastore .volume_checkout( volume_id, - db::datastore::VolumeCheckoutReason::CopyAndModify, + db::datastore::volume::VolumeCheckoutReason::CopyAndModify, ) .await .unwrap(); @@ -1470,7 +1470,7 @@ async fn test_volume_remove_read_only_parent_base( let new_vol = datastore .volume_checkout( t_vid, - db::datastore::VolumeCheckoutReason::CopyAndModify, + db::datastore::volume::VolumeCheckoutReason::CopyAndModify, ) .await .unwrap(); @@ -1503,7 +1503,7 @@ async fn test_volume_remove_read_only_parent_base( let new_vol = datastore .volume_checkout( t_vid, - db::datastore::VolumeCheckoutReason::CopyAndModify, + db::datastore::volume::VolumeCheckoutReason::CopyAndModify, ) .await .unwrap(); @@ -1645,7 +1645,7 @@ async fn test_volume_remove_rop_saga(cptestctx: &ControlPlaneTestContext) { let new_vol = datastore .volume_checkout( volume_id, - db::datastore::VolumeCheckoutReason::CopyAndModify, + db::datastore::volume::VolumeCheckoutReason::CopyAndModify, ) .await .unwrap(); @@ -1703,7 +1703,7 @@ async fn test_volume_remove_rop_saga_twice( let new_vol = datastore .volume_checkout( volume_id, - db::datastore::VolumeCheckoutReason::CopyAndModify, + db::datastore::volume::VolumeCheckoutReason::CopyAndModify, ) .await .unwrap(); @@ -1806,7 +1806,7 @@ async fn test_volume_remove_rop_saga_deleted_volume( let new_vol = datastore 
.volume_checkout( volume_id, - db::datastore::VolumeCheckoutReason::CopyAndModify, + db::datastore::volume::VolumeCheckoutReason::CopyAndModify, ) .await .unwrap(); @@ -1858,7 +1858,7 @@ async fn test_volume_checkout(cptestctx: &ControlPlaneTestContext) { let new_vol = datastore .volume_checkout( volume_id, - db::datastore::VolumeCheckoutReason::CopyAndModify, + db::datastore::volume::VolumeCheckoutReason::CopyAndModify, ) .await .unwrap(); @@ -1868,7 +1868,7 @@ async fn test_volume_checkout(cptestctx: &ControlPlaneTestContext) { let new_vol = datastore .volume_checkout( volume_id, - db::datastore::VolumeCheckoutReason::CopyAndModify, + db::datastore::volume::VolumeCheckoutReason::CopyAndModify, ) .await .unwrap(); @@ -1909,7 +1909,7 @@ async fn test_volume_checkout_updates_nothing( let new_vol = datastore .volume_checkout( volume_id, - db::datastore::VolumeCheckoutReason::CopyAndModify, + db::datastore::volume::VolumeCheckoutReason::CopyAndModify, ) .await .unwrap(); @@ -1917,7 +1917,7 @@ async fn test_volume_checkout_updates_nothing( let new_vol = datastore .volume_checkout( volume_id, - db::datastore::VolumeCheckoutReason::CopyAndModify, + db::datastore::volume::VolumeCheckoutReason::CopyAndModify, ) .await .unwrap(); @@ -1959,7 +1959,7 @@ async fn test_volume_checkout_updates_multiple_gen( let new_vol = datastore .volume_checkout( volume_id, - db::datastore::VolumeCheckoutReason::CopyAndModify, + db::datastore::volume::VolumeCheckoutReason::CopyAndModify, ) .await .unwrap(); @@ -1969,7 +1969,7 @@ async fn test_volume_checkout_updates_multiple_gen( let new_vol = datastore .volume_checkout( volume_id, - db::datastore::VolumeCheckoutReason::CopyAndModify, + db::datastore::volume::VolumeCheckoutReason::CopyAndModify, ) .await .unwrap(); @@ -1979,7 +1979,7 @@ async fn test_volume_checkout_updates_multiple_gen( let new_vol = datastore .volume_checkout( volume_id, - db::datastore::VolumeCheckoutReason::CopyAndModify, + 
db::datastore::volume::VolumeCheckoutReason::CopyAndModify, ) .await .unwrap(); @@ -2027,7 +2027,7 @@ async fn test_volume_checkout_updates_sparse_multiple_gen( let new_vol = datastore .volume_checkout( volume_id, - db::datastore::VolumeCheckoutReason::CopyAndModify, + db::datastore::volume::VolumeCheckoutReason::CopyAndModify, ) .await .unwrap(); @@ -2037,7 +2037,7 @@ async fn test_volume_checkout_updates_sparse_multiple_gen( let new_vol = datastore .volume_checkout( volume_id, - db::datastore::VolumeCheckoutReason::CopyAndModify, + db::datastore::volume::VolumeCheckoutReason::CopyAndModify, ) .await .unwrap(); @@ -2085,7 +2085,7 @@ async fn test_volume_checkout_updates_sparse_mid_multiple_gen( let new_vol = datastore .volume_checkout( volume_id, - db::datastore::VolumeCheckoutReason::CopyAndModify, + db::datastore::volume::VolumeCheckoutReason::CopyAndModify, ) .await .unwrap(); @@ -2095,7 +2095,7 @@ async fn test_volume_checkout_updates_sparse_mid_multiple_gen( let new_vol = datastore .volume_checkout( volume_id, - db::datastore::VolumeCheckoutReason::CopyAndModify, + db::datastore::volume::VolumeCheckoutReason::CopyAndModify, ) .await .unwrap(); @@ -2137,7 +2137,7 @@ async fn test_volume_checkout_randomize_ids_only_read_only( .volume_checkout_randomize_ids( SourceVolume(volume_id), DestVolume(VolumeUuid::new_v4()), - db::datastore::VolumeCheckoutReason::CopyAndModify, + db::datastore::volume::VolumeCheckoutReason::CopyAndModify, ) .await; assert!(r.is_err()); @@ -6400,7 +6400,7 @@ async fn test_volume_create_wont_use_deleted_region_snapshots( let volume_copy = datastore .volume_checkout( db_snapshot.volume_id(), - db::datastore::VolumeCheckoutReason::ReadOnlyCopy, + db::datastore::volume::VolumeCheckoutReason::ReadOnlyCopy, ) .await .unwrap();