-
Notifications
You must be signed in to change notification settings - Fork 79
Reconfigurator: Add live test that executes expunged zones that were never in service #10081
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
d828e43
18a74c0
31eaa26
ed69397
7c0d57b
25cde51
4ad75be
e40ae95
e0228d3
4889ebc
0f24ae2
48695aa
aa02913
bb4505a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,7 +12,6 @@ use nexus_config::PostgresConfigWithUrl; | |
| use nexus_db_queries::context::OpContext; | ||
| use nexus_db_queries::db::DataStore; | ||
| use nexus_types::deployment::SledFilter; | ||
| use omicron_common::address::Ipv6Subnet; | ||
| use slog::info; | ||
| use slog::o; | ||
| use std::ffi::OsStr; | ||
|
|
@@ -69,6 +68,23 @@ impl LiveTestContext { | |
| &self.datastore | ||
| } | ||
|
|
||
| /// Establish a new `DataStore` connection pointed at this deployed system's | ||
| /// database | ||
| /// | ||
| /// Most consumers should prefer `datastore()`, which returns a reference to | ||
| /// a `DataStore` constructed when this context was created. This method is | ||
| /// useful if a caller needs to reevaluate what Cockroach instances are | ||
| /// available in DNS (e.g., due to zone expungement) or needs a `DataStore` | ||
| /// instance that is not shared. | ||
| pub async fn new_datastore_connection( | ||
| &self, | ||
| ) -> anyhow::Result<(OpContext, Arc<DataStore>)> { | ||
| let log = &self.logctx.log; | ||
| let datastore = create_datastore(log, &self.resolver).await?; | ||
| let opctx = OpContext::for_tests(log.clone(), datastore.clone()); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was surprised this returned a new |
||
| Ok((opctx, datastore)) | ||
| } | ||
|
|
||
| /// Returns a client for a Nexus internal API at the given socket address | ||
| pub fn specific_internal_nexus_client( | ||
| &self, | ||
|
|
@@ -96,20 +112,14 @@ impl LiveTestContext { | |
| } | ||
|
|
||
| fn create_resolver(log: &slog::Logger) -> Result<Resolver, anyhow::Error> { | ||
| // In principle, we should look at /etc/resolv.conf to find the DNS servers. | ||
| // In practice, this usually isn't populated today. See | ||
| // oxidecomputer/omicron#2122. | ||
| // | ||
| // However, the address selected below should work for most existing Omicron | ||
| // deployments today. That's because while the base subnet is in principle | ||
| // configurable in config-rss.toml, it's very uncommon to change it from the | ||
| // default value used here. | ||
| let subnet = Ipv6Subnet::new("fd00:1122:3344:0100::".parse().unwrap()); | ||
| eprintln!("note: using DNS server for subnet {}", subnet.net()); | ||
| internal_dns_resolver::Resolver::new_from_subnet(log.clone(), subnet) | ||
| .with_context(|| { | ||
| format!("creating DNS resolver for subnet {}", subnet.net()) | ||
| }) | ||
| // The internal DNS servers are populated in /etc/resolv.conf in the switch | ||
| // zone, which is where we expect live tests to run. Notify the user that | ||
| // we're going to attempt DNS resolution via the default system path. | ||
| eprintln!( | ||
| "note: using DNS from system config (typically /etc/resolv.conf)", | ||
| ); | ||
| internal_dns_resolver::Resolver::new_from_system_conf(log.clone()) | ||
| .context("creating DNS resolver from system config") | ||
| } | ||
|
|
||
| /// Creates a DataStore pointing at the CockroachDB cluster that's in DNS | ||
|
|
@@ -213,7 +223,7 @@ async fn check_hardware_environment( | |
| ) -> Result<(), anyhow::Error> { | ||
| const ALLOWED_GIMLET_SERIALS: &[&str] = &[ | ||
| // Serial number lists can be generated with: | ||
| // inventron env system list -Hpo serial -F type=gimlet <ENVIRONMENT> | ||
| // inventron env system list -Hpo serial -F type=cosmo -F type=gimlet <ENVIRONMENT> | ||
|
|
||
| // test rig: "madrid" | ||
| "BRM42220081", | ||
|
|
@@ -222,19 +232,19 @@ async fn check_hardware_environment( | |
| "BRM42220004", | ||
| // test rig: "london" | ||
| "BRM42220036", | ||
| "BRM42220062", | ||
| "2CN2M459", | ||
| "BRM42220030", | ||
| "BRM44220007", | ||
| "2RGCFG10", | ||
| // test rig: "dublin" | ||
| "BRM42220026", | ||
| "2F8JEXDK", | ||
| "BRM27230037", | ||
| "BRM23230018", | ||
| "BRM23230010", | ||
| // test rig: "berlin" | ||
| "BRM42220011", | ||
| "BRM44220007", | ||
| "BRM42220082", | ||
| "BRM06240029", | ||
| "271FVPY0", | ||
| ]; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any chance we can add Dogfood here as well? So that we can run these after an update :)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we should do that - the live tests are considerably more destructive than what I'd be comfortable running on dogfood, because they allow nearly-arbitrary editing of the system's target blueprint. |
||
|
|
||
| // Refuse to operate in an environment that might contain real Oxide | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -28,9 +28,28 @@ use std::time::Duration; | |||||
| pub async fn blueprint_load_target_enabled( | ||||||
| log: &slog::Logger, | ||||||
| nexus: &nexus_lockstep_client::Client, | ||||||
| ) -> Result<Blueprint, anyhow::Error> { | ||||||
| blueprint_load_target_impl(log, nexus, true).await | ||||||
| } | ||||||
|
|
||||||
| /// Return the current target blueprint | ||||||
| /// | ||||||
| /// Also validates that it's disabled. If an operator has enabled execution, we | ||||||
| /// don't want to proceed with tests. | ||||||
| pub async fn blueprint_load_target_disabled( | ||||||
| log: &slog::Logger, | ||||||
| nexus: &nexus_lockstep_client::Client, | ||||||
| ) -> Result<Blueprint, anyhow::Error> { | ||||||
| blueprint_load_target_impl(log, nexus, false).await | ||||||
| } | ||||||
|
|
||||||
| async fn blueprint_load_target_impl( | ||||||
| log: &slog::Logger, | ||||||
| nexus: &nexus_lockstep_client::Client, | ||||||
| expect_enabled: bool, | ||||||
| ) -> Result<Blueprint, anyhow::Error> { | ||||||
| // Fetch the current target configuration. | ||||||
| info!(log, "editing current target blueprint"); | ||||||
| info!(log, "loading current target blueprint"); | ||||||
| let target_blueprint = nexus | ||||||
| .blueprint_target_view() | ||||||
| .await | ||||||
|
|
@@ -40,9 +59,11 @@ pub async fn blueprint_load_target_enabled( | |||||
| debug!(log, "found current target blueprint"; | ||||||
| "blueprint_id" => %target_blueprint.target_id | ||||||
| ); | ||||||
|
|
||||||
| let expect_inverse = if !expect_enabled { "enabled" } else { "disabled" }; | ||||||
| ensure!( | ||||||
| target_blueprint.enabled, | ||||||
| "refusing to operate on a system with target blueprint disabled" | ||||||
| target_blueprint.enabled == expect_enabled, | ||||||
| "refusing to operate on a system with target blueprint {expect_inverse}" | ||||||
| ); | ||||||
|
|
||||||
| let blueprint = nexus | ||||||
|
|
@@ -78,14 +99,59 @@ pub async fn blueprint_load_target_enabled( | |||||
| /// case, a developer enables the initial target blueprint before running these | ||||||
| /// tests and then doesn't need to think about it again for the lifetime of | ||||||
| /// their test environment. | ||||||
| pub async fn blueprint_edit_current_target( | ||||||
| pub async fn blueprint_edit_current_target_enabled<F>( | ||||||
| log: &slog::Logger, | ||||||
| nexus: &nexus_lockstep_client::Client, | ||||||
| edit_fn: F, | ||||||
| ) -> Result<(Blueprint, Blueprint), anyhow::Error> | ||||||
| where | ||||||
| F: FnOnce(&mut BlueprintBuilder) -> Result<(), anyhow::Error>, | ||||||
| { | ||||||
| blueprint_edit_current_target_impl(log, nexus, true, edit_fn).await | ||||||
| } | ||||||
|
|
||||||
| /// Modify the system by editing the current target blueprint | ||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| /// | ||||||
| /// More precisely, this function: | ||||||
| /// | ||||||
| /// - fetches the current target blueprint | ||||||
| /// - creates a new BlueprintBuilder based on it | ||||||
| /// - invokes the caller's `edit_fn`, which may modify the builder however it | ||||||
| /// likes | ||||||
| /// - generates a new blueprint (thus based on the current target) | ||||||
| /// - uploads the new blueprint | ||||||
| /// - sets the new blueprint as the current target | ||||||
| /// - disables the new blueprint | ||||||
| /// | ||||||
| /// ## Errors | ||||||
| /// | ||||||
| /// This function fails if the current target blueprint is not already disabled. | ||||||
| /// Callers of this function expect execution to be - and remain - disabled. If | ||||||
| /// that isn't the case, we don't want to inadvertently proceed. | ||||||
| pub async fn blueprint_edit_current_target_disabled<F>( | ||||||
| log: &slog::Logger, | ||||||
| nexus: &nexus_lockstep_client::Client, | ||||||
| edit_fn: F, | ||||||
| ) -> Result<(Blueprint, Blueprint), anyhow::Error> | ||||||
| where | ||||||
| F: FnOnce(&mut BlueprintBuilder) -> Result<(), anyhow::Error>, | ||||||
| { | ||||||
| blueprint_edit_current_target_impl(log, nexus, false, edit_fn).await | ||||||
| } | ||||||
|
|
||||||
| async fn blueprint_edit_current_target_impl<F>( | ||||||
| log: &slog::Logger, | ||||||
| nexus: &nexus_lockstep_client::Client, | ||||||
| edit_fn: &dyn Fn(&mut BlueprintBuilder) -> Result<(), anyhow::Error>, | ||||||
| ) -> Result<(Blueprint, Blueprint), anyhow::Error> { | ||||||
| expect_enabled: bool, | ||||||
| edit_fn: F, | ||||||
| ) -> Result<(Blueprint, Blueprint), anyhow::Error> | ||||||
| where | ||||||
| F: FnOnce(&mut BlueprintBuilder) -> Result<(), anyhow::Error>, | ||||||
| { | ||||||
| // Fetch the current target configuration. | ||||||
| info!(log, "editing current target blueprint"); | ||||||
| let blueprint1 = blueprint_load_target_enabled(log, nexus).await?; | ||||||
| let blueprint1 = | ||||||
| blueprint_load_target_impl(log, nexus, expect_enabled).await?; | ||||||
|
|
||||||
| // Make a new builder based on that blueprint and use `edit_fn` to edit it. | ||||||
| let mut builder = BlueprintBuilder::new_based_on( | ||||||
|
|
@@ -113,14 +179,15 @@ pub async fn blueprint_edit_current_target( | |||||
| ); | ||||||
| nexus | ||||||
| .blueprint_target_set(&BlueprintTargetSet { | ||||||
| enabled: true, | ||||||
| enabled: expect_enabled, | ||||||
| target_id: blueprint2.id, | ||||||
| }) | ||||||
| .await | ||||||
| .expect("setting new target"); | ||||||
| .context("setting new target")?; | ||||||
| info!(log, "finished editing target blueprint"; | ||||||
| "old_target_id" => %blueprint1.id, | ||||||
| "new_target_id" => %blueprint2.id, | ||||||
| "enabled" => %expect_enabled, | ||||||
| ); | ||||||
|
|
||||||
| Ok((blueprint1, blueprint2)) | ||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I take it that you don't need to enable the target blueprint yourself any more?