From c8cddc3158ff9e3b96aa552e5ee2daceb167c6d7 Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Thu, 19 Mar 2026 12:56:57 +0000 Subject: [PATCH] kms: require attestation and explicit KMS MR allowlist --- docs/auth-simple-operations.md | 4 +- docs/deployment.md | 20 +- docs/tutorials/kms-build-configuration.md | 2 - docs/tutorials/kms-cvm-deployment.md | 32 +- .../troubleshooting-kms-deployment.md | 6 +- kms/auth-simple/README.md | 9 +- kms/auth-simple/auth-config.example.json | 4 +- kms/auth-simple/bun.lock | 4 +- kms/auth-simple/index.test.ts | 31 +- kms/auth-simple/index.ts | 2 +- kms/dstack-app/compose-dev.yaml | 1 - kms/dstack-app/compose-simple.yaml | 1 - kms/kms.toml | 1 - kms/src/config.rs | 1 - kms/src/main_service.rs | 7 +- kms/src/onboard_service.rs | 168 +++---- tests/docs/kms-bootstrap-onboard.md | 456 ++++++++++++++++++ tests/docs/kms-self-authrization.md | 110 ++--- 18 files changed, 636 insertions(+), 223 deletions(-) create mode 100644 tests/docs/kms-bootstrap-onboard.md diff --git a/docs/auth-simple-operations.md b/docs/auth-simple-operations.md index 8b253b9c2..fad7c8fc9 100644 --- a/docs/auth-simple-operations.md +++ b/docs/auth-simple-operations.md @@ -36,7 +36,7 @@ The config is re-read on each request, so changes take effect immediately withou } ``` -> **Note:** Only `osImages` is required. Add `gatewayAppId` after deploying the Gateway. Add `apps` entries as you deploy applications. +> **Note:** `osImages` is always required. For KMS authorization, you must also populate `kms.mrAggregated`; if it is left empty, auth-simple denies all KMS boots. Add `gatewayAppId` after deploying the Gateway. Add `apps` entries as you deploy applications. --- @@ -240,7 +240,7 @@ The `mrAggregated` is sent by the booting KMS in its auth request. To get this v KMS boot auth request: { osImageHash: '0x...', mrAggregated: '0x...', ... } ``` -2. **Initial setup**: Leave `kms.mrAggregated` empty for the first KMS (empty array allows any). 
After it boots, check the logs and add the value. +2. **Initial setup**: Capture the first KMS measurement with `Onboard.GetAttestationInfo` or from auth logs, then add it to `kms.mrAggregated` before bootstrap. An empty array now denies all KMS boots. ### Add to Config diff --git a/docs/deployment.md b/docs/deployment.md index bb9a1f227..564a9e922 100644 --- a/docs/deployment.md +++ b/docs/deployment.md @@ -98,13 +98,16 @@ Start in separate terminals: For production, deploy KMS and Gateway as CVMs with hardware-rooted security. Production deployments require: - KMS running in a CVM (not on the host) - Auth server for authorization (webhook mode) +- KMS measurements allowlisted before bootstrap / onboarding / trusted RPCs can succeed + +If you skip the KMS allowlist step, the VM may boot and the onboard UI may still appear, but the KMS will reject bootstrap, onboarding, or later trusted RPCs with authorization errors. ### Production Checklist **Required:** 1. Set up TDX host with dstack-vmm -2. Deploy KMS as CVM (with auth server) +2. Deploy KMS as CVM with an auth server, then capture its attestation info and allowlist the KMS `mrAggregated` before bootstrap 3. Deploy Gateway as CVM **Optional Add-ons:** @@ -197,11 +200,16 @@ Create `auth-config.json` for initial KMS deployment: ```json { "osImages": ["0x"], - "kms": { "allowAnyDevice": true }, + "kms": { + "mrAggregated": ["0x"], + "allowAnyDevice": true + }, "apps": {} } ``` +> **Important:** `auth-simple` now treats an empty `kms.mrAggregated` allowlist as deny-all for KMS. Capture the current KMS measurement with `Onboard.GetAttestationInfo` and add it before bootstrap. 
+ Run auth-simple: ```bash @@ -460,7 +468,6 @@ Additional KMS instances can onboard from an existing KMS to share the same root [core.onboard] enabled = true auto_bootstrap_domain = "" # Empty = onboard mode -quote_enabled = true # Require TDX attestation address = "0.0.0.0" port = 9203 # HTTP port for onboard UI ``` @@ -480,7 +487,12 @@ curl http://:9203/finish # Restart KMS - it will now serve as a full KMS with shared keys ``` -> **Note:** For KMS onboarding with `quote_enabled = true`, add the KMS mrAggregated hash to your auth server's `kms.mrAggregated` whitelist. +> **Note:** KMS onboarding requires attested KMS instances, and both sides must already be authorized. Add the relevant KMS `mrAggregated` hashes to your auth backend first: +> +> - the destination KMS must allow the source KMS +> - the source KMS must allow the destination KMS +> +> If you skip this, `Onboard.Onboard` or later trusted RPCs will fail with KMS authorization errors. --- diff --git a/docs/tutorials/kms-build-configuration.md b/docs/tutorials/kms-build-configuration.md index be06b036f..502346879 100644 --- a/docs/tutorials/kms-build-configuration.md +++ b/docs/tutorials/kms-build-configuration.md @@ -207,7 +207,6 @@ url = "http://127.0.0.1:9200" [core.onboard] enabled = true auto_bootstrap_domain = "" -quote_enabled = true address = "0.0.0.0" port = 9100 EOF @@ -495,7 +494,6 @@ enabled = true # Empty domain = manual bootstrap mode (ensures bootstrap-info.json is written) auto_bootstrap_domain = "" # Enable TDX quotes - works because KMS runs in CVM -quote_enabled = true address = "0.0.0.0" port = 9100 EOF diff --git a/docs/tutorials/kms-cvm-deployment.md b/docs/tutorials/kms-cvm-deployment.md index 9e90a566f..a133b3683 100644 --- a/docs/tutorials/kms-cvm-deployment.md +++ b/docs/tutorials/kms-cvm-deployment.md @@ -188,7 +188,6 @@ configs: [core.onboard] enabled = true auto_bootstrap_domain = "" - quote_enabled = true address = "0.0.0.0" port = 9100 EOF @@ -314,12 +313,41 @@ Onboarding 
``` > **Important:** KMS is now in onboard mode — a plain HTTP server waiting for bootstrap. It will **not** serve TLS or respond to `KMS.GetMeta` until you complete the next step. +> +> **Critical prerequisite:** before bootstrap can succeed, the KMS must already be authorized by your auth backend. +> +> - For `auth-simple`, add the KMS `mrAggregated` to `kms.mrAggregated` +> - For `auth-eth`, add the KMS `mrAggregated` on-chain with `addKmsAggregatedMr(...)` +> +> You can fetch the value before bootstrap with: +> +> ```bash +> curl -s -X POST \ +> -H "Content-Type: application/json" \ +> -d '{}' \ +> "http://localhost:9100/prpc/Onboard.GetAttestationInfo?json" | jq . +> ``` +> +> If you skip this step, `Onboard.Bootstrap` will fail with a KMS authorization error and the KMS will not enter normal service. +> +> **Pre-bootstrap checklist:** +> +> 1. `Onboard.GetAttestationInfo` returns the current KMS measurement +> 2. that `mrAggregated` has been allowlisted in your auth backend +> 3. the auth backend is reachable from the KMS CVM +> 4. you are still calling the onboard HTTP endpoint, not the post-bootstrap TLS endpoint ### Step 6: Bootstrap KMS With KMS in onboard mode, trigger key generation by calling the Bootstrap RPC endpoint. This generates root keys, a TDX attestation quote, and writes `bootstrap-info.json`: ```bash +# Inspect the KMS measurement before bootstrap +curl -s -X POST \ + -H "Content-Type: application/json" \ + -d '{}' \ + "http://localhost:9100/prpc/Onboard.GetAttestationInfo?json" | jq . + # Replace kms.yourdomain.com with your actual KMS domain curl -s -X POST \ -H "Content-Type: application/json" \ @@ -327,7 +355,7 @@ curl -s -X POST \ "http://localhost:9100/prpc/Onboard.Bootstrap?json" | tee ~/kms-deploy/bootstrap-info.json | jq . ``` -> **Note:** This uses plain `http://` — KMS is still in onboard mode (no TLS yet). The `tee` command saves the response to `bootstrap-info.json` while also displaying it. 
You'll need this file later to register KMS on-chain. +> **Note:** This uses plain `http://` — KMS is still in onboard mode (no TLS yet). The `tee` command saves the response to `bootstrap-info.json` while also displaying it. You'll need this file later to register KMS on-chain. If this call fails with a KMS authorization error, allowlist the `mrAggregated` value first and retry. Expected response: diff --git a/docs/tutorials/troubleshooting-kms-deployment.md b/docs/tutorials/troubleshooting-kms-deployment.md index 74a396319..f60b3bb02 100644 --- a/docs/tutorials/troubleshooting-kms-deployment.md +++ b/docs/tutorials/troubleshooting-kms-deployment.md @@ -241,7 +241,7 @@ export DSTACK_VMM_AUTH_PASSWORD=$(cat ~/.dstack/secrets/vmm-auth-token) "quote": null ``` -This indicates quote_enabled might be false, guest-agent issues, or **SGX not properly configured**: +This indicates guest-agent issues, simulator misconfiguration, or **SGX not properly configured**: ```bash # Check CVM logs for TDX-related errors (replace VM_ID with actual ID from lsvm) @@ -259,9 +259,7 @@ curl -s -H "Authorization: Bearer $(cat ~/.dstack/secrets/vmm-auth-token)" \ 2. **SGX Auto MP Registration not enabled** - Without this BIOS setting, your platform isn't registered with Intel's PCS, and attestation quotes cannot be verified. Re-enter BIOS and enable "SGX Auto MP Registration". -3. **quote_enabled is false** - Verify your `kms.toml` has `quote_enabled = true` in the `[core.onboard]` section. - -4. **Guest-agent not running** - The `/var/run/dstack.sock` socket must exist inside the CVM. +3. **Guest-agent / simulator not running** - The KMS must be able to reach a working dstack guest agent endpoint. In a real CVM, `/var/run/dstack.sock` must exist. For local development, start `sdk/simulator` first. 
### CVM Fails with "QGS error code: 0x12001" diff --git a/kms/auth-simple/README.md b/kms/auth-simple/README.md index e13c9459b..dbb425aa5 100644 --- a/kms/auth-simple/README.md +++ b/kms/auth-simple/README.md @@ -20,12 +20,13 @@ bun install Create `auth-config.json` (see `auth-config.example.json`). -For initial KMS deployment, you only need the OS image hash: +For KMS deployment, you must allowlist both the OS image hash and the KMS `mrAggregated` value: ```json { "osImages": ["0x0b327bcd642788b0517de3ff46d31ebd3847b6c64ea40bacde268bb9f1c8ec83"], "kms": { + "mrAggregated": ["0x"], "allowAnyDevice": true }, "apps": {} @@ -39,7 +40,7 @@ Add more fields as you deploy Gateway and apps: "osImages": ["0x..."], "gatewayAppId": "0x...", "kms": { - "mrAggregated": [], + "mrAggregated": ["0x..."], "devices": [], "allowAnyDevice": true }, @@ -59,7 +60,7 @@ Add more fields as you deploy Gateway and apps: |-------|----------|-------------| | `osImages` | Yes | Allowed OS image hashes (from `digest.txt`) | | `gatewayAppId` | No | Gateway app ID (add after Gateway deployment) | -| `kms.mrAggregated` | No | Allowed KMS aggregated MR values | +| `kms.mrAggregated` | Yes for KMS authorization | Allowed KMS aggregated MR values. An empty array denies all KMS boots. | | `kms.devices` | No | Allowed KMS device IDs | | `kms.allowAnyDevice` | No | If true, skip device ID check for KMS | | `apps..composeHashes` | No | Allowed compose hashes for this app | @@ -160,7 +161,7 @@ KMS boot authorization. 1. `tcbStatus` must be "UpToDate" 2. `osImageHash` must be in `osImages` array -3. `mrAggregated` must be in `kms.mrAggregated` (if non-empty) +3. `mrAggregated` must be in `kms.mrAggregated` 4. 
`deviceId` must be in `kms.devices` (unless `allowAnyDevice` is true) ### App Boot Validation diff --git a/kms/auth-simple/auth-config.example.json b/kms/auth-simple/auth-config.example.json index 40b3d00da..54ba85918 100644 --- a/kms/auth-simple/auth-config.example.json +++ b/kms/auth-simple/auth-config.example.json @@ -3,7 +3,9 @@ "0x0b327bcd642788b0517de3ff46d31ebd3847b6c64ea40bacde268bb9f1c8ec83" ], "kms": { - "mrAggregated": [], + "mrAggregated": [ + "0x" + ], "devices": [], "allowAnyDevice": true }, diff --git a/kms/auth-simple/bun.lock b/kms/auth-simple/bun.lock index acd5f8a5f..eb4a4740d 100644 --- a/kms/auth-simple/bun.lock +++ b/kms/auth-simple/bun.lock @@ -6,7 +6,7 @@ "name": "auth-simple", "dependencies": { "@hono/zod-validator": "0.2.2", - "hono": "4.10.3", + "hono": "4.12.7", "zod": "3.25.76", }, "devDependencies": { @@ -217,7 +217,7 @@ "glob-parent": ["glob-parent@5.1.2", "", { "dependencies": { "is-glob": "^4.0.1" } }, "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow=="], - "hono": ["hono@4.10.3", "", {}, "sha512-2LOYWUbnhdxdL8MNbNg9XZig6k+cZXm5IjHn2Aviv7honhBMOHb+jxrKIeJRZJRmn+htUCKhaicxwXuUDlchRA=="], + "hono": ["hono@4.12.7", "", {}, "sha512-jq9l1DM0zVIvsm3lv9Nw9nlJnMNPOcAtsbsgiUhWcFzPE99Gvo6yRTlszSLLYacMeQ6quHD6hMfId8crVHvexw=="], "human-signals": ["human-signals@5.0.0", "", {}, "sha512-AXcZb6vzzrFAUE61HnN4mpLqd/cSIwNQjtNWR0euPm6y0iqx3G4gOXaIDdtdDwZmhwe82LA6+zinmW4UBWVePQ=="], diff --git a/kms/auth-simple/index.test.ts b/kms/auth-simple/index.test.ts index cd931431e..856a0deda 100644 --- a/kms/auth-simple/index.test.ts +++ b/kms/auth-simple/index.test.ts @@ -75,7 +75,10 @@ describe('auth-simple', () => { writeTestConfig({ gatewayAppId: '0xgateway', osImages: ['0x1fbb0cf9cc6cfbf23d6b779776fabad2c5403d643badb9e5e238615e4960a78a'], - kms: { allowAnyDevice: true } + kms: { + mrAggregated: ['0xabc123'], + allowAnyDevice: true + } }); const res = await app.fetch(new Request('http://localhost/bootAuth/kms', 
{ @@ -93,7 +96,10 @@ describe('auth-simple', () => { writeTestConfig({ gatewayAppId: '0xgateway', osImages: ['0xdifferentimage'], - kms: { allowAnyDevice: true } + kms: { + mrAggregated: ['0xabc123'], + allowAnyDevice: true + } }); const res = await app.fetch(new Request('http://localhost/bootAuth/kms', { @@ -128,6 +134,27 @@ describe('auth-simple', () => { expect(json.reason).toContain('MR'); }); + it('rejects KMS boot when the allowlist is empty', async () => { + writeTestConfig({ + gatewayAppId: '0xgateway', + osImages: ['0x1fbb0cf9cc6cfbf23d6b779776fabad2c5403d643badb9e5e238615e4960a78a'], + kms: { + mrAggregated: [], + allowAnyDevice: true + } + }); + + const res = await app.fetch(new Request('http://localhost/bootAuth/kms', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(baseBootInfo) + })); + const json = await res.json(); + + expect(json.isAllowed).toBe(false); + expect(json.reason).toContain('MR'); + }); + it('allows KMS boot with allowAnyDevice', async () => { writeTestConfig({ gatewayAppId: '0xgateway', diff --git a/kms/auth-simple/index.ts b/kms/auth-simple/index.ts index bda04b110..7307f49cb 100644 --- a/kms/auth-simple/index.ts +++ b/kms/auth-simple/index.ts @@ -122,7 +122,7 @@ class ConfigBackend { // check aggregated MR const allowedMrs = config.kms.mrAggregated.map(normalizeHex); - if (allowedMrs.length > 0 && !allowedMrs.includes(mrAggregated)) { + if (!allowedMrs.includes(mrAggregated)) { return { isAllowed: false, reason: 'aggregated MR not allowed', diff --git a/kms/dstack-app/compose-dev.yaml b/kms/dstack-app/compose-dev.yaml index aacb0e817..755f00e36 100644 --- a/kms/dstack-app/compose-dev.yaml +++ b/kms/dstack-app/compose-dev.yaml @@ -80,6 +80,5 @@ configs: [core.onboard] enabled = true auto_bootstrap_domain = "" - quote_enabled = true address = "0.0.0.0" port = 8000 diff --git a/kms/dstack-app/compose-simple.yaml b/kms/dstack-app/compose-simple.yaml index f40e58d36..522ad95d0 100644 --- 
a/kms/dstack-app/compose-simple.yaml +++ b/kms/dstack-app/compose-simple.yaml @@ -56,6 +56,5 @@ configs: [core.onboard] enabled = true auto_bootstrap_domain = "" - quote_enabled = true address = "0.0.0.0" port = 8000 diff --git a/kms/kms.toml b/kms/kms.toml index 1f354066e..70b2b7177 100644 --- a/kms/kms.toml +++ b/kms/kms.toml @@ -45,6 +45,5 @@ gateway_app_id = "any" [core.onboard] enabled = true auto_bootstrap_domain = "" -quote_enabled = true address = "0.0.0.0" port = 8000 diff --git a/kms/src/config.rs b/kms/src/config.rs index 36874e1b9..3eaa2d117 100644 --- a/kms/src/config.rs +++ b/kms/src/config.rs @@ -118,6 +118,5 @@ pub(crate) struct Dev { #[derive(Debug, Clone, Deserialize)] pub(crate) struct OnboardConfig { pub enabled: bool, - pub quote_enabled: bool, pub auto_bootstrap_domain: String, } diff --git a/kms/src/main_service.rs b/kms/src/main_service.rs index 22d865211..9965fc8d3 100644 --- a/kms/src/main_service.rs +++ b/kms/src/main_service.rs @@ -99,9 +99,6 @@ struct BootConfig { impl RpcHandler { async fn ensure_self_allowed(&self) -> Result<()> { - if !self.state.config.onboard.quote_enabled { - return Ok(()); - } let boot_info = self .state .self_boot_info @@ -355,9 +352,7 @@ impl KmsRpc for RpcHandler { self.ensure_self_allowed() .await .context("KMS self authorization failed")?; - if self.state.config.onboard.quote_enabled { - let _info = self.ensure_kms_allowed(&request.vm_config).await?; - } + let _info = self.ensure_kms_allowed(&request.vm_config).await?; Ok(KmsKeyResponse { temp_ca_key: self.state.inner.temp_ca_key.clone(), keys: vec![KmsKeys { diff --git a/kms/src/onboard_service.rs b/kms/src/onboard_service.rs index 2a5d945f9..93eeb5629 100644 --- a/kms/src/onboard_service.rs +++ b/kms/src/onboard_service.rs @@ -60,29 +60,22 @@ impl RpcCall for OnboardHandler { impl OnboardRpc for OnboardHandler { async fn bootstrap(self, request: BootstrapRequest) -> Result { - let quote_enabled = self.state.config.onboard.quote_enabled; - if quote_enabled 
{ - ensure_self_kms_allowed(&self.state.config) - .await - .context("KMS is not allowed to bootstrap")?; - } - let keys = Keys::generate(&request.domain, quote_enabled) + ensure_self_kms_allowed(&self.state.config) + .await + .context("KMS is not allowed to bootstrap")?; + let keys = Keys::generate(&request.domain) .await .context("Failed to generate keys")?; let k256_pubkey = keys.k256_key.verifying_key().to_sec1_bytes().to_vec(); let ca_pubkey = keys.ca_key.public_key_der(); - let attestation = if quote_enabled { - Some(attest_keys(&ca_pubkey, &k256_pubkey).await?) - } else { - None - }; + let attestation = attest_keys(&ca_pubkey, &k256_pubkey).await?; let cfg = &self.state.config; let response = BootstrapResponse { ca_pubkey, k256_pubkey, - attestation: attestation.unwrap_or_default(), + attestation, }; // Store the bootstrap info safe_write(cfg.bootstrap_info(), serde_json::to_vec(&response)?)?; @@ -101,7 +94,6 @@ impl OnboardRpc for OnboardHandler { &self.state.config, &source_url, &request.domain, - self.state.config.onboard.quote_enabled, self.state.config.pccs_url.clone(), ) .await @@ -169,12 +161,12 @@ struct Keys { } impl Keys { - async fn generate(domain: &str, quote_enabled: bool) -> Result { + async fn generate(domain: &str) -> Result { let tmp_ca_key = KeyPair::generate_for(&PKCS_ECDSA_P256_SHA256)?; let ca_key = KeyPair::generate_for(&PKCS_ECDSA_P256_SHA256)?; let rpc_key = KeyPair::generate_for(&PKCS_ECDSA_P256_SHA256)?; let k256_key = SigningKey::random(&mut rand::rngs::OsRng); - Self::from_keys(tmp_ca_key, ca_key, rpc_key, k256_key, domain, quote_enabled).await + Self::from_keys(tmp_ca_key, ca_key, rpc_key, k256_key, domain).await } async fn from_keys( @@ -183,7 +175,6 @@ impl Keys { rpc_key: KeyPair, k256_key: SigningKey, domain: &str, - quote_enabled: bool, ) -> Result { let tmp_ca_cert = CertRequest::builder() .org_name("Dstack") @@ -201,25 +192,20 @@ impl Keys { .key(&ca_key) .build() .self_signed()?; - let attestation = if quote_enabled { - 
let pubkey = rpc_key.public_key_der(); - let report_data = QuoteContentType::RaTlsCert.to_report_data(&pubkey); - let response = app_attest(report_data.to_vec()) - .await - .context("Failed to get quote")?; - let attestation = VersionedAttestation::from_scale(&response.attestation) - .context("Invalid attestation")?; - Some(attestation) - } else { - None - }; + let pubkey = rpc_key.public_key_der(); + let report_data = QuoteContentType::RaTlsCert.to_report_data(&pubkey); + let response = app_attest(report_data.to_vec()) + .await + .context("Failed to get quote")?; + let attestation = VersionedAttestation::from_scale(&response.attestation) + .context("Invalid attestation")?; // Sign WWW server cert with KMS cert let rpc_cert = CertRequest::builder() .subject(domain) .alt_names(&[domain.to_string()]) .special_usage("kms:rpc") - .maybe_attestation(attestation.as_ref()) + .maybe_attestation(Some(&attestation)) .key(&rpc_key) .build() .signed_by(&ca_cert, &ca_key)?; @@ -239,54 +225,44 @@ impl Keys { cfg: &KmsConfig, other_kms_url: &str, domain: &str, - quote_enabled: bool, pccs_url: Option, ) -> Result { - let mut source_attestation_slot = None; - let mut kms_client = if quote_enabled { - let attestation_slot = Arc::new(Mutex::new(None::)); - let attestation_slot_out = attestation_slot.clone(); - let client = RaClientConfig::builder() - .tls_no_check(true) - .remote_uri(other_kms_url.to_string()) - .cert_validator(Box::new(move |info: Option| { - let Some(info) = info else { - bail!("Source KMS did not present a TLS certificate"); - }; - let Some(attestation) = info.attestation else { - bail!("Source KMS certificate does not contain attestation"); - }; - let mut slot = attestation_slot_out - .lock() - .map_err(|_| anyhow::anyhow!("source attestation mutex poisoned"))?; - *slot = Some(attestation); - Ok(()) - })) - .maybe_pccs_url(pccs_url.clone()) - .build() - .into_client()?; - source_attestation_slot = Some(attestation_slot); - KmsClient::new(client) - } else { - 
KmsClient::new(RaClient::new(other_kms_url.into(), true)?) - }; - - if quote_enabled { - let tmp_ca = kms_client.get_temp_ca_cert().await?; - let (ra_cert, ra_key) = gen_ra_cert(tmp_ca.temp_ca_cert, tmp_ca.temp_ca_key).await?; - let ra_client = RaClient::new_mtls(other_kms_url.into(), ra_cert, ra_key, pccs_url) - .context("Failed to create client")?; - kms_client = KmsClient::new(ra_client); - let source_attestation = source_attestation_slot - .context("source attestation slot missing")? - .lock() - .map_err(|_| anyhow::anyhow!("source attestation mutex poisoned"))? - .clone() - .context("Missing source KMS attestation")?; - ensure_remote_kms_allowed(cfg, &source_attestation) - .await - .context("Source KMS is not allowed for onboarding")?; - } + let attestation_slot = Arc::new(Mutex::new(None::)); + let attestation_slot_out = attestation_slot.clone(); + let client = RaClientConfig::builder() + .tls_no_check(true) + .remote_uri(other_kms_url.to_string()) + .cert_validator(Box::new(move |info: Option| { + let Some(info) = info else { + bail!("Source KMS did not present a TLS certificate"); + }; + let Some(attestation) = info.attestation else { + bail!("Source KMS certificate does not contain attestation"); + }; + let mut slot = attestation_slot_out + .lock() + .map_err(|_| anyhow::anyhow!("source attestation mutex poisoned"))?; + *slot = Some(attestation); + Ok(()) + })) + .maybe_pccs_url(pccs_url.clone()) + .build() + .into_client()?; + let mut kms_client = KmsClient::new(client); + + let tmp_ca = kms_client.get_temp_ca_cert().await?; + let (ra_cert, ra_key) = gen_ra_cert(tmp_ca.temp_ca_cert, tmp_ca.temp_ca_key).await?; + let ra_client = RaClient::new_mtls(other_kms_url.into(), ra_cert, ra_key, pccs_url) + .context("Failed to create client")?; + kms_client = KmsClient::new(ra_client); + let source_attestation = attestation_slot + .lock() + .map_err(|_| anyhow::anyhow!("source attestation mutex poisoned"))? 
+ .clone() + .context("Missing source KMS attestation")?; + ensure_remote_kms_allowed(cfg, &source_attestation) + .await + .context("Source KMS is not allowed for onboarding")?; let info = dstack_client().info().await.context("Failed to get info")?; let keys_res = kms_client @@ -308,15 +284,7 @@ impl Keys { KeyPair::from_pem(&tmp_ca_key_pem).context("Failed to parse tmp CA key")?; let ecdsa_key = SigningKey::from_slice(&root_k256_key).context("Failed to parse ECDSA key")?; - Self::from_keys( - tmp_ca_key, - ca_key, - rpc_key, - ecdsa_key, - domain, - quote_enabled, - ) - .await + Self::from_keys(tmp_ca_key, ca_key, rpc_key, ecdsa_key, domain).await } fn store(&self, cfg: &KmsConfig) -> Result<()> { @@ -360,16 +328,9 @@ pub(crate) async fn update_certs(cfg: &KmsConfig) -> Result<()> { let domain = domain.trim(); // Regenerate certificates using existing keys - let keys = Keys::from_keys( - tmp_ca_key, - ca_key, - rpc_key, - k256_key, - domain, - cfg.onboard.quote_enabled, - ) - .await - .context("Failed to regenerate certificates")?; + let keys = Keys::from_keys(tmp_ca_key, ca_key, rpc_key, k256_key, domain) + .await + .context("Failed to regenerate certificates")?; // Write the new certificates to files keys.store_certs(cfg)?; @@ -378,17 +339,12 @@ pub(crate) async fn update_certs(cfg: &KmsConfig) -> Result<()> { } pub(crate) async fn bootstrap_keys(cfg: &KmsConfig) -> Result<()> { - if cfg.onboard.quote_enabled { - ensure_self_kms_allowed(cfg) - .await - .context("KMS is not allowed to auto-bootstrap")?; - } - let keys = Keys::generate( - &cfg.onboard.auto_bootstrap_domain, - cfg.onboard.quote_enabled, - ) - .await - .context("Failed to generate keys")?; + ensure_self_kms_allowed(cfg) + .await + .context("KMS is not allowed to auto-bootstrap")?; + let keys = Keys::generate(&cfg.onboard.auto_bootstrap_domain) + .await + .context("Failed to generate keys")?; keys.store(cfg)?; Ok(()) } diff --git a/tests/docs/kms-bootstrap-onboard.md 
b/tests/docs/kms-bootstrap-onboard.md new file mode 100644 index 000000000..f3b60f80a --- /dev/null +++ b/tests/docs/kms-bootstrap-onboard.md @@ -0,0 +1,456 @@ +# KMS Bootstrap / Onboard / Trusted RPC Manual Test Guide + +This document describes a manual, AI-executable integration flow for validating: + +1. KMS bootstrap +2. KMS onboard from an existing KMS +3. post-onboard trusted runtime RPCs + +It is intentionally written as a deployment runbook so an AI agent can execute it step by step on teepod / dstack-vmm without depending on `kms/e2e/`. + +--- + +## 1. Scope + +This guide covers the normal happy-path flow: + +1. deploy `kms-src` +2. bootstrap `kms-src` +3. finish `kms-src` +4. deploy `kms-dst` +5. onboard `kms-dst` from `kms-src` +6. finish `kms-dst` +7. probe trusted runtime RPCs on the running KMS + +It also includes a compact deny-case matrix for common service-rejection paths so a deployment run can validate both success and failure behavior in one pass. + +For a deeper authorization-focused runbook, also see: + +- `tests/docs/kms-self-authrization.md` + +--- + +## 2. Topology + +```text +Host / operator machine +├── auth-simple-src (policy for source KMS) +├── auth-simple-dst (policy for destination KMS) +├── kms-src (bootstrapped first) +└── kms-dst (onboarded from kms-src) +``` + +Both KMS instances are expected to run with attestation enabled. For local development without TDX hardware, use `sdk/simulator`. + +Policy reminder: + +- source-side auth must allow: + - `kms-src` itself + - `kms-dst` when it calls `GetKmsKey` during onboarding +- destination-side auth must allow: + - `kms-src` during onboarding + - `kms-dst` itself before you probe trusted runtime RPCs on `kms-dst` + +--- + +## 3. Prerequisites + +Before starting, make sure the following are available: + +1. a KMS image or branch containing the code under test +2. a working teepod / dstack-vmm target +3. routable HTTPS entrypoints for onboard and runtime RPC +4. 
`curl`, `jq`, Python 3, and `bun` +5. an auth service such as `kms/auth-simple`, or an equivalent webhook + +Recommended references: + +- `docs/tutorials/kms-cvm-deployment.md` +- `docs/tutorials/troubleshooting-kms-deployment.md` +- `kms/auth-simple/README.md` +- `tests/docs/kms-self-authrization.md` + +Operational notes: + +1. Prefer a **prebuilt KMS image**. +2. `Boot Progress: done` does **not** guarantee the onboard endpoint is ready. +3. The onboarding completion endpoint is **GET `/finish`**. +4. On teepod, onboard mode usually uses the `-8000` URL, while runtime TLS KMS RPC usually uses the `-8000s` URL. +5. If you use a very small custom webhook instead of the real auth service, `KMS.GetMeta` may fail because `auth_api.get_info()` expects extra chain / contract metadata fields. In that case, use `GetTempCaCert` as the runtime readiness probe. + +--- + +## 4. Shared setup + +### 4.1 Create a workspace + +```bash +export REPO_ROOT="$(git rev-parse --show-toplevel)" +mkdir -p /tmp/kms-bootstrap-onboard +cd /tmp/kms-bootstrap-onboard +``` + +### 4.2 Prepare auth services + +Use two independently controllable auth services: + +- one for `kms-src` +- one for `kms-dst` + +They can be: + +1. host-local if reachable by CVMs +2. public services +3. sidecars inside each KMS deployment + +At minimum, both policies must allow the KMS instance they serve. During onboard, source-side policy must also allow the destination KMS caller. + +For `auth-simple`, `kms.mrAggregated = []` is a deny-all policy for KMS. Add the current KMS MR values explicitly when switching a test from deny to allow. 
+ +### 4.3 Deploy `kms-src` and `kms-dst` + +Deploy both KMS instances in onboard mode with: + +- `core.onboard.enabled = true` +- `core.onboard.auto_bootstrap_domain = ""` +- `core.auth_api.type = "webhook"` + +Record: + +```bash +export KMS_SRC_ONBOARD='https:///' +export KMS_DST_ONBOARD='https:///' +``` + +Wait until the onboard endpoints actually respond: + +```bash +until curl -sk -X POST "${KMS_SRC_ONBOARD%/}/prpc/Onboard.GetAttestationInfo?json" \ + -H 'Content-Type: application/json' -d '{}' >/dev/null 2>&1; do + echo "waiting for kms-src onboard endpoint..." + sleep 10 +done + +until curl -sk -X POST "${KMS_DST_ONBOARD%/}/prpc/Onboard.GetAttestationInfo?json" \ + -H 'Content-Type: application/json' -d '{}' >/dev/null 2>&1; do + echo "waiting for kms-dst onboard endpoint..." + sleep 10 +done +``` + +Capture initial attestation info: + +```bash +curl -sk -X POST "${KMS_SRC_ONBOARD%/}/prpc/Onboard.GetAttestationInfo?json" \ + -H 'Content-Type: application/json' -d '{}' \ + | tee /tmp/kms-bootstrap-onboard/kms-src-att.json | jq . + +curl -sk -X POST "${KMS_DST_ONBOARD%/}/prpc/Onboard.GetAttestationInfo?json" \ + -H 'Content-Type: application/json' -d '{}' \ + | tee /tmp/kms-bootstrap-onboard/kms-dst-att.json | jq . +``` + +--- + +## 5. Bootstrap `kms-src` + +### 5.1 Call bootstrap + +```bash +curl -sk -X POST "${KMS_SRC_ONBOARD%/}/prpc/Onboard.Bootstrap?json" \ + -H 'Content-Type: application/json' \ + -d '{"domain":"kms-src.example.test"}' \ + | tee /tmp/kms-bootstrap-onboard/kms-src-bootstrap.json | jq . +``` + +### Expected result + +- response contains: + - `ca_pubkey` + - `k256_pubkey` + - `attestation` +- no `.error` + +### 5.2 Finish onboard mode + +```bash +curl -sk "${KMS_SRC_ONBOARD%/}/finish" \ + | tee /tmp/kms-bootstrap-onboard/kms-src-finish.txt +``` + +### 5.3 Record runtime endpoint + +```bash +export KMS_SRC_RUNTIME='https://' +``` + +On teepod, this is typically the `-8000s` style URL. 
+ +### 5.4 Probe runtime metadata + +```bash +curl -sk "${KMS_SRC_RUNTIME%/}/prpc/KMS.GetMeta?json" \ + | tee /tmp/kms-bootstrap-onboard/kms-src-meta.json | jq . +``` + +### Expected result + +- `KMS.GetMeta` succeeds when the configured auth service implements `auth_api.get_info()`-compatible fields +- returned metadata includes: + - `ca_cert` + - `k256_pubkey` + - `bootstrap_info` + +If `KMS.GetMeta` fails because your minimal webhook does not return chain / contract info, use `GetTempCaCert` below as the runtime readiness probe instead. + +--- + +## 6. Onboard `kms-dst` from `kms-src` + +Before this step: + +- destination-side auth must allow `kms-src` +- source-side auth must allow `kms-dst` to call `GetKmsKey` +- if you plan to probe trusted runtime RPCs on `kms-dst` immediately after onboard, destination-side auth must also allow `kms-dst` itself + +### 6.1 Call onboard + +```bash +curl -sk -X POST "${KMS_DST_ONBOARD%/}/prpc/Onboard.Onboard?json" \ + -H 'Content-Type: application/json' \ + -d "{\"source_url\":\"${KMS_SRC_RUNTIME%/}/prpc\",\"domain\":\"kms-dst.example.test\"}" \ + | tee /tmp/kms-bootstrap-onboard/kms-dst-onboard.json | jq . +``` + +### Expected result + +- response is `{}` or otherwise empty success +- no `.error` + +### 6.2 Finish onboard mode + +```bash +curl -sk "${KMS_DST_ONBOARD%/}/finish" \ + | tee /tmp/kms-bootstrap-onboard/kms-dst-finish.txt +``` + +### 6.3 Record runtime endpoint + +```bash +export KMS_DST_RUNTIME='https://' +``` + +Again, on teepod this is usually the `-8000s` style URL. + +### 6.4 Probe runtime metadata + +```bash +curl -sk "${KMS_DST_RUNTIME%/}/prpc/KMS.GetMeta?json" \ + | tee /tmp/kms-bootstrap-onboard/kms-dst-meta.json | jq . 
+``` + +### Expected result + +- `KMS.GetMeta` succeeds when the configured auth service implements `auth_api.get_info()`-compatible fields +- `kms-dst` now serves as a normal runtime KMS + +If `KMS.GetMeta` fails because your minimal webhook does not return chain / contract info, continue with the trusted RPC probes below. Those are the better canary for this manual flow. + +--- + +## 7. Trusted runtime RPC checks + +This section folds the runtime trusted-RPC verification into the same flow. + +### Deny-case matrix + +| Case | Policy change | Expected failure point | Typical error shape | +| --- | --- | --- | --- | +| bootstrap deny | source-side auth leaves `kms.mrAggregated` empty or omits the current `kms-src` MR | `Onboard.Bootstrap` on `kms-src` | `KMS is not allowed to bootstrap`, `MR aggregated not allowed` | +| onboard deny (receiver-side) | destination-side auth leaves `kms.mrAggregated` empty or omits the current `kms-src` MR | `Onboard.Onboard` on `kms-dst` | source KMS not allowed / onboarding failed | +| onboard deny (source-side) | source-side auth leaves `kms.mrAggregated` empty or omits the current `kms-dst` MR | `Onboard.Onboard` on `kms-dst` | source rejected destination caller / `GetKmsKey` authorization failed | +| runtime deny | auth removes the running KMS from `kms.mrAggregated` | `GetTempCaCert` or another trusted RPC | `KMS self authorization failed`, `KMS is not allowed` | + +Use the happy-path steps below first, then flip policies one by one and rerun the indicated probe. + +### 7.1 Minimum canary: `GetTempCaCert` + +```bash +curl -sk "${KMS_SRC_RUNTIME%/}/prpc/KMS.GetTempCaCert?json" \ + | tee /tmp/kms-bootstrap-onboard/kms-src-get-temp-ca.json | jq . 
+``` + +Expected result: + +- success +- response contains: + - `temp_ca_cert` + - `temp_ca_key` + - `ca_cert` + +### 7.2 `GetKmsKey` + +This RPC is normally exercised by onboard itself, but you can also treat a successful onboard as proof that: + +- source KMS accepted the destination KMS as an attested caller +- source KMS returned its shared keys + +If you want a standalone explicit probe, use an attested KMS client path and call: + +```text +KMS.GetKmsKey +``` + +Expected result: + +- succeeds only for an attested / authorized KMS caller + +### 7.3 `GetAppKey` + +This requires an attested app caller plus valid `vm_config`. + +Expected result: + +- success for an attested and authorized app caller +- returned fields should include app key material and `gateway_app_id` + +### 7.4 `SignCert` + +This requires a valid CSR plus verified attestation. + +Expected result: + +- success for a valid attested app CSR +- returned `certificate_chain` is non-empty + +### 7.5 Optional regression check + +After a normal happy-path run, flip source-side auth policy to deny `kms-src` itself and retry: + +```bash +curl -sk "${KMS_SRC_RUNTIME%/}/prpc/KMS.GetTempCaCert?json" \ + | tee /tmp/kms-bootstrap-onboard/kms-src-get-temp-ca-after-deny.json | jq . +``` + +Expected result: + +- trusted runtime RPCs fail after the KMS is no longer authorized + +This overlaps with `kms-self-authrization.md`, but is useful as a quick post-deploy sanity check. + +### 7.6 Recommended deny-case checks + +To make this flow more robust, add these negative checks to the same run and save each failure response as evidence. + +#### A. 
Bootstrap deny + +Before the successful bootstrap run, configure source-side auth so that `kms-src` is not allowlisted by MR (for example, leave `kms.mrAggregated` empty), then call: + +```bash +curl -sk -X POST "${KMS_SRC_ONBOARD%/}/prpc/Onboard.Bootstrap?json" \ + -H 'Content-Type: application/json' \ + -d '{"domain":"kms-src.example.test"}' \ + | tee /tmp/kms-bootstrap-onboard/kms-src-bootstrap-denied.json | jq . +``` + +Expected result: + +- response contains `.error` +- error indicates the KMS itself is not allowed to bootstrap + +Then allowlist `kms-src` and rerun the normal bootstrap flow. + +#### B1. Onboard deny at the receiver side + +Before the successful onboard run, make destination-side policy leave `kms-src` out of `kms.mrAggregated` (for example, keep it empty), then call: + +```bash +curl -sk -X POST "${KMS_DST_ONBOARD%/}/prpc/Onboard.Onboard?json" \ + -H 'Content-Type: application/json' \ + -d "{\"source_url\":\"${KMS_SRC_RUNTIME%/}/prpc\",\"domain\":\"kms-dst.example.test\"}" \ + | tee /tmp/kms-bootstrap-onboard/kms-dst-onboard-denied.json | jq . +``` + +Expected result: + +- response contains `.error` +- the error indicates the receiver refused the source KMS, source authorization failed, or onboarding failed before keys were accepted + +Then restore destination-side allowlists. + +#### B2. Onboard deny at the source side + +Make source-side policy leave `kms-dst` out of `kms.mrAggregated`, then call the same onboard request again: + +```bash +curl -sk -X POST "${KMS_DST_ONBOARD%/}/prpc/Onboard.Onboard?json" \ + -H 'Content-Type: application/json' \ + -d "{\"source_url\":\"${KMS_SRC_RUNTIME%/}/prpc\",\"domain\":\"kms-dst.example.test\"}" \ + | tee /tmp/kms-bootstrap-onboard/kms-dst-onboard-denied-by-src.json | jq . 
+``` + +Expected result: + +- response contains `.error` +- the error indicates the source KMS rejected the destination KMS caller, or `GetKmsKey` authorization failed + +Then restore both source-side and destination-side allowlists and rerun the normal onboard flow. + +#### C. Trusted RPC deny + +After a successful bootstrap or onboard, remove the running KMS's own MR from `kms.mrAggregated` and retry: + +```bash +curl -sk "${KMS_SRC_RUNTIME%/}/prpc/KMS.GetTempCaCert?json" \ + | tee /tmp/kms-bootstrap-onboard/kms-src-get-temp-ca-denied.json | jq . +``` + +Expected result: + +- response contains `.error` +- error indicates KMS self authorization failed or the KMS is not allowed + +You can repeat the same check on `kms-dst` after onboard by removing `kms-dst` from destination-side policy and retrying `KMS.GetTempCaCert`. + +--- + +## 8. Evidence to capture + +For each run, save: + +1. `Onboard.GetAttestationInfo` output for both KMS instances +2. bootstrap response +3. onboard response +4. `/finish` responses +5. runtime `KMS.GetMeta` responses +6. trusted RPC responses such as `GetTempCaCert` +7. deny-case responses such as `kms-src-bootstrap-denied.json`, `kms-dst-onboard-denied.json`, `kms-dst-onboard-denied-by-src.json`, and `kms-src-get-temp-ca-denied.json` +8. auth policy snapshots used during the run + +Recommended archive: + +```bash +tar czf /tmp/kms-bootstrap-onboard-results.tar.gz /tmp/kms-bootstrap-onboard +``` + +--- + +## 9. Success criteria summary + +The flow is considered validated if all of the following are true: + +1. `kms-src` bootstrap succeeds +2. `kms-src` transitions to runtime mode successfully +3. `kms-dst` onboard succeeds against `kms-src` +4. `kms-dst` transitions to runtime mode successfully +5. runtime metadata probes succeed on both KMS instances, or `GetTempCaCert` succeeds when `GetMeta` is unavailable with a minimal webhook +6. at least one trusted runtime RPC such as `GetTempCaCert` succeeds +7. 
the selected deny cases fail at the expected RPC with an authorization error + +--- + +## 10. Cleanup + +Remove the test CVMs using your normal teepod / `vmm-cli.py remove` flow. + +If you ran host-local auth services, stop them as well. diff --git a/tests/docs/kms-self-authrization.md b/tests/docs/kms-self-authrization.md index eaaa85d95..454484631 100644 --- a/tests/docs/kms-self-authrization.md +++ b/tests/docs/kms-self-authrization.md @@ -1,13 +1,13 @@ # KMS Self-Authorization Manual Integration Test Guide -This document describes a manual, AI-executable integration test flow for the KMS self-authorization changes introduced in PR #573. +This document describes a manual, AI-executable integration test flow for KMS self-authorization and quote-required KMS behavior. The goal is to validate the following behaviors without depending on `kms/e2e/` from PR #538: -1. **Bootstrap self-check**: a KMS with `quote_enabled = true` must call the auth API and verify that **itself** is allowed before bootstrap succeeds. -2. **Onboard receiver-side source check**: a new KMS with `quote_enabled = true` must reject onboarding if the **source KMS** is not allowed by the receiver's auth policy. +1. **Bootstrap self-check**: a KMS must call the auth API and verify that **itself** is allowed before bootstrap succeeds. +2. **Onboard receiver-side source check**: a new KMS must reject onboarding if the **source KMS** is not allowed by the receiver's auth policy. 3. **Trusted RPC self-check**: trusted KMS RPCs such as `GetTempCaCert`, `GetKmsKey`, `GetAppKey`, and `SignCert` must fail when the running KMS is no longer allowed by its auth policy. -4. **Compatibility**: when `quote_enabled = false`, the new bootstrap/onboard self-authorization checks should be skipped. +4. **Attestation requirement**: KMS always requires attestation; for local development without TDX hardware, use `sdk/simulator`. 
This guide is written as a deployment-and-test runbook so an AI agent can follow it end-to-end. @@ -20,10 +20,9 @@ This guide is written as a deployment-and-test runbook so an AI agent can follow > 3. `Boot Progress: done` only means the VM guest boot finished. It does **not** guarantee the KMS onboard endpoint is already ready. > 4. If you inject helper scripts through `docker-compose.yaml`, prefer inline `configs.content` over `configs.file` unless you have confirmed the extra files are copied into the deployment bundle. > 5. The onboard completion endpoint is **GET `/finish`**, not POST. -> 6. Do **not** reuse a previously captured `mr_aggregated` across redeploys. In practice, the measured value changed across fresh `kms-noquote` redeploys, so auth policies must be generated from the attestation of the **current** VM under test. -> 7. With `quote_enabled = false`, `Onboard.Bootstrap` skipped the new auth check as expected and returned an empty `attestation` field. -> 8. With `quote_enabled = false`, runtime trusted RPC self-checks were also skipped: `KMS.GetTempCaCert` still succeeded under a deny policy. -> 9. End-to-end onboard into a `quote_enabled = false` receiver did **not** complete against a quoted source KMS. The new receiver-side source check was skipped, but the flow later failed on the existing source-side `GetKmsKey` requirement with `No attestation provided`. +> 6. Do **not** reuse a previously captured `mr_aggregated` across redeploys. Auth policies must be generated from the attestation of the **current** VM under test. +> 7. KMS now always requires quote/attestation. For local development without TDX hardware, use `sdk/simulator` instead of trying to run a no-attestation KMS flow. +> 8. For `auth-simple`, `kms.mrAggregated = []` is a deny-all policy for KMS. Use that as the baseline deny configuration, then add the measured KMS MR values for allow cases. 
--- @@ -38,7 +37,7 @@ This guide is written as a deployment-and-test runbook so an AI agent can follow 7. [Test case 2: bootstrap succeeds after self is whitelisted](#7-test-case-2-bootstrap-succeeds-after-self-is-whitelisted) 8. [Test case 3: receiver rejects onboarding from a denied source KMS](#8-test-case-3-receiver-rejects-onboarding-from-a-denied-source-kms) 9. [Test case 4: trusted RPCs fail when the running KMS is no longer allowed](#9-test-case-4-trusted-rpcs-fail-when-the-running-kms-is-no-longer-allowed) -10. [Test case 5: `quote_enabled = false` remains compatible](#10-test-case-5-quote_enabled--false-remains-compatible) +10. [Test case 5: local development should use the simulator](#10-test-case-5-local-development-should-use-the-simulator) 11. [Evidence to capture](#11-evidence-to-capture) 12. [Cleanup](#12-cleanup) @@ -58,7 +57,7 @@ This keeps the test independent from PR #538 while still exercising real deploym ## 2. Test strategy -Use **real KMS CVMs** with `quote_enabled = true` and a hot-reloadable `auth-simple` policy. +Use **real KMS CVMs** with a hot-reloadable `auth-simple` policy. Why `auth-simple`: @@ -80,7 +79,6 @@ Host / operator machine ├── auth-simple-dst (target KMS auth policy) ├── kms-src (bootstrapped, later used as source KMS) -├── kms-dst (fresh KMS used for onboard tests) -└── optional kms-noquote (fresh KMS with quote_enabled = false) +└── kms-dst (fresh KMS used for onboard tests) ``` Policy responsibilities: @@ -194,6 +192,8 @@ cat > /tmp/kms-self-auth/auth-dst.json <<'EOF' EOF ``` +These placeholder configs intentionally deny all KMS boots until you populate `kms.mrAggregated` with the measured source or destination KMS values for the current run.
+ Start the services: ```bash @@ -222,7 +222,6 @@ Requirements for **both** VMs: - `core.onboard.enabled = true` - `core.onboard.auto_bootstrap_domain = ""` -- `core.onboard.quote_enabled = true` - `core.auth_api.type = "webhook"` Point them at different auth services or sidecars: @@ -392,7 +391,7 @@ cp /tmp/kms-self-auth/auth-dst-allow-src.json /tmp/kms-self-auth/auth-dst.json ### Purpose -Verify that a KMS with `quote_enabled = true` refuses bootstrap if the auth API denies **its own** measurements. +Verify that a KMS refuses bootstrap if the auth API denies **its own** measurements. ### Steps @@ -619,85 +618,31 @@ The important part is that the running KMS must not rely only on bootstrap-time --- -## 10. Test case 5: `quote_enabled = false` remains compatible +## 10. Test case 5: local development should use the simulator ### Purpose -Verify that the new checks are skipped when `quote_enabled = false`. +KMS now always requires attestation. For local development without TDX hardware, use `sdk/simulator` so bootstrap, onboard, and trusted RPC flows still exercise the quoted path. ### Suggested minimal coverage -Deploy an extra KMS named `kms-noquote` with: - -```toml -[core.onboard] -enabled = true -auto_bootstrap_domain = "" -quote_enabled = false -``` - -Point it to an auth policy that would otherwise deny it. - -### Check A: bootstrap compatibility - -1. Deploy `kms-noquote` with a deny policy. -2. Call: - -```bash -curl -sf -X POST "${KMS_NOQUOTE_ONBOARD%/}/prpc/Onboard.Bootstrap?json" \ - -H 'Content-Type: application/json' \ - -d '{"domain":"kms-noquote.example.test"}' \ - | tee /tmp/kms-self-auth/bootstrap-noquote.json | jq . -``` - -### Expected result - -- bootstrap succeeds even though the auth policy would deny a quoted KMS -- the response's `attestation` field is empty - -### Optional runtime compatibility check - -After bootstrap and `GET /finish`, probe a trusted RPC while the auth policy still denies the KMS: +1. 
Start the simulator: ```bash -curl -sk "${KMS_NOQUOTE_RUNTIME%/}/prpc/KMS.GetTempCaCert?json" \ - | tee /tmp/kms-self-auth/get-temp-ca-noquote-deny.json | jq . +cd dstack/sdk/simulator +./build.sh +./dstack-simulator ``` -Expected result: - -- `GetTempCaCert` still succeeds, because the new runtime self-check is skipped when `quote_enabled = false` - -### Check B: noquote receiver still cannot onboard from a quoted source - -If you want to test the onboard path too: - -1. keep `kms-src` allowed on the source side -2. deploy `kms-noquote` as a fresh onboarding target -3. keep the receiver-side policy in deny mode -4. call `Onboard.Onboard` - -Expected result: +2. Point the guest agent client at the simulator endpoint as documented in the SDK README. +3. Run KMS locally against the simulator-backed guest agent. +4. Verify bootstrap and trusted RPCs still produce attestation-backed behavior. -- the new receiver-side source authorization check is skipped -- but end-to-end onboarding still fails later with a source-side error similar to: - -```json -{ - "error": "Failed to onboard: Request failed with status=400 Bad Request, error={\"error\":\"No attestation provided\"}" -} -``` - -Reason: +### Expected result -- this failure is **not** from the new receiver-side check added in PR #573 -- it comes from the existing source-side `GetKmsKey` path, which still expects attestation from the onboarding target -- therefore this failure is **correct** when the target KMS has `quote_enabled = false` but the source KMS still requires attested callers -- so `quote_enabled = false` compatibility is intentionally limited to: - - bootstrap - - skipping the new receiver-side source check - - skipping the new runtime self-check -- it does **not** mean end-to-end noquote onboarding into a quoted source KMS should succeed +- local development still uses the same quote-required logic +- there is no separate no-quote KMS mode to validate anymore +- simulator-backed development should be 
treated as the replacement for the old noquote/dev workflow --- @@ -737,9 +682,8 @@ Then remove test CVMs using your normal `vmm-cli.py remove` or teepod cleanup fl The change is considered validated if all of the following are true: -1. bootstrap fails under deny policy when `quote_enabled = true` +1. bootstrap fails under deny policy 2. bootstrap succeeds after self allowlisting 3. onboarding rejects a denied source KMS on the receiver side 4. runtime trusted RPCs stop working after the source KMS is removed from the allowlist -5. with `quote_enabled = false`, bootstrap and runtime trusted RPCs skip the new checks -6. with `quote_enabled = false`, receiver-side onboarding does not fail on the **new** source-authorization check, but it still correctly fails against a quoted source KMS that requires attested callers +5. local development without TDX hardware is expected to use `sdk/simulator` rather than a no-quote KMS mode