diff --git a/Cargo.lock b/Cargo.lock
index 7d475254..f3014d37 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2391,6 +2391,40 @@ dependencies = [
"tracing",
]
+[[package]]
+name = "dstack-auth"
+version = "0.5.11"
+dependencies = [
+ "anyhow",
+ "clap",
+ "rocket",
+ "serde",
+ "serde_json",
+]
+
+[[package]]
+name = "dstack-cli"
+version = "0.5.11"
+dependencies = [
+ "anyhow",
+ "clap",
+ "dstack-cli-core",
+ "serde_json",
+ "tokio",
+]
+
+[[package]]
+name = "dstack-cli-core"
+version = "0.5.11"
+dependencies = [
+ "anyhow",
+ "dstack-vmm-rpc",
+ "http-client",
+ "rustix 0.38.44",
+ "serde_json",
+ "toml",
+]
+
[[package]]
name = "dstack-gateway"
version = "0.5.11"
@@ -2861,6 +2895,21 @@ dependencies = [
"serde_json",
]
+[[package]]
+name = "dstackup"
+version = "0.5.11"
+dependencies = [
+ "anyhow",
+ "clap",
+ "dstack-cli-core",
+ "hex",
+ "reqwest",
+ "serde",
+ "serde_json",
+ "sha2 0.10.9",
+ "tokio",
+]
+
[[package]]
name = "dunce"
version = "1.0.5"
diff --git a/Cargo.toml b/Cargo.toml
index 90bf4e9e..b873a680 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -63,6 +63,10 @@ members = [
"sdk/rust",
"sdk/rust/types",
"no_std_check",
+ "crates/dstack-cli-core",
+ "crates/dstack-cli",
+ "crates/dstackup",
+ "crates/dstack-auth",
]
resolver = "2"
@@ -75,6 +79,7 @@ dstack-gateway-rpc = { path = "gateway/rpc" }
dstack-kms-rpc = { path = "kms/rpc" }
dstack-guest-agent-rpc = { path = "guest-agent/rpc" }
dstack-vmm-rpc = { path = "vmm/rpc" }
+dstack-cli-core = { path = "crates/dstack-cli-core" }
dstack-port-forward = { path = "port-forward" }
cc-eventlog = { path = "cc-eventlog" }
supervisor = { path = "supervisor" }
diff --git a/README.md b/README.md
index b0bb0dfe..09c2b806 100644
--- a/README.md
+++ b/README.md
@@ -29,6 +29,7 @@ AI providers ask users to trust them with sensitive data. But trust doesn't scal
| Platform | Status | Attestation |
|----------|--------|-------------|
| **Bare metal TDX** | Available | TDX |
+| **Bare metal AMD SEV-SNP** | Host support; requires an SNP-capable guest image | SEV-SNP |
| **[Phala Cloud](https://cloud.phala.network)** | Available | TDX |
| **GCP Confidential VMs** | Available | TDX + TPM |
| **AWS Nitro Enclaves** | Available | NSM |
@@ -66,15 +67,15 @@ services:
- "8000:8000"
```
-Deploy to any Intel TDX host using a guest OS image from [meta-dstack releases](https://github.com/Dstack-TEE/meta-dstack/releases), or use [Phala Cloud](https://cloud.phala.network) for managed infrastructure.
+Deploy to a self-hosted TDX machine with the `dstackup install` -> `dstack deploy` workflow, or use [Phala Cloud](https://cloud.phala.network) for managed infrastructure. AMD SEV-SNP hosts use the same workflow when the selected guest image includes `digest.sev.txt`.
-Setting up dstack on your own hardware? See the [full deployment guide →](./docs/deployment.md)
+Setting up dstack on your own hardware? Start with the [self-hosted quick onboarding guide](./docs/onboarding.md)
## Architecture

-Your container runs inside a Confidential VM (Intel TDX) with optional GPU isolation via NVIDIA Confidential Computing. The CPU TEE protects application logic; the GPU TEE protects model weights and inference data.
+Your container runs inside a Confidential VM, such as Intel TDX or AMD SEV-SNP, with optional GPU isolation via NVIDIA Confidential Computing. The CPU TEE protects application logic; the GPU TEE protects model weights and inference data.
**Core components:**
@@ -107,6 +108,8 @@ Apps communicate with the guest agent via HTTP over `/var/run/dstack.sock`. Use
- [Verification](./docs/verification.md) - How to verify TEE attestation
**For Operators**
+- [Hardware Enablement](./docs/hardware-enablement.md) - Prepare a TDX or AMD SEV-SNP host
+- [Self-hosted Quick Onboarding](./docs/onboarding.md) - First app on one host
- [Deployment](./docs/deployment.md) - Self-hosting on TDX hardware
- [On-Chain Governance](./docs/onchain-governance.md) - Smart contract authorization
- [Gateway](./docs/dstack-gateway.md) - Gateway configuration
@@ -174,7 +177,7 @@ Yes. dstack runs on any Intel TDX-capable server. See the [deployment guide](./d
What TEE hardware is supported?
-Currently: Intel TDX (4th/5th Gen Xeon) and NVIDIA Confidential Computing (H100, Blackwell). AMD SEV-SNP support is planned.
+Currently: Intel TDX, AMD SEV-SNP, AWS Nitro Enclaves, GCP Confidential VMs, and NVIDIA Confidential Computing GPUs (H100, Blackwell).
diff --git a/crates/dstack-auth/Cargo.toml b/crates/dstack-auth/Cargo.toml
new file mode 100644
index 00000000..d9fb1879
--- /dev/null
+++ b/crates/dstack-auth/Cargo.toml
@@ -0,0 +1,20 @@
+# SPDX-FileCopyrightText: © 2026 Phala Network
+#
+# SPDX-License-Identifier: Apache-2.0
+
+[package]
+name = "dstack-auth"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+
+[[bin]]
+name = "dstack-auth"
+path = "src/main.rs"
+
+[dependencies]
+anyhow.workspace = true
+clap.workspace = true
+rocket = { workspace = true, features = ["json"] }
+serde.workspace = true
+serde_json.workspace = true
diff --git a/crates/dstack-auth/src/main.rs b/crates/dstack-auth/src/main.rs
new file mode 100644
index 00000000..217d9294
--- /dev/null
+++ b/crates/dstack-auth/src/main.rs
@@ -0,0 +1,320 @@
+// SPDX-FileCopyrightText: © 2026 Phala Network
+//
+// SPDX-License-Identifier: Apache-2.0
+
+//! `dstack-auth` — the single-operator KMS auth webhook (Rust reimplementation
+//! of `auth-simple`).
+//!
+//! Runs on the host as `dstack-auth.service`; the KMS-in-CVM reaches it at
+//! `http://10.0.2.2:<port>` under user-mode networking and POSTs `BootInfo` to
+//! `/bootAuth/app` (compose-hash allowlist) and `/bootAuth/kms` (mrAggregated
+//! allowlist). The allowlist JSON is re-read on every request, so `dstack run`
+//! can add an app without a restart. Fails closed: a missing/invalid allowlist
+//! denies everything.
+//!
+//! Deliberate single-node deviation from `auth-simple`: it does NOT enforce
+//! `tcbStatus == UpToDate`. Real TDX hosts routinely report a non-`UpToDate`
+//! TCB (microcode / TDX-module behind), and in the single-node model the
+//! operator already controls and trusts their own host, so a hard TCB gate
+//! would be friction without a corresponding trust gain here. Re-add the check
+//! (capture `tcbStatus`, deny unless `UpToDate`) if this grows into a
+//! multi-tenant / hosted deployment.
+
+use anyhow::Result;
+use clap::Parser;
+use rocket::serde::json::Json;
+use rocket::{get, post, routes, State};
+use serde::{Deserialize, Serialize};
+use serde_json::json;
+use std::collections::HashMap;
+use std::path::PathBuf;
+
+// Command-line options for the webhook. The parsed value is also registered
+// with Rocket as managed state, so the request handlers can read `config` on
+// every call. (Plain `//` comments here on purpose: `///` text on a clap
+// struct or field becomes part of the generated help output.)
+#[derive(Parser, Clone)]
+#[command(
+ name = "dstack-auth",
+ version,
+ about = "single-operator KMS auth webhook"
+)]
+struct Cli {
+ /// path to the allowlist JSON (re-read on every request).
+ #[arg(long, default_value = "/var/lib/dstack/auth-allowlist.json")]
+ config: PathBuf,
+ /// bind address. Defaults to loopback (reachable from CVMs at 10.0.2.2 via
+ /// user-mode networking, and not exposed externally).
+ #[arg(long, default_value = "127.0.0.1")]
+ address: String,
+ /// bind port.
+ #[arg(long, default_value_t = 8001)]
+ port: u16,
+}
+
+/// boot info the KMS sends (camelCase; byte fields are hex strings). Only the
+/// fields the allowlist checks are captured; the rest are ignored.
+///
+/// The container-level `default` means a field that is absent from the
+/// payload deserializes to an empty string instead of rejecting the request.
+#[derive(Deserialize, Default)]
+#[serde(rename_all = "camelCase", default)]
+struct BootInfo {
+ /// compared against `kms.mrAggregated` by `check_kms`.
+ mr_aggregated: String,
+ /// compared against `osImages` by `check_app`.
+ os_image_hash: String,
+ /// looked up among the keys of `apps` by `check_app`.
+ app_id: String,
+ /// compared against the app's `composeHashes` by `check_app`.
+ compose_hash: String,
+ /// compared against the relevant `devices` list by both checks.
+ device_id: String,
+}
+
+/// verdict returned to the KMS, serialized with camelCase keys
+/// (`isAllowed`, `gatewayAppId`, `reason`).
+#[derive(Serialize)]
+#[serde(rename_all = "camelCase")]
+struct BootResponse {
+ /// true only when every allowlist check passed.
+ is_allowed: bool,
+ /// copied from the allowlist's `gatewayAppId` on both allow and deny.
+ gateway_app_id: String,
+ /// `"ok"` on allow, otherwise a short description of the failed check.
+ reason: String,
+}
+
+/// on-disk allowlist (camelCase JSON). Every field is optional thanks to the
+/// container-level `default`; `Allowlist::default()` is the deny-all value
+/// used when the file is missing or invalid.
+#[derive(Deserialize, Default)]
+#[serde(rename_all = "camelCase", default)]
+struct Allowlist {
+    /// allowed OS image hashes; an empty list disables the image check.
+    os_images: Vec<String>,
+    /// echoed back to the KMS in every response.
+    gateway_app_id: String,
+    /// rules for `/bootAuth/kms`.
+    kms: KmsRules,
+    /// per-app rules for `/bootAuth/app`, keyed by app id (hex).
+    apps: HashMap<String, AppRules>,
+}
+
+/// allowlist rules applied to a booting KMS instance.
+#[derive(Deserialize, Default)]
+#[serde(rename_all = "camelCase", default)]
+struct KmsRules {
+    /// accepted `mrAggregated` measurements (hex); empty denies every KMS.
+    mr_aggregated: Vec<String>,
+    /// accepted device ids (hex); see `device_ok` for the empty-list rule.
+    devices: Vec<String>,
+    /// when true the device id is not checked at all.
+    allow_any_device: bool,
+}
+
+/// allowlist rules for one registered app.
+#[derive(Deserialize, Default)]
+#[serde(rename_all = "camelCase", default)]
+struct AppRules {
+    /// accepted compose hashes (hex); empty denies every boot of the app.
+    compose_hashes: Vec<String>,
+    /// accepted device ids (hex); see `device_ok` for the empty-list rule.
+    devices: Vec<String>,
+    /// when true the device id is not checked at all.
+    allow_any_device: bool,
+}
+
+/// canonical form of a hex string used for every comparison in this file:
+/// surrounding whitespace removed, one leading `0x`/`0X` dropped, lowercased.
+///
+/// MUST stay in sync with `dstack-cli-core::config::norm_hex`: `dstack run`
+/// writes the allowlist with that function and this webhook reads it with
+/// this one, so any divergence silently denies apps.
+fn norm(s: &str) -> String {
+    let trimmed = s.trim();
+    let digits = match trimmed.strip_prefix("0x") {
+        Some(rest) => rest,
+        None => trimmed.strip_prefix("0X").unwrap_or(trimmed),
+    };
+    digits.to_lowercase()
+}
+
+/// true when some entry of `list` equals `value` after both are normalized
+/// with `norm`.
+fn contains(list: &[String], value: &str) -> bool {
+    let wanted = norm(value);
+    for item in list {
+        if norm(item) == wanted {
+            return true;
+        }
+    }
+    false
+}
+
+/// device gate, kept compatible with auth-simple: the device list is only
+/// enforced when it is non-empty, so an empty `devices` accepts any device
+/// even if `allowAnyDevice` is false.
+fn device_ok(allow_any: bool, devices: &[String], device_id: &str) -> bool {
+    if allow_any || devices.is_empty() {
+        return true;
+    }
+    contains(devices, device_id)
+}
+
+/// build a rejecting response carrying `reason` and the allowlist's gateway
+/// app id.
+fn deny(al: &Allowlist, reason: &str) -> BootResponse {
+    let gateway_app_id = al.gateway_app_id.clone();
+    let reason = String::from(reason);
+    BootResponse {
+        is_allowed: false,
+        gateway_app_id,
+        reason,
+    }
+}
+
+/// build an accepting response (`reason` is always `"ok"`) carrying the
+/// allowlist's gateway app id.
+fn allow(al: &Allowlist) -> BootResponse {
+    let gateway_app_id = al.gateway_app_id.clone();
+    BootResponse {
+        is_allowed: true,
+        gateway_app_id,
+        reason: String::from("ok"),
+    }
+}
+
+/// decide whether an app may boot.
+///
+/// Checks run in this order and the first failure determines `reason`:
+/// 1. OS image: skipped when `osImages` is empty, otherwise the image hash
+///    must be listed.
+/// 2. registration: some key of `apps` must normalize to the requested id.
+/// 3. compose hash: must be listed for the app.
+/// 4. device: see `device_ok`.
+///
+/// `apps` is a `HashMap` whose raw keys may differ only by `0x` prefix or
+/// case. Picking "the first" matching key would depend on hash iteration
+/// order, and the allowlist is re-parsed on every request, so the verdict
+/// could change between identical requests. Every equivalent entry is
+/// therefore considered: the app is allowed when any single entry accepts
+/// both the compose hash and the device. With one entry per app this is
+/// exactly the plain sequential check.
+fn check_app(info: &BootInfo, al: &Allowlist) -> BootResponse {
+    if !al.os_images.is_empty() && !contains(&al.os_images, &info.os_image_hash) {
+        return deny(al, "os image not allowed");
+    }
+    let app_id = norm(&info.app_id);
+    let registered: Vec<_> = al
+        .apps
+        .iter()
+        .filter(|(key, _)| norm(key) == app_id)
+        .map(|(_, rules)| rules)
+        .collect();
+    if registered.is_empty() {
+        return deny(al, "app not registered");
+    }
+    let hash_matches: Vec<_> = registered
+        .into_iter()
+        .filter(|rules| contains(&rules.compose_hashes, &info.compose_hash))
+        .collect();
+    if hash_matches.is_empty() {
+        return deny(al, "compose hash not allowed");
+    }
+    let device_accepted = hash_matches
+        .iter()
+        .any(|rules| device_ok(rules.allow_any_device, &rules.devices, &info.device_id));
+    if !device_accepted {
+        return deny(al, "device not allowed");
+    }
+    allow(al)
+}
+
+/// decide whether a KMS instance may boot: its `mrAggregated` must be listed
+/// in `kms.mrAggregated`, then the device gate (`device_ok`) must pass.
+fn check_kms(info: &BootInfo, al: &Allowlist) -> BootResponse {
+    let rules = &al.kms;
+    let measurement_listed = contains(&rules.mr_aggregated, &info.mr_aggregated);
+    if !measurement_listed {
+        return deny(al, "kms mrAggregated not allowed");
+    }
+    if device_ok(rules.allow_any_device, &rules.devices, &info.device_id) {
+        allow(al)
+    } else {
+        deny(al, "device not allowed")
+    }
+}
+
+/// load the allowlist from `path`, failing closed: if the file cannot be read
+/// or is not valid JSON for `Allowlist`, a warning is logged and the empty
+/// default (which denies every app and every KMS) is returned.
+///
+/// Takes `&Path` rather than `&PathBuf`; existing `&PathBuf` call sites
+/// coerce automatically.
+fn load(path: &std::path::Path) -> Allowlist {
+    let body = match std::fs::read_to_string(path) {
+        Ok(body) => body,
+        Err(e) => {
+            rocket::warn!("allowlist {} unreadable: {e}; denying all", path.display());
+            return Allowlist::default();
+        }
+    };
+    serde_json::from_str(&body).unwrap_or_else(|e| {
+        rocket::warn!("allowlist {} is invalid: {e}; denying all", path.display());
+        Allowlist::default()
+    })
+}
+
+/// `POST /bootAuth/app`: the KMS asks whether an app may boot. The allowlist
+/// is loaded fresh for this request, the verdict is logged, and it is returned
+/// as JSON.
+#[post("/bootAuth/app", data = "<info>")]
+fn boot_app(info: Json<BootInfo>, cli: &State<Cli>) -> Json<BootResponse> {
+    let r = check_app(&info, &load(&cli.config));
+    rocket::info!(
+        "bootAuth/app app={} compose={} -> allowed={} ({})",
+        norm(&info.app_id),
+        norm(&info.compose_hash),
+        r.is_allowed,
+        r.reason
+    );
+    Json(r)
+}
+
+/// `POST /bootAuth/kms`: the KMS asks whether a KMS instance may boot. The
+/// allowlist is loaded fresh for this request, the verdict is logged, and it
+/// is returned as JSON.
+#[post("/bootAuth/kms", data = "<info>")]
+fn boot_kms(info: Json<BootInfo>, cli: &State<Cli>) -> Json<BootResponse> {
+    let r = check_kms(&info, &load(&cli.config));
+    rocket::info!(
+        "bootAuth/kms mr={} -> allowed={} ({})",
+        norm(&info.mr_aggregated),
+        r.is_allowed,
+        r.reason
+    );
+    Json(r)
+}
+
+/// info endpoint the KMS GETs to populate its metadata. Single-node: no chain,
+/// so every chain-related field is returned empty (`chainId` is 0).
+#[get("/")]
+fn info() -> Json<serde_json::Value> {
+    Json(json!({
+        "status": "ok",
+        "kmsContractAddr": "",
+        "ethRpcUrl": "",
+        "gatewayAppId": "",
+        "chainId": 0,
+        "appImplementation": ""
+    }))
+}
+
+/// parse the CLI, overlay the bind address/port on Rocket's default
+/// configuration, register the CLI as managed state, and serve the three
+/// routes until Rocket shuts down.
+#[rocket::main]
+async fn main() -> Result<()> {
+    let cli = Cli::parse();
+    let figment = rocket::Config::figment()
+        .merge(("address", cli.address.clone()))
+        .merge(("port", cli.port));
+    let server = rocket::custom(figment)
+        .manage(cli)
+        .mount("/", routes![info, boot_app, boot_kms]);
+    match server.launch().await {
+        Ok(_) => Ok(()),
+        Err(e) => Err(anyhow::anyhow!("auth webhook failed: {e}")),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ // fixture: one allowed OS image, one KMS measurement, and one app with a
+ // single compose hash. All values carry a `0x` prefix and mixed case so
+ // the tests exercise normalization.
+ fn allowlist() -> Allowlist {
+ serde_json::from_str(
+ r#"{
+ "osImages": ["0xIMG"],
+ "kms": { "mrAggregated": ["0xMR"], "allowAnyDevice": true },
+ "apps": { "0xApp1": { "composeHashes": ["0xHASH"], "allowAnyDevice": true } }
+ }"#,
+ )
+ .unwrap()
+ }
+
+ // fixture: a BootInfo with only the fields `check_app` reads; the rest
+ // stay at their empty defaults.
+ fn boot(app: &str, hash: &str, img: &str) -> BootInfo {
+ BootInfo {
+ app_id: app.into(),
+ compose_hash: hash.into(),
+ os_image_hash: img.into(),
+ ..Default::default()
+ }
+ }
+
+ #[test]
+ fn app_allowed_with_normalized_hex() {
+ // differing 0x/case must still match.
+ let r = check_app(&boot("APP1", "hash", "img"), &allowlist());
+ assert!(r.is_allowed, "{}", r.reason);
+ }
+
+ #[test]
+ fn app_denied_unknown_app_hash_or_image() {
+ // each call changes exactly one of app id / compose hash / image.
+ let al = allowlist();
+ assert!(!check_app(&boot("0xnope", "0xHASH", "0xIMG"), &al).is_allowed);
+ assert!(!check_app(&boot("0xApp1", "0xnope", "0xIMG"), &al).is_allowed);
+ assert!(!check_app(&boot("0xApp1", "0xHASH", "0xnope"), &al).is_allowed);
+ }
+
+ #[test]
+ fn kms_allowlist_and_empty_default() {
+ let al = allowlist();
+ let info = BootInfo {
+ mr_aggregated: "0xMR".into(),
+ ..Default::default()
+ };
+ assert!(check_kms(&info, &al).is_allowed);
+ // fail closed: empty allowlist denies (the single-node case never calls this).
+ assert!(!check_kms(&info, &Allowlist::default()).is_allowed);
+ }
+
+ // wire-contract snapshot: BootInfo as the KMS serializes it (camelCase).
+ // Keep these field names in sync with the kms BootInfo. Unknown fields are
+ // ignored (the struct does not set `deny_unknown_fields`), and because of
+ // `#[serde(default)]` a renamed field deserializes to "" — which fails
+ // closed, but silently — so this test pins the names we depend on: if the
+ // KMS renames one, the matching assertion here breaks first.
+ #[test]
+ fn deserializes_the_kms_bootinfo_wire_contract() {
+ let wire = r#"{
+ "attestationMode": "dstack",
+ "mrAggregated": "0xAABB",
+ "osImageHash": "0xC2AA",
+ "mrSystem": "0xdead",
+ "appId": "0xApp1",
+ "composeHash": "0xHASH",
+ "instanceId": "0x01",
+ "deviceId": "0xDEV",
+ "keyProviderInfo": "kp",
+ "tcbStatus": "UpToDate",
+ "advisoryIds": []
+ }"#;
+ let info: BootInfo = serde_json::from_str(wire).expect("kms BootInfo must deserialize");
+ assert_eq!(norm(&info.mr_aggregated), "aabb");
+ assert_eq!(norm(&info.os_image_hash), "c2aa");
+ assert_eq!(norm(&info.app_id), "app1");
+ assert_eq!(norm(&info.compose_hash), "hash");
+ assert_eq!(norm(&info.device_id), "dev");
+ // a check using this payload should pass against a matching allowlist.
+ let info2: BootInfo = serde_json::from_str(wire).unwrap();
+ let al: Allowlist = serde_json::from_str(
+ r#"{"osImages":["0xC2AA"],"apps":{"0xApp1":{"composeHashes":["0xHASH"],"allowAnyDevice":true}}}"#,
+ )
+ .unwrap();
+ assert!(check_app(&info2, &al).is_allowed);
+ }
+}
diff --git a/crates/dstack-cli-core/Cargo.toml b/crates/dstack-cli-core/Cargo.toml
new file mode 100644
index 00000000..ea74358b
--- /dev/null
+++ b/crates/dstack-cli-core/Cargo.toml
@@ -0,0 +1,21 @@
+# SPDX-FileCopyrightText: © 2026 Phala Network
+#
+# SPDX-License-Identifier: Apache-2.0
+
+[package]
+name = "dstack-cli-core"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+
+[dependencies]
+anyhow.workspace = true
+http-client = { workspace = true, features = ["prpc"] }
+dstack-vmm-rpc.workspace = true
+serde_json.workspace = true
+# advisory file locking (flock) for the allowlist/state read-modify-write;
+# already in the dependency tree transitively, so no extra compile cost.
+rustix = { version = "0.38", features = ["fs"] }
+
+[dev-dependencies]
+toml.workspace = true
diff --git a/crates/dstack-cli-core/src/compose.rs b/crates/dstack-cli-core/src/compose.rs
new file mode 100644
index 00000000..5c395213
--- /dev/null
+++ b/crates/dstack-cli-core/src/compose.rs
@@ -0,0 +1,37 @@
+// SPDX-FileCopyrightText: © 2026 Phala Network
+//
+// SPDX-License-Identifier: Apache-2.0
+
+//! build the app-compose manifest — the JSON document the VMM hashes (to derive
+//! the app id) and deploys. The raw docker-compose YAML is embedded as a string.
+
+use serde_json::json;
+
+/// render the app-compose manifest for a user app in the single-node,
+/// direct-port flow (no gateway, no local key provider).
+///
+/// * `name` — manifest `name`.
+/// * `docker_compose_yaml` — embedded verbatim as `docker_compose_file`.
+/// * `kms_enabled` — selects KMS mode (deterministic, upgradeable per-app
+///   keys).
+///
+/// Returns the manifest as pretty-printed JSON.
+pub fn build_app_compose(name: &str, docker_compose_yaml: &str, kms_enabled: bool) -> String {
+    let doc = json!({
+        "manifest_version": 2,
+        "name": name,
+        "runner": "docker-compose",
+        "docker_compose_file": docker_compose_yaml,
+        "kms_enabled": kms_enabled,
+        "gateway_enabled": false,
+        "local_key_provider_enabled": false,
+        "public_logs": true,
+        "public_sysinfo": true,
+        "no_instance_id": false,
+        // The manifest default for `secure_time` is true, which blocks boot on
+        // `chronyc waitsync`. This flow has no gateway/RA-TLS needing a
+        // pre-synced clock, and the strict wait hard-fails (→ reboot loop)
+        // whenever chrony has no usable source, so it is turned off; chronyd
+        // still syncs in the background. (NTS is also currently broken in
+        // guest images — see dstack#745.)
+        "secure_time": false,
+    });
+    // `Value`'s alternate Display is the infallible pretty-printer and yields
+    // the same bytes as serde_json::to_string_pretty, with no Result to unwrap.
+    format!("{:#}", doc)
+}
diff --git a/crates/dstack-cli-core/src/config.rs b/crates/dstack-cli-core/src/config.rs
new file mode 100644
index 00000000..a0ca8241
--- /dev/null
+++ b/crates/dstack-cli-core/src/config.rs
@@ -0,0 +1,509 @@
+// SPDX-FileCopyrightText: © 2026 Phala Network
+//
+// SPDX-License-Identifier: Apache-2.0
+
+//! render the config files `dstackup install` writes:
+//!
+//! * `kms.toml` — embedded into the KMS-in-CVM app-compose; this is the
+//! single-node config (webhook auth + `enforce_self_authorization =
+//! false` + a set `auto_bootstrap_domain`, the combination validated to make
+//! bootstrap hands-off).
+//! * `auth-allowlist.json` — read by the host-side Rust auth webhook.
+//! * `vmm.toml` — the host VMM config (gateway + auth-token gating off).
+
+use crate::host::Platform;
+use anyhow::{Context, Result};
+use serde_json::json;
+use std::path::Path;
+
+/// canonical form of a hex string: surrounding whitespace removed, a single
+/// leading `0x`/`0X` dropped, lowercased.
+///
+/// MUST stay in sync with `dstack-auth`'s `norm()`: the webhook compares
+/// allowlist entries against KMS-supplied hashes with the same rule, so a
+/// divergence here silently denies (or wrongly allows) apps.
+pub fn norm_hex(s: &str) -> String {
+    let trimmed = s.trim();
+    let digits = match trimmed.strip_prefix("0x") {
+        Some(rest) => rest,
+        None => trimmed.strip_prefix("0X").unwrap_or(trimmed),
+    };
+    digits.to_lowercase()
+}
+
+/// register an app (id + compose hash) in the auth webhook's allowlist file,
+/// so the KMS will issue keys to it. Read-modify-write; idempotent.
+///
+/// Holds an exclusive lock for the whole read-modify-write (so two concurrent
+/// `dstack run`s can't clobber each other) and writes atomically (so a crash or
+/// partial write can't leave torn JSON — which the webhook would read as
+/// deny-all).
+///
+/// Both the app id and the compose hash are matched by their normalized form
+/// (`norm_hex`). If the file already has a key for this app in a different
+/// spelling (`0x` prefix, upper case — e.g. added by hand), that entry is
+/// updated instead of creating a second, equivalent key; otherwise a new entry
+/// is created under the normalized id. New hashes are stored normalized, so
+/// the file can't accumulate visually-distinct-but-equal entries.
+///
+/// # Errors
+/// Fails when the lock cannot be taken, the file is missing or unreadable, the
+/// JSON does not parse, `apps` is not an object, the app entry has no
+/// `composeHashes` array, or the atomic write fails.
+pub fn register_app_in_allowlist(path: &Path, app_id: &str, compose_hash: &str) -> Result<()> {
+    let _lock = crate::fsutil::lock_exclusive(path)?;
+    let body = match std::fs::read_to_string(path) {
+        Ok(b) => b,
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound => anyhow::bail!(
+            "allowlist {} does not exist — run `dstackup install` first, or check the --allowlist path",
+            path.display()
+        ),
+        Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => {
+            return Err(e).with_context(|| {
+                format!(
+                    "reading allowlist {} (it is usually root-owned — run with sudo)",
+                    path.display()
+                )
+            })
+        }
+        Err(e) => return Err(e).with_context(|| format!("reading allowlist {}", path.display())),
+    };
+    let mut v: serde_json::Value = serde_json::from_str(&body).context("parsing allowlist json")?;
+    let apps = v
+        .get_mut("apps")
+        .and_then(|a| a.as_object_mut())
+        .context("allowlist has no `apps` object")?;
+    // prefer an existing key that is the same id in another spelling, so the
+    // webhook never sees two entries for one app.
+    let id = norm_hex(app_id);
+    let existing = apps.keys().find(|k| norm_hex(k) == id).cloned();
+    let key = existing.unwrap_or(id);
+    let entry = apps
+        .entry(key)
+        .or_insert_with(|| json!({ "composeHashes": [], "devices": [], "allowAnyDevice": true }));
+    let hashes = entry
+        .get_mut("composeHashes")
+        .and_then(|h| h.as_array_mut())
+        .context("app entry missing `composeHashes`")?;
+    let norm = norm_hex(compose_hash);
+    let present = hashes
+        .iter()
+        .any(|h| h.as_str().map(|s| norm_hex(s) == norm).unwrap_or(false));
+    if !present {
+        hashes.push(serde_json::Value::String(norm));
+    }
+    crate::fsutil::write_atomic(path, &serde_json::to_string_pretty(&v)?)
+        .with_context(|| format!("writing allowlist {}", path.display()))?;
+    Ok(())
+}
+
+/// public OS-image download URL template used by the KMS image-hash verifier.
+/// `{OS_IMAGE_HASH}` is a literal placeholder kept in the string; this value
+/// is the default for `HostConfig::image_download_url`.
+pub const DEFAULT_IMAGE_DOWNLOAD_URL: &str =
+ "https://download.dstack.org/os-images/mr_{OS_IMAGE_HASH}.tar.gz";
+
+/// inputs that parameterize the rendered configs (`kms_toml` and
+/// `auth_allowlist_json`).
+#[derive(Debug, Clone)]
+pub struct HostConfig {
+ /// URL the KMS-in-CVM uses to reach the host auth webhook
+ /// (the host as seen from the CVM under user-mode networking, e.g.
+ /// `http://10.0.2.2:8001`).
+ pub auth_webhook_url: String,
+ /// KMS bootstrap domain — the host address as seen from the CVM
+ /// (e.g. `10.0.2.2`); the bootstrapped RPC cert is issued for this.
+ pub kms_bootstrap_domain: String,
+ /// OS image hash to allow apps to boot from (the measured guest image).
+ /// When empty, the rendered allowlist has an empty `osImages` list.
+ pub os_image_hash: String,
+ /// OS image download URL template (must contain `{OS_IMAGE_HASH}`).
+ pub image_download_url: String,
+ /// whether the KMS verifies the OS image hash on app key requests.
+ /// Rendered as `verify` under `[core.image]`.
+ pub verify_os_image: bool,
+ /// confidential-computing platform (selects SNP-specific KMS settings).
+ pub platform: Platform,
+}
+
+impl Default for HostConfig {
+    /// defaults for a TDX host under user-mode networking: webhook and
+    /// bootstrap domain at `10.0.2.2`, the public image download URL, OS image
+    /// verification on, and no OS image hash set.
+    fn default() -> Self {
+        HostConfig {
+            auth_webhook_url: String::from("http://10.0.2.2:8001"),
+            kms_bootstrap_domain: String::from("10.0.2.2"),
+            os_image_hash: String::new(),
+            image_download_url: String::from(DEFAULT_IMAGE_DOWNLOAD_URL),
+            verify_os_image: true,
+            platform: Platform::Tdx,
+        }
+    }
+}
+
+/// render the single-node KMS config (lives at `/kms/kms.toml` inside the CVM).
+///
+/// `cfg` supplies the webhook URL, bootstrap domain, image download URL, the
+/// image-verify flag and the platform; everything else in the file is fixed.
+pub fn kms_toml(cfg: &HostConfig) -> String {
+    // AMD SEV-SNP gates EVERY key release (incl. the KMS's own bootstrap) on
+    // `sev_snp_key_release`, which defaults to false — so it must be set on
+    // SNP or the KMS refuses to release keys. TDX needs neither key, so
+    // nothing is emitted there.
+    let sev_snp = match cfg.platform {
+        Platform::AmdSevSnp => "sev_snp_key_release = true\namd_kds_base_url = \"\"\n",
+        Platform::Tdx => "",
+    };
+    let verify = cfg.verify_os_image;
+    let download_url = &cfg.image_download_url;
+    let webhook_url = &cfg.auth_webhook_url;
+    let bootstrap_domain = &cfg.kms_bootstrap_domain;
+    format!(
+        r#"# generated by `dstackup install` — single-node KMS
+
+[rpc]
+address = "0.0.0.0"
+port = 8000
+
+[rpc.tls]
+key = "/kms/certs/rpc.key"
+certs = "/kms/certs/rpc.crt"
+
+[rpc.tls.mutual]
+ca_certs = "/kms/certs/tmp-ca.crt"
+mandatory = false
+
+[core]
+cert_dir = "/kms/certs"
+admin_token_hash = ""
+# single-node: the KMS does not self-attest to its own auth API before
+# bootstrap (it still attests the genesis keys via the guest agent, and app
+# auth + per-app quote checks are unaffected).
+enforce_self_authorization = false
+{sev_snp}
+[core.image]
+verify = {verify}
+cache_dir = "/kms/images"
+download_url = "{download_url}"
+download_timeout = "2m"
+
+[core.metrics]
+enabled = false
+
+[core.auth_api]
+type = "webhook"
+
+[core.auth_api.webhook]
+url = "{webhook_url}"
+
+[core.onboard]
+enabled = true
+auto_bootstrap_domain = "{bootstrap_domain}"
+address = "0.0.0.0"
+port = 8000
+"#
+    )
+}
+
+/// render the host-side auth webhook allowlist as pretty-printed JSON.
+///
+/// single-node (no gateway): the OS image is allowed, the KMS `mrAggregated`
+/// allowlist is empty (no replication; self-bootstrap is hands-off), and per-app
+/// compose hashes are added by `dstack run`.
+///
+/// `osImages` holds `cfg.os_image_hash` as given, or is empty when that hash
+/// is empty.
+pub fn auth_allowlist_json(cfg: &HostConfig) -> String {
+    let os_images: Vec<String> = if cfg.os_image_hash.is_empty() {
+        Vec::new()
+    } else {
+        vec![cfg.os_image_hash.clone()]
+    };
+    let allowlist = json!({
+        "osImages": os_images,
+        "kms": {
+            "mrAggregated": [],
+            "devices": [],
+            "allowAnyDevice": true
+        },
+        "apps": {}
+    });
+    // infallible pretty-print via Value's Display; see compose::build_app_compose.
+    format!("{allowlist:#}")
+}
+
+/// default pinned, reproducibly-built KMS image (Docker Hub).
+/// NOTE(review): presumably the default for the `kms_image` argument of
+/// `kms_app_compose` — confirm against the caller.
+pub const DEFAULT_KMS_IMAGE: &str = "dstacktee/dstack-kms:0.5.11";
+
+/// build the KMS-in-CVM app-compose manifest. An init script writes the
+/// rendered `kms.toml` into the guest and the KMS container mounts it. On TDX
+/// the CVM uses the SGX local key provider to seal the KMS root key; AMD
+/// SEV-SNP has no such provider, so it's disabled there.
+///
+/// * `kms_toml` — config text, embedded in the init script through a quoted
+///   heredoc (`'KMSTOML'`), so the shell does not expand anything inside it.
+/// * `kms_image` — container image reference for the `kms` service.
+/// * `platform` — `local_key_provider_enabled` is true only for TDX.
+///
+/// Returns the manifest as pretty-printed JSON.
+pub fn kms_app_compose(kms_toml: &str, kms_image: &str, platform: Platform) -> String {
+    // YAML nesting is significant: `image`, `volumes`, `ports`, `restart` and
+    // `command` must sit under the `kms` service, and `kms-volume` under the
+    // top-level `volumes`.
+    let docker_compose = format!(
+        r#"services:
+  kms:
+    image: {kms_image}
+    volumes:
+      - kms-volume:/kms
+      - /var/run/dstack.sock:/var/run/dstack.sock
+      - /dstack/kms-config/kms.toml:/kms/kms.toml:ro
+    ports:
+      - "8000:8000"
+    restart: unless-stopped
+    command: sh -c 'mkdir -p /kms/certs /kms/images && exec dstack-kms -c /kms/kms.toml'
+volumes:
+  kms-volume:
+"#
+    );
+    let init_script = format!(
+        "mkdir -p /dstack/kms-config\ncat > /dstack/kms-config/kms.toml <<'KMSTOML'\n{kms_toml}\nKMSTOML\ntrue\n"
+    );
+    let manifest = json!({
+        "manifest_version": 2,
+        "name": "dstack-kms",
+        "runner": "docker-compose",
+        "docker_compose_file": docker_compose,
+        "init_script": init_script,
+        "kms_enabled": false,
+        "gateway_enabled": false,
+        "local_key_provider_enabled": platform == Platform::Tdx,
+        "public_logs": true,
+        "public_sysinfo": true,
+        "public_tcbinfo": true,
+        "no_instance_id": false,
+        "secure_time": false,
+        "allowed_envs": []
+    });
+    // infallible pretty-print via Value's Display; see compose::build_app_compose.
+    format!("{manifest:#}")
+}
+
+/// inputs for rendering `vmm.toml`. Defaults target a localhost dashboard and
+/// reuse of an existing local key provider; the isolation knobs (ports, cid
+/// range, prefix) let a fresh instance coexist with an existing VMM.
+#[derive(Debug, Clone)]
+pub struct VmmRender {
+    /// Rocket endpoint for the dashboard + management API
+    /// (e.g. `tcp:127.0.0.1:9080`, or `unix:<path>`).
+    pub dashboard_addr: String,
+    /// guest image directory.
+    pub image_path: String,
+    /// qemu binary path.
+    pub qemu_path: String,
+    /// run directory for the supervisor socket/pid/log.
+    pub run_dir: String,
+    /// VM storage directory (isolated per install; default `~/.dstack-vmm/vm`).
+    pub vm_path: String,
+    /// supervisor binary path.
+    pub supervisor_exe: String,
+    /// CID pool start (raise to coexist with an existing VMM).
+    pub cid_start: u32,
+    /// CID pool size.
+    pub cid_pool_size: u32,
+    /// host-api vsock port (raise to coexist with an existing VMM on 10000).
+    pub host_api_port: u32,
+    /// local key-provider address (reuse the running one).
+    pub key_provider_addr: String,
+    /// local key-provider port.
+    pub key_provider_port: u32,
+    /// KMS URLs injected into app CVMs (the guest-visible KMS address).
+    pub kms_urls: Vec<String>,
+    /// confidential-computing platform (selects qemu/share-mode for the CVMs).
+    pub platform: Platform,
+}
+
+impl Default for VmmRender {
+    /// defaults for a TDX host: loopback dashboard on port 9080, state under
+    /// `/var/lib/dstack`, CIDs 1000..2000, host-api port 10000, key provider
+    /// at `127.0.0.1:3443`, and no KMS URLs.
+    fn default() -> Self {
+        VmmRender {
+            dashboard_addr: String::from("tcp:127.0.0.1:9080"),
+            image_path: String::from("/var/lib/dstack/images"),
+            qemu_path: String::from("/usr/bin/qemu-system-x86_64"),
+            run_dir: String::from("/var/lib/dstack/run"),
+            vm_path: String::from("/var/lib/dstack/vm"),
+            supervisor_exe: String::from("/usr/bin/dstack-supervisor"),
+            cid_start: 1000,
+            cid_pool_size: 1000,
+            host_api_port: 10000,
+            key_provider_addr: String::from("127.0.0.1"),
+            key_provider_port: 3443,
+            kms_urls: Vec::new(),
+            platform: Platform::Tdx,
+        }
+    }
+}
+
+/// render the host `vmm.toml`. Gateway and auth-token gating are off
+/// (single-node direct-port access); CVMs use user-mode networking with host
+/// port mapping.
+///
+/// Values from `r` are interpolated as-is (no TOML escaping), so paths and
+/// URLs must not contain `"` or `\`.
+pub fn vmm_toml(r: &VmmRender) -> String {
+    // render the URL list as a TOML array body: `"a", "b"`.
+    let kms_urls = r
+        .kms_urls
+        .iter()
+        .map(|u| format!("\"{u}\""))
+        .collect::<Vec<_>>()
+        .join(", ");
+    format!(
+        r#"# generated by `dstackup install`
+
+workers = 8
+max_blocking = 64
+ident = "dstack VMM"
+temp_dir = "/tmp"
+keep_alive = 10
+log_level = "info"
+address = "{dashboard_addr}"
+reuse = true
+kms_url = ""
+event_buffer_size = 20
+node_name = ""
+run_path = "{vm_path}"
+
+[image]
+path = "{image_path}"
+registry = ""
+
+[cvm]
+platform = "{platform}"
+qemu_path = "{qemu_path}"
+kms_urls = [{kms_urls}]
+gateway_urls = []
+pccs_url = ""
+docker_registry = ""
+cid_start = {cid_start}
+cid_pool_size = {cid_pool_size}
+max_allocable_vcpu = 20
+max_allocable_memory_in_mb = 100_000
+qmp_socket = false
+user = ""
+use_mrconfigid = {use_mrconfigid}
+qemu_pci_hole64_size = 0
+qemu_hotplug_off = false
+host_share_mode = "{host_share_mode}"
+qgs_port = 4050
+
+[cvm.product]
+sys_vendor = "dstack"
+product_name = "dstack"
+
+[cvm.networking]
+mode = "user"
+net = "10.0.2.0/24"
+dhcp_start = "10.0.2.10"
+restrict = false
+forward_service_enabled = false
+
+[cvm.port_mapping]
+enabled = true
+address = "127.0.0.1"
+range = [
+    {{ protocol = "tcp", from = 1, to = 20000 }},
+]
+
+[cvm.auto_restart]
+enabled = true
+interval = 20
+
+[cvm.gpu]
+enabled = false
+listing = []
+exclude = []
+include = []
+allow_attach_all = false
+
+[gateway]
+base_domain = "localhost"
+port = 8082
+agent_port = 8090
+
+[auth]
+enabled = false
+tokens = []
+
+[supervisor]
+exe = "{supervisor_exe}"
+sock = "{run_dir}/supervisor.sock"
+pid_file = "{run_dir}/supervisor.pid"
+log_file = "{run_dir}/supervisor.log"
+detached = true
+auto_start = true
+
+[host_api]
+ident = "dstack VMM"
+address = "vsock:2"
+port = {host_api_port}
+
+[key_provider]
+enabled = true
+address = "{kp_addr}"
+port = {kp_port}
+"#,
+        dashboard_addr = r.dashboard_addr,
+        image_path = r.image_path,
+        vm_path = r.vm_path,
+        qemu_path = r.qemu_path,
+        platform = r.platform.vmm_str(),
+        // SNP CVMs share the host dir via a virtual disk (9p doesn't play with
+        // SNP memory encryption) and bind measurements via mrconfigid.
+        use_mrconfigid = r.platform == Platform::AmdSevSnp,
+        host_share_mode = match r.platform {
+            Platform::AmdSevSnp => "vhd",
+            Platform::Tdx => "9p",
+        },
+        kms_urls = kms_urls,
+        cid_start = r.cid_start,
+        cid_pool_size = r.cid_pool_size,
+        supervisor_exe = r.supervisor_exe,
+        run_dir = r.run_dir,
+        host_api_port = r.host_api_port,
+        kp_addr = r.key_provider_addr,
+        kp_port = r.key_provider_port,
+    )
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // overridden fields must show up in the output and the result must parse.
+    #[test]
+    fn vmm_toml_is_valid_and_parameterized() {
+        let r = VmmRender {
+            dashboard_addr: "tcp:127.0.0.1:19080".into(),
+            cid_start: 2000,
+            host_api_port: 10001,
+            ..Default::default()
+        };
+        let rendered = vmm_toml(&r);
+        assert!(rendered.contains(r#"address = "tcp:127.0.0.1:19080""#));
+        assert!(rendered.contains("cid_start = 2000"));
+        assert!(rendered.contains("port = 10001"));
+        toml::from_str::<toml::Value>(&rendered).expect("vmm.toml must be valid TOML");
+    }
+
+    // the settings that make single-node bootstrap hands-off must be present.
+    #[test]
+    fn kms_toml_has_single_node_invariants() {
+        let cfg = HostConfig {
+            auth_webhook_url: "http://10.0.2.2:8001".into(),
+            kms_bootstrap_domain: "10.0.2.2".into(),
+            ..Default::default()
+        };
+        let toml = kms_toml(&cfg);
+        assert!(toml.contains("enforce_self_authorization = false"));
+        assert!(toml.contains(r#"auto_bootstrap_domain = "10.0.2.2""#));
+        assert!(toml.contains(r#"type = "webhook""#));
+        assert!(toml.contains(r#"url = "http://10.0.2.2:8001""#));
+        // sanity: it parses as TOML.
+        toml::from_str::<toml::Value>(&toml).expect("kms.toml must be valid TOML");
+    }
+
+    #[test]
+    fn platform_specific_rendering() {
+        // TDX defaults: no SNP key-release, 9p share, mrconfigid off, SGX provider.
+        let tdx = kms_toml(&HostConfig::default());
+        assert!(!tdx.contains("sev_snp_key_release"));
+        toml::from_str::<toml::Value>(&tdx).expect("tdx kms.toml valid");
+        let tdx_vmm = vmm_toml(&VmmRender::default());
+        assert!(tdx_vmm.contains(r#"platform = "tdx""#));
+        assert!(tdx_vmm.contains(r#"host_share_mode = "9p""#));
+        assert!(tdx_vmm.contains("use_mrconfigid = false"));
+        toml::from_str::<toml::Value>(&tdx_vmm).expect("tdx vmm.toml valid");
+        assert!(kms_app_compose("x", "img", Platform::Tdx)
+            .contains(r#""local_key_provider_enabled": true"#));
+
+        // SNP: key-release gate set, vhd share, mrconfigid on, no local provider.
+        let snp = kms_toml(&HostConfig {
+            platform: Platform::AmdSevSnp,
+            ..Default::default()
+        });
+        assert!(snp.contains("sev_snp_key_release = true"));
+        toml::from_str::<toml::Value>(&snp).expect("snp kms.toml valid");
+        let snp_vmm = vmm_toml(&VmmRender {
+            platform: Platform::AmdSevSnp,
+            ..Default::default()
+        });
+        assert!(snp_vmm.contains(r#"platform = "amd-sev-snp""#));
+        assert!(snp_vmm.contains(r#"host_share_mode = "vhd""#));
+        assert!(snp_vmm.contains("use_mrconfigid = true"));
+        toml::from_str::<toml::Value>(&snp_vmm).expect("snp vmm.toml valid");
+        assert!(kms_app_compose("x", "img", Platform::AmdSevSnp)
+            .contains(r#""local_key_provider_enabled": false"#));
+    }
+
+    // the image hash is stored as given; KMS and app lists start empty.
+    #[test]
+    fn allowlist_shape() {
+        let cfg = HostConfig {
+            os_image_hash: "0xabc".into(),
+            ..Default::default()
+        };
+        let v: serde_json::Value = serde_json::from_str(&auth_allowlist_json(&cfg)).unwrap();
+        assert_eq!(v["osImages"][0], "0xabc");
+        assert_eq!(v["kms"]["mrAggregated"].as_array().unwrap().len(), 0);
+        assert!(v["apps"].as_object().unwrap().is_empty());
+    }
+}
diff --git a/crates/dstack-cli-core/src/fsutil.rs b/crates/dstack-cli-core/src/fsutil.rs
new file mode 100644
index 00000000..e03567e0
--- /dev/null
+++ b/crates/dstack-cli-core/src/fsutil.rs
@@ -0,0 +1,102 @@
+// SPDX-FileCopyrightText: © 2026 Phala Network
+//
+// SPDX-License-Identifier: Apache-2.0
+
+//! small filesystem helpers: atomic file replace + advisory locking.
+//!
+//! The allowlist and the install state file are read-modify-written from more
+//! than one process (`dstack run` adds an app while the webhook reads; a second
+//! `dstack run` can race the first). A torn write there is not cosmetic: the
+//! auth webhook fails *closed* on invalid JSON, so a half-written allowlist
+//! denies keys to every app on the host. These helpers make the write atomic
+//! and serialize concurrent writers.
+
+use anyhow::{Context, Result};
+use std::ffi::OsString;
+use std::fs::{File, OpenOptions};
+use std::io::Write;
+use std::path::{Path, PathBuf};
+
+/// build the sibling of `path` whose full name has `suffix` appended (the
+/// extension is kept, not replaced: `a/b.json` + `.tmp` → `a/b.json.tmp`).
+fn sibling(path: &Path, suffix: &str) -> PathBuf {
+    let mut name = OsString::from(path);
+    name.push(suffix);
+    name.into()
+}
+
+/// atomically replace `path`'s contents: write a sibling temp file, fsync it,
+/// rename it over the target, then fsync the directory (best effort). A reader
+/// (or a crash) sees either the old file or the new one, never a fragment, and
+/// the rename is durable across a power loss. `tmp` and `path` are in the same
+/// directory so the rename is atomic. Errors name the failing step and path.
+pub fn write_atomic(path: &Path, contents: &str) -> Result<()> {
+    let tmp = sibling(path, ".tmp");
+    let mut f =
+        File::create(&tmp).with_context(|| format!("creating temp file {}", tmp.display()))?;
+    f.write_all(contents.as_bytes())
+        .with_context(|| format!("writing {}", tmp.display()))?;
+    f.sync_all()
+        .with_context(|| format!("syncing {}", tmp.display()))?;
+    drop(f);
+    std::fs::rename(&tmp, path)
+        .with_context(|| format!("renaming {} -> {}", tmp.display(), path.display()))?;
+    // fsync the containing directory so the rename itself survives a crash; a
+    // bare file name has an empty parent, which means the current directory.
+    let dir = path.parent().filter(|d| !d.as_os_str().is_empty());
+    if let Ok(d) = File::open(dir.unwrap_or_else(|| Path::new("."))) {
+        let _ = d.sync_all();
+    }
+    Ok(())
+}
+
+/// acquire an exclusive advisory lock tied to `path` (held on a sibling
+/// `.lock` file), blocking until it is granted. The lock releases when the
+/// returned `File` guard is dropped — including on process exit, so a crash never
+/// leaves a stale lock. Hold it around a read-modify-write of `path`.
+#[must_use = "the lock is released when the returned guard is dropped"]
+pub fn lock_exclusive(path: &Path) -> Result<File> {
+    let lock_path = sibling(path, ".lock");
+    let f = OpenOptions::new()
+        .create(true)
+        .truncate(false)
+        .write(true)
+        .open(&lock_path)
+        .with_context(|| format!("opening lock {}", lock_path.display()))?;
+    rustix::fs::flock(&f, rustix::fs::FlockOperation::LockExclusive)
+        .with_context(|| format!("locking {}", lock_path.display()))?;
+    Ok(f)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn atomic_write_replaces_contents() {
+        let scratch = std::env::temp_dir().join(format!("dstack-fsutil-{}", std::process::id()));
+        std::fs::create_dir_all(&scratch).unwrap();
+        let target = scratch.join("x.json");
+        for body in ["one", "two"] {
+            write_atomic(&target, body).unwrap();
+            assert_eq!(std::fs::read_to_string(&target).unwrap(), body);
+        }
+        // no temp file left behind.
+        assert!(!sibling(&target, ".tmp").exists());
+        let _ = std::fs::remove_dir_all(&scratch);
+    }
+
+    #[test]
+    fn lock_is_reentrant_within_process_after_drop() {
+        let scratch = std::env::temp_dir().join(format!("dstack-fslock-{}", std::process::id()));
+        std::fs::create_dir_all(&scratch).unwrap();
+        let target = scratch.join("y.json");
+        std::fs::write(&target, "{}").unwrap();
+        // take the lock and drop the guard straight away, which releases it...
+        drop(lock_exclusive(&target).unwrap());
+        // ...so acquiring it again from this process must succeed; had the first
+        // guard still been alive, this call would wait on it forever.
+        let _second = lock_exclusive(&target).unwrap();
+        let _ = std::fs::remove_dir_all(&scratch);
+    }
+}
diff --git a/crates/dstack-cli-core/src/host.rs b/crates/dstack-cli-core/src/host.rs
new file mode 100644
index 00000000..3a8688fe
--- /dev/null
+++ b/crates/dstack-cli-core/src/host.rs
@@ -0,0 +1,277 @@
+// SPDX-FileCopyrightText: © 2026 Phala Network
+//
+// SPDX-License-Identifier: Apache-2.0
+
+//! host environment checks used by `dstackup` — SGX presence and the primary IP.
+
+use anyhow::{bail, Result};
+use std::net::{IpAddr, UdpSocket};
+use std::path::Path;
+
+/// which of the SGX device nodes the local key provider needs are present.
+#[derive(Debug, Clone, Copy)]
+pub struct Sgx {
+    pub enclave: bool,
+    pub provision: bool,
+}
+impl Sgx {
+    /// true only when both `enclave` and `provision` are set.
+    pub fn ok(&self) -> bool {
+        matches!(self, Sgx { enclave: true, provision: true })
+    }
+}
+
+/// probe the host for `/dev/sgx_enclave` and `/dev/sgx_provision`.
+pub fn check_sgx() -> Sgx {
+    let present = |node: &str| Path::new(node).exists();
+    let enclave = present("/dev/sgx_enclave");
+    let provision = present("/dev/sgx_provision");
+    Sgx { enclave, provision }
+}
+
+/// whether `/dev/sev`, the AMD secure processor device the host VMM needs for SEV-SNP, exists.
+pub fn check_sev() -> bool {
+    std::fs::metadata("/dev/sev").is_ok()
+}
+
+/// fail fast when SGX is unavailable, naming the missing device nodes (design
+/// decision: never silently degrade to a host-mode KMS with no real attestation).
+pub fn require_sgx() -> Result<()> {
+    let sgx = check_sgx();
+    if sgx.ok() {
+        return Ok(());
+    }
+    let missing: Vec<&str> = [
+        ("/dev/sgx_enclave", sgx.enclave),
+        ("/dev/sgx_provision", sgx.provision),
+    ]
+    .iter()
+    .filter_map(|&(node, present)| (!present).then_some(node))
+    .collect();
+    bail!(
+        "sgx not available (missing {}); dstack requires Intel SGX for the local key provider — enable SGX in BIOS, or run on a TDX+SGX host",
+        missing.join(", ")
+    );
+}
+
+/// the confidential-computing platform a host launches CVMs on; `Default` is `Tdx`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub enum Platform {
+ /// Intel TDX (with an SGX-backed local key provider); the default variant.
+ #[default]
+ Tdx,
+ /// AMD SEV-SNP.
+ AmdSevSnp,
+}
+
+impl Platform {
+    /// render this platform as the `[cvm] platform` string `vmm.toml` expects.
+    pub fn vmm_str(self) -> &'static str {
+        match self {
+            Self::AmdSevSnp => "amd-sev-snp",
+            Self::Tdx => "tdx",
+        }
+    }
+
+ /// parse a `--platform` value: `tdx` | `amd-sev-snp` | `auto` (None).
+ pub fn parse_opt(s: &str) -> Result