Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ yaml-rust2 = "0.10.4"
luks2 = "0.5.0"
scopeguard = "1.2.0"
tar = "0.4"
proxy-protocol = "0.5.0"

[profile.release]
panic = "abort"
24 changes: 24 additions & 0 deletions dstack-types/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,30 @@ pub struct AppCompose {
pub storage_fs: Option<String>,
#[serde(default, with = "human_size")]
pub swap_size: u64,
/// Per-port policy consumed by the gateway (PROXY protocol opt-in,
/// optional port whitelist).
#[serde(default)]
pub port_policy: PortPolicy,
}

/// Gateway-facing port policy declared by an app in its compose file.
///
/// Every field is `#[serde(default)]`, so a compose file may omit any of
/// them. The `Default` value is an empty `ports` list with
/// `restrict_mode == false`.
///
/// `PartialEq`/`Eq` are derived so two policies can be compared directly
/// (for example to detect that a policy changed, or in `assert_eq!`).
#[derive(Deserialize, Serialize, Debug, Clone, Default, PartialEq, Eq)]
pub struct PortPolicy {
    /// Per-port attributes (PROXY protocol opt-in, etc.).
    #[serde(default)]
    pub ports: Vec<PortAttrs>,
    /// When true, the gateway only forwards traffic to ports listed in `ports`.
    /// All other ports are rejected at TCP-accept time.
    #[serde(default)]
    pub restrict_mode: bool,
}

/// Attributes the gateway applies to a single CVM port.
///
/// `port` has no serde default and is therefore required when
/// deserializing; `pp` falls back to `false` when omitted.
#[derive(Deserialize, Serialize, Debug, Clone, PartialEq, Eq)]
pub struct PortAttrs {
    /// The CVM port these attributes apply to.
    pub port: u16,
    /// Whether the gateway should send a PROXY protocol header on outbound
    /// connections to this port.
    #[serde(default)]
    pub pp: bool,
}

fn default_true() -> bool {
Expand Down
19 changes: 18 additions & 1 deletion dstack-util/src/system_setup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ use crate::{
use cert_client::CertRequestClient;
use cmd_lib::run_fun as cmd;
use dstack_gateway_rpc::{
gateway_client::GatewayClient, RegisterCvmRequest, RegisterCvmResponse, WireGuardPeer,
gateway_client::GatewayClient, PortAttrs as RpcPortAttrs, PortPolicy as RpcPortPolicy,
RegisterCvmRequest, RegisterCvmResponse, WireGuardPeer,
};
use ra_tls::rcgen::{KeyPair, PKCS_ECDSA_P256_SHA256};
use serde_human_bytes as hex_bytes;
Expand Down Expand Up @@ -446,11 +447,26 @@ impl<'a> GatewayContext<'a> {
gateway_url: &str,
key_store: &GatewayKeyStore,
) -> Result<RegisterCvmResponse> {
let port_policy = RpcPortPolicy {
ports: self
.shared
.app_compose
.port_policy
.ports
.iter()
.map(|p| RpcPortAttrs {
port: p.port as u32,
pp: p.pp,
})
.collect(),
restrict_mode: self.shared.app_compose.port_policy.restrict_mode,
};
let client =
self.create_gateway_client(gateway_url, &key_store.client_key, &key_store.client_cert)?;
let result = client
.register_cvm(RegisterCvmRequest {
client_public_key: key_store.wg_pk.clone(),
port_policy: Some(port_policy.clone()),
})
.await
.context("Failed to register CVM");
Expand All @@ -471,6 +487,7 @@ impl<'a> GatewayContext<'a> {
client
.register_cvm(RegisterCvmRequest {
client_public_key: key_store.wg_pk.clone(),
port_policy: Some(port_policy),
})
.await
.context("Failed to register CVM")
Expand Down
1 change: 1 addition & 0 deletions gateway/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ hyper-rustls.workspace = true
http-body-util.workspace = true
x509-parser.workspace = true
jemallocator.workspace = true
proxy-protocol.workspace = true
wavekv.workspace = true
tdx-attest.workspace = true
flate2.workspace = true
Expand Down
8 changes: 8 additions & 0 deletions gateway/dstack-app/builder/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ localhost_enabled = false
app_address_ns_compat = true
workers = ${PROXY_WORKERS:-32}
max_connections_per_app = ${MAX_CONNECTIONS_PER_APP:-0}
inbound_pp_enabled = ${INBOUND_PP_ENABLED:-false}

[core.proxy.timeouts]
connect = "${TIMEOUT_CONNECT:-5s}"
Expand All @@ -122,6 +123,13 @@ idle = "${TIMEOUT_IDLE:-10m}"
write = "${TIMEOUT_WRITE:-5s}"
shutdown = "${TIMEOUT_SHUTDOWN:-5s}"
total = "${TIMEOUT_TOTAL:-5h}"
pp_header = "${TIMEOUT_PP_HEADER:-5s}"

[core.proxy.port_policy_fetch]
timeout = "${PORT_POLICY_FETCH_TIMEOUT:-10s}"
max_retries = ${PORT_POLICY_FETCH_MAX_RETRIES:-5}
backoff_initial = "${PORT_POLICY_FETCH_BACKOFF_INITIAL:-1s}"
backoff_max = "${PORT_POLICY_FETCH_BACKOFF_MAX:-30s}"

[core.recycle]
enabled = true
Expand Down
13 changes: 11 additions & 2 deletions gateway/dstack-app/deploy-to-vmm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ if [ -f ".env" ]; then
# Load variables from .env
echo "Loading environment variables from .env file..."
set -a
# shellcheck disable=SC1091
source .env
set +a
else
Expand Down Expand Up @@ -92,7 +93,14 @@ GUEST_AGENT_ADDR=127.0.0.1:9206
WG_ADDR=0.0.0.0:9202

# The token used to launch the App
APP_LAUNCH_TOKEN=$(cat /dev/urandom | tr -dc 'a-zA-Z0-9' | fold -w 32 | head -n 1)
APP_LAUNCH_TOKEN=$(tr -dc 'a-zA-Z0-9' < /dev/urandom | fold -w 32 | head -n 1)

# PROXY protocol: read v1/v2 header from inbound connections (e.g. when this
# gateway sits behind a PP-aware L4 LB such as Cloudflare Spectrum or haproxy
# with send-proxy). Set to "true" only if the upstream LB is configured to
# send PROXY headers; otherwise leave disabled or every connection will be
# rejected.
# INBOUND_PP_ENABLED=false

EOF
echo "Please edit the .env file and set the required variables, then run this script again."
Expand Down Expand Up @@ -125,7 +133,7 @@ done

CLI="../../vmm/src/vmm-cli.py --url $VMM_RPC"

WG_PORT=$(echo $WG_ADDR | cut -d':' -f2)
WG_PORT=$(echo "$WG_ADDR" | cut -d':' -f2)
COMPOSE_TMP=$(mktemp)

cp docker-compose.yaml "$COMPOSE_TMP"
Expand Down Expand Up @@ -175,6 +183,7 @@ APP_LAUNCH_TOKEN=$APP_LAUNCH_TOKEN
RPC_DOMAIN=$RPC_DOMAIN
NODE_ID=$NODE_ID
PROXY_LISTEN_PORT=$PROXY_LISTEN_PORT
INBOUND_PP_ENABLED=${INBOUND_PP_ENABLED:-false}
EOF

if [ -n "$APP_COMPOSE_FILE" ]; then
Expand Down
6 changes: 6 additions & 0 deletions gateway/dstack-app/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ services:
- TIMEOUT_TOTAL=${TIMEOUT_TOTAL:-5h}
- ADMIN_LISTEN_ADDR=${ADMIN_LISTEN_ADDR:-0.0.0.0}
- ADMIN_LISTEN_PORT=${ADMIN_LISTEN_PORT:-8001}
- INBOUND_PP_ENABLED=${INBOUND_PP_ENABLED:-false}
- TIMEOUT_PP_HEADER=${TIMEOUT_PP_HEADER:-5s}
- PORT_POLICY_FETCH_TIMEOUT=${PORT_POLICY_FETCH_TIMEOUT:-10s}
- PORT_POLICY_FETCH_MAX_RETRIES=${PORT_POLICY_FETCH_MAX_RETRIES:-5}
- PORT_POLICY_FETCH_BACKOFF_INITIAL=${PORT_POLICY_FETCH_BACKOFF_INITIAL:-1s}
- PORT_POLICY_FETCH_BACKOFF_MAX=${PORT_POLICY_FETCH_BACKOFF_MAX:-30s}
restart: always

volumes:
Expand Down
14 changes: 14 additions & 0 deletions gateway/gateway.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,18 @@ workers = 32
external_port = 443
# Maximum concurrent connections per app. 0 means unlimited.
max_connections_per_app = 2000
# Whether to read PROXY protocol from inbound connections (e.g. from Cloudflare).
inbound_pp_enabled = false

[core.proxy.port_policy_fetch]
# Background lazy-fetch of port_policy from legacy CVM agents.
# Timeout for a single Info() RPC attempt.
timeout = "10s"
# Retries cover the WireGuard / agent warmup window after registration.
max_retries = 5
# Exponential backoff between retries; doubles each attempt up to backoff_max.
backoff_initial = "1s"
backoff_max = "30s"

[core.proxy.timeouts]
# Timeout for establishing a connection to the target app.
Expand All @@ -81,6 +93,8 @@ write = "5s"
shutdown = "5s"
# Timeout for total connection duration.
total = "5h"
# Timeout for the PROXY protocol header.
pp_header = "5s"

[core.recycle]
enabled = true
Expand Down
70 changes: 70 additions & 0 deletions gateway/rpc/proto/gateway_rpc.proto
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,31 @@ package gateway;
message RegisterCvmRequest {
// The public key of the WireGuard interface of the CVM.
string client_public_key = 1;
// Per-port policy the gateway should apply when proxying to this CVM.
// The CVM fills this in from the `port_policy` section of its app-compose.
// Wrapped in a message so we can distinguish "not reported" (old CVM →
// gateway falls back to fetching app-compose via Info()) from "reported
// empty" (new CVM with no special port behaviour).
optional PortPolicy port_policy = 2;
}

// PortPolicy carries the gateway-relevant per-port configuration declared by
// the app in its compose file. Keeping `ports` and `restrict_mode` together
// lets a single Option distinguish "not reported" from "reported".
message PortPolicy {
// Per-port attributes (PROXY protocol opt-in, etc.).
repeated PortAttrs ports = 1;
// When true, the gateway only forwards traffic to ports listed in `ports`
// and rejects connections to any other port at TCP-accept time.
// Combined with an empty `ports` list this is a "deny everything" lockdown.
bool restrict_mode = 2;
}

// PortAttrs declares per-port behaviour for the gateway.
message PortAttrs {
// The CVM port these attributes apply to.
// The CVM side declares this as a 16-bit port number and widens it to
// uint32 when building the request (protobuf has no 16-bit integer type).
// NOTE(review): values above 65535 are not valid ports — confirm the
// gateway rejects or ignores them.
uint32 port = 1;
// Whether the gateway should send a PROXY protocol header on outbound
// connections to this port.
bool pp = 2;
}

// DebugRegisterCvmRequest is the request for DebugRegisterCvm (only works when debug_mode is enabled).
Expand Down Expand Up @@ -414,6 +439,18 @@ service Admin {
rpc GetCertbotConfig(google.protobuf.Empty) returns (CertbotConfigResponse) {}
// Set global certbot configuration (includes ACME URL)
rpc SetCertbotConfig(SetCertbotConfigRequest) returns (google.protobuf.Empty) {}

// ==================== Per-Instance Port Policy Override ====================
// Set an admin override for an instance's port policy. Takes precedence
// over the policy reported by the instance itself, and survives app
// upgrades. Errors if the instance is not registered.
rpc SetInstancePortPolicy(SetInstancePortPolicyRequest) returns (google.protobuf.Empty) {}
// Clear the admin override for an instance, reverting to the
// instance-reported policy. Errors if the instance is not registered.
rpc ClearInstancePortPolicy(ClearInstancePortPolicyRequest) returns (google.protobuf.Empty) {}
// Inspect both the admin override and the instance-reported policy for an
// instance, plus the effective policy the proxy will enforce.
rpc GetInstancePortPolicy(GetInstancePortPolicyRequest) returns (GetInstancePortPolicyResponse) {}
}

// ==================== DNS Credential Messages ====================
Expand Down Expand Up @@ -623,3 +660,36 @@ message SetCertbotConfigRequest {
// ACME server URL (empty means use default Let's Encrypt production)
optional string acme_url = 4;
}

// ==================== Per-Instance Port Policy Override Messages ====================

// Set an admin override for an instance.
// Request message for the Admin.SetInstancePortPolicy RPC.
message SetInstancePortPolicyRequest {
// The instance to override.
string instance_id = 1;
// The policy to apply. An empty `ports` list with `restrict_mode = true`
// is a valid "deny everything" lockdown.
// NOTE(review): behaviour when `policy` is left unset is not specified
// here — confirm against the RPC handler.
PortPolicy policy = 2;
}

// Clear the admin override for an instance.
// Request message for the Admin.ClearInstancePortPolicy RPC.
message ClearInstancePortPolicyRequest {
// The instance whose admin override should be removed.
string instance_id = 1;
}

// Inspect an instance's port-policy state.
// Request message for the Admin.GetInstancePortPolicy RPC.
message GetInstancePortPolicyRequest {
// The instance whose port-policy state should be returned.
string instance_id = 1;
}

// Port-policy state for one instance, as returned by the
// Admin.GetInstancePortPolicy RPC.
message GetInstancePortPolicyResponse {
// The policy the proxy will actually enforce. Absent when neither admin
// nor instance has set anything (fail-close until populated).
optional PortPolicy effective = 1;
// Where `effective` came from: "admin", "instance", or "none".
// "admin" means `effective` is the admin override, "instance" means it is
// the instance-reported policy, and "none" means `effective` is absent.
string source = 2;
// The policy reported by the instance itself, if any.
optional PortPolicy instance_reported = 3;
// The admin override, if any.
optional PortPolicy admin_override = 4;
}
Loading
Loading