Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ yaml-rust2 = "0.10.4"
luks2 = "0.5.0"
scopeguard = "1.2.0"
tar = "0.4"
proxy-protocol = "0.5.0"

[profile.release]
panic = "abort"
12 changes: 12 additions & 0 deletions dstack-types/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,18 @@ pub struct AppCompose {
pub storage_fs: Option<String>,
#[serde(default, with = "human_size")]
pub swap_size: u64,
/// Per-port attributes consumed by the gateway (e.g. PROXY protocol).
#[serde(default)]
pub ports: Vec<PortAttrs>,
}

/// Gateway-facing attributes for a single port, as listed in the app
/// compose `ports` array.
#[derive(Deserialize, Serialize, Debug, Clone)]
pub struct PortAttrs {
/// The port number these attributes apply to.
pub port: u16,
/// Whether the gateway should send a PROXY protocol header on outbound
/// connections to this port. Defaults to `false` when the field is omitted.
#[serde(default)]
pub pp: bool,
}

fn default_true() -> bool {
Expand Down
17 changes: 16 additions & 1 deletion dstack-util/src/system_setup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ use crate::{
use cert_client::CertRequestClient;
use cmd_lib::run_fun as cmd;
use dstack_gateway_rpc::{
gateway_client::GatewayClient, RegisterCvmRequest, RegisterCvmResponse, WireGuardPeer,
gateway_client::GatewayClient, PortAttrs as RpcPortAttrs, PortAttrsList, RegisterCvmRequest,
RegisterCvmResponse, WireGuardPeer,
};
use ra_tls::rcgen::{KeyPair, PKCS_ECDSA_P256_SHA256};
use serde_human_bytes as hex_bytes;
Expand Down Expand Up @@ -446,11 +447,24 @@ impl<'a> GatewayContext<'a> {
gateway_url: &str,
key_store: &GatewayKeyStore,
) -> Result<RegisterCvmResponse> {
let port_attrs = PortAttrsList {
attrs: self
.shared
.app_compose
.ports
.iter()
.map(|p| RpcPortAttrs {
port: p.port as u32,
pp: p.pp,
})
.collect(),
};
let client =
self.create_gateway_client(gateway_url, &key_store.client_key, &key_store.client_cert)?;
let result = client
.register_cvm(RegisterCvmRequest {
client_public_key: key_store.wg_pk.clone(),
port_attrs: Some(port_attrs.clone()),
})
.await
.context("Failed to register CVM");
Expand All @@ -471,6 +485,7 @@ impl<'a> GatewayContext<'a> {
client
.register_cvm(RegisterCvmRequest {
client_public_key: key_store.wg_pk.clone(),
port_attrs: Some(port_attrs),
})
.await
.context("Failed to register CVM")
Expand Down
1 change: 1 addition & 0 deletions gateway/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ hyper-rustls.workspace = true
http-body-util.workspace = true
x509-parser.workspace = true
jemallocator.workspace = true
proxy-protocol.workspace = true
wavekv.workspace = true
tdx-attest.workspace = true
flate2.workspace = true
Expand Down
8 changes: 8 additions & 0 deletions gateway/dstack-app/builder/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ localhost_enabled = false
app_address_ns_compat = true
workers = ${PROXY_WORKERS:-32}
max_connections_per_app = ${MAX_CONNECTIONS_PER_APP:-0}
inbound_pp_enabled = ${INBOUND_PP_ENABLED:-false}

[core.proxy.timeouts]
connect = "${TIMEOUT_CONNECT:-5s}"
Expand All @@ -122,6 +123,13 @@ idle = "${TIMEOUT_IDLE:-10m}"
write = "${TIMEOUT_WRITE:-5s}"
shutdown = "${TIMEOUT_SHUTDOWN:-5s}"
total = "${TIMEOUT_TOTAL:-5h}"
pp_header = "${TIMEOUT_PP_HEADER:-5s}"

[core.proxy.port_attrs_fetch]
timeout = "${PORT_ATTRS_FETCH_TIMEOUT:-10s}"
max_retries = ${PORT_ATTRS_FETCH_MAX_RETRIES:-5}
backoff_initial = "${PORT_ATTRS_FETCH_BACKOFF_INITIAL:-1s}"
backoff_max = "${PORT_ATTRS_FETCH_BACKOFF_MAX:-30s}"

[core.recycle]
enabled = true
Expand Down
13 changes: 11 additions & 2 deletions gateway/dstack-app/deploy-to-vmm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ if [ -f ".env" ]; then
# Load variables from .env
echo "Loading environment variables from .env file..."
set -a
# shellcheck disable=SC1091
source .env
set +a
else
Expand Down Expand Up @@ -92,7 +93,14 @@ GUEST_AGENT_ADDR=127.0.0.1:9206
WG_ADDR=0.0.0.0:9202

# The token used to launch the App
APP_LAUNCH_TOKEN=$(cat /dev/urandom | tr -dc 'a-zA-Z0-9' | fold -w 32 | head -n 1)
APP_LAUNCH_TOKEN=$(tr -dc 'a-zA-Z0-9' < /dev/urandom | fold -w 32 | head -n 1)

# PROXY protocol: read v1/v2 header from inbound connections (e.g. when this
# gateway sits behind a PP-aware L4 LB such as Cloudflare Spectrum or haproxy
# with send-proxy). Set to "true" only if the upstream LB is configured to
# send PROXY headers; otherwise leave disabled or every connection will be
# rejected.
# INBOUND_PP_ENABLED=false

EOF
echo "Please edit the .env file and set the required variables, then run this script again."
Expand Down Expand Up @@ -125,7 +133,7 @@ done

CLI="../../vmm/src/vmm-cli.py --url $VMM_RPC"

WG_PORT=$(echo $WG_ADDR | cut -d':' -f2)
WG_PORT=$(echo "$WG_ADDR" | cut -d':' -f2)
COMPOSE_TMP=$(mktemp)

cp docker-compose.yaml "$COMPOSE_TMP"
Expand Down Expand Up @@ -175,6 +183,7 @@ APP_LAUNCH_TOKEN=$APP_LAUNCH_TOKEN
RPC_DOMAIN=$RPC_DOMAIN
NODE_ID=$NODE_ID
PROXY_LISTEN_PORT=$PROXY_LISTEN_PORT
INBOUND_PP_ENABLED=${INBOUND_PP_ENABLED:-false}
EOF

if [ -n "$APP_COMPOSE_FILE" ]; then
Expand Down
6 changes: 6 additions & 0 deletions gateway/dstack-app/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ services:
- TIMEOUT_TOTAL=${TIMEOUT_TOTAL:-5h}
- ADMIN_LISTEN_ADDR=${ADMIN_LISTEN_ADDR:-0.0.0.0}
- ADMIN_LISTEN_PORT=${ADMIN_LISTEN_PORT:-8001}
- INBOUND_PP_ENABLED=${INBOUND_PP_ENABLED:-false}
- TIMEOUT_PP_HEADER=${TIMEOUT_PP_HEADER:-5s}
- PORT_ATTRS_FETCH_TIMEOUT=${PORT_ATTRS_FETCH_TIMEOUT:-10s}
- PORT_ATTRS_FETCH_MAX_RETRIES=${PORT_ATTRS_FETCH_MAX_RETRIES:-5}
- PORT_ATTRS_FETCH_BACKOFF_INITIAL=${PORT_ATTRS_FETCH_BACKOFF_INITIAL:-1s}
- PORT_ATTRS_FETCH_BACKOFF_MAX=${PORT_ATTRS_FETCH_BACKOFF_MAX:-30s}
restart: always

volumes:
Expand Down
14 changes: 14 additions & 0 deletions gateway/gateway.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,18 @@ workers = 32
external_port = 443
# Maximum concurrent connections per app. 0 means unlimited.
max_connections_per_app = 2000
# Whether to read a PROXY protocol header from inbound connections
# (e.g. when the gateway sits behind a PP-aware load balancer such as Cloudflare).
inbound_pp_enabled = false

[core.proxy.port_attrs_fetch]
# Background lazy-fetch of port_attrs from legacy CVM agents.
# Timeout for a single Info() RPC attempt.
timeout = "10s"
# Maximum number of retries after the initial attempt (0 = no retry).
# Retries cover the WireGuard / agent warmup window after registration.
max_retries = 5
# Exponential backoff between retries; doubles each attempt up to backoff_max.
backoff_initial = "1s"
backoff_max = "30s"

[core.proxy.timeouts]
# Timeout for establishing a connection to the target app.
Expand All @@ -81,6 +93,8 @@ write = "5s"
shutdown = "5s"
# Timeout for total connection duration.
total = "5h"
# Timeout for reading the PROXY protocol header from inbound connections.
pp_header = "5s"

[core.recycle]
enabled = true
Expand Down
19 changes: 19 additions & 0 deletions gateway/rpc/proto/gateway_rpc.proto
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,25 @@ package gateway;
message RegisterCvmRequest {
// The public key of the WireGuard interface of the CVM.
string client_public_key = 1;
// Per-port attributes the gateway should apply when proxying to this CVM.
// Wrapped in a message so we can distinguish "not reported" (old CVM →
// gateway falls back to fetching app-compose via Info()) from "reported
// empty" (new CVM with no special port behaviour).
optional PortAttrsList port_attrs = 2;
}

// PortAttrsList wraps a list of PortAttrs so it can be optional on the wire.
message PortAttrsList {
// One entry per port with declared attributes. An empty list is a valid
// report (no special port behaviour) and is distinct from the wrapper
// message being absent.
repeated PortAttrs attrs = 1;
}

// PortAttrs declares per-port behaviour for the gateway.
message PortAttrs {
// The CVM port these attributes apply to.
// NOTE(review): protobuf has no 16-bit integer type, so this is uint32 on
// the wire; values are presumably expected to fit in 0..=65535 — confirm
// that the receiver validates the range.
uint32 port = 1;
// Whether the gateway should send a PROXY protocol header on outbound
// connections to this port.
bool pp = 2;
}

// DebugRegisterCvmRequest is the request for DebugRegisterCvm (only works when debug_mode is enabled).
Expand Down
29 changes: 29 additions & 0 deletions gateway/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,32 @@ pub struct ProxyConfig {
pub app_address_ns_compat: bool,
/// Maximum concurrent connections per app. 0 means unlimited.
pub max_connections_per_app: u64,
/// Port the dstack guest-agent listens on inside each CVM. Used by the
/// gateway to fetch app metadata (e.g. port_attrs for legacy CVMs).
pub agent_port: u16,
/// Whether to read PROXY protocol headers from inbound connections
/// (e.g. when behind a PP-aware load balancer like Cloudflare).
#[serde(default)]
pub inbound_pp_enabled: bool,
/// Background lazy-fetch behaviour for `port_attrs` (legacy CVMs).
pub port_attrs_fetch: PortAttrsFetchConfig,
}

/// Settings for fetching `port_attrs` via the `Info()` RPC: per-attempt
/// timeout, retry budget, and the bounds of the exponential backoff between
/// retries.
#[derive(Debug, Clone, Deserialize)]
pub struct PortAttrsFetchConfig {
/// Timeout for a single `Info()` RPC attempt.
#[serde(with = "serde_duration")]
pub timeout: Duration,
/// Maximum number of attempts after the initial try (0 = no retry).
/// Retries cover the window where a freshly-registered CVM hasn't
/// finished its WireGuard handshake yet.
pub max_retries: u32,
/// Delay before the first retry; doubles on each subsequent retry,
/// capped at `backoff_max`.
#[serde(with = "serde_duration")]
pub backoff_initial: Duration,
/// Upper bound on the delay between retries; the doubling of
/// `backoff_initial` never exceeds this value.
#[serde(with = "serde_duration")]
pub backoff_max: Duration,
}

#[derive(Debug, Clone, Deserialize)]
Expand All @@ -142,6 +168,9 @@ pub struct Timeouts {
pub write: Duration,
#[serde(with = "serde_duration")]
pub shutdown: Duration,
/// Timeout for reading the proxy protocol header from inbound connections.
#[serde(with = "serde_duration")]
pub pp_header: Duration,
}

#[derive(Debug, Clone, Deserialize, Serialize)]
Expand Down
2 changes: 2 additions & 0 deletions gateway/src/debug_service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ impl DebugRpc for DebugRpcHandler {
&request.app_id,
&request.instance_id,
&request.client_public_key,
"",
None,
)
}

Expand Down
18 changes: 18 additions & 0 deletions gateway/src/kv/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,31 @@ use serde::{Deserialize, Serialize};
use tokio::sync::watch;
use wavekv::{node::NodeState, types::NodeId, Node};

/// Per-port flags applied by the gateway when proxying to a CVM port.
///
/// The `Default` value has every flag turned off.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
pub struct PortFlags {
/// Send a PROXY protocol header on outbound connections to this port.
/// Deserializes to `false` when the field is missing.
#[serde(default)]
pub pp: bool,
}

/// Instance core data (persistent)
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct InstanceData {
/// NOTE(review): presumably the ID of the app this instance belongs to — confirm.
pub app_id: String,
/// NOTE(review): presumably the address the gateway uses to reach the
/// instance — confirm which network this is on.
pub ip: Ipv4Addr,
/// NOTE(review): presumably the instance's WireGuard public key as supplied
/// at registration — confirm encoding.
pub public_key: String,
/// NOTE(review): presumably the registration timestamp — confirm unit/epoch.
pub reg_time: u64,
/// Per-port flags reported at registration. `None` means "not reported"
/// (legacy CVM); the gateway will fall back to fetching app-compose via
/// Info() on first connection and populate this lazily.
/// Missing in stored data deserializes to `None`.
#[serde(default)]
pub port_attrs: Option<BTreeMap<u16, PortFlags>>,
/// Hex-encoded compose_hash that `port_attrs` was learned against.
/// When a re-registration presents a different compose_hash (app upgrade),
/// the cache is invalidated and re-fetched lazily.
/// Missing in stored data deserializes to the empty string.
#[serde(default)]
pub port_attrs_hash: String,
}

/// Gateway node status (stored separately for independent updates)
Expand Down
1 change: 1 addition & 0 deletions gateway/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ mod distributed_certbot;
mod kv;
mod main_service;
mod models;
mod pp;
mod proxy;
mod web_routes;

Expand Down
Loading
Loading