Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions crates/openshell-driver-kubernetes/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ pub struct KubernetesComputeConfig {
pub namespace: String,
pub default_image: String,
pub image_pull_policy: String,
/// Image that provides the `openshell-sandbox` supervisor binary.
/// An init container copies the binary from this image into a shared
/// emptyDir volume before the sandbox container starts.
pub supervisor_image: String,
/// Kubernetes `imagePullPolicy` for the supervisor init container.
/// Empty string delegates to the Kubernetes default.
pub supervisor_image_pull_policy: String,
pub grpc_endpoint: String,
pub ssh_socket_path: String,
pub ssh_handshake_secret: String,
Expand Down
164 changes: 118 additions & 46 deletions crates/openshell-driver-kubernetes/src/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,8 @@ impl KubernetesComputeDriver {
sandbox.spec.as_ref(),
&self.config.default_image,
&self.config.image_pull_policy,
&self.config.supervisor_image,
&self.config.supervisor_image_pull_policy,
&sandbox.id,
&sandbox.name,
&self.config.grpc_endpoint,
Expand Down Expand Up @@ -657,27 +659,21 @@ fn map_kube_event_to_platform(
}

/// Path where the supervisor binary is mounted inside the agent container.
/// The supervisor is always side-loaded from the k3s node filesystem via a
/// read-only hostPath volume — it is never baked into sandbox images.
const SUPERVISOR_MOUNT_PATH: &str = "/opt/openshell/bin";

/// Name of the volume used to side-load the supervisor binary.
const SUPERVISOR_VOLUME_NAME: &str = "openshell-supervisor-bin";

/// Path on the k3s node filesystem where the supervisor binary lives.
/// This is baked into the cluster image at build time and can be updated
/// via `docker cp` during local development.
const SUPERVISOR_HOST_PATH: &str = "/opt/openshell/bin";
/// Name of the init container that installs the supervisor binary.
const SUPERVISOR_INIT_CONTAINER_NAME: &str = "openshell-supervisor-install";

/// Build the hostPath volume definition that exposes the supervisor binary
/// from the k3s node filesystem.
/// Build the emptyDir volume that holds the supervisor binary.
///
/// The init container writes the binary here; the agent container reads it.
fn supervisor_volume() -> serde_json::Value {
serde_json::json!({
"name": SUPERVISOR_VOLUME_NAME,
"hostPath": {
"path": SUPERVISOR_HOST_PATH,
"type": "DirectoryOrCreate"
}
"emptyDir": {}
})
}

Expand All @@ -690,26 +686,61 @@ fn supervisor_volume_mount() -> serde_json::Value {
})
}

/// Apply supervisor side-load transforms to an already-built pod template JSON.
/// Build the init container that copies the supervisor binary into the emptyDir.
///
/// This injects the hostPath volume, volume mount, command override, and
/// `runAsUser: 0` into the pod template, targeting the `agent` container
/// (or the first container if no `agent` is found).
/// The supervisor image is expected to have `openshell-sandbox` on its PATH
/// (e.g. at `/usr/local/bin/openshell-sandbox`). The init container resolves
/// the binary via `command -v` and copies it into the shared emptyDir volume
/// so the agent container can execute it from a fixed, writable path.
fn supervisor_init_container(
supervisor_image: &str,
supervisor_image_pull_policy: &str,
) -> serde_json::Value {
let copy_cmd = format!(
"set -e && \
mkdir -p {SUPERVISOR_MOUNT_PATH} && \
SUPERVISOR=$(command -v openshell-sandbox) && \
cp \"$SUPERVISOR\" {SUPERVISOR_MOUNT_PATH}/openshell-sandbox && \
chmod +x {SUPERVISOR_MOUNT_PATH}/openshell-sandbox"
);
let mut spec = serde_json::json!({
"name": SUPERVISOR_INIT_CONTAINER_NAME,
"image": supervisor_image,
"command": ["sh", "-c", copy_cmd],
"securityContext": {"runAsUser": 0},
"volumeMounts": [{
"name": SUPERVISOR_VOLUME_NAME,
"mountPath": SUPERVISOR_MOUNT_PATH,
"readOnly": false
}]
});
if !supervisor_image_pull_policy.is_empty() {
spec["imagePullPolicy"] = serde_json::json!(supervisor_image_pull_policy);
}
spec
}

/// Apply supervisor side-load transforms to an already-built pod template JSON.
///
/// The supervisor binary is always side-loaded from the k3s node filesystem
/// via a read-only hostPath volume. No init container is needed.
/// Injects an emptyDir volume, an init container that copies the supervisor
/// binary from the supervisor image into that volume, and a read-only volume
/// mount + command override on the agent container.
///
/// The `runAsUser: 0` override ensures the supervisor binary runs as root
/// regardless of the image's `USER` directive. The supervisor needs root for
/// network namespace creation, proxy setup, and Landlock/seccomp configuration.
/// It drops to the appropriate non-root user for child processes via the
/// policy's `run_as_user`/`run_as_group`.
fn apply_supervisor_sideload(pod_template: &mut serde_json::Value) {
fn apply_supervisor_sideload(
pod_template: &mut serde_json::Value,
supervisor_image: &str,
supervisor_image_pull_policy: &str,
) {
let Some(spec) = pod_template.get_mut("spec").and_then(|v| v.as_object_mut()) else {
return;
};

// 1. Add the hostPath volume to spec.volumes
// 1. Add the emptyDir volume to spec.volumes
let volumes = spec
.entry("volumes")
.or_insert_with(|| serde_json::json!([]))
Expand All @@ -718,7 +749,19 @@ fn apply_supervisor_sideload(pod_template: &mut serde_json::Value) {
volumes.push(supervisor_volume());
}

// 2. Find the agent container and add volume mount + command override
// 2. Add the init container that copies the binary into the emptyDir
let init_containers = spec
.entry("initContainers")
.or_insert_with(|| serde_json::json!([]))
.as_array_mut();
if let Some(init_containers) = init_containers {
init_containers.push(supervisor_init_container(
supervisor_image,
supervisor_image_pull_policy,
));
}

// 3. Find the agent container and add volume mount + command override
let Some(containers) = spec.get_mut("containers").and_then(|v| v.as_array_mut()) else {
return;
};
Expand Down Expand Up @@ -881,6 +924,8 @@ fn sandbox_to_k8s_spec(
spec: Option<&SandboxSpec>,
default_image: &str,
image_pull_policy: &str,
supervisor_image: &str,
supervisor_image_pull_policy: &str,
sandbox_id: &str,
sandbox_name: &str,
grpc_endpoint: &str,
Expand Down Expand Up @@ -921,6 +966,8 @@ fn sandbox_to_k8s_spec(
spec.gpu,
default_image,
image_pull_policy,
supervisor_image,
supervisor_image_pull_policy,
sandbox_id,
sandbox_name,
grpc_endpoint,
Expand Down Expand Up @@ -967,6 +1014,8 @@ fn sandbox_to_k8s_spec(
spec.as_ref().is_some_and(|s| s.gpu),
default_image,
image_pull_policy,
supervisor_image,
supervisor_image_pull_policy,
sandbox_id,
sandbox_name,
grpc_endpoint,
Expand All @@ -992,6 +1041,8 @@ fn sandbox_template_to_k8s(
gpu: bool,
default_image: &str,
image_pull_policy: &str,
supervisor_image: &str,
supervisor_image_pull_policy: &str,
sandbox_id: &str,
sandbox_name: &str,
grpc_endpoint: &str,
Expand All @@ -1003,9 +1054,6 @@ fn sandbox_template_to_k8s(
host_gateway_ip: &str,
inject_workspace: bool,
) -> serde_json::Value {
// The supervisor binary is always side-loaded from the node filesystem
// via a hostPath volume, regardless of which sandbox image is used.

let mut metadata = serde_json::Map::new();
if !template.labels.is_empty() {
metadata.insert("labels".to_string(), serde_json::json!(template.labels));
Expand Down Expand Up @@ -1122,8 +1170,9 @@ fn sandbox_template_to_k8s(

let mut result = serde_json::Value::Object(template_value);

// Always side-load the supervisor binary from the node filesystem
apply_supervisor_sideload(&mut result);
// Side-load the supervisor binary via an init container that copies it
// from the supervisor image into a shared emptyDir volume.
apply_supervisor_sideload(&mut result, supervisor_image, supervisor_image_pull_policy);

// Inject workspace persistence (init container + PVC volume mount) so
// that /sandbox data survives pod rescheduling. Skipped when the user
Expand Down Expand Up @@ -1437,7 +1486,7 @@ mod tests {
}
});

apply_supervisor_sideload(&mut pod_template);
apply_supervisor_sideload(&mut pod_template, "custom-image:latest", "IfNotPresent");

let sc = &pod_template["spec"]["containers"][0]["securityContext"];
assert_eq!(sc["runAsUser"], 0, "runAsUser must be 0 for supervisor");
Expand All @@ -1461,7 +1510,7 @@ mod tests {
}
});

apply_supervisor_sideload(&mut pod_template);
apply_supervisor_sideload(&mut pod_template, "supervisor-image:latest", "IfNotPresent");

let sc = &pod_template["spec"]["containers"][0]["securityContext"];
assert_eq!(
Expand All @@ -1471,7 +1520,7 @@ mod tests {
}

#[test]
fn supervisor_sideload_injects_hostpath_volume_and_mount() {
fn supervisor_sideload_injects_emptydir_volume_init_container_and_mount() {
let mut pod_template = serde_json::json!({
"spec": {
"containers": [{
Expand All @@ -1481,24 +1530,29 @@ mod tests {
}
});

apply_supervisor_sideload(&mut pod_template);

// No init containers should be present (hostPath, not emptyDir+init)
assert!(
pod_template["spec"]["initContainers"].is_null(),
"hostPath sideload should not create init containers"
);
apply_supervisor_sideload(&mut pod_template, "supervisor-image:latest", "IfNotPresent");

// Volume should be a hostPath volume
// Volume should be an emptyDir
let volumes = pod_template["spec"]["volumes"]
.as_array()
.expect("volumes should exist");
assert_eq!(volumes.len(), 1);
assert_eq!(volumes[0]["name"], SUPERVISOR_VOLUME_NAME);
assert_eq!(volumes[0]["hostPath"]["path"], SUPERVISOR_HOST_PATH);
assert_eq!(volumes[0]["hostPath"]["type"], "DirectoryOrCreate");
assert!(
volumes[0]["emptyDir"].is_object(),
"volume should be emptyDir, not hostPath"
);

// Init container should use the supervisor image, not the sandbox image
let init_containers = pod_template["spec"]["initContainers"]
.as_array()
.expect("initContainers should exist");
assert_eq!(init_containers.len(), 1);
assert_eq!(init_containers[0]["name"], SUPERVISOR_INIT_CONTAINER_NAME);
assert_eq!(init_containers[0]["image"], "supervisor-image:latest");
assert_eq!(init_containers[0]["imagePullPolicy"], "IfNotPresent");

// Agent container command should be overridden
// Agent container command should be overridden to the emptyDir path
let command = pod_template["spec"]["containers"][0]["command"]
.as_array()
.expect("command should be set");
Expand All @@ -1507,7 +1561,7 @@ mod tests {
format!("{SUPERVISOR_MOUNT_PATH}/openshell-sandbox")
);

// Volume mount should be read-only
// Agent volume mount should be read-only
let mounts = pod_template["spec"]["containers"][0]["volumeMounts"]
.as_array()
.expect("volumeMounts should exist");
Expand Down Expand Up @@ -1572,6 +1626,8 @@ mod tests {
true,
"openshell/sandbox:latest",
"",
"openshell/supervisor:latest",
"",
"sandbox-id",
"sandbox-name",
"https://gateway.example.com",
Expand Down Expand Up @@ -1614,6 +1670,8 @@ mod tests {
true,
"openshell/sandbox:latest",
"",
"openshell/supervisor:latest",
"",
"sandbox-id",
"sandbox-name",
"https://gateway.example.com",
Expand Down Expand Up @@ -1652,6 +1710,8 @@ mod tests {
false,
"openshell/sandbox:latest",
"",
"openshell/supervisor:latest",
"",
"sandbox-id",
"sandbox-name",
"https://gateway.example.com",
Expand Down Expand Up @@ -1686,6 +1746,8 @@ mod tests {
true,
"openshell/sandbox:latest",
"",
"openshell/supervisor:latest",
"",
"sandbox-id",
"sandbox-name",
"https://gateway.example.com",
Expand Down Expand Up @@ -1713,6 +1775,8 @@ mod tests {
false,
"openshell/sandbox:latest",
"",
"openshell/supervisor:latest",
"",
"sandbox-id",
"sandbox-name",
"https://gateway.example.com",
Expand Down Expand Up @@ -1744,6 +1808,8 @@ mod tests {
false,
"openshell/sandbox:latest",
"",
"openshell/supervisor:latest",
"",
"sandbox-id",
"sandbox-name",
"https://gateway.example.com",
Expand All @@ -1770,6 +1836,8 @@ mod tests {
false,
"openshell/sandbox:latest",
"",
"openshell/supervisor:latest",
"",
"sandbox-id",
"sandbox-name",
"https://gateway.example.com",
Expand Down Expand Up @@ -1913,6 +1981,8 @@ mod tests {
false,
"openshell/sandbox:latest",
"",
"openshell/supervisor:latest",
"",
"sandbox-id",
"sandbox-name",
"https://gateway.example.com",
Expand All @@ -1925,12 +1995,14 @@ mod tests {
false, // user provided custom VCTs
);

// No init container should be present
// Only the supervisor init container should be present — no workspace init container
let init_containers = pod_template["spec"]["initContainers"]
.as_array()
.expect("supervisor init container should always be present");
assert!(
pod_template["spec"]["initContainers"].is_null()
|| pod_template["spec"]["initContainers"]
.as_array()
.is_none_or(Vec::is_empty),
!init_containers
.iter()
.any(|c| c["name"] == WORKSPACE_INIT_CONTAINER_NAME),
"workspace init container must NOT be present when inject_workspace is false"
);

Expand Down
10 changes: 10 additions & 0 deletions crates/openshell-driver-kubernetes/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ struct Args {

#[arg(long, env = "OPENSHELL_HOST_GATEWAY_IP")]
host_gateway_ip: Option<String>,

#[arg(long, env = "OPENSHELL_SUPERVISOR_IMAGE")]
supervisor_image: Option<String>,

#[arg(long, env = "OPENSHELL_SUPERVISOR_IMAGE_PULL_POLICY")]
supervisor_image_pull_policy: Option<String>,
}

#[tokio::main]
Expand All @@ -72,6 +78,10 @@ async fn main() -> Result<()> {
namespace: args.sandbox_namespace,
default_image: args.sandbox_image.unwrap_or_default(),
image_pull_policy: args.sandbox_image_pull_policy.unwrap_or_default(),
supervisor_image: args
.supervisor_image
.unwrap_or_else(|| openshell_core::config::DEFAULT_SUPERVISOR_IMAGE.to_string()),
supervisor_image_pull_policy: args.supervisor_image_pull_policy.unwrap_or_default(),
grpc_endpoint: args.grpc_endpoint.unwrap_or_default(),
ssh_socket_path: args.sandbox_ssh_socket_path,
ssh_handshake_secret: args.ssh_handshake_secret,
Expand Down
Loading
Loading