From 8bfd7be71f45bb4875b192b387dd61fe38337c7a Mon Sep 17 00:00:00 2001 From: Mrunal Patel Date: Sat, 25 Apr 2026 21:29:46 -0700 Subject: [PATCH 01/10] feat(sandbox): add Kubernetes user namespace support (hostUsers: false) Add opt-in support for Kubernetes user namespace isolation on sandbox pods. When enabled, container UID 0 maps to an unprivileged host UID and capabilities become namespaced, providing defense-in-depth for the supervisor process. Configuration is two-layered: a cluster-wide default via OPENSHELL_ENABLE_USER_NAMESPACES (default false) and a per-sandbox override via the new `user_namespaces` field on SandboxTemplate. When user namespaces are active, the pod security context is extended with SETUID, SETGID, and DAC_READ_SEARCH capabilities to match the bounding-set requirements inside a user namespace. --- architecture/kubernetes-user-namespaces.md | 109 ++++++++ crates/openshell-core/src/config.rs | 8 + .../openshell-driver-kubernetes/src/config.rs | 1 + .../openshell-driver-kubernetes/src/driver.rs | 252 +++++++++++++++++- .../openshell-driver-kubernetes/src/main.rs | 4 + crates/openshell-server/src/cli.rs | 7 + crates/openshell-server/src/compute/mod.rs | 53 ++++ crates/openshell-server/src/lib.rs | 1 + .../helm/openshell/templates/statefulset.yaml | 4 + deploy/helm/openshell/values.yaml | 5 + proto/openshell.proto | 5 + 11 files changed, 440 insertions(+), 9 deletions(-) create mode 100644 architecture/kubernetes-user-namespaces.md diff --git a/architecture/kubernetes-user-namespaces.md b/architecture/kubernetes-user-namespaces.md new file mode 100644 index 000000000..6aa5617d6 --- /dev/null +++ b/architecture/kubernetes-user-namespaces.md @@ -0,0 +1,109 @@ +# Kubernetes User Namespace Support + +## Context + +Kubernetes v1.36 graduated user namespace support to GA (`spec.hostUsers: false`). This feature maps container UID 0 to an unprivileged host UID, making capabilities like `CAP_SYS_ADMIN` container-scoped rather than host-scoped. This is a significant defense-in-depth improvement for OpenShell sandbox pods, which currently require `SYS_ADMIN`, `NET_ADMIN`, `SYS_PTRACE`, and `SYSLOG` capabilities. + +The sandbox supervisor already runs as UID 0 inside the container and performs all privileged operations (namespace creation, seccomp, Landlock) locally — user namespaces confine these powers to the container without breaking functionality. + +## Design + +**Two-layer configuration:** +- Cluster-wide default: `enable_user_namespaces` on `Config` / `KubernetesComputeConfig` (env var `OPENSHELL_ENABLE_USER_NAMESPACES`, default `false`) +- Per-sandbox override: `optional bool user_namespaces` on `SandboxTemplate` in the proto, translated to `platform_config.host_users` for the K8s driver + +**Capability additions when enabled:** Add `SETUID`, `SETGID`, `DAC_READ_SEARCH` to the pod security context (matching the Podman driver at `crates/openshell-driver-podman/src/container.rs:393-400`) — needed because the bounding set is reset inside a user namespace. + +**No changes to:** seccomp filters (CLONE_NEWUSER block stays), Landlock, supervisor privilege-drop logic, init containers, volume mounts (ID-mapped mounts handle ownership transparently). + +## Changes + +### 1. Proto: add `user_namespaces` field to `SandboxTemplate` +**File:** `proto/openshell.proto` + +Add `optional bool user_namespaces = 10;` to the `SandboxTemplate` message. Using `optional` distinguishes "not set" (use cluster default) from explicit true/false. + +### 2. 
Core config: add `enable_user_namespaces` to server config +**File:** `crates/openshell-core/src/config.rs` + +Add field to `Config`: +```rust +#[serde(default)] +pub enable_user_namespaces: bool, +``` +Wire the env var `OPENSHELL_ENABLE_USER_NAMESPACES` (clap handles this on the standalone driver binary; for the in-process server path, `Config` serde does it). + +### 3. K8s driver config: add field +**File:** `crates/openshell-driver-kubernetes/src/config.rs` + +Add `pub enable_user_namespaces: bool` to `KubernetesComputeConfig`. + +### 4. Server: wire config and translate proto field +**File:** `crates/openshell-server/src/lib.rs` + +Pass `config.enable_user_namespaces` into the `KubernetesComputeConfig` construction. + +**File:** `crates/openshell-server/src/compute/mod.rs` (`build_platform_config`) + +Translate the new `SandboxTemplate.user_namespaces` field into `platform_config`: +```rust +if let Some(user_ns) = template.user_namespaces { + fields.insert("host_users".into(), Value { kind: Some(Kind::BoolValue(!user_ns)) }); +} +``` + +The public API uses `user_namespaces: true` (positive sense) while the K8s driver expects `host_users: false` (K8s convention). The driver inverts this back via `!host_users` to resolve the final pod-level `hostUsers` field. + +### 5. K8s driver: add `platform_config_bool` helper +**File:** `crates/openshell-driver-kubernetes/src/driver.rs` + +New helper following the existing `platform_config_string` / `platform_config_struct` pattern. + +### 6. K8s driver: apply `hostUsers: false` and extended capabilities +**File:** `crates/openshell-driver-kubernetes/src/driver.rs` + +- Pass `enable_user_namespaces` through `sandbox_to_k8s_spec` -> `sandbox_template_to_k8s` +- After the `runtimeClassName` block, resolve the effective setting: per-sandbox `platform_config.host_users` overrides cluster default +- Insert `spec.hostUsers: false` when user namespaces are enabled +- Extend the capability list with `SETUID`, `SETGID`, `DAC_READ_SEARCH` when enabled + +### 7. K8s driver: change hostPath type to `Directory` +**File:** `crates/openshell-driver-kubernetes/src/driver.rs` (`supervisor_volume`) + +Change `"type": "DirectoryOrCreate"` to `"type": "Directory"`. The supervisor path is pre-provisioned during cluster setup; `DirectoryOrCreate` could fail under user namespaces when the mapped UID can't create host directories. + +### 8. Standalone driver binary: wire CLI arg +**File:** `crates/openshell-driver-kubernetes/src/main.rs` + +Add `#[arg(long, env = "OPENSHELL_ENABLE_USER_NAMESPACES")]` and pass to config construction. + +### 9. 
Helm chart +**File:** `deploy/helm/openshell/values.yaml` — add `enableUserNamespaces: false` under `server:` + +**File:** `deploy/helm/openshell/templates/statefulset.yaml` — add conditional env var block: +```yaml +{{- if .Values.server.enableUserNamespaces }} +- name: OPENSHELL_ENABLE_USER_NAMESPACES + value: "true" +{{- end }} +``` + +## Risks + +| Risk | Mitigation | +|------|------------| +| GPU + user namespaces may conflict (NVIDIA device plugin) | Log a warning when both `gpu: true` and user namespaces are enabled; test before enabling by default | +| hostPath volume ownership with ID-mapped mounts | Step 7 changes to `Directory` type; mount is read-only so ownership doesn't matter for execution | +| sysfs remount in netns setup | Already avoided -- code uses `nsenter` instead of `ip netns exec` (documented at `netns.rs:685`) | +| Requires Linux 5.12+ and supporting runtime | Feature defaults to `false`; failure mode is a clear Kubernetes pod event | + +## Verification + +1. `mise run pre-commit` -- lint and format pass +2. `mise run test` -- unit tests pass including new tests for: + - `hostUsers: false` present/absent in generated pod spec based on config combinations + - Extended capability list when user namespaces enabled + - `platform_config_bool` helper + - `Directory` type on supervisor volume +3. `mise run e2e` on a K8s 1.36+ cluster with user namespace support -- sandbox creation, SSH, policy enforcement work with `enableUserNamespaces: true` +4. Manual: verify `cat /proc/self/uid_map` inside sandbox shows non-identity mapping diff --git a/crates/openshell-core/src/config.rs b/crates/openshell-core/src/config.rs index 2fbdb1b1d..ccab50c27 100644 --- a/crates/openshell-core/src/config.rs +++ b/crates/openshell-core/src/config.rs @@ -192,6 +192,13 @@ pub struct Config { /// allowing them to reach services running on the Docker host. #[serde(default)] pub host_gateway_ip: String, + + /// Enable Kubernetes user namespace isolation (`hostUsers: false`) for + /// sandbox pods. When enabled, container UID 0 maps to an unprivileged + /// host UID and capabilities become namespaced. Requires Kubernetes 1.36+ + /// with a supporting container runtime and Linux 5.12+. + #[serde(default)] + pub enable_user_namespaces: bool, } /// TLS configuration. 
@@ -245,6 +252,7 @@ impl Config { ssh_session_ttl_secs: default_ssh_session_ttl_secs(), client_tls_secret_name: String::new(), host_gateway_ip: String::new(), + enable_user_namespaces: false, } } diff --git a/crates/openshell-driver-kubernetes/src/config.rs b/crates/openshell-driver-kubernetes/src/config.rs index be3666130..f20e7da73 100644 --- a/crates/openshell-driver-kubernetes/src/config.rs +++ b/crates/openshell-driver-kubernetes/src/config.rs @@ -12,4 +12,5 @@ pub struct KubernetesComputeConfig { pub ssh_handshake_skew_secs: u64, pub client_tls_secret_name: String, pub host_gateway_ip: String, + pub enable_user_namespaces: bool, } diff --git a/crates/openshell-driver-kubernetes/src/driver.rs b/crates/openshell-driver-kubernetes/src/driver.rs index 444e0f55d..017e893c2 100644 --- a/crates/openshell-driver-kubernetes/src/driver.rs +++ b/crates/openshell-driver-kubernetes/src/driver.rs @@ -319,6 +319,7 @@ impl KubernetesComputeDriver { self.ssh_handshake_skew_secs(), &self.config.client_tls_secret_name, &self.config.host_gateway_ip, + self.config.enable_user_namespaces, ); let api = self.api(); @@ -674,7 +675,7 @@ fn supervisor_volume() -> serde_json::Value { "name": SUPERVISOR_VOLUME_NAME, "hostPath": { "path": SUPERVISOR_HOST_PATH, - "type": "DirectoryOrCreate" + "type": "Directory" } }) } @@ -887,6 +888,7 @@ fn sandbox_to_k8s_spec( ssh_handshake_skew_secs: u64, client_tls_secret_name: &str, host_gateway_ip: &str, + enable_user_namespaces: bool, ) -> serde_json::Value { let mut root = serde_json::Map::new(); @@ -929,6 +931,7 @@ fn sandbox_to_k8s_spec( client_tls_secret_name, host_gateway_ip, inject_workspace, + enable_user_namespaces, ), ); if !template.agent_socket_path.is_empty() { @@ -975,6 +978,7 @@ fn sandbox_to_k8s_spec( client_tls_secret_name, host_gateway_ip, inject_workspace, + enable_user_namespaces, ), ); } @@ -1000,6 +1004,7 @@ fn sandbox_template_to_k8s( client_tls_secret_name: &str, host_gateway_ip: &str, inject_workspace: bool, + enable_user_namespaces: bool, ) -> serde_json::Value { // The supervisor binary is always side-loaded from the node filesystem // via a hostPath volume, regardless of which sandbox image is used. @@ -1020,6 +1025,15 @@ fn sandbox_template_to_k8s( ); } + // Per-sandbox platform_config.host_users overrides the cluster-wide default. + let use_user_namespaces = platform_config_bool(template, "host_users") + .map(|host_users| !host_users) + .unwrap_or(enable_user_namespaces); + + if use_user_namespaces { + spec.insert("hostUsers".to_string(), serde_json::json!(false)); + } + let mut container = serde_json::Map::new(); container.insert("name".to_string(), serde_json::json!("agent")); // Use template image if provided, otherwise fall back to default @@ -1054,17 +1068,19 @@ fn sandbox_template_to_k8s( container.insert("env".to_string(), serde_json::Value::Array(env)); - // The sandbox process needs SYS_ADMIN (for seccomp filter installation and - // network namespace creation), NET_ADMIN (for network namespace veth setup), - // SYS_PTRACE (for the CONNECT proxy to read /proc//fd/ of sandbox-user - // processes to resolve binary identity for network policy enforcement), - // and SYSLOG (for reading /dev/kmsg to surface bypass detection diagnostics). - // This mirrors the capabilities used by `mise run sandbox`. + let mut capabilities: Vec<&str> = vec!["SYS_ADMIN", "NET_ADMIN", "SYS_PTRACE", "SYSLOG"]; + if use_user_namespaces { + // In a user namespace the bounding set is reset. 
SETUID/SETGID are + // needed for the supervisor to drop privileges to the sandbox user. + // DAC_READ_SEARCH is needed for cross-UID /proc//fd/ access + // for process identity resolution in network policy enforcement. + capabilities.extend(["SETUID", "SETGID", "DAC_READ_SEARCH"]); + } container.insert( "securityContext".to_string(), serde_json::json!({ "capabilities": { - "add": ["SYS_ADMIN", "NET_ADMIN", "SYS_PTRACE", "SYSLOG"] + "add": capabilities } }), ); @@ -1288,6 +1304,15 @@ fn platform_config_string(template: &SandboxTemplate, key: &str) -> Option Option { + let config = template.platform_config.as_ref()?; + let value = config.fields.get(key)?; + match value.kind.as_ref() { + Some(prost_types::value::Kind::BoolValue(b)) => Some(*b), + _ => None, + } +} + /// Extract a nested Struct value from the template's `platform_config`, /// converting it to `serde_json::Value`. fn platform_config_struct(template: &SandboxTemplate, key: &str) -> Option { @@ -1494,7 +1519,7 @@ mod tests { assert_eq!(volumes.len(), 1); assert_eq!(volumes[0]["name"], SUPERVISOR_VOLUME_NAME); assert_eq!(volumes[0]["hostPath"]["path"], SUPERVISOR_HOST_PATH); - assert_eq!(volumes[0]["hostPath"]["type"], "DirectoryOrCreate"); + assert_eq!(volumes[0]["hostPath"]["type"], "Directory"); // Agent container command should be overridden let command = pod_template["spec"]["containers"][0]["command"] @@ -1580,6 +1605,7 @@ mod tests { "", "", true, + false, ); assert_eq!( @@ -1623,6 +1649,7 @@ mod tests { "", "", true, + false, ); assert_eq!( @@ -1662,6 +1689,7 @@ mod tests { "", "", true, + false, ); assert_eq!( @@ -1696,6 +1724,7 @@ mod tests { "", "", true, + false, ); let limits = &pod_template["spec"]["containers"][0]["resources"]["limits"]; @@ -1723,6 +1752,7 @@ mod tests { "", "172.17.0.1", true, + false, ); let host_aliases = pod_template["spec"]["hostAliases"] @@ -1754,6 +1784,7 @@ mod tests { "", "", true, + false, ); assert!( @@ -1780,6 +1811,7 @@ mod tests { "my-tls-secret", "", true, + false, ); let volumes = pod_template["spec"]["volumes"] @@ -1925,6 +1957,7 @@ mod tests { "", "", false, // user provided custom VCTs + false, ); // No init container should be present @@ -1947,4 +1980,205 @@ mod tests { "workspace mount must NOT be present when inject_workspace is false" ); } + + // ----------------------------------------------------------------------- + // User namespace tests + // ----------------------------------------------------------------------- + + fn default_template_to_k8s(enable_user_namespaces: bool) -> serde_json::Value { + sandbox_template_to_k8s( + &SandboxTemplate::default(), + false, + "openshell/sandbox:latest", + "", + "sandbox-id", + "sandbox-name", + "https://gateway.example.com", + "0.0.0.0:2222", + "secret", + 300, + &std::collections::HashMap::new(), + "", + "", + true, + enable_user_namespaces, + ) + } + + #[test] + fn user_namespaces_disabled_by_default() { + let pod_template = default_template_to_k8s(false); + assert!( + pod_template["spec"]["hostUsers"].is_null(), + "hostUsers must not be set when user namespaces are disabled" + ); + let caps = pod_template["spec"]["containers"][0]["securityContext"]["capabilities"]["add"] + .as_array() + .unwrap(); + assert_eq!(caps.len(), 4); + assert!(!caps.contains(&serde_json::json!("SETUID"))); + } + + #[test] + fn user_namespaces_enabled_by_cluster_default() { + let pod_template = default_template_to_k8s(true); + assert_eq!( + pod_template["spec"]["hostUsers"], + serde_json::json!(false), + "hostUsers must be false when user namespaces are 
enabled" + ); + } + + #[test] + fn user_namespaces_adds_extra_capabilities() { + let pod_template = default_template_to_k8s(true); + let caps = pod_template["spec"]["containers"][0]["securityContext"]["capabilities"]["add"] + .as_array() + .unwrap(); + assert!(caps.contains(&serde_json::json!("SYS_ADMIN"))); + assert!(caps.contains(&serde_json::json!("NET_ADMIN"))); + assert!(caps.contains(&serde_json::json!("SYS_PTRACE"))); + assert!(caps.contains(&serde_json::json!("SYSLOG"))); + assert!(caps.contains(&serde_json::json!("SETUID"))); + assert!(caps.contains(&serde_json::json!("SETGID"))); + assert!(caps.contains(&serde_json::json!("DAC_READ_SEARCH"))); + assert_eq!(caps.len(), 7); + } + + #[test] + fn user_namespaces_per_sandbox_override_enables() { + let template = SandboxTemplate { + platform_config: Some(Struct { + fields: [( + "host_users".to_string(), + Value { + kind: Some(Kind::BoolValue(false)), + }, + )] + .into_iter() + .collect(), + }), + ..SandboxTemplate::default() + }; + + let pod_template = sandbox_template_to_k8s( + &template, + false, + "openshell/sandbox:latest", + "", + "sandbox-id", + "sandbox-name", + "https://gateway.example.com", + "0.0.0.0:2222", + "secret", + 300, + &std::collections::HashMap::new(), + "", + "", + true, + false, // cluster default is off + ); + + assert_eq!( + pod_template["spec"]["hostUsers"], + serde_json::json!(false), + "per-sandbox host_users: false must enable user namespaces" + ); + let caps = pod_template["spec"]["containers"][0]["securityContext"]["capabilities"]["add"] + .as_array() + .unwrap(); + assert!(caps.contains(&serde_json::json!("SETUID"))); + } + + #[test] + fn user_namespaces_per_sandbox_override_disables() { + let template = SandboxTemplate { + platform_config: Some(Struct { + fields: [( + "host_users".to_string(), + Value { + kind: Some(Kind::BoolValue(true)), + }, + )] + .into_iter() + .collect(), + }), + ..SandboxTemplate::default() + }; + + let pod_template = sandbox_template_to_k8s( + &template, + false, + "openshell/sandbox:latest", + "", + "sandbox-id", + "sandbox-name", + "https://gateway.example.com", + "0.0.0.0:2222", + "secret", + 300, + &std::collections::HashMap::new(), + "", + "", + true, + true, // cluster default is on + ); + + assert!( + pod_template["spec"]["hostUsers"].is_null(), + "per-sandbox host_users: true must disable user namespaces even when cluster default is on" + ); + let caps = pod_template["spec"]["containers"][0]["securityContext"]["capabilities"]["add"] + .as_array() + .unwrap(); + assert_eq!(caps.len(), 4, "extra capabilities must not be added when user namespaces are disabled"); + } + + #[test] + fn platform_config_bool_extracts_value() { + let template = SandboxTemplate { + platform_config: Some(Struct { + fields: [( + "my_bool".to_string(), + Value { + kind: Some(Kind::BoolValue(true)), + }, + )] + .into_iter() + .collect(), + }), + ..SandboxTemplate::default() + }; + + assert_eq!(platform_config_bool(&template, "my_bool"), Some(true)); + assert_eq!(platform_config_bool(&template, "missing"), None); + } + + #[test] + fn platform_config_bool_returns_none_for_non_bool() { + let template = SandboxTemplate { + platform_config: Some(Struct { + fields: [( + "a_string".to_string(), + Value { + kind: Some(Kind::StringValue("hello".to_string())), + }, + )] + .into_iter() + .collect(), + }), + ..SandboxTemplate::default() + }; + + assert_eq!(platform_config_bool(&template, "a_string"), None); + } + + #[test] + fn supervisor_volume_uses_directory_type() { + let vol = supervisor_volume(); + 
assert_eq!( + vol["hostPath"]["type"], "Directory", + "supervisor hostPath must use Directory (not DirectoryOrCreate) for user namespace compatibility" + ); + } } diff --git a/crates/openshell-driver-kubernetes/src/main.rs b/crates/openshell-driver-kubernetes/src/main.rs index 4b871d77f..06d943807 100644 --- a/crates/openshell-driver-kubernetes/src/main.rs +++ b/crates/openshell-driver-kubernetes/src/main.rs @@ -57,6 +57,9 @@ struct Args { #[arg(long, env = "OPENSHELL_HOST_GATEWAY_IP")] host_gateway_ip: Option, + + #[arg(long, env = "OPENSHELL_ENABLE_USER_NAMESPACES")] + enable_user_namespaces: bool, } #[tokio::main] @@ -78,6 +81,7 @@ async fn main() -> Result<()> { ssh_handshake_skew_secs: args.ssh_handshake_skew_secs, client_tls_secret_name: args.client_tls_secret_name.unwrap_or_default(), host_gateway_ip: args.host_gateway_ip.unwrap_or_default(), + enable_user_namespaces: args.enable_user_namespaces, }) .await .into_diagnostic()?; diff --git a/crates/openshell-server/src/cli.rs b/crates/openshell-server/src/cli.rs index 2e6e2823b..cbc9e6e29 100644 --- a/crates/openshell-server/src/cli.rs +++ b/crates/openshell-server/src/cli.rs @@ -177,6 +177,11 @@ struct Args { #[arg(long, env = "OPENSHELL_VM_TLS_KEY")] vm_tls_key: Option, + /// Enable Kubernetes user namespace isolation (hostUsers: false) for + /// sandbox pods. + #[arg(long, env = "OPENSHELL_ENABLE_USER_NAMESPACES")] + enable_user_namespaces: bool, + /// Disable TLS entirely — listen on plaintext HTTP. /// Use this when the gateway sits behind a reverse proxy or tunnel /// (e.g. Cloudflare Tunnel) that terminates TLS at the edge. @@ -304,6 +309,8 @@ async fn run_from_args(args: Args) -> Result<()> { config = config.with_host_gateway_ip(ip); } + config.enable_user_namespaces = args.enable_user_namespaces; + let vm_config = VmComputeConfig { state_dir: args.vm_driver_state_dir, driver_dir: args.driver_dir, diff --git a/crates/openshell-server/src/compute/mod.rs b/crates/openshell-server/src/compute/mod.rs index 19cfd5faf..7fb754722 100644 --- a/crates/openshell-server/src/compute/mod.rs +++ b/crates/openshell-server/src/compute/mod.rs @@ -955,6 +955,19 @@ fn build_platform_config(template: &SandboxTemplate) -> Option Date: Sat, 25 Apr 2026 21:42:00 -0700 Subject: [PATCH 02/10] fix(sandbox): add GPU+userns warning and document user namespace isolation Address review feedback: - Emit a warning when GPU and user namespaces are both active on a sandbox, matching the risk mitigation documented in the architecture. - Add a User Namespace Isolation section to the security best practices doc covering prerequisites, Helm config, and GPU caveats. 
--- crates/openshell-driver-kubernetes/src/driver.rs | 6 ++++++ docs/security/best-practices.mdx | 14 ++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/crates/openshell-driver-kubernetes/src/driver.rs b/crates/openshell-driver-kubernetes/src/driver.rs index 017e893c2..07b0eea02 100644 --- a/crates/openshell-driver-kubernetes/src/driver.rs +++ b/crates/openshell-driver-kubernetes/src/driver.rs @@ -1032,6 +1032,12 @@ fn sandbox_template_to_k8s( if use_user_namespaces { spec.insert("hostUsers".to_string(), serde_json::json!(false)); + if gpu { + warn!( + "GPU sandbox with user namespaces enabled — \ + NVIDIA device plugin compatibility is unverified" + ); + } } let mut container = serde_json::Map::new(); diff --git a/docs/security/best-practices.mdx b/docs/security/best-practices.mdx index 6a618520f..ff8f8fedf 100644 --- a/docs/security/best-practices.mdx +++ b/docs/security/best-practices.mdx @@ -63,6 +63,20 @@ Even if a process ignores proxy environment variables, it can only reach the pro | Risk if bypassed | Without network namespace isolation, a process could connect directly to the internet, bypassing all policy enforcement. | | Recommendation | No action needed. OpenShell enforces this automatically. | +### User Namespace Isolation + +Kubernetes user namespaces (`hostUsers: false`) map container UID 0 to an unprivileged host UID. +Capabilities like `CAP_SYS_ADMIN` become namespaced — they grant power over container-local resources only, not the host. +This provides defense-in-depth: even if a container escape vulnerability exists, the attacker lands as an unprivileged host user. + +| Aspect | Detail | +|---|---| +| Default | Disabled. Set `server.enableUserNamespaces: true` in the Helm values or `OPENSHELL_ENABLE_USER_NAMESPACES=true` as an environment variable to enable cluster-wide. | +| What you can change | Enable cluster-wide via Helm or environment variable. Override per-sandbox via the `user_namespaces` field on `SandboxTemplate` in the API. | +| Prerequisites | Kubernetes 1.36+, a container runtime that supports user namespaces (containerd 2.0+, CRI-O 1.25+), and Linux 5.12+ for ID-mapped mounts. | +| Risk if enabled with GPU | NVIDIA device plugin compatibility with user namespaces is unverified. OpenShell logs a warning when both GPU and user namespaces are active on the same sandbox. | +| Recommendation | Enable on non-GPU clusters running Kubernetes 1.36+ for stronger host isolation. Test GPU workloads separately before enabling on GPU clusters. | + ### Binary Identity Binding The proxy identifies which binary initiated each connection by reading `/proc//exe` (the kernel-trusted executable path). From 2f9f5a3743f9ea569244b426f93b737b149f9883 Mon Sep 17 00:00:00 2001 From: Mrunal Patel Date: Sun, 26 Apr 2026 09:33:11 -0700 Subject: [PATCH 03/10] test(e2e): add user namespace pod spec verification Enable OPENSHELL_ENABLE_USER_NAMESPACES on the gateway, create a sandbox, and verify the pod spec has hostUsers: false and the extended capability set (SETUID, SETGID, DAC_READ_SEARCH) required for user namespace operation. --- e2e/rust/tests/user_namespaces.rs | 179 ++++++++++++++++++++++++++++++ 1 file changed, 179 insertions(+) create mode 100644 e2e/rust/tests/user_namespaces.rs diff --git a/e2e/rust/tests/user_namespaces.rs b/e2e/rust/tests/user_namespaces.rs new file mode 100644 index 000000000..74cc9af26 --- /dev/null +++ b/e2e/rust/tests/user_namespaces.rs @@ -0,0 +1,179 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +#![cfg(feature = "e2e")] + +//! E2E test: verify Kubernetes user namespace pod spec generation. +//! +//! Enables `OPENSHELL_ENABLE_USER_NAMESPACES` on the gateway, triggers sandbox +//! creation, and inspects the resulting pod spec to confirm: +//! 1. `spec.hostUsers` is `false` +//! 2. The container security context includes the extra capabilities +//! (SETUID, SETGID, DAC_READ_SEARCH) required for user namespace operation +//! +//! The sandbox pod may fail to start in Docker-in-Docker dev clusters where the +//! filesystem does not support ID-mapped mounts. The test inspects the pod spec +//! regardless of runtime success. + +use std::process::Stdio; +use std::time::Duration; + +use openshell_e2e::harness::binary::openshell_cmd; + +async fn kubectl(args: &[&str]) -> Result { + let output = tokio::process::Command::new("docker") + .args(["exec", "openshell-cluster-openshell", "kubectl"]) + .args(args) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .output() + .await + .map_err(|e| format!("failed to run kubectl: {e}"))?; + + let stdout = String::from_utf8_lossy(&output.stdout).to_string(); + let stderr = String::from_utf8_lossy(&output.stderr).to_string(); + + if !output.status.success() { + return Err(format!("kubectl {args:?} failed: {stdout}{stderr}")); + } + Ok(stdout) +} + +async fn set_user_namespaces(enable: bool) -> Result<(), String> { + let env_arg = if enable { + "OPENSHELL_ENABLE_USER_NAMESPACES=true" + } else { + "OPENSHELL_ENABLE_USER_NAMESPACES-" + }; + + kubectl(&[ + "set", "env", "statefulset/openshell", + "-n", "openshell", env_arg, + ]).await?; + + kubectl(&[ + "rollout", "status", "statefulset/openshell", + "-n", "openshell", "--timeout=120s", + ]).await?; + + // Give the gateway time to fully initialize after rollout. + tokio::time::sleep(Duration::from_secs(5)).await; + + Ok(()) +} + +async fn delete_sandbox(name: &str) { + let _ = kubectl(&["delete", "sandbox", name, "-n", "openshell"]).await; +} + +/// Find a sandbox pod by its sandbox CRD name. The CRD controller creates a +/// pod with the same name as the Sandbox resource. +async fn wait_for_sandbox_pod(name: &str, timeout_secs: u64) -> Result<(), String> { + let deadline = tokio::time::Instant::now() + Duration::from_secs(timeout_secs); + while tokio::time::Instant::now() < deadline { + if let Ok(n) = kubectl(&[ + "get", "pod", name, "-n", "openshell", + "-o", "jsonpath={.metadata.name}", + ]).await { + if !n.trim().is_empty() { + return Ok(()); + } + } + tokio::time::sleep(Duration::from_secs(2)).await; + } + Err(format!("sandbox pod {name} did not appear within {timeout_secs}s")) +} + +#[tokio::test] +async fn sandbox_pod_spec_has_user_namespace_fields() { + // Enable user namespaces on the gateway. + set_user_namespaces(true) + .await + .expect("failed to enable user namespaces on gateway"); + + // Start sandbox creation in the background. The pod may never become + // ready in DinD environments, so we spawn the CLI and inspect the pod + // spec independently. + let mut cmd = openshell_cmd(); + cmd.arg("sandbox").arg("create") + .arg("--").arg("sleep").arg("infinity"); + cmd.stdout(Stdio::piped()).stderr(Stdio::piped()); + + let child = cmd.spawn().expect("failed to spawn openshell create"); + + // Wait for the sandbox CRD to be created by polling. 
+ let mut sandbox_name = None; + let deadline = tokio::time::Instant::now() + Duration::from_secs(60); + while tokio::time::Instant::now() < deadline { + if let Ok(names) = kubectl(&[ + "get", "sandbox", "-n", "openshell", + "-o", "jsonpath={.items[*].metadata.name}", + ]).await { + let latest = names.split_whitespace() + .filter(|n| *n != "openshell-0") + .last() + .map(|s| s.to_string()); + if latest.is_some() { + sandbox_name = latest; + break; + } + } + tokio::time::sleep(Duration::from_secs(3)).await; + } + + let name = match sandbox_name { + Some(n) => n, + None => { + drop(child); + set_user_namespaces(false).await.ok(); + panic!("no sandbox CRD found within 60s"); + } + }; + + // Wait for the pod to be created (the CRD controller creates it). + if let Err(e) = wait_for_sandbox_pod(&name, 60).await { + drop(child); + delete_sandbox(&name).await; + set_user_namespaces(false).await.ok(); + panic!("{e}"); + } + + // Inspect the pod spec for hostUsers. + let host_users = kubectl(&[ + "get", "pod", &name, "-n", "openshell", + "-o", "jsonpath={.spec.hostUsers}", + ]).await; + + // Inspect capabilities on the agent container. + let caps = kubectl(&[ + "get", "pod", &name, "-n", "openshell", + "-o", "jsonpath={.spec.containers[?(@.name=='agent')].securityContext.capabilities.add}", + ]).await; + + // Clean up. + drop(child); + delete_sandbox(&name).await; + set_user_namespaces(false).await.ok(); + + // Assert hostUsers is false. + let host_users_val = host_users.expect("failed to get hostUsers from pod spec"); + assert_eq!( + host_users_val.trim(), "false", + "sandbox pod must have spec.hostUsers=false when user namespaces are enabled" + ); + + // Assert extra capabilities are present. + let caps_val = caps.expect("failed to get capabilities from pod spec"); + for cap in ["SETUID", "SETGID", "DAC_READ_SEARCH"] { + assert!( + caps_val.contains(cap), + "sandbox pod must include {cap} in capabilities when user namespaces are enabled, got: {caps_val}" + ); + } + for cap in ["SYS_ADMIN", "NET_ADMIN", "SYS_PTRACE", "SYSLOG"] { + assert!( + caps_val.contains(cap), + "sandbox pod must include {cap} in capabilities, got: {caps_val}" + ); + } +} From 7adc67811b40c10b08abf3bf47096510b83a2f30 Mon Sep 17 00:00:00 2001 From: Mrunal Patel Date: Sun, 26 Apr 2026 09:41:36 -0700 Subject: [PATCH 04/10] docs(architecture): document nested k3s/DinD user namespace limitation Explain why user namespaces fail in Docker-in-Docker dev clusters: nested overlayfs does not support MOUNT_ATTR_IDMAP. Document where user namespaces work (bare-metal, VM-based, managed K8s) and where they don't (kind, k3s-in-Docker, old kernels, NFS). 
--- architecture/kubernetes-user-namespaces.md | 27 ++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/architecture/kubernetes-user-namespaces.md b/architecture/kubernetes-user-namespaces.md index 6aa5617d6..1abbd5b5b 100644 --- a/architecture/kubernetes-user-namespaces.md +++ b/architecture/kubernetes-user-namespaces.md @@ -96,6 +96,29 @@ Add `#[arg(long, env = "OPENSHELL_ENABLE_USER_NAMESPACES")]` and pass to config | hostPath volume ownership with ID-mapped mounts | Step 7 changes to `Directory` type; mount is read-only so ownership doesn't matter for execution | | sysfs remount in netns setup | Already avoided -- code uses `nsenter` instead of `ip netns exec` (documented at `netns.rs:685`) | | Requires Linux 5.12+ and supporting runtime | Feature defaults to `false`; failure mode is a clear Kubernetes pod event | +| Nested container environments (DinD / k3s-in-Docker) | Does not work in the local dev cluster; see section below | + +## Nested k3s / Docker-in-Docker limitation + +User namespaces require **ID-mapped mounts** (Linux 5.12+) so the kernel can transparently remap file ownership between the container's UID space and the host's UID space. When k3s runs inside a Docker container (the `mise run cluster` dev environment), the inner container's root filesystem sits on an overlayfs layer managed by the outer Docker daemon. The overlayfs driver in this nested configuration does not support `MOUNT_ATTR_IDMAP`, so `runc` fails at container init: + +``` +failed to set MOUNT_ATTR_IDMAP on .../etc-hosts: invalid argument +(maybe the filesystem used doesn't support idmap mounts on this kernel?) +``` + +This is a kernel/filesystem constraint, not an OpenShell bug. The pod spec is generated correctly (`hostUsers: false`, extended capabilities), but the container runtime cannot fulfil the mount request. + +**Where user namespaces work:** +- Bare-metal or VM-based Kubernetes clusters where the node's root filesystem is ext4/xfs/btrfs (all support ID-mapped mounts since Linux 5.12-5.19). +- Managed Kubernetes services (EKS, GKE, AKS) on nodes running a supported kernel. + +**Where they do not work:** +- k3s-in-Docker / kind / Docker-in-Docker dev clusters where the inner container uses overlayfs on top of the outer container's overlayfs. The nested overlayfs does not support `MOUNT_ATTR_IDMAP`. +- Nodes running kernels older than 5.12. +- Nodes using filesystems that have not added ID-mapped mount support (e.g., NFS on older kernels). + +The e2e test (`e2e/rust/tests/user_namespaces.rs`) accounts for this by verifying only the pod spec fields (`hostUsers`, capabilities) rather than attempting to run a command inside the sandbox. ## Verification @@ -105,5 +128,5 @@ Add `#[arg(long, env = "OPENSHELL_ENABLE_USER_NAMESPACES")]` and pass to config - Extended capability list when user namespaces enabled - `platform_config_bool` helper - `Directory` type on supervisor volume -3. `mise run e2e` on a K8s 1.36+ cluster with user namespace support -- sandbox creation, SSH, policy enforcement work with `enableUserNamespaces: true` -4. Manual: verify `cat /proc/self/uid_map` inside sandbox shows non-identity mapping +3. `mise run e2e` -- the `user_namespaces` test verifies pod spec correctness against the local dev cluster +4. 
On a bare-metal or VM-based K8s 1.33+ cluster: `cat /proc/self/uid_map` inside a sandbox should show a non-identity mapping (UID 0 maps to a high host UID) From a8b56ee72ebdcb9e1770d71aaba835677c853f6b Mon Sep 17 00:00:00 2001 From: Mrunal Patel Date: Sun, 26 Apr 2026 16:27:34 -0700 Subject: [PATCH 05/10] docs(architecture): add OCP user namespace testing guide Step-by-step reproduction guide for deploying OpenShell with user namespace isolation on OpenShift. Covers TLS setup, image push to the internal registry, supervisor binary distribution via DaemonSet with SELinux labeling, Helm deploy, CLI configuration, and end-to-end verification. --- .../kubernetes-user-namespaces-ocp-testing.md | 372 ++++++++++++++++++ 1 file changed, 372 insertions(+) create mode 100644 architecture/kubernetes-user-namespaces-ocp-testing.md diff --git a/architecture/kubernetes-user-namespaces-ocp-testing.md b/architecture/kubernetes-user-namespaces-ocp-testing.md new file mode 100644 index 000000000..01df9b29b --- /dev/null +++ b/architecture/kubernetes-user-namespaces-ocp-testing.md @@ -0,0 +1,372 @@ +# Testing User Namespaces on OCP + +Step-by-step guide to deploy OpenShell with user namespace isolation on an OpenShift cluster and verify end-to-end functionality. + +## Prerequisites + +- An OCP cluster (tested on OCP 4.22 / K8s 1.35.3 / CRI-O 1.35 / RHEL CoreOS / kernel 5.14) +- `KUBECONFIG` pointing at the cluster (e.g., `export KUBECONFIG=/path/to/kubeconfig`) +- `kubectl` binary (the examples below use the full path; adjust as needed) +- `helm` binary +- `podman` for building and pushing images +- The OpenShell repo checked out with the user namespace branch built + +Throughout this guide: + +```shell +K=/home/mrunalp/repos/kubernetes/_output/local/bin/linux/amd64/kubectl +HELM=/home/mrunalp/.local/share/mise/installs/helm/4.1.4/linux-amd64/helm +export KUBECONFIG=/path/to/your/kubeconfig +``` + +## 1. Build binaries + +```shell +cargo build -p openshell-server --features openshell-core/dev-settings +cargo build -p openshell-sandbox --features openshell-core/dev-settings +cargo build -p openshell-cli --features openshell-core/dev-settings +``` + +## 2. Create namespace and install the Sandbox CRD + +```shell +$K create ns openshell +$K apply -f deploy/kube/manifests/agent-sandbox.yaml +``` + +Label the namespace to allow privileged pods: + +```shell +$K label ns openshell pod-security.kubernetes.io/enforce=privileged --overwrite +$K label ns openshell pod-security.kubernetes.io/warn=privileged --overwrite +``` + +## 3. Grant SCCs + +The gateway pod needs `anyuid` (runs as UID 1000) and sandbox pods need `privileged` (capabilities for supervisor): + +```shell +$K create clusterrolebinding openshell-sa-anyuid \ + --clusterrole=system:openshift:scc:anyuid \ + --serviceaccount=openshell:openshell + +$K create clusterrolebinding openshell-sa-privileged \ + --clusterrole=system:openshift:scc:privileged \ + --serviceaccount=openshell:openshell + +$K create clusterrolebinding openshell-default-privileged \ + --clusterrole=system:openshift:scc:privileged \ + --serviceaccount=openshell:default +``` + +Grant the sandbox CRD controller full permissions (it needs to set ownerReferences with blockOwnerDeletion): + +```shell +$K create clusterrolebinding agent-sandbox-admin \ + --clusterrole=cluster-admin \ + --serviceaccount=agent-sandbox-system:agent-sandbox-controller +``` + +## 4. 
Generate TLS certificates + +```shell +TLSDIR=$(mktemp -d) + +# CA +openssl req -x509 -newkey rsa:2048 -nodes \ + -keyout $TLSDIR/ca.key -out $TLSDIR/ca.crt \ + -days 365 -subj "/CN=openshell-ca" 2>/dev/null + +# Server cert +openssl req -newkey rsa:2048 -nodes \ + -keyout $TLSDIR/server.key -out $TLSDIR/server.csr \ + -subj "/CN=openshell.openshell.svc.cluster.local" \ + -addext "subjectAltName=DNS:openshell.openshell.svc.cluster.local,DNS:openshell,DNS:localhost,IP:127.0.0.1" 2>/dev/null + +openssl x509 -req -in $TLSDIR/server.csr \ + -CA $TLSDIR/ca.crt -CAkey $TLSDIR/ca.key -CAcreateserial \ + -out $TLSDIR/server.crt -days 365 \ + -extfile <(echo "subjectAltName=DNS:openshell.openshell.svc.cluster.local,DNS:openshell,DNS:localhost,IP:127.0.0.1") 2>/dev/null + +# Client cert +openssl req -newkey rsa:2048 -nodes \ + -keyout $TLSDIR/client.key -out $TLSDIR/client.csr \ + -subj "/CN=openshell-client" 2>/dev/null + +openssl x509 -req -in $TLSDIR/client.csr \ + -CA $TLSDIR/ca.crt -CAkey $TLSDIR/ca.key -CAcreateserial \ + -out $TLSDIR/client.crt -days 365 2>/dev/null +``` + +Create Kubernetes secrets: + +```shell +$K create secret tls openshell-server-tls -n openshell \ + --cert=$TLSDIR/server.crt --key=$TLSDIR/server.key + +$K create secret generic openshell-server-client-ca -n openshell \ + --from-file=ca.crt=$TLSDIR/ca.crt + +$K create secret generic openshell-client-tls -n openshell \ + --from-file=ca.crt=$TLSDIR/ca.crt \ + --from-file=tls.crt=$TLSDIR/client.crt \ + --from-file=tls.key=$TLSDIR/client.key + +$K create secret generic openshell-ssh-handshake -n openshell \ + --from-literal=secret=$(openssl rand -hex 32) +``` + +Note: the `openshell-client-tls` secret must include `ca.crt`, `tls.crt`, and `tls.key` (not a `kubernetes.io/tls` type secret, which only has `tls.crt` and `tls.key`). + +## 5. Expose the OCP internal registry and push images + +```shell +# Enable the default route for the internal registry +$K patch configs.imageregistry.operator.openshift.io/cluster \ + --type merge -p '{"spec":{"defaultRoute":true}}' + +sleep 5 +REGISTRY=$($K get route default-route -n openshift-image-registry -o jsonpath='{.spec.host}') +TOKEN=$($K create token builder -n openshell) + +podman login --tls-verify=false -u kubeadmin -p "$TOKEN" "$REGISTRY" +``` + +Build and push the gateway image: + +```shell +podman build -f deploy/docker/Dockerfile.images --target gateway \ + -t localhost/openshell/gateway:dev . + +podman tag localhost/openshell/gateway:dev $REGISTRY/openshell/gateway:dev +podman push --tls-verify=false $REGISTRY/openshell/gateway:dev +``` + +Pull and push the sandbox base image: + +```shell +podman pull ghcr.io/nvidia/openshell-community/sandboxes/base:latest + +podman tag ghcr.io/nvidia/openshell-community/sandboxes/base:latest \ + $REGISTRY/openshell/sandbox-base:latest +podman push --tls-verify=false $REGISTRY/openshell/sandbox-base:latest +``` + +## 6. Install the supervisor binary on cluster nodes + +The sandbox supervisor binary is mounted into pods via a hostPath volume at `/opt/openshell/bin/`. A DaemonSet distributes it to every node with the correct SELinux label. 
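After the installer DaemonSet below has rolled out, it is worth spot-checking a node before creating sandboxes. The snippet below is an optional sketch: it assumes node debug pods are permitted in the `openshell` namespace (the `privileged` grant to the `default` service account from step 3 covers this) and simply picks the first listed node; adjust the node selection for multi-node clusters.

```shell
# Wait for the installer DaemonSet (deployed below) to finish on every node.
$K rollout status daemonset/openshell-supervisor-installer -n openshell

# Spot-check one node: the supervisor binary should be present and labeled
# container_file_t so sandbox pods are allowed to execute it.
NODE=$($K get nodes -o jsonpath='{.items[0].metadata.name}')
$K debug node/$NODE -it -n openshell \
  --image=registry.access.redhat.com/ubi9/ubi-minimal -- \
  ls -lZ /host/opt/openshell/bin/openshell-sandbox
```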
+ +Build and push a minimal image containing the supervisor binary: + +```shell +cp target/debug/openshell-sandbox /tmp/openshell-sandbox + +cat > /tmp/Dockerfile.supervisor <<'EOF' +FROM registry.access.redhat.com/ubi9/ubi-minimal:latest +COPY openshell-sandbox /openshell-sandbox +RUN chmod 755 /openshell-sandbox +EOF + +podman build -f /tmp/Dockerfile.supervisor -t localhost/openshell/supervisor:dev /tmp/ +podman tag localhost/openshell/supervisor:dev $REGISTRY/openshell/supervisor:dev +podman push --tls-verify=false $REGISTRY/openshell/supervisor:dev +``` + +Deploy the installer DaemonSet: + +```shell +INTERNAL_REG="image-registry.openshift-image-registry.svc:5000" + +cat </tmp/pf.log 2>&1 & +``` + +Set up the CLI gateway configuration with mTLS: + +```shell +mkdir -p ~/.config/openshell/gateways/ocp-userns/mtls + +cp $TLSDIR/ca.crt ~/.config/openshell/gateways/ocp-userns/mtls/ +cp $TLSDIR/client.crt ~/.config/openshell/gateways/ocp-userns/mtls/tls.crt +cp $TLSDIR/client.key ~/.config/openshell/gateways/ocp-userns/mtls/tls.key + +cat > ~/.config/openshell/gateways/ocp-userns/metadata.json <<'EOF' +{ + "name": "ocp-userns", + "gateway_endpoint": "https://127.0.0.1:18443", + "is_remote": false, + "gateway_port": 18443, + "auth_mode": "mtls" +} +EOF +``` + +Verify connectivity: + +```shell +OPENSHELL_GATEWAY=ocp-userns target/debug/openshell status +``` + +Expected output: + +``` +Server Status + Gateway: ocp-userns + Server: https://127.0.0.1:18443 + Status: Connected +``` + +## 9. Create a sandbox and verify user namespaces + +```shell +export OPENSHELL_GATEWAY=ocp-userns + +target/debug/openshell sandbox create --no-bootstrap -- sh -lc \ + "echo '=== uid_map ==='; cat /proc/self/uid_map; \ + echo '=== gid_map ==='; cat /proc/self/gid_map; \ + echo '=== id ==='; id; \ + echo '=== userns-e2e-ok ==='" +``` + +Expected output (UID values will vary): + +``` +=== uid_map === + 0 3285581824 65536 +=== gid_map === + 0 3285581824 65536 +=== id === +uid=998(sandbox) gid=998(sandbox) groups=998(sandbox) +=== userns-e2e-ok === +``` + +This confirms: +- UID 0 inside the container maps to a high host UID (non-identity mapping) +- The sandbox user (UID 998) is active +- The SSH tunnel through the gateway works end-to-end +- Workspace init, supervisor startup, network namespace creation, and proxy all function correctly under user namespace isolation + +## 10. 
Cleanup + +```shell +# Delete all sandboxes +$K delete sandbox --all -n openshell + +# Uninstall the Helm release +$HELM uninstall openshell -n openshell + +# Remove the supervisor installer +$K delete daemonset openshell-supervisor-installer -n openshell + +# Remove RBAC +$K delete clusterrolebinding openshell-sa-anyuid openshell-sa-privileged \ + openshell-default-privileged agent-sandbox-admin 2>/dev/null + +# Remove the Sandbox CRD and its controller +$K delete -f deploy/kube/manifests/agent-sandbox.yaml + +# Remove the namespace +$K delete ns openshell + +# Kill port-forward +pkill -f "port-forward.*18443" + +# Remove CLI gateway config +rm -rf ~/.config/openshell/gateways/ocp-userns +``` + +## Troubleshooting + +| Symptom | Cause | Fix | +|---------|-------|-----| +| `ErrImageNeverPull` on gateway pod | Image not in the internal registry | Push with `podman push --tls-verify=false` to the OCP registry | +| `unable to validate against any security context constraint` | Missing SCC grants | Run the `clusterrolebinding` commands from step 3 | +| `cannot set blockOwnerDeletion` on sandbox creation | Sandbox CRD controller lacks RBAC | Grant `cluster-admin` to the controller SA (step 3) | +| `hostPath type check failed: /opt/openshell/bin is not a directory` | Supervisor binary not installed on node | Deploy the DaemonSet from step 6 | +| `Permission denied` accessing supervisor binary | SELinux blocking hostPath access | Ensure `chcon -t container_file_t` was applied (step 6) | +| `failed to set MOUNT_ATTR_IDMAP` | Filesystem doesn't support ID-mapped mounts | Only happens in nested container environments (DinD); native nodes work | +| Gateway pod `CrashLoopBackOff` with `unable to open database file` | PVC permissions | Use `--set server.dbUrl="sqlite:/tmp/openshell.db"` | +| `dns error: failed to lookup address` from supervisor | In-cluster DNS not resolving | Use the ClusterIP directly in `server.grpcEndpoint` instead of the DNS name | From fd925a767e30a21442cbac8f3b0a3f8dfcfda0c9 Mon Sep 17 00:00:00 2001 From: Mrunal Patel Date: Sun, 26 Apr 2026 16:30:00 -0700 Subject: [PATCH 06/10] docs(architecture): add Helm deployment and OCP validation details Document that user namespaces can be deployed via Helm on any K8s 1.33+ cluster and link to the OCP testing guide. Update verification section to reflect successful end-to-end validation on OCP 4.22. --- architecture/kubernetes-user-namespaces.md | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/architecture/kubernetes-user-namespaces.md b/architecture/kubernetes-user-namespaces.md index 1abbd5b5b..8c9c12947 100644 --- a/architecture/kubernetes-user-namespaces.md +++ b/architecture/kubernetes-user-namespaces.md @@ -120,6 +120,21 @@ This is a kernel/filesystem constraint, not an OpenShell bug. The pod spec is ge The e2e test (`e2e/rust/tests/user_namespaces.rs`) accounts for this by verifying only the pod spec fields (`hostUsers`, capabilities) rather than attempting to run a command inside the sandbox. +## Deploying to a real cluster with Helm + +User namespaces can be tested end-to-end on any Kubernetes 1.33+ cluster (beta, enabled by default) or 1.36+ (GA) with a supporting container runtime. Deploy the gateway with Helm and set `server.enableUserNamespaces=true`: + +```shell +helm install openshell deploy/helm/openshell -n openshell \ + --set server.enableUserNamespaces=true \ + --set server.sandboxImage="ghcr.io/nvidia/openshell-community/sandboxes/base:latest" \ + ... 
+``` + +The supervisor binary must be present at `/opt/openshell/bin/openshell-sandbox` on every node (hostPath mount). On SELinux-enforcing nodes (RHEL, CoreOS), label it with `chcon -t container_file_t`. + +This has been validated end-to-end on OCP 4.22 (K8s 1.35.3, CRI-O 1.35, RHEL CoreOS, kernel 5.14) with full SSH tunnel, workspace init, and sandbox command execution under user namespace isolation. See [kubernetes-user-namespaces-ocp-testing.md](kubernetes-user-namespaces-ocp-testing.md) for the complete step-by-step reproduction guide. + ## Verification 1. `mise run pre-commit` -- lint and format pass @@ -129,4 +144,4 @@ The e2e test (`e2e/rust/tests/user_namespaces.rs`) accounts for this by verifyin - `platform_config_bool` helper - `Directory` type on supervisor volume 3. `mise run e2e` -- the `user_namespaces` test verifies pod spec correctness against the local dev cluster -4. On a bare-metal or VM-based K8s 1.33+ cluster: `cat /proc/self/uid_map` inside a sandbox should show a non-identity mapping (UID 0 maps to a high host UID) +4. On a Kubernetes 1.33+ cluster (OCP, GKE, EKS, bare-metal): deploy with Helm, create a sandbox, and verify `cat /proc/self/uid_map` shows a non-identity mapping (UID 0 maps to a high host UID) From e10297450f5f2c65e30b155d1d74d385e76bef30 Mon Sep 17 00:00:00 2001 From: Mrunal Patel Date: Sun, 26 Apr 2026 18:44:50 -0700 Subject: [PATCH 07/10] fix(e2e): stabilize user namespace verification Use an explicit sandbox name and deterministic cleanup so the user namespace e2e test cannot inspect the wrong sandbox on a busy cluster. Align the documented Kubernetes requirements with the tested rollout story and save the pre-PR review notes for sharing. --- architecture/kubernetes-user-namespaces.md | 6 +- crates/openshell-core/src/config.rs | 5 +- deploy/helm/openshell/values.yaml | 3 +- docs/security/best-practices.mdx | 4 +- e2e/rust/tests/user_namespaces.rs | 83 ++++++++++++---------- proto/openshell.proto | 3 +- 6 files changed, 59 insertions(+), 45 deletions(-) diff --git a/architecture/kubernetes-user-namespaces.md b/architecture/kubernetes-user-namespaces.md index 8c9c12947..c2221cea3 100644 --- a/architecture/kubernetes-user-namespaces.md +++ b/architecture/kubernetes-user-namespaces.md @@ -14,7 +14,7 @@ The sandbox supervisor already runs as UID 0 inside the container and performs a **Capability additions when enabled:** Add `SETUID`, `SETGID`, `DAC_READ_SEARCH` to the pod security context (matching the Podman driver at `crates/openshell-driver-podman/src/container.rs:393-400`) — needed because the bounding set is reset inside a user namespace. -**No changes to:** seccomp filters (CLONE_NEWUSER block stays), Landlock, supervisor privilege-drop logic, init containers, volume mounts (ID-mapped mounts handle ownership transparently). +**No changes to:** seccomp filters (CLONE_NEWUSER block stays), Landlock, supervisor privilege-drop logic, init containers, and workspace volume ownership semantics (ID-mapped mounts handle ownership transparently). The only mount-related change is the supervisor `hostPath` type in Step 7. ## Changes @@ -122,7 +122,7 @@ The e2e test (`e2e/rust/tests/user_namespaces.rs`) accounts for this by verifyin ## Deploying to a real cluster with Helm -User namespaces can be tested end-to-end on any Kubernetes 1.33+ cluster (beta, enabled by default) or 1.36+ (GA) with a supporting container runtime. 
Deploy the gateway with Helm and set `server.enableUserNamespaces=true`: +User namespaces can be tested end-to-end on Kubernetes 1.33+ clusters where the feature is available (beta through 1.35, GA in 1.36+) with a supporting container runtime. Deploy the gateway with Helm and set `server.enableUserNamespaces=true`: ```shell helm install openshell deploy/helm/openshell -n openshell \ @@ -144,4 +144,4 @@ This has been validated end-to-end on OCP 4.22 (K8s 1.35.3, CRI-O 1.35, RHEL Cor - `platform_config_bool` helper - `Directory` type on supervisor volume 3. `mise run e2e` -- the `user_namespaces` test verifies pod spec correctness against the local dev cluster -4. On a Kubernetes 1.33+ cluster (OCP, GKE, EKS, bare-metal): deploy with Helm, create a sandbox, and verify `cat /proc/self/uid_map` shows a non-identity mapping (UID 0 maps to a high host UID) +4. On a Kubernetes 1.33+ cluster with user namespace support available (OCP, GKE, EKS, bare-metal): deploy with Helm, create a sandbox, and verify `cat /proc/self/uid_map` shows a non-identity mapping (UID 0 maps to a high host UID) diff --git a/crates/openshell-core/src/config.rs b/crates/openshell-core/src/config.rs index ccab50c27..c469e3aef 100644 --- a/crates/openshell-core/src/config.rs +++ b/crates/openshell-core/src/config.rs @@ -195,8 +195,9 @@ pub struct Config { /// Enable Kubernetes user namespace isolation (`hostUsers: false`) for /// sandbox pods. When enabled, container UID 0 maps to an unprivileged - /// host UID and capabilities become namespaced. Requires Kubernetes 1.36+ - /// with a supporting container runtime and Linux 5.12+. + /// host UID and capabilities become namespaced. Requires Kubernetes 1.33+ + /// with user namespace support available (beta through 1.35, GA in 1.36+), + /// plus a supporting container runtime and Linux 5.12+. #[serde(default)] pub enable_user_namespaces: bool, } diff --git a/deploy/helm/openshell/values.yaml b/deploy/helm/openshell/values.yaml index e225c3d0a..c3728aa18 100644 --- a/deploy/helm/openshell/values.yaml +++ b/deploy/helm/openshell/values.yaml @@ -97,7 +97,8 @@ server: # Auto-detected by the cluster entrypoint script. hostGatewayIP: "" # Enable Kubernetes user namespace isolation (hostUsers: false) for sandbox - # pods. Requires Kubernetes 1.36+ with a supporting container runtime and + # pods. Requires Kubernetes 1.33+ with user namespace support available + # (beta through 1.35, GA in 1.36+), plus a supporting container runtime and # Linux 5.12+. When enabled, container UID 0 maps to an unprivileged host # UID and capabilities become namespaced. enableUserNamespaces: false diff --git a/docs/security/best-practices.mdx b/docs/security/best-practices.mdx index ff8f8fedf..8ab0113e2 100644 --- a/docs/security/best-practices.mdx +++ b/docs/security/best-practices.mdx @@ -73,9 +73,9 @@ This provides defense-in-depth: even if a container escape vulnerability exists, |---|---| | Default | Disabled. Set `server.enableUserNamespaces: true` in the Helm values or `OPENSHELL_ENABLE_USER_NAMESPACES=true` as an environment variable to enable cluster-wide. | | What you can change | Enable cluster-wide via Helm or environment variable. Override per-sandbox via the `user_namespaces` field on `SandboxTemplate` in the API. | -| Prerequisites | Kubernetes 1.36+, a container runtime that supports user namespaces (containerd 2.0+, CRI-O 1.25+), and Linux 5.12+ for ID-mapped mounts. 
| +| Prerequisites | Kubernetes 1.33+ with user namespace support available (beta through 1.35, GA in 1.36+), a container runtime that supports user namespaces (containerd 2.0+, CRI-O 1.25+), and Linux 5.12+ for ID-mapped mounts. | | Risk if enabled with GPU | NVIDIA device plugin compatibility with user namespaces is unverified. OpenShell logs a warning when both GPU and user namespaces are active on the same sandbox. | -| Recommendation | Enable on non-GPU clusters running Kubernetes 1.36+ for stronger host isolation. Test GPU workloads separately before enabling on GPU clusters. | +| Recommendation | Enable on non-GPU clusters running Kubernetes with user namespace support available (1.33+ beta, 1.36+ GA) for stronger host isolation. Test GPU workloads separately before enabling on GPU clusters. | ### Binary Identity Binding diff --git a/e2e/rust/tests/user_namespaces.rs b/e2e/rust/tests/user_namespaces.rs index 74cc9af26..9aa714767 100644 --- a/e2e/rust/tests/user_namespaces.rs +++ b/e2e/rust/tests/user_namespaces.rs @@ -16,9 +16,10 @@ //! regardless of runtime success. use std::process::Stdio; -use std::time::Duration; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; use openshell_e2e::harness::binary::openshell_cmd; +use tokio::process::Child; async fn kubectl(args: &[&str]) -> Result { let output = tokio::process::Command::new("docker") @@ -66,6 +67,35 @@ async fn delete_sandbox(name: &str) { let _ = kubectl(&["delete", "sandbox", name, "-n", "openshell"]).await; } +fn unique_sandbox_name() -> String { + let suffix = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_millis(); + format!("userns-e2e-{suffix}") +} + +async fn stop_child(child: &mut Child) { + let _ = child.kill().await; + let _ = child.wait().await; +} + +async fn wait_for_sandbox(name: &str, timeout_secs: u64) -> Result<(), String> { + let deadline = tokio::time::Instant::now() + Duration::from_secs(timeout_secs); + while tokio::time::Instant::now() < deadline { + if let Ok(n) = kubectl(&[ + "get", "sandbox", name, "-n", "openshell", + "-o", "jsonpath={.metadata.name}", + ]).await { + if !n.trim().is_empty() { + return Ok(()); + } + } + tokio::time::sleep(Duration::from_secs(2)).await; + } + Err(format!("sandbox {name} did not appear within {timeout_secs}s")) +} + /// Find a sandbox pod by its sandbox CRD name. The CRD controller creates a /// pod with the same name as the Sandbox resource. async fn wait_for_sandbox_pod(name: &str, timeout_secs: u64) -> Result<(), String> { @@ -91,68 +121,49 @@ async fn sandbox_pod_spec_has_user_namespace_fields() { .await .expect("failed to enable user namespaces on gateway"); + let sandbox_name = unique_sandbox_name(); + // Start sandbox creation in the background. The pod may never become // ready in DinD environments, so we spawn the CLI and inspect the pod // spec independently. let mut cmd = openshell_cmd(); cmd.arg("sandbox").arg("create") + .arg("--name").arg(&sandbox_name) .arg("--").arg("sleep").arg("infinity"); cmd.stdout(Stdio::piped()).stderr(Stdio::piped()); - let child = cmd.spawn().expect("failed to spawn openshell create"); + let mut child = cmd.spawn().expect("failed to spawn openshell create"); - // Wait for the sandbox CRD to be created by polling. 
- let mut sandbox_name = None; - let deadline = tokio::time::Instant::now() + Duration::from_secs(60); - while tokio::time::Instant::now() < deadline { - if let Ok(names) = kubectl(&[ - "get", "sandbox", "-n", "openshell", - "-o", "jsonpath={.items[*].metadata.name}", - ]).await { - let latest = names.split_whitespace() - .filter(|n| *n != "openshell-0") - .last() - .map(|s| s.to_string()); - if latest.is_some() { - sandbox_name = latest; - break; - } - } - tokio::time::sleep(Duration::from_secs(3)).await; + if let Err(e) = wait_for_sandbox(&sandbox_name, 60).await { + stop_child(&mut child).await; + delete_sandbox(&sandbox_name).await; + set_user_namespaces(false).await.ok(); + panic!("{e}"); } - let name = match sandbox_name { - Some(n) => n, - None => { - drop(child); - set_user_namespaces(false).await.ok(); - panic!("no sandbox CRD found within 60s"); - } - }; - // Wait for the pod to be created (the CRD controller creates it). - if let Err(e) = wait_for_sandbox_pod(&name, 60).await { - drop(child); - delete_sandbox(&name).await; + if let Err(e) = wait_for_sandbox_pod(&sandbox_name, 60).await { + stop_child(&mut child).await; + delete_sandbox(&sandbox_name).await; set_user_namespaces(false).await.ok(); panic!("{e}"); } // Inspect the pod spec for hostUsers. let host_users = kubectl(&[ - "get", "pod", &name, "-n", "openshell", + "get", "pod", &sandbox_name, "-n", "openshell", "-o", "jsonpath={.spec.hostUsers}", ]).await; // Inspect capabilities on the agent container. let caps = kubectl(&[ - "get", "pod", &name, "-n", "openshell", + "get", "pod", &sandbox_name, "-n", "openshell", "-o", "jsonpath={.spec.containers[?(@.name=='agent')].securityContext.capabilities.add}", ]).await; // Clean up. - drop(child); - delete_sandbox(&name).await; + stop_child(&mut child).await; + delete_sandbox(&sandbox_name).await; set_user_namespaces(false).await.ok(); // Assert hostUsers is false. diff --git a/proto/openshell.proto b/proto/openshell.proto index e555bcb39..b53c71ba7 100644 --- a/proto/openshell.proto +++ b/proto/openshell.proto @@ -229,7 +229,8 @@ message SandboxTemplate { google.protobuf.Struct volume_claim_templates = 9; // Enable Kubernetes user namespace isolation (hostUsers: false). // When true, container UID 0 maps to a non-root host UID and capabilities - // become namespaced. Requires Kubernetes 1.36+ with a supporting runtime. + // become namespaced. Requires Kubernetes 1.33+ with user namespace support + // available (beta through 1.35, GA in 1.36+) and a supporting runtime. // When unset, the cluster-wide default is used. optional bool user_namespaces = 10; } From bbc5d0fe452b06caaa11711a18c50127e6e28c65 Mon Sep 17 00:00:00 2001 From: Mrunal Patel Date: Sun, 26 Apr 2026 19:07:36 -0700 Subject: [PATCH 08/10] docs(architecture): use standard kubectl/helm in OCP testing guide Drop hardcoded binary paths and KUBECONFIG variable assignments. Readers are expected to have kubectl and helm on their PATH. 
--- .../kubernetes-user-namespaces-ocp-testing.md | 65 ++++++++----------- 1 file changed, 28 insertions(+), 37 deletions(-) diff --git a/architecture/kubernetes-user-namespaces-ocp-testing.md b/architecture/kubernetes-user-namespaces-ocp-testing.md index 01df9b29b..8c224e8c4 100644 --- a/architecture/kubernetes-user-namespaces-ocp-testing.md +++ b/architecture/kubernetes-user-namespaces-ocp-testing.md @@ -5,20 +5,11 @@ Step-by-step guide to deploy OpenShell with user namespace isolation on an OpenS ## Prerequisites - An OCP cluster (tested on OCP 4.22 / K8s 1.35.3 / CRI-O 1.35 / RHEL CoreOS / kernel 5.14) -- `KUBECONFIG` pointing at the cluster (e.g., `export KUBECONFIG=/path/to/kubeconfig`) -- `kubectl` binary (the examples below use the full path; adjust as needed) -- `helm` binary +- `kubectl` and `helm` on your `PATH` - `podman` for building and pushing images +- `KUBECONFIG` set to point at the cluster - The OpenShell repo checked out with the user namespace branch built -Throughout this guide: - -```shell -K=/home/mrunalp/repos/kubernetes/_output/local/bin/linux/amd64/kubectl -HELM=/home/mrunalp/.local/share/mise/installs/helm/4.1.4/linux-amd64/helm -export KUBECONFIG=/path/to/your/kubeconfig -``` - ## 1. Build binaries ```shell @@ -30,15 +21,15 @@ cargo build -p openshell-cli --features openshell-core/dev-settings ## 2. Create namespace and install the Sandbox CRD ```shell -$K create ns openshell -$K apply -f deploy/kube/manifests/agent-sandbox.yaml +kubectl create ns openshell +kubectl apply -f deploy/kube/manifests/agent-sandbox.yaml ``` Label the namespace to allow privileged pods: ```shell -$K label ns openshell pod-security.kubernetes.io/enforce=privileged --overwrite -$K label ns openshell pod-security.kubernetes.io/warn=privileged --overwrite +kubectl label ns openshell pod-security.kubernetes.io/enforce=privileged --overwrite +kubectl label ns openshell pod-security.kubernetes.io/warn=privileged --overwrite ``` ## 3. 
Grant SCCs @@ -46,15 +37,15 @@ $K label ns openshell pod-security.kubernetes.io/warn=privileged --overwrite The gateway pod needs `anyuid` (runs as UID 1000) and sandbox pods need `privileged` (capabilities for supervisor): ```shell -$K create clusterrolebinding openshell-sa-anyuid \ +kubectl create clusterrolebinding openshell-sa-anyuid \ --clusterrole=system:openshift:scc:anyuid \ --serviceaccount=openshell:openshell -$K create clusterrolebinding openshell-sa-privileged \ +kubectl create clusterrolebinding openshell-sa-privileged \ --clusterrole=system:openshift:scc:privileged \ --serviceaccount=openshell:openshell -$K create clusterrolebinding openshell-default-privileged \ +kubectl create clusterrolebinding openshell-default-privileged \ --clusterrole=system:openshift:scc:privileged \ --serviceaccount=openshell:default ``` @@ -62,7 +53,7 @@ $K create clusterrolebinding openshell-default-privileged \ Grant the sandbox CRD controller full permissions (it needs to set ownerReferences with blockOwnerDeletion): ```shell -$K create clusterrolebinding agent-sandbox-admin \ +kubectl create clusterrolebinding agent-sandbox-admin \ --clusterrole=cluster-admin \ --serviceaccount=agent-sandbox-system:agent-sandbox-controller ``` @@ -101,18 +92,18 @@ openssl x509 -req -in $TLSDIR/client.csr \ Create Kubernetes secrets: ```shell -$K create secret tls openshell-server-tls -n openshell \ +kubectl create secret tls openshell-server-tls -n openshell \ --cert=$TLSDIR/server.crt --key=$TLSDIR/server.key -$K create secret generic openshell-server-client-ca -n openshell \ +kubectl create secret generic openshell-server-client-ca -n openshell \ --from-file=ca.crt=$TLSDIR/ca.crt -$K create secret generic openshell-client-tls -n openshell \ +kubectl create secret generic openshell-client-tls -n openshell \ --from-file=ca.crt=$TLSDIR/ca.crt \ --from-file=tls.crt=$TLSDIR/client.crt \ --from-file=tls.key=$TLSDIR/client.key -$K create secret generic openshell-ssh-handshake -n openshell \ +kubectl create secret generic openshell-ssh-handshake -n openshell \ --from-literal=secret=$(openssl rand -hex 32) ``` @@ -122,12 +113,12 @@ Note: the `openshell-client-tls` secret must include `ca.crt`, `tls.crt`, and `t ```shell # Enable the default route for the internal registry -$K patch configs.imageregistry.operator.openshift.io/cluster \ +kubectl patch configs.imageregistry.operator.openshift.io/cluster \ --type merge -p '{"spec":{"defaultRoute":true}}' sleep 5 -REGISTRY=$($K get route default-route -n openshift-image-registry -o jsonpath='{.spec.host}') -TOKEN=$($K create token builder -n openshell) +REGISTRY=$(kubectl get route default-route -n openshift-image-registry -o jsonpath='{.spec.host}') +TOKEN=$(kubectl create token builder -n openshell) podman login --tls-verify=false -u kubeadmin -p "$TOKEN" "$REGISTRY" ``` @@ -177,7 +168,7 @@ Deploy the installer DaemonSet: ```shell INTERNAL_REG="image-registry.openshift-image-registry.svc:5000" -cat </tmp/pf.log 2>&1 & +nohup kubectl port-forward svc/openshell -n openshell 18443:8080 >/tmp/pf.log 2>&1 & ``` Set up the CLI gateway configuration with mTLS: @@ -333,23 +324,23 @@ This confirms: ```shell # Delete all sandboxes -$K delete sandbox --all -n openshell +kubectl delete sandbox --all -n openshell # Uninstall the Helm release -$HELM uninstall openshell -n openshell +helm uninstall openshell -n openshell # Remove the supervisor installer -$K delete daemonset openshell-supervisor-installer -n openshell +kubectl delete daemonset openshell-supervisor-installer -n openshell 
# Remove RBAC -$K delete clusterrolebinding openshell-sa-anyuid openshell-sa-privileged \ +kubectl delete clusterrolebinding openshell-sa-anyuid openshell-sa-privileged \ openshell-default-privileged agent-sandbox-admin 2>/dev/null # Remove the Sandbox CRD and its controller -$K delete -f deploy/kube/manifests/agent-sandbox.yaml +kubectl delete -f deploy/kube/manifests/agent-sandbox.yaml # Remove the namespace -$K delete ns openshell +kubectl delete ns openshell # Kill port-forward pkill -f "port-forward.*18443" From 21fc27c130ac7de8f4d8d915ed2df39c23f7908c Mon Sep 17 00:00:00 2001 From: Mrunal Patel Date: Mon, 27 Apr 2026 14:59:28 -0700 Subject: [PATCH 09/10] docs(architecture): fix markdown lint errors Add blank lines around lists, headings, and fenced code blocks. --- .../kubernetes-user-namespaces-ocp-testing.md | 1 + architecture/kubernetes-user-namespaces.md | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/architecture/kubernetes-user-namespaces-ocp-testing.md b/architecture/kubernetes-user-namespaces-ocp-testing.md index 8c224e8c4..15d86bb64 100644 --- a/architecture/kubernetes-user-namespaces-ocp-testing.md +++ b/architecture/kubernetes-user-namespaces-ocp-testing.md @@ -315,6 +315,7 @@ uid=998(sandbox) gid=998(sandbox) groups=998(sandbox) ``` This confirms: + - UID 0 inside the container maps to a high host UID (non-identity mapping) - The sandbox user (UID 998) is active - The SSH tunnel through the gateway works end-to-end diff --git a/architecture/kubernetes-user-namespaces.md b/architecture/kubernetes-user-namespaces.md index c2221cea3..081e379b5 100644 --- a/architecture/kubernetes-user-namespaces.md +++ b/architecture/kubernetes-user-namespaces.md @@ -9,6 +9,7 @@ The sandbox supervisor already runs as UID 0 inside the container and performs a ## Design **Two-layer configuration:** + - Cluster-wide default: `enable_user_namespaces` on `Config` / `KubernetesComputeConfig` (env var `OPENSHELL_ENABLE_USER_NAMESPACES`, default `false`) - Per-sandbox override: `optional bool user_namespaces` on `SandboxTemplate` in the proto, translated to `platform_config.host_users` for the K8s driver @@ -19,26 +20,32 @@ The sandbox supervisor already runs as UID 0 inside the container and performs a ## Changes ### 1. Proto: add `user_namespaces` field to `SandboxTemplate` + **File:** `proto/openshell.proto` Add `optional bool user_namespaces = 10;` to the `SandboxTemplate` message. Using `optional` distinguishes "not set" (use cluster default) from explicit true/false. ### 2. Core config: add `enable_user_namespaces` to server config + **File:** `crates/openshell-core/src/config.rs` Add field to `Config`: + ```rust #[serde(default)] pub enable_user_namespaces: bool, ``` + Wire the env var `OPENSHELL_ENABLE_USER_NAMESPACES` (clap handles this on the standalone driver binary; for the in-process server path, `Config` serde does it). ### 3. K8s driver config: add field + **File:** `crates/openshell-driver-kubernetes/src/config.rs` Add `pub enable_user_namespaces: bool` to `KubernetesComputeConfig`. ### 4. Server: wire config and translate proto field + **File:** `crates/openshell-server/src/lib.rs` Pass `config.enable_user_namespaces` into the `KubernetesComputeConfig` construction. 
@@ -46,6 +53,7 @@ Pass `config.enable_user_namespaces` into the `KubernetesComputeConfig` construc **File:** `crates/openshell-server/src/compute/mod.rs` (`build_platform_config`) Translate the new `SandboxTemplate.user_namespaces` field into `platform_config`: + ```rust if let Some(user_ns) = template.user_namespaces { fields.insert("host_users".into(), Value { kind: Some(Kind::BoolValue(!user_ns)) }); @@ -55,11 +63,13 @@ if let Some(user_ns) = template.user_namespaces { The public API uses `user_namespaces: true` (positive sense) while the K8s driver expects `host_users: false` (K8s convention). The driver inverts this back via `!host_users` to resolve the final pod-level `hostUsers` field. ### 5. K8s driver: add `platform_config_bool` helper + **File:** `crates/openshell-driver-kubernetes/src/driver.rs` New helper following the existing `platform_config_string` / `platform_config_struct` pattern. ### 6. K8s driver: apply `hostUsers: false` and extended capabilities + **File:** `crates/openshell-driver-kubernetes/src/driver.rs` - Pass `enable_user_namespaces` through `sandbox_to_k8s_spec` -> `sandbox_template_to_k8s` @@ -68,19 +78,23 @@ New helper following the existing `platform_config_string` / `platform_config_st - Extend the capability list with `SETUID`, `SETGID`, `DAC_READ_SEARCH` when enabled ### 7. K8s driver: change hostPath type to `Directory` + **File:** `crates/openshell-driver-kubernetes/src/driver.rs` (`supervisor_volume`) Change `"type": "DirectoryOrCreate"` to `"type": "Directory"`. The supervisor path is pre-provisioned during cluster setup; `DirectoryOrCreate` could fail under user namespaces when the mapped UID can't create host directories. ### 8. Standalone driver binary: wire CLI arg + **File:** `crates/openshell-driver-kubernetes/src/main.rs` Add `#[arg(long, env = "OPENSHELL_ENABLE_USER_NAMESPACES")]` and pass to config construction. ### 9. Helm chart + **File:** `deploy/helm/openshell/values.yaml` — add `enableUserNamespaces: false` under `server:` **File:** `deploy/helm/openshell/templates/statefulset.yaml` — add conditional env var block: + ```yaml {{- if .Values.server.enableUserNamespaces }} - name: OPENSHELL_ENABLE_USER_NAMESPACES @@ -110,10 +124,12 @@ failed to set MOUNT_ATTR_IDMAP on .../etc-hosts: invalid argument This is a kernel/filesystem constraint, not an OpenShell bug. The pod spec is generated correctly (`hostUsers: false`, extended capabilities), but the container runtime cannot fulfil the mount request. **Where user namespaces work:** + - Bare-metal or VM-based Kubernetes clusters where the node's root filesystem is ext4/xfs/btrfs (all support ID-mapped mounts since Linux 5.12-5.19). - Managed Kubernetes services (EKS, GKE, AKS) on nodes running a supported kernel. **Where they do not work:** + - k3s-in-Docker / kind / Docker-in-Docker dev clusters where the inner container uses overlayfs on top of the outer container's overlayfs. The nested overlayfs does not support `MOUNT_ATTR_IDMAP`. - Nodes running kernels older than 5.12. - Nodes using filesystems that have not added ID-mapped mount support (e.g., NFS on older kernels). 
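The troubleshooting notes above reduce to two node-level prerequisites: a kernel with ID-mapped mount support and a container-storage filesystem that implements it. A quick sketch of how to spot-check both before enabling `enableUserNamespaces` cluster-wide (the storage paths are common runtime defaults, not OpenShell-specific, so adjust them to your node configuration):

```shell
# Kernel version per node; ID-mapped mounts need Linux 5.12+
kubectl get nodes -o custom-columns='NAME:.metadata.name,KERNEL:.status.nodeInfo.kernelVersion'

# On the node itself: filesystem backing container storage.
# ext4/xfs/btrfs support ID-mapped mounts; nested overlayfs (kind, DinD) does not.
findmnt -no FSTYPE --target /var/lib/containers    # CRI-O / Podman default storage root
findmnt -no FSTYPE --target /var/lib/containerd    # containerd default storage root
```
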
From 8e24ef2f5f1fde1f86981d409a369091a8ba0f47 Mon Sep 17 00:00:00 2001 From: Mrunal Patel Date: Mon, 27 Apr 2026 15:32:19 -0700 Subject: [PATCH 10/10] fix: apply cargo fmt formatting --- .../openshell-driver-kubernetes/src/driver.rs | 6 +++++- crates/openshell-server/src/compute/mod.rs | 17 +++++++++-------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/crates/openshell-driver-kubernetes/src/driver.rs b/crates/openshell-driver-kubernetes/src/driver.rs index 07b0eea02..6e4a6f13f 100644 --- a/crates/openshell-driver-kubernetes/src/driver.rs +++ b/crates/openshell-driver-kubernetes/src/driver.rs @@ -2137,7 +2137,11 @@ mod tests { let caps = pod_template["spec"]["containers"][0]["securityContext"]["capabilities"]["add"] .as_array() .unwrap(); - assert_eq!(caps.len(), 4, "extra capabilities must not be added when user namespaces are disabled"); + assert_eq!( + caps.len(), + 4, + "extra capabilities must not be added when user namespaces are disabled" + ); } #[test] diff --git a/crates/openshell-server/src/compute/mod.rs b/crates/openshell-server/src/compute/mod.rs index 7fb754722..3f32744f3 100644 --- a/crates/openshell-server/src/compute/mod.rs +++ b/crates/openshell-server/src/compute/mod.rs @@ -1919,7 +1919,10 @@ mod tests { // user_namespaces: true → host_users: false template.user_namespaces = Some(true); let config = build_platform_config(&template).expect("config should be Some"); - let host_users = config.fields.get("host_users").expect("host_users must exist"); + let host_users = config + .fields + .get("host_users") + .expect("host_users must exist"); assert_eq!( host_users.kind, Some(Kind::BoolValue(false)), @@ -1929,7 +1932,10 @@ mod tests { // user_namespaces: false → host_users: true template.user_namespaces = Some(false); let config = build_platform_config(&template).expect("config should be Some"); - let host_users = config.fields.get("host_users").expect("host_users must exist"); + let host_users = config + .fields + .get("host_users") + .expect("host_users must exist"); assert_eq!( host_users.kind, Some(Kind::BoolValue(true)), @@ -1940,12 +1946,7 @@ mod tests { template.user_namespaces = None; let config = build_platform_config(&template); assert!( - config.is_none() - || !config - .as_ref() - .unwrap() - .fields - .contains_key("host_users"), + config.is_none() || !config.as_ref().unwrap().fields.contains_key("host_users"), "unset user_namespaces must not produce host_users" ); }
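
The unit tests above assert on the generated pod spec in isolation; on a live cluster with user namespaces enabled, the same properties can be spot-checked directly. A minimal sketch, assuming a sandbox pod named `userns-demo` in the `openshell` namespace (the pod name is illustrative; the `agent` container name and jsonpath expressions mirror the e2e test):

```shell
# Pod-level field: should print "false" when user namespaces are enabled
kubectl get pod userns-demo -n openshell -o jsonpath='{.spec.hostUsers}'

# Capability list on the agent container: should include SETUID, SETGID, DAC_READ_SEARCH
kubectl get pod userns-demo -n openshell \
  -o jsonpath="{.spec.containers[?(@.name=='agent')].securityContext.capabilities.add}"

# Non-identity mapping: UID 0 inside the container maps to a high host UID
kubectl exec userns-demo -n openshell -c agent -- cat /proc/self/uid_map
```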