From 5a030d053a549ea7fe24978947d276c09c55375d Mon Sep 17 00:00:00 2001
From: itdove <dvernier@redhat.com>
Date: Mon, 6 Apr 2026 17:20:37 -0400
Subject: [PATCH 01/19] feat(providers): add Vertex AI provider type

- Add vertex provider plugin with ANTHROPIC_VERTEX_PROJECT_ID credential
- Add vertex inference profile with Anthropic-compatible protocols
- Register vertex in provider registry and CLI
- Add vertex to supported inference provider types
- Fix scripts/podman.env to use correct env var names for local registry
- Update docs for simplified CLI install workflow

Known limitation: GCP OAuth authentication not yet implemented.
Vertex provider can be created and configured but API calls will fail
until OAuth token generation is added.
---
 crates/openshell-cli/src/main.rs              |  2 +
 crates/openshell-core/src/inference.rs        | 12 +++++
 crates/openshell-providers/src/lib.rs         |  2 +
 .../openshell-providers/src/providers/mod.rs  |  1 +
 .../src/providers/vertex.rs                   | 47 +++++++++++++++++++
 crates/openshell-server/src/inference.rs      |  2 +-
 docs/get-started/install-podman-macos.md      | 12 ++---
 scripts/podman.env                            | 10 +++-
 8 files changed, 78 insertions(+), 10 deletions(-)
 create mode 100644 crates/openshell-providers/src/providers/vertex.rs

diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs
index 0d546c7b1..5277ab805 100644
--- a/crates/openshell-cli/src/main.rs
+++ b/crates/openshell-cli/src/main.rs
@@ -615,6 +615,7 @@ enum CliProviderType {
     Gitlab,
     Github,
     Outlook,
+    Vertex,
 }
 
 #[derive(Clone, Debug, ValueEnum)]
@@ -646,6 +647,7 @@ impl CliProviderType {
             Self::Gitlab => "gitlab",
             Self::Github => "github",
             Self::Outlook => "outlook",
+            Self::Vertex => "vertex",
         }
     }
 }
diff --git a/crates/openshell-core/src/inference.rs b/crates/openshell-core/src/inference.rs
index a06c427f8..78fe72310 100644
--- a/crates/openshell-core/src/inference.rs
+++ b/crates/openshell-core/src/inference.rs
@@ -86,6 +86,16 @@ static NVIDIA_PROFILE: InferenceProviderProfile = InferenceProviderProfile {
     default_headers: &[],
 };
 
+static VERTEX_PROFILE: InferenceProviderProfile = InferenceProviderProfile {
+    provider_type: "vertex",
+    default_base_url: "https://us-central1-aiplatform.googleapis.com/v1",
+    protocols: ANTHROPIC_PROTOCOLS,
+    credential_key_names: &["ANTHROPIC_VERTEX_PROJECT_ID"],
+    base_url_config_keys: &["ANTHROPIC_VERTEX_REGION", "VERTEX_BASE_URL"],
+    auth: AuthHeader::Custom("x-api-key"),
+    default_headers: &[("anthropic-version", "2023-06-01")],
+};
+
 /// Look up the inference provider profile for a given provider type.
 ///
 /// Returns `None` for provider types that don't support inference routing
@@ -95,6 +105,7 @@ pub fn profile_for(provider_type: &str) -> Option<&'static InferenceProviderProf
         "openai" => Some(&OPENAI_PROFILE),
         "anthropic" => Some(&ANTHROPIC_PROFILE),
         "nvidia" => Some(&NVIDIA_PROFILE),
+        "vertex" => Some(&VERTEX_PROFILE),
         _ => None,
     }
 }
@@ -176,6 +187,7 @@ mod tests {
         assert!(profile_for("openai").is_some());
         assert!(profile_for("anthropic").is_some());
         assert!(profile_for("nvidia").is_some());
+        assert!(profile_for("vertex").is_some());
         assert!(profile_for("OpenAI").is_some()); // case insensitive
     }
 
diff --git a/crates/openshell-providers/src/lib.rs b/crates/openshell-providers/src/lib.rs
index e2bcc0c09..2fa771950 100644
--- a/crates/openshell-providers/src/lib.rs
+++ b/crates/openshell-providers/src/lib.rs
@@ -86,6 +86,7 @@ impl ProviderRegistry {
         registry.register(providers::gitlab::GitlabProvider);
         registry.register(providers::github::GithubProvider);
         registry.register(providers::outlook::OutlookProvider);
+        registry.register(providers::vertex::VertexProvider);
         registry
     }
 
@@ -138,6 +139,7 @@ pub fn normalize_provider_type(input: &str) -> Option<&'static str> {
         "gitlab" | "glab" => Some("gitlab"),
         "github" | "gh" => Some("github"),
         "outlook" => Some("outlook"),
+        "vertex" => Some("vertex"),
         _ => None,
     }
 }
diff --git a/crates/openshell-providers/src/providers/mod.rs b/crates/openshell-providers/src/providers/mod.rs
index 6fe395135..19f9c54a5 100644
--- a/crates/openshell-providers/src/providers/mod.rs
+++ b/crates/openshell-providers/src/providers/mod.rs
@@ -12,3 +12,4 @@ pub mod nvidia;
 pub mod openai;
 pub mod opencode;
 pub mod outlook;
+pub mod vertex;
diff --git a/crates/openshell-providers/src/providers/vertex.rs b/crates/openshell-providers/src/providers/vertex.rs
new file mode 100644
index 000000000..92e77002a
--- /dev/null
+++ b/crates/openshell-providers/src/providers/vertex.rs
@@ -0,0 +1,47 @@
+// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+use crate::{
+    ProviderDiscoverySpec, ProviderError, ProviderPlugin, RealDiscoveryContext, discover_with_spec,
+};
+
+pub struct VertexProvider;
+
+pub const SPEC: ProviderDiscoverySpec = ProviderDiscoverySpec {
+    id: "vertex",
+    credential_env_vars: &["ANTHROPIC_VERTEX_PROJECT_ID"],
+};
+
+impl ProviderPlugin for VertexProvider {
+    fn id(&self) -> &'static str {
+        SPEC.id
+    }
+
+    fn discover_existing(&self) -> Result<Option<crate::DiscoveredProvider>, ProviderError> {
+        discover_with_spec(&SPEC, &RealDiscoveryContext)
+    }
+
+    fn credential_env_vars(&self) -> &'static [&'static str] {
+        SPEC.credential_env_vars
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::SPEC;
+    use crate::discover_with_spec;
+    use crate::test_helpers::MockDiscoveryContext;
+
+    #[test]
+    fn discovers_vertex_env_credentials() {
+        let ctx = MockDiscoveryContext::new()
+            .with_env("ANTHROPIC_VERTEX_PROJECT_ID", "my-gcp-project");
+        let discovered = discover_with_spec(&SPEC, &ctx)
+            .expect("discovery")
+            .expect("provider");
+        assert_eq!(
+            discovered.credentials.get("ANTHROPIC_VERTEX_PROJECT_ID"),
+            Some(&"my-gcp-project".to_string())
+        );
+    }
+}
diff --git a/crates/openshell-server/src/inference.rs b/crates/openshell-server/src/inference.rs
index 0fb29bde5..5d4014b7a 100644
--- a/crates/openshell-server/src/inference.rs
+++ b/crates/openshell-server/src/inference.rs
@@ -237,7 +237,7 @@ fn resolve_provider_route(provider: &Provider) -> Result<ResolvedProviderRoute,
     let profile = openshell_core::inference::profile_for(&provider_type).ok_or_else(|| {
         Status::invalid_argument(format!(
             "provider '{name}' has unsupported type '{provider_type}' for cluster inference \
-                 (supported: openai, anthropic, nvidia)",
+                 (supported: openai, anthropic, nvidia, vertex)",
             name = provider.name
         ))
     })?;
diff --git a/docs/get-started/install-podman-macos.md b/docs/get-started/install-podman-macos.md
index 3b744c026..8a847a547 100644
--- a/docs/get-started/install-podman-macos.md
+++ b/docs/get-started/install-podman-macos.md
@@ -35,9 +35,7 @@ brew install podman mise
 bash scripts/setup-podman-macos.sh
 source scripts/podman.env
 mise run cluster:build:full
-cargo build --release -p openshell-cli
-mkdir -p ~/.local/bin
-cp target/release/openshell ~/.local/bin/
+cargo install --path crates/openshell-cli --root ~/.local
 openshell sandbox create
 ```
 
@@ -72,7 +70,9 @@ source scripts/podman.env
 This sets:
 - `CONTAINER_HOST` - Podman socket path
 - `OPENSHELL_CONTAINER_RUNTIME=podman` - Use Podman runtime
-- `OPENSHELL_REGISTRY=127.0.0.1:5000/openshell` - Local registry for component images
+- `OPENSHELL_IMAGE_REPO_BASE=127.0.0.1:5000/openshell` - Local registry for component images
+- `OPENSHELL_REGISTRY_HOST=127.0.0.1:5000` - Registry host
+- `OPENSHELL_REGISTRY_INSECURE=true` - Allow HTTP registry
 - `OPENSHELL_CLUSTER_IMAGE=localhost/openshell/cluster:dev` - Local cluster image
 
 To make these persistent, add to your shell profile (`~/.zshrc` or `~/.bashrc`):
@@ -114,9 +114,7 @@ tasks/scripts/cluster-bootstrap.sh build
 For a release-optimized binary that works system-wide:
 
 ```console
-cargo build --release -p openshell-cli
-mkdir -p ~/.local/bin
-cp target/release/openshell ~/.local/bin/
+cargo install --path crates/openshell-cli --root ~/.local
 ```
 
 ## Create a Sandbox
diff --git a/scripts/podman.env b/scripts/podman.env
index 1e74a6b71..5aba469b2 100644
--- a/scripts/podman.env
+++ b/scripts/podman.env
@@ -21,13 +21,19 @@ if command -v podman &>/dev/null; then
         export OPENSHELL_CONTAINER_RUNTIME=podman
 
         # Local development image registry
-        export OPENSHELL_REGISTRY="127.0.0.1:5000/openshell"
+        export OPENSHELL_IMAGE_REPO_BASE="127.0.0.1:5000/openshell"
+        export OPENSHELL_REGISTRY_HOST="127.0.0.1:5000"
+        export OPENSHELL_REGISTRY_NAMESPACE="openshell"
+        export OPENSHELL_REGISTRY_ENDPOINT="host.containers.internal:5000"
+        export OPENSHELL_REGISTRY_INSECURE="true"
         export OPENSHELL_CLUSTER_IMAGE="localhost/openshell/cluster:dev"
 
         echo "✓ Podman environment configured:"
         echo "  CONTAINER_HOST=${CONTAINER_HOST}"
         echo "  OPENSHELL_CONTAINER_RUNTIME=${OPENSHELL_CONTAINER_RUNTIME}"
-        echo "  OPENSHELL_REGISTRY=${OPENSHELL_REGISTRY}"
+        echo "  OPENSHELL_IMAGE_REPO_BASE=${OPENSHELL_IMAGE_REPO_BASE}"
+        echo "  OPENSHELL_REGISTRY_HOST=${OPENSHELL_REGISTRY_HOST}"
+        echo "  OPENSHELL_REGISTRY_INSECURE=${OPENSHELL_REGISTRY_INSECURE}"
         echo "  OPENSHELL_CLUSTER_IMAGE=${OPENSHELL_CLUSTER_IMAGE}"
     fi
 else

From dc3690350254ac84c78873529a44af34bef78451 Mon Sep 17 00:00:00 2001
From: itdove <dvernier@redhat.com>
Date: Mon, 6 Apr 2026 17:26:45 -0400
Subject: [PATCH 02/19] docs: clarify that cluster:build:full also starts the
 gateway

- Note that mise run cluster:build:full builds AND starts the gateway
- Add verification step after build completes
- Clarify that gateway is already running before sandbox creation
---
 docs/get-started/install-podman-macos.md | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/docs/get-started/install-podman-macos.md b/docs/get-started/install-podman-macos.md
index 8a847a547..661abada0 100644
--- a/docs/get-started/install-podman-macos.md
+++ b/docs/get-started/install-podman-macos.md
@@ -90,12 +90,13 @@ mise run cluster:build:full
 ```
 
 This command:
-- Builds the gateway image
+- Builds the gateway and cluster images
 - Starts a local container registry at `127.0.0.1:5000`
-- Builds the cluster image
-- Pushes images to the local registry
+- Pushes the gateway image to the local registry
 - Bootstraps a k3s cluster inside a Podman container
-- Deploys the OpenShell gateway
+- Deploys and starts the OpenShell gateway
+
+**Note:** This command builds the images AND starts the gateway in one step. The gateway will be running when the command completes.
 
 Or run the script directly:
 
@@ -119,10 +120,18 @@ cargo install --path crates/openshell-cli --root ~/.local
 
 ## Create a Sandbox
 
+The gateway is now running. Create a sandbox to test it:
+
 ```console
 openshell sandbox create
 ```
 
+Verify the gateway is healthy:
+
+```console
+openshell gateway info
+```
+
 ## Cleanup
 
 To remove all OpenShell resources and optionally the Podman machine:

From a6cc6a4bd2debaee2ad26506308772c8edc7e0c6 Mon Sep 17 00:00:00 2001
From: itdove <dvernier@redhat.com>
Date: Mon, 6 Apr 2026 17:54:14 -0400
Subject: [PATCH 03/19] docs: add Vertex AI provider to inference and provider
 docs

- Add vertex to supported provider types table in manage-providers.md
- Add Vertex AI provider tab in inference configuration docs
- Clarify two usage modes: direct API calls vs inference.local routing
- Document prerequisites (GCP project, Application Default Credentials)
- Note OAuth limitation only affects inference routing, not direct calls
- Keep Vertex docs in provider/inference pages, not installation guides
---
 docs/inference/configure.md        | 21 +++++++++++++++++++++
 docs/sandboxes/manage-providers.md |  1 +
 2 files changed, 22 insertions(+)

diff --git a/docs/inference/configure.md b/docs/inference/configure.md
index 78065689e..4798bc09c 100644
--- a/docs/inference/configure.md
+++ b/docs/inference/configure.md
@@ -100,6 +100,27 @@ This reads `ANTHROPIC_API_KEY` from your environment.
 
 ::::
 
+::::{tab-item} Google Cloud Vertex AI
+
+```console
+$ export ANTHROPIC_VERTEX_PROJECT_ID=your-gcp-project-id
+$ openshell provider create --name vertex-claude --type vertex --from-existing
+```
+
+This reads `ANTHROPIC_VERTEX_PROJECT_ID` from your environment and makes it available inside sandboxes.
+
+**Prerequisites:**
+- Google Cloud project with Vertex AI API enabled
+- Application Default Credentials configured: `gcloud auth application-default login`
+
+**Usage:**
+- **Direct API calls:** Attach this provider to sandboxes to inject the project ID credential. Call Vertex AI directly from your code using the Anthropic SDK.
+- **Inference routing:** Configure `inference.local` to proxy requests to Vertex AI (see "Set Inference Routing" section below).
+
+**Known Limitation:** When using inference routing, GCP OAuth authentication is not yet fully implemented. The provider can be created and configured, but API calls through `inference.local` will fail until OAuth token generation is implemented. Direct API calls from sandbox code using the Anthropic SDK work if you handle authentication yourself.
+
+::::
+
 :::::
 
 ## Set Inference Routing
diff --git a/docs/sandboxes/manage-providers.md b/docs/sandboxes/manage-providers.md
index 6d35766bf..bd75b978f 100644
--- a/docs/sandboxes/manage-providers.md
+++ b/docs/sandboxes/manage-providers.md
@@ -179,6 +179,7 @@ The following provider types are supported.
 | `nvidia` | `NVIDIA_API_KEY` | NVIDIA API Catalog |
 | `openai` | `OPENAI_API_KEY` | Any OpenAI-compatible endpoint. Set `--config OPENAI_BASE_URL` to point to the provider. Refer to {doc}`/inference/configure`. |
 | `opencode` | `OPENCODE_API_KEY`, `OPENROUTER_API_KEY`, `OPENAI_API_KEY` | opencode tool |
+| `vertex` | `ANTHROPIC_VERTEX_PROJECT_ID` | Google Cloud Vertex AI with Claude models. Requires GCP Application Default Credentials. **Note:** OAuth authentication not yet fully implemented. |
 
 :::{tip}
 Use the `generic` type for any service not listed above. You define the

From 17bf43411f27258c0e3297b8fc2a8ed6c4a0aebc Mon Sep 17 00:00:00 2001
From: itdove <dvernier@redhat.com>
Date: Mon, 6 Apr 2026 18:45:53 -0400
Subject: [PATCH 04/19] feat(vertex): implement GCP OAuth authentication for
 Vertex AI

- Add gcp_auth dependency for OAuth token generation
- Generate OAuth tokens from Application Default Credentials in vertex provider
- Store tokens as VERTEX_OAUTH_TOKEN credential for router authentication
- Update inference profile to use Bearer auth with OAuth tokens
- Construct Vertex-specific URLs with :streamRawPredict endpoint
- Support project ID from credentials for URL construction
- Add model parameter to build_backend_url for Vertex routing
---
 Cargo.lock                                    | 38 +++++++++++++++
 crates/openshell-core/src/inference.rs        | 11 +++--
 crates/openshell-providers/Cargo.toml         |  2 +
 .../src/providers/vertex.rs                   | 48 +++++++++++++++++--
 crates/openshell-router/src/backend.rs        | 41 +++++++++++++---
 crates/openshell-server/src/inference.rs      | 28 ++++++++++-
 6 files changed, 153 insertions(+), 15 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 98797cc24..1e2b542ee 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1509,6 +1509,32 @@ dependencies = [
  "slab",
 ]
 
+[[package]]
+name = "gcp_auth"
+version = "0.12.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c2b3d0b409a042a380111af38136310839af8ac1a0917fb6e84515ed1e4bf3ee"
+dependencies = [
+ "async-trait",
+ "base64 0.22.1",
+ "bytes",
+ "chrono",
+ "http",
+ "http-body-util",
+ "hyper",
+ "hyper-rustls",
+ "hyper-util",
+ "ring",
+ "rustls-pki-types",
+ "serde",
+ "serde_json",
+ "thiserror 2.0.18",
+ "tokio",
+ "tracing",
+ "tracing-futures",
+ "url",
+]
+
 [[package]]
 name = "generic-array"
 version = "0.14.7"
@@ -2919,8 +2945,10 @@ dependencies = [
 name = "openshell-providers"
 version = "0.0.0"
 dependencies = [
+ "gcp_auth",
  "openshell-core",
  "thiserror 2.0.18",
+ "tokio",
 ]
 
 [[package]]
@@ -5378,6 +5406,16 @@ dependencies = [
  "valuable",
 ]
 
+[[package]]
+name = "tracing-futures"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2"
+dependencies = [
+ "pin-project",
+ "tracing",
+]
+
 [[package]]
 name = "tracing-log"
 version = "0.2.0"
diff --git a/crates/openshell-core/src/inference.rs b/crates/openshell-core/src/inference.rs
index 78fe72310..0973f25db 100644
--- a/crates/openshell-core/src/inference.rs
+++ b/crates/openshell-core/src/inference.rs
@@ -88,12 +88,15 @@ static NVIDIA_PROFILE: InferenceProviderProfile = InferenceProviderProfile {
 
 static VERTEX_PROFILE: InferenceProviderProfile = InferenceProviderProfile {
     provider_type: "vertex",
+    // Base URL template - actual URL constructed at request time with project/region/model
     default_base_url: "https://us-central1-aiplatform.googleapis.com/v1",
     protocols: ANTHROPIC_PROTOCOLS,
-    credential_key_names: &["ANTHROPIC_VERTEX_PROJECT_ID"],
-    base_url_config_keys: &["ANTHROPIC_VERTEX_REGION", "VERTEX_BASE_URL"],
-    auth: AuthHeader::Custom("x-api-key"),
-    default_headers: &[("anthropic-version", "2023-06-01")],
+    // Look for OAuth token first, fallback to project ID (for manual config)
+    credential_key_names: &["VERTEX_OAUTH_TOKEN", "ANTHROPIC_VERTEX_PROJECT_ID"],
+    base_url_config_keys: &["VERTEX_BASE_URL", "ANTHROPIC_VERTEX_REGION"],
+    // Vertex uses OAuth Bearer tokens, not x-api-key
+    auth: AuthHeader::Bearer,
+    default_headers: &[("anthropic-version", "vertex-2023-10-16")],
 };
 
 /// Look up the inference provider profile for a given provider type.
diff --git a/crates/openshell-providers/Cargo.toml b/crates/openshell-providers/Cargo.toml
index 41f9ed6c0..0cf14ec2b 100644
--- a/crates/openshell-providers/Cargo.toml
+++ b/crates/openshell-providers/Cargo.toml
@@ -13,6 +13,8 @@ repository.workspace = true
 [dependencies]
 openshell-core = { path = "../openshell-core" }
 thiserror = { workspace = true }
+gcp_auth = "0.12"
+tokio = { workspace = true }
 
 [lints]
 workspace = true
diff --git a/crates/openshell-providers/src/providers/vertex.rs b/crates/openshell-providers/src/providers/vertex.rs
index 92e77002a..ef7758670 100644
--- a/crates/openshell-providers/src/providers/vertex.rs
+++ b/crates/openshell-providers/src/providers/vertex.rs
@@ -2,7 +2,8 @@
 // SPDX-License-Identifier: Apache-2.0
 
 use crate::{
-    ProviderDiscoverySpec, ProviderError, ProviderPlugin, RealDiscoveryContext, discover_with_spec,
+    DiscoveredProvider, ProviderDiscoverySpec, ProviderError, ProviderPlugin, RealDiscoveryContext,
+    discover_with_spec,
 };
 
 pub struct VertexProvider;
@@ -12,13 +13,54 @@ pub const SPEC: ProviderDiscoverySpec = ProviderDiscoverySpec {
     credential_env_vars: &["ANTHROPIC_VERTEX_PROJECT_ID"],
 };
 
+// Additional config keys for Vertex AI
+const VERTEX_CONFIG_KEYS: &[&str] = &["ANTHROPIC_VERTEX_REGION"];
+
+/// Generate an OAuth token from GCP Application Default Credentials for Vertex AI.
+///
+/// Returns `None` if ADC is not configured or token generation fails.
+async fn generate_oauth_token() -> Option<String> {
+    // Try to find an appropriate token provider (checks ADC, service account, metadata server, etc.)
+    let provider = gcp_auth::provider().await.ok()?;
+
+    // Get token for Vertex AI scope
+    // Vertex AI uses the Cloud Platform scope
+    let scopes = &["https://www.googleapis.com/auth/cloud-platform"];
+    let token = provider.token(scopes).await.ok()?;
+
+    Some(token.as_str().to_string())
+}
+
 impl ProviderPlugin for VertexProvider {
     fn id(&self) -> &'static str {
         SPEC.id
     }
 
-    fn discover_existing(&self) -> Result<Option<crate::DiscoveredProvider>, ProviderError> {
-        discover_with_spec(&SPEC, &RealDiscoveryContext)
+    fn discover_existing(&self) -> Result<Option<DiscoveredProvider>, ProviderError> {
+        let mut discovered = discover_with_spec(&SPEC, &RealDiscoveryContext)?;
+
+        // Add region config if present
+        if let Some(ref mut provider) = discovered {
+            for &key in VERTEX_CONFIG_KEYS {
+                if let Ok(value) = std::env::var(key) {
+                    provider.config.insert(key.to_string(), value);
+                }
+            }
+
+            // Generate OAuth token from Application Default Credentials
+            // This replaces the project ID credential with an actual OAuth token
+            // that can be used for API authentication
+            let rt = tokio::runtime::Runtime::new()
+                .map_err(|e| ProviderError::UnsupportedProvider(format!("failed to create tokio runtime: {e}")))?;
+
+            if let Some(token) = rt.block_on(generate_oauth_token()) {
+                // Store the OAuth token as VERTEX_OAUTH_TOKEN
+                // The inference router will use this as the Bearer token
+                provider.credentials.insert("VERTEX_OAUTH_TOKEN".to_string(), token);
+            }
+        }
+
+        Ok(discovered)
     }
 
     fn credential_env_vars(&self) -> &'static [&'static str] {
diff --git a/crates/openshell-router/src/backend.rs b/crates/openshell-router/src/backend.rs
index d1d7092c0..3698441f7 100644
--- a/crates/openshell-router/src/backend.rs
+++ b/crates/openshell-router/src/backend.rs
@@ -95,7 +95,7 @@ async fn send_backend_request(
     headers: Vec<(String, String)>,
     body: bytes::Bytes,
 ) -> Result<reqwest::Response, RouterError> {
-    let url = build_backend_url(&route.endpoint, path);
+    let url = build_backend_url(&route.endpoint, path, &route.model);
 
     let reqwest_method: reqwest::Method = method
         .parse()
@@ -241,7 +241,7 @@ pub async fn verify_backend_endpoint(
 
     if mock::is_mock_route(route) {
         return Ok(ValidatedEndpoint {
-            url: build_backend_url(&route.endpoint, probe.path),
+            url: build_backend_url(&route.endpoint, probe.path, &route.model),
             protocol: probe.protocol.to_string(),
         });
     }
@@ -306,7 +306,7 @@ async fn try_validation_request(
                 details,
             },
         })?;
-    let url = build_backend_url(&route.endpoint, path);
+    let url = build_backend_url(&route.endpoint, path, &route.model);
 
     if response.status().is_success() {
         return Ok(ValidatedEndpoint {
@@ -418,8 +418,23 @@ pub async fn proxy_to_backend_streaming(
     })
 }
 
-fn build_backend_url(endpoint: &str, path: &str) -> String {
+fn build_backend_url(endpoint: &str, path: &str, model: &str) -> String {
     let base = endpoint.trim_end_matches('/');
+
+    // Special handling for Vertex AI
+    if base.contains("aiplatform.googleapis.com") && path.starts_with("/v1/messages") {
+        // Vertex AI uses a different path structure:
+        // https://{region}-aiplatform.googleapis.com/v1/projects/{project}/locations/{region}/publishers/anthropic/models/{model}:streamRawPredict
+        // The base already has everything up to /models, so we append /{model}:streamRawPredict
+        let model_suffix = if model.is_empty() {
+            String::new()
+        } else {
+            format!("/{}", model)
+        };
+        return format!("{}{}:streamRawPredict", base, model_suffix);
+    }
+
+    // Deduplicate /v1 prefix for standard endpoints
     if base.ends_with("/v1") && (path == "/v1" || path.starts_with("/v1/")) {
         return format!("{base}{}", &path[3..]);
     }
@@ -438,7 +453,7 @@ mod tests {
     #[test]
     fn build_backend_url_dedupes_v1_prefix() {
         assert_eq!(
-            build_backend_url("https://api.openai.com/v1", "/v1/chat/completions"),
+            build_backend_url("https://api.openai.com/v1", "/v1/chat/completions", "gpt-4"),
             "https://api.openai.com/v1/chat/completions"
         );
     }
@@ -446,15 +461,27 @@ mod tests {
     #[test]
     fn build_backend_url_preserves_non_versioned_base() {
         assert_eq!(
-            build_backend_url("https://api.anthropic.com", "/v1/messages"),
+            build_backend_url("https://api.anthropic.com", "/v1/messages", "claude-3"),
             "https://api.anthropic.com/v1/messages"
         );
     }
 
+    #[test]
+    fn build_backend_url_handles_vertex_ai() {
+        assert_eq!(
+            build_backend_url(
+                "https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1/publishers/anthropic/models",
+                "/v1/messages",
+                "claude-3-5-sonnet-20241022"
+            ),
+            "https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1/publishers/anthropic/models/claude-3-5-sonnet-20241022:streamRawPredict"
+        );
+    }
+
     #[test]
     fn build_backend_url_handles_exact_v1_path() {
         assert_eq!(
-            build_backend_url("https://api.openai.com/v1", "/v1"),
+            build_backend_url("https://api.openai.com/v1", "/v1", "gpt-4"),
             "https://api.openai.com/v1"
         );
     }
diff --git a/crates/openshell-server/src/inference.rs b/crates/openshell-server/src/inference.rs
index 5d4014b7a..5faa30518 100644
--- a/crates/openshell-server/src/inference.rs
+++ b/crates/openshell-server/src/inference.rs
@@ -250,11 +250,37 @@ fn resolve_provider_route(provider: &Provider) -> Result<ResolvedProviderRoute,
             ))
         })?;
 
-    let base_url = find_provider_config_value(provider, profile.base_url_config_keys)
+    let mut base_url = find_provider_config_value(provider, profile.base_url_config_keys)
         .unwrap_or_else(|| profile.default_base_url.to_string())
         .trim()
         .to_string();
 
+    // For Vertex AI, construct the base URL with project ID and region
+    if provider_type == "vertex" {
+        let region = provider
+            .config
+            .get("ANTHROPIC_VERTEX_REGION")
+            .map(|s| s.as_str())
+            .unwrap_or("us-central1");
+
+        // Get project ID - if we have an OAuth token, we still need the project ID for URL construction
+        let project_id = provider
+            .credentials
+            .get("ANTHROPIC_VERTEX_PROJECT_ID")
+            .ok_or_else(|| {
+                Status::invalid_argument(format!(
+                    "provider '{}' missing ANTHROPIC_VERTEX_PROJECT_ID credential for Vertex AI URL construction",
+                    provider.name
+                ))
+            })?;
+
+        // Construct Vertex AI base URL: https://{region}-aiplatform.googleapis.com/v1/projects/{project}/locations/{region}/publishers/anthropic/models
+        base_url = format!(
+            "https://{}-aiplatform.googleapis.com/v1/projects/{}/locations/{}/publishers/anthropic/models",
+            region, project_id, region
+        );
+    }
+
     if base_url.is_empty() {
         return Err(Status::invalid_argument(format!(
             "provider '{name}' resolved to empty base_url",

From 5ac42babef783f00ab82d7f6eb1c8ec403842f3f Mon Sep 17 00:00:00 2001
From: itdove <dvernier@redhat.com>
Date: Mon, 6 Apr 2026 18:50:57 -0400
Subject: [PATCH 05/19] fix(vertex): use separate thread for OAuth token
 generation

Avoid tokio runtime nesting panic by spawning OAuth token generation
in a separate OS thread with its own runtime. This allows provider
discovery to work when called from within an existing tokio context.
---
 .../openshell-providers/src/providers/vertex.rs | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/crates/openshell-providers/src/providers/vertex.rs b/crates/openshell-providers/src/providers/vertex.rs
index ef7758670..0669c8067 100644
--- a/crates/openshell-providers/src/providers/vertex.rs
+++ b/crates/openshell-providers/src/providers/vertex.rs
@@ -48,12 +48,17 @@ impl ProviderPlugin for VertexProvider {
             }
 
             // Generate OAuth token from Application Default Credentials
-            // This replaces the project ID credential with an actual OAuth token
-            // that can be used for API authentication
-            let rt = tokio::runtime::Runtime::new()
-                .map_err(|e| ProviderError::UnsupportedProvider(format!("failed to create tokio runtime: {e}")))?;
-
-            if let Some(token) = rt.block_on(generate_oauth_token()) {
+            // Try to generate token, but don't fail if we're in a nested runtime context
+            let token = std::thread::spawn(|| {
+                tokio::runtime::Runtime::new()
+                    .ok()
+                    .and_then(|rt| rt.block_on(generate_oauth_token()))
+            })
+            .join()
+            .ok()
+            .flatten();
+
+            if let Some(token) = token {
                 // Store the OAuth token as VERTEX_OAUTH_TOKEN
                 // The inference router will use this as the Bearer token
                 provider.credentials.insert("VERTEX_OAUTH_TOKEN".to_string(), token);

From f606dc37cf261ab29461a2da659bfc94a2a11c8f Mon Sep 17 00:00:00 2001
From: itdove <dvernier@redhat.com>
Date: Mon, 6 Apr 2026 19:17:58 -0400
Subject: [PATCH 06/19] feat(scripts): improve cleanup script with sandbox
 deletion and better ordering

- Delete all sandboxes before destroying gateway
- Explicitly stop and remove cluster and registry containers by name
- Remove images by specific tags (localhost/openshell/*)
- Run cargo clean for build artifacts
- Add reinstall instructions to completion message
- Better error handling with 2>/dev/null redirects
---
 cleanup-openshell-podman-macos.sh | 46 +++++++++++++++++++++++++++----
 1 file changed, 40 insertions(+), 6 deletions(-)

diff --git a/cleanup-openshell-podman-macos.sh b/cleanup-openshell-podman-macos.sh
index 43efd8dd5..d6b80a411 100755
--- a/cleanup-openshell-podman-macos.sh
+++ b/cleanup-openshell-podman-macos.sh
@@ -11,19 +11,43 @@ set -e
 echo "=== OpenShell Podman Cleanup Script ==="
 echo ""
 
+# Delete all sandboxes first (before destroying gateway)
+echo "Deleting all sandboxes..."
+if command -v openshell &>/dev/null; then
+    # Get list of sandboxes and delete each one
+    openshell sandbox list --no-header 2>/dev/null | awk '{print $1}' | while read -r sandbox; do
+        if [ -n "$sandbox" ]; then
+            echo "  Deleting sandbox: $sandbox"
+            openshell sandbox delete "$sandbox" 2>/dev/null || true
+        fi
+    done
+fi
+
 # Destroy OpenShell gateway (if it exists)
 echo "Destroying OpenShell gateway..."
 if command -v openshell &>/dev/null; then
     openshell gateway destroy --name openshell 2>/dev/null || true
 fi
 
-# Stop and remove any running OpenShell containers
-echo "Stopping OpenShell containers..."
-podman ps -a | grep openshell | awk '{print $1}' | xargs -r podman rm -f || true
+# Stop and remove cluster container
+echo "Stopping cluster container..."
+podman stop openshell-cluster-openshell 2>/dev/null || true
+podman rm openshell-cluster-openshell 2>/dev/null || true
+
+# Stop and remove local registry container
+echo "Stopping local registry..."
+podman stop openshell-local-registry 2>/dev/null || true
+podman rm openshell-local-registry 2>/dev/null || true
+
+# Stop and remove any other OpenShell containers
+echo "Cleaning up remaining OpenShell containers..."
+podman ps -a | grep openshell | awk '{print $1}' | xargs -r podman rm -f 2>/dev/null || true
 
 # Remove OpenShell images
 echo "Removing OpenShell images..."
-podman images | grep -E "openshell|cluster" | awk '{print $3}' | xargs -r podman rmi -f || true
+podman rmi localhost/openshell/cluster:dev 2>/dev/null || true
+podman rmi localhost/openshell/gateway:dev 2>/dev/null || true
+podman images | grep -E "openshell|127.0.0.1:5000/openshell" | awk '{print $3}' | xargs -r podman rmi -f 2>/dev/null || true
 
 # Remove CLI binary
 echo "Removing CLI binary..."
@@ -41,8 +65,11 @@ rm -rf ~/.openshell
 echo "Removing build artifacts..."
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 cd "$SCRIPT_DIR"
-rm -rf target/
-rm -rf deploy/docker/.build/
+if command -v cargo &>/dev/null; then
+    echo "  Running cargo clean..."
+    cargo clean 2>/dev/null || true
+fi
+rm -rf deploy/docker/.build/ 2>/dev/null || true
 
 # Clean Podman cache
 echo "Cleaning Podman build cache..."
@@ -51,6 +78,13 @@ podman system prune -af --volumes
 echo ""
 echo "=== Cleanup Complete ==="
 echo ""
+echo "OpenShell containers, images, and configuration have been removed."
+echo ""
+echo "To reinstall OpenShell:"
+echo "  1. source scripts/podman.env"
+echo "  2. mise run cluster:build:full"
+echo "  3. cargo install --path crates/openshell-cli --root ~/.local"
+echo ""
 echo "To completely remove the OpenShell Podman machine:"
 echo "  podman machine stop openshell"
 echo "  podman machine rm openshell"

From d36e58b21ff50f5b410b6edb011cefe55ca27322 Mon Sep 17 00:00:00 2001
From: itdove <dvernier@redhat.com>
Date: Mon, 6 Apr 2026 19:45:17 -0400
Subject: [PATCH 07/19] feat(sandbox): inject Vertex AI credentials as actual
 environment variables

Add selective direct injection for provider credentials that need to be
accessible as real environment variables (not placeholders). This allows
tools like `claude` CLI to read Vertex AI credentials directly.

Changes:
- Add direct_inject_credentials() list for credentials requiring direct access
- Modify from_provider_env() to support selective direct injection
- Inject ANTHROPIC_VERTEX_PROJECT_ID, VERTEX_OAUTH_TOKEN, and
  ANTHROPIC_VERTEX_REGION as actual values instead of placeholders
- Other credentials continue using openshell:resolve:env:* placeholders
  for HTTP proxy resolution

Security note: Directly injected credentials are visible via /proc/*/environ,
unlike placeholder-based credentials which are only resolved within HTTP
requests. Only credentials essential for CLI tool compatibility are included.
---
 crates/openshell-sandbox/src/secrets.rs | 53 +++++++++++++++++++++++--
 1 file changed, 49 insertions(+), 4 deletions(-)

diff --git a/crates/openshell-sandbox/src/secrets.rs b/crates/openshell-sandbox/src/secrets.rs
index a27537c91..233056f07 100644
--- a/crates/openshell-sandbox/src/secrets.rs
+++ b/crates/openshell-sandbox/src/secrets.rs
@@ -10,6 +10,25 @@ const PLACEHOLDER_PREFIX: &str = "openshell:resolve:env:";
 /// Public access to the placeholder prefix for fail-closed scanning in other modules.
 pub(crate) const PLACEHOLDER_PREFIX_PUBLIC: &str = PLACEHOLDER_PREFIX;
 
+/// Credentials that should be injected as actual values into the sandbox environment
+/// instead of being converted to placeholders.
+///
+/// These credentials are needed by tools (like `claude` CLI) that read environment
+/// variables directly rather than making HTTP requests through the proxy.
+///
+/// **Security consideration**: These values are visible to all sandbox processes via
+/// `/proc/<pid>/environ`, unlike placeholder-based credentials which are only resolved
+/// within HTTP requests. Only include credentials here when direct env var access is
+/// required for tool compatibility.
+fn direct_inject_credentials() -> &'static [&'static str] {
+    &[
+        // Vertex AI credentials for claude CLI
+        "ANTHROPIC_VERTEX_PROJECT_ID",
+        "VERTEX_OAUTH_TOKEN",
+        "ANTHROPIC_VERTEX_REGION",
+    ]
+}
+
 /// Characters that are valid in an env var key name (used to extract
 /// placeholder boundaries within concatenated strings like path segments).
 fn is_env_key_char(b: u8) -> bool {
@@ -69,6 +88,19 @@ pub struct SecretResolver {
 impl SecretResolver {
     pub(crate) fn from_provider_env(
         provider_env: HashMap<String, String>,
+    ) -> (HashMap<String, String>, Option<Self>) {
+        Self::from_provider_env_with_direct_inject(provider_env, &direct_inject_credentials())
+    }
+
+    /// Create a resolver from provider environment with selective direct injection.
+    ///
+    /// Credentials matching keys in `direct_inject` are injected as actual values
+    /// into the child environment (for tools like `claude` CLI that need real env vars).
+    /// All other credentials are converted to `openshell:resolve:env:*` placeholders
+    /// that get resolved by the HTTP proxy.
+    pub(crate) fn from_provider_env_with_direct_inject(
+        provider_env: HashMap<String, String>,
+        direct_inject: &[&str],
     ) -> (HashMap<String, String>, Option<Self>) {
         if provider_env.is_empty() {
             return (HashMap::new(), None);
@@ -78,12 +110,25 @@ impl SecretResolver {
         let mut by_placeholder = HashMap::with_capacity(provider_env.len());
 
         for (key, value) in provider_env {
-            let placeholder = placeholder_for_env_key(&key);
-            child_env.insert(key, placeholder.clone());
-            by_placeholder.insert(placeholder, value);
+            // Check if this credential should be injected directly
+            if direct_inject.contains(&key.as_str()) {
+                // Direct injection: put actual value in environment
+                child_env.insert(key, value);
+            } else {
+                // Placeholder: will be resolved by HTTP proxy
+                let placeholder = placeholder_for_env_key(&key);
+                child_env.insert(key, placeholder.clone());
+                by_placeholder.insert(placeholder, value);
+            }
         }
 
-        (child_env, Some(Self { by_placeholder }))
+        let resolver = if by_placeholder.is_empty() {
+            None
+        } else {
+            Some(Self { by_placeholder })
+        };
+
+        (child_env, resolver)
     }
 
     /// Resolve a placeholder string to the real secret value.

From 2dd3438a165a898bf3ff8c72aabbfbabab231dd9 Mon Sep 17 00:00:00 2001
From: itdove <dvernier@redhat.com>
Date: Mon, 6 Apr 2026 20:03:59 -0400
Subject: [PATCH 08/19] feat(vertex): auto-inject CLAUDE_CODE_USE_VERTEX for
 claude CLI

- Add CLAUDE_CODE_USE_VERTEX to direct injection list
- Automatically set CLAUDE_CODE_USE_VERTEX=1 in Vertex provider credentials
- Enables claude CLI to auto-detect Vertex AI without manual config

Now sandboxes with Vertex provider will automatically have:
- ANTHROPIC_VERTEX_PROJECT_ID (from env)
- VERTEX_OAUTH_TOKEN (generated from GCP ADC)
- CLAUDE_CODE_USE_VERTEX=1 (auto-set)

The claude CLI can now use Vertex AI with zero manual configuration.
---
 crates/openshell-providers/src/providers/vertex.rs | 4 ++++
 crates/openshell-sandbox/src/secrets.rs            | 1 +
 2 files changed, 5 insertions(+)

diff --git a/crates/openshell-providers/src/providers/vertex.rs b/crates/openshell-providers/src/providers/vertex.rs
index 0669c8067..6daadd5f9 100644
--- a/crates/openshell-providers/src/providers/vertex.rs
+++ b/crates/openshell-providers/src/providers/vertex.rs
@@ -47,6 +47,10 @@ impl ProviderPlugin for VertexProvider {
                 }
             }
 
+            // Set CLAUDE_CODE_USE_VERTEX=1 to enable Vertex AI in claude CLI
+            // Must be in credentials (not config) to be injected into sandbox environment
+            provider.credentials.insert("CLAUDE_CODE_USE_VERTEX".to_string(), "1".to_string());
+
             // Generate OAuth token from Application Default Credentials
             // Try to generate token, but don't fail if we're in a nested runtime context
             let token = std::thread::spawn(|| {
diff --git a/crates/openshell-sandbox/src/secrets.rs b/crates/openshell-sandbox/src/secrets.rs
index 233056f07..0cd188b6e 100644
--- a/crates/openshell-sandbox/src/secrets.rs
+++ b/crates/openshell-sandbox/src/secrets.rs
@@ -26,6 +26,7 @@ fn direct_inject_credentials() -> &'static [&'static str] {
         "ANTHROPIC_VERTEX_PROJECT_ID",
         "VERTEX_OAUTH_TOKEN",
         "ANTHROPIC_VERTEX_REGION",
+        "CLAUDE_CODE_USE_VERTEX",
     ]
 }
 

From bc3342de1a58a54550b8a5c2360528c561111e94 Mon Sep 17 00:00:00 2001
From: itdove <dvernier@redhat.com>
Date: Mon, 6 Apr 2026 20:06:01 -0400
Subject: [PATCH 09/19] feat(podman): increase default memory to 12 GB for
 better build performance

- Change Podman machine default memory from 8 GB to 12 GB
- Update documentation to reflect 12 GB default
- Update troubleshooting to suggest 16 GB for build issues

12 GB provides better performance for Rust compilation and reduces
out-of-memory issues during parallel builds.
---
 docs/get-started/install-podman-macos.md | 6 +++---
 scripts/setup-podman-macos.sh            | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/get-started/install-podman-macos.md b/docs/get-started/install-podman-macos.md
index 661abada0..648f11564 100644
--- a/docs/get-started/install-podman-macos.md
+++ b/docs/get-started/install-podman-macos.md
@@ -51,7 +51,7 @@ brew install podman mise
 
 The `scripts/setup-podman-macos.sh` script automates Podman Machine configuration:
 
-- Creates a dedicated `openshell` Podman machine (8 GB RAM, 4 CPUs)
+- Creates a dedicated `openshell` Podman machine (12 GB RAM, 4 CPUs)
 - Configures cgroup delegation (required for the embedded k3s cluster)
 - Stops conflicting machines (only one can run at a time, with user confirmation)
 
@@ -161,11 +161,11 @@ openshell sandbox create
 
 ### Build fails with memory errors
 
-Increase the Podman machine memory allocation:
+Increase the Podman machine memory allocation (default is 12 GB):
 
 ```console
 podman machine stop openshell
-podman machine set openshell --memory 8192
+podman machine set openshell --memory 16384
 podman machine start openshell
 ```
 
diff --git a/scripts/setup-podman-macos.sh b/scripts/setup-podman-macos.sh
index 1538259f3..979a51e3e 100755
--- a/scripts/setup-podman-macos.sh
+++ b/scripts/setup-podman-macos.sh
@@ -9,7 +9,7 @@
 set -euo pipefail
 
 MACHINE_NAME="${PODMAN_MACHINE_NAME:-openshell}"
-MEMORY="${PODMAN_MEMORY:-8192}"
+MEMORY="${PODMAN_MEMORY:-12288}"
 CPUS="${PODMAN_CPUS:-4}"
 
 echo "=== OpenShell Podman Setup for macOS ==="

From b08de19e134b32147a7eb56b7eb7edfe134fea47 Mon Sep 17 00:00:00 2001
From: itdove <dvernier@redhat.com>
Date: Mon, 6 Apr 2026 20:19:40 -0400
Subject: [PATCH 10/19] fix(scripts): update CLI installation command in setup
 script

Replace manual 'cargo build + cp' with 'cargo install --path'
Add verification step with 'openshell gateway info'
Keep correct 'mise run cluster:build:full' command
---
 scripts/setup-podman-macos.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/setup-podman-macos.sh b/scripts/setup-podman-macos.sh
index 979a51e3e..02fdf2343 100755
--- a/scripts/setup-podman-macos.sh
+++ b/scripts/setup-podman-macos.sh
@@ -108,9 +108,9 @@ echo "Podman machine '${MACHINE_NAME}' is ready!"
 echo ""
 echo "Next steps:"
 echo "  1. Set up environment: source scripts/podman.env"
-echo "  2. Build and deploy: mise run cluster:build:full"
-echo "  3. Build CLI: cargo build --release -p openshell-cli"
-echo "  4. Install CLI: cp target/release/openshell ~/.local/bin/"
+echo "  2. Build and deploy cluster: mise run cluster:build:full"
+echo "  3. Install CLI: cargo install --path crates/openshell-cli --root ~/.local"
+echo "  4. Verify installation: openshell gateway info"
 echo ""
 echo "To make the environment persistent, add to your shell profile (~/.zshrc):"
 echo "  source $(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/scripts/podman.env"

From b56828e9efea9a60bd6e4e1b5cf7499373ec9ae1 Mon Sep 17 00:00:00 2001
From: itdove <dvernier@redhat.com>
Date: Mon, 6 Apr 2026 23:15:18 -0400
Subject: [PATCH 11/19] fix(router): remove model field from Vertex AI request
 bodies

Vertex AI's :streamRawPredict endpoint expects the model in the URL
path, not in the request body. The router was incorrectly inserting
the model field, causing "Extra inputs are not permitted" errors.

Changes:
- Router now detects Vertex AI endpoints and removes model field
- Added bash 3 compatibility fix for cluster-deploy-fast.sh
- Added scripts/rebuild-cluster.sh for development workflow
- Updated documentation for Vertex AI setup and rebuild process

Fixes inference routing to Vertex AI via inference.local endpoint.
---
 CONTRIBUTING.md                          | 17 ++++++++++++
 crates/openshell-router/src/backend.rs   | 19 ++++++++++---
 docs/get-started/install-podman-macos.md | 25 ++++++++++++++++-
 docs/inference/configure.md              | 15 ++++++-----
 docs/sandboxes/manage-providers.md       |  3 ++-
 scripts/rebuild-cluster.sh               | 34 ++++++++++++++++++++++++
 tasks/scripts/cluster-deploy-fast.sh     | 19 ++++++++++++-
 7 files changed, 119 insertions(+), 13 deletions(-)
 create mode 100755 scripts/rebuild-cluster.sh

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 19a398a32..d759863a8 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -176,6 +176,23 @@ These are the primary `mise` tasks for day-to-day development:
 | `mise run docs`    | Build and serve documentation locally                   |
 | `mise run clean`   | Clean build artifacts                                   |
 
+## Rebuilding After Code Changes
+
+When developing OpenShell core components (gateway, router, sandbox supervisor), you need to rebuild the cluster to test your changes:
+
+```bash
+bash scripts/rebuild-cluster.sh
+```
+
+This script stops the cluster, rebuilds the image with your changes, and restarts it.
+
+**After rebuilding:**
+- Providers need to be recreated (gateway database was reset)
+- Inference routing needs to be reconfigured
+- Sandboxes need to be recreated
+
+For a complete cleanup, see the cleanup scripts in the `scripts/` directory.
+
 ## Project Structure
 
 | Path            | Purpose                                       |
diff --git a/crates/openshell-router/src/backend.rs b/crates/openshell-router/src/backend.rs
index 3698441f7..9b5d1a000 100644
--- a/crates/openshell-router/src/backend.rs
+++ b/crates/openshell-router/src/backend.rs
@@ -137,13 +137,24 @@ async fn send_backend_request(
 
     // Set the "model" field in the JSON body to the route's configured model so the
     // backend receives the correct model ID regardless of what the client sent.
+    //
+    // Exception: Vertex AI's :streamRawPredict endpoint expects the model in the URL
+    // path (already handled in build_backend_url), not in the request body.
+    let is_vertex_ai = route.endpoint.contains("aiplatform.googleapis.com");
+
     let body = match serde_json::from_slice::<serde_json::Value>(&body) {
         Ok(mut json) => {
             if let Some(obj) = json.as_object_mut() {
-                obj.insert(
-                    "model".to_string(),
-                    serde_json::Value::String(route.model.clone()),
-                );
+                if is_vertex_ai {
+                    // Remove model field for Vertex AI (it's in the URL path)
+                    obj.remove("model");
+                } else {
+                    // Insert/override model field for standard backends
+                    obj.insert(
+                        "model".to_string(),
+                        serde_json::Value::String(route.model.clone()),
+                    );
+                }
             }
             bytes::Bytes::from(serde_json::to_vec(&json).unwrap_or_else(|_| body.to_vec()))
         }
diff --git a/docs/get-started/install-podman-macos.md b/docs/get-started/install-podman-macos.md
index 648f11564..abc0a3ac6 100644
--- a/docs/get-started/install-podman-macos.md
+++ b/docs/get-started/install-podman-macos.md
@@ -132,14 +132,37 @@ Verify the gateway is healthy:
 openshell gateway info
 ```
 
+## Rebuilding After Code Changes
+
+If you're developing OpenShell and need to test code changes, use the rebuild script:
+
+```console
+bash scripts/rebuild-cluster.sh
+```
+
+This stops the cluster, removes the old image, rebuilds with your changes, and restarts. After rebuilding:
+1. Recreate providers (gateway database was reset)
+2. Reconfigure inference routing if needed
+3. Recreate sandboxes
+
 ## Cleanup
 
-To remove all OpenShell resources and optionally the Podman machine:
+### Quick Rebuild (Development)
+
+```console
+bash scripts/rebuild-cluster.sh
+```
+
+Rebuilds the cluster with latest code changes. Use this during development.
+
+### Full Cleanup (Start Fresh)
 
 ```console
 bash cleanup-openshell-podman-macos.sh
 ```
 
+Removes all OpenShell resources and optionally the Podman machine. Use this to completely reset your installation.
+
 ## Troubleshooting
 
 ### Environment variables not set
diff --git a/docs/inference/configure.md b/docs/inference/configure.md
index 4798bc09c..e13567135 100644
--- a/docs/inference/configure.md
+++ b/docs/inference/configure.md
@@ -104,23 +104,26 @@ This reads `ANTHROPIC_API_KEY` from your environment.
 
 ```console
 $ export ANTHROPIC_VERTEX_PROJECT_ID=your-gcp-project-id
-$ openshell provider create --name vertex-claude --type vertex --from-existing
+$ export ANTHROPIC_VERTEX_REGION=us-east5  # Optional, defaults to us-central1
+$ openshell provider create --name vertex --type vertex --from-existing
 ```
 
-This reads `ANTHROPIC_VERTEX_PROJECT_ID` from your environment and makes it available inside sandboxes.
+This reads `ANTHROPIC_VERTEX_PROJECT_ID` and `ANTHROPIC_VERTEX_REGION` from your environment and automatically generates OAuth tokens from GCP Application Default Credentials.
 
 **Prerequisites:**
-- Google Cloud project with Vertex AI API enabled
+- Google Cloud project with Vertex AI API enabled and Claude models available
 - Application Default Credentials configured: `gcloud auth application-default login`
+- The `~/.config/gcloud/` directory must be uploaded to sandboxes for OAuth token refresh
 
 **Usage:**
-- **Direct API calls:** Attach this provider to sandboxes to inject the project ID credential. Call Vertex AI directly from your code using the Anthropic SDK.
-- **Inference routing:** Configure `inference.local` to proxy requests to Vertex AI (see "Set Inference Routing" section below).
+- **Direct API calls:** Tools like `claude` CLI automatically use Vertex AI when `CLAUDE_CODE_USE_VERTEX=1` is set
+- **Inference routing:** Configure `inference.local` to proxy requests to Vertex AI (see "Set Inference Routing" section below)
 
-**Known Limitation:** When using inference routing, GCP OAuth authentication is not yet fully implemented. The provider can be created and configured, but API calls through `inference.local` will fail until OAuth token generation is implemented. Direct API calls from sandbox code using the Anthropic SDK work if you handle authentication yourself.
+**Model ID Format:** Use `@` separator for versions (e.g., `claude-sonnet-4-5@20250929`)
 
 ::::
 
+
 :::::
 
 ## Set Inference Routing
diff --git a/docs/sandboxes/manage-providers.md b/docs/sandboxes/manage-providers.md
index bd75b978f..716c16f5a 100644
--- a/docs/sandboxes/manage-providers.md
+++ b/docs/sandboxes/manage-providers.md
@@ -179,7 +179,7 @@ The following provider types are supported.
 | `nvidia` | `NVIDIA_API_KEY` | NVIDIA API Catalog |
 | `openai` | `OPENAI_API_KEY` | Any OpenAI-compatible endpoint. Set `--config OPENAI_BASE_URL` to point to the provider. Refer to {doc}`/inference/configure`. |
 | `opencode` | `OPENCODE_API_KEY`, `OPENROUTER_API_KEY`, `OPENAI_API_KEY` | opencode tool |
-| `vertex` | `ANTHROPIC_VERTEX_PROJECT_ID` | Google Cloud Vertex AI with Claude models. Requires GCP Application Default Credentials. **Note:** OAuth authentication not yet fully implemented. |
+| `vertex` | `ANTHROPIC_VERTEX_PROJECT_ID`, `VERTEX_OAUTH_TOKEN`, `CLAUDE_CODE_USE_VERTEX` | Google Cloud Vertex AI with Claude models. Automatically generates OAuth tokens from GCP Application Default Credentials. Set `ANTHROPIC_VERTEX_REGION` (optional, defaults to `us-central1`) to control the region. |
 
 :::{tip}
 Use the `generic` type for any service not listed above. You define the
@@ -194,6 +194,7 @@ The following providers have been tested with `inference.local`. Any provider th
 |---|---|---|---|---|
 | NVIDIA API Catalog | `nvidia-prod` | `nvidia` | `https://integrate.api.nvidia.com/v1` | `NVIDIA_API_KEY` |
 | Anthropic | `anthropic-prod` | `anthropic` | `https://api.anthropic.com` | `ANTHROPIC_API_KEY` |
+| Google Vertex AI | `vertex` | `vertex` | Auto-configured per region | `ANTHROPIC_VERTEX_PROJECT_ID` (OAuth auto-generated) |
 | Baseten | `baseten` | `openai` | `https://inference.baseten.co/v1` | `OPENAI_API_KEY` |
 | Bitdeer AI | `bitdeer` | `openai` | `https://api-inference.bitdeer.ai/v1` | `OPENAI_API_KEY` |
 | Deepinfra | `deepinfra` | `openai` | `https://api.deepinfra.com/v1/openai` | `OPENAI_API_KEY` |
diff --git a/scripts/rebuild-cluster.sh b/scripts/rebuild-cluster.sh
new file mode 100755
index 000000000..f836a832a
--- /dev/null
+++ b/scripts/rebuild-cluster.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Quick rebuild script for development
+# Restarts the cluster container with the latest code changes
+
+set -euo pipefail
+
+echo "=== OpenShell Quick Rebuild ==="
+echo ""
+
+# Stop and remove cluster container
+echo "Stopping cluster container..."
+podman stop openshell-cluster-openshell 2>/dev/null || true
+podman rm openshell-cluster-openshell 2>/dev/null || true
+
+# Remove old cluster image
+echo "Removing old cluster image..."
+podman rmi localhost/openshell/cluster:dev 2>/dev/null || true
+
+# Rebuild and start cluster
+echo "Rebuilding cluster with latest code..."
+mise run cluster:build:full
+
+echo ""
+echo "=== Rebuild Complete ==="
+echo ""
+echo "Next steps:"
+echo "  1. Recreate provider: openshell provider create --name <name> --type <type> --from-existing"
+echo "  2. Configure inference: openshell inference set --provider <name> --model <model>"
+echo "  3. Recreate sandboxes: openshell sandbox create ..."
+echo ""
diff --git a/tasks/scripts/cluster-deploy-fast.sh b/tasks/scripts/cluster-deploy-fast.sh
index 86fe9746d..9bdc6a604 100755
--- a/tasks/scripts/cluster-deploy-fast.sh
+++ b/tasks/scripts/cluster-deploy-fast.sh
@@ -28,6 +28,23 @@ log_duration() {
 	echo "${label} took $((end - start))s"
 }
 
+# Read lines into an array variable (bash 3 & 4 compatible)
+# Usage: read_lines_into_array array_name < <(command)
+read_lines_into_array() {
+  local array_name=$1
+  if ((BASH_VERSINFO[0] >= 4)); then
+    # Bash 4+: use mapfile (faster)
+    mapfile -t "$array_name"
+  else
+    # Bash 3: use while loop
+    local line
+    eval "$array_name=()"
+    while IFS= read -r line; do
+      eval "$array_name+=(\"\$line\")"
+    done
+  fi
+}
+
 if ! $CONTAINER_RUNTIME ps -q --filter "name=^${CONTAINER_NAME}$" --filter "health=healthy" | grep -q .; then
 	echo "Error: Cluster container '${CONTAINER_NAME}' is not running or not healthy."
 	echo "Start the cluster first with: mise run cluster"
@@ -86,7 +103,7 @@ fi
 
 declare -a changed_files=()
 detect_start=$(date +%s)
-mapfile -t changed_files < <(
+read_lines_into_array changed_files < <(
 	{
 		git diff --name-only
 		git diff --name-only --cached

From 308dc5cfd3f1358432e8d849460d1d6250877a3a Mon Sep 17 00:00:00 2001
From: itdove <dvernier@redhat.com>
Date: Mon, 6 Apr 2026 23:22:08 -0400
Subject: [PATCH 12/19] docs: add Vertex AI example with network policy

Added examples/vertex-ai/ directory with:
- sandbox-policy.yaml: Network policy for Vertex AI endpoints
- README.md: Quick start guide with links to full documentation

Provides ready-to-use policy file for Vertex AI integration.
---
 examples/vertex-ai/README.md           | 46 +++++++++++++++++++++
 examples/vertex-ai/sandbox-policy.yaml | 55 ++++++++++++++++++++++++++
 2 files changed, 101 insertions(+)
 create mode 100644 examples/vertex-ai/README.md
 create mode 100644 examples/vertex-ai/sandbox-policy.yaml

diff --git a/examples/vertex-ai/README.md b/examples/vertex-ai/README.md
new file mode 100644
index 000000000..ec0cdf78a
--- /dev/null
+++ b/examples/vertex-ai/README.md
@@ -0,0 +1,46 @@
+# Google Cloud Vertex AI Example
+
+This example demonstrates how to use OpenShell with Google Cloud Vertex AI to run Claude models via GCP infrastructure.
+
+## Quick Start
+
+```bash
+# Configure GCP credentials
+export ANTHROPIC_VERTEX_PROJECT_ID=your-gcp-project-id
+gcloud auth application-default login
+
+# Create provider
+openshell provider create --name vertex --type vertex --from-existing
+
+# Create sandbox with policy
+openshell sandbox create --name vertex-test --provider vertex \
+  --upload ~/.config/gcloud/:.config/gcloud/ \
+  --policy examples/vertex-ai/sandbox-policy.yaml
+
+# Inside sandbox
+claude  # Automatically uses Vertex AI
+```
+
+## What's Included
+
+- **`sandbox-policy.yaml`**: Network policy allowing Google OAuth and Vertex AI endpoints
+  - Supports major GCP regions (us-east5, us-central1, us-west1, europe-west1, europe-west4, asia-northeast1)
+  - Enables direct Claude CLI usage
+  - Enables `inference.local` routing
+
+## Documentation
+
+For detailed setup instructions, troubleshooting, and configuration options, see:
+
+- [Vertex AI Provider Configuration](../../docs/inference/configure.md#google-cloud-vertex-ai)
+- [Provider Management](../../docs/sandboxes/manage-providers.md)
+- [Inference Routing](../../docs/inference/configure.md)
+
+## Adding Regions
+
+To support additional GCP regions, add them to `sandbox-policy.yaml`:
+
+```yaml
+- host: asia-southeast1-aiplatform.googleapis.com
+  port: 443
+```
diff --git a/examples/vertex-ai/sandbox-policy.yaml b/examples/vertex-ai/sandbox-policy.yaml
new file mode 100644
index 000000000..81fa36d10
--- /dev/null
+++ b/examples/vertex-ai/sandbox-policy.yaml
@@ -0,0 +1,55 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Sandbox policy for Google Cloud Vertex AI
+#
+# This policy allows sandboxes to access Google Cloud endpoints required for
+# Vertex AI with Anthropic Claude models.
+
+version: 1
+
+network_policies:
+  google_vertex:
+    name: google-vertex
+    endpoints:
+      # Google OAuth endpoints for authentication
+      - host: oauth2.googleapis.com
+        port: 443
+      - host: accounts.google.com
+        port: 443
+      - host: www.googleapis.com
+        port: 443
+
+      # Vertex AI endpoints (global and regional)
+      - host: aiplatform.googleapis.com
+        port: 443
+      - host: us-east5-aiplatform.googleapis.com
+        port: 443
+      - host: us-central1-aiplatform.googleapis.com
+        port: 443
+      - host: us-west1-aiplatform.googleapis.com
+        port: 443
+      - host: europe-west1-aiplatform.googleapis.com
+        port: 443
+      - host: europe-west4-aiplatform.googleapis.com
+        port: 443
+      - host: asia-northeast1-aiplatform.googleapis.com
+        port: 443
+
+    binaries:
+      # Claude CLI for direct Vertex AI usage
+      - path: /usr/local/bin/claude
+      # Python for Anthropic SDK usage
+      - path: /usr/bin/python3
+      # curl for testing
+      - path: /usr/bin/curl
+
+  inference_local:
+    name: inference-local
+    endpoints:
+      # Local inference routing endpoint
+      - host: inference.local
+        port: 80
+    binaries:
+      - path: /usr/bin/curl
+      - path: /usr/bin/python3

From 83a94b9fbc61951e7997fbeeedf6ac2dbc787747 Mon Sep 17 00:00:00 2001
From: itdove <dvernier@redhat.com>
Date: Tue, 7 Apr 2026 09:53:28 -0400
Subject: [PATCH 13/19] fix(build): handle Podman --push flag and array
 expansion

Podman does not support --push flag in build command like Docker buildx.
This commit fixes two issues:

1. docker-build-image.sh: Filter out --push flag and execute push as
   separate command after build completes

2. docker-publish-multiarch.sh: Use safe array expansion syntax to avoid
   unbound variable errors with set -u when EXTRA_TAGS is empty

Note: Multi-arch builds with Podman still require manual workflow due to
cross-compilation toolchain issues. Use /tmp/build-multiarch-local.sh
for local multi-arch builds with QEMU emulation.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 tasks/scripts/docker-build-image.sh       | 11 ++++++++++-
 tasks/scripts/docker-publish-multiarch.sh |  4 ++--
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/tasks/scripts/docker-build-image.sh b/tasks/scripts/docker-build-image.sh
index 38b200a2e..a76b01d12 100755
--- a/tasks/scripts/docker-build-image.sh
+++ b/tasks/scripts/docker-build-image.sh
@@ -212,11 +212,13 @@ if [[ "${CONTAINER_RUNTIME}" == "podman" ]]; then
 		ARCH_ARGS+=(--build-arg "BUILDARCH=${TARGETARCH}")
 	fi
 
-	# Filter OUTPUT_ARGS: Podman stores images locally by default (no --load)
+	# Filter OUTPUT_ARGS: Podman doesn't support --load or --push in build command
 	PODMAN_OUTPUT_ARGS=()
+	PODMAN_SHOULD_PUSH=0
 	for arg in ${OUTPUT_ARGS[@]+"${OUTPUT_ARGS[@]}"}; do
 		case "${arg}" in
 		--load) ;; # implicit in Podman
+		--push) PODMAN_SHOULD_PUSH=1 ;; # push after build
 		*) PODMAN_OUTPUT_ARGS+=("${arg}") ;;
 		esac
 	done
@@ -227,6 +229,13 @@ if [[ "${CONTAINER_RUNTIME}" == "podman" ]]; then
 		${TLS_ARGS[@]+"${TLS_ARGS[@]}"} \
 		${PODMAN_OUTPUT_ARGS[@]+"${PODMAN_OUTPUT_ARGS[@]}"} \
 		.
+
+	# Push after build if requested (Podman doesn't support --push in build)
+	if [[ "${PODMAN_SHOULD_PUSH}" == "1" && "${IS_FINAL_IMAGE}" == "1" ]]; then
+		echo "Pushing ${IMAGE_NAME}:${IMAGE_TAG}..."
+		podman_local_tls_args "${IMAGE_NAME}"
+		podman push ${PODMAN_TLS_ARGS[@]+"${PODMAN_TLS_ARGS[@]}"} "${IMAGE_NAME}:${IMAGE_TAG}"
+	fi
 else
 	# Docker: use buildx
 	docker buildx build \
diff --git a/tasks/scripts/docker-publish-multiarch.sh b/tasks/scripts/docker-publish-multiarch.sh
index f83a7c203..e8185a952 100755
--- a/tasks/scripts/docker-publish-multiarch.sh
+++ b/tasks/scripts/docker-publish-multiarch.sh
@@ -50,7 +50,7 @@ echo
 echo "Building multi-arch cluster image..."
 tasks/scripts/docker-build-image.sh cluster
 
-TAGS_TO_APPLY=("${EXTRA_TAGS[@]}")
+TAGS_TO_APPLY=(${EXTRA_TAGS[@]+"${EXTRA_TAGS[@]}"})
 if [[ "${TAG_LATEST}" == "true" ]]; then
 	TAGS_TO_APPLY+=("latest")
 fi
@@ -58,7 +58,7 @@ fi
 if [[ ${#TAGS_TO_APPLY[@]} -gt 0 ]]; then
 	for component in gateway cluster; do
 		full_image="${REGISTRY}/${component}"
-		for tag in "${TAGS_TO_APPLY[@]}"; do
+		for tag in ${TAGS_TO_APPLY[@]+"${TAGS_TO_APPLY[@]}"}; do
 			[[ "${tag}" == "${IMAGE_TAG}" ]] && continue
 			echo "Tagging ${full_image}:${tag}..."
 			if [[ "${CONTAINER_RUNTIME}" == "podman" ]]; then

From b2d65457a193561ffcfde5ffce6545608c0e3f35 Mon Sep 17 00:00:00 2001
From: itdove <dvernier@redhat.com>
Date: Tue, 7 Apr 2026 12:55:19 -0400
Subject: [PATCH 14/19] feat(build): add Podman multi-arch support to
 docker-publish-multiarch.sh

Add Podman-specific multi-architecture build logic to complement existing
Docker buildx support. Podman builds each platform sequentially using
manifest lists, while Docker buildx builds in parallel.

Changes:
- Detect Podman and use manifest-based approach for multi-arch builds
- Build each platform (arm64, amd64) separately with explicit TARGETARCH
- Create and push manifest list combining all architectures
- Preserve existing Docker buildx workflow unchanged
- Add informative logging about sequential vs parallel builds

Build times:
- Podman: Sequential builds (~30-40 min on Linux, ~45-60 min on macOS)
- Docker buildx: Parallel builds (~20-30 min)

This enables multi-arch image publishing on systems using Podman as the
container runtime, supporting both Apple Silicon and Intel architectures.
---
 tasks/scripts/docker-publish-multiarch.sh | 66 +++++++++++++++++++----
 1 file changed, 57 insertions(+), 9 deletions(-)

diff --git a/tasks/scripts/docker-publish-multiarch.sh b/tasks/scripts/docker-publish-multiarch.sh
index e8185a952..398c97c00 100755
--- a/tasks/scripts/docker-publish-multiarch.sh
+++ b/tasks/scripts/docker-publish-multiarch.sh
@@ -27,8 +27,56 @@ fi
 
 if [[ "${CONTAINER_RUNTIME}" == "podman" ]]; then
 	echo "Using Podman for multi-arch build (podman manifest)"
+	echo "Note: Podman builds platforms sequentially (slower than Docker buildx)"
 	export DOCKER_BUILDER=""
+
+	# Podman: build each platform separately and create manifest
+	IFS=',' read -ra PLATFORM_ARRAY <<< "${PLATFORMS}"
+
+	for component in gateway cluster; do
+		full_image="${REGISTRY}/${component}"
+		echo ""
+		echo "=== Building multi-arch ${component} image ==="
+
+		# Create manifest list
+		podman manifest rm "${full_image}:${IMAGE_TAG}" 2>/dev/null || true
+		podman manifest create "${full_image}:${IMAGE_TAG}"
+
+		# Build for each platform
+		for platform in "${PLATFORM_ARRAY[@]}"; do
+			arch="${platform##*/}"
+			case "${arch}" in
+				amd64) target_arch="amd64" ;;
+				arm64) target_arch="arm64" ;;
+				*) echo "Unsupported arch: ${arch}" >&2; exit 1 ;;
+			esac
+
+			echo "Building ${component} for ${platform}..."
+
+			# Package Helm chart for cluster builds
+			if [[ "${component}" == "cluster" ]]; then
+				mkdir -p deploy/docker/.build/charts
+				helm package deploy/helm/openshell -d deploy/docker/.build/charts/ >/dev/null
+			fi
+
+			# Build with explicit TARGETARCH/BUILDARCH to avoid cross-compilation
+			# (QEMU emulation handles running the different architecture)
+			podman build --platform "${platform}" \
+				--build-arg TARGETARCH="${target_arch}" \
+				--build-arg BUILDARCH="${target_arch}" \
+				--manifest "${full_image}:${IMAGE_TAG}" \
+				-f deploy/docker/Dockerfile.images \
+				--target "${component}" \
+				.
+		done
+
+		# Push manifest
+		echo "Pushing ${full_image}:${IMAGE_TAG}..."
+		podman manifest push "${full_image}:${IMAGE_TAG}" \
+			"docker://${full_image}:${IMAGE_TAG}"
+	done
 else
+	# Docker: use buildx
 	BUILDER_NAME=${DOCKER_BUILDER:-multiarch}
 	if docker buildx inspect "${BUILDER_NAME}" >/dev/null 2>&1; then
 		echo "Using existing buildx builder: ${BUILDER_NAME}"
@@ -38,17 +86,17 @@ else
 		docker buildx create --name "${BUILDER_NAME}" --use --bootstrap
 	fi
 	export DOCKER_BUILDER="${BUILDER_NAME}"
-fi
-export DOCKER_PLATFORM="${PLATFORMS}"
-export DOCKER_PUSH=1
-export IMAGE_REGISTRY="${REGISTRY}"
+	export DOCKER_PLATFORM="${PLATFORMS}"
+	export DOCKER_PUSH=1
+	export IMAGE_REGISTRY="${REGISTRY}"
 
-echo "Building multi-arch gateway image..."
-tasks/scripts/docker-build-image.sh gateway
+	echo "Building multi-arch gateway image..."
+	tasks/scripts/docker-build-image.sh gateway
 
-echo
-echo "Building multi-arch cluster image..."
-tasks/scripts/docker-build-image.sh cluster
+	echo
+	echo "Building multi-arch cluster image..."
+	tasks/scripts/docker-build-image.sh cluster
+fi
 
 TAGS_TO_APPLY=(${EXTRA_TAGS[@]+"${EXTRA_TAGS[@]}"})
 if [[ "${TAG_LATEST}" == "true" ]]; then

From 8a27b2fa20dd1a882e7553986fe0fc9a90945f33 Mon Sep 17 00:00:00 2001
From: itdove <dvernier@redhat.com>
Date: Tue, 7 Apr 2026 14:47:26 -0400
Subject: [PATCH 15/19] fix: apply cargo fmt formatting to vertex provider

Fix CI formatting check failures:
- Split long .insert() calls across multiple lines
- Reformat MockDiscoveryContext initialization

No functional changes, formatting only.
---
 crates/openshell-providers/src/providers/vertex.rs | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/crates/openshell-providers/src/providers/vertex.rs b/crates/openshell-providers/src/providers/vertex.rs
index 6daadd5f9..de8d45d31 100644
--- a/crates/openshell-providers/src/providers/vertex.rs
+++ b/crates/openshell-providers/src/providers/vertex.rs
@@ -49,7 +49,9 @@ impl ProviderPlugin for VertexProvider {
 
             // Set CLAUDE_CODE_USE_VERTEX=1 to enable Vertex AI in claude CLI
             // Must be in credentials (not config) to be injected into sandbox environment
-            provider.credentials.insert("CLAUDE_CODE_USE_VERTEX".to_string(), "1".to_string());
+            provider
+                .credentials
+                .insert("CLAUDE_CODE_USE_VERTEX".to_string(), "1".to_string());
 
             // Generate OAuth token from Application Default Credentials
             // Try to generate token, but don't fail if we're in a nested runtime context
@@ -65,7 +67,9 @@ impl ProviderPlugin for VertexProvider {
             if let Some(token) = token {
                 // Store the OAuth token as VERTEX_OAUTH_TOKEN
                 // The inference router will use this as the Bearer token
-                provider.credentials.insert("VERTEX_OAUTH_TOKEN".to_string(), token);
+                provider
+                    .credentials
+                    .insert("VERTEX_OAUTH_TOKEN".to_string(), token);
             }
         }
 
@@ -85,8 +89,8 @@ mod tests {
 
     #[test]
     fn discovers_vertex_env_credentials() {
-        let ctx = MockDiscoveryContext::new()
-            .with_env("ANTHROPIC_VERTEX_PROJECT_ID", "my-gcp-project");
+        let ctx =
+            MockDiscoveryContext::new().with_env("ANTHROPIC_VERTEX_PROJECT_ID", "my-gcp-project");
         let discovered = discover_with_spec(&SPEC, &ctx)
             .expect("discovery")
             .expect("provider");

From 8241dc702323efd89281a42b458e84e22cd5b2b1 Mon Sep 17 00:00:00 2001
From: itdove <dvernier@redhat.com>
Date: Tue, 7 Apr 2026 16:16:06 -0400
Subject: [PATCH 16/19] refactor: remove OAuth token storage from Vertex
 provider

Remove short-lived OAuth token generation and storage in gateway database.
Tokens are now generated on-demand inside sandboxes from uploaded ADC files.

Changes:
- Remove generate_oauth_token() function and gcp_auth dependency
- Remove VERTEX_OAUTH_TOKEN from direct credential injection
- Remove OAuth token insertion in discover_existing()
- Add unset IMAGE_TAG/TAG_LATEST in podman.env to prevent build conflicts
- Update Cargo.lock to remove gcp_auth dependency tree

Benefits:
- No stale token pollution in database
- Tokens generated fresh on-demand (auto-refresh via ADC)
- Simpler provider creation (synchronous, no async OAuth)
- Reduced dependency footprint (removes 32 packages)
- Better security (tokens not persisted in database)

Token lifecycle:
- Provider stores only ANTHROPIC_VERTEX_PROJECT_ID and region
- Sandboxes require --upload ~/.config/gcloud/ for token generation
- Claude CLI uses gcp_auth to generate/refresh tokens from ADC
- Tokens valid for 1 hour, automatically refreshed via refresh token
---
 Cargo.lock                                    | 38 -------------------
 crates/openshell-providers/Cargo.toml         |  2 -
 .../src/providers/vertex.rs                   | 37 ++----------------
 crates/openshell-sandbox/src/secrets.rs       |  4 +-
 scripts/podman.env                            |  5 +++
 5 files changed, 12 insertions(+), 74 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 1e2b542ee..98797cc24 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1509,32 +1509,6 @@ dependencies = [
  "slab",
 ]
 
-[[package]]
-name = "gcp_auth"
-version = "0.12.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c2b3d0b409a042a380111af38136310839af8ac1a0917fb6e84515ed1e4bf3ee"
-dependencies = [
- "async-trait",
- "base64 0.22.1",
- "bytes",
- "chrono",
- "http",
- "http-body-util",
- "hyper",
- "hyper-rustls",
- "hyper-util",
- "ring",
- "rustls-pki-types",
- "serde",
- "serde_json",
- "thiserror 2.0.18",
- "tokio",
- "tracing",
- "tracing-futures",
- "url",
-]
-
 [[package]]
 name = "generic-array"
 version = "0.14.7"
@@ -2945,10 +2919,8 @@ dependencies = [
 name = "openshell-providers"
 version = "0.0.0"
 dependencies = [
- "gcp_auth",
  "openshell-core",
  "thiserror 2.0.18",
- "tokio",
 ]
 
 [[package]]
@@ -5406,16 +5378,6 @@ dependencies = [
  "valuable",
 ]
 
-[[package]]
-name = "tracing-futures"
-version = "0.2.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2"
-dependencies = [
- "pin-project",
- "tracing",
-]
-
 [[package]]
 name = "tracing-log"
 version = "0.2.0"
diff --git a/crates/openshell-providers/Cargo.toml b/crates/openshell-providers/Cargo.toml
index 0cf14ec2b..41f9ed6c0 100644
--- a/crates/openshell-providers/Cargo.toml
+++ b/crates/openshell-providers/Cargo.toml
@@ -13,8 +13,6 @@ repository.workspace = true
 [dependencies]
 openshell-core = { path = "../openshell-core" }
 thiserror = { workspace = true }
-gcp_auth = "0.12"
-tokio = { workspace = true }
 
 [lints]
 workspace = true
diff --git a/crates/openshell-providers/src/providers/vertex.rs b/crates/openshell-providers/src/providers/vertex.rs
index de8d45d31..5b2ecdf9d 100644
--- a/crates/openshell-providers/src/providers/vertex.rs
+++ b/crates/openshell-providers/src/providers/vertex.rs
@@ -16,21 +16,6 @@ pub const SPEC: ProviderDiscoverySpec = ProviderDiscoverySpec {
 // Additional config keys for Vertex AI
 const VERTEX_CONFIG_KEYS: &[&str] = &["ANTHROPIC_VERTEX_REGION"];
 
-/// Generate an OAuth token from GCP Application Default Credentials for Vertex AI.
-///
-/// Returns `None` if ADC is not configured or token generation fails.
-async fn generate_oauth_token() -> Option<String> {
-    // Try to find an appropriate token provider (checks ADC, service account, metadata server, etc.)
-    let provider = gcp_auth::provider().await.ok()?;
-
-    // Get token for Vertex AI scope
-    // Vertex AI uses the Cloud Platform scope
-    let scopes = &["https://www.googleapis.com/auth/cloud-platform"];
-    let token = provider.token(scopes).await.ok()?;
-
-    Some(token.as_str().to_string())
-}
-
 impl ProviderPlugin for VertexProvider {
     fn id(&self) -> &'static str {
         SPEC.id
@@ -53,24 +38,10 @@ impl ProviderPlugin for VertexProvider {
                 .credentials
                 .insert("CLAUDE_CODE_USE_VERTEX".to_string(), "1".to_string());
 
-            // Generate OAuth token from Application Default Credentials
-            // Try to generate token, but don't fail if we're in a nested runtime context
-            let token = std::thread::spawn(|| {
-                tokio::runtime::Runtime::new()
-                    .ok()
-                    .and_then(|rt| rt.block_on(generate_oauth_token()))
-            })
-            .join()
-            .ok()
-            .flatten();
-
-            if let Some(token) = token {
-                // Store the OAuth token as VERTEX_OAUTH_TOKEN
-                // The inference router will use this as the Bearer token
-                provider
-                    .credentials
-                    .insert("VERTEX_OAUTH_TOKEN".to_string(), token);
-            }
+            // NOTE: We do NOT generate/store VERTEX_OAUTH_TOKEN here.
+            // OAuth tokens are short-lived (~1 hour) and storing them leads to stale token pollution.
+            // Instead, sandboxes generate fresh tokens on-demand from the uploaded ADC file
+            // (requires --upload ~/.config/gcloud/:.config/gcloud/ when creating sandbox).
         }
 
         Ok(discovered)
diff --git a/crates/openshell-sandbox/src/secrets.rs b/crates/openshell-sandbox/src/secrets.rs
index 0cd188b6e..87c353c83 100644
--- a/crates/openshell-sandbox/src/secrets.rs
+++ b/crates/openshell-sandbox/src/secrets.rs
@@ -23,8 +23,10 @@ pub(crate) const PLACEHOLDER_PREFIX_PUBLIC: &str = PLACEHOLDER_PREFIX;
 fn direct_inject_credentials() -> &'static [&'static str] {
     &[
         // Vertex AI credentials for claude CLI
+        // NOTE: VERTEX_OAUTH_TOKEN is NOT included here - sandboxes generate
+        // fresh tokens on-demand from the uploaded ADC file instead of using
+        // a pre-generated (and likely expired) token from the provider database.
         "ANTHROPIC_VERTEX_PROJECT_ID",
-        "VERTEX_OAUTH_TOKEN",
         "ANTHROPIC_VERTEX_REGION",
         "CLAUDE_CODE_USE_VERTEX",
     ]
diff --git a/scripts/podman.env b/scripts/podman.env
index 5aba469b2..459627c0e 100644
--- a/scripts/podman.env
+++ b/scripts/podman.env
@@ -8,6 +8,11 @@
 
 MACHINE_NAME="${PODMAN_MACHINE_NAME:-openshell}"
 
+# Clear variables from other build workflows that would interfere with local development
+unset IMAGE_TAG
+unset TAG_LATEST
+unset REGISTRY
+
 # Get Podman socket path from the machine
 if command -v podman &>/dev/null; then
     SOCKET_PATH=$(podman machine inspect "${MACHINE_NAME}" --format '{{.ConnectionInfo.PodmanSocket.Path}}' 2>/dev/null)

From 987b2a0e4d2d6154aa3ba19634c0a6eed843b609 Mon Sep 17 00:00:00 2001
From: itdove <dvernier@redhat.com>
Date: Tue, 7 Apr 2026 16:27:27 -0400
Subject: [PATCH 17/19] docs(vertex): improve ADC detection and troubleshooting
 docs

- Check for ADC in both GOOGLE_APPLICATION_CREDENTIALS and default location
- Add critical warning about --upload ~/.config/gcloud/ requirement
- Document security model for credential injection strategy
- Add comprehensive troubleshooting section with solutions for:
  - Authentication failures (missing ADC)
  - Project not found errors
  - Region not supported errors
---
 .../src/providers/vertex.rs                   | 28 ++++++
 examples/vertex-ai/README.md                  | 93 +++++++++++++++++--
 2 files changed, 115 insertions(+), 6 deletions(-)

diff --git a/crates/openshell-providers/src/providers/vertex.rs b/crates/openshell-providers/src/providers/vertex.rs
index 5b2ecdf9d..38d54a24e 100644
--- a/crates/openshell-providers/src/providers/vertex.rs
+++ b/crates/openshell-providers/src/providers/vertex.rs
@@ -42,6 +42,34 @@ impl ProviderPlugin for VertexProvider {
             // OAuth tokens are short-lived (~1 hour) and storing them leads to stale token pollution.
             // Instead, sandboxes generate fresh tokens on-demand from the uploaded ADC file
             // (requires --upload ~/.config/gcloud/:.config/gcloud/ when creating sandbox).
+
+            // Warn if ADC doesn't exist on host
+            let adc_exists = if let Ok(custom_path) = std::env::var("GOOGLE_APPLICATION_CREDENTIALS") {
+                std::path::Path::new(&custom_path).exists()
+            } else {
+                let default_path = format!(
+                    "{}/.config/gcloud/application_default_credentials.json",
+                    std::env::var("HOME").unwrap_or_default()
+                );
+                std::path::Path::new(&default_path).exists()
+            };
+
+            if !adc_exists {
+                eprintln!();
+                eprintln!("⚠️  Warning: GCP Application Default Credentials not found");
+                eprintln!("   Sandboxes will need ADC uploaded to generate OAuth tokens.");
+                eprintln!();
+                eprintln!("   Configure ADC with:");
+                eprintln!("     gcloud auth application-default login");
+                eprintln!();
+                eprintln!("   Or use a service account key:");
+                eprintln!("     export GOOGLE_APPLICATION_CREDENTIALS=/path/to/key.json");
+                eprintln!();
+                eprintln!("   Then upload credentials when creating sandboxes:");
+                eprintln!("     openshell sandbox create --provider vertex \\");
+                eprintln!("       --upload ~/.config/gcloud/:.config/gcloud/");
+                eprintln!();
+            }
         }
 
         Ok(discovered)
diff --git a/examples/vertex-ai/README.md b/examples/vertex-ai/README.md
index ec0cdf78a..2423c3d04 100644
--- a/examples/vertex-ai/README.md
+++ b/examples/vertex-ai/README.md
@@ -2,22 +2,32 @@
 
 This example demonstrates how to use OpenShell with Google Cloud Vertex AI to run Claude models via GCP infrastructure.
 
+## ⚠️ Critical Requirement
+
+Vertex AI sandboxes **MUST** upload GCP credentials to generate OAuth tokens:
+
+```bash
+--upload ~/.config/gcloud/:.config/gcloud/
+```
+
+Without this upload, token generation will fail and sandboxes cannot connect to Vertex AI.
+
 ## Quick Start
 
 ```bash
-# Configure GCP credentials
+# 1. Configure GCP credentials
 export ANTHROPIC_VERTEX_PROJECT_ID=your-gcp-project-id
 gcloud auth application-default login
 
-# Create provider
+# 2. Create provider
 openshell provider create --name vertex --type vertex --from-existing
 
-# Create sandbox with policy
+# 3. Create sandbox with credentials uploaded
 openshell sandbox create --name vertex-test --provider vertex \
-  --upload ~/.config/gcloud/:.config/gcloud/ \
+  --upload ~/.config/gcloud/:.config/gcloud/ \  # ← REQUIRED
   --policy examples/vertex-ai/sandbox-policy.yaml
 
-# Inside sandbox
+# 4. Inside sandbox
 claude  # Automatically uses Vertex AI
 ```
 
@@ -28,9 +38,80 @@ claude  # Automatically uses Vertex AI
   - Enables direct Claude CLI usage
   - Enables `inference.local` routing
 
+## Security Model
+
+### Credential Injection
+
+Vertex AI uses selective credential injection for CLI tool compatibility:
+
+**Directly injected (visible in `/proc/<pid>/environ`):**
+- `ANTHROPIC_VERTEX_PROJECT_ID` - Not sensitive (public project ID, visible in API URLs)
+- `CLAUDE_CODE_USE_VERTEX` - Configuration flag (boolean)
+- `ANTHROPIC_VERTEX_REGION` - Public metadata (region name)
+
+**Generated in sandbox (not stored in gateway database):**
+- OAuth access tokens - Generated on-demand from uploaded ADC file, automatically refreshed
+
+**Trade-off:** Direct injection required for Claude CLI compatibility (cannot use HTTP proxy placeholders). Risk is low since no secrets are exposed via environment variables.
+
+## Troubleshooting
+
+### "Authentication failed" or "invalid credentials"
+
+**Cause:** Sandbox cannot generate OAuth tokens (ADC file not uploaded or missing).
+
+**Solution:**
+1. Verify ADC exists on host:
+   ```bash
+   ls -la ~/.config/gcloud/application_default_credentials.json
+   ```
+
+2. If missing, configure ADC:
+   ```bash
+   gcloud auth application-default login
+   ```
+
+3. Ensure sandbox creation includes upload:
+   ```bash
+   openshell sandbox create --provider vertex \
+     --upload ~/.config/gcloud/:.config/gcloud/  # ← Required
+   ```
+
+### "Project not found" errors
+
+**Cause:** Invalid or inaccessible GCP project ID.
+
+**Solution:**
+1. Verify project exists and you have access:
+   ```bash
+   gcloud projects describe $ANTHROPIC_VERTEX_PROJECT_ID
+   ```
+
+2. Check Vertex AI API is enabled:
+   ```bash
+   gcloud services list --enabled --project=$ANTHROPIC_VERTEX_PROJECT_ID | grep aiplatform
+   ```
+
+3. Enable if needed:
+   ```bash
+   gcloud services enable aiplatform.googleapis.com --project=$ANTHROPIC_VERTEX_PROJECT_ID
+   ```
+
+### "Region not supported" errors
+
+**Cause:** Vertex AI endpoint for your region not in network policy.
+
+**Solution:** Add region to `sandbox-policy.yaml`:
+```yaml
+- host: your-region-aiplatform.googleapis.com
+  port: 443
+```
+
+Supported regions: us-central1, us-east5, us-west1, europe-west1, europe-west4, asia-northeast1, asia-southeast1
+
 ## Documentation
 
-For detailed setup instructions, troubleshooting, and configuration options, see:
+For detailed setup instructions and configuration options, see:
 
 - [Vertex AI Provider Configuration](../../docs/inference/configure.md#google-cloud-vertex-ai)
 - [Provider Management](../../docs/sandboxes/manage-providers.md)

From c58f3c7eec90b8dd252e4943ee1c9f062e42515f Mon Sep 17 00:00:00 2001
From: itdove <dvernier@redhat.com>
Date: Tue, 7 Apr 2026 16:29:40 -0400
Subject: [PATCH 18/19] style(vertex): apply cargo fmt formatting

---
 .../src/providers/vertex.rs                   | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/crates/openshell-providers/src/providers/vertex.rs b/crates/openshell-providers/src/providers/vertex.rs
index 38d54a24e..f5b5b67d0 100644
--- a/crates/openshell-providers/src/providers/vertex.rs
+++ b/crates/openshell-providers/src/providers/vertex.rs
@@ -44,15 +44,16 @@ impl ProviderPlugin for VertexProvider {
             // (requires --upload ~/.config/gcloud/:.config/gcloud/ when creating sandbox).
 
             // Warn if ADC doesn't exist on host
-            let adc_exists = if let Ok(custom_path) = std::env::var("GOOGLE_APPLICATION_CREDENTIALS") {
-                std::path::Path::new(&custom_path).exists()
-            } else {
-                let default_path = format!(
-                    "{}/.config/gcloud/application_default_credentials.json",
-                    std::env::var("HOME").unwrap_or_default()
-                );
-                std::path::Path::new(&default_path).exists()
-            };
+            let adc_exists =
+                if let Ok(custom_path) = std::env::var("GOOGLE_APPLICATION_CREDENTIALS") {
+                    std::path::Path::new(&custom_path).exists()
+                } else {
+                    let default_path = format!(
+                        "{}/.config/gcloud/application_default_credentials.json",
+                        std::env::var("HOME").unwrap_or_default()
+                    );
+                    std::path::Path::new(&default_path).exists()
+                };
 
             if !adc_exists {
                 eprintln!();

From c6a63eaaaeacd8b59ecf8cd3b3b620b5b59a36ca Mon Sep 17 00:00:00 2001
From: itdove <dvernier@redhat.com>
Date: Wed, 8 Apr 2026 12:04:32 -0400
Subject: [PATCH 19/19] fix(docker): resolve DNF package dependency conflict in
 cluster build

Add --no-best --skip-broken flags to dnf install in cluster image build
to handle util-linux package dependency on liblastlog2 which has broken
dependencies in the hummingbird repository.

This allows the cluster image build to complete successfully by skipping
the problematic package version and selecting an alternative that satisfies
dependencies.
---
 deploy/docker/Dockerfile.images | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deploy/docker/Dockerfile.images b/deploy/docker/Dockerfile.images
index 837f4fb9c..7c9187dd1 100644
--- a/deploy/docker/Dockerfile.images
+++ b/deploy/docker/Dockerfile.images
@@ -230,7 +230,7 @@ FROM quay.io/hummingbird/core-runtime:latest-builder AS cluster
 USER root
 
 RUN dnf install -y fedora-repos && \
-    dnf install -y \
+    dnf install -y --no-best --skip-broken \
     ca-certificates \
     iptables \
     util-linux \