diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 19a398a32..d759863a8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -176,6 +176,23 @@ These are the primary `mise` tasks for day-to-day development: | `mise run docs` | Build and serve documentation locally | | `mise run clean` | Clean build artifacts | +## Rebuilding After Code Changes + +When developing OpenShell core components (gateway, router, sandbox supervisor), you need to rebuild the cluster to test your changes: + +```bash +bash scripts/rebuild-cluster.sh +``` + +This script stops the cluster, rebuilds the image with your changes, and restarts it. + +**After rebuilding:** +- Providers need to be recreated (gateway database was reset) +- Inference routing needs to be reconfigured +- Sandboxes need to be recreated + +For a complete cleanup, see the cleanup scripts in the `scripts/` directory. + ## Project Structure | Path | Purpose | diff --git a/cleanup-openshell-podman-macos.sh b/cleanup-openshell-podman-macos.sh index 43efd8dd5..d6b80a411 100755 --- a/cleanup-openshell-podman-macos.sh +++ b/cleanup-openshell-podman-macos.sh @@ -11,19 +11,43 @@ set -e echo "=== OpenShell Podman Cleanup Script ===" echo "" +# Delete all sandboxes first (before destroying gateway) +echo "Deleting all sandboxes..." +if command -v openshell &>/dev/null; then + # Get list of sandboxes and delete each one + openshell sandbox list --no-header 2>/dev/null | awk '{print $1}' | while read -r sandbox; do + if [ -n "$sandbox" ]; then + echo " Deleting sandbox: $sandbox" + openshell sandbox delete "$sandbox" 2>/dev/null || true + fi + done +fi + # Destroy OpenShell gateway (if it exists) echo "Destroying OpenShell gateway..." if command -v openshell &>/dev/null; then openshell gateway destroy --name openshell 2>/dev/null || true fi -# Stop and remove any running OpenShell containers -echo "Stopping OpenShell containers..." 
-podman ps -a | grep openshell | awk '{print $1}' | xargs -r podman rm -f || true +# Stop and remove cluster container +echo "Stopping cluster container..." +podman stop openshell-cluster-openshell 2>/dev/null || true +podman rm openshell-cluster-openshell 2>/dev/null || true + +# Stop and remove local registry container +echo "Stopping local registry..." +podman stop openshell-local-registry 2>/dev/null || true +podman rm openshell-local-registry 2>/dev/null || true + +# Stop and remove any other OpenShell containers +echo "Cleaning up remaining OpenShell containers..." +podman ps -a | grep openshell | awk '{print $1}' | xargs -r podman rm -f 2>/dev/null || true # Remove OpenShell images echo "Removing OpenShell images..." -podman images | grep -E "openshell|cluster" | awk '{print $3}' | xargs -r podman rmi -f || true +podman rmi localhost/openshell/cluster:dev 2>/dev/null || true +podman rmi localhost/openshell/gateway:dev 2>/dev/null || true +podman images | grep -E "openshell|127.0.0.1:5000/openshell" | awk '{print $3}' | xargs -r podman rmi -f 2>/dev/null || true # Remove CLI binary echo "Removing CLI binary..." @@ -41,8 +65,11 @@ rm -rf ~/.openshell echo "Removing build artifacts..." SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" cd "$SCRIPT_DIR" -rm -rf target/ -rm -rf deploy/docker/.build/ +if command -v cargo &>/dev/null; then + echo " Running cargo clean..." + cargo clean 2>/dev/null || true +fi +rm -rf deploy/docker/.build/ 2>/dev/null || true # Clean Podman cache echo "Cleaning Podman build cache..." @@ -51,6 +78,13 @@ podman system prune -af --volumes echo "" echo "=== Cleanup Complete ===" echo "" +echo "OpenShell containers, images, and configuration have been removed." +echo "" +echo "To reinstall OpenShell:" +echo " 1. source scripts/podman.env" +echo " 2. mise run cluster:build:full" +echo " 3. 
cargo install --path crates/openshell-cli --root ~/.local" +echo "" echo "To completely remove the OpenShell Podman machine:" echo " podman machine stop openshell" echo " podman machine rm openshell" diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs index 0d546c7b1..5277ab805 100644 --- a/crates/openshell-cli/src/main.rs +++ b/crates/openshell-cli/src/main.rs @@ -615,6 +615,7 @@ enum CliProviderType { Gitlab, Github, Outlook, + Vertex, } #[derive(Clone, Debug, ValueEnum)] @@ -646,6 +647,7 @@ impl CliProviderType { Self::Gitlab => "gitlab", Self::Github => "github", Self::Outlook => "outlook", + Self::Vertex => "vertex", } } } diff --git a/crates/openshell-core/src/inference.rs b/crates/openshell-core/src/inference.rs index a06c427f8..0973f25db 100644 --- a/crates/openshell-core/src/inference.rs +++ b/crates/openshell-core/src/inference.rs @@ -86,6 +86,19 @@ static NVIDIA_PROFILE: InferenceProviderProfile = InferenceProviderProfile { default_headers: &[], }; +static VERTEX_PROFILE: InferenceProviderProfile = InferenceProviderProfile { + provider_type: "vertex", + // Base URL template - actual URL constructed at request time with project/region/model + default_base_url: "https://us-central1-aiplatform.googleapis.com/v1", + protocols: ANTHROPIC_PROTOCOLS, + // Look for OAuth token first, fallback to project ID (for manual config) + credential_key_names: &["VERTEX_OAUTH_TOKEN", "ANTHROPIC_VERTEX_PROJECT_ID"], + base_url_config_keys: &["VERTEX_BASE_URL", "ANTHROPIC_VERTEX_REGION"], + // Vertex uses OAuth Bearer tokens, not x-api-key + auth: AuthHeader::Bearer, + default_headers: &[("anthropic-version", "vertex-2023-10-16")], +}; + /// Look up the inference provider profile for a given provider type. 
/// /// Returns `None` for provider types that don't support inference routing @@ -95,6 +108,7 @@ pub fn profile_for(provider_type: &str) -> Option<&'static InferenceProviderProf "openai" => Some(&OPENAI_PROFILE), "anthropic" => Some(&ANTHROPIC_PROFILE), "nvidia" => Some(&NVIDIA_PROFILE), + "vertex" => Some(&VERTEX_PROFILE), _ => None, } } @@ -176,6 +190,7 @@ mod tests { assert!(profile_for("openai").is_some()); assert!(profile_for("anthropic").is_some()); assert!(profile_for("nvidia").is_some()); + assert!(profile_for("vertex").is_some()); assert!(profile_for("OpenAI").is_some()); // case insensitive } diff --git a/crates/openshell-providers/src/lib.rs b/crates/openshell-providers/src/lib.rs index e2bcc0c09..2fa771950 100644 --- a/crates/openshell-providers/src/lib.rs +++ b/crates/openshell-providers/src/lib.rs @@ -86,6 +86,7 @@ impl ProviderRegistry { registry.register(providers::gitlab::GitlabProvider); registry.register(providers::github::GithubProvider); registry.register(providers::outlook::OutlookProvider); + registry.register(providers::vertex::VertexProvider); registry } @@ -138,6 +139,7 @@ pub fn normalize_provider_type(input: &str) -> Option<&'static str> { "gitlab" | "glab" => Some("gitlab"), "github" | "gh" => Some("github"), "outlook" => Some("outlook"), + "vertex" => Some("vertex"), _ => None, } } diff --git a/crates/openshell-providers/src/providers/mod.rs b/crates/openshell-providers/src/providers/mod.rs index 6fe395135..19f9c54a5 100644 --- a/crates/openshell-providers/src/providers/mod.rs +++ b/crates/openshell-providers/src/providers/mod.rs @@ -12,3 +12,4 @@ pub mod nvidia; pub mod openai; pub mod opencode; pub mod outlook; +pub mod vertex; diff --git a/crates/openshell-providers/src/providers/vertex.rs b/crates/openshell-providers/src/providers/vertex.rs new file mode 100644 index 000000000..f5b5b67d0 --- /dev/null +++ b/crates/openshell-providers/src/providers/vertex.rs @@ -0,0 +1,102 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 
NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use crate::{ + DiscoveredProvider, ProviderDiscoverySpec, ProviderError, ProviderPlugin, RealDiscoveryContext, + discover_with_spec, +}; + +pub struct VertexProvider; + +pub const SPEC: ProviderDiscoverySpec = ProviderDiscoverySpec { + id: "vertex", + credential_env_vars: &["ANTHROPIC_VERTEX_PROJECT_ID"], +}; + +// Additional config keys for Vertex AI +const VERTEX_CONFIG_KEYS: &[&str] = &["ANTHROPIC_VERTEX_REGION"]; + +impl ProviderPlugin for VertexProvider { + fn id(&self) -> &'static str { + SPEC.id + } + + fn discover_existing(&self) -> Result, ProviderError> { + let mut discovered = discover_with_spec(&SPEC, &RealDiscoveryContext)?; + + // Add region config if present + if let Some(ref mut provider) = discovered { + for &key in VERTEX_CONFIG_KEYS { + if let Ok(value) = std::env::var(key) { + provider.config.insert(key.to_string(), value); + } + } + + // Set CLAUDE_CODE_USE_VERTEX=1 to enable Vertex AI in claude CLI + // Must be in credentials (not config) to be injected into sandbox environment + provider + .credentials + .insert("CLAUDE_CODE_USE_VERTEX".to_string(), "1".to_string()); + + // NOTE: We do NOT generate/store VERTEX_OAUTH_TOKEN here. + // OAuth tokens are short-lived (~1 hour) and storing them leads to stale token pollution. + // Instead, sandboxes generate fresh tokens on-demand from the uploaded ADC file + // (requires --upload ~/.config/gcloud/:.config/gcloud/ when creating sandbox). 
+ + // Warn if ADC doesn't exist on host + let adc_exists = + if let Ok(custom_path) = std::env::var("GOOGLE_APPLICATION_CREDENTIALS") { + std::path::Path::new(&custom_path).exists() + } else { + let default_path = format!( + "{}/.config/gcloud/application_default_credentials.json", + std::env::var("HOME").unwrap_or_default() + ); + std::path::Path::new(&default_path).exists() + }; + + if !adc_exists { + eprintln!(); + eprintln!("⚠️ Warning: GCP Application Default Credentials not found"); + eprintln!(" Sandboxes will need ADC uploaded to generate OAuth tokens."); + eprintln!(); + eprintln!(" Configure ADC with:"); + eprintln!(" gcloud auth application-default login"); + eprintln!(); + eprintln!(" Or use a service account key:"); + eprintln!(" export GOOGLE_APPLICATION_CREDENTIALS=/path/to/key.json"); + eprintln!(); + eprintln!(" Then upload credentials when creating sandboxes:"); + eprintln!(" openshell sandbox create --provider vertex \\"); + eprintln!(" --upload ~/.config/gcloud/:.config/gcloud/"); + eprintln!(); + } + } + + Ok(discovered) + } + + fn credential_env_vars(&self) -> &'static [&'static str] { + SPEC.credential_env_vars + } +} + +#[cfg(test)] +mod tests { + use super::SPEC; + use crate::discover_with_spec; + use crate::test_helpers::MockDiscoveryContext; + + #[test] + fn discovers_vertex_env_credentials() { + let ctx = + MockDiscoveryContext::new().with_env("ANTHROPIC_VERTEX_PROJECT_ID", "my-gcp-project"); + let discovered = discover_with_spec(&SPEC, &ctx) + .expect("discovery") + .expect("provider"); + assert_eq!( + discovered.credentials.get("ANTHROPIC_VERTEX_PROJECT_ID"), + Some(&"my-gcp-project".to_string()) + ); + } +} diff --git a/crates/openshell-router/src/backend.rs b/crates/openshell-router/src/backend.rs index d1d7092c0..9b5d1a000 100644 --- a/crates/openshell-router/src/backend.rs +++ b/crates/openshell-router/src/backend.rs @@ -95,7 +95,7 @@ async fn send_backend_request( headers: Vec<(String, String)>, body: bytes::Bytes, ) -> Result { 
- let url = build_backend_url(&route.endpoint, path); + let url = build_backend_url(&route.endpoint, path, &route.model); let reqwest_method: reqwest::Method = method .parse() @@ -137,13 +137,24 @@ async fn send_backend_request( // Set the "model" field in the JSON body to the route's configured model so the // backend receives the correct model ID regardless of what the client sent. + // + // Exception: Vertex AI's :streamRawPredict endpoint expects the model in the URL + // path (already handled in build_backend_url), not in the request body. + let is_vertex_ai = route.endpoint.contains("aiplatform.googleapis.com"); + let body = match serde_json::from_slice::(&body) { Ok(mut json) => { if let Some(obj) = json.as_object_mut() { - obj.insert( - "model".to_string(), - serde_json::Value::String(route.model.clone()), - ); + if is_vertex_ai { + // Remove model field for Vertex AI (it's in the URL path) + obj.remove("model"); + } else { + // Insert/override model field for standard backends + obj.insert( + "model".to_string(), + serde_json::Value::String(route.model.clone()), + ); + } } bytes::Bytes::from(serde_json::to_vec(&json).unwrap_or_else(|_| body.to_vec())) } @@ -241,7 +252,7 @@ pub async fn verify_backend_endpoint( if mock::is_mock_route(route) { return Ok(ValidatedEndpoint { - url: build_backend_url(&route.endpoint, probe.path), + url: build_backend_url(&route.endpoint, probe.path, &route.model), protocol: probe.protocol.to_string(), }); } @@ -306,7 +317,7 @@ async fn try_validation_request( details, }, })?; - let url = build_backend_url(&route.endpoint, path); + let url = build_backend_url(&route.endpoint, path, &route.model); if response.status().is_success() { return Ok(ValidatedEndpoint { @@ -418,8 +429,23 @@ pub async fn proxy_to_backend_streaming( }) } -fn build_backend_url(endpoint: &str, path: &str) -> String { +fn build_backend_url(endpoint: &str, path: &str, model: &str) -> String { let base = endpoint.trim_end_matches('/'); + + // Special handling 
for Vertex AI + if base.contains("aiplatform.googleapis.com") && path.starts_with("/v1/messages") { + // Vertex AI uses a different path structure: + // https://{region}-aiplatform.googleapis.com/v1/projects/{project}/locations/{region}/publishers/anthropic/models/{model}:streamRawPredict + // The base already has everything up to /models, so we append /{model}:streamRawPredict + let model_suffix = if model.is_empty() { + String::new() + } else { + format!("/{}", model) + }; + return format!("{}{}:streamRawPredict", base, model_suffix); + } + + // Deduplicate /v1 prefix for standard endpoints if base.ends_with("/v1") && (path == "/v1" || path.starts_with("/v1/")) { return format!("{base}{}", &path[3..]); } @@ -438,7 +464,7 @@ mod tests { #[test] fn build_backend_url_dedupes_v1_prefix() { assert_eq!( - build_backend_url("https://api.openai.com/v1", "/v1/chat/completions"), + build_backend_url("https://api.openai.com/v1", "/v1/chat/completions", "gpt-4"), "https://api.openai.com/v1/chat/completions" ); } @@ -446,15 +472,27 @@ mod tests { #[test] fn build_backend_url_preserves_non_versioned_base() { assert_eq!( - build_backend_url("https://api.anthropic.com", "/v1/messages"), + build_backend_url("https://api.anthropic.com", "/v1/messages", "claude-3"), "https://api.anthropic.com/v1/messages" ); } + #[test] + fn build_backend_url_handles_vertex_ai() { + assert_eq!( + build_backend_url( + "https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1/publishers/anthropic/models", + "/v1/messages", + "claude-3-5-sonnet-20241022" + ), + "https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1/publishers/anthropic/models/claude-3-5-sonnet-20241022:streamRawPredict" + ); + } + #[test] fn build_backend_url_handles_exact_v1_path() { assert_eq!( - build_backend_url("https://api.openai.com/v1", "/v1"), + build_backend_url("https://api.openai.com/v1", "/v1", "gpt-4"), "https://api.openai.com/v1" ); } diff --git 
a/crates/openshell-sandbox/src/secrets.rs b/crates/openshell-sandbox/src/secrets.rs index a27537c91..87c353c83 100644 --- a/crates/openshell-sandbox/src/secrets.rs +++ b/crates/openshell-sandbox/src/secrets.rs @@ -10,6 +10,28 @@ const PLACEHOLDER_PREFIX: &str = "openshell:resolve:env:"; /// Public access to the placeholder prefix for fail-closed scanning in other modules. pub(crate) const PLACEHOLDER_PREFIX_PUBLIC: &str = PLACEHOLDER_PREFIX; +/// Credentials that should be injected as actual values into the sandbox environment +/// instead of being converted to placeholders. +/// +/// These credentials are needed by tools (like `claude` CLI) that read environment +/// variables directly rather than making HTTP requests through the proxy. +/// +/// **Security consideration**: These values are visible to all sandbox processes via +/// `/proc//environ`, unlike placeholder-based credentials which are only resolved +/// within HTTP requests. Only include credentials here when direct env var access is +/// required for tool compatibility. +fn direct_inject_credentials() -> &'static [&'static str] { + &[ + // Vertex AI credentials for claude CLI + // NOTE: VERTEX_OAUTH_TOKEN is NOT included here - sandboxes generate + // fresh tokens on-demand from the uploaded ADC file instead of using + // a pre-generated (and likely expired) token from the provider database. + "ANTHROPIC_VERTEX_PROJECT_ID", + "ANTHROPIC_VERTEX_REGION", + "CLAUDE_CODE_USE_VERTEX", + ] +} + /// Characters that are valid in an env var key name (used to extract /// placeholder boundaries within concatenated strings like path segments). fn is_env_key_char(b: u8) -> bool { @@ -69,6 +91,19 @@ pub struct SecretResolver { impl SecretResolver { pub(crate) fn from_provider_env( provider_env: HashMap, + ) -> (HashMap, Option) { + Self::from_provider_env_with_direct_inject(provider_env, &direct_inject_credentials()) + } + + /// Create a resolver from provider environment with selective direct injection. 
+ /// + /// Credentials matching keys in `direct_inject` are injected as actual values + /// into the child environment (for tools like `claude` CLI that need real env vars). + /// All other credentials are converted to `openshell:resolve:env:*` placeholders + /// that get resolved by the HTTP proxy. + pub(crate) fn from_provider_env_with_direct_inject( + provider_env: HashMap, + direct_inject: &[&str], ) -> (HashMap, Option) { if provider_env.is_empty() { return (HashMap::new(), None); @@ -78,12 +113,25 @@ impl SecretResolver { let mut by_placeholder = HashMap::with_capacity(provider_env.len()); for (key, value) in provider_env { - let placeholder = placeholder_for_env_key(&key); - child_env.insert(key, placeholder.clone()); - by_placeholder.insert(placeholder, value); + // Check if this credential should be injected directly + if direct_inject.contains(&key.as_str()) { + // Direct injection: put actual value in environment + child_env.insert(key, value); + } else { + // Placeholder: will be resolved by HTTP proxy + let placeholder = placeholder_for_env_key(&key); + child_env.insert(key, placeholder.clone()); + by_placeholder.insert(placeholder, value); + } } - (child_env, Some(Self { by_placeholder })) + let resolver = if by_placeholder.is_empty() { + None + } else { + Some(Self { by_placeholder }) + }; + + (child_env, resolver) } /// Resolve a placeholder string to the real secret value. 
diff --git a/crates/openshell-server/src/inference.rs b/crates/openshell-server/src/inference.rs index 0fb29bde5..5faa30518 100644 --- a/crates/openshell-server/src/inference.rs +++ b/crates/openshell-server/src/inference.rs @@ -237,7 +237,7 @@ fn resolve_provider_route(provider: &Provider) -> Result Result/environ`):** +- `ANTHROPIC_VERTEX_PROJECT_ID` - Not sensitive (public project ID, visible in API URLs) +- `CLAUDE_CODE_USE_VERTEX` - Configuration flag (boolean) +- `ANTHROPIC_VERTEX_REGION` - Public metadata (region name) + +**Generated in sandbox (not stored in gateway database):** +- OAuth access tokens - Generated on-demand from uploaded ADC file, automatically refreshed + +**Trade-off:** Direct injection required for Claude CLI compatibility (cannot use HTTP proxy placeholders). Risk is low since no secrets are exposed via environment variables. + +## Troubleshooting + +### "Authentication failed" or "invalid credentials" + +**Cause:** Sandbox cannot generate OAuth tokens (ADC file not uploaded or missing). + +**Solution:** +1. Verify ADC exists on host: + ```bash + ls -la ~/.config/gcloud/application_default_credentials.json + ``` + +2. If missing, configure ADC: + ```bash + gcloud auth application-default login + ``` + +3. Ensure sandbox creation includes upload: + ```bash + openshell sandbox create --provider vertex \ + --upload ~/.config/gcloud/:.config/gcloud/ # ← Required + ``` + +### "Project not found" errors + +**Cause:** Invalid or inaccessible GCP project ID. + +**Solution:** +1. Verify project exists and you have access: + ```bash + gcloud projects describe $ANTHROPIC_VERTEX_PROJECT_ID + ``` + +2. Check Vertex AI API is enabled: + ```bash + gcloud services list --enabled --project=$ANTHROPIC_VERTEX_PROJECT_ID | grep aiplatform + ``` + +3. 
Enable if needed: + ```bash + gcloud services enable aiplatform.googleapis.com --project=$ANTHROPIC_VERTEX_PROJECT_ID + ``` + +### "Region not supported" errors + +**Cause:** Vertex AI endpoint for your region not in network policy. + +**Solution:** Add region to `sandbox-policy.yaml`: +```yaml +- host: your-region-aiplatform.googleapis.com + port: 443 +``` + +Regions allowed by the example policy: us-central1, us-east5, us-west1, europe-west1, europe-west4, asia-northeast1 (add others as shown below) + +## Documentation + +For detailed setup instructions and configuration options, see: + +- [Vertex AI Provider Configuration](../../docs/inference/configure.md#google-cloud-vertex-ai) +- [Provider Management](../../docs/sandboxes/manage-providers.md) +- [Inference Routing](../../docs/inference/configure.md) + +## Adding Regions + +To support additional GCP regions, add them to `sandbox-policy.yaml`: + +```yaml +- host: asia-southeast1-aiplatform.googleapis.com + port: 443 +``` diff --git a/examples/vertex-ai/sandbox-policy.yaml b/examples/vertex-ai/sandbox-policy.yaml new file mode 100644 index 000000000..81fa36d10 --- /dev/null +++ b/examples/vertex-ai/sandbox-policy.yaml @@ -0,0 +1,55 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Sandbox policy for Google Cloud Vertex AI +# +# This policy allows sandboxes to access Google Cloud endpoints required for +# Vertex AI with Anthropic Claude models.
+ +version: 1 + +network_policies: + google_vertex: + name: google-vertex + endpoints: + # Google OAuth endpoints for authentication + - host: oauth2.googleapis.com + port: 443 + - host: accounts.google.com + port: 443 + - host: www.googleapis.com + port: 443 + + # Vertex AI endpoints (global and regional) + - host: aiplatform.googleapis.com + port: 443 + - host: us-east5-aiplatform.googleapis.com + port: 443 + - host: us-central1-aiplatform.googleapis.com + port: 443 + - host: us-west1-aiplatform.googleapis.com + port: 443 + - host: europe-west1-aiplatform.googleapis.com + port: 443 + - host: europe-west4-aiplatform.googleapis.com + port: 443 + - host: asia-northeast1-aiplatform.googleapis.com + port: 443 + + binaries: + # Claude CLI for direct Vertex AI usage + - path: /usr/local/bin/claude + # Python for Anthropic SDK usage + - path: /usr/bin/python3 + # curl for testing + - path: /usr/bin/curl + + inference_local: + name: inference-local + endpoints: + # Local inference routing endpoint + - host: inference.local + port: 80 + binaries: + - path: /usr/bin/curl + - path: /usr/bin/python3 diff --git a/scripts/podman.env b/scripts/podman.env index 1e74a6b71..459627c0e 100644 --- a/scripts/podman.env +++ b/scripts/podman.env @@ -8,6 +8,11 @@ MACHINE_NAME="${PODMAN_MACHINE_NAME:-openshell}" +# Clear variables from other build workflows that would interfere with local development +unset IMAGE_TAG +unset TAG_LATEST +unset REGISTRY + # Get Podman socket path from the machine if command -v podman &>/dev/null; then SOCKET_PATH=$(podman machine inspect "${MACHINE_NAME}" --format '{{.ConnectionInfo.PodmanSocket.Path}}' 2>/dev/null) @@ -21,13 +26,19 @@ if command -v podman &>/dev/null; then export OPENSHELL_CONTAINER_RUNTIME=podman # Local development image registry - export OPENSHELL_REGISTRY="127.0.0.1:5000/openshell" + export OPENSHELL_IMAGE_REPO_BASE="127.0.0.1:5000/openshell" + export OPENSHELL_REGISTRY_HOST="127.0.0.1:5000" + export 
OPENSHELL_REGISTRY_NAMESPACE="openshell" + export OPENSHELL_REGISTRY_ENDPOINT="host.containers.internal:5000" + export OPENSHELL_REGISTRY_INSECURE="true" export OPENSHELL_CLUSTER_IMAGE="localhost/openshell/cluster:dev" echo "✓ Podman environment configured:" echo " CONTAINER_HOST=${CONTAINER_HOST}" echo " OPENSHELL_CONTAINER_RUNTIME=${OPENSHELL_CONTAINER_RUNTIME}" - echo " OPENSHELL_REGISTRY=${OPENSHELL_REGISTRY}" + echo " OPENSHELL_IMAGE_REPO_BASE=${OPENSHELL_IMAGE_REPO_BASE}" + echo " OPENSHELL_REGISTRY_HOST=${OPENSHELL_REGISTRY_HOST}" + echo " OPENSHELL_REGISTRY_INSECURE=${OPENSHELL_REGISTRY_INSECURE}" echo " OPENSHELL_CLUSTER_IMAGE=${OPENSHELL_CLUSTER_IMAGE}" fi else diff --git a/scripts/rebuild-cluster.sh b/scripts/rebuild-cluster.sh new file mode 100755 index 000000000..f836a832a --- /dev/null +++ b/scripts/rebuild-cluster.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Quick rebuild script for development +# Restarts the cluster container with the latest code changes + +set -euo pipefail + +echo "=== OpenShell Quick Rebuild ===" +echo "" + +# Stop and remove cluster container +echo "Stopping cluster container..." +podman stop openshell-cluster-openshell 2>/dev/null || true +podman rm openshell-cluster-openshell 2>/dev/null || true + +# Remove old cluster image +echo "Removing old cluster image..." +podman rmi localhost/openshell/cluster:dev 2>/dev/null || true + +# Rebuild and start cluster +echo "Rebuilding cluster with latest code..." +mise run cluster:build:full + +echo "" +echo "=== Rebuild Complete ===" +echo "" +echo "Next steps:" +echo " 1. Recreate provider: openshell provider create --name --type --from-existing" +echo " 2. Configure inference: openshell inference set --provider --model " +echo " 3. Recreate sandboxes: openshell sandbox create ..." 
+echo "" diff --git a/scripts/setup-podman-macos.sh b/scripts/setup-podman-macos.sh index 1538259f3..02fdf2343 100755 --- a/scripts/setup-podman-macos.sh +++ b/scripts/setup-podman-macos.sh @@ -9,7 +9,7 @@ set -euo pipefail MACHINE_NAME="${PODMAN_MACHINE_NAME:-openshell}" -MEMORY="${PODMAN_MEMORY:-8192}" +MEMORY="${PODMAN_MEMORY:-12288}" CPUS="${PODMAN_CPUS:-4}" echo "=== OpenShell Podman Setup for macOS ===" @@ -108,9 +108,9 @@ echo "Podman machine '${MACHINE_NAME}' is ready!" echo "" echo "Next steps:" echo " 1. Set up environment: source scripts/podman.env" -echo " 2. Build and deploy: mise run cluster:build:full" -echo " 3. Build CLI: cargo build --release -p openshell-cli" -echo " 4. Install CLI: cp target/release/openshell ~/.local/bin/" +echo " 2. Build and deploy cluster: mise run cluster:build:full" +echo " 3. Install CLI: cargo install --path crates/openshell-cli --root ~/.local" +echo " 4. Verify installation: openshell gateway info" echo "" echo "To make the environment persistent, add to your shell profile (~/.zshrc):" echo " source $(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/scripts/podman.env" diff --git a/tasks/scripts/cluster-deploy-fast.sh b/tasks/scripts/cluster-deploy-fast.sh index 86fe9746d..9bdc6a604 100755 --- a/tasks/scripts/cluster-deploy-fast.sh +++ b/tasks/scripts/cluster-deploy-fast.sh @@ -28,6 +28,23 @@ log_duration() { echo "${label} took $((end - start))s" } +# Read lines into an array variable (bash 3 & 4 compatible) +# Usage: read_lines_into_array array_name < <(command) +read_lines_into_array() { + local array_name=$1 + if ((BASH_VERSINFO[0] >= 4)); then + # Bash 4+: use mapfile (faster) + mapfile -t "$array_name" + else + # Bash 3: use while loop + local line + eval "$array_name=()" + while IFS= read -r line; do + eval "$array_name+=(\"\$line\")" + done + fi +} + if ! 
$CONTAINER_RUNTIME ps -q --filter "name=^${CONTAINER_NAME}$" --filter "health=healthy" | grep -q .; then echo "Error: Cluster container '${CONTAINER_NAME}' is not running or not healthy." echo "Start the cluster first with: mise run cluster" @@ -86,7 +103,7 @@ fi declare -a changed_files=() detect_start=$(date +%s) -mapfile -t changed_files < <( +read_lines_into_array changed_files < <( { git diff --name-only git diff --name-only --cached diff --git a/tasks/scripts/docker-build-image.sh b/tasks/scripts/docker-build-image.sh index 38b200a2e..a76b01d12 100755 --- a/tasks/scripts/docker-build-image.sh +++ b/tasks/scripts/docker-build-image.sh @@ -212,11 +212,13 @@ if [[ "${CONTAINER_RUNTIME}" == "podman" ]]; then ARCH_ARGS+=(--build-arg "BUILDARCH=${TARGETARCH}") fi - # Filter OUTPUT_ARGS: Podman stores images locally by default (no --load) + # Filter OUTPUT_ARGS: Podman doesn't support --load or --push in build command PODMAN_OUTPUT_ARGS=() + PODMAN_SHOULD_PUSH=0 for arg in ${OUTPUT_ARGS[@]+"${OUTPUT_ARGS[@]}"}; do case "${arg}" in --load) ;; # implicit in Podman + --push) PODMAN_SHOULD_PUSH=1 ;; # push after build *) PODMAN_OUTPUT_ARGS+=("${arg}") ;; esac done @@ -227,6 +229,13 @@ if [[ "${CONTAINER_RUNTIME}" == "podman" ]]; then ${TLS_ARGS[@]+"${TLS_ARGS[@]}"} \ ${PODMAN_OUTPUT_ARGS[@]+"${PODMAN_OUTPUT_ARGS[@]}"} \ . + + # Push after build if requested (Podman doesn't support --push in build) + if [[ "${PODMAN_SHOULD_PUSH}" == "1" && "${IS_FINAL_IMAGE}" == "1" ]]; then + echo "Pushing ${IMAGE_NAME}:${IMAGE_TAG}..." 
+ podman_local_tls_args "${IMAGE_NAME}" + podman push ${PODMAN_TLS_ARGS[@]+"${PODMAN_TLS_ARGS[@]}"} "${IMAGE_NAME}:${IMAGE_TAG}" + fi else # Docker: use buildx docker buildx build \ diff --git a/tasks/scripts/docker-publish-multiarch.sh b/tasks/scripts/docker-publish-multiarch.sh index f83a7c203..398c97c00 100755 --- a/tasks/scripts/docker-publish-multiarch.sh +++ b/tasks/scripts/docker-publish-multiarch.sh @@ -27,8 +27,56 @@ fi if [[ "${CONTAINER_RUNTIME}" == "podman" ]]; then echo "Using Podman for multi-arch build (podman manifest)" + echo "Note: Podman builds platforms sequentially (slower than Docker buildx)" export DOCKER_BUILDER="" + + # Podman: build each platform separately and create manifest + IFS=',' read -ra PLATFORM_ARRAY <<< "${PLATFORMS}" + + for component in gateway cluster; do + full_image="${REGISTRY}/${component}" + echo "" + echo "=== Building multi-arch ${component} image ===" + + # Create manifest list + podman manifest rm "${full_image}:${IMAGE_TAG}" 2>/dev/null || true + podman manifest create "${full_image}:${IMAGE_TAG}" + + # Build for each platform + for platform in "${PLATFORM_ARRAY[@]}"; do + arch="${platform##*/}" + case "${arch}" in + amd64) target_arch="amd64" ;; + arm64) target_arch="arm64" ;; + *) echo "Unsupported arch: ${arch}" >&2; exit 1 ;; + esac + + echo "Building ${component} for ${platform}..." + + # Package Helm chart for cluster builds + if [[ "${component}" == "cluster" ]]; then + mkdir -p deploy/docker/.build/charts + helm package deploy/helm/openshell -d deploy/docker/.build/charts/ >/dev/null + fi + + # Build with explicit TARGETARCH/BUILDARCH to avoid cross-compilation + # (QEMU emulation handles running the different architecture) + podman build --platform "${platform}" \ + --build-arg TARGETARCH="${target_arch}" \ + --build-arg BUILDARCH="${target_arch}" \ + --manifest "${full_image}:${IMAGE_TAG}" \ + -f deploy/docker/Dockerfile.images \ + --target "${component}" \ + . 
+ done + + # Push manifest + echo "Pushing ${full_image}:${IMAGE_TAG}..." + podman manifest push "${full_image}:${IMAGE_TAG}" \ + "docker://${full_image}:${IMAGE_TAG}" + done else + # Docker: use buildx BUILDER_NAME=${DOCKER_BUILDER:-multiarch} if docker buildx inspect "${BUILDER_NAME}" >/dev/null 2>&1; then echo "Using existing buildx builder: ${BUILDER_NAME}" @@ -38,19 +86,19 @@ else docker buildx create --name "${BUILDER_NAME}" --use --bootstrap fi export DOCKER_BUILDER="${BUILDER_NAME}" -fi -export DOCKER_PLATFORM="${PLATFORMS}" -export DOCKER_PUSH=1 -export IMAGE_REGISTRY="${REGISTRY}" + export DOCKER_PLATFORM="${PLATFORMS}" + export DOCKER_PUSH=1 + export IMAGE_REGISTRY="${REGISTRY}" -echo "Building multi-arch gateway image..." -tasks/scripts/docker-build-image.sh gateway + echo "Building multi-arch gateway image..." + tasks/scripts/docker-build-image.sh gateway -echo -echo "Building multi-arch cluster image..." -tasks/scripts/docker-build-image.sh cluster + echo + echo "Building multi-arch cluster image..." + tasks/scripts/docker-build-image.sh cluster +fi -TAGS_TO_APPLY=("${EXTRA_TAGS[@]}") +TAGS_TO_APPLY=(${EXTRA_TAGS[@]+"${EXTRA_TAGS[@]}"}) if [[ "${TAG_LATEST}" == "true" ]]; then TAGS_TO_APPLY+=("latest") fi @@ -58,7 +106,7 @@ fi if [[ ${#TAGS_TO_APPLY[@]} -gt 0 ]]; then for component in gateway cluster; do full_image="${REGISTRY}/${component}" - for tag in "${TAGS_TO_APPLY[@]}"; do + for tag in ${TAGS_TO_APPLY[@]+"${TAGS_TO_APPLY[@]}"}; do [[ "${tag}" == "${IMAGE_TAG}" ]] && continue echo "Tagging ${full_image}:${tag}..." if [[ "${CONTAINER_RUNTIME}" == "podman" ]]; then