diff --git a/AGENTS.md b/AGENTS.md index 84a93381..5015246a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -99,8 +99,14 @@ Every compiled pipeline runs as three sequential jobs: │ │ └── upload_workitem_attachment.rs │ ├── runtimes/ # Runtime environment implementations (one dir per runtime) │ │ ├── mod.rs # Module entry point -│ │ └── lean/ # Lean 4 theorem prover runtime -│ │ ├── mod.rs # Config types, install helpers +│ │ ├── lean/ # Lean 4 theorem prover runtime +│ │ │ ├── mod.rs # Config types, install helpers +│ │ │ └── extension.rs # CompilerExtension impl +│ │ ├── python/ # Python runtime +│ │ │ ├── mod.rs # Config types, install/auth helpers +│ │ │ └── extension.rs # CompilerExtension impl +│ │ └── node/ # Node.js runtime +│ │ ├── mod.rs # Config types, install/auth helpers │ │ └── extension.rs # CompilerExtension impl │ ├── data/ │ │ ├── base.yml # Base pipeline template for standalone @@ -156,8 +162,8 @@ index to jump to the right page. in the pipeline UI, including the auto-injected `clearMemory` parameter. - [`docs/tools.md`](docs/tools.md) — `tools:` configuration (bash allow-list, `edit`, `cache-memory`, `azure-devops` MCP). -- [`docs/runtimes.md`](docs/runtimes.md) — `runtimes:` configuration (currently - Lean 4). +- [`docs/runtimes.md`](docs/runtimes.md) — `runtimes:` configuration (Lean 4, + Python, Node.js). - [`docs/targets.md`](docs/targets.md) — target platforms: `standalone` and `1es`. - [`docs/safe-outputs.md`](docs/safe-outputs.md) — full reference for every diff --git a/docs/runtimes.md b/docs/runtimes.md index 47d78463..edaf6a5d 100644 --- a/docs/runtimes.md +++ b/docs/runtimes.md @@ -33,3 +33,87 @@ When enabled, the compiler: - Emits a compile-time warning if `tools.bash` is empty (Lean requires bash access) **Note:** In the 1ES target, the bash command allow-list is updated but elan installation must be done manually via `steps:` front matter. The 1ES target handles network isolation separately. + +### Python (`python:`) + +Python runtime. 
Auto-installs Python via `UsePythonVersion@0`, emits `PipAuthenticate@1` for internal feed access, adds Python ecosystem domains to the AWF network allowlist, extends the bash command allow-list, and optionally injects feed URL env vars for pip and uv. + +```yaml +# Simple enablement (installs default Python 3.x) +runtimes: + python: true + +# With options (pin version, configure feed) +runtimes: + python: + version: "3.12" + feed-url: "https://pkgs.dev.azure.com/myorg/_packaging/myfeed/pypi/simple/" +``` + +**Fields:** + +| Field | Type | Description | +|-------|------|-------------| +| `version` | string | Python version to install (e.g., `"3.12"`, `"3.11"`). Passed to `UsePythonVersion@0` `versionSpec`. Defaults to latest 3.x. | +| `feed-url` | string | Internal PyPI feed URL. Injects `PIP_INDEX_URL` and `UV_DEFAULT_INDEX` env vars into the agent environment. | +| `config` | string | Path to a pip/uv config file. Accepted with a warning — the file will not be available inside the AWF agent environment until proxy-auth support lands. | + +When enabled, the compiler: +- Injects `UsePythonVersion@0` into `{{ prepare_steps }}` (runs before AWF) +- If `feed-url` is set, also injects `PipAuthenticate@1` to authenticate the ADO build service identity for internal feeds +- Auto-adds `python`, `python3`, `pip`, `pip3`, `uv` to the bash command allow-list +- Adds Python ecosystem domains to the network allowlist (pypi.org, pythonhosted.org, etc.) 
+- If `feed-url` is set, injects `PIP_INDEX_URL` and `UV_DEFAULT_INDEX` env vars into the agent environment +- Appends a prompt supplement informing the agent about Python availability +- No AWF mounts or PATH prepends needed — `UsePythonVersion@0` installs to `/opt/hostedtoolcache` (auto-mounted by AWF) and publishes PATH entries that AWF merges via `$GITHUB_PATH` + +**Note:** `PipAuthenticate@1` is currently emitted with an empty `artifactFeeds` input, which configures credentials for all feeds accessible to the build service identity. If your internal feed requires scoped authentication to a specific Azure Artifacts feed, this may need future refinement. + +### Node.js (`node:`) + +Node.js runtime. Auto-installs Node.js via `NodeTool@0`, emits `npmAuthenticate@0` for internal feed access, adds Node ecosystem domains to the AWF network allowlist, extends the bash command allow-list, and optionally injects feed URL env vars for npm. + +```yaml +# Simple enablement (installs default Node LTS) +runtimes: + node: true + +# With options (pin version, configure feed) +runtimes: + node: + version: "22.x" + feed-url: "https://pkgs.dev.azure.com/ORG/PROJECT/_packaging/FEED/npm/registry/" +``` + +**Fields:** + +| Field | Type | Description | +|-------|------|-------------| +| `version` | string | Node.js version to install (e.g., `"22.x"`, `"20.x"`). Passed to `NodeTool@0` `versionSpec`. Defaults to `"22.x"`. | +| `feed-url` | string | Internal npm registry URL. Injects `NPM_CONFIG_REGISTRY` env var into the agent environment. | +| `config` | string | Path to an .npmrc config file. Accepted with a warning — the file will not be available inside the AWF agent environment until proxy-auth support lands. 
| + +When enabled, the compiler: +- Injects `NodeTool@0` into `{{ prepare_steps }}` (runs before AWF) +- If `feed-url` or `config` is set, also injects `npmAuthenticate@0` (and an ensure-`.npmrc` step) to authenticate the ADO build service identity for internal feeds +- Auto-adds `node`, `npm`, `npx` to the bash command allow-list +- Adds Node ecosystem domains to the network allowlist (npmjs.org, nodejs.org, etc.) +- If `feed-url` is set, injects `NPM_CONFIG_REGISTRY` env var into the agent environment +- Appends a prompt supplement informing the agent about Node.js availability +- No AWF mounts or PATH prepends needed — `NodeTool@0` installs to `/opt/hostedtoolcache` (auto-mounted by AWF) and publishes PATH entries that AWF merges via `$GITHUB_PATH` +- Note: AWF overlays `~/.npmrc` with `/dev/null` for credential security — the `NPM_CONFIG_REGISTRY` env var approach avoids conflicting with this overlay + +### Combining Runtimes + +Multiple runtimes can be enabled simultaneously: + +```yaml +runtimes: + python: + version: "3.12" + node: + version: "22.x" + lean: true +``` + +All runtime extensions are sorted into `ExtensionPhase::Runtime` and execute before tool extensions (`ExtensionPhase::Tool`), ensuring language toolchains are available before any tools that depend on them. diff --git a/src/compile/common.rs b/src/compile/common.rs index 5e018087..3e65e7de 100644 --- a/src/compile/common.rs +++ b/src/compile/common.rs @@ -1912,6 +1912,71 @@ pub fn collect_awf_path_prepends(extensions: &[super::extensions::Extension]) -> .collect() } +/// Collects `agent_env_vars()` from all extensions, validates keys against +/// `BLOCKED_ENV_KEYS`, deduplicates (bails on collision), and formats them +/// as YAML `KEY: "value"` lines for injection into the `{{ engine_env }}` block. +/// +/// Returns an empty string if no extensions declare env vars. 
+pub fn collect_agent_env_vars(extensions: &[super::extensions::Extension]) -> anyhow::Result<String> { + use crate::engine::BLOCKED_ENV_KEYS; + use crate::validate; + use std::collections::HashSet; + + let mut lines = Vec::new(); + let mut seen_keys = HashSet::new(); + + for ext in extensions { + for (key, value) in ext.agent_env_vars() { + // Deduplicate: bail on collision + if !seen_keys.insert(key.clone()) { + anyhow::bail!( + "Extension '{}' declares agent env var '{}' which was already declared \ + by a previous extension. Each env var key must be unique.", + ext.name(), + key, + ); + } + + // Validate key is not blocked + if BLOCKED_ENV_KEYS.iter().any(|blocked| key.eq_ignore_ascii_case(blocked)) { + anyhow::bail!( + "Extension '{}' declares agent env var '{}' which conflicts with a \ + compiler-controlled environment variable.", + ext.name(), + key, + ); + } + + // Validate key format + if !validate::is_valid_env_var_name(&key) { + anyhow::bail!( + "Extension '{}' declares agent env var '{}' with invalid key format. \ + Keys must contain only ASCII alphanumerics and underscores.", + ext.name(), + key, + ); + } + + // Validate value for injection (defence in depth — covers ADO expressions, + // pipeline commands, template markers, and newlines) + validate::reject_pipeline_injection(&value, &format!("agent env var '{key}'"))?; + + if value.contains('"') || value.contains('\'') { + anyhow::bail!( + "Extension '{}' agent env var '{}' value contains a quote character \ + which would produce malformed YAML or bash syntax.", + ext.name(), + key, + ); + } + + lines.push(format!("{key}: \"{value}\"")); + } + } + + Ok(lines.join("\n")) +} + // ==================== Shared compile flow ==================== /// Target-specific overrides for the shared compile flow. 
@@ -2104,6 +2169,12 @@ pub async fn compile_shared( if !awf_path_env.is_empty() { engine_env = format!("{engine_env}\n{awf_path_env}"); } + + // Append extension-declared agent env vars (e.g., PIP_INDEX_URL, NPM_CONFIG_REGISTRY) + let agent_env = collect_agent_env_vars(extensions)?; + if !agent_env.is_empty() { + engine_env = format!("{engine_env}\n{agent_env}"); + } let engine_log_dir = ctx.engine.log_dir(); let acquire_write_token = generate_acquire_ado_token( front_matter @@ -2505,6 +2576,8 @@ mod tests { }); fm.runtimes = Some(crate::compile::types::RuntimesConfig { lean: Some(crate::runtimes::lean::LeanRuntimeConfig::Enabled(true)), + python: None, + node: None, }); let params = CompileContext::for_test(&fm).engine.args(&fm, &crate::compile::extensions::collect_extensions(&fm)).unwrap(); assert!(params.contains("shell(lean)"), "lean command should be allowed"); @@ -2525,6 +2598,8 @@ mod tests { }); fm.runtimes = Some(crate::compile::types::RuntimesConfig { lean: Some(crate::runtimes::lean::LeanRuntimeConfig::Enabled(true)), + python: None, + node: None, }); let params = CompileContext::for_test(&fm).engine.args(&fm, &crate::compile::extensions::collect_extensions(&fm)).unwrap(); assert!(params.contains("--allow-all-tools"), "wildcard should use --allow-all-tools"); diff --git a/src/compile/extensions/mod.rs b/src/compile/extensions/mod.rs index 30f1646a..6ac8aba2 100644 --- a/src/compile/extensions/mod.rs +++ b/src/compile/extensions/mod.rs @@ -324,6 +324,17 @@ pub trait CompilerExtension { fn awf_path_prepends(&self) -> Vec { vec![] } + + /// Environment variables to inject into the agent execution environment. + /// + /// Returns `(key, value)` pairs that are emitted as `KEY: "value"` in + /// the `{{ engine_env }}` YAML block. Used by runtimes to configure + /// package managers via env vars (e.g., `PIP_INDEX_URL`, `NPM_CONFIG_REGISTRY`). + /// + /// Keys are validated against `BLOCKED_ENV_KEYS` at collection time. 
+ fn agent_env_vars(&self) -> Vec<(String, String)> { + vec![] + } } /// Mount access mode for an AWF bind mount. @@ -534,6 +545,9 @@ macro_rules! extension_enum { fn awf_path_prepends(&self) -> Vec { match self { $( $Enum::$Variant(e) => e.awf_path_prepends(), )+ } } + fn agent_env_vars(&self) -> Vec<(String, String)> { + match self { $( $Enum::$Variant(e) => e.agent_env_vars(), )+ } + } } }; } @@ -547,6 +561,8 @@ pub use crate::tools::azure_devops::AzureDevOpsExtension; pub use crate::tools::cache_memory::CacheMemoryExtension; pub use github::GitHubExtension; pub use crate::runtimes::lean::LeanExtension; +pub use crate::runtimes::node::NodeExtension; +pub use crate::runtimes::python::PythonExtension; pub use safe_outputs::SafeOutputsExtension; pub use trigger_filters::TriggerFiltersExtension; @@ -559,6 +575,8 @@ extension_enum! { GitHub(GitHubExtension), SafeOutputs(SafeOutputsExtension), Lean(LeanExtension), + Python(PythonExtension), + Node(NodeExtension), AzureDevOps(AzureDevOpsExtension), CacheMemory(CacheMemoryExtension), TriggerFilters(TriggerFiltersExtension), @@ -593,6 +611,16 @@ pub fn collect_extensions(front_matter: &FrontMatter) -> Vec { extensions.push(Extension::Lean(LeanExtension::new(lean.clone()))); } } + if let Some(python) = front_matter.runtimes.as_ref().and_then(|r| r.python.as_ref()) { + if python.is_enabled() { + extensions.push(Extension::Python(PythonExtension::new(python.clone()))); + } + } + if let Some(node) = front_matter.runtimes.as_ref().and_then(|r| r.node.as_ref()) { + if node.is_enabled() { + extensions.push(Extension::Node(NodeExtension::new(node.clone()))); + } + } // ── First-party tools (ExtensionPhase::Tool) ── if let Some(tools) = front_matter.tools.as_ref() { diff --git a/src/compile/extensions/tests.rs b/src/compile/extensions/tests.rs index 903b78d4..e10920f2 100644 --- a/src/compile/extensions/tests.rs +++ b/src/compile/extensions/tests.rs @@ -356,3 +356,257 @@ fn test_wrap_prompt_append_rejects_unsafe_display_name() { 
let result = wrap_prompt_append("content", "ext$(rm -rf)"); assert!(result.is_err()); } + +// ── PythonExtension ──────────────────────────────────────────── + +#[test] +fn test_collect_extensions_python_enabled() { + let (fm, _) = + parse_markdown("---\nname: test\ndescription: test\nruntimes:\n python: true\n---\n") + .unwrap(); + let exts = collect_extensions(&fm); + assert!(exts.iter().any(|e| e.name() == "Python")); +} + +#[test] +fn test_collect_extensions_python_disabled() { + let (fm, _) = + parse_markdown("---\nname: test\ndescription: test\nruntimes:\n python: false\n---\n") + .unwrap(); + let exts = collect_extensions(&fm); + assert!(!exts.iter().any(|e| e.name() == "Python")); +} + +#[test] +fn test_collect_extensions_python_with_version() { + let (fm, _) = + parse_markdown("---\nname: test\ndescription: test\nruntimes:\n python:\n version: '3.12'\n---\n") + .unwrap(); + let exts = collect_extensions(&fm); + assert!(exts.iter().any(|e| e.name() == "Python")); +} + +#[test] +fn test_python_required_hosts() { + let ext = crate::runtimes::python::PythonExtension::new( + crate::runtimes::python::PythonRuntimeConfig::Enabled(true), + ); + let hosts = ext.required_hosts(); + assert_eq!(hosts, vec!["python".to_string()]); +} + +#[test] +fn test_python_prepare_steps() { + let ext = crate::runtimes::python::PythonExtension::new( + crate::runtimes::python::PythonRuntimeConfig::Enabled(true), + ); + let steps = ext.prepare_steps(); + assert_eq!(steps.len(), 1, "no auth step without feed-url/config"); + assert!(steps[0].contains("UsePythonVersion@0")); +} + +#[test] +fn test_python_prepare_steps_with_feed_url() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\nruntimes:\n python:\n feed-url: 'https://pkgs.dev.azure.com/org/_packaging/feed/pypi/simple/'\n---\n", + ).unwrap(); + let python = fm.runtimes.as_ref().unwrap().python.as_ref().unwrap(); + let ext = crate::runtimes::python::PythonExtension::new(python.clone()); + let steps = 
ext.prepare_steps(); + assert_eq!(steps.len(), 2); + assert!(steps[0].contains("UsePythonVersion@0")); + assert!(steps[1].contains("PipAuthenticate@1")); +} + +#[test] +fn test_python_agent_env_vars_no_feed() { + let ext = crate::runtimes::python::PythonExtension::new( + crate::runtimes::python::PythonRuntimeConfig::Enabled(true), + ); + assert!(ext.agent_env_vars().is_empty()); +} + +#[test] +fn test_python_agent_env_vars_with_feed() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\nruntimes:\n python:\n version: '3.12'\n feed-url: 'https://pkgs.dev.azure.com/org/_packaging/feed/pypi/simple/'\n---\n", + ).unwrap(); + let python = fm.runtimes.as_ref().unwrap().python.as_ref().unwrap(); + let ext = crate::runtimes::python::PythonExtension::new(python.clone()); + let vars = ext.agent_env_vars(); + assert_eq!(vars.len(), 2); + assert_eq!(vars[0].0, "PIP_INDEX_URL"); + assert_eq!(vars[1].0, "UV_DEFAULT_INDEX"); +} + +#[test] +fn test_python_config_warns_not_functional() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\nruntimes:\n python:\n version: '3.12'\n config: '/path/to/pip.conf'\n---\n", + ).unwrap(); + let python = fm.runtimes.as_ref().unwrap().python.as_ref().unwrap(); + let ext = crate::runtimes::python::PythonExtension::new(python.clone()); + let ctx = ctx_from(&fm); + let result = ext.validate(&ctx); + assert!(result.is_ok(), "config: should be accepted (warning, not error)"); + let warnings = result.unwrap(); + assert!(warnings.iter().any(|w| w.contains("will not be available"))); +} + +// ── NodeExtension ────────────────────────────────────────────── + +#[test] +fn test_collect_extensions_node_enabled() { + let (fm, _) = + parse_markdown("---\nname: test\ndescription: test\nruntimes:\n node: true\n---\n") + .unwrap(); + let exts = collect_extensions(&fm); + assert!(exts.iter().any(|e| e.name() == "Node.js")); +} + +#[test] +fn test_collect_extensions_node_disabled() { + let (fm, _) = + 
parse_markdown("---\nname: test\ndescription: test\nruntimes:\n node: false\n---\n") + .unwrap(); + let exts = collect_extensions(&fm); + assert!(!exts.iter().any(|e| e.name() == "Node.js")); +} + +#[test] +fn test_collect_extensions_node_with_version() { + let (fm, _) = + parse_markdown("---\nname: test\ndescription: test\nruntimes:\n node:\n version: '22.x'\n---\n") + .unwrap(); + let exts = collect_extensions(&fm); + assert!(exts.iter().any(|e| e.name() == "Node.js")); +} + +#[test] +fn test_node_required_hosts() { + let ext = crate::runtimes::node::NodeExtension::new( + crate::runtimes::node::NodeRuntimeConfig::Enabled(true), + ); + let hosts = ext.required_hosts(); + assert_eq!(hosts, vec!["node".to_string()]); +} + +#[test] +fn test_node_prepare_steps() { + let ext = crate::runtimes::node::NodeExtension::new( + crate::runtimes::node::NodeRuntimeConfig::Enabled(true), + ); + let steps = ext.prepare_steps(); + assert_eq!(steps.len(), 1, "no auth steps without feed-url/config"); + assert!(steps[0].contains("NodeTool@0")); +} + +#[test] +fn test_node_prepare_steps_with_feed_url() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\nruntimes:\n node:\n feed-url: 'https://pkgs.dev.azure.com/ORG/PROJECT/_packaging/FEED/npm/registry/'\n---\n", + ).unwrap(); + let node = fm.runtimes.as_ref().unwrap().node.as_ref().unwrap(); + let ext = crate::runtimes::node::NodeExtension::new(node.clone()); + let steps = ext.prepare_steps(); + assert_eq!(steps.len(), 3); + assert!(steps[0].contains("NodeTool@0")); + assert!(steps[1].contains("Ensure .npmrc")); + assert!(steps[2].contains("npmAuthenticate@0")); +} + +#[test] +fn test_node_agent_env_vars_no_feed() { + let ext = crate::runtimes::node::NodeExtension::new( + crate::runtimes::node::NodeRuntimeConfig::Enabled(true), + ); + assert!(ext.agent_env_vars().is_empty()); +} + +#[test] +fn test_node_agent_env_vars_with_feed() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\nruntimes:\n 
node:\n version: '22.x'\n feed-url: 'https://pkgs.dev.azure.com/ORG/PROJECT/_packaging/FEED/npm/registry/'\n---\n", + ).unwrap(); + let node = fm.runtimes.as_ref().unwrap().node.as_ref().unwrap(); + let ext = crate::runtimes::node::NodeExtension::new(node.clone()); + let vars = ext.agent_env_vars(); + assert_eq!(vars.len(), 1); + assert_eq!(vars[0].0, "NPM_CONFIG_REGISTRY"); +} + +#[test] +fn test_node_config_warns_not_functional() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\nruntimes:\n node:\n version: '22.x'\n config: '/path/to/.npmrc'\n---\n", + ).unwrap(); + let node = fm.runtimes.as_ref().unwrap().node.as_ref().unwrap(); + let ext = crate::runtimes::node::NodeExtension::new(node.clone()); + let ctx = ctx_from(&fm); + let result = ext.validate(&ctx); + assert!(result.is_ok(), "config: should be accepted (warning, not error)"); + let warnings = result.unwrap(); + assert!(warnings.iter().any(|w| w.contains("will not be available"))); +} + +#[test] +fn test_node_config_and_feed_url_mutually_exclusive() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\nruntimes:\n node:\n config: '/path/to/.npmrc'\n feed-url: 'https://example.com/npm/'\n---\n", + ).unwrap(); + let node = fm.runtimes.as_ref().unwrap().node.as_ref().unwrap(); + let ext = crate::runtimes::node::NodeExtension::new(node.clone()); + let ctx = ctx_from(&fm); + let result = ext.validate(&ctx); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("mutually exclusive")); +} + +#[test] +fn test_python_config_and_feed_url_mutually_exclusive() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\nruntimes:\n python:\n config: '/path/to/pip.conf'\n feed-url: 'https://example.com/pypi/'\n---\n", + ).unwrap(); + let python = fm.runtimes.as_ref().unwrap().python.as_ref().unwrap(); + let ext = crate::runtimes::python::PythonExtension::new(python.clone()); + let ctx = ctx_from(&fm); + let result = ext.validate(&ctx); + 
assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("mutually exclusive")); +} + +// ── Multiple runtimes ────────────────────────────────────────── + +#[test] +fn test_collect_extensions_all_runtimes_enabled() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\nruntimes:\n lean: true\n python: true\n node: true\n---\n", + ).unwrap(); + let exts = collect_extensions(&fm); + assert!(exts.iter().any(|e| e.name() == "Lean 4")); + assert!(exts.iter().any(|e| e.name() == "Python")); + assert!(exts.iter().any(|e| e.name() == "Node.js")); + // All are Runtime phase + let runtime_exts: Vec<_> = exts.iter().filter(|e| e.phase() == ExtensionPhase::Runtime).collect(); + assert_eq!(runtime_exts.len(), 3); +} + +#[test] +fn test_collect_extensions_runtimes_before_tools_with_python_and_node() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n azure-devops: true\nruntimes:\n python: true\n node: true\n---\n", + ).unwrap(); + let exts = collect_extensions(&fm); + let last_runtime_idx = exts + .iter() + .rposition(|e| e.phase() == ExtensionPhase::Runtime) + .expect("expected Runtime extension"); + let first_tool_idx = exts + .iter() + .position(|e| e.phase() == ExtensionPhase::Tool) + .expect("expected Tool extension"); + assert!( + last_runtime_idx < first_tool_idx, + "Runtime extensions must come before Tool extensions" + ); +} diff --git a/src/compile/standalone.rs b/src/compile/standalone.rs index 40248826..5054a222 100644 --- a/src/compile/standalone.rs +++ b/src/compile/standalone.rs @@ -178,6 +178,8 @@ mod tests { let mut fm = minimal_front_matter(); fm.runtimes = Some(crate::compile::types::RuntimesConfig { lean: Some(crate::runtimes::lean::LeanRuntimeConfig::Enabled(true)), + python: None, + node: None, }); let exts = super::super::extensions::collect_extensions(&fm); let domains = generate_allowed_domains(&fm, &exts).unwrap(); @@ -191,6 +193,8 @@ mod tests { let mut fm = minimal_front_matter(); 
fm.runtimes = Some(crate::compile::types::RuntimesConfig { lean: Some(crate::runtimes::lean::LeanRuntimeConfig::Enabled(false)), + python: None, + node: None, }); let exts = super::super::extensions::collect_extensions(&fm); let domains = generate_allowed_domains(&fm, &exts).unwrap(); diff --git a/src/compile/types.rs b/src/compile/types.rs index 1f9d799d..cb200acd 100644 --- a/src/compile/types.rs +++ b/src/compile/types.rs @@ -499,6 +499,20 @@ pub struct RuntimesConfig { /// extends the bash command allow-list, and appends a prompt supplement. #[serde(default)] pub lean: Option, + + /// Python runtime. + /// Auto-installs Python via UsePythonVersion@0, emits PipAuthenticate@1, + /// adds Python ecosystem domains to the AWF network allowlist, extends + /// the bash command allow-list, and optionally injects feed URL env vars. + #[serde(default)] + pub python: Option, + + /// Node.js runtime. + /// Auto-installs Node.js via NodeTool@0, emits npmAuthenticate@0, + /// adds Node ecosystem domains to the AWF network allowlist, extends + /// the bash command allow-list, and optionally injects feed URL env vars. + #[serde(default)] + pub node: Option, } impl SanitizeConfigTrait for RuntimesConfig { @@ -506,6 +520,12 @@ impl SanitizeConfigTrait for RuntimesConfig { if let Some(ref mut lean) = self.lean { lean.sanitize_config_fields(); } + if let Some(ref mut python) = self.python { + python.sanitize_config_fields(); + } + if let Some(ref mut node) = self.node { + node.sanitize_config_fields(); + } } } diff --git a/src/engine.rs b/src/engine.rs index 8a0119e2..a3e82a8f 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -21,7 +21,7 @@ const BLOCKED_ARG_PREFIXES: &[&str] = &[ ]; /// Environment variable keys that the compiler controls — users must not override these. 
-const BLOCKED_ENV_KEYS: &[&str] = &[ +pub const BLOCKED_ENV_KEYS: &[&str] = &[ "GITHUB_TOKEN", "GITHUB_READ_ONLY", "COPILOT_OTEL_ENABLED", diff --git a/src/runtimes/mod.rs b/src/runtimes/mod.rs index 34189761..59099131 100644 --- a/src/runtimes/mod.rs +++ b/src/runtimes/mod.rs @@ -10,3 +10,5 @@ //! Aligned with gh-aw's `runtimes:` front matter field. pub mod lean; +pub mod node; +pub mod python; diff --git a/src/runtimes/node/extension.rs b/src/runtimes/node/extension.rs new file mode 100644 index 00000000..c57ca4c2 --- /dev/null +++ b/src/runtimes/node/extension.rs @@ -0,0 +1,124 @@ +// ─── Node.js ─────────────────────────────────────────────────────── + +use crate::compile::extensions::{CompileContext, CompilerExtension, ExtensionPhase}; +use crate::validate; +use super::{NODE_BASH_COMMANDS, NodeRuntimeConfig, generate_ensure_npmrc, generate_node_install, generate_npm_authenticate}; +use anyhow::Result; + +/// Node.js runtime extension. +/// +/// Injects: ecosystem network hosts (node), bash commands (node, npm, npx), +/// install steps (NodeTool@0), authenticate steps (npmAuthenticate@0), +/// env vars (NPM_CONFIG_REGISTRY when feed-url is set), and a prompt +/// supplement. +pub struct NodeExtension { + config: NodeRuntimeConfig, +} + +impl NodeExtension { + pub fn new(config: NodeRuntimeConfig) -> Self { + Self { config } + } +} + +impl CompilerExtension for NodeExtension { + fn name(&self) -> &str { + "Node.js" + } + + fn phase(&self) -> ExtensionPhase { + ExtensionPhase::Runtime + } + + fn required_hosts(&self) -> Vec { + vec!["node".to_string()] + } + + fn required_bash_commands(&self) -> Vec { + NODE_BASH_COMMANDS + .iter() + .map(|c| (*c).to_string()) + .collect() + } + + fn prompt_supplement(&self) -> Option { + Some( + "\n\ +---\n\ +\n\ +## Node.js\n\ +\n\ +Node.js is installed and available. 
Use `node` to run scripts, \ +`npm` to manage packages, and `npx` to run package binaries.\n" + .to_string(), + ) + } + + fn prepare_steps(&self) -> Vec { + let mut steps = vec![generate_node_install(&self.config)]; + // Emit ensure-npmrc + npmAuthenticate only when an internal feed is configured + if self.config.feed_url().is_some() || self.config.config().is_some() { + steps.push(generate_ensure_npmrc(&self.config)); + steps.push(generate_npm_authenticate()); + } + steps + } + + fn agent_env_vars(&self) -> Vec<(String, String)> { + let mut vars = Vec::new(); + if let Some(feed_url) = self.config.feed_url() { + vars.push(("NPM_CONFIG_REGISTRY".to_string(), feed_url.to_string())); + } + vars + } + + fn validate(&self, ctx: &CompileContext) -> Result> { + let mut warnings = Vec::new(); + + // Warn if bash is disabled + let is_bash_disabled = ctx + .front_matter + .tools + .as_ref() + .and_then(|t| t.bash.as_ref()) + .is_some_and(|cmds| cmds.is_empty()); + + if is_bash_disabled { + warnings.push(format!( + "Agent '{}' has runtimes.node enabled but tools.bash is empty. \ + Node.js requires bash access (node, npm, npx commands).", + ctx.agent_name + )); + } + + // Mutual exclusivity: config + feed-url (check before individual field warnings) + if self.config.config().is_some() && self.config.feed_url().is_some() { + anyhow::bail!( + "runtimes.node: 'config' and 'feed-url' are mutually exclusive. \ + Use one or the other." + ); + } + + // Warn if config: is set — accepted but not yet functional inside AWF + if self.config.config().is_some() { + warnings.push( + "runtimes.node.config is accepted but the .npmrc file will not be \ + available inside the AWF agent environment yet. Config file passthrough \ + requires AWF proxy-auth support (gh-aw-firewall#2547)." 
+ .to_string(), + ); + } + + // Validate feed URL + if let Some(feed_url) = self.config.feed_url() { + validate::validate_feed_url(feed_url, "runtimes.node.feed-url")?; + } + + // Validate version string + if let Some(version) = self.config.version() { + validate::reject_pipeline_injection(version, "runtimes.node.version")?; + } + + Ok(warnings) + } +} diff --git a/src/runtimes/node/mod.rs b/src/runtimes/node/mod.rs new file mode 100644 index 00000000..4d235c69 --- /dev/null +++ b/src/runtimes/node/mod.rs @@ -0,0 +1,168 @@ +//! Node.js runtime support for the ado-aw compiler. +//! +//! When enabled via `runtimes: node:`, the compiler auto-installs a specific +//! Node.js version via `NodeTool@0`, emits `npmAuthenticate@0` for internal +//! feed access, adds Node ecosystem domains to the AWF network allowlist, +//! extends the bash command allow-list, and optionally injects feed URL env +//! vars for npm. +//! +//! No AWF mounts or PATH prepends are needed because `NodeTool@0` installs +//! to `/opt/hostedtoolcache` (already mounted read-only by AWF) and publishes +//! `##vso[task.prependpath]` entries that AWF merges via `$GITHUB_PATH`. +//! +//! This module generates `NodeTool@0` YAML inline rather than importing +//! the `node_tool_step()` helper from `compile/extensions/mod.rs`, keeping +//! the runtime decoupled from the ado-script infrastructure. + +pub mod extension; + +pub use extension::NodeExtension; + +use ado_aw_derive::SanitizeConfig; +use serde::Deserialize; + +use crate::sanitize::SanitizeConfig as SanitizeConfigTrait; + +/// Node.js runtime configuration — accepts both `true` and object formats. 
+/// +/// Examples: +/// ```yaml +/// # Simple enablement (installs default Node LTS) +/// runtimes: +/// node: true +/// +/// # With options (pin version, configure feed) +/// runtimes: +/// node: +/// version: "22.x" +/// feed-url: "https://pkgs.dev.azure.com/ORG/PROJECT/_packaging/FEED/npm/registry/" +/// ``` +#[derive(Debug, Deserialize, Clone)] +#[serde(untagged)] +pub enum NodeRuntimeConfig { + /// Simple boolean enablement + Enabled(bool), + /// Full configuration with options + WithOptions(NodeOptions), +} + +impl NodeRuntimeConfig { + /// Whether Node.js is enabled. + pub fn is_enabled(&self) -> bool { + match self { + NodeRuntimeConfig::Enabled(enabled) => *enabled, + NodeRuntimeConfig::WithOptions(_) => true, + } + } + + /// Get the Node.js version (None = fall back to the compiler default `22.x`). + pub fn version(&self) -> Option<&str> { + match self { + NodeRuntimeConfig::Enabled(_) => None, + NodeRuntimeConfig::WithOptions(opts) => opts.version.as_deref(), + } + } + + /// Get the npm registry feed URL (None = use public npmjs). + pub fn feed_url(&self) -> Option<&str> { + match self { + NodeRuntimeConfig::Enabled(_) => None, + NodeRuntimeConfig::WithOptions(opts) => opts.feed_url.as_deref(), + } + } + + /// Get the config file path (None = not set). + pub fn config(&self) -> Option<&str> { + match self { + NodeRuntimeConfig::Enabled(_) => None, + NodeRuntimeConfig::WithOptions(opts) => opts.config.as_deref(), + } + } +} + +impl SanitizeConfigTrait for NodeRuntimeConfig { + fn sanitize_config_fields(&mut self) { + match self { + NodeRuntimeConfig::Enabled(_) => {} + NodeRuntimeConfig::WithOptions(opts) => opts.sanitize_config_fields(), + } + } +} + +/// Node.js runtime options. +#[derive(Debug, Deserialize, Clone, Default, SanitizeConfig)] +pub struct NodeOptions { + /// Node.js version to install (e.g., "22.x", "20.x"). + /// Passed to `NodeTool@0` `versionSpec`. + #[serde(default)] + pub version: Option<String>, + + /// Internal npm registry URL. 
When set, the compiler injects + /// `NPM_CONFIG_REGISTRY` env var into the agent environment so npm + /// uses this feed without .npmrc changes (which would conflict with + /// AWF's credential overlay of `~/.npmrc`). + #[serde(default, rename = "feed-url")] + pub feed_url: Option<String>, + + /// Path to an .npmrc config file. Accepted with a compile-time warning: + /// the file is not yet available inside the AWF agent environment. + /// Reserved for future proxy-auth integration (gh-aw-firewall#2547). + #[serde(default)] + pub config: Option<String>, +} + +/// Bash commands that the Node.js runtime adds to the allow-list. +pub const NODE_BASH_COMMANDS: &[&str] = &["node", "npm", "npx"]; + +/// Generate the `NodeTool@0` pipeline step (inline, decoupled from ado-script). +pub fn generate_node_install(config: &NodeRuntimeConfig) -> String { + let version = config.version().unwrap_or("22.x"); + format!( + "\ +- task: NodeTool@0 + inputs: + versionSpec: '{version}' + displayName: 'Install Node.js {version}'" + ) +} + +/// Generate the `npmAuthenticate@0` pipeline step. +/// +/// Emitted when `feed-url:` or `config:` is set, authenticating the ADO +/// build service identity for internal npm feeds. This runs before AWF. +/// +/// Requires a `.npmrc` file to exist; call [`generate_ensure_npmrc`] first +/// to create one if the repo doesn't already have one. +pub fn generate_npm_authenticate() -> String { + "\ +- task: npmAuthenticate@0 + inputs: + workingFile: .npmrc + displayName: 'Authenticate npm (build service identity)'" + .to_string() +} + +/// Generate a step that ensures `.npmrc` exists before `npmAuthenticate@0`. +/// +/// `npmAuthenticate@0` requires `workingFile:` to point at an existing file — +/// unlike `PipAuthenticate@1` it fails if the file is missing. This step +/// creates a minimal `.npmrc` (with the configured registry or the default +/// npmjs registry) only when one doesn't already exist, preserving any +/// repo-checked-in `.npmrc`. 
+pub fn generate_ensure_npmrc(config: &NodeRuntimeConfig) -> String { + let registry = config + .feed_url() + .unwrap_or("https://registry.npmjs.org/"); + + format!( + "\ +- bash: |\n\ + if [ ! -f .npmrc ]; then\n\ + echo 'registry={registry}' > .npmrc\n\ + echo 'Created .npmrc with registry={registry}'\n\ + else\n\ + echo '.npmrc already exists, skipping creation'\n\ + fi\n\ + displayName: 'Ensure .npmrc exists'" + ) +} diff --git a/src/runtimes/python/extension.rs b/src/runtimes/python/extension.rs new file mode 100644 index 00000000..de4992bd --- /dev/null +++ b/src/runtimes/python/extension.rs @@ -0,0 +1,126 @@ +// ─── Python ──────────────────────────────────────────────────────── + +use crate::compile::extensions::{CompileContext, CompilerExtension, ExtensionPhase}; +use crate::validate; +use super::{PYTHON_BASH_COMMANDS, PythonRuntimeConfig, generate_pip_authenticate, generate_python_install}; +use anyhow::Result; + +/// Python runtime extension. +/// +/// Injects: ecosystem network hosts (python), bash commands (python, pip, uv), +/// install steps (UsePythonVersion@0), authenticate steps (PipAuthenticate@1), +/// env vars (PIP_INDEX_URL, UV_DEFAULT_INDEX when feed-url is set), and a +/// prompt supplement. +pub struct PythonExtension { + config: PythonRuntimeConfig, +} + +impl PythonExtension { + pub fn new(config: PythonRuntimeConfig) -> Self { + Self { config } + } +} + +impl CompilerExtension for PythonExtension { + fn name(&self) -> &str { + "Python" + } + + fn phase(&self) -> ExtensionPhase { + ExtensionPhase::Runtime + } + + fn required_hosts(&self) -> Vec { + vec!["python".to_string()] + } + + fn required_bash_commands(&self) -> Vec { + PYTHON_BASH_COMMANDS + .iter() + .map(|c| (*c).to_string()) + .collect() + } + + fn prompt_supplement(&self) -> Option { + Some( + "\n\ +---\n\ +\n\ +## Python\n\ +\n\ +Python is installed and available. Use `python3` or `python` to run scripts, \ +`pip` or `pip3` to install packages. 
If you need `uv` for fast package \ +management, install it first with `pip install uv`.\n" + .to_string(), + ) + } + + fn prepare_steps(&self) -> Vec { + let mut steps = vec![generate_python_install(&self.config)]; + // Emit PipAuthenticate only when feed-url is set (config alone is not + // sufficient — PipAuthenticate needs a feed to authenticate against) + if self.config.feed_url().is_some() { + steps.push(generate_pip_authenticate()); + } + steps + } + + fn agent_env_vars(&self) -> Vec<(String, String)> { + let mut vars = Vec::new(); + if let Some(feed_url) = self.config.feed_url() { + vars.push(("PIP_INDEX_URL".to_string(), feed_url.to_string())); + vars.push(("UV_DEFAULT_INDEX".to_string(), feed_url.to_string())); + } + vars + } + + fn validate(&self, ctx: &CompileContext) -> Result> { + let mut warnings = Vec::new(); + + // Warn if bash is disabled + let is_bash_disabled = ctx + .front_matter + .tools + .as_ref() + .and_then(|t| t.bash.as_ref()) + .is_some_and(|cmds| cmds.is_empty()); + + if is_bash_disabled { + warnings.push(format!( + "Agent '{}' has runtimes.python enabled but tools.bash is empty. \ + Python requires bash access (python, pip, uv commands).", + ctx.agent_name + )); + } + + // Mutual exclusivity: config + feed-url (check before individual field warnings) + if self.config.config().is_some() && self.config.feed_url().is_some() { + anyhow::bail!( + "runtimes.python: 'config' and 'feed-url' are mutually exclusive. \ + Use one or the other." + ); + } + + // Warn if config: is set — accepted but not yet functional inside AWF + if self.config.config().is_some() { + warnings.push( + "runtimes.python.config is accepted but the config file will not be \ + available inside the AWF agent environment yet. Config file passthrough \ + requires AWF proxy-auth support (gh-aw-firewall#2547)." 
+ .to_string(), + ); + } + + // Validate feed URL + if let Some(feed_url) = self.config.feed_url() { + validate::validate_feed_url(feed_url, "runtimes.python.feed-url")?; + } + + // Validate version string + if let Some(version) = self.config.version() { + validate::reject_pipeline_injection(version, "runtimes.python.version")?; + } + + Ok(warnings) + } +} diff --git a/src/runtimes/python/mod.rs b/src/runtimes/python/mod.rs new file mode 100644 index 00000000..70ab88da --- /dev/null +++ b/src/runtimes/python/mod.rs @@ -0,0 +1,138 @@ +//! Python runtime support for the ado-aw compiler. +//! +//! When enabled via `runtimes: python:`, the compiler auto-installs a specific +//! Python version via `UsePythonVersion@0`, emits `PipAuthenticate@1` for +//! internal feed access, adds Python ecosystem domains to the AWF network +//! allowlist, extends the bash command allow-list, and optionally injects +//! feed URL env vars for `pip` and `uv`. +//! +//! No AWF mounts or PATH prepends are needed because `UsePythonVersion@0` +//! installs to `/opt/hostedtoolcache` (already mounted read-only by AWF) +//! and publishes `##vso[task.prependpath]` entries that AWF merges via +//! `$GITHUB_PATH`. + +pub mod extension; + +pub use extension::PythonExtension; + +use ado_aw_derive::SanitizeConfig; +use serde::Deserialize; + +use crate::sanitize::SanitizeConfig as SanitizeConfigTrait; + +/// Python runtime configuration — accepts both `true` and object formats. 
+///
+/// Examples:
+/// ```yaml
+/// # Simple enablement (installs default Python 3.x)
+/// runtimes:
+///   python: true
+///
+/// # With options (pin version, configure feed)
+/// runtimes:
+///   python:
+///     version: "3.12"
+///     feed-url: "https://pkgs.dev.azure.com/myorg/_packaging/myfeed/pypi/simple/"
+/// ```
+#[derive(Debug, Deserialize, Clone)]
+#[serde(untagged)]
+pub enum PythonRuntimeConfig {
+    /// Simple boolean enablement (`python: true` / `python: false`)
+    Enabled(bool),
+    /// Full configuration with options (`python: { version: ..., feed-url: ... }`)
+    WithOptions(PythonOptions),
+}
+
+impl PythonRuntimeConfig {
+    /// Whether Python is enabled. The object form always counts as enabled.
+    pub fn is_enabled(&self) -> bool {
+        match self {
+            PythonRuntimeConfig::Enabled(enabled) => *enabled,
+            PythonRuntimeConfig::WithOptions(_) => true,
+        }
+    }
+
+    /// Get the pinned Python version (None = not pinned; the install step falls back to `3.x`).
+    pub fn version(&self) -> Option<&str> {
+        match self {
+            PythonRuntimeConfig::Enabled(_) => None,
+            PythonRuntimeConfig::WithOptions(opts) => opts.version.as_deref(),
+        }
+    }
+
+    /// Get the feed URL for pip/uv (None = use public PyPI).
+    pub fn feed_url(&self) -> Option<&str> {
+        match self {
+            PythonRuntimeConfig::Enabled(_) => None,
+            PythonRuntimeConfig::WithOptions(opts) => opts.feed_url.as_deref(),
+        }
+    }
+
+    /// Get the config file path (None = not set).
+    pub fn config(&self) -> Option<&str> {
+        match self {
+            PythonRuntimeConfig::Enabled(_) => None,
+            PythonRuntimeConfig::WithOptions(opts) => opts.config.as_deref(),
+        }
+    }
+}
+
+impl SanitizeConfigTrait for PythonRuntimeConfig {
+    fn sanitize_config_fields(&mut self) {
+        match self {
+            PythonRuntimeConfig::Enabled(_) => {} // a bare bool carries no string fields
+            PythonRuntimeConfig::WithOptions(opts) => opts.sanitize_config_fields(),
+        }
+    }
+}
+
+/// Python runtime options.
+#[derive(Debug, Deserialize, Clone, Default, SanitizeConfig)]
+pub struct PythonOptions {
+    /// Python version to install (e.g., "3.12", "3.11").
+    /// Passed to `UsePythonVersion@0` `versionSpec`.
+    /// Defaults to latest 3.x if not specified.
+    #[serde(default)]
+    pub version: Option<String>,
+
+    /// Internal package feed URL. When set, the compiler injects
+    /// `PIP_INDEX_URL` and `UV_DEFAULT_INDEX` env vars into the agent
+    /// environment so pip/uv use this feed without config file changes.
+    #[serde(default, rename = "feed-url")]
+    pub feed_url: Option<String>,
+
+    /// Path to a pip/uv config file. Accepted with a compile-time warning —
+    /// the file is not yet available inside the AWF agent environment
+    /// (gh-aw-firewall#2547). Mutually exclusive with `feed-url`.
+    #[serde(default)]
+    pub config: Option<String>,
+}
+
+/// Bash commands that the Python runtime adds to the allow-list.
+pub const PYTHON_BASH_COMMANDS: &[&str] = &["python", "python3", "pip", "pip3", "uv"];
+
+/// Generate the `UsePythonVersion@0` pipeline step.
+pub fn generate_python_install(config: &PythonRuntimeConfig) -> String {
+    let version = config.version().unwrap_or("3.x"); // compiler-side default when unpinned
+    format!(
+        "\
+- task: UsePythonVersion@0
+  inputs:
+    versionSpec: '{version}'
+  displayName: 'Install Python {version}'"
+    )
+}
+
+/// Generate the `PipAuthenticate@1` pipeline step.
+///
+/// Emitted when `feed-url:` is set, authenticating the ADO build service
+/// identity for internal package feeds. This runs before AWF, setting up
+/// credentials via `##vso[task.setvariable]`.
+pub fn generate_pip_authenticate() -> String {
+    "\
+- task: PipAuthenticate@1
+  inputs:
+    artifactFeeds: ''
+  displayName: 'Authenticate pip (build service identity)'"
+        .to_string() // NOTE(review): `artifactFeeds` is empty — confirm the task still yields credentials for `feed-url`
+}
diff --git a/src/validate.rs b/src/validate.rs
index af73d126..97bfb666 100644
--- a/src/validate.rs
+++ b/src/validate.rs
@@ -408,6 +408,40 @@ pub fn warn_potential_secrets(mcp_name: &str, env: &HashMap, hea
     warnings
 }
 
+// ── Feed URL validation ─────────────────────────────────────────────────────
+
+/// Validate a package feed URL for use in runtime `feed-url:` fields.
+///
+/// Checks for:
+/// - ADO expression injection (`$(`, `${{`, `$[`)
+/// - Pipeline command injection (`##vso[`, `##[`)
+/// - Template marker injection (`{{`)
+/// - Newline injection
+/// - Quote characters (`"`, `'`) — would break YAML or bash quoting
+/// - Missing or unsupported scheme (must start with lowercase `https://` or `http://`)
+pub fn validate_feed_url(url: &str, field_name: &str) -> Result<()> {
+    reject_pipeline_injection(url, field_name)?; // presumably covers the first four bullets above — confirm in that helper
+
+    if url.contains('"') || url.contains('\'') {
+        anyhow::bail!(
+            "Front matter '{}' contains a quote character which would produce \
+             malformed YAML or bash syntax. Remove quotes from the URL. Found: '{}'",
+            field_name,
+            url,
+        );
+    }
+
+    if !url.starts_with("https://") && !url.starts_with("http://") { // prefix match is case-sensitive
+        anyhow::bail!(
+            "Front matter '{}' must use https:// or http:// scheme. Found: '{}'",
+            field_name,
+            url,
+        );
+    }
+
+    Ok(())
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -643,4 +677,36 @@ mod tests {
         let warnings = warn_potential_secrets("mcp", &empty_env, &HashMap::new());
         assert!(warnings.is_empty());
     }
+
+    // ── Feed URL validation ────────────────────────────────────────────
+
+    #[test]
+    fn test_validate_feed_url_valid() {
+        assert!(validate_feed_url("https://pkgs.dev.azure.com/org/_packaging/feed/pypi/simple/", "test").is_ok());
+        assert!(validate_feed_url("http://internal.registry.example.com/", "test").is_ok()); // plain http is allowed too
+    }
+
+    #[test]
+    fn test_validate_feed_url_missing_scheme() {
+        assert!(validate_feed_url("pkgs.dev.azure.com/org/feed", "test").is_err()); // no scheme at all
+        assert!(validate_feed_url("ftp://example.com/feed", "test").is_err()); // scheme other than http(s)
+    }
+
+    #[test]
+    fn test_validate_feed_url_injection() {
+        assert!(validate_feed_url("https://example.com/$(SECRET)", "test").is_err()); // ADO macro expression
+        assert!(validate_feed_url("https://example.com/##vso[task.setvariable]", "test").is_err()); // logging command
+        assert!(validate_feed_url("https://example.com/{{ marker }}", "test").is_err()); // template marker
+        assert!(validate_feed_url("https://example.com/\ninjected", "test").is_err()); // embedded newline
+    }
+
+    #[test]
+    fn test_validate_feed_url_rejects_double_quote() {
+        assert!(validate_feed_url("https://example.com/feed\"name", "test").is_err()); // hits the quote check
+    }
+
+    #[test]
+    fn test_validate_feed_url_rejects_single_quote() {
+        assert!(validate_feed_url("https://example.com/feed'name", "test").is_err()); // hits the quote check
+    }
 }