From 2ac4560c5ad084b1858c14c01a677e4e2119e71f Mon Sep 17 00:00:00 2001 From: James Devine Date: Fri, 8 May 2026 21:53:13 +0100 Subject: [PATCH 1/8] feat(compile): autorewrite front matter via versioned migration chain Introduces a Django/Rails-style schema-version field on FrontMatter and a registered migration framework so breaking grammar changes auto-migrate existing user sources during compile rather than hard-failing. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- AGENTS.md | 7 + docs/extending.md | 3 + docs/front-matter.md | 1 + docs/migrations.md | 481 ++++++++++++++++++ src/compile/common.rs | 368 +++++++++++++- src/compile/migration_integration_test.rs | 231 +++++++++ src/compile/migrations/helpers.rs | 198 ++++++++ src/compile/migrations/mod.rs | 567 ++++++++++++++++++++++ src/compile/mod.rs | 208 +++++++- src/compile/types.rs | 46 ++ src/main.rs | 20 +- tests/migration_tests.rs | 382 +++++++++++++++ 12 files changed, 2485 insertions(+), 27 deletions(-) create mode 100644 docs/migrations.md create mode 100644 src/compile/migration_integration_test.rs create mode 100644 src/compile/migrations/helpers.rs create mode 100644 src/compile/migrations/mod.rs create mode 100644 tests/migration_tests.rs diff --git a/AGENTS.md b/AGENTS.md index 02de6d1f..3070a06b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -61,6 +61,10 @@ Every compiled pipeline runs as three sequential jobs: │ │ │ ├── safe_outputs.rs # Always-on SafeOutputs MCP extension │ │ │ ├── trigger_filters.rs # Trigger filter extension (gate evaluator delivery) │ │ │ └── tests.rs # Extension integration tests +│ │ ├── migrations/ # Front-matter migrations (one file per version step) +│ │ │ ├── mod.rs # Migration struct, MIGRATIONS registry, runner, CURRENT_SCHEMA_VERSION +│ │ │ └── helpers.rs # take_key, insert_no_overwrite, rename_key, ConflictPolicy +│ │ ├── migration_integration_test.rs # White-box rewrite-path tests (stub registry injection) │ │ └── types.rs # Front matter grammar 
and types │ ├── init.rs # Repository initialization for AI-first authoring │ ├── execute.rs # Stage 3 safe output execution @@ -192,6 +196,9 @@ index to jump to the right page. - [`docs/filter-ir.md`](docs/filter-ir.md) — filter expression IR specification: `Fact`/`Predicate` types, three-pass compilation (lower → validate → codegen), gate step generation, adding new filter types. +- [`docs/migrations.md`](docs/migrations.md) — front-matter migration + framework: schema-version field, automatic source rewrite on + breaking-change updates, contributor workflow for adding migrations. - [`docs/local-development.md`](docs/local-development.md) — local development setup notes. diff --git a/docs/extending.md b/docs/extending.md index f4a235f2..bed8e450 100644 --- a/docs/extending.md +++ b/docs/extending.md @@ -9,6 +9,9 @@ When extending the compiler: 1. **New CLI commands**: Add variants to the `Commands` enum in `main.rs` 2. **New compile targets**: Implement the `Compiler` trait in a new file under `src/compile/` 3. **New front matter fields**: Add fields to `FrontMatter` in `src/compile/types.rs` + - **Breaking changes** (renames, removals, type changes, added required fields) + require adding a migration under `src/compile/migrations/` in the same PR. + See [`docs/migrations.md`](migrations.md). 4. **New template markers**: Handle replacements in the target-specific compiler (e.g., `standalone.rs` or `onees.rs`) 5. **New safe-output tools**: Add to `src/safeoutputs/` — implement `ToolResult`, `Executor`, register in `mod.rs`, `mcp.rs`, `execute.rs` 6. **New first-class tools**: Create `src/tools//` with `mod.rs` and `extension.rs` (CompilerExtension impl). Add `execute.rs` if the tool has Stage 3 runtime logic. 
Extend `ToolsConfig` in `types.rs`, add collection in `collect_extensions()` diff --git a/docs/front-matter.md b/docs/front-matter.md index cdcf9eb0..b1a383e3 100644 --- a/docs/front-matter.md +++ b/docs/front-matter.md @@ -8,6 +8,7 @@ The compiler expects markdown files with YAML front matter similar to gh-aw: ```markdown --- +schema-version: 1 # Optional: front-matter schema version (defaults to 1). The compiler bumps this in place when migrations apply during compilation. See docs/migrations.md. name: "name for this agent" description: "One line description for this agent" target: standalone # Optional: "standalone" (default) or "1es". See docs/targets.md. diff --git a/docs/migrations.md b/docs/migrations.md new file mode 100644 index 00000000..4045cdaa --- /dev/null +++ b/docs/migrations.md @@ -0,0 +1,481 @@ +# Front-matter Migrations + +_Part of the [ado-aw documentation](../AGENTS.md)._ + +The `ado-aw` compiler maintains a versioned schema for the front-matter +grammar. When a breaking change is introduced (a renamed field, a removed +field, a type change), the compiler ships a **migration** that rewrites +older sources to the new shape automatically. Users see a warning during +compilation; their source files are updated in place; the build moves on. + +This page is the reference for both users (what migrations mean for me) +and contributors (how to add one). + +## How it works + +### `schema-version` field + +Every front-matter file carries an optional `schema-version: ` +field. When the field is missing, it defaults to `1`. The compiler bumps +this in place after running migrations. + +```yaml +--- +schema-version: 1 +name: my-agent +description: my agent +--- +``` + +End users typically don't write this field by hand. The compiler keeps +it accurate when migrations apply. + +### The compile flow + +1. `ado-aw compile` reads the source `.md` and parses the front matter + as an **untyped** `serde_yaml::Mapping`. 
This step never trips on + removed/renamed fields. +2. The migration runner walks the registered chain from the source's + `schema-version` up to the compiler's current version, applying each + migration's transformation in order. +3. The compiler runs all the usual validation and codegen against the + migrated, typed `FrontMatter`. +4. **Only on a fully successful compile**, the source `.md` is + atomically rewritten with the migrated front matter and a warning + prints to stderr. A failed compile leaves the source untouched. +5. The `.lock.yml` is written atomically last. + +### What gets preserved on rewrite + +- **Body markdown** is preserved byte-for-byte (everything after the + closing `---`). +- **Front-matter key order** is preserved by `serde_yaml`'s + insertion-ordered mapping. +- **Front-matter comments** are NOT preserved. `serde_yaml` round-trip + drops them. The warning emitted on rewrite calls this out so it isn't + a surprise. If you have important context in a front-matter comment, + move it into the markdown body before running compile. +- **Quote and scalar styles** in YAML may be normalized. This is + cosmetic. + +### Atomicity and lost-update protection + +The rewrite uses `tempfile + rename` for atomicity (no torn writes). +Before the rename, the runner re-reads the source and compares its +SHA-256 to the snapshot taken at parse time. If the file changed +between parse and rewrite, the runner aborts with a clear error +("source file ... changed during compilation; refusing to migrate") +rather than clobbering whoever wrote the file. + +### `check` command behavior + +`ado-aw check` exits non-zero when migrations are pending — there is no +opt-in flag and no warning-only mode. Rationale: compiled pipelines +download the **same** `ado-aw` version that produced them +(`src/data/base.yml`, `src/data/1es-base.yml`), so the in-pipeline +integrity check is internally consistent by construction. 
The only time
+`check` sees a pending migration is when a developer runs a newer
+`ado-aw` locally against an older source — exactly when we want to
+fail loudly. The fix is `ado-aw compile`, which migrates the source in
+place.
+
+### `execute` command behavior
+
+The Stage 3 executor runs migrations in memory only. It never rewrites
+the source (the executor's working tree is not the source-of-truth
+tree). When a migration applies, it logs a warning and continues.
+
+## Adding a migration
+
+You need a migration whenever you introduce a breaking change to the
+front-matter grammar:
+
+- Renaming a field
+- Removing a field
+- Changing a field's type or shape
+- Adding a required field that didn't exist before
+- Changing the meaning of an existing field
+
+Non-breaking changes (adding an optional field, accepting a new
+variant) do **not** need a migration.
+
+### File layout
+
+Migrations live in `src/compile/migrations/`:
+
+```
+src/compile/migrations/
+├── mod.rs # Framework + MIGRATIONS registry
+├── helpers.rs # take_key, insert_no_overwrite, rename_key, ConflictPolicy
+├── 0001_engine_id_split.rs
+├── 0002_permissions_field.rs
+└── 0003_safeoutput_renames.rs
+```
+
+The filename prefix is the zero-padded `from_version`. Files sort
+naturally in the directory listing in chain order.
+
+### Anatomy of a migration
+
+```rust
+// src/compile/migrations/0001_engine_id_split.rs
+
+use anyhow::Result;
+use serde_yaml::Mapping;
+
+use super::{ConflictPolicy, Migration, MigrationContext};
+
+pub static MIGRATION: Migration = Migration {
+ from_version: 1,
+ to_version: 2,
+ id: "engine_id_split",
+ summary: "engine: <model> -> engine: { id: copilot, model: <model> }",
+ introduced_in: "0.27.0",
+ apply: apply_migration,
+};
+
+fn apply_migration(fm: &mut Mapping, _ctx: &MigrationContext) -> Result<()> {
+ // ... your transformation ...
+ Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn upgrades_engine_string_to_object() { + // before / after fixture pair + } + + #[test] + fn already_current_shape_is_preserved() { + // defensive: every from_version=1 migration runs on every + // unstamped source in the wild, including ones that were + // authored *yesterday* against the current shape. + } +} +``` + +### Registry append + +Two edits in `src/compile/migrations/mod.rs`: + +```rust +mod m0001_engine_id_split; // <-- add module declaration + +pub static MIGRATIONS: &[&'static Migration] = &[ + &m0001_engine_id_split::MIGRATION, // <-- append at the end +]; +``` + +`CURRENT_SCHEMA_VERSION` is computed automatically from the registry +length. Tests in `mod.rs` enforce that the registry is contiguous, that +migration ids are unique, and that filenames match `from_version`. A +malformed registry fails fast at runtime via `ensure!()`. + +### Use the helpers + +Migrations should prefer `helpers::*` over raw `Mapping` manipulation: + +- `take_key(map, "old")` — remove and return. +- `insert_no_overwrite(map, "new", value)` — error on conflict. +- `rename_key(map, "old", "new", ConflictPolicy::Error)` — default + policy is **error**, never silent overwrite. + +Silent overwrite is almost always a bug when transforming user data. + +### Defensive predicates for `from_version: 1` + +Because the `schema-version` field doesn't exist in any source written +before this framework shipped, every existing file looks like v1 — +including files that were authored yesterday against the current +shape. Migrations from `from_version: 1` must be **shape-detecting and +conflict-aware**: + +- If both old and new keys are present → error rather than silent + overwrite (default `ConflictPolicy::Error`). +- If the old key has the new shape already → no-op (don't migrate). + +Once we ship v2, this concern goes away for v2 → v3 etc. 
— v2 sources
+have an explicit `schema-version: 2` stamp, so the framework only runs
+migrations against actual v2 documents.
+
+### Concurrent PRs
+
+If two PRs each add a migration `N → N+1`, the second one to merge
+must rebase. The rebase is mechanical:
+
+1. Renumber the file prefix: `0003_…rs` → `0004_…rs`.
+2. Bump `from_version` and `to_version` in the static.
+3. Update any per-migration test fixtures that stamped the old
+ version.
+4. Re-append your migration to `MIGRATIONS` after the freshly merged
+ one.
+
+The contiguity tests fail loudly if a rebase leaves a gap, so this is
+hard to get wrong silently.
+
+### What if my change can't be expressed as a `Mapping` rewrite?
+
+The migration `apply` function receives only the front-matter mapping.
+It cannot inspect the markdown body, the file path, the lock file, or
+git state. If your change requires that information, do not write a
+migration that guesses. Instead, add a migration that **errors** with
+an actionable "manual migration required: <explanation>" message so
+the user knows exactly what to fix.
+
+## Worked example: `engine_id_split`
+
+This is the migration that would have caught the `0.17.0` breaking
+change (`engine: <model>` → `engine: { id: copilot, model: <model> }`).
+It's a complete file you could drop into `src/compile/migrations/`
+and register today. Read it as a template for your own migration.
+
+```rust
+// src/compile/migrations/0001_engine_id_split.rs
+
+//! engine: <model> -> engine: { id: copilot, model: <model> }
+//!
+//! Before 0.17.0 the `engine` field was a model name (e.g.
+//! `engine: claude-opus-4.5`). The grammar changed to use engine
+//! identifiers (`engine: copilot`), with the model nested in an object
+//! form (`engine: { id: copilot, model: <model> }`).
+//!
+//! This migration detects the old shape and rewrites it.
+ +use anyhow::{bail, Result}; +use serde_yaml::{Mapping, Value}; + +use super::{Migration, MigrationContext}; + +pub static MIGRATION: Migration = Migration { + from_version: 1, + to_version: 2, + id: "engine_id_split", + summary: "engine: -> engine: { id: copilot, model: }", + introduced_in: "0.27.0", + apply: apply_migration, +}; + +/// Engine identifiers that are valid as the simple-form string. When +/// `engine` is a string equal to one of these, the source is already +/// using the current grammar and we leave it alone. +const KNOWN_ENGINE_IDS: &[&str] = &["copilot"]; + +fn apply_migration(fm: &mut Mapping, _ctx: &MigrationContext) -> Result<()> { + let key = Value::String("engine".to_string()); + + // No `engine` field at all — the front matter relies on the + // default. Default is `copilot` in both old and new grammars, so + // there's nothing to migrate. + let Some(engine) = fm.get(&key) else { + return Ok(()); + }; + + match engine { + // Already-object form. Either authored against the new + // grammar from day one, or migrated by an earlier run. No-op. + Value::Mapping(_) => Ok(()), + + // String form. Two cases: + // - The string is a known engine identifier (`copilot`): + // the source is already using the current simple form. + // No-op. + // - Anything else: the string is a *model name* from the + // old grammar. Wrap it in the object form. + Value::String(s) => { + if KNOWN_ENGINE_IDS.contains(&s.as_str()) { + return Ok(()); + } + let model = s.clone(); + let mut object = Mapping::new(); + object.insert( + Value::String("id".to_string()), + Value::String("copilot".to_string()), + ); + object.insert( + Value::String("model".to_string()), + Value::String(model), + ); + fm.insert(key, Value::Mapping(object)); + Ok(()) + } + + // Unexpected shape (number, bool, sequence, …). Refuse rather + // than guess — the user needs to fix this by hand. 
+ other => bail!( + "engine field has unexpected shape (expected string or mapping, \ + got {}); manual migration required", + describe(other) + ), + } +} + +fn describe(v: &Value) -> &'static str { + match v { + Value::Null => "null", + Value::Bool(_) => "bool", + Value::Number(_) => "number", + Value::String(_) => "string", + Value::Sequence(_) => "sequence", + Value::Mapping(_) => "mapping", + Value::Tagged(_) => "tagged", + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn run(input: &str) -> Mapping { + let mut m: Mapping = serde_yaml::from_str(input).unwrap(); + apply_migration(&mut m, &MigrationContext {}).expect("apply"); + m + } + + fn run_err(input: &str) -> String { + let mut m: Mapping = serde_yaml::from_str(input).unwrap(); + format!( + "{}", + apply_migration(&mut m, &MigrationContext {}).unwrap_err() + ) + } + + #[test] + fn rewrites_legacy_model_string_into_object_form() { + let after = run("name: x\nengine: claude-opus-4.5\n"); + let engine = after + .get(Value::String("engine".into())) + .unwrap() + .as_mapping() + .expect("engine should now be a mapping"); + assert_eq!( + engine.get(Value::String("id".into())), + Some(&Value::String("copilot".into())) + ); + assert_eq!( + engine.get(Value::String("model".into())), + Some(&Value::String("claude-opus-4.5".into())) + ); + } + + #[test] + fn already_current_simple_form_is_noop() { + // Defensive: every from_version=1 migration must be safe on + // sources that look like they're already current. Here the + // user explicitly wrote the new simple form `engine: copilot`. 
+ let after = run("name: x\nengine: copilot\n"); + assert_eq!( + after.get(Value::String("engine".into())), + Some(&Value::String("copilot".into())), + "string `copilot` should be left untouched" + ); + } + + #[test] + fn already_object_form_is_noop() { + let input = "name: x\nengine:\n id: copilot\n model: claude-opus-4.7\n"; + let mut original: Mapping = serde_yaml::from_str(input).unwrap(); + let after = run(input); + assert_eq!( + after.get(Value::String("engine".into())), + original + .remove(Value::String("engine".into())) + .as_ref() + ); + } + + #[test] + fn missing_engine_field_is_noop() { + let after = run("name: x\ndescription: y\n"); + assert!(!after.contains_key(Value::String("engine".into()))); + } + + #[test] + fn unexpected_engine_shape_is_rejected() { + let err = run_err("name: x\nengine: 42\n"); + assert!( + err.contains("manual migration required"), + "expected actionable error, got: {}", + err + ); + } +} +``` + +### What this example illustrates + +1. **The data model does the work.** `Migration` is a static — + every field is set once at the top of the file. The runner reads + `from_version`/`to_version` to walk the chain. `apply` is a plain + `fn` pointer, so the registry is `&'static [&'static Migration]` + — zero allocation, zero indirection beyond the function call. + +2. **`Value` pattern-matching beats `if let` ladders.** The + `match engine { Mapping(_) => …, String(s) => …, other => … }` + shape is the natural way to express "I support these shapes; + reject others." It catches numeric/boolean/null engines that + someone might have written by mistake. + +3. **Defensive predicates for `from_version: 1`.** Three of the five + tests exercise no-op cases (already-object, already-simple + `copilot`, missing field). These matter because **every existing + `.md` file in the wild looks like v1** — they don't carry + `schema-version`. The migration runs on all of them, so it has + to be safe on already-current shapes. 
Once we ship v2 the
+   defensiveness requirement drops for v2 → v3 migrations: v2
+   sources have an explicit stamp.
+
+4. **Hard error on unexpected shapes.** The `other => bail!(…)`
+   arm is the "manual migration required" escape hatch. We don't
+   try to guess what `engine: 42` means — we surface a clear error
+   so the user fixes it by hand.
+
+5. **Registering it is two lines** in `src/compile/migrations/mod.rs`:
+
+   ```rust
+   mod m0001_engine_id_split;
+
+   pub static MIGRATIONS: &[&'static Migration] = &[
+       &m0001_engine_id_split::MIGRATION,
+   ];
+   ```
+
+   `CURRENT_SCHEMA_VERSION` automatically becomes 2. The
+   registry-contiguity test, the filename-prefix test, and the
+   id-uniqueness test all keep passing.
+
+## Tests
+
+The migration framework is covered by three layers of tests:
+
+- **Unit tests** in `src/compile/migrations/{mod.rs,helpers.rs}`
+  cover registry contiguity, helper edge cases, and individual
+  migration apply functions.
+- **White-box integration tests** in
+  `src/compile/migration_integration_test.rs` exercise the rewrite
+  path end-to-end (parse → migrate → compile → atomic source rewrite
+  → lock-file write) using a stub migration registry injected via the
+  crate-private `compile_pipeline_with_registry` and
+  `parse_markdown_detailed_with_registry` hooks. They live inside
+  `src/` because the production registry is empty and integration
+  tests in `tests/` cannot link against crate internals.
+- **Black-box CLI tests** in `tests/migration_tests.rs` spawn the
+  compiled `ado-aw` binary as a subprocess and assert on the
+  user-visible behavior of `compile` and `check` for sources with
+  various `schema-version` shapes (future versions, non-integer values,
+  healthy round-trip, etc.).
+
+When you add a real migration, ship the per-migration before/after
+fixtures alongside it in the migration's own file
+(`src/compile/migrations/<NNNN>_<id>.rs`).
+ +## See also + +- [`docs/front-matter.md`](front-matter.md) — the full front-matter + grammar (and where the `schema-version` field is documented for end + users). +- [`docs/extending.md`](extending.md) — broader guidance for adding + features to the compiler, including the requirement to add a + migration alongside any breaking front-matter change. diff --git a/src/compile/common.rs b/src/compile/common.rs index dfee1b57..c0008d95 100644 --- a/src/compile/common.rs +++ b/src/compile/common.rs @@ -12,27 +12,216 @@ use crate::allowed_hosts::{CORE_ALLOWED_HOSTS, mcp_required_hosts}; use crate::ecosystem_domains::{get_ecosystem_domains, is_ecosystem_identifier, is_known_ecosystem}; use crate::validate; -/// Parse the markdown file and extract front matter and body -pub fn parse_markdown(content: &str) -> Result<(FrontMatter, String)> { - let content = content.trim(); +/// Atomically write `contents` to `path`. +/// +/// Uses [`tempfile::NamedTempFile`] in the destination's parent +/// directory so the final `persist` is a same-filesystem rename. This +/// guarantees readers either see the old file or the new file in full — +/// never a half-written state. +/// +/// Behavior: +/// +/// - Creates the tempfile in `path.parent()` (falls back to `.` when +/// the parent is empty, matching `tokio::fs::write` semantics). +/// - On Unix, preserves the existing file's mode if the target exists. +/// Otherwise the tempfile keeps its default mode (0o600 from +/// `tempfile`'s implementation). +/// - When the destination is a symlink, the rename replaces the +/// symlink with a regular file (matches `tokio::fs::write`; the +/// symlink target is *not* followed). +pub async fn atomic_write(path: &Path, contents: &str) -> Result<()> { + let path = path.to_path_buf(); + let owned_contents = contents.to_string(); + // tempfile is sync; do the whole thing on a blocking task so we + // don't block the async runtime on large writes / fsync. 
+ tokio::task::spawn_blocking(move || atomic_write_blocking(&path, &owned_contents)) + .await + .context("atomic_write task panicked")? +} + +fn atomic_write_blocking(path: &Path, contents: &str) -> Result<()> { + use std::io::Write; + + let parent = path.parent().filter(|p| !p.as_os_str().is_empty()); + let mut tmp = match parent { + Some(dir) => tempfile::NamedTempFile::new_in(dir).with_context(|| { + format!( + "failed to create temporary file in {}", + dir.display() + ) + })?, + None => tempfile::NamedTempFile::new() + .context("failed to create temporary file in current directory")?, + }; - if !content.starts_with("---") { + tmp.write_all(contents.as_bytes()) + .with_context(|| format!("failed to write temporary file for {}", path.display()))?; + tmp.as_file() + .sync_all() + .with_context(|| format!("failed to fsync temporary file for {}", path.display()))?; + + // On Unix, copy the existing file's mode onto the tempfile so + // permissions are preserved across the atomic rename. + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + if let Ok(meta) = std::fs::metadata(path) { + let mode = meta.permissions().mode(); + std::fs::set_permissions( + tmp.path(), + std::fs::Permissions::from_mode(mode), + ) + .with_context(|| { + format!( + "failed to copy permissions from {} to temp file", + path.display() + ) + })?; + } + } + + tmp.persist(path) + .with_context(|| format!("failed to atomically rename into {}", path.display()))?; + Ok(()) +} + +/// Detailed parse result. Holds enough information to rewrite the +/// source on disk byte-faithfully when migrations apply. +/// +/// See [`parse_markdown_detailed`]. +#[derive(Debug)] +pub struct ParsedSource { + /// Typed front matter, after migrations have been applied to the + /// underlying mapping. + pub front_matter: FrontMatter, + /// Body for compilation, with leading/trailing whitespace trimmed + /// (matches the legacy `parse_markdown` second tuple element). 
+ pub markdown_body: String, + /// Migration outcome. + pub migrations: super::migrations::MigrationReport, + /// The migrated front-matter mapping, with `schema-version` + /// bumped. Used to reconstruct the source for rewrite. + pub front_matter_mapping: serde_yaml::Mapping, + /// The body region exactly as it appeared after the closing `---`, + /// byte-for-byte (no trim). Includes any leading newline. + pub body_raw: String, + /// SHA-256 of the original source bytes (lost-update protection). + pub source_sha256: [u8; 32], +} + +/// Parse the markdown file, run the migration chain on the front matter +/// in memory, and return both the typed `FrontMatter` and the raw +/// fragments needed to rewrite the source on disk byte-faithfully. +/// +/// Use this from callers that may rewrite the source (the `compile` +/// command). Callers that only want the typed view of the front matter +/// should use the backward-compatible [`parse_markdown`] wrapper. +pub fn parse_markdown_detailed(content: &str) -> Result { + parse_markdown_detailed_with_registry(content, super::migrations::MIGRATIONS) +} + +/// Variant of [`parse_markdown_detailed`] that allows injecting an +/// explicit migration registry. Used by tests; production callers go +/// through the no-arg version that reads the global +/// [`super::migrations::MIGRATIONS`]. +pub(crate) fn parse_markdown_detailed_with_registry( + content: &str, + registry: &[&'static super::migrations::Migration], +) -> Result { + use sha2::Digest; + + // Lost-update protection: hash the raw input as it was provided, so + // a rewrite path can later re-read the file and compare. + let mut hasher = sha2::Sha256::new(); + hasher.update(content.as_bytes()); + let source_sha256: [u8; 32] = hasher.finalize().into(); + + // Allow leading whitespace before the opening fence (preserves + // historical leniency). We compute a byte offset into `content` so + // that `body_raw` extraction is purely byte-faithful. 
+ let leading_ws = content.bytes().take_while(|b| b.is_ascii_whitespace()).count(); + let after_lead = &content[leading_ws..]; + if !after_lead.starts_with("---") { anyhow::bail!("Markdown file must start with YAML front matter (---)"); } - // Find the closing --- - let rest = &content[3..]; - let end_idx = rest + let after_open = &after_lead[3..]; + let end_idx = after_open .find("\n---") .context("Could not find closing --- for front matter")?; - let yaml_content = &rest[..end_idx]; - let markdown_body = rest[end_idx + 4..].trim(); + let yaml_str = &after_open[..end_idx]; + let body_raw_slice = &after_open[end_idx + 4..]; + let body_raw = body_raw_slice.to_string(); + let markdown_body = body_raw_slice.trim().to_string(); + // Stage 1: parse to untyped Value, reject non-mapping at top level. + let parsed_value: serde_yaml::Value = + serde_yaml::from_str(yaml_str).context("Failed to parse YAML front matter")?; + let mut mapping = match parsed_value { + serde_yaml::Value::Mapping(m) => m, + other => { + anyhow::bail!( + "YAML front matter must be a mapping/object, got {}", + yaml_value_kind(&other) + ); + } + }; + + // Stage 2: run the migration chain on the untyped mapping. + let report = super::migrations::migrate_front_matter_with(&mut mapping, registry) + .context("Failed to migrate front matter")?; + + // Stage 3: deserialize the (possibly migrated) mapping into the + // typed FrontMatter. Errors here mean either the user wrote an + // unsupported shape or a migration produced invalid output. 
let front_matter: FrontMatter = - serde_yaml::from_str(yaml_content).context("Failed to parse YAML front matter")?; + serde_yaml::from_value(serde_yaml::Value::Mapping(mapping.clone())) + .context("Failed to parse YAML front matter")?; + + Ok(ParsedSource { + front_matter, + markdown_body, + migrations: report, + front_matter_mapping: mapping, + body_raw, + source_sha256, + }) +} - Ok((front_matter, markdown_body.to_string())) +/// Reconstruct full source content from a migrated [`ParsedSource`]. +/// +/// Output shape: +/// - `---\n` +/// - the migrated YAML mapping (`serde_yaml::to_string` always ends +/// with `\n`) +/// - `---` +/// - the original body region byte-for-byte (`body_raw`) +pub fn reconstruct_source(parsed: &ParsedSource) -> Result { + let yaml_serialized = serde_yaml::to_string(&parsed.front_matter_mapping) + .context("Failed to serialize migrated front matter")?; + Ok(format!("---\n{}---{}", yaml_serialized, parsed.body_raw)) +} + +fn yaml_value_kind(v: &serde_yaml::Value) -> &'static str { + match v { + serde_yaml::Value::Null => "null", + serde_yaml::Value::Bool(_) => "bool", + serde_yaml::Value::Number(_) => "number", + serde_yaml::Value::String(_) => "string", + serde_yaml::Value::Sequence(_) => "sequence", + serde_yaml::Value::Mapping(_) => "mapping", + serde_yaml::Value::Tagged(_) => "tagged", + } +} + +/// Backward-compatible parse: returns the typed front matter and the +/// trimmed body. New callers that may rewrite the source on disk +/// should use [`parse_markdown_detailed`] instead. +#[allow(dead_code)] +pub fn parse_markdown(content: &str) -> Result<(FrontMatter, String)> { + let parsed = parse_markdown_detailed(content)?; + Ok((parsed.front_matter, parsed.markdown_body)) } /// Replace a placeholder in the template, preserving the indentation for multi-line content. 
@@ -2293,6 +2482,163 @@ mod tests { fm } + // ─── atomic_write ───────────────────────────────────────────────────────── + + #[tokio::test] + async fn atomic_write_creates_new_file() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("a.txt"); + atomic_write(&path, "hello\n").await.unwrap(); + let read = tokio::fs::read_to_string(&path).await.unwrap(); + assert_eq!(read, "hello\n"); + } + + #[tokio::test] + async fn atomic_write_overwrites_existing_file() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("a.txt"); + tokio::fs::write(&path, "old contents").await.unwrap(); + atomic_write(&path, "new contents").await.unwrap(); + let read = tokio::fs::read_to_string(&path).await.unwrap(); + assert_eq!(read, "new contents"); + } + + #[cfg(unix)] + #[tokio::test] + async fn atomic_write_preserves_unix_mode() { + use std::os::unix::fs::PermissionsExt; + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("a.txt"); + tokio::fs::write(&path, "old").await.unwrap(); + std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o640)).unwrap(); + atomic_write(&path, "new").await.unwrap(); + let mode = std::fs::metadata(&path).unwrap().permissions().mode(); + assert_eq!(mode & 0o777, 0o640, "expected mode 0o640, got {:o}", mode & 0o777); + } + + #[cfg(unix)] + #[tokio::test] + async fn atomic_write_replaces_symlink_with_regular_file() { + let dir = tempfile::tempdir().unwrap(); + let target = dir.path().join("target.txt"); + let link = dir.path().join("link.txt"); + tokio::fs::write(&target, "target").await.unwrap(); + std::os::unix::fs::symlink(&target, &link).unwrap(); + + atomic_write(&link, "via-link").await.unwrap(); + + // Link is now a regular file; target is unchanged. 
+ let link_meta = std::fs::symlink_metadata(&link).unwrap(); + assert!( + !link_meta.file_type().is_symlink(), + "symlink should have been replaced" + ); + assert_eq!(tokio::fs::read_to_string(&link).await.unwrap(), "via-link"); + assert_eq!(tokio::fs::read_to_string(&target).await.unwrap(), "target"); + } + + // ─── parse_markdown_detailed ────────────────────────────────────────────── + + #[test] + fn parse_markdown_detailed_preserves_body_byte_for_byte() { + // Case 1: trailing newline. + let original = "---\nname: x\ndescription: y\n---\nbody line\n"; + let parsed = parse_markdown_detailed(original).unwrap(); + assert_eq!(parsed.body_raw, "\nbody line\n"); + let reconstructed = reconstruct_source(&parsed).unwrap(); + // No migrations ran, so the YAML round-trip is the only + // structural change. The body region is byte-faithful. + assert!(reconstructed.ends_with("---\nbody line\n")); + + // Case 2: no trailing newline at all. + let original = "---\nname: x\ndescription: y\n---\nbody"; + let parsed = parse_markdown_detailed(original).unwrap(); + assert_eq!(parsed.body_raw, "\nbody"); + let reconstructed = reconstruct_source(&parsed).unwrap(); + assert!(reconstructed.ends_with("---\nbody")); + + // Case 3: empty body, but trailing newline. + let original = "---\nname: x\ndescription: y\n---\n"; + let parsed = parse_markdown_detailed(original).unwrap(); + assert_eq!(parsed.body_raw, "\n"); + let reconstructed = reconstruct_source(&parsed).unwrap(); + assert!(reconstructed.ends_with("---\n")); + + // Case 4: blank lines between closing fence and content are + // preserved as-is in body_raw. 
+ let original = "---\nname: x\ndescription: y\n---\n\n\n## heading\n\nbody.\n"; + let parsed = parse_markdown_detailed(original).unwrap(); + assert_eq!(parsed.body_raw, "\n\n\n## heading\n\nbody.\n"); + } + + #[test] + fn parse_markdown_detailed_byte_faithful_when_no_migration_runs() { + // With the registry empty, parsing a v1 source and reconstructing + // it should produce a byte-identical document apart from + // serde_yaml's canonical formatting of the YAML mapping. We + // assert the body region matches exactly. + let original = "---\nname: x\ndescription: y\n---\n## body\n"; + let parsed = parse_markdown_detailed(original).unwrap(); + assert!(!parsed.migrations.changed()); + let reconstructed = reconstruct_source(&parsed).unwrap(); + // Find the closing fence in both and compare the suffix. + let orig_suffix = &original[original.find("\n---\n").unwrap()..]; + let recon_suffix = &reconstructed[reconstructed.find("\n---\n").unwrap()..]; + assert_eq!(orig_suffix, recon_suffix, "body region must be byte-identical"); + } + + #[test] + fn parse_markdown_detailed_allows_leading_whitespace() { + let original = "\n \n---\nname: x\ndescription: y\n---\nbody\n"; + let parsed = parse_markdown_detailed(original).unwrap(); + assert_eq!(parsed.front_matter.name, "x"); + } + + #[test] + fn parse_markdown_detailed_rejects_missing_open_fence() { + let original = "name: x\ndescription: y\nbody\n"; + let err = parse_markdown_detailed(original).unwrap_err(); + assert!( + format!("{}", err).contains("must start with YAML front matter"), + "got: {}", + err + ); + } + + #[test] + fn parse_markdown_detailed_rejects_non_mapping_top_level() { + let original = "---\n- a\n- b\n---\nbody\n"; + let err = parse_markdown_detailed(original).unwrap_err(); + assert!( + format!("{}", err).contains("must be a mapping"), + "got: {}", + err + ); + } + + #[test] + fn parse_markdown_detailed_rejects_missing_close_fence() { + let original = "---\nname: x\nno-fence"; + let err = 
parse_markdown_detailed(original).unwrap_err(); + assert!( + format!("{}", err).contains("Could not find closing"), + "got: {}", + err + ); + } + + #[test] + fn parse_markdown_detailed_records_source_hash() { + let a = "---\nname: x\ndescription: y\n---\n"; + let b = "---\nname: x\ndescription: y\n---\nextra\n"; + let pa = parse_markdown_detailed(a).unwrap(); + let pb = parse_markdown_detailed(b).unwrap(); + assert_ne!(pa.source_sha256, pb.source_sha256); + // Hashing is stable over re-parses of the same input. + let pa2 = parse_markdown_detailed(a).unwrap(); + assert_eq!(pa.source_sha256, pa2.source_sha256); + } + // ─── compute_effective_workspace ───────────────────────────────────────── #[test] diff --git a/src/compile/migration_integration_test.rs b/src/compile/migration_integration_test.rs new file mode 100644 index 00000000..7e086059 --- /dev/null +++ b/src/compile/migration_integration_test.rs @@ -0,0 +1,231 @@ +//! White-box integration tests for the front-matter migration framework. +//! +//! These tests exercise the rewrite path end-to-end (parse → migrate → +//! compile → atomic source rewrite → lock-file write) using a stub +//! migration registry. They live inside `src/` because they need +//! access to crate-private hooks (`compile_pipeline_with_registry`, +//! `perform_source_rewrite_if_needed`, +//! `common::parse_markdown_detailed_with_registry`) that the empty +//! production [`super::migrations::MIGRATIONS`] registry cannot +//! exercise. +//! +//! Black-box subprocess tests of the user-visible CLI behavior live in +//! [`tests/migration_tests.rs`](../../tests/migration_tests.rs). 
+ +#![cfg(test)] + +use super::common; +use super::migrations::{ConflictPolicy, Migration, MigrationContext}; +use super::{compile_pipeline, compile_pipeline_with_registry, perform_source_rewrite_if_needed}; +use anyhow::Result; +use serde_yaml::Mapping; + +// ─── Stub registry ────────────────────────────────────────────────────────── + +/// Stub migration: renames the `legacy-name` top-level key to `name`. +/// Drives end-to-end rewrite tests without registering a real migration. +static TEST_RENAME_LEGACY_NAME: Migration = Migration { + from_version: 1, + to_version: 2, + id: "test_rename_legacy_name", + summary: "rename `legacy-name` -> `name`", + introduced_in: "test", + apply: rename_legacy_name, +}; + +fn rename_legacy_name(fm: &mut Mapping, _ctx: &MigrationContext) -> Result<()> { + super::migrations::rename_key(fm, "legacy-name", "name", ConflictPolicy::Error)?; + Ok(()) +} + +/// Source whose typed deserialization would fail without a migration: +/// it has `legacy-name` instead of the required `name` field. 
+fn stale_source() -> &'static str { + "---\nlegacy-name: my-agent\ndescription: test description\n---\n## Body\nHello.\n" +} + +fn write_temp_md(name: &str, content: &str) -> (tempfile::TempDir, std::path::PathBuf) { + let dir = tempfile::tempdir().expect("tempdir"); + let path = dir.path().join(name); + std::fs::write(&path, content).expect("write temp md"); + (dir, path) +} + +// ─── End-to-end rewrite path ──────────────────────────────────────────────── + +#[tokio::test] +async fn migration_rewrites_stale_source_and_preserves_body() { + let (dir, source_path) = write_temp_md("agent.md", stale_source()); + + let registry: &[&'static Migration] = &[&TEST_RENAME_LEGACY_NAME]; + let migrated = compile_pipeline_with_registry( + &source_path.to_string_lossy(), + None, + true, // skip_integrity + false, // debug_pipeline + registry, + ) + .await + .expect("compile_pipeline_with_registry should succeed"); + + assert!(migrated, "expected compile to report a source migration"); + + // Source file rewritten: contains `name: my-agent`, no + // `legacy-name`, has `schema-version: 2`. + let rewritten = std::fs::read_to_string(&source_path).expect("read rewritten"); + assert!( + rewritten.contains("name: my-agent"), + "rewritten source missing `name: my-agent`:\n{}", + rewritten + ); + assert!( + !rewritten.contains("legacy-name"), + "rewritten source still contains legacy-name:\n{}", + rewritten + ); + assert!( + rewritten.contains("schema-version: 2"), + "rewritten source missing `schema-version: 2`:\n{}", + rewritten + ); + + // Body region byte-identical to the original (everything after + // the closing `---`). + let orig = stale_source(); + let orig_body = &orig[orig.find("\n---\n").unwrap() + 5..]; + let new_body = &rewritten[rewritten.find("\n---\n").unwrap() + 5..]; + assert_eq!(orig_body, new_body, "body region not preserved byte-for-byte"); + + // Lock file generated. 
+ let lock = source_path.with_extension("lock.yml"); + assert!(lock.exists(), "expected {} to exist", lock.display()); + + // Re-running parse with the same stub registry produces no + // further migration — the rewrite moved the source to current. + let after = std::fs::read_to_string(&source_path).unwrap(); + let parsed_again = + common::parse_markdown_detailed_with_registry(&after, registry).unwrap(); + assert!( + !parsed_again.migrations.changed(), + "expected post-rewrite source to be at current schema-version, but \ + {} more migration(s) ran", + parsed_again.migrations.applied.len() + ); + + drop(dir); +} + +#[tokio::test] +async fn migration_skip_when_no_stub_registry_runs() { + // With the empty production registry, a healthy v1 source must + // compile without rewriting anything. + let healthy = "---\nname: x\ndescription: y\n---\nbody\n"; + let (dir, source_path) = write_temp_md("agent.md", healthy); + let registry: &[&'static Migration] = &[]; + let migrated = compile_pipeline_with_registry( + &source_path.to_string_lossy(), + None, + true, + false, + registry, + ) + .await + .expect("compile should succeed"); + assert!(!migrated, "expected no rewrite for already-current source"); + let after = std::fs::read_to_string(&source_path).unwrap(); + assert_eq!(after, healthy, "source must be byte-identical"); + drop(dir); +} + +#[tokio::test] +async fn migration_with_only_version_bump_still_writes() { + fn noop(_fm: &mut Mapping, _ctx: &MigrationContext) -> Result<()> { + Ok(()) + } + static NOOP_MIG: Migration = Migration { + from_version: 1, + to_version: 2, + id: "test_noop_v1_to_v2", + summary: "no-op", + introduced_in: "test", + apply: noop, + }; + + // Even a no-op migration changes bytes on disk because the + // schema-version field is added. Verify we DO rewrite. 
+ let healthy = "---\nname: x\ndescription: y\n---\nbody\n"; + let (dir, source_path) = write_temp_md("agent.md", healthy); + let registry: &[&'static Migration] = &[&NOOP_MIG]; + let migrated = compile_pipeline_with_registry( + &source_path.to_string_lossy(), + None, + true, + false, + registry, + ) + .await + .expect("compile should succeed"); + assert!( + migrated, + "expected rewrite because schema-version field was added" + ); + let after = std::fs::read_to_string(&source_path).unwrap(); + assert!(after.contains("schema-version: 2")); + drop(dir); +} + +// ─── Lost-update guard ────────────────────────────────────────────────────── + +#[tokio::test] +async fn perform_source_rewrite_lost_update_guard() { + // Construct a parsed-source pointing at a file whose hash will + // not match (we mutate it after computing the hash). This + // exercises the lost-update guard directly without needing a + // full compile. + let (dir, source_path) = write_temp_md("agent.md", stale_source()); + let original = std::fs::read_to_string(&source_path).unwrap(); + let registry: &[&'static Migration] = &[&TEST_RENAME_LEGACY_NAME]; + let parsed = + common::parse_markdown_detailed_with_registry(&original, registry).unwrap(); + assert!(parsed.migrations.changed()); + + // Mutate the source after parse (simulates editor / concurrent + // tool overwriting). + std::fs::write(&source_path, "different bytes\n").unwrap(); + + // Rewrite must refuse. + let result = + perform_source_rewrite_if_needed(&source_path, &original, &parsed).await; + let err = result.expect_err("expected lost-update guard to fire"); + assert!( + format!("{:#}", err).contains("changed during compilation"), + "unexpected error: {:#}", + err + ); + + // The source is left as the interloper wrote it (we did not + // clobber); it is *not* the migrated version. 
+ let after = std::fs::read_to_string(&source_path).unwrap(); + assert_eq!(after, "different bytes\n"); + + drop(dir); +} + +// ─── Future-version safety net ────────────────────────────────────────────── + +#[tokio::test] +async fn check_pipeline_fails_on_future_schema_version() { + // The real registry is empty (CURRENT=1), so a source claiming + // schema-version: 2 is a future version. compile should reject it + // loudly through the public entry point. + let future = "---\nschema-version: 2\nname: x\ndescription: y\n---\n"; + let (dir, source_path) = write_temp_md("agent.md", future); + let result = compile_pipeline(&source_path.to_string_lossy(), None, true, false).await; + let err = result.expect_err("future-version source should fail compile"); + assert!( + format!("{:#}", err).contains("only supports up to"), + "unexpected error: {:#}", + err + ); + drop(dir); +} diff --git a/src/compile/migrations/helpers.rs b/src/compile/migrations/helpers.rs new file mode 100644 index 00000000..20987346 --- /dev/null +++ b/src/compile/migrations/helpers.rs @@ -0,0 +1,198 @@ +//! Helpers for writing migrations against `serde_yaml::Mapping`. +//! +//! Migrations should prefer these over raw `Mapping` manipulation so that +//! conflicts (e.g. both old and new keys present) are surfaced rather than +//! silently overwritten. + +use anyhow::{bail, Result}; +use serde_yaml::{Mapping, Value}; + +/// Conflict policy used by [`rename_key`] when the destination key is +/// already present. +#[allow(dead_code)] +#[derive(Debug, Clone, Copy)] +pub enum ConflictPolicy { + /// Default: error if `new` already exists when renaming `old → new`. + Error, + /// Keep the existing `new` value, drop `old`. + PreferNew, + /// Overwrite `new` with the value from `old`. + PreferOld, +} + +/// Remove and return the value at `key`, or `None` if absent. 
+#[allow(dead_code)]
+pub fn take_key(m: &mut Mapping, key: &str) -> Option<Value> {
+    m.remove(Value::String(key.to_string()))
+}
+
+/// Insert `value` at `key`, returning `Err` if the key already exists.
+///
+/// Prefer this over `Mapping::insert` in migrations: silent overwrite is
+/// almost always a bug when transforming user data.
+#[allow(dead_code)]
+pub fn insert_no_overwrite(
+    m: &mut Mapping,
+    key: &str,
+    value: Value,
+) -> Result<()> {
+    if m.contains_key(Value::String(key.to_string())) {
+        bail!(
+            "refusing to overwrite existing key `{}` in front matter",
+            key
+        );
+    }
+    m.insert(Value::String(key.to_string()), value);
+    Ok(())
+}
+
+/// Rename `old` → `new` according to `policy`.
+///
+/// Returns `Ok(true)` when the rename happened (regardless of policy
+/// branch), `Ok(false)` when `old` was absent (no-op).
+#[allow(dead_code)]
+pub fn rename_key(
+    m: &mut Mapping,
+    old: &str,
+    new: &str,
+    policy: ConflictPolicy,
+) -> Result<bool> {
+    let Some(old_value) = take_key(m, old) else {
+        return Ok(false);
+    };
+    let new_present = m.contains_key(Value::String(new.to_string()));
+    match (new_present, policy) {
+        (false, _) => {
+            m.insert(Value::String(new.to_string()), old_value);
+            Ok(true)
+        }
+        (true, ConflictPolicy::Error) => {
+            bail!(
+                "refusing to rename `{}` -> `{}`: destination key already exists \
+                 (set both old and new keys at once is ambiguous)",
+                old,
+                new
+            );
+        }
+        (true, ConflictPolicy::PreferNew) => Ok(true),
+        (true, ConflictPolicy::PreferOld) => {
+            m.insert(Value::String(new.to_string()), old_value);
+            Ok(true)
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn map_with(pairs: &[(&str, &str)]) -> Mapping {
+        let mut m = Mapping::new();
+        for (k, v) in pairs {
+            m.insert(
+                Value::String((*k).to_string()),
+                Value::String((*v).to_string()),
+            );
+        }
+        m
+    }
+
+    #[test]
+    fn take_key_present_returns_value_and_removes_it() {
+        let mut m = map_with(&[("a", "1"), ("b", "2")]);
+        let v = take_key(&mut m, "a").unwrap();
+        
assert_eq!(v, Value::String("1".to_string())); + assert!(!m.contains_key(Value::String("a".to_string()))); + assert!(m.contains_key(Value::String("b".to_string()))); + } + + #[test] + fn take_key_absent_returns_none() { + let mut m = map_with(&[("a", "1")]); + assert!(take_key(&mut m, "missing").is_none()); + assert!(m.contains_key(Value::String("a".to_string()))); + } + + #[test] + fn insert_no_overwrite_inserts_when_absent() { + let mut m = Mapping::new(); + insert_no_overwrite(&mut m, "k", Value::String("v".into())).unwrap(); + assert_eq!( + m.get(Value::String("k".into())), + Some(&Value::String("v".into())) + ); + } + + #[test] + fn insert_no_overwrite_errors_on_conflict() { + let mut m = map_with(&[("k", "v1")]); + let err = insert_no_overwrite(&mut m, "k", Value::String("v2".into())) + .unwrap_err(); + assert!( + format!("{}", err).contains("refusing to overwrite"), + "unexpected error message: {}", + err + ); + assert_eq!( + m.get(Value::String("k".into())), + Some(&Value::String("v1".into())) + ); + } + + #[test] + fn rename_key_old_absent_is_noop() { + let mut m = map_with(&[("b", "2")]); + let renamed = rename_key(&mut m, "a", "z", ConflictPolicy::Error).unwrap(); + assert!(!renamed); + assert!(!m.contains_key(Value::String("z".into()))); + } + + #[test] + fn rename_key_new_absent_moves_value() { + let mut m = map_with(&[("old", "v")]); + let renamed = rename_key(&mut m, "old", "new", ConflictPolicy::Error).unwrap(); + assert!(renamed); + assert!(!m.contains_key(Value::String("old".into()))); + assert_eq!( + m.get(Value::String("new".into())), + Some(&Value::String("v".into())) + ); + } + + #[test] + fn rename_key_new_present_with_error_policy_fails() { + let mut m = map_with(&[("old", "v_old"), ("new", "v_new")]); + let err = rename_key(&mut m, "old", "new", ConflictPolicy::Error).unwrap_err(); + assert!( + format!("{}", err).contains("destination key already exists"), + "unexpected error message: {}", + err + ); + } + + #[test] + fn 
rename_key_new_present_with_prefer_new_drops_old() {
+        let mut m = map_with(&[("old", "v_old"), ("new", "v_new")]);
+        let renamed =
+            rename_key(&mut m, "old", "new", ConflictPolicy::PreferNew).unwrap();
+        assert!(renamed);
+        assert!(!m.contains_key(Value::String("old".into())));
+        assert_eq!(
+            m.get(Value::String("new".into())),
+            Some(&Value::String("v_new".into()))
+        );
+    }
+
+    #[test]
+    fn rename_key_new_present_with_prefer_old_overwrites() {
+        let mut m = map_with(&[("old", "v_old"), ("new", "v_new")]);
+        let renamed =
+            rename_key(&mut m, "old", "new", ConflictPolicy::PreferOld).unwrap();
+        assert!(renamed);
+        assert!(!m.contains_key(Value::String("old".into())));
+        assert_eq!(
+            m.get(Value::String("new".into())),
+            Some(&Value::String("v_old".into()))
+        );
+    }
+}
diff --git a/src/compile/migrations/mod.rs b/src/compile/migrations/mod.rs
new file mode 100644
index 00000000..0de2a531
--- /dev/null
+++ b/src/compile/migrations/mod.rs
@@ -0,0 +1,567 @@
+//! Front-matter migration framework.
+//!
+//! A version-stamped, append-only chain of transformations that rewrites
+//! older source front-matter shapes into the current one before typed
+//! deserialization. Modeled on Django/Rails-style schema migrations.
+//!
+//! See `docs/migrations.md` for the full contributor reference. In
+//! short:
+//!
+//! - Each migration goes from `from_version: N` to `to_version: N + 1`.
+//! - Migrations live in `src/compile/migrations/<NNNN>_<id>.rs` and are
+//!   appended to [`MIGRATIONS`] in strict ascending order.
+//! - [`CURRENT_SCHEMA_VERSION`] is `1 + MIGRATIONS.len()`.
+//! - Each source `.md` carries a `schema-version: <N>` field (missing
+//!   = 1). The compiler bumps it in place when migrations apply.
+//! - Migrations operate on the **untyped** `serde_yaml::Mapping`
+//!   representation so they can rewrite shapes that no longer match the
+//!   typed `FrontMatter` (renamed/removed fields,
+//!   `deny_unknown_fields`).
+//!
+//! 
The runner is intentionally simple: read current version, validate +//! the registry is contiguous at runtime, and walk the chain. + +use anyhow::{bail, ensure, Context, Result}; +use serde_yaml::{Mapping, Value}; + +mod helpers; +#[allow(unused_imports)] +pub use helpers::{insert_no_overwrite, rename_key, take_key, ConflictPolicy}; + +/// Front-matter key that holds the schema version. Kebab-case to match +/// the rest of the front-matter grammar. +#[allow(dead_code)] +pub const SCHEMA_VERSION_KEY: &str = "schema-version"; + +/// Forward-compatible context passed to every migration. Currently +/// empty; we keep it in the signature so future migrations can be given +/// (e.g.) the source path without breaking the function pointer type. +#[non_exhaustive] +pub struct MigrationContext {} + +/// A single front-matter migration step. +/// +/// Migrations are pure functions that mutate the front-matter mapping +/// in place. They must NOT bump [`SCHEMA_VERSION_KEY`] themselves — the +/// runner does that after each successful step. They must be +/// deterministic and side-effect-free (documentation convention; not +/// enforced by the type system). +pub struct Migration { + /// Source schema version this migration consumes. + pub from_version: u32, + /// Target schema version produced (always `from_version + 1`). + pub to_version: u32, + /// Stable snake_case identifier used in logs, errors, and tests. + pub id: &'static str, + /// Human-facing one-line summary, surfaced in the compile warning. + pub summary: &'static str, + /// Compiler version that introduced this migration (e.g. "0.27.0"). + /// Currently used only for human-readable provenance; not consumed + /// by the runner. + #[allow(dead_code)] + pub introduced_in: &'static str, + /// The transformation. See type-level docs for invariants. + pub apply: fn(&mut Mapping, &MigrationContext) -> Result<()>, +} + +/// The fixed registry of migrations, in strict ascending `from_version` +/// order. 
Append-only — never reorder, never delete entries.
+///
+/// Adding a migration is two edits:
+///
+/// 1. Create `src/compile/migrations/<NNNN>_<id>.rs` with a
+///    `pub static MIGRATION: Migration` and register the module here.
+/// 2. Append `&<module>::MIGRATION` to this slice.
+pub static MIGRATIONS: &[&'static Migration] = &[];
+
+/// Total number of schema versions known to this compiler.
+///
+/// Computed from the registry length so it can never drift.
+#[allow(dead_code)]
+pub const CURRENT_SCHEMA_VERSION: u32 = 1 + MIGRATIONS.len() as u32;
+
+/// Result of running the migration chain on a single front-matter
+/// mapping.
+#[derive(Debug, Clone)]
+pub struct MigrationReport {
+    /// The schema version present in the source before migration.
+    pub from_version: u32,
+    /// The schema version after migration (always
+    /// [`CURRENT_SCHEMA_VERSION`] when the runner returns Ok).
+    pub to_version: u32,
+    /// Ordered list of migrations that ran, with id + summary so
+    /// callers (warnings, logs) don't need to re-query the registry.
+    pub applied: Vec<AppliedMigration>,
+}
+
+/// Record of a single migration that ran. Carries enough info for
+/// warning emission and tests without re-looking up the registry.
+#[derive(Debug, Clone)]
+pub struct AppliedMigration {
+    pub id: &'static str,
+    pub summary: &'static str,
+}
+
+impl MigrationReport {
+    /// Build a no-migration report for a source already at `version`.
+    pub fn unchanged(version: u32) -> Self {
+        Self {
+            from_version: version,
+            to_version: version,
+            applied: Vec::new(),
+        }
+    }
+
+    /// Returns true when at least one migration ran.
+    pub fn changed(&self) -> bool {
+        !self.applied.is_empty()
+    }
+
+    /// IDs of migrations that ran, in order.
+    #[allow(dead_code)]
+    pub fn applied_ids(&self) -> Vec<&'static str> {
+        self.applied.iter().map(|a| a.id).collect()
+    }
+}
+
+/// Read [`SCHEMA_VERSION_KEY`] from the mapping. Missing field → 1.
+/// Returns Err on any non-positive-integer value (zero, negative,
+/// float, string, sequence, null).
+pub fn read_schema_version(fm: &Mapping) -> Result<u32> {
+    let Some(value) = fm.get(Value::String(SCHEMA_VERSION_KEY.to_string())) else {
+        return Ok(1);
+    };
+    let n = value.as_u64().ok_or_else(|| {
+        anyhow::anyhow!(
+            "front matter `schema-version` must be a positive integer (>= 1), got: {}",
+            describe_yaml_value(value)
+        )
+    })?;
+    if n == 0 {
+        bail!(
+            "front matter `schema-version` must be a positive integer (>= 1), got: 0"
+        );
+    }
+    if n > u32::MAX as u64 {
+        bail!(
+            "front matter `schema-version` must be a positive integer (>= 1), got: {} (exceeds u32::MAX)",
+            n
+        );
+    }
+    Ok(n as u32)
+}
+
+/// Set [`SCHEMA_VERSION_KEY`] on the mapping. Inserts at the end if
+/// absent, updates in place otherwise.
+pub fn set_schema_version(fm: &mut Mapping, version: u32) {
+    fm.insert(
+        Value::String(SCHEMA_VERSION_KEY.to_string()),
+        Value::Number(serde_yaml::Number::from(version as u64)),
+    );
+}
+
+/// Apply the registered migration chain to `fm`.
+///
+/// Equivalent to [`migrate_front_matter_with`] called with the global
+/// [`MIGRATIONS`] registry.
+#[allow(dead_code)]
+pub fn migrate_front_matter(fm: &mut Mapping) -> Result<MigrationReport> {
+    migrate_front_matter_with(fm, MIGRATIONS)
+}
+
+/// Apply an explicit migration chain. Used by tests with a stub
+/// registry; production code calls [`migrate_front_matter`].
+pub fn migrate_front_matter_with(
+    fm: &mut Mapping,
+    registry: &[&'static Migration],
+) -> Result<MigrationReport> {
+    let target_version = 1 + registry.len() as u32;
+    let mut current = read_schema_version(fm)?;
+    let from_version = current;
+
+    if current > target_version {
+        bail!(
+            "source `schema-version` is {}, but this compiler only supports up to {}. 
Upgrade ado-aw.",
+            current,
+            target_version
+        );
+    }
+
+    if current == target_version {
+        return Ok(MigrationReport::unchanged(current));
+    }
+
+    let mut applied: Vec<AppliedMigration> = Vec::new();
+
+    while current < target_version {
+        let idx = (current - 1) as usize;
+        let m = registry.get(idx).ok_or_else(|| {
+            anyhow::anyhow!(
+                "migration registry corrupt: expected entry at index {} for from_version={}",
+                idx,
+                current
+            )
+        })?;
+        ensure!(
+            m.from_version == current && m.to_version == current + 1,
+            "migration registry corrupt: expected from_version={} at index {}, found from_version={} to_version={}",
+            current,
+            idx,
+            m.from_version,
+            m.to_version
+        );
+
+        let ctx = MigrationContext {};
+        (m.apply)(fm, &ctx).with_context(|| {
+            format!(
+                "migration {} ({} -> {}) failed",
+                m.id, m.from_version, m.to_version
+            )
+        })?;
+
+        set_schema_version(fm, m.to_version);
+        applied.push(AppliedMigration {
+            id: m.id,
+            summary: m.summary,
+        });
+        current = m.to_version;
+    }
+
+    Ok(MigrationReport {
+        from_version,
+        to_version: current,
+        applied,
+    })
+}
+
+fn describe_yaml_value(v: &Value) -> String {
+    match v {
+        Value::Null => "null".to_string(),
+        Value::Bool(b) => format!("bool ({})", b),
+        Value::Number(n) => format!("number ({})", n),
+        Value::String(s) => format!("string ({:?})", s),
+        Value::Sequence(_) => "sequence".to_string(),
+        Value::Mapping(_) => "mapping".to_string(),
+        Value::Tagged(_) => "tagged".to_string(),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // ─── Registry health (operates on the real MIGRATIONS slice) ──────────
+
+    #[test]
+    fn registry_is_contiguous_and_starts_at_one() {
+        // Vacuously true with an empty registry; the assertions still
+        // exercise the loop machinery so this test guards against future
+        // regressions when migrations are added.
+        for (i, m) in MIGRATIONS.iter().enumerate() {
+            assert_eq!(
+                m.from_version,
+                (i + 1) as u32,
+                "migration at index {} has from_version={}; expected {}",
+                i,
+                m.from_version,
+                i + 1
+            );
+            assert_eq!(
+                m.to_version,
+                (i + 2) as u32,
+                "migration at index {} has to_version={}; expected {}",
+                i,
+                m.to_version,
+                i + 2
+            );
+        }
+    }
+
+    #[test]
+    fn registry_ids_are_unique() {
+        let mut seen = std::collections::BTreeSet::new();
+        for m in MIGRATIONS {
+            assert!(
+                seen.insert(m.id),
+                "duplicate migration id `{}` in MIGRATIONS",
+                m.id
+            );
+        }
+    }
+
+    #[test]
+    fn migration_filenames_match_from_version() {
+        // Scan src/compile/migrations/*.rs and check that each numeric
+        // migration file's prefix matches its from_version.
+        let dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
+            .join("src/compile/migrations");
+        let mut numeric_files: Vec<(u32, String)> = Vec::new();
+        for entry in std::fs::read_dir(&dir).expect("read migrations dir") {
+            let entry = entry.expect("dir entry");
+            let path = entry.path();
+            if path.extension().and_then(|e| e.to_str()) != Some("rs") {
+                continue;
+            }
+            let stem = path
+                .file_stem()
+                .and_then(|s| s.to_str())
+                .expect("utf8 stem")
+                .to_string();
+            if stem == "mod" || stem == "helpers" {
+                continue;
+            }
+            // Expect `<NNNN>_<id>` where NNNN is the zero-padded
+            // from_version. Files that don't match this shape are tests
+            // or fixtures and should not exist in this directory.
+            let (prefix, _rest) = stem.split_once('_').unwrap_or_else(|| {
+                panic!(
+                    "migration file {:?} does not match `<NNNN>_<id>.rs`",
+                    path
+                )
+            });
+            let n: u32 = prefix.parse().unwrap_or_else(|_| {
+                panic!(
+                    "migration file {:?} has non-numeric prefix `{}`",
+                    path, prefix
+                )
+            });
+            numeric_files.push((n, stem));
+        }
+        numeric_files.sort();
+        assert_eq!(
+            numeric_files.len(),
+            MIGRATIONS.len(),
+            "number of `<NNNN>_<id>.rs` files ({}) does not match \
+             MIGRATIONS.len() ({}); files: {:?}",
+            numeric_files.len(),
+            MIGRATIONS.len(),
+            numeric_files
+        );
+        for (i, (n, stem)) in numeric_files.iter().enumerate() {
+            assert_eq!(
+                *n,
+                (i + 1) as u32,
+                "migration file `{}` has from_version prefix {} but is at index {}; expected {}",
+                stem,
+                n,
+                i,
+                i + 1
+            );
+        }
+    }
+
+    // ─── read_schema_version ──────────────────────────────────────────────
+
+    fn map_with_version(version: Option<&str>) -> Mapping {
+        let mut m = Mapping::new();
+        if let Some(v) = version {
+            // Insert as raw YAML so we can test integer/string/etc.
+ let parsed: Value = serde_yaml::from_str(v).unwrap(); + m.insert(Value::String(SCHEMA_VERSION_KEY.to_string()), parsed); + } + m + } + + #[test] + fn read_schema_version_missing_returns_one() { + let m = Mapping::new(); + assert_eq!(read_schema_version(&m).unwrap(), 1); + } + + #[test] + fn read_schema_version_integer_is_returned() { + let m = map_with_version(Some("5")); + assert_eq!(read_schema_version(&m).unwrap(), 5); + } + + #[test] + fn read_schema_version_zero_rejected() { + let m = map_with_version(Some("0")); + let err = read_schema_version(&m).unwrap_err(); + assert!( + format!("{}", err).contains("must be a positive integer"), + "got: {}", + err + ); + } + + #[test] + fn read_schema_version_negative_rejected() { + let m = map_with_version(Some("-1")); + let err = read_schema_version(&m).unwrap_err(); + assert!( + format!("{}", err).contains("must be a positive integer"), + "got: {}", + err + ); + } + + #[test] + fn read_schema_version_string_rejected() { + let m = map_with_version(Some("\"five\"")); + let err = read_schema_version(&m).unwrap_err(); + assert!( + format!("{}", err).contains("must be a positive integer"), + "got: {}", + err + ); + } + + #[test] + fn read_schema_version_float_rejected() { + let m = map_with_version(Some("2.5")); + let err = read_schema_version(&m).unwrap_err(); + assert!( + format!("{}", err).contains("must be a positive integer"), + "got: {}", + err + ); + } + + #[test] + fn read_schema_version_null_rejected() { + let m = map_with_version(Some("null")); + let err = read_schema_version(&m).unwrap_err(); + assert!( + format!("{}", err).contains("must be a positive integer"), + "got: {}", + err + ); + } + + // ─── set_schema_version ─────────────────────────────────────────────── + + #[test] + fn set_schema_version_inserts_when_absent() { + let mut m = Mapping::new(); + set_schema_version(&mut m, 3); + assert_eq!(read_schema_version(&m).unwrap(), 3); + } + + #[test] + fn set_schema_version_updates_in_place() { + let mut m = 
map_with_version(Some("1")); + set_schema_version(&mut m, 4); + assert_eq!(read_schema_version(&m).unwrap(), 4); + } + + // ─── migrate_front_matter_with ──────────────────────────────────────── + + #[test] + fn migrate_empty_registry_is_noop_for_v1() { + let mut m: Mapping = serde_yaml::from_str("name: x\n").unwrap(); + let report = migrate_front_matter_with(&mut m, &[]).unwrap(); + assert!(!report.changed()); + assert_eq!(report.from_version, 1); + assert_eq!(report.to_version, 1); + } + + #[test] + fn migrate_already_current_is_noop() { + // Empty registry → CURRENT == 1. Source explicitly stamped 1. + let mut m: Mapping = + serde_yaml::from_str("schema-version: 1\nname: x\n").unwrap(); + let report = migrate_front_matter_with(&mut m, &[]).unwrap(); + assert!(!report.changed()); + } + + #[test] + fn migrate_future_version_rejected() { + // Empty registry → CURRENT == 1. Source claims version 2. + let mut m: Mapping = + serde_yaml::from_str("schema-version: 2\nname: x\n").unwrap(); + let err = migrate_front_matter_with(&mut m, &[]).unwrap_err(); + assert!( + format!("{}", err).contains("only supports up to"), + "got: {}", + err + ); + } + + fn noop_apply(_fm: &mut Mapping, _ctx: &MigrationContext) -> Result<()> { + Ok(()) + } + + static TEST_MIG_1_TO_2: Migration = Migration { + from_version: 1, + to_version: 2, + id: "test_v1_to_v2", + summary: "test migration", + introduced_in: "test", + apply: noop_apply, + }; + + static TEST_MIG_2_TO_3: Migration = Migration { + from_version: 2, + to_version: 3, + id: "test_v2_to_v3", + summary: "test migration", + introduced_in: "test", + apply: rename_a_to_b, + }; + + fn rename_a_to_b(fm: &mut Mapping, _ctx: &MigrationContext) -> Result<()> { + rename_key(fm, "a", "b", ConflictPolicy::Error)?; + Ok(()) + } + + #[test] + fn migrate_with_test_registry_chains_migrations() { + let registry: &[&'static Migration] = + &[&TEST_MIG_1_TO_2, &TEST_MIG_2_TO_3]; + let mut m: Mapping = serde_yaml::from_str("a: 1\n").unwrap(); + let 
report = migrate_front_matter_with(&mut m, registry).unwrap(); + assert!(report.changed()); + assert_eq!(report.from_version, 1); + assert_eq!(report.to_version, 3); + assert_eq!(report.applied_ids(), vec!["test_v1_to_v2", "test_v2_to_v3"]); + assert_eq!(read_schema_version(&m).unwrap(), 3); + // The rename in the second migration moved a → b. + assert!(!m.contains_key(Value::String("a".into()))); + assert!(m.contains_key(Value::String("b".into()))); + } + + #[test] + fn migrate_starting_partway_through_chain() { + let registry: &[&'static Migration] = + &[&TEST_MIG_1_TO_2, &TEST_MIG_2_TO_3]; + // Source is already at v2, only the second migration should run. + let mut m: Mapping = + serde_yaml::from_str("schema-version: 2\na: 1\n").unwrap(); + let report = migrate_front_matter_with(&mut m, registry).unwrap(); + assert_eq!(report.from_version, 2); + assert_eq!(report.to_version, 3); + assert_eq!(report.applied_ids(), vec!["test_v2_to_v3"]); + } + + fn failing_apply(_fm: &mut Mapping, _ctx: &MigrationContext) -> Result<()> { + bail!("intentional test failure"); + } + + static TEST_MIG_FAIL: Migration = Migration { + from_version: 1, + to_version: 2, + id: "test_fail", + summary: "always fails", + introduced_in: "test", + apply: failing_apply, + }; + + #[test] + fn migrate_aborts_on_step_failure_with_context() { + let registry: &[&'static Migration] = &[&TEST_MIG_FAIL]; + let mut m: Mapping = serde_yaml::from_str("a: 1\n").unwrap(); + let err = migrate_front_matter_with(&mut m, registry).unwrap_err(); + let chain = format!("{:#}", err); + assert!( + chain.contains("test_fail (1 -> 2) failed"), + "expected migration context, got: {}", + chain + ); + assert!( + chain.contains("intentional test failure"), + "expected inner error, got: {}", + chain + ); + } +} diff --git a/src/compile/mod.rs b/src/compile/mod.rs index bad700eb..f6e06c6a 100644 --- a/src/compile/mod.rs +++ b/src/compile/mod.rs @@ -10,6 +10,9 @@ mod common; pub mod extensions; pub(crate) mod filter_ir; mod 
gitattributes;
+#[cfg(test)]
+mod migration_integration_test;
+pub(crate) mod migrations;
 mod onees;
 pub(crate) mod pr_filters;
 mod standalone;
@@ -20,7 +23,10 @@ use async_trait::async_trait;
 use log::{debug, info};
 use std::path::{Path, PathBuf};
 
+#[allow(unused_imports)]
 pub use common::parse_markdown;
+#[allow(unused_imports)]
+pub use common::{atomic_write, parse_markdown_detailed, reconstruct_source, ParsedSource};
 pub use common::HEADER_MARKER;
 pub use common::ADO_MCP_ENTRYPOINT;
 pub use common::ADO_MCP_IMAGE;
@@ -57,7 +63,40 @@ pub async fn compile_pipeline(
     skip_integrity: bool,
     debug_pipeline: bool,
 ) -> Result<()> {
-    compile_pipeline_inner(input_path, output_path, skip_integrity, debug_pipeline, true).await
+    compile_pipeline_inner(
+        input_path,
+        output_path,
+        skip_integrity,
+        debug_pipeline,
+        true,
+        migrations::MIGRATIONS,
+    )
+    .await
+    .map(|_migrated| ())
+}
+
+/// Test-only entry point that injects a custom migration registry.
+///
+/// Production code goes through [`compile_pipeline`]. Tests use this
+/// to drive end-to-end migration scenarios without polluting the real
+/// [`migrations::MIGRATIONS`] slice.
+#[cfg(test)]
+async fn compile_pipeline_with_registry(
+    input_path: &str,
+    output_path: Option<&str>,
+    skip_integrity: bool,
+    debug_pipeline: bool,
+    registry: &[&'static migrations::Migration],
+) -> Result<bool> {
+    compile_pipeline_inner(
+        input_path,
+        output_path,
+        skip_integrity,
+        debug_pipeline,
+        false,
+        registry,
+    )
+    .await
 }
 
 /// Internal compile entry point that lets the caller opt out of the
@@ -65,13 +104,17 @@ pub async fn compile_pipeline(
 /// (`compile_all_pipelines`) skip the per-pipeline sync to avoid an
 /// O(N²)-ish series of full-tree scans and instead perform a single sync
 /// after the whole batch completes.
+///
+/// Returns whether the source `.md` was rewritten by the migration
+/// pass. Batch callers use this to surface an aggregate count.
async fn compile_pipeline_inner(
     input_path: &str,
     output_path: Option<&str>,
     skip_integrity: bool,
     debug_pipeline: bool,
     sync_gitattributes: bool,
-) -> Result<()> {
+    registry: &[&'static migrations::Migration],
+) -> Result<bool> {
     let input_path = Path::new(input_path);
 
     info!("Compiling pipeline from: {}", input_path.display());
@@ -82,11 +125,22 @@ async fn compile_pipeline_inner(
         .with_context(|| format!("Failed to read input file: {}", input_path.display()))?;
     debug!("Input file size: {} bytes", content.len());
 
-    let (mut front_matter, markdown_body) = parse_markdown(&content)?;
+    let parsed = common::parse_markdown_detailed_with_registry(&content, registry)?;
+    let mut front_matter = parsed.front_matter;
+    let markdown_body = parsed.markdown_body.clone();
+    let migrations = parsed.migrations.clone();
+    let front_matter_mapping = parsed.front_matter_mapping.clone();
+    let body_raw = parsed.body_raw.clone();
+    let source_sha256 = parsed.source_sha256;
 
     // Sanitize all front matter text fields before any further processing.
     // This neutralizes pipeline command injection (##vso[), strips control
     // characters, and enforces content limits across all config values.
+    //
+    // Note: sanitization mutates the typed FrontMatter in memory but does
+    // NOT touch the `front_matter_mapping` we keep around for source
+    // rewrite. That is intentional — we don't want to silently rewrite
+    // user source bytes via sanitize on disk.
     use crate::sanitize::SanitizeConfig;
     front_matter.sanitize_config_fields();
 
@@ -134,7 +188,8 @@ async fn compile_pipeline_inner(
 
     info!("Using {} compiler", compiler.target_name());
 
-    // Compile
+    // Compile (no source mutation yet — a failure here must leave the
+    // source byte-identical).
let pipeline_yaml = compiler .compile(input_path, &yaml_output_path, &front_matter, &markdown_body, skip_integrity, debug_pipeline) .await?; @@ -142,8 +197,34 @@ async fn compile_pipeline_inner( // Clean up spacing artifacts from empty placeholder replacements let pipeline_yaml = clean_generated_yaml(&pipeline_yaml); - // Write output - tokio::fs::write(&yaml_output_path, &pipeline_yaml) + // Source rewrite: only after a fully-successful compile. + // + // We perform this BEFORE writing the .lock.yml so that a partial + // state (rewritten source + stale lock file, or rewritten lock file + // pointing at unmigrated source) cannot escape: if rewrite fails, + // we abort before the lock file ever gets touched. + let mut migrated = false; + if migrations.changed() { + // Rebuild the parsed handle (the typed front_matter has been + // sanitized and consumed; rewrite needs the unmodified mapping). + let parsed_for_rewrite = common::ParsedSource { + front_matter: serde_yaml::from_value(serde_yaml::Value::Mapping( + front_matter_mapping.clone(), + )) + .context("Failed to round-trip migrated front matter for rewrite")?, + markdown_body: markdown_body.clone(), + migrations: migrations.clone(), + front_matter_mapping: front_matter_mapping.clone(), + body_raw: body_raw.clone(), + source_sha256, + }; + migrated = perform_source_rewrite_if_needed(input_path, &content, &parsed_for_rewrite) + .await?; + } + + // Write output via atomic_write so a crash mid-write cannot leave a + // half-written .lock.yml on disk. + common::atomic_write(&yaml_output_path, &pipeline_yaml) .await .with_context(|| { format!( @@ -169,7 +250,71 @@ async fn compile_pipeline_inner( } } - Ok(()) + Ok(migrated) +} + +/// Reconstruct the migrated source, run the lost-update guard, and +/// atomically rewrite the source `.md` if the content actually +/// changed. Returns whether a write happened. 
+///
+/// On success, emits the documented stderr warning so users always see
+/// when their source was migrated. This warning is *not* gated by
+/// `--verbose`/`--debug`.
+async fn perform_source_rewrite_if_needed(
+    input_path: &Path,
+    original_content: &str,
+    parsed: &common::ParsedSource,
+) -> Result<bool> {
+    let new_content = common::reconstruct_source(parsed)?;
+    if new_content == original_content {
+        // Migrations ran but their net effect is a no-op on disk —
+        // skip the rewrite to avoid gratuitous diffs.
+        return Ok(false);
+    }
+
+    // Lost-update guard: re-read the source and compare its hash to
+    // the snapshot we took when parsing. If anyone else mutated the
+    // file in the meantime, refuse to clobber their changes.
+    use sha2::Digest;
+    let current_bytes = tokio::fs::read(input_path).await.with_context(|| {
+        format!(
+            "Failed to re-read source for migration safety check: {}",
+            input_path.display()
+        )
+    })?;
+    let mut hasher = sha2::Sha256::new();
+    hasher.update(&current_bytes);
+    let current_hash: [u8; 32] = hasher.finalize().into();
+    if current_hash != parsed.source_sha256 {
+        anyhow::bail!(
+            "source file {} changed during compilation; refusing to migrate. Re-run `ado-aw compile`.",
+            input_path.display()
+        );
+    }
+
+    common::atomic_write(input_path, &new_content)
+        .await
+        .with_context(|| {
+            format!(
+                "Failed to atomically rewrite migrated source: {}",
+                input_path.display()
+            )
+        })?;
+
+    eprintln!(
+        "warning: migrated front matter in {}: schema-version {} -> {}",
+        input_path.display(),
+        parsed.migrations.from_version,
+        parsed.migrations.to_version
+    );
+    for applied in &parsed.migrations.applied {
+        eprintln!("  - {}: {}", applied.id, applied.summary);
+    }
+    eprintln!(
+        "note: front-matter comments are not preserved when migrations rewrite the source."
+ ); + + Ok(true) } /// Locate the repo root containing `output_path`, scan it for all compiled @@ -221,6 +366,7 @@ pub async fn compile_all_pipelines(skip_integrity: bool, debug_pipeline: bool) - let mut success_count = 0; let mut skip_count = 0; let mut fail_count = 0; + let mut migrated_count = 0; for pipeline in &detected { let source_path = root.join(&pipeline.source); @@ -239,8 +385,13 @@ pub async fn compile_all_pipelines(skip_integrity: bool, debug_pipeline: bool) - let source_str = source_path.to_string_lossy(); let output_str = yaml_output_path.to_string_lossy(); - match compile_pipeline_inner(&source_str, Some(&output_str), skip_integrity, debug_pipeline, false).await { - Ok(()) => success_count += 1, + match compile_pipeline_inner(&source_str, Some(&output_str), skip_integrity, debug_pipeline, false, migrations::MIGRATIONS).await { + Ok(migrated) => { + success_count += 1; + if migrated { + migrated_count += 1; + } + } Err(e) => { eprintln!( " Error compiling '{}': {:#}", @@ -263,10 +414,17 @@ pub async fn compile_all_pipelines(skip_integrity: bool, debug_pipeline: bool) - } println!(); - println!( - "Done: {} compiled, {} skipped, {} failed.", - success_count, skip_count, fail_count - ); + if migrated_count > 0 { + println!( + "Done: {} compiled, {} skipped, {} failed; {} source file(s) migrated.", + success_count, skip_count, fail_count, migrated_count + ); + } else { + println!( + "Done: {} compiled, {} skipped, {} failed.", + success_count, skip_count, fail_count + ); + } if fail_count > 0 { anyhow::bail!("{} pipeline(s) failed to compile", fail_count); @@ -349,7 +507,29 @@ pub async fn check_pipeline(pipeline_path: &str) -> Result<()> { ) })?; - let (mut front_matter, markdown_body) = parse_markdown(&content)?; + let parsed = parse_markdown_detailed(&content)?; + + // Pending-migration enforcement: `check` MUST NOT silently let + // a stale source pass. 
The runtime integrity check inside + // compiled pipelines uses the same ado-aw version that produced + // the pipeline (see src/data/base.yml / 1es-base.yml — they + // download `ado-aw v${COMPILER_VERSION}`), so a check failure + // here only happens locally / in CI when a developer runs a + // newer ado-aw against an older source. That is exactly when + // we want to fail loudly so `ado-aw compile` is run. + if parsed.migrations.changed() { + anyhow::bail!( + "error: {} is at schema-version {}; this compiler requires schema-version {}.\n\ + hint: run `ado-aw compile {}` to migrate the source in place.", + source_path.display(), + parsed.migrations.from_version, + parsed.migrations.to_version, + source_path.display(), + ); + } + + let mut front_matter = parsed.front_matter; + let markdown_body = parsed.markdown_body; use crate::sanitize::SanitizeConfig; front_matter.sanitize_config_fields(); diff --git a/src/compile/types.rs b/src/compile/types.rs index 46ddb8c0..24614573 100644 --- a/src/compile/types.rs +++ b/src/compile/types.rs @@ -7,6 +7,15 @@ use serde::{Deserialize, Serialize}; use std::collections::HashMap; use crate::sanitize::SanitizeConfig as SanitizeConfigTrait; +/// Default `schema-version` used when the field is absent from the +/// front matter. Defined here so serde's `#[serde(default = "...")]` +/// can refer to it. Note: source files that simply omit the field +/// continue to behave as schema-version 1; this default is not the +/// "current" schema version. +fn default_schema_version() -> u32 { + 1 +} + /// Target platform for compiled pipeline #[derive(Debug, Deserialize, Clone, Default, PartialEq)] #[serde(rename_all = "lowercase")] @@ -580,6 +589,13 @@ pub struct PipelineParameter { #[derive(Debug, Deserialize)] #[serde(deny_unknown_fields)] pub struct FrontMatter { + /// Schema version of the front-matter grammar. Missing field defaults + /// to 1. 
The compiler bumps this in place when migrations apply + /// during compilation; users typically don't write it by hand. + /// See [`crate::compile::migrations`] for the migration framework. + #[serde(default = "default_schema_version", rename = "schema-version")] + #[allow(dead_code)] + pub schema_version: u32, /// Agent name (required) pub name: String, /// One-line description (required) @@ -1143,6 +1159,36 @@ impl SanitizeConfigTrait for LabelFilter { mod tests { use super::*; + // ─── schema_version field ─────────────────────────────────────────────── + + #[test] + fn schema_version_defaults_to_one_when_absent() { + let yaml = "name: x\ndescription: y\n"; + let fm: FrontMatter = serde_yaml::from_str(yaml).unwrap(); + assert_eq!(fm.schema_version, 1); + } + + #[test] + fn schema_version_accepts_explicit_integer() { + let yaml = "schema-version: 5\nname: x\ndescription: y\n"; + let fm: FrontMatter = serde_yaml::from_str(yaml).unwrap(); + assert_eq!(fm.schema_version, 5); + } + + #[test] + fn schema_version_string_value_fails_typed_deserialize() { + // The migration runner is the first line of defense (rejects with a + // friendly message before typed deserialization). This test is a + // defense-in-depth check that the typed FrontMatter would also + // reject a non-integer `schema-version`. + let yaml = "schema-version: notanumber\nname: x\ndescription: y\n"; + let result: Result = serde_yaml::from_str(yaml); + assert!( + result.is_err(), + "expected non-integer schema-version to fail typed deserialize" + ); + } + // ─── PoolConfig deserialization ────────────────────────────────────────── #[test] diff --git a/src/main.rs b/src/main.rs index 1b2985e3..6a3b0d84 100644 --- a/src/main.rs +++ b/src/main.rs @@ -223,14 +223,30 @@ async fn run_execute( ado_project: Option, dry_run: bool, ) -> Result<()> { - // Read and parse source markdown to get tool configs + // Read and parse source markdown to get tool configs. 
+ // Use parse_markdown_detailed so Stage 3 benefits from in-memory + // migration when a source is mid-migration. Stage 3 must NOT + // rewrite the source file (the executor's working tree is not the + // source-of-truth tree), so we just emit a log warning. let content = tokio::fs::read_to_string(&source) .await .with_context(|| format!("Failed to read source file: {}", source.display()))?; - let (front_matter, _) = compile::parse_markdown(&content) + let parsed = compile::parse_markdown_detailed(&content) .with_context(|| format!("Failed to parse source file: {}", source.display()))?; + if parsed.migrations.changed() { + log::warn!( + "front matter at {} is at schema-version {}; running with in-memory migration to {}. Run `ado-aw compile {}` to update the source.", + source.display(), + parsed.migrations.from_version, + parsed.migrations.to_version, + source.display(), + ); + } + + let front_matter = parsed.front_matter; + println!("Loaded tool configs from: {}", source.display()); // Extract tools config before moving front_matter into build_execution_context diff --git a/tests/migration_tests.rs b/tests/migration_tests.rs new file mode 100644 index 00000000..75d4f2e9 --- /dev/null +++ b/tests/migration_tests.rs @@ -0,0 +1,382 @@ +//! Integration tests for the front-matter migration framework. +//! +//! These tests spawn the compiled `ado-aw` binary as a subprocess +//! (matching the pattern used in `tests/compiler_tests.rs`) and assert +//! on the user-visible behavior of `compile` and `check` for sources +//! with various `schema-version` shapes. +//! +//! The migration registry shipped with this binary is intentionally +//! empty (`CURRENT_SCHEMA_VERSION == 1`); the rewrite path is exercised +//! by the white-box tests in `src/compile/migrations/integration_test.rs`, +//! which can inject a stub registry. These tests cover the user-facing +//! CLI behaviors that don't require migration registration: +//! +//! 
- Future-version sources are rejected with a clear message. +//! - Non-integer / zero / negative `schema-version` is rejected. +//! - Healthy v1 sources (no `schema-version` field, or explicit `1`) +//! compile and `check` cleanly without rewriting the source. +//! - The full `compile` -> `check` round-trip succeeds. + +use std::fs; +use std::path::PathBuf; +use std::process::Command; + +/// Set up a unique temp directory for each test run. +fn fresh_temp_dir(label: &str) -> PathBuf { + let dir = std::env::temp_dir().join(format!( + "ado-aw-migration-tests-{}-{}-{}", + label, + std::process::id(), + // Add a thread-local counter to avoid intra-process collisions + // when multiple tests run in parallel. + rand_suffix(), + )); + fs::create_dir_all(&dir).expect("create temp dir"); + dir +} + +/// Same as [`fresh_temp_dir`] but also creates an empty `.git/` +/// directory at the root so `ado-aw check` (which walks up to the +/// repo root) can resolve a source path from the compiled lock file's +/// `@ado-aw` header. +fn fresh_git_temp_dir(label: &str) -> PathBuf { + let dir = fresh_temp_dir(label); + fs::create_dir(dir.join(".git")).expect("create .git dir"); + dir +} + +/// Lightweight randomness for test temp dir uniqueness — no crate dep. +fn rand_suffix() -> String { + use std::time::{SystemTime, UNIX_EPOCH}; + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_nanos()) + .unwrap_or(0); + format!("{:x}", nanos) +} + +fn ado_aw_binary() -> PathBuf { + PathBuf::from(env!("CARGO_BIN_EXE_ado-aw")) +} + +/// Run `ado-aw compile `, returning the captured output. +fn run_compile(source: &PathBuf) -> std::process::Output { + Command::new(ado_aw_binary()) + .args(["compile", source.to_str().unwrap()]) + .output() + .expect("Failed to run ado-aw compile") +} + +/// Run `ado-aw check `, returning the captured output. 
+fn run_check(pipeline: &PathBuf) -> std::process::Output { + Command::new(ado_aw_binary()) + .args(["check", pipeline.to_str().unwrap()]) + .output() + .expect("Failed to run ado-aw check") +} + +/// Write a source file to `dir/agent.md` and return its path. +fn write_source(dir: &PathBuf, content: &str) -> PathBuf { + let path = dir.join("agent.md"); + fs::write(&path, content).expect("write source"); + path +} + +// ─── Future-version rejection ────────────────────────────────────────────── + +#[test] +fn compile_rejects_future_schema_version() { + let dir = fresh_temp_dir("future-version"); + let source = write_source( + &dir, + "---\nschema-version: 99\nname: x\ndescription: y\n---\nbody\n", + ); + + let output = run_compile(&source); + + assert!( + !output.status.success(), + "compile should fail on future schema-version: stdout={:?} stderr={:?}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.contains("only supports up to"), + "stderr should mention compiler's supported schema-version range, got: {}", + stderr + ); + assert!( + stderr.contains("99"), + "stderr should mention the offending version 99, got: {}", + stderr + ); + + let _ = fs::remove_dir_all(&dir); +} + +// ─── Invalid schema-version values ───────────────────────────────────────── + +#[test] +fn compile_rejects_zero_schema_version() { + let dir = fresh_temp_dir("zero-version"); + let source = write_source( + &dir, + "---\nschema-version: 0\nname: x\ndescription: y\n---\nbody\n", + ); + + let output = run_compile(&source); + + assert!( + !output.status.success(), + "compile should fail on zero schema-version" + ); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.contains("must be a positive integer"), + "stderr should reject zero with `must be a positive integer`, got: {}", + stderr + ); + + let _ = fs::remove_dir_all(&dir); +} + +#[test] +fn 
compile_rejects_negative_schema_version() { + let dir = fresh_temp_dir("negative-version"); + let source = write_source( + &dir, + "---\nschema-version: -1\nname: x\ndescription: y\n---\nbody\n", + ); + + let output = run_compile(&source); + + assert!( + !output.status.success(), + "compile should fail on negative schema-version" + ); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.contains("must be a positive integer"), + "stderr should reject negative with `must be a positive integer`, got: {}", + stderr + ); + + let _ = fs::remove_dir_all(&dir); +} + +#[test] +fn compile_rejects_non_integer_schema_version() { + let dir = fresh_temp_dir("non-integer-version"); + let source = write_source( + &dir, + "---\nschema-version: \"not-a-number\"\nname: x\ndescription: y\n---\nbody\n", + ); + + let output = run_compile(&source); + + assert!( + !output.status.success(), + "compile should fail on non-integer schema-version" + ); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.contains("must be a positive integer"), + "stderr should reject non-integer with `must be a positive integer`, got: {}", + stderr + ); + + let _ = fs::remove_dir_all(&dir); +} + +#[test] +fn compile_rejects_float_schema_version() { + let dir = fresh_temp_dir("float-version"); + let source = write_source( + &dir, + "---\nschema-version: 1.5\nname: x\ndescription: y\n---\nbody\n", + ); + + let output = run_compile(&source); + + assert!( + !output.status.success(), + "compile should fail on float schema-version" + ); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.contains("must be a positive integer"), + "stderr should reject float with `must be a positive integer`, got: {}", + stderr + ); + + let _ = fs::remove_dir_all(&dir); +} + +// ─── Healthy v1 round-trip ───────────────────────────────────────────────── + +#[test] +fn compile_succeeds_on_unstamped_v1_source() { + let dir = fresh_temp_dir("unstamped-v1"); + let 
original = "---\nname: smoketest\ndescription: smoketest description\n---\n## Body\n\nHello.\n"; + let source = write_source(&dir, original); + + let output = run_compile(&source); + + assert!( + output.status.success(), + "compile should succeed on healthy unstamped source: stdout={:?} stderr={:?}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + + // Lock file must be generated. + let lock = source.with_extension("lock.yml"); + assert!( + lock.exists(), + "expected compiled YAML at {}", + lock.display() + ); + + // With CURRENT_SCHEMA_VERSION == 1 and no migrations registered, + // the source must NOT be rewritten — verify byte-identity. + let after = fs::read_to_string(&source).expect("re-read source"); + assert_eq!( + after, original, + "source must be byte-identical after compile when no migrations apply" + ); + + // Stderr should NOT contain a migration warning. + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + !stderr.contains("warning: migrated front matter"), + "no migration warning expected, got stderr: {}", + stderr + ); + + let _ = fs::remove_dir_all(&dir); +} + +#[test] +fn compile_succeeds_on_explicitly_stamped_v1_source() { + let dir = fresh_temp_dir("stamped-v1"); + let original = "---\nschema-version: 1\nname: x\ndescription: y\n---\n## Body\n"; + let source = write_source(&dir, original); + + let output = run_compile(&source); + + assert!( + output.status.success(), + "compile should succeed on explicitly stamped v1 source: {}", + String::from_utf8_lossy(&output.stderr) + ); + + let after = fs::read_to_string(&source).expect("re-read source"); + assert_eq!( + after, original, + "explicitly stamped v1 source must be byte-identical" + ); + + let _ = fs::remove_dir_all(&dir); +} + +#[test] +fn compile_then_check_round_trip_passes() { + let dir = fresh_git_temp_dir("round-trip"); + let source = write_source( + &dir, + "---\nname: round-trip-agent\ndescription: round-trip\n---\n## Body\n", 
+ ); + + let compile_output = run_compile(&source); + assert!( + compile_output.status.success(), + "compile should succeed: {}", + String::from_utf8_lossy(&compile_output.stderr) + ); + + let lock = source.with_extension("lock.yml"); + assert!(lock.exists(), "expected lock file at {}", lock.display()); + + // `check` reads the source path from the lock file's @ado-aw header. + // The header records an absolute or relative path from the compile + // invocation; since we passed an absolute path, that's what we get. + let check_output = run_check(&lock); + assert!( + check_output.status.success(), + "check should succeed: stdout={:?} stderr={:?}", + String::from_utf8_lossy(&check_output.stdout), + String::from_utf8_lossy(&check_output.stderr) + ); + + let _ = fs::remove_dir_all(&dir); +} + +// ─── check rejects future-version sources ────────────────────────────────── + +#[test] +fn check_rejects_future_schema_version() { + // Setup: compile a healthy source so we have a valid lock file with + // a header pointing at our source path. Then mutate the source to + // claim a future schema version, and confirm `check` fails. + let dir = fresh_git_temp_dir("check-future"); + let source = write_source( + &dir, + "---\nname: x\ndescription: y\n---\n## Body\n", + ); + + let compile_output = run_compile(&source); + assert!( + compile_output.status.success(), + "initial compile should succeed: {}", + String::from_utf8_lossy(&compile_output.stderr) + ); + + // Mutate the source to claim a future version. Note: this mutation + // would also fail the lock-file integrity check, but the migration + // runner runs first so we observe the schema-version error. 
+ fs::write( + &source, + "---\nschema-version: 99\nname: x\ndescription: y\n---\n## Body\n", + ) + .expect("rewrite source"); + + let lock = source.with_extension("lock.yml"); + let check_output = run_check(&lock); + assert!( + !check_output.status.success(), + "check should fail when source is at a future schema-version" + ); + let stderr = String::from_utf8_lossy(&check_output.stderr); + assert!( + stderr.contains("only supports up to") || stderr.contains("Failed to migrate"), + "stderr should report the future-version error, got: {}", + stderr + ); + + let _ = fs::remove_dir_all(&dir); +} + +// ─── Non-mapping front matter ────────────────────────────────────────────── + +#[test] +fn compile_rejects_non_mapping_top_level_yaml() { + let dir = fresh_temp_dir("non-mapping"); + let source = write_source(&dir, "---\n- a\n- b\n---\nbody\n"); + + let output = run_compile(&source); + + assert!( + !output.status.success(), + "compile should fail when front matter is a sequence not a mapping" + ); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.contains("must be a mapping"), + "stderr should report non-mapping error, got: {}", + stderr + ); + + let _ = fs::remove_dir_all(&dir); +} From a90f543256c51bb8cdd020e534460bb43ede2e67 Mon Sep 17 00:00:00 2001 From: James Devine Date: Fri, 8 May 2026 22:03:51 +0100 Subject: [PATCH 2/8] fix(compile): use checked u32 arithmetic in migration runner Address code review finding: the runner did `1 + registry.len() as u32` and `m.to_version == current + 1` without overflow checks. With realistic registries this is unreachable, but rust panics on overflow in debug mode and wraps in release. Switch to checked_add so we surface a clear error either way; preserves existing behavior on all realistic inputs. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/compile/migrations/mod.rs | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/compile/migrations/mod.rs b/src/compile/migrations/mod.rs index 0de2a531..0f8d3e5b 100644 --- a/src/compile/migrations/mod.rs +++ b/src/compile/migrations/mod.rs @@ -175,7 +175,17 @@ pub fn migrate_front_matter_with( fm: &mut Mapping, registry: &[&'static Migration], ) -> Result { - let target_version = 1 + registry.len() as u32; + // Use checked arithmetic so we surface a clear error rather than + // panic-or-wrap if a registry ever grows past u32::MAX. Realistic + // registries are tiny — this is a "no panic in library code" guard. + let registry_len: u32 = registry + .len() + .try_into() + .ok() + .context("migration registry has more than u32::MAX entries")?; + let target_version = 1u32 + .checked_add(registry_len) + .context("migration registry too large: target_version would overflow u32")?; let mut current = read_schema_version(fm)?; let from_version = current; @@ -202,8 +212,11 @@ pub fn migrate_front_matter_with( current ) })?; + let next_version = current + .checked_add(1) + .context("migration version overflow: current + 1 exceeds u32::MAX")?; ensure!( - m.from_version == current && m.to_version == current + 1, + m.from_version == current && m.to_version == next_version, "migration registry corrupt: expected from_version={} at index {}, found from_version={} to_version={}", current, idx, From 72e97c74535016b1de9f235c8c773bad69ab8763 Mon Sep 17 00:00:00 2001 From: James Devine Date: Fri, 8 May 2026 22:14:13 +0100 Subject: [PATCH 3/8] fix(compile): address PR review feedback on migration framework Four findings from the Rust PR Reviewer bot: 1. check_pipeline's bail message had a redundant "error:" prefix that produced "Error: error: ..." once anyhow's top-level handler added its own "Error:" wrapper. 
Drop the prefix and reformat the hint onto an indented continuation line. 2. compile_pipeline_inner did a redundant serde_yaml::from_value round-trip just to satisfy ParsedSource.front_matter, even though perform_source_rewrite_if_needed never reads that field. Refactor the helper to take the four primitive fields it actually uses (mapping, body_raw, source_sha256, migrations) instead of the full struct. reconstruct_source likewise takes individual fields now. 3. Misleading comment in tests/migration_tests.rs claimed a "thread-local counter" was used; only a nanosecond timestamp was. Replaced rand_suffix() with timestamp + AtomicU64 monotonic seq so parallel tests scheduled in the same nanosecond always get distinct dirs. 4. Leading whitespace before the opening `---` was silently stripped on migration rewrite. parse_markdown_detailed already tolerates it on read; capture it as a new ParsedSource.leading_whitespace field and emit it from reconstruct_source so byte-faithful preservation extends to whitespace prefixes (BOM-stripped editor blank lines, etc.). All 1369 tests still pass. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/compile/common.rs | 66 +++++++++++++++++++---- src/compile/migration_integration_test.rs | 12 ++++- src/compile/mod.rs | 53 +++++++++--------- tests/migration_tests.rs | 12 +++-- 4 files changed, 102 insertions(+), 41 deletions(-) diff --git a/src/compile/common.rs b/src/compile/common.rs index c0008d95..f9bd5230 100644 --- a/src/compile/common.rs +++ b/src/compile/common.rs @@ -102,6 +102,10 @@ pub struct ParsedSource { /// The migrated front-matter mapping, with `schema-version` /// bumped. Used to reconstruct the source for rewrite. pub front_matter_mapping: serde_yaml::Mapping, + /// Whitespace bytes that appeared before the opening `---` fence, + /// preserved verbatim so that source rewrite is byte-faithful. + /// Empty in the typical case where the file starts with `---`. 
+ pub leading_whitespace: String, /// The body region exactly as it appeared after the closing `---`, /// byte-for-byte (no trim). Includes any leading newline. pub body_raw: String, @@ -138,8 +142,12 @@ pub(crate) fn parse_markdown_detailed_with_registry( // Allow leading whitespace before the opening fence (preserves // historical leniency). We compute a byte offset into `content` so - // that `body_raw` extraction is purely byte-faithful. + // that `body_raw` extraction is purely byte-faithful, and we keep + // the whitespace prefix around so that source rewrites preserve + // anything the user (or their editor) put before the opening + // fence. let leading_ws = content.bytes().take_while(|b| b.is_ascii_whitespace()).count(); + let leading_whitespace = content[..leading_ws].to_string(); let after_lead = &content[leading_ws..]; if !after_lead.starts_with("---") { anyhow::bail!("Markdown file must start with YAML front matter (---)"); @@ -184,23 +192,37 @@ pub(crate) fn parse_markdown_detailed_with_registry( markdown_body, migrations: report, front_matter_mapping: mapping, + leading_whitespace, body_raw, source_sha256, }) } -/// Reconstruct full source content from a migrated [`ParsedSource`]. +/// Reconstruct full source content from migration outputs. +/// +/// Takes the individual fragments rather than the full +/// [`ParsedSource`] so callers that have already destructured the +/// parse don't have to round-trip a fresh `front_matter` through +/// serde just to satisfy the typed field. 
/// /// Output shape: +/// - `leading_whitespace` (typically empty) /// - `---\n` /// - the migrated YAML mapping (`serde_yaml::to_string` always ends /// with `\n`) /// - `---` /// - the original body region byte-for-byte (`body_raw`) -pub fn reconstruct_source(parsed: &ParsedSource) -> Result { - let yaml_serialized = serde_yaml::to_string(&parsed.front_matter_mapping) +pub fn reconstruct_source( + leading_whitespace: &str, + front_matter_mapping: &serde_yaml::Mapping, + body_raw: &str, +) -> Result { + let yaml_serialized = serde_yaml::to_string(front_matter_mapping) .context("Failed to serialize migrated front matter")?; - Ok(format!("---\n{}---{}", yaml_serialized, parsed.body_raw)) + Ok(format!( + "{}---\n{}---{}", + leading_whitespace, yaml_serialized, body_raw + )) } fn yaml_value_kind(v: &serde_yaml::Value) -> &'static str { @@ -2539,13 +2561,22 @@ mod tests { // ─── parse_markdown_detailed ────────────────────────────────────────────── + fn reconstruct(parsed: &ParsedSource) -> String { + reconstruct_source( + &parsed.leading_whitespace, + &parsed.front_matter_mapping, + &parsed.body_raw, + ) + .unwrap() + } + #[test] fn parse_markdown_detailed_preserves_body_byte_for_byte() { // Case 1: trailing newline. let original = "---\nname: x\ndescription: y\n---\nbody line\n"; let parsed = parse_markdown_detailed(original).unwrap(); assert_eq!(parsed.body_raw, "\nbody line\n"); - let reconstructed = reconstruct_source(&parsed).unwrap(); + let reconstructed = reconstruct(&parsed); // No migrations ran, so the YAML round-trip is the only // structural change. The body region is byte-faithful. 
assert!(reconstructed.ends_with("---\nbody line\n")); @@ -2554,14 +2585,14 @@ mod tests { let original = "---\nname: x\ndescription: y\n---\nbody"; let parsed = parse_markdown_detailed(original).unwrap(); assert_eq!(parsed.body_raw, "\nbody"); - let reconstructed = reconstruct_source(&parsed).unwrap(); + let reconstructed = reconstruct(&parsed); assert!(reconstructed.ends_with("---\nbody")); // Case 3: empty body, but trailing newline. let original = "---\nname: x\ndescription: y\n---\n"; let parsed = parse_markdown_detailed(original).unwrap(); assert_eq!(parsed.body_raw, "\n"); - let reconstructed = reconstruct_source(&parsed).unwrap(); + let reconstructed = reconstruct(&parsed); assert!(reconstructed.ends_with("---\n")); // Case 4: blank lines between closing fence and content are @@ -2580,13 +2611,30 @@ mod tests { let original = "---\nname: x\ndescription: y\n---\n## body\n"; let parsed = parse_markdown_detailed(original).unwrap(); assert!(!parsed.migrations.changed()); - let reconstructed = reconstruct_source(&parsed).unwrap(); + let reconstructed = reconstruct(&parsed); // Find the closing fence in both and compare the suffix. let orig_suffix = &original[original.find("\n---\n").unwrap()..]; let recon_suffix = &reconstructed[reconstructed.find("\n---\n").unwrap()..]; assert_eq!(orig_suffix, recon_suffix, "body region must be byte-identical"); } + #[test] + fn parse_markdown_detailed_preserves_leading_whitespace() { + // Leading blank lines / spaces (e.g. from editor BOM-strippers + // adding a blank line) must round-trip through reconstruction + // so migration rewrites don't silently normalize them away. 
+ let original = "\n \n---\nname: x\ndescription: y\n---\nbody\n"; + let parsed = parse_markdown_detailed(original).unwrap(); + assert_eq!(parsed.leading_whitespace, "\n \n"); + let reconstructed = reconstruct(&parsed); + assert!( + reconstructed.starts_with("\n \n---\n"), + "expected leading whitespace preserved, got: {:?}", + &reconstructed[..20.min(reconstructed.len())] + ); + assert!(reconstructed.ends_with("---\nbody\n")); + } + #[test] fn parse_markdown_detailed_allows_leading_whitespace() { let original = "\n \n---\nname: x\ndescription: y\n---\nbody\n"; diff --git a/src/compile/migration_integration_test.rs b/src/compile/migration_integration_test.rs index 7e086059..e7bb5737 100644 --- a/src/compile/migration_integration_test.rs +++ b/src/compile/migration_integration_test.rs @@ -194,8 +194,16 @@ async fn perform_source_rewrite_lost_update_guard() { std::fs::write(&source_path, "different bytes\n").unwrap(); // Rewrite must refuse. - let result = - perform_source_rewrite_if_needed(&source_path, &original, &parsed).await; + let result = perform_source_rewrite_if_needed( + &source_path, + &original, + &parsed.leading_whitespace, + &parsed.front_matter_mapping, + &parsed.body_raw, + &parsed.source_sha256, + &parsed.migrations, + ) + .await; let err = result.expect_err("expected lost-update guard to fire"); assert!( format!("{:#}", err).contains("changed during compilation"), diff --git a/src/compile/mod.rs b/src/compile/mod.rs index f6e06c6a..3eb7dc55 100644 --- a/src/compile/mod.rs +++ b/src/compile/mod.rs @@ -127,10 +127,11 @@ async fn compile_pipeline_inner( let parsed = common::parse_markdown_detailed_with_registry(&content, registry)?; let mut front_matter = parsed.front_matter; - let markdown_body = parsed.markdown_body.clone(); - let migrations = parsed.migrations.clone(); - let front_matter_mapping = parsed.front_matter_mapping.clone(); - let body_raw = parsed.body_raw.clone(); + let markdown_body = parsed.markdown_body; + let migrations = 
parsed.migrations; + let front_matter_mapping = parsed.front_matter_mapping; + let leading_whitespace = parsed.leading_whitespace; + let body_raw = parsed.body_raw; let source_sha256 = parsed.source_sha256; // Sanitize all front matter text fields before any further processing. @@ -205,21 +206,16 @@ async fn compile_pipeline_inner( // we abort before the lock file ever gets touched. let mut migrated = false; if migrations.changed() { - // Rebuild the parsed handle (the typed front_matter has been - // sanitized and consumed; rewrite needs the unmodified mapping). - let parsed_for_rewrite = common::ParsedSource { - front_matter: serde_yaml::from_value(serde_yaml::Value::Mapping( - front_matter_mapping.clone(), - )) - .context("Failed to round-trip migrated front matter for rewrite")?, - markdown_body: markdown_body.clone(), - migrations: migrations.clone(), - front_matter_mapping: front_matter_mapping.clone(), - body_raw: body_raw.clone(), - source_sha256, - }; - migrated = perform_source_rewrite_if_needed(input_path, &content, &parsed_for_rewrite) - .await?; + migrated = perform_source_rewrite_if_needed( + input_path, + &content, + &leading_whitespace, + &front_matter_mapping, + &body_raw, + &source_sha256, + &migrations, + ) + .await?; } // Write output via atomic_write so a crash mid-write cannot leave a @@ -263,9 +259,14 @@ async fn compile_pipeline_inner( async fn perform_source_rewrite_if_needed( input_path: &Path, original_content: &str, - parsed: &common::ParsedSource, + leading_whitespace: &str, + front_matter_mapping: &serde_yaml::Mapping, + body_raw: &str, + source_sha256: &[u8; 32], + migrations: &migrations::MigrationReport, ) -> Result { - let new_content = common::reconstruct_source(parsed)?; + let new_content = + common::reconstruct_source(leading_whitespace, front_matter_mapping, body_raw)?; if new_content == original_content { // Migrations ran but their net effect is a no-op on disk — // skip the rewrite to avoid gratuitous diffs. 
@@ -285,7 +286,7 @@ async fn perform_source_rewrite_if_needed( let mut hasher = sha2::Sha256::new(); hasher.update(¤t_bytes); let current_hash: [u8; 32] = hasher.finalize().into(); - if current_hash != parsed.source_sha256 { + if ¤t_hash != source_sha256 { anyhow::bail!( "source file {} changed during compilation; refusing to migrate. Re-run `ado-aw compile`.", input_path.display() @@ -304,10 +305,10 @@ async fn perform_source_rewrite_if_needed( eprintln!( "warning: migrated front matter in {}: schema-version {} -> {}", input_path.display(), - parsed.migrations.from_version, - parsed.migrations.to_version + migrations.from_version, + migrations.to_version ); - for applied in &parsed.migrations.applied { + for applied in &migrations.applied { eprintln!(" - {}: {}", applied.id, applied.summary); } eprintln!( @@ -519,7 +520,7 @@ pub async fn check_pipeline(pipeline_path: &str) -> Result<()> { // we want to fail loudly so `ado-aw compile` is run. if parsed.migrations.changed() { anyhow::bail!( - "error: {} is at schema-version {}; this compiler requires schema-version {}.\n\ + "{} is at schema-version {}; this compiler requires schema-version {}.\n \ hint: run `ado-aw compile {}` to migrate the source in place.", source_path.display(), parsed.migrations.from_version, diff --git a/tests/migration_tests.rs b/tests/migration_tests.rs index 75d4f2e9..c7deeb2f 100644 --- a/tests/migration_tests.rs +++ b/tests/migration_tests.rs @@ -27,8 +27,6 @@ fn fresh_temp_dir(label: &str) -> PathBuf { "ado-aw-migration-tests-{}-{}-{}", label, std::process::id(), - // Add a thread-local counter to avoid intra-process collisions - // when multiple tests run in parallel. rand_suffix(), )); fs::create_dir_all(&dir).expect("create temp dir"); @@ -45,14 +43,20 @@ fn fresh_git_temp_dir(label: &str) -> PathBuf { dir } -/// Lightweight randomness for test temp dir uniqueness — no crate dep. +/// Suffix that's unique within the process for the lifetime of a +/// single test binary run. 
Uses a wall-clock nanosecond timestamp +/// combined with a monotonic atomic counter so two parallel tests +/// scheduled in the same nanosecond still get distinct directories. fn rand_suffix() -> String { + use std::sync::atomic::{AtomicU64, Ordering}; use std::time::{SystemTime, UNIX_EPOCH}; + static SEQ: AtomicU64 = AtomicU64::new(0); let nanos = SystemTime::now() .duration_since(UNIX_EPOCH) .map(|d| d.as_nanos()) .unwrap_or(0); - format!("{:x}", nanos) + let seq = SEQ.fetch_add(1, Ordering::Relaxed); + format!("{:x}-{:x}", nanos, seq) } fn ado_aw_binary() -> PathBuf { From b3bdae6ec4509bc138c98234588a4d3334b0243a Mon Sep 17 00:00:00 2001 From: James Devine Date: Fri, 8 May 2026 22:27:54 +0100 Subject: [PATCH 4/8] fix(compile): address second-round PR review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four findings from a fresh PR Reviewer pass: 1. `rename_key` left the mapping partially mutated when `ConflictPolicy::Error` fired: `take_key` removed `old` before the conflict check ran, so the bail observed a half-mutated state. Refactor to be transactional — check both keys + policy first, then mutate. Document the invariant in the doc comment and extend the existing test to assert both keys are intact on error. 2. `set_schema_version` appended the field to the bottom of the mapping (default `Mapping::insert` behavior), so rewritten sources showed `schema-version` at the end of the front-matter block, diverging from the canonical template in `docs/front-matter.md`. Add a `hoist_schema_version` step inside `reconstruct_source` so the serialized output places `schema-version` first while preserving the relative order of the user's other keys. 3. Doc comment in `tests/migration_tests.rs` referenced the wrong path (`migrations/integration_test.rs`); the actual file is `migration_integration_test.rs` directly under `compile/`. 4. 
The `#[allow(dead_code)]` on `FrontMatter::schema_version` was noted as a code smell. Add an explicit explanatory comment that the field exists solely to satisfy `deny_unknown_fields` and that the authoritative version lives in the untyped mapping (set by the migration runner before typed deserialization). All 1370 tests pass. New test: `reconstruct_source_hoists_schema_version_to_top`. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/compile/common.rs | 54 +++++++++++++++++++++++++++++-- src/compile/migrations/helpers.rs | 46 +++++++++++++++++++------- src/compile/types.rs | 5 +++ tests/migration_tests.rs | 2 +- 4 files changed, 93 insertions(+), 14 deletions(-) diff --git a/src/compile/common.rs b/src/compile/common.rs index f9bd5230..47aaa046 100644 --- a/src/compile/common.rs +++ b/src/compile/common.rs @@ -209,7 +209,9 @@ pub(crate) fn parse_markdown_detailed_with_registry( /// - `leading_whitespace` (typically empty) /// - `---\n` /// - the migrated YAML mapping (`serde_yaml::to_string` always ends -/// with `\n`) +/// with `\n`); when present, `schema-version` is hoisted to the top +/// of the mapping so the serialized output matches the canonical +/// front-matter template documented in `docs/front-matter.md` /// - `---` /// - the original body region byte-for-byte (`body_raw`) pub fn reconstruct_source( @@ -217,7 +219,7 @@ pub fn reconstruct_source( front_matter_mapping: &serde_yaml::Mapping, body_raw: &str, ) -> Result { - let yaml_serialized = serde_yaml::to_string(front_matter_mapping) + let yaml_serialized = serde_yaml::to_string(&hoist_schema_version(front_matter_mapping)) .context("Failed to serialize migrated front matter")?; Ok(format!( "{}---\n{}---{}", @@ -225,6 +227,28 @@ pub fn reconstruct_source( )) } +/// Return a mapping with `schema-version` (if present) moved to the +/// front. Other keys retain their relative order. Returns the input +/// unchanged when `schema-version` is absent or already first. 
+fn hoist_schema_version(input: &serde_yaml::Mapping) -> serde_yaml::Mapping { + let key = serde_yaml::Value::String(super::migrations::SCHEMA_VERSION_KEY.to_string()); + let Some(version) = input.get(&key) else { + return input.clone(); + }; + // If schema-version is already the first key, no rebuild needed. + if input.iter().next().map(|(k, _)| k) == Some(&key) { + return input.clone(); + } + let mut hoisted = serde_yaml::Mapping::with_capacity(input.len()); + hoisted.insert(key.clone(), version.clone()); + for (k, v) in input { + if k != &key { + hoisted.insert(k.clone(), v.clone()); + } + } + hoisted +} + fn yaml_value_kind(v: &serde_yaml::Value) -> &'static str { match v { serde_yaml::Value::Null => "null", @@ -2635,6 +2659,32 @@ mod tests { assert!(reconstructed.ends_with("---\nbody\n")); } + #[test] + fn reconstruct_source_hoists_schema_version_to_top() { + let mut mapping = serde_yaml::Mapping::new(); + mapping.insert( + serde_yaml::Value::String("name".into()), + serde_yaml::Value::String("x".into()), + ); + mapping.insert( + serde_yaml::Value::String("description".into()), + serde_yaml::Value::String("y".into()), + ); + mapping.insert( + serde_yaml::Value::String("schema-version".into()), + serde_yaml::Value::Number(serde_yaml::Number::from(2u64)), + ); + let out = reconstruct_source("", &mapping, "\nbody\n").unwrap(); + let lines: Vec<&str> = out.lines().take(3).collect(); + assert_eq!(lines[0], "---"); + assert_eq!( + lines[1], "schema-version: 2", + "schema-version should appear first in the front-matter block, got: {:?}", + lines + ); + assert_eq!(lines[2], "name: x"); + } + #[test] fn parse_markdown_detailed_allows_leading_whitespace() { let original = "\n \n---\nname: x\ndescription: y\n---\nbody\n"; diff --git a/src/compile/migrations/helpers.rs b/src/compile/migrations/helpers.rs index 20987346..74c5803f 100644 --- a/src/compile/migrations/helpers.rs +++ b/src/compile/migrations/helpers.rs @@ -50,6 +50,11 @@ pub fn insert_no_overwrite( /// /// 
Returns `Ok(true)` when the rename happened (regardless of policy /// branch), `Ok(false)` when `old` was absent (no-op). +/// +/// The mapping is left **unchanged** on the error path. Callers can +/// rely on this invariant when chaining migrations: a failed rename +/// won't leave the mapping in a half-mutated state for the next call +/// to inspect. #[allow(dead_code)] pub fn rename_key( m: &mut Mapping, @@ -57,26 +62,33 @@ pub fn rename_key( new: &str, policy: ConflictPolicy, ) -> Result { - let Some(old_value) = take_key(m, old) else { + let old_key = Value::String(old.to_string()); + let new_key = Value::String(new.to_string()); + + if !m.contains_key(&old_key) { return Ok(false); - }; - let new_present = m.contains_key(Value::String(new.to_string())); + } + let new_present = m.contains_key(&new_key); + match (new_present, policy) { - (false, _) => { - m.insert(Value::String(new.to_string()), old_value); - Ok(true) - } (true, ConflictPolicy::Error) => { + // Surface the conflict without mutating the mapping. bail!( "refusing to rename `{}` -> `{}`: destination key already exists \ - (set both old and new keys at once is ambiguous)", + (setting both old and new keys at once is ambiguous)", old, new ); } - (true, ConflictPolicy::PreferNew) => Ok(true), - (true, ConflictPolicy::PreferOld) => { - m.insert(Value::String(new.to_string()), old_value); + (false, _) | (true, ConflictPolicy::PreferOld) => { + // Move old -> new, replacing any existing new value. + let old_value = m.remove(&old_key).expect("old key checked above"); + m.insert(new_key, old_value); + Ok(true) + } + (true, ConflictPolicy::PreferNew) => { + // Drop old, keep new. + m.remove(&old_key); Ok(true) } } @@ -168,6 +180,18 @@ mod tests { "unexpected error message: {}", err ); + // Both keys must remain intact — the helper guarantees the + // mapping is unchanged on the error path. 
+ assert_eq!( + m.get(Value::String("old".into())), + Some(&Value::String("v_old".into())), + "old key must be preserved on error" + ); + assert_eq!( + m.get(Value::String("new".into())), + Some(&Value::String("v_new".into())), + "new key must be preserved on error" + ); } #[test] diff --git a/src/compile/types.rs b/src/compile/types.rs index 24614573..a5730283 100644 --- a/src/compile/types.rs +++ b/src/compile/types.rs @@ -593,6 +593,11 @@ pub struct FrontMatter { /// to 1. The compiler bumps this in place when migrations apply /// during compilation; users typically don't write it by hand. /// See [`crate::compile::migrations`] for the migration framework. + /// + /// Field is intentionally read-only on the typed view: it exists + /// solely so `deny_unknown_fields` accepts a stamped source. The + /// authoritative version lives in the untyped front-matter mapping + /// and is set by the migration runner before typed deserialization. #[serde(default = "default_schema_version", rename = "schema-version")] #[allow(dead_code)] pub schema_version: u32, diff --git a/tests/migration_tests.rs b/tests/migration_tests.rs index c7deeb2f..be21e11f 100644 --- a/tests/migration_tests.rs +++ b/tests/migration_tests.rs @@ -7,7 +7,7 @@ //! //! The migration registry shipped with this binary is intentionally //! empty (`CURRENT_SCHEMA_VERSION == 1`); the rewrite path is exercised -//! by the white-box tests in `src/compile/migrations/integration_test.rs`, +//! by the white-box tests in `src/compile/migration_integration_test.rs`, //! which can inject a stub registry. These tests cover the user-facing //! CLI behaviors that don't require migration registration: //! 
From c8cec49836b35a489c51a0de06aedbe0926e7c2b Mon Sep 17 00:00:00 2001 From: James Devine Date: Fri, 8 May 2026 22:51:47 +0100 Subject: [PATCH 5/8] refactor(compile): replace versioned migrations with detection-based codemods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aligns with gh-aw's codemod model. The previous design (versioned chain + `schema-version` field on user front matter) leaked an internal compiler concern into user files. gh-aw — the project we explicitly say we're inspired by in AGENTS.md — uses 41 codemods with no version stamping; users never see a `schema-version` field. This refactor brings ado-aw to the same model while keeping our ergonomic compile-time autorewrite UX (gh-aw uses a separate `gh aw fix --write` command, we don't). Changes: - Drop FrontMatter.schema_version field, default_schema_version fn, and the SCHEMA_VERSION_KEY const. - Rename src/compile/migrations/ to src/compile/codemods/. Rename src/compile/migration_integration_test.rs to src/compile/codemod_integration_test.rs. Rename tests/migration_tests.rs to tests/codemod_tests.rs. Rename docs/migrations.md to docs/codemods.md (rewritten from scratch). - Replace Migration / MigrationContext / MigrationReport / AppliedMigration / MIGRATIONS with Codemod / CodemodContext / CodemodReport / AppliedCodemod / CODEMODS. - Codemod.apply signature changes from `fn(&mut Mapping, &Ctx) -> Result<()>` to `fn(&mut Mapping, &Ctx) -> Result` so each codemod reports whether it actually modified the mapping. - Simplify the runner: flat per-codemod loop, no version arithmetic, no contiguity checks, no overflow guards, no future-version rejection. - Drop hoist_schema_version from reconstruct_source — no schema-version field to hoist. - Update parse_markdown_detailed, compile_pipeline_inner, perform_source_rewrite_if_needed, check_pipeline, run_execute, and compile_all_pipelines to use the codemod types and reword their messages. 
- Helpers (take_key, insert_no_overwrite, rename_key, ConflictPolicy) are unchanged — they're as useful for codemods as they were for migrations. rename_key already returned Result, matching the new codemod contract. - Drop ~13 schema-version-specific tests (registry contiguity, future version, non-integer, etc.). Add 7 codemod-runner tests (empty registry, all-no-op, mixed, error context, idempotency). Black-box CLI tests reduced from 10 to 3 (deleted tests asserted on schema-version-specific behavior that no longer exists). PR #476 has not merged; this lands as a follow-up commit on the same branch so users never see the schema-version concept ship. All 1349 tests pass. PR title and body to be updated separately. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- AGENTS.md | 13 +- docs/codemods.md | 372 +++++++++++ docs/extending.md | 4 +- docs/front-matter.md | 1 - docs/migrations.md | 481 --------------- ...on_test.rs => codemod_integration_test.rs} | 126 ++-- .../{migrations => codemods}/helpers.rs | 0 src/compile/codemods/mod.rs | 332 ++++++++++ src/compile/common.rs | 111 +--- src/compile/migrations/mod.rs | 580 ------------------ src/compile/mod.rs | 87 ++- src/compile/types.rs | 51 -- src/main.rs | 12 +- tests/codemod_tests.rs | 182 ++++++ tests/migration_tests.rs | 386 ------------ 15 files changed, 1021 insertions(+), 1717 deletions(-) create mode 100644 docs/codemods.md delete mode 100644 docs/migrations.md rename src/compile/{migration_integration_test.rs => codemod_integration_test.rs} (57%) rename src/compile/{migrations => codemods}/helpers.rs (100%) create mode 100644 src/compile/codemods/mod.rs delete mode 100644 src/compile/migrations/mod.rs create mode 100644 tests/codemod_tests.rs delete mode 100644 tests/migration_tests.rs diff --git a/AGENTS.md b/AGENTS.md index 3070a06b..b02514f5 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -61,10 +61,10 @@ Every compiled pipeline runs as three sequential jobs: │ │ │ ├── safe_outputs.rs # 
Always-on SafeOutputs MCP extension │ │ │ ├── trigger_filters.rs # Trigger filter extension (gate evaluator delivery) │ │ │ └── tests.rs # Extension integration tests -│ │ ├── migrations/ # Front-matter migrations (one file per version step) -│ │ │ ├── mod.rs # Migration struct, MIGRATIONS registry, runner, CURRENT_SCHEMA_VERSION +│ │ ├── codemods/ # Front-matter codemods (one file per transformation) +│ │ │ ├── mod.rs # Codemod struct, CODEMODS registry, runner │ │ │ └── helpers.rs # take_key, insert_no_overwrite, rename_key, ConflictPolicy -│ │ ├── migration_integration_test.rs # White-box rewrite-path tests (stub registry injection) +│ │ ├── codemod_integration_test.rs # White-box rewrite-path tests (stub registry injection) │ │ └── types.rs # Front matter grammar and types │ ├── init.rs # Repository initialization for AI-first authoring │ ├── execute.rs # Stage 3 safe output execution @@ -196,9 +196,10 @@ index to jump to the right page. - [`docs/filter-ir.md`](docs/filter-ir.md) — filter expression IR specification: `Fact`/`Predicate` types, three-pass compilation (lower → validate → codegen), gate step generation, adding new filter types. -- [`docs/migrations.md`](docs/migrations.md) — front-matter migration - framework: schema-version field, automatic source rewrite on - breaking-change updates, contributor workflow for adding migrations. +- [`docs/codemods.md`](docs/codemods.md) — front-matter codemod + framework: detection-based transformations, automatic source + rewrite on breaking-change updates, contributor workflow for + adding codemods. - [`docs/local-development.md`](docs/local-development.md) — local development setup notes. 
diff --git a/docs/codemods.md b/docs/codemods.md new file mode 100644 index 00000000..edae8456 --- /dev/null +++ b/docs/codemods.md @@ -0,0 +1,372 @@ +# Front-matter Codemods + +_Part of the [ado-aw documentation](../AGENTS.md)._ + +The `ado-aw` compiler keeps user front matter forward-compatible +through **codemods**: small, detection-based transformations that +rewrite deprecated front-matter shapes to the current shape during +`compile`. The model is borrowed from +[gh-aw's codemod registry](https://github.com/github/gh-aw/blob/main/pkg/cli/fix_codemods.go). + +This page is the reference for both users (what codemods mean for me) +and contributors (how to add one). + +## How it works + +### No version field on user files + +Codemods don't stamp anything onto user front matter. There is no +`schema-version` field. Each codemod inspects the mapping, decides +whether the input contains the deprecated shape it knows about, and +either rewrites it or returns "no-op". The whole registry runs on +every compile; codemods that don't match are essentially free. + +### The compile flow + +1. `ado-aw compile` reads the source `.md` and parses the front + matter as an **untyped** `serde_yaml::Mapping`. This step never + trips on removed/renamed fields. +2. The runner walks the registered codemod registry in order. Each + codemod returns `Ok(true)` if it modified the mapping, `Ok(false)` + if it didn't, or `Err` to abort the whole compile. +3. The compiler runs all the usual validation and codegen against + the rewritten, typed `FrontMatter`. +4. **Only on a fully successful compile** AND **only if at least one + codemod fired**, the source `.md` is atomically rewritten and a + warning prints to stderr. A failed compile leaves the source + untouched. +5. The `.lock.yml` is written atomically last. + +### What gets preserved on rewrite + +- **Body markdown** is preserved byte-for-byte (everything after + the closing `---`). 
+- **Leading whitespace** before the opening `---` is preserved + byte-for-byte (BOM-strippers and editor blank lines). +- **Front-matter key order** is preserved by `serde_yaml`'s + insertion-ordered mapping. +- **Front-matter comments** are NOT preserved. `serde_yaml` + round-trip drops them. The warning emitted on rewrite calls this + out so it isn't a surprise. If you have important context in a + front-matter comment, move it into the markdown body before + running compile. +- **Quote and scalar styles** in YAML may be normalized. This is + cosmetic. + +### Atomicity and lost-update protection + +The rewrite uses `tempfile + rename` for atomicity (no torn writes). +Before the rename, the runner re-reads the source and compares its +SHA-256 to the snapshot taken at parse time. If the file changed +between parse and rewrite, the runner aborts with a clear error +("source file ... changed during compilation; refusing to apply +codemods") rather than clobbering whoever wrote the file. + +### `check` command behavior + +`ado-aw check` exits non-zero when codemods would fire — there is no +opt-in flag and no warning-only mode. Rationale: compiled pipelines +download the **same** `ado-aw` version that produced them +(`src/data/base.yml`, `src/data/1es-base.yml`), so the in-pipeline +integrity check is internally consistent by construction. The only +time `check` sees pending codemods is when a developer runs a newer +`ado-aw` locally against an older source — exactly when we want to +fail loudly. The fix is `ado-aw compile`, which applies the codemods +in place. + +### `execute` command behavior + +The Stage 3 executor runs codemods in memory only. It never rewrites +the source (the executor's working tree is not the source-of-truth +tree). When at least one codemod would fire, it logs a warning and +continues. 
+ +## Adding a codemod + +You need a codemod whenever you introduce a breaking change to the +front-matter grammar: + +- Renaming a field +- Removing a field +- Changing a field's type or shape +- Adding a required field that didn't exist before +- Changing the meaning of an existing field + +Non-breaking changes (adding an optional field, accepting a new +variant) do **not** need a codemod. + +### File layout + +Codemods live in `src/compile/codemods/`: + +``` +src/compile/codemods/ +├── mod.rs # Framework + CODEMODS registry +├── helpers.rs # take_key, insert_no_overwrite, rename_key, ConflictPolicy +├── 0001_engine_id_split.rs +├── 0002_permissions_field.rs +└── 0003_safeoutput_renames.rs +``` + +The filename prefix is a zero-padded sequence number (``). It +sorts files naturally in directory listings; the **registry order** +in `mod.rs` is what determines runtime order, not the filename. + +### Anatomy of a codemod + +```rust +// src/compile/codemods/0001_engine_id_split.rs + +use anyhow::{bail, Result}; +use serde_yaml::{Mapping, Value}; + +use super::{Codemod, CodemodContext}; + +pub static CODEMOD: Codemod = Codemod { + id: "engine_id_split", + summary: "engine: -> engine: { id: copilot, model: }", + introduced_in: "0.27.0", + apply: apply_codemod, +}; + +fn apply_codemod(fm: &mut Mapping, _ctx: &CodemodContext) -> Result { + // ... your detection-based transformation ... + // Return Ok(true) if the mapping was modified, Ok(false) if the + // shape didn't match (no-op), or Err for a hard failure. 
+} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn rewrites_legacy_shape() { /* before / after fixture */ } + + #[test] + fn already_current_shape_is_noop() { /* defensive */ } + + #[test] + fn missing_field_is_noop() { /* defensive */ } + + #[test] + fn unexpected_shape_is_rejected() { /* hard error */ } +} +``` + +### Registry append + +Two edits in `src/compile/codemods/mod.rs`: + +```rust +mod m0001_engine_id_split; // <-- add module declaration + +pub static CODEMODS: &[&'static Codemod] = &[ + &m0001_engine_id_split::CODEMOD, // <-- append at the end +]; +``` + +Tests in `mod.rs` enforce that codemod ids are unique and that the +file count matches the registry length. A malformed registry fails +fast at runtime via `with_context`. + +### Author contract (invariants) + +Every codemod must satisfy these properties. We enforce them via +review + per-codemod tests: + +1. **Idempotent.** Running twice produces the same final state as + running once. The runner re-runs the entire registry every + compile — codemods that no longer apply must be no-ops. +2. **Detection-based.** Returns `Ok(false)` and leaves the mapping + untouched when the input does not contain the targeted shape. + Never modifies a mapping that's already current. +3. **Conflict-aware.** When both old and new shapes coexist in the + same source, error with a "manual migration required" message + rather than guess. Use `helpers::rename_key` with + `ConflictPolicy::Error` (the default) to get this for free. +4. **Pure.** No I/O, no env, no time/randomness. (Convention; not + type-enforced.) +5. **Mapping-only.** Cannot inspect the markdown body, the file + path, the lock file, or git state. +6. **Order-aware.** If codemod B depends on shapes produced by + codemod A, A must precede B in the registry. Document the + ordering requirement in B's doc comment. 
+ +### Use the helpers + +Codemods should prefer `helpers::*` over raw `Mapping` manipulation: + +- `take_key(map, "old")` — remove and return. +- `insert_no_overwrite(map, "new", value)` — error on conflict. +- `rename_key(map, "old", "new", ConflictPolicy::Error)` — default + policy is **error**, never silent overwrite. The helper is + transactional: on the error path the mapping is unchanged. + +`rename_key` returns `Result` directly, so it composes with a +codemod's return value: + +```rust +fn apply_codemod(fm: &mut Mapping, _ctx: &CodemodContext) -> Result { + rename_key(fm, "old-key", "new-key", ConflictPolicy::Error) +} +``` + +### What if my change can't be expressed as a Mapping rewrite? + +The codemod `apply` function receives only the front-matter mapping. +It cannot inspect the markdown body, the file path, the lock file, +or git state. If your change requires that information, do not +write a codemod that guesses. Instead, return an `Err` with an +actionable "manual migration required: " message so +the user knows exactly what to fix. + +## Worked example: `engine_id_split` + +This is the codemod that would have caught the `0.17.0` breaking +change (`engine: ` → `engine: { id: copilot, model: }`). +Drop-in template for your own codemod. + +```rust +// src/compile/codemods/0001_engine_id_split.rs + +//! engine: -> engine: { id: copilot, model: } +//! +//! Before 0.17.0 the `engine` field was a model name (e.g. +//! `engine: claude-opus-4.5`). The grammar changed to use engine +//! identifiers (`engine: copilot`), with the model nested in an +//! object form (`engine: { id: copilot, model: }`). + +use anyhow::{bail, Result}; +use serde_yaml::{Mapping, Value}; + +use super::{Codemod, CodemodContext}; + +pub static CODEMOD: Codemod = Codemod { + id: "engine_id_split", + summary: "engine: -> engine: { id: copilot, model: }", + introduced_in: "0.27.0", + apply: apply_codemod, +}; + +/// Engine identifiers that are valid as the simple-form string. 
When
+/// `engine` is a string equal to one of these, the source is already
+/// using the current grammar and we leave it alone.
+const KNOWN_ENGINE_IDS: &[&str] = &["copilot"];
+
+fn apply_codemod(fm: &mut Mapping, _ctx: &CodemodContext) -> Result<bool> {
+    let key = Value::String("engine".to_string());
+
+    let Some(engine) = fm.get(&key) else {
+        // No engine field at all — relies on default (copilot) in
+        // both old and new grammars. Nothing to do.
+        return Ok(false);
+    };
+
+    match engine {
+        // Already the object form: nothing to migrate.
+        Value::Mapping(_) => Ok(false),
+
+        // Simple-form string with a known engine identifier
+        // (e.g. `copilot`): already current. No-op.
+        Value::String(s) if KNOWN_ENGINE_IDS.contains(&s.as_str()) => Ok(false),
+
+        // Simple-form string that is NOT a known engine identifier:
+        // it's a *model name* from the old grammar. Wrap in object
+        // form.
+        Value::String(s) => {
+            let model = s.clone();
+            let mut object = Mapping::new();
+            object.insert(
+                Value::String("id".to_string()),
+                Value::String("copilot".to_string()),
+            );
+            object.insert(
+                Value::String("model".to_string()),
+                Value::String(model),
+            );
+            fm.insert(key, Value::Mapping(object));
+            Ok(true)
+        }
+
+        // Unexpected shape (number, bool, sequence, …). Refuse rather
+        // than guess — the user needs to fix this by hand.
+        other => bail!(
+            "engine field has unexpected shape (expected string or mapping, \
+             got {}); manual migration required",
+            describe(other)
+        ),
+    }
+}
+
+fn describe(v: &Value) -> &'static str {
+    match v {
+        Value::Null => "null",
+        Value::Bool(_) => "bool",
+        Value::Number(_) => "number",
+        Value::String(_) => "string",
+        Value::Sequence(_) => "sequence",
+        Value::Mapping(_) => "mapping",
+        Value::Tagged(_) => "tagged",
+    }
+}
+```
+
+### What this example illustrates
+
+1. **Detection-first.** Each match arm decides whether the input
+   matches the codemod's target shape.
Three of the four arms + return `Ok(false)` — those are the cases where the source is + already current. +2. **Idempotent by construction.** Once the mapping is in object + form, the first match arm fires and returns `Ok(false)`. The + codemod can run on every compile without harm. +3. **Conflict-aware.** The `Value::Mapping(_)` arm is the + "already-migrated" case. We never overwrite an existing object + form. The `bail!` on unexpected shapes is the manual-migration + escape hatch. +4. **Two-line registry append:** + + ```rust + mod m0001_engine_id_split; + + pub static CODEMODS: &[&'static Codemod] = &[ + &m0001_engine_id_split::CODEMOD, + ]; + ``` + + That's it. The registry-uniqueness and filename-prefix tests + keep passing. + +## Tests + +The codemod framework is covered by three layers of tests: + +- **Unit tests** in `src/compile/codemods/{mod.rs,helpers.rs}` + cover registry health, helper edge cases, and (for shipped + codemods) individual `apply` functions. +- **White-box integration tests** in + `src/compile/codemod_integration_test.rs` exercise the rewrite + path end-to-end (parse → codemods → compile → atomic source + rewrite → lock-file write) using a stub codemod registry + injected via the crate-private `compile_pipeline_with_registry` + and `parse_markdown_detailed_with_registry` hooks. They live + inside `src/` because the production registry is empty and + integration tests in `tests/` cannot link against crate + internals. +- **Black-box CLI tests** in `tests/codemod_tests.rs` spawn the + compiled `ado-aw` binary as a subprocess and assert on the + user-visible behavior of `compile` and `check`. + +When you add a real codemod, ship the per-codemod before/after +fixtures alongside it in the codemod's own file +(`src/compile/codemods/_.rs`). + +## See also + +- [`docs/front-matter.md`](front-matter.md) — the full + front-matter grammar. 
+- [`docs/extending.md`](extending.md) — broader guidance for + adding features to the compiler, including the requirement to + add a codemod alongside any breaking front-matter change. +- [gh-aw's `pkg/cli/fix_codemods.go`](https://github.com/github/gh-aw/blob/main/pkg/cli/fix_codemods.go) — + the upstream codemod model. diff --git a/docs/extending.md b/docs/extending.md index bed8e450..1d679d10 100644 --- a/docs/extending.md +++ b/docs/extending.md @@ -10,8 +10,8 @@ When extending the compiler: 2. **New compile targets**: Implement the `Compiler` trait in a new file under `src/compile/` 3. **New front matter fields**: Add fields to `FrontMatter` in `src/compile/types.rs` - **Breaking changes** (renames, removals, type changes, added required fields) - require adding a migration under `src/compile/migrations/` in the same PR. - See [`docs/migrations.md`](migrations.md). + require adding a codemod under `src/compile/codemods/` in the same PR. + See [`docs/codemods.md`](codemods.md). 4. **New template markers**: Handle replacements in the target-specific compiler (e.g., `standalone.rs` or `onees.rs`) 5. **New safe-output tools**: Add to `src/safeoutputs/` — implement `ToolResult`, `Executor`, register in `mod.rs`, `mcp.rs`, `execute.rs` 6. **New first-class tools**: Create `src/tools//` with `mod.rs` and `extension.rs` (CompilerExtension impl). Add `execute.rs` if the tool has Stage 3 runtime logic. Extend `ToolsConfig` in `types.rs`, add collection in `collect_extensions()` diff --git a/docs/front-matter.md b/docs/front-matter.md index b1a383e3..cdcf9eb0 100644 --- a/docs/front-matter.md +++ b/docs/front-matter.md @@ -8,7 +8,6 @@ The compiler expects markdown files with YAML front matter similar to gh-aw: ```markdown --- -schema-version: 1 # Optional: front-matter schema version (defaults to 1). The compiler bumps this in place when migrations apply during compilation. See docs/migrations.md. 
name: "name for this agent" description: "One line description for this agent" target: standalone # Optional: "standalone" (default) or "1es". See docs/targets.md. diff --git a/docs/migrations.md b/docs/migrations.md deleted file mode 100644 index 4045cdaa..00000000 --- a/docs/migrations.md +++ /dev/null @@ -1,481 +0,0 @@ -# Front-matter Migrations - -_Part of the [ado-aw documentation](../AGENTS.md)._ - -The `ado-aw` compiler maintains a versioned schema for the front-matter -grammar. When a breaking change is introduced (a renamed field, a removed -field, a type change), the compiler ships a **migration** that rewrites -older sources to the new shape automatically. Users see a warning during -compilation; their source files are updated in place; the build moves on. - -This page is the reference for both users (what migrations mean for me) -and contributors (how to add one). - -## How it works - -### `schema-version` field - -Every front-matter file carries an optional `schema-version: ` -field. When the field is missing, it defaults to `1`. The compiler bumps -this in place after running migrations. - -```yaml ---- -schema-version: 1 -name: my-agent -description: my agent ---- -``` - -End users typically don't write this field by hand. The compiler keeps -it accurate when migrations apply. - -### The compile flow - -1. `ado-aw compile` reads the source `.md` and parses the front matter - as an **untyped** `serde_yaml::Mapping`. This step never trips on - removed/renamed fields. -2. The migration runner walks the registered chain from the source's - `schema-version` up to the compiler's current version, applying each - migration's transformation in order. -3. The compiler runs all the usual validation and codegen against the - migrated, typed `FrontMatter`. -4. **Only on a fully successful compile**, the source `.md` is - atomically rewritten with the migrated front matter and a warning - prints to stderr. A failed compile leaves the source untouched. -5. 
The `.lock.yml` is written atomically last. - -### What gets preserved on rewrite - -- **Body markdown** is preserved byte-for-byte (everything after the - closing `---`). -- **Front-matter key order** is preserved by `serde_yaml`'s - insertion-ordered mapping. -- **Front-matter comments** are NOT preserved. `serde_yaml` round-trip - drops them. The warning emitted on rewrite calls this out so it isn't - a surprise. If you have important context in a front-matter comment, - move it into the markdown body before running compile. -- **Quote and scalar styles** in YAML may be normalized. This is - cosmetic. - -### Atomicity and lost-update protection - -The rewrite uses `tempfile + rename` for atomicity (no torn writes). -Before the rename, the runner re-reads the source and compares its -SHA-256 to the snapshot taken at parse time. If the file changed -between parse and rewrite, the runner aborts with a clear error -("source file ... changed during compilation; refusing to migrate") -rather than clobbering whoever wrote the file. - -### `check` command behavior - -`ado-aw check` exits non-zero when migrations are pending — there is no -opt-in flag and no warning-only mode. Rationale: compiled pipelines -download the **same** `ado-aw` version that produced them -(`src/data/base.yml`, `src/data/1es-base.yml`), so the in-pipeline -integrity check is internally consistent by construction. The only time -`check` sees a pending migration is when a developer runs a newer -`ado-aw` locally against an older source — exactly when we want to -fail loudly. The fix is `ado-aw compile`, which migrates the source in -place. - -### `execute` command behavior - -The Stage 3 executor runs migrations in memory only. It never rewrites -the source (the executor's working tree is not the source-of-truth -tree). When a migration applies, it logs a warning and continues. 
- -## Adding a migration - -You need a migration whenever you introduce a breaking change to the -front-matter grammar: - -- Renaming a field -- Removing a field -- Changing a field's type or shape -- Adding a required field that didn't exist before -- Changing the meaning of an existing field - -Non-breaking changes (adding an optional field, accepting a new -variant) do **not** need a migration. - -### File layout - -Migrations live in `src/compile/migrations/`: - -``` -src/compile/migrations/ -├── mod.rs # Framework + MIGRATIONS registry -├── helpers.rs # take_key, insert_no_overwrite, rename_key, ConflictPolicy -├── 0001_engine_id_split.rs -├── 0002_permissions_field.rs -└── 0003_safeoutput_renames.rs -``` - -The filename prefix is the zero-padded `from_version`. Files sort -naturally in the directory listing in chain order. - -### Anatomy of a migration - -```rust -// src/compile/migrations/0001_engine_id_split.rs - -use anyhow::Result; -use serde_yaml::Mapping; - -use super::{ConflictPolicy, Migration, MigrationContext}; - -pub static MIGRATION: Migration = Migration { - from_version: 1, - to_version: 2, - id: "engine_id_split", - summary: "engine: -> engine: { id: copilot, model: }", - introduced_in: "0.27.0", - apply: apply_migration, -}; - -fn apply_migration(fm: &mut Mapping, _ctx: &MigrationContext) -> Result<()> { - // ... your transformation ... - Ok(()) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn upgrades_engine_string_to_object() { - // before / after fixture pair - } - - #[test] - fn already_current_shape_is_preserved() { - // defensive: every from_version=1 migration runs on every - // unstamped source in the wild, including ones that were - // authored *yesterday* against the current shape. 
- } -} -``` - -### Registry append - -Two edits in `src/compile/migrations/mod.rs`: - -```rust -mod m0001_engine_id_split; // <-- add module declaration - -pub static MIGRATIONS: &[&'static Migration] = &[ - &m0001_engine_id_split::MIGRATION, // <-- append at the end -]; -``` - -`CURRENT_SCHEMA_VERSION` is computed automatically from the registry -length. Tests in `mod.rs` enforce that the registry is contiguous, that -migration ids are unique, and that filenames match `from_version`. A -malformed registry fails fast at runtime via `ensure!()`. - -### Use the helpers - -Migrations should prefer `helpers::*` over raw `Mapping` manipulation: - -- `take_key(map, "old")` — remove and return. -- `insert_no_overwrite(map, "new", value)` — error on conflict. -- `rename_key(map, "old", "new", ConflictPolicy::Error)` — default - policy is **error**, never silent overwrite. - -Silent overwrite is almost always a bug when transforming user data. - -### Defensive predicates for `from_version: 1` - -Because the `schema-version` field doesn't exist in any source written -before this framework shipped, every existing file looks like v1 — -including files that were authored yesterday against the current -shape. Migrations from `from_version: 1` must be **shape-detecting and -conflict-aware**: - -- If both old and new keys are present → error rather than silent - overwrite (default `ConflictPolicy::Error`). -- If the old key has the new shape already → no-op (don't migrate). - -Once we ship v2, this concern goes away for v2 → v3 etc. — v2 sources -have an explicit `schema-version: 2` stamp, so the framework only runs -migrations against actual v2 documents. - -### Concurrent PRs - -If two PRs each add a migration `N → N+1`, the second one to merge -must rebase. The rebase is mechanical: - -1. Renumber the file prefix: `0003_…rs` → `0004_…rs`. -2. Bump `from_version` and `to_version` in the static. -3. Update any per-migration test fixtures that stamped the old - version. -4. 
Re-append your migration to `MIGRATIONS` after the freshly merged - one. - -The contiguity tests fail loudly if a rebase leaves a gap, so this is -hard to get wrong silently. - -### What if my change can't be expressed as a `Mapping` rewrite? - -The migration `apply` function receives only the front-matter mapping. -It cannot inspect the markdown body, the file path, the lock file, or -git state. If your change requires that information, do not write a -migration that guesses. Instead, add a migration that **errors** with -an actionable "manual migration required: " message so -the user knows exactly what to fix. - -## Worked example: `engine_id_split` - -This is the migration that would have caught the `0.17.0` breaking -change (`engine: ` → `engine: { id: copilot, model: }`). -It's a complete file you could drop into `src/compile/migrations/` -and register today. Read it as a template for your own migration. - -```rust -// src/compile/migrations/0001_engine_id_split.rs - -//! engine: -> engine: { id: copilot, model: } -//! -//! Before 0.17.0 the `engine` field was a model name (e.g. -//! `engine: claude-opus-4.5`). The grammar changed to use engine -//! identifiers (`engine: copilot`), with the model nested in an object -//! form (`engine: { id: copilot, model: }`). -//! -//! This migration detects the old shape and rewrites it. - -use anyhow::{bail, Result}; -use serde_yaml::{Mapping, Value}; - -use super::{Migration, MigrationContext}; - -pub static MIGRATION: Migration = Migration { - from_version: 1, - to_version: 2, - id: "engine_id_split", - summary: "engine: -> engine: { id: copilot, model: }", - introduced_in: "0.27.0", - apply: apply_migration, -}; - -/// Engine identifiers that are valid as the simple-form string. When -/// `engine` is a string equal to one of these, the source is already -/// using the current grammar and we leave it alone. 
-const KNOWN_ENGINE_IDS: &[&str] = &["copilot"]; - -fn apply_migration(fm: &mut Mapping, _ctx: &MigrationContext) -> Result<()> { - let key = Value::String("engine".to_string()); - - // No `engine` field at all — the front matter relies on the - // default. Default is `copilot` in both old and new grammars, so - // there's nothing to migrate. - let Some(engine) = fm.get(&key) else { - return Ok(()); - }; - - match engine { - // Already-object form. Either authored against the new - // grammar from day one, or migrated by an earlier run. No-op. - Value::Mapping(_) => Ok(()), - - // String form. Two cases: - // - The string is a known engine identifier (`copilot`): - // the source is already using the current simple form. - // No-op. - // - Anything else: the string is a *model name* from the - // old grammar. Wrap it in the object form. - Value::String(s) => { - if KNOWN_ENGINE_IDS.contains(&s.as_str()) { - return Ok(()); - } - let model = s.clone(); - let mut object = Mapping::new(); - object.insert( - Value::String("id".to_string()), - Value::String("copilot".to_string()), - ); - object.insert( - Value::String("model".to_string()), - Value::String(model), - ); - fm.insert(key, Value::Mapping(object)); - Ok(()) - } - - // Unexpected shape (number, bool, sequence, …). Refuse rather - // than guess — the user needs to fix this by hand. 
- other => bail!( - "engine field has unexpected shape (expected string or mapping, \ - got {}); manual migration required", - describe(other) - ), - } -} - -fn describe(v: &Value) -> &'static str { - match v { - Value::Null => "null", - Value::Bool(_) => "bool", - Value::Number(_) => "number", - Value::String(_) => "string", - Value::Sequence(_) => "sequence", - Value::Mapping(_) => "mapping", - Value::Tagged(_) => "tagged", - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn run(input: &str) -> Mapping { - let mut m: Mapping = serde_yaml::from_str(input).unwrap(); - apply_migration(&mut m, &MigrationContext {}).expect("apply"); - m - } - - fn run_err(input: &str) -> String { - let mut m: Mapping = serde_yaml::from_str(input).unwrap(); - format!( - "{}", - apply_migration(&mut m, &MigrationContext {}).unwrap_err() - ) - } - - #[test] - fn rewrites_legacy_model_string_into_object_form() { - let after = run("name: x\nengine: claude-opus-4.5\n"); - let engine = after - .get(Value::String("engine".into())) - .unwrap() - .as_mapping() - .expect("engine should now be a mapping"); - assert_eq!( - engine.get(Value::String("id".into())), - Some(&Value::String("copilot".into())) - ); - assert_eq!( - engine.get(Value::String("model".into())), - Some(&Value::String("claude-opus-4.5".into())) - ); - } - - #[test] - fn already_current_simple_form_is_noop() { - // Defensive: every from_version=1 migration must be safe on - // sources that look like they're already current. Here the - // user explicitly wrote the new simple form `engine: copilot`. 
- let after = run("name: x\nengine: copilot\n"); - assert_eq!( - after.get(Value::String("engine".into())), - Some(&Value::String("copilot".into())), - "string `copilot` should be left untouched" - ); - } - - #[test] - fn already_object_form_is_noop() { - let input = "name: x\nengine:\n id: copilot\n model: claude-opus-4.7\n"; - let mut original: Mapping = serde_yaml::from_str(input).unwrap(); - let after = run(input); - assert_eq!( - after.get(Value::String("engine".into())), - original - .remove(Value::String("engine".into())) - .as_ref() - ); - } - - #[test] - fn missing_engine_field_is_noop() { - let after = run("name: x\ndescription: y\n"); - assert!(!after.contains_key(Value::String("engine".into()))); - } - - #[test] - fn unexpected_engine_shape_is_rejected() { - let err = run_err("name: x\nengine: 42\n"); - assert!( - err.contains("manual migration required"), - "expected actionable error, got: {}", - err - ); - } -} -``` - -### What this example illustrates - -1. **The data model does the work.** `Migration` is a static — - every field is set once at the top of the file. The runner reads - `from_version`/`to_version` to walk the chain. `apply` is a plain - `fn` pointer, so the registry is `&'static [&'static Migration]` - — zero allocation, zero indirection beyond the function call. - -2. **`Value` pattern-matching beats `if let` ladders.** The - `match engine { Mapping(_) => …, String(s) => …, other => … }` - shape is the natural way to express "I support these shapes; - reject others." It catches numeric/boolean/null engines that - someone might have written by mistake. - -3. **Defensive predicates for `from_version: 1`.** Three of the five - tests exercise no-op cases (already-object, already-simple - `copilot`, missing field). These matter because **every existing - `.md` file in the wild looks like v1** — they don't carry - `schema-version`. The migration runs on all of them, so it has - to be safe on already-current shapes. 
Once we ship v2 the - defensiveness requirement drops for v2 → v3 migrations: v2 - sources have an explicit stamp. - -4. **Hard error on unexpected shapes.** The `other => bail!(…)` - arm is the "manual migration required" escape hatch. We don't - try to guess what `engine: 42` means — we surface a clear error - so the user fixes it by hand. - -5. **Registering it is two lines** in `src/compile/migrations/mod.rs`: - - ```rust - mod m0001_engine_id_split; - - pub static MIGRATIONS: &[&'static Migration] = &[ - &m0001_engine_id_split::MIGRATION, - ]; - ``` - - `CURRENT_SCHEMA_VERSION` automatically becomes 2. The - registry-contiguity test, the filename-prefix test, and the - id-uniqueness test all keep passing. - -## Tests - -The migration framework is covered by three layers of tests: - -- **Unit tests** in `src/compile/migrations/{mod.rs,helpers.rs}` - cover registry contiguity, helper edge cases, and individual - migration apply functions. -- **White-box integration tests** in - `src/compile/migration_integration_test.rs` exercise the rewrite - path end-to-end (parse → migrate → compile → atomic source rewrite - → lock-file write) using a stub migration registry injected via the - crate-private `compile_pipeline_with_registry` and - `parse_markdown_detailed_with_registry` hooks. They live inside - `src/` because the production registry is empty and integration - tests in `tests/` cannot link against crate internals. -- **Black-box CLI tests** in `tests/migration_tests.rs` spawn the - compiled `ado-aw` binary as a subprocess and assert on the - user-visible behavior of `compile` and `check` for sources with - various `schema-version` shapes (future versions, non-integer values, - healthy round-trip, etc.). - -When you add a real migration, ship the per-migration before/after -fixtures alongside it in the migration's own file -(`src/compile/migrations/_.rs`). 
- -## See also - -- [`docs/front-matter.md`](front-matter.md) — the full front-matter - grammar (and where the `schema-version` field is documented for end - users). -- [`docs/extending.md`](extending.md) — broader guidance for adding - features to the compiler, including the requirement to add a - migration alongside any breaking front-matter change. diff --git a/src/compile/migration_integration_test.rs b/src/compile/codemod_integration_test.rs similarity index 57% rename from src/compile/migration_integration_test.rs rename to src/compile/codemod_integration_test.rs index e7bb5737..399abff9 100644 --- a/src/compile/migration_integration_test.rs +++ b/src/compile/codemod_integration_test.rs @@ -1,44 +1,43 @@ -//! White-box integration tests for the front-matter migration framework. +//! White-box integration tests for the front-matter codemod +//! framework. //! -//! These tests exercise the rewrite path end-to-end (parse → migrate → -//! compile → atomic source rewrite → lock-file write) using a stub -//! migration registry. They live inside `src/` because they need -//! access to crate-private hooks (`compile_pipeline_with_registry`, +//! These tests exercise the rewrite path end-to-end (parse → +//! codemods → compile → atomic source rewrite → lock-file write) +//! using a stub codemod registry. They live inside `src/` because +//! they need access to crate-private hooks +//! (`compile_pipeline_with_registry`, //! `perform_source_rewrite_if_needed`, //! `common::parse_markdown_detailed_with_registry`) that the empty -//! production [`super::migrations::MIGRATIONS`] registry cannot +//! production [`super::codemods::CODEMODS`] registry cannot //! exercise. //! -//! Black-box subprocess tests of the user-visible CLI behavior live in -//! [`tests/migration_tests.rs`](../../tests/migration_tests.rs). +//! Black-box subprocess tests of the user-visible CLI behavior live +//! in [`tests/codemod_tests.rs`](../../tests/codemod_tests.rs). 
 #![cfg(test)]
 
+use super::codemods::{Codemod, CodemodContext, ConflictPolicy};
 use super::common;
-use super::migrations::{ConflictPolicy, Migration, MigrationContext};
-use super::{compile_pipeline, compile_pipeline_with_registry, perform_source_rewrite_if_needed};
+use super::{compile_pipeline_with_registry, perform_source_rewrite_if_needed};
 use anyhow::Result;
 use serde_yaml::Mapping;
 
 // ─── Stub registry ──────────────────────────────────────────────────────────
 
-/// Stub migration: renames the `legacy-name` top-level key to `name`.
-/// Drives end-to-end rewrite tests without registering a real migration.
-static TEST_RENAME_LEGACY_NAME: Migration = Migration {
-    from_version: 1,
-    to_version: 2,
+/// Stub codemod: renames the `legacy-name` top-level key to `name`.
+/// Drives end-to-end rewrite tests without registering a real codemod.
+static TEST_RENAME_LEGACY_NAME: Codemod = Codemod {
     id: "test_rename_legacy_name",
     summary: "rename `legacy-name` -> `name`",
     introduced_in: "test",
     apply: rename_legacy_name,
 };
 
-fn rename_legacy_name(fm: &mut Mapping, _ctx: &MigrationContext) -> Result<()> {
-    super::migrations::rename_key(fm, "legacy-name", "name", ConflictPolicy::Error)?;
-    Ok(())
+fn rename_legacy_name(fm: &mut Mapping, _ctx: &CodemodContext) -> Result<bool> {
+    super::codemods::rename_key(fm, "legacy-name", "name", ConflictPolicy::Error)
 }
 
-/// Source whose typed deserialization would fail without a migration:
+/// Source whose typed deserialization would fail without a codemod:
 /// it has `legacy-name` instead of the required `name` field.
fn stale_source() -> &'static str { "---\nlegacy-name: my-agent\ndescription: test description\n---\n## Body\nHello.\n" @@ -54,11 +53,11 @@ fn write_temp_md(name: &str, content: &str) -> (tempfile::TempDir, std::path::Pa // ─── End-to-end rewrite path ──────────────────────────────────────────────── #[tokio::test] -async fn migration_rewrites_stale_source_and_preserves_body() { +async fn codemod_rewrites_stale_source_and_preserves_body() { let (dir, source_path) = write_temp_md("agent.md", stale_source()); - let registry: &[&'static Migration] = &[&TEST_RENAME_LEGACY_NAME]; - let migrated = compile_pipeline_with_registry( + let registry: &[&'static Codemod] = &[&TEST_RENAME_LEGACY_NAME]; + let rewrote = compile_pipeline_with_registry( &source_path.to_string_lossy(), None, true, // skip_integrity @@ -68,10 +67,10 @@ async fn migration_rewrites_stale_source_and_preserves_body() { .await .expect("compile_pipeline_with_registry should succeed"); - assert!(migrated, "expected compile to report a source migration"); + assert!(rewrote, "expected compile to report a source rewrite"); // Source file rewritten: contains `name: my-agent`, no - // `legacy-name`, has `schema-version: 2`. + // `legacy-name`. let rewritten = std::fs::read_to_string(&source_path).expect("read rewritten"); assert!( rewritten.contains("name: my-agent"), @@ -83,11 +82,6 @@ async fn migration_rewrites_stale_source_and_preserves_body() { "rewritten source still contains legacy-name:\n{}", rewritten ); - assert!( - rewritten.contains("schema-version: 2"), - "rewritten source missing `schema-version: 2`:\n{}", - rewritten - ); // Body region byte-identical to the original (everything after // the closing `---`). @@ -101,28 +95,29 @@ async fn migration_rewrites_stale_source_and_preserves_body() { assert!(lock.exists(), "expected {} to exist", lock.display()); // Re-running parse with the same stub registry produces no - // further migration — the rewrite moved the source to current. 
+ // further codemod fires — the rewrite moved the source to its + // current shape. let after = std::fs::read_to_string(&source_path).unwrap(); let parsed_again = common::parse_markdown_detailed_with_registry(&after, registry).unwrap(); assert!( - !parsed_again.migrations.changed(), - "expected post-rewrite source to be at current schema-version, but \ - {} more migration(s) ran", - parsed_again.migrations.applied.len() + !parsed_again.codemods.changed(), + "expected post-rewrite source to require no further codemods, but \ + {} fired", + parsed_again.codemods.applied.len() ); drop(dir); } #[tokio::test] -async fn migration_skip_when_no_stub_registry_runs() { - // With the empty production registry, a healthy v1 source must +async fn codemod_skip_when_no_stub_registry_runs() { + // With the empty production registry, a healthy source must // compile without rewriting anything. let healthy = "---\nname: x\ndescription: y\n---\nbody\n"; let (dir, source_path) = write_temp_md("agent.md", healthy); - let registry: &[&'static Migration] = &[]; - let migrated = compile_pipeline_with_registry( + let registry: &[&'static Codemod] = &[]; + let rewrote = compile_pipeline_with_registry( &source_path.to_string_lossy(), None, true, @@ -131,32 +126,30 @@ async fn migration_skip_when_no_stub_registry_runs() { ) .await .expect("compile should succeed"); - assert!(!migrated, "expected no rewrite for already-current source"); + assert!(!rewrote, "expected no rewrite for source that needs no codemods"); let after = std::fs::read_to_string(&source_path).unwrap(); assert_eq!(after, healthy, "source must be byte-identical"); drop(dir); } #[tokio::test] -async fn migration_with_only_version_bump_still_writes() { - fn noop(_fm: &mut Mapping, _ctx: &MigrationContext) -> Result<()> { - Ok(()) +async fn codemod_no_op_returns_false_does_not_rewrite() { + // A codemod that always returns Ok(false) must not trigger a + // source rewrite even though it ran. 
+    fn noop(_fm: &mut Mapping, _ctx: &CodemodContext) -> Result<bool> {
+        Ok(false)
+    }
-    static NOOP_MIG: Migration = Migration {
-        from_version: 1,
-        to_version: 2,
-        id: "test_noop_v1_to_v2",
+    static NOOP_MIG: Codemod = Codemod {
+        id: "test_noop_returns_false",
         summary: "no-op",
         introduced_in: "test",
         apply: noop,
     };
 
-    // Even a no-op migration changes bytes on disk because the
-    // schema-version field is added. Verify we DO rewrite.
     let healthy = "---\nname: x\ndescription: y\n---\nbody\n";
     let (dir, source_path) = write_temp_md("agent.md", healthy);
-    let registry: &[&'static Migration] = &[&NOOP_MIG];
-    let migrated = compile_pipeline_with_registry(
+    let registry: &[&'static Codemod] = &[&NOOP_MIG];
+    let rewrote = compile_pipeline_with_registry(
         &source_path.to_string_lossy(),
         None,
         true,
@@ -166,11 +159,11 @@
     .await
     .expect("compile should succeed");
     assert!(
-        migrated,
-        "expected rewrite because schema-version field was added"
+        !rewrote,
+        "no-op codemod that returns Ok(false) must not trigger a rewrite"
     );
     let after = std::fs::read_to_string(&source_path).unwrap();
-    assert!(after.contains("schema-version: 2"));
+    assert_eq!(after, healthy, "source must be byte-identical");
 
     drop(dir);
 }
 
@@ -184,10 +177,10 @@ async fn perform_source_rewrite_lost_update_guard() {
     // full compile.
     let (dir, source_path) = write_temp_md("agent.md", stale_source());
     let original = std::fs::read_to_string(&source_path).unwrap();
-    let registry: &[&'static Migration] = &[&TEST_RENAME_LEGACY_NAME];
+    let registry: &[&'static Codemod] = &[&TEST_RENAME_LEGACY_NAME];
     let parsed = common::parse_markdown_detailed_with_registry(&original, registry).unwrap();
-    assert!(parsed.migrations.changed());
+    assert!(parsed.codemods.changed());
 
     // Mutate the source after parse (simulates editor / concurrent
     // tool overwriting).
@@ -201,7 +194,7 @@ async fn perform_source_rewrite_lost_update_guard() { &parsed.front_matter_mapping, &parsed.body_raw, &parsed.source_sha256, - &parsed.migrations, + &parsed.codemods, ) .await; let err = result.expect_err("expected lost-update guard to fire"); @@ -212,28 +205,9 @@ async fn perform_source_rewrite_lost_update_guard() { ); // The source is left as the interloper wrote it (we did not - // clobber); it is *not* the migrated version. + // clobber); it is *not* the codemod-rewritten version. let after = std::fs::read_to_string(&source_path).unwrap(); assert_eq!(after, "different bytes\n"); drop(dir); } - -// ─── Future-version safety net ────────────────────────────────────────────── - -#[tokio::test] -async fn check_pipeline_fails_on_future_schema_version() { - // The real registry is empty (CURRENT=1), so a source claiming - // schema-version: 2 is a future version. compile should reject it - // loudly through the public entry point. - let future = "---\nschema-version: 2\nname: x\ndescription: y\n---\n"; - let (dir, source_path) = write_temp_md("agent.md", future); - let result = compile_pipeline(&source_path.to_string_lossy(), None, true, false).await; - let err = result.expect_err("future-version source should fail compile"); - assert!( - format!("{:#}", err).contains("only supports up to"), - "unexpected error: {:#}", - err - ); - drop(dir); -} diff --git a/src/compile/migrations/helpers.rs b/src/compile/codemods/helpers.rs similarity index 100% rename from src/compile/migrations/helpers.rs rename to src/compile/codemods/helpers.rs diff --git a/src/compile/codemods/mod.rs b/src/compile/codemods/mod.rs new file mode 100644 index 00000000..6be5eca0 --- /dev/null +++ b/src/compile/codemods/mod.rs @@ -0,0 +1,332 @@ +//! Front-matter codemod framework. +//! +//! A flat, append-only registry of detection-based transformations +//! that rewrite deprecated front-matter shapes to current ones +//! before typed deserialization. 
Modeled on gh-aw's codemod registry +//! (`pkg/cli/fix_codemods.go`). +//! +//! See `docs/codemods.md` for the full contributor reference. In +//! short: +//! +//! - Each codemod is a pure function that **detects** an old shape +//! and rewrites it. When the input does not match, it returns +//! `Ok(false)` and leaves the mapping untouched. +//! - Codemods live in `src/compile/codemods/_.rs` and are +//! appended to [`CODEMODS`] in registration order. +//! - Codemods operate on the **untyped** `serde_yaml::Mapping` +//! representation so they can rewrite shapes that no longer match +//! the typed `FrontMatter` (renamed/removed fields, +//! `deny_unknown_fields`). +//! - Codemods MUST be **idempotent**: running twice produces the +//! same final state as running once. +//! - When an old key and a new key coexist in the same source, the +//! codemod errors with a "manual migration required" message +//! rather than guess. The [`helpers::rename_key`] + +//! [`helpers::ConflictPolicy::Error`] pattern gives this for free. +//! +//! Unlike a version-stamped chain, codemods leave no per-source +//! version footprint — there is no `schema-version` field in user +//! front matter. Each codemod is idempotent so re-running the whole +//! registry on every compile is cheap when nothing matches. + +use anyhow::{Context, Result}; +use serde_yaml::Mapping; + +mod helpers; +#[allow(unused_imports)] +pub use helpers::{insert_no_overwrite, rename_key, take_key, ConflictPolicy}; + +/// Forward-compatible context passed to every codemod. Currently +/// empty; we keep it in the signature so future codemods can be +/// given (e.g.) the source path without breaking the function +/// pointer type. +#[non_exhaustive] +pub struct CodemodContext {} + +/// A single front-matter codemod. +/// +/// Codemods are pure functions that detect a specific deprecated +/// shape in the front-matter mapping and rewrite it to the current +/// shape. 
They must satisfy the following invariants (see +/// `docs/codemods.md`): +/// +/// - **Idempotent.** Running twice produces the same result as +/// running once. +/// - **Detection-based.** Returns `Ok(false)` and leaves the mapping +/// untouched when the input does not contain the targeted shape. +/// - **Conflict-aware.** Errors with an actionable "manual migration +/// required" message when both old and new shapes coexist. +/// - **Pure.** No I/O, no env, no time/randomness. +/// - **Mapping-only.** Cannot inspect the markdown body, file path, +/// lock file, or git state. +pub struct Codemod { + /// Stable snake_case identifier used in logs, errors, and tests. + pub id: &'static str, + /// Human-facing one-line summary, surfaced in the compile warning. + pub summary: &'static str, + /// Compiler version that introduced this codemod (e.g. "0.27.0"). + /// Provenance only; not consumed by the runner. + #[allow(dead_code)] + pub introduced_in: &'static str, + /// The transformation. Returns `Ok(true)` when the codemod + /// modified the mapping; `Ok(false)` when the mapping does not + /// match this codemod's detection predicate. Errors propagate up + /// and abort the whole compile. + pub apply: fn(&mut Mapping, &CodemodContext) -> Result, +} + +/// The fixed registry of codemods. Append-only — new codemods land +/// at the end. Order matters when one codemod depends on another +/// having run first (e.g. A renames `foo` → `bar`, B operates on +/// `bar`); idempotency means any codemod can re-run on any source +/// without harm. +pub static CODEMODS: &[&'static Codemod] = &[]; + +/// Result of running the codemod registry on a single front-matter +/// mapping. +#[derive(Debug, Clone)] +pub struct CodemodReport { + /// Ordered list of codemods that ran (returned `Ok(true)`). + pub applied: Vec, +} + +/// Record of a single codemod that fired. Carries enough info for +/// warning emission and tests without re-querying the registry. 
+#[derive(Debug, Clone)] +pub struct AppliedCodemod { + pub id: &'static str, + pub summary: &'static str, +} + +impl CodemodReport { + /// An empty report (no codemod fired). + #[allow(dead_code)] + pub fn empty() -> Self { + Self { + applied: Vec::new(), + } + } + + /// Returns true when at least one codemod ran. + pub fn changed(&self) -> bool { + !self.applied.is_empty() + } + + /// IDs of codemods that ran, in order. Helpful for tests. + #[allow(dead_code)] + pub fn applied_ids(&self) -> Vec<&'static str> { + self.applied.iter().map(|a| a.id).collect() + } +} + +/// Apply the registered codemods to `fm`. +/// +/// Equivalent to [`apply_codemods_with`] called with the global +/// [`CODEMODS`] registry. +#[allow(dead_code)] +pub fn apply_codemods(fm: &mut Mapping) -> Result { + apply_codemods_with(fm, CODEMODS) +} + +/// Apply an explicit codemod registry. Used by tests with a stub +/// registry; production code calls [`apply_codemods`]. +pub(crate) fn apply_codemods_with( + fm: &mut Mapping, + registry: &[&'static Codemod], +) -> Result { + let mut applied: Vec = Vec::new(); + for c in registry { + let changed = (c.apply)(fm, &CodemodContext {}) + .with_context(|| format!("codemod {} failed", c.id))?; + if changed { + applied.push(AppliedCodemod { + id: c.id, + summary: c.summary, + }); + } + } + Ok(CodemodReport { applied }) +} + +#[cfg(test)] +mod tests { + use super::helpers::ConflictPolicy; + use super::*; + use anyhow::bail; + use serde_yaml::Value; + + // ─── Registry health (operates on the real CODEMODS slice) ──────────── + + #[test] + fn registry_ids_are_unique() { + let mut seen = std::collections::BTreeSet::new(); + for c in CODEMODS { + assert!( + seen.insert(c.id), + "duplicate codemod id `{}` in CODEMODS", + c.id + ); + } + } + + #[test] + fn codemod_filenames_match_registry_count() { + // Scan src/compile/codemods/*.rs and check that each numeric + // codemod file is present exactly once in the registry. 
+ let dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR")) + .join("src/compile/codemods"); + let mut numeric_files: Vec = Vec::new(); + for entry in std::fs::read_dir(&dir).expect("read codemods dir") { + let entry = entry.expect("dir entry"); + let path = entry.path(); + if path.extension().and_then(|e| e.to_str()) != Some("rs") { + continue; + } + let stem = path + .file_stem() + .and_then(|s| s.to_str()) + .expect("utf8 stem") + .to_string(); + if stem == "mod" || stem == "helpers" { + continue; + } + // Expect `_` so files sort naturally in + // directory listings. The registry order is the canonical + // application order; the prefix is purely cosmetic. + let (prefix, _rest) = stem.split_once('_').unwrap_or_else(|| { + panic!( + "codemod file {:?} does not match `_.rs`", + path + ) + }); + let _: u32 = prefix.parse().unwrap_or_else(|_| { + panic!( + "codemod file {:?} has non-numeric prefix `{}`", + path, prefix + ) + }); + numeric_files.push(stem); + } + assert_eq!( + numeric_files.len(), + CODEMODS.len(), + "number of `_.rs` files ({}) does not match \ + CODEMODS.len() ({}); files: {:?}", + numeric_files.len(), + CODEMODS.len(), + numeric_files + ); + } + + // ─── apply_codemods_with ────────────────────────────────────────────── + + fn noop_no_change(_fm: &mut Mapping, _ctx: &CodemodContext) -> Result { + Ok(false) + } + + fn rename_a_to_b(fm: &mut Mapping, _ctx: &CodemodContext) -> Result { + rename_key(fm, "a", "b", ConflictPolicy::Error) + } + + fn always_fail(_fm: &mut Mapping, _ctx: &CodemodContext) -> Result { + bail!("intentional test failure"); + } + + static TEST_CODEMOD_NOOP: Codemod = Codemod { + id: "test_noop", + summary: "no-op codemod for tests", + introduced_in: "test", + apply: noop_no_change, + }; + + static TEST_CODEMOD_RENAME: Codemod = Codemod { + id: "test_rename_a_to_b", + summary: "rename a -> b", + introduced_in: "test", + apply: rename_a_to_b, + }; + + static TEST_CODEMOD_FAIL: Codemod = Codemod { + id: "test_fail", + summary: 
"always fails", + introduced_in: "test", + apply: always_fail, + }; + + #[test] + fn empty_registry_produces_empty_report() { + let mut m: Mapping = serde_yaml::from_str("name: x\n").unwrap(); + let report = apply_codemods_with(&mut m, &[]).unwrap(); + assert!(!report.changed()); + assert!(report.applied.is_empty()); + } + + #[test] + fn all_no_op_codemods_produce_empty_report() { + let mut m: Mapping = serde_yaml::from_str("name: x\n").unwrap(); + let registry: &[&'static Codemod] = &[&TEST_CODEMOD_NOOP]; + let report = apply_codemods_with(&mut m, registry).unwrap(); + assert!(!report.changed()); + } + + #[test] + fn matching_codemod_appears_in_report() { + let mut m: Mapping = serde_yaml::from_str("a: 1\n").unwrap(); + let registry: &[&'static Codemod] = &[&TEST_CODEMOD_RENAME]; + let report = apply_codemods_with(&mut m, registry).unwrap(); + assert!(report.changed()); + assert_eq!(report.applied_ids(), vec!["test_rename_a_to_b"]); + assert!(!m.contains_key(Value::String("a".into()))); + assert!(m.contains_key(Value::String("b".into()))); + } + + #[test] + fn non_matching_codemod_omitted_from_report() { + // Source has no `a` key, so the rename is a no-op and is + // not listed in the report. 
+ let mut m: Mapping = serde_yaml::from_str("name: x\n").unwrap(); + let registry: &[&'static Codemod] = &[&TEST_CODEMOD_RENAME]; + let report = apply_codemods_with(&mut m, registry).unwrap(); + assert!(!report.changed()); + } + + #[test] + fn mixed_registry_lists_only_changed_codemods() { + let mut m: Mapping = serde_yaml::from_str("a: 1\n").unwrap(); + let registry: &[&'static Codemod] = + &[&TEST_CODEMOD_NOOP, &TEST_CODEMOD_RENAME, &TEST_CODEMOD_NOOP]; + let report = apply_codemods_with(&mut m, registry).unwrap(); + assert_eq!(report.applied_ids(), vec!["test_rename_a_to_b"]); + } + + #[test] + fn codemod_failure_carries_id_in_context() { + let mut m: Mapping = serde_yaml::from_str("a: 1\n").unwrap(); + let registry: &[&'static Codemod] = &[&TEST_CODEMOD_FAIL]; + let err = apply_codemods_with(&mut m, registry).unwrap_err(); + let chain = format!("{:#}", err); + assert!( + chain.contains("codemod test_fail failed"), + "expected codemod context, got: {}", + chain + ); + assert!( + chain.contains("intentional test failure"), + "expected inner error, got: {}", + chain + ); + } + + #[test] + fn idempotent_codemod_runs_safely_twice() { + let registry: &[&'static Codemod] = &[&TEST_CODEMOD_RENAME]; + let mut m: Mapping = serde_yaml::from_str("a: 1\n").unwrap(); + let report1 = apply_codemods_with(&mut m, registry).unwrap(); + assert!(report1.changed()); + let report2 = apply_codemods_with(&mut m, registry).unwrap(); + assert!( + !report2.changed(), + "second run on already-migrated mapping must be a no-op" + ); + } +} diff --git a/src/compile/common.rs b/src/compile/common.rs index 47aaa046..af3838d4 100644 --- a/src/compile/common.rs +++ b/src/compile/common.rs @@ -86,21 +86,21 @@ fn atomic_write_blocking(path: &Path, contents: &str) -> Result<()> { } /// Detailed parse result. Holds enough information to rewrite the -/// source on disk byte-faithfully when migrations apply. +/// source on disk byte-faithfully when codemods apply. 
/// /// See [`parse_markdown_detailed`]. #[derive(Debug)] pub struct ParsedSource { - /// Typed front matter, after migrations have been applied to the + /// Typed front matter, after codemods have been applied to the /// underlying mapping. pub front_matter: FrontMatter, /// Body for compilation, with leading/trailing whitespace trimmed /// (matches the legacy `parse_markdown` second tuple element). pub markdown_body: String, - /// Migration outcome. - pub migrations: super::migrations::MigrationReport, - /// The migrated front-matter mapping, with `schema-version` - /// bumped. Used to reconstruct the source for rewrite. + /// Codemod outcome. + pub codemods: super::codemods::CodemodReport, + /// The codemod-rewritten front-matter mapping. Used to + /// reconstruct the source for the on-disk rewrite. pub front_matter_mapping: serde_yaml::Mapping, /// Whitespace bytes that appeared before the opening `---` fence, /// preserved verbatim so that source rewrite is byte-faithful. @@ -113,24 +113,24 @@ pub struct ParsedSource { pub source_sha256: [u8; 32], } -/// Parse the markdown file, run the migration chain on the front matter -/// in memory, and return both the typed `FrontMatter` and the raw -/// fragments needed to rewrite the source on disk byte-faithfully. +/// Parse the markdown file, run the codemod registry on the front +/// matter in memory, and return both the typed `FrontMatter` and the +/// raw fragments needed to rewrite the source on disk byte-faithfully. /// /// Use this from callers that may rewrite the source (the `compile` /// command). Callers that only want the typed view of the front matter /// should use the backward-compatible [`parse_markdown`] wrapper. 
pub fn parse_markdown_detailed(content: &str) -> Result { - parse_markdown_detailed_with_registry(content, super::migrations::MIGRATIONS) + parse_markdown_detailed_with_registry(content, super::codemods::CODEMODS) } /// Variant of [`parse_markdown_detailed`] that allows injecting an -/// explicit migration registry. Used by tests; production callers go +/// explicit codemod registry. Used by tests; production callers go /// through the no-arg version that reads the global -/// [`super::migrations::MIGRATIONS`]. +/// [`super::codemods::CODEMODS`]. pub(crate) fn parse_markdown_detailed_with_registry( content: &str, - registry: &[&'static super::migrations::Migration], + registry: &[&'static super::codemods::Codemod], ) -> Result { use sha2::Digest; @@ -176,13 +176,13 @@ pub(crate) fn parse_markdown_detailed_with_registry( } }; - // Stage 2: run the migration chain on the untyped mapping. - let report = super::migrations::migrate_front_matter_with(&mut mapping, registry) - .context("Failed to migrate front matter")?; + // Stage 2: run the codemod registry against the untyped mapping. + let report = super::codemods::apply_codemods_with(&mut mapping, registry) + .context("Failed to apply codemods")?; - // Stage 3: deserialize the (possibly migrated) mapping into the + // Stage 3: deserialize the (possibly modified) mapping into the // typed FrontMatter. Errors here mean either the user wrote an - // unsupported shape or a migration produced invalid output. + // unsupported shape or a codemod produced invalid output. 
let front_matter: FrontMatter = serde_yaml::from_value(serde_yaml::Value::Mapping(mapping.clone())) .context("Failed to parse YAML front matter")?; @@ -190,7 +190,7 @@ pub(crate) fn parse_markdown_detailed_with_registry( Ok(ParsedSource { front_matter, markdown_body, - migrations: report, + codemods: report, front_matter_mapping: mapping, leading_whitespace, body_raw, @@ -198,7 +198,7 @@ pub(crate) fn parse_markdown_detailed_with_registry( }) } -/// Reconstruct full source content from migration outputs. +/// Reconstruct full source content from codemod outputs. /// /// Takes the individual fragments rather than the full /// [`ParsedSource`] so callers that have already destructured the @@ -208,10 +208,9 @@ pub(crate) fn parse_markdown_detailed_with_registry( /// Output shape: /// - `leading_whitespace` (typically empty) /// - `---\n` -/// - the migrated YAML mapping (`serde_yaml::to_string` always ends -/// with `\n`); when present, `schema-version` is hoisted to the top -/// of the mapping so the serialized output matches the canonical -/// front-matter template documented in `docs/front-matter.md` +/// - the codemod-rewritten YAML mapping (`serde_yaml::to_string` +/// always ends with `\n`); the mapping's existing key order is +/// preserved so user-authored keys keep their original positions /// - `---` /// - the original body region byte-for-byte (`body_raw`) pub fn reconstruct_source( @@ -219,36 +218,14 @@ pub fn reconstruct_source( front_matter_mapping: &serde_yaml::Mapping, body_raw: &str, ) -> Result { - let yaml_serialized = serde_yaml::to_string(&hoist_schema_version(front_matter_mapping)) - .context("Failed to serialize migrated front matter")?; + let yaml_serialized = serde_yaml::to_string(front_matter_mapping) + .context("Failed to serialize codemod-rewritten front matter")?; Ok(format!( "{}---\n{}---{}", leading_whitespace, yaml_serialized, body_raw )) } -/// Return a mapping with `schema-version` (if present) moved to the -/// front. 
Other keys retain their relative order. Returns the input -/// unchanged when `schema-version` is absent or already first. -fn hoist_schema_version(input: &serde_yaml::Mapping) -> serde_yaml::Mapping { - let key = serde_yaml::Value::String(super::migrations::SCHEMA_VERSION_KEY.to_string()); - let Some(version) = input.get(&key) else { - return input.clone(); - }; - // If schema-version is already the first key, no rebuild needed. - if input.iter().next().map(|(k, _)| k) == Some(&key) { - return input.clone(); - } - let mut hoisted = serde_yaml::Mapping::with_capacity(input.len()); - hoisted.insert(key.clone(), version.clone()); - for (k, v) in input { - if k != &key { - hoisted.insert(k.clone(), v.clone()); - } - } - hoisted -} - fn yaml_value_kind(v: &serde_yaml::Value) -> &'static str { match v { serde_yaml::Value::Null => "null", @@ -2627,14 +2604,14 @@ mod tests { } #[test] - fn parse_markdown_detailed_byte_faithful_when_no_migration_runs() { - // With the registry empty, parsing a v1 source and reconstructing - // it should produce a byte-identical document apart from - // serde_yaml's canonical formatting of the YAML mapping. We - // assert the body region matches exactly. + fn parse_markdown_detailed_byte_faithful_when_no_codemod_runs() { + // With the registry empty, parsing a healthy source and + // reconstructing it should produce a byte-identical document + // apart from serde_yaml's canonical formatting of the YAML + // mapping. We assert the body region matches exactly. let original = "---\nname: x\ndescription: y\n---\n## body\n"; let parsed = parse_markdown_detailed(original).unwrap(); - assert!(!parsed.migrations.changed()); + assert!(!parsed.codemods.changed()); let reconstructed = reconstruct(&parsed); // Find the closing fence in both and compare the suffix. 
let orig_suffix = &original[original.find("\n---\n").unwrap()..]; @@ -2659,32 +2636,6 @@ mod tests { assert!(reconstructed.ends_with("---\nbody\n")); } - #[test] - fn reconstruct_source_hoists_schema_version_to_top() { - let mut mapping = serde_yaml::Mapping::new(); - mapping.insert( - serde_yaml::Value::String("name".into()), - serde_yaml::Value::String("x".into()), - ); - mapping.insert( - serde_yaml::Value::String("description".into()), - serde_yaml::Value::String("y".into()), - ); - mapping.insert( - serde_yaml::Value::String("schema-version".into()), - serde_yaml::Value::Number(serde_yaml::Number::from(2u64)), - ); - let out = reconstruct_source("", &mapping, "\nbody\n").unwrap(); - let lines: Vec<&str> = out.lines().take(3).collect(); - assert_eq!(lines[0], "---"); - assert_eq!( - lines[1], "schema-version: 2", - "schema-version should appear first in the front-matter block, got: {:?}", - lines - ); - assert_eq!(lines[2], "name: x"); - } - #[test] fn parse_markdown_detailed_allows_leading_whitespace() { let original = "\n \n---\nname: x\ndescription: y\n---\nbody\n"; diff --git a/src/compile/migrations/mod.rs b/src/compile/migrations/mod.rs deleted file mode 100644 index 0f8d3e5b..00000000 --- a/src/compile/migrations/mod.rs +++ /dev/null @@ -1,580 +0,0 @@ -//! Front-matter migration framework. -//! -//! A version-stamped, append-only chain of transformations that rewrites -//! older source front-matter shapes into the current one before typed -//! deserialization. Modeled on Django/Rails-style schema migrations. -//! -//! See `docs/migrations.md` for the full contributor reference. In -//! short: -//! -//! - Each migration goes from `from_version: N` to `to_version: N + 1`. -//! - Migrations live in `src/compile/migrations/_.rs` and are -//! appended to [`MIGRATIONS`] in strict ascending order. -//! - [`CURRENT_SCHEMA_VERSION`] is `1 + MIGRATIONS.len()`. -//! - Each source `.md` carries a `schema-version: ` field (missing -//! = 1). 
The compiler bumps it in place when migrations apply. -//! - Migrations operate on the **untyped** `serde_yaml::Mapping` -//! representation so they can rewrite shapes that no longer match the -//! typed `FrontMatter` (renamed/removed fields, -//! `deny_unknown_fields`). -//! -//! The runner is intentionally simple: read current version, validate -//! the registry is contiguous at runtime, and walk the chain. - -use anyhow::{bail, ensure, Context, Result}; -use serde_yaml::{Mapping, Value}; - -mod helpers; -#[allow(unused_imports)] -pub use helpers::{insert_no_overwrite, rename_key, take_key, ConflictPolicy}; - -/// Front-matter key that holds the schema version. Kebab-case to match -/// the rest of the front-matter grammar. -#[allow(dead_code)] -pub const SCHEMA_VERSION_KEY: &str = "schema-version"; - -/// Forward-compatible context passed to every migration. Currently -/// empty; we keep it in the signature so future migrations can be given -/// (e.g.) the source path without breaking the function pointer type. -#[non_exhaustive] -pub struct MigrationContext {} - -/// A single front-matter migration step. -/// -/// Migrations are pure functions that mutate the front-matter mapping -/// in place. They must NOT bump [`SCHEMA_VERSION_KEY`] themselves — the -/// runner does that after each successful step. They must be -/// deterministic and side-effect-free (documentation convention; not -/// enforced by the type system). -pub struct Migration { - /// Source schema version this migration consumes. - pub from_version: u32, - /// Target schema version produced (always `from_version + 1`). - pub to_version: u32, - /// Stable snake_case identifier used in logs, errors, and tests. - pub id: &'static str, - /// Human-facing one-line summary, surfaced in the compile warning. - pub summary: &'static str, - /// Compiler version that introduced this migration (e.g. "0.27.0"). - /// Currently used only for human-readable provenance; not consumed - /// by the runner. 
- #[allow(dead_code)] - pub introduced_in: &'static str, - /// The transformation. See type-level docs for invariants. - pub apply: fn(&mut Mapping, &MigrationContext) -> Result<()>, -} - -/// The fixed registry of migrations, in strict ascending `from_version` -/// order. Append-only — never reorder, never delete entries. -/// -/// Adding a migration is two edits: -/// -/// 1. Create `src/compile/migrations/_.rs` with a -/// `pub static MIGRATION: Migration` and register the module here. -/// 2. Append `&::MIGRATION` to this slice. -pub static MIGRATIONS: &[&'static Migration] = &[]; - -/// Total number of schema versions known to this compiler. -/// -/// Computed from the registry length so it can never drift. -#[allow(dead_code)] -pub const CURRENT_SCHEMA_VERSION: u32 = 1 + MIGRATIONS.len() as u32; - -/// Result of running the migration chain on a single front-matter -/// mapping. -#[derive(Debug, Clone)] -pub struct MigrationReport { - /// The schema version present in the source before migration. - pub from_version: u32, - /// The schema version after migration (always - /// [`CURRENT_SCHEMA_VERSION`] when the runner returns Ok). - pub to_version: u32, - /// Ordered list of migrations that ran, with id + summary so - /// callers (warnings, logs) don't need to re-query the registry. - pub applied: Vec, -} - -/// Record of a single migration that ran. Carries enough info for -/// warning emission and tests without re-looking up the registry. -#[derive(Debug, Clone)] -pub struct AppliedMigration { - pub id: &'static str, - pub summary: &'static str, -} - -impl MigrationReport { - /// Build a no-migration report for a source already at `version`. - pub fn unchanged(version: u32) -> Self { - Self { - from_version: version, - to_version: version, - applied: Vec::new(), - } - } - - /// Returns true when at least one migration ran. - pub fn changed(&self) -> bool { - !self.applied.is_empty() - } - - /// IDs of migrations that ran, in order. 
- #[allow(dead_code)] - pub fn applied_ids(&self) -> Vec<&'static str> { - self.applied.iter().map(|a| a.id).collect() - } -} - -/// Read [`SCHEMA_VERSION_KEY`] from the mapping. Missing field → 1. -/// Returns Err on any non-positive-integer value (zero, negative, -/// float, string, sequence, null). -pub fn read_schema_version(fm: &Mapping) -> Result { - let Some(value) = fm.get(Value::String(SCHEMA_VERSION_KEY.to_string())) else { - return Ok(1); - }; - let n = value.as_u64().ok_or_else(|| { - anyhow::anyhow!( - "front matter `schema-version` must be a positive integer (>= 1), got: {}", - describe_yaml_value(value) - ) - })?; - if n == 0 { - bail!( - "front matter `schema-version` must be a positive integer (>= 1), got: 0" - ); - } - if n > u32::MAX as u64 { - bail!( - "front matter `schema-version` must be a positive integer (>= 1), got: {} (exceeds u32::MAX)", - n - ); - } - Ok(n as u32) -} - -/// Set [`SCHEMA_VERSION_KEY`] on the mapping. Inserts at the end if -/// absent, updates in place otherwise. -pub fn set_schema_version(fm: &mut Mapping, version: u32) { - fm.insert( - Value::String(SCHEMA_VERSION_KEY.to_string()), - Value::Number(serde_yaml::Number::from(version as u64)), - ); -} - -/// Apply the registered migration chain to `fm`. -/// -/// Equivalent to [`migrate_front_matter_with`] called with the global -/// [`MIGRATIONS`] registry. -#[allow(dead_code)] -pub fn migrate_front_matter(fm: &mut Mapping) -> Result { - migrate_front_matter_with(fm, MIGRATIONS) -} - -/// Apply an explicit migration chain. Used by tests with a stub -/// registry; production code calls [`migrate_front_matter`]. -pub fn migrate_front_matter_with( - fm: &mut Mapping, - registry: &[&'static Migration], -) -> Result { - // Use checked arithmetic so we surface a clear error rather than - // panic-or-wrap if a registry ever grows past u32::MAX. Realistic - // registries are tiny — this is a "no panic in library code" guard. 
- let registry_len: u32 = registry - .len() - .try_into() - .ok() - .context("migration registry has more than u32::MAX entries")?; - let target_version = 1u32 - .checked_add(registry_len) - .context("migration registry too large: target_version would overflow u32")?; - let mut current = read_schema_version(fm)?; - let from_version = current; - - if current > target_version { - bail!( - "source `schema-version` is {}, but this compiler only supports up to {}. Upgrade ado-aw.", - current, - target_version - ); - } - - if current == target_version { - return Ok(MigrationReport::unchanged(current)); - } - - let mut applied: Vec = Vec::new(); - - while current < target_version { - let idx = (current - 1) as usize; - let m = registry.get(idx).ok_or_else(|| { - anyhow::anyhow!( - "migration registry corrupt: expected entry at index {} for from_version={}", - idx, - current - ) - })?; - let next_version = current - .checked_add(1) - .context("migration version overflow: current + 1 exceeds u32::MAX")?; - ensure!( - m.from_version == current && m.to_version == next_version, - "migration registry corrupt: expected from_version={} at index {}, found from_version={} to_version={}", - current, - idx, - m.from_version, - m.to_version - ); - - let ctx = MigrationContext {}; - (m.apply)(fm, &ctx).with_context(|| { - format!( - "migration {} ({} -> {}) failed", - m.id, m.from_version, m.to_version - ) - })?; - - set_schema_version(fm, m.to_version); - applied.push(AppliedMigration { - id: m.id, - summary: m.summary, - }); - current = m.to_version; - } - - Ok(MigrationReport { - from_version, - to_version: current, - applied, - }) -} - -fn describe_yaml_value(v: &Value) -> String { - match v { - Value::Null => "null".to_string(), - Value::Bool(b) => format!("bool ({})", b), - Value::Number(n) => format!("number ({})", n), - Value::String(s) => format!("string ({:?})", s), - Value::Sequence(_) => "sequence".to_string(), - Value::Mapping(_) => "mapping".to_string(), - 
Value::Tagged(_) => "tagged".to_string(), - } -} - -#[cfg(test)] -mod tests { - use super::*; - - // ─── Registry health (operates on the real MIGRATIONS slice) ────────── - - #[test] - fn registry_is_contiguous_and_starts_at_one() { - // Vacuously true with an empty registry; the assertions still - // exercise the loop machinery so this test guards against future - // regressions when migrations are added. - for (i, m) in MIGRATIONS.iter().enumerate() { - assert_eq!( - m.from_version, - (i + 1) as u32, - "migration at index {} has from_version={}; expected {}", - i, - m.from_version, - i + 1 - ); - assert_eq!( - m.to_version, - (i + 2) as u32, - "migration at index {} has to_version={}; expected {}", - i, - m.to_version, - i + 2 - ); - } - } - - #[test] - fn registry_ids_are_unique() { - let mut seen = std::collections::BTreeSet::new(); - for m in MIGRATIONS { - assert!( - seen.insert(m.id), - "duplicate migration id `{}` in MIGRATIONS", - m.id - ); - } - } - - #[test] - fn migration_filenames_match_from_version() { - // Scan src/compile/migrations/*.rs and check that each numeric - // migration file's prefix matches its from_version. - let dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR")) - .join("src/compile/migrations"); - let mut numeric_files: Vec<(u32, String)> = Vec::new(); - for entry in std::fs::read_dir(&dir).expect("read migrations dir") { - let entry = entry.expect("dir entry"); - let path = entry.path(); - if path.extension().and_then(|e| e.to_str()) != Some("rs") { - continue; - } - let stem = path - .file_stem() - .and_then(|s| s.to_str()) - .expect("utf8 stem") - .to_string(); - if stem == "mod" || stem == "helpers" { - continue; - } - // Expect `_` where NNNN is the zero-padded - // from_version. Files that don't match this shape are tests - // or fixtures and should not exist in this directory. 
- let (prefix, _rest) = stem.split_once('_').unwrap_or_else(|| { - panic!( - "migration file {:?} does not match `_.rs`", - path - ) - }); - let n: u32 = prefix.parse().unwrap_or_else(|_| { - panic!( - "migration file {:?} has non-numeric prefix `{}`", - path, prefix - ) - }); - numeric_files.push((n, stem)); - } - numeric_files.sort(); - assert_eq!( - numeric_files.len(), - MIGRATIONS.len(), - "number of `_.rs` files ({}) does not match \ - MIGRATIONS.len() ({}); files: {:?}", - numeric_files.len(), - MIGRATIONS.len(), - numeric_files - ); - for (i, (n, stem)) in numeric_files.iter().enumerate() { - assert_eq!( - *n, - (i + 1) as u32, - "migration file `{}` has from_version prefix {} but is at index {}; expected {}", - stem, - n, - i, - i + 1 - ); - } - } - - // ─── read_schema_version ────────────────────────────────────────────── - - fn map_with_version(version: Option<&str>) -> Mapping { - let mut m = Mapping::new(); - if let Some(v) = version { - // Insert as raw YAML so we can test integer/string/etc. 
- let parsed: Value = serde_yaml::from_str(v).unwrap(); - m.insert(Value::String(SCHEMA_VERSION_KEY.to_string()), parsed); - } - m - } - - #[test] - fn read_schema_version_missing_returns_one() { - let m = Mapping::new(); - assert_eq!(read_schema_version(&m).unwrap(), 1); - } - - #[test] - fn read_schema_version_integer_is_returned() { - let m = map_with_version(Some("5")); - assert_eq!(read_schema_version(&m).unwrap(), 5); - } - - #[test] - fn read_schema_version_zero_rejected() { - let m = map_with_version(Some("0")); - let err = read_schema_version(&m).unwrap_err(); - assert!( - format!("{}", err).contains("must be a positive integer"), - "got: {}", - err - ); - } - - #[test] - fn read_schema_version_negative_rejected() { - let m = map_with_version(Some("-1")); - let err = read_schema_version(&m).unwrap_err(); - assert!( - format!("{}", err).contains("must be a positive integer"), - "got: {}", - err - ); - } - - #[test] - fn read_schema_version_string_rejected() { - let m = map_with_version(Some("\"five\"")); - let err = read_schema_version(&m).unwrap_err(); - assert!( - format!("{}", err).contains("must be a positive integer"), - "got: {}", - err - ); - } - - #[test] - fn read_schema_version_float_rejected() { - let m = map_with_version(Some("2.5")); - let err = read_schema_version(&m).unwrap_err(); - assert!( - format!("{}", err).contains("must be a positive integer"), - "got: {}", - err - ); - } - - #[test] - fn read_schema_version_null_rejected() { - let m = map_with_version(Some("null")); - let err = read_schema_version(&m).unwrap_err(); - assert!( - format!("{}", err).contains("must be a positive integer"), - "got: {}", - err - ); - } - - // ─── set_schema_version ─────────────────────────────────────────────── - - #[test] - fn set_schema_version_inserts_when_absent() { - let mut m = Mapping::new(); - set_schema_version(&mut m, 3); - assert_eq!(read_schema_version(&m).unwrap(), 3); - } - - #[test] - fn set_schema_version_updates_in_place() { - let mut m = 
map_with_version(Some("1")); - set_schema_version(&mut m, 4); - assert_eq!(read_schema_version(&m).unwrap(), 4); - } - - // ─── migrate_front_matter_with ──────────────────────────────────────── - - #[test] - fn migrate_empty_registry_is_noop_for_v1() { - let mut m: Mapping = serde_yaml::from_str("name: x\n").unwrap(); - let report = migrate_front_matter_with(&mut m, &[]).unwrap(); - assert!(!report.changed()); - assert_eq!(report.from_version, 1); - assert_eq!(report.to_version, 1); - } - - #[test] - fn migrate_already_current_is_noop() { - // Empty registry → CURRENT == 1. Source explicitly stamped 1. - let mut m: Mapping = - serde_yaml::from_str("schema-version: 1\nname: x\n").unwrap(); - let report = migrate_front_matter_with(&mut m, &[]).unwrap(); - assert!(!report.changed()); - } - - #[test] - fn migrate_future_version_rejected() { - // Empty registry → CURRENT == 1. Source claims version 2. - let mut m: Mapping = - serde_yaml::from_str("schema-version: 2\nname: x\n").unwrap(); - let err = migrate_front_matter_with(&mut m, &[]).unwrap_err(); - assert!( - format!("{}", err).contains("only supports up to"), - "got: {}", - err - ); - } - - fn noop_apply(_fm: &mut Mapping, _ctx: &MigrationContext) -> Result<()> { - Ok(()) - } - - static TEST_MIG_1_TO_2: Migration = Migration { - from_version: 1, - to_version: 2, - id: "test_v1_to_v2", - summary: "test migration", - introduced_in: "test", - apply: noop_apply, - }; - - static TEST_MIG_2_TO_3: Migration = Migration { - from_version: 2, - to_version: 3, - id: "test_v2_to_v3", - summary: "test migration", - introduced_in: "test", - apply: rename_a_to_b, - }; - - fn rename_a_to_b(fm: &mut Mapping, _ctx: &MigrationContext) -> Result<()> { - rename_key(fm, "a", "b", ConflictPolicy::Error)?; - Ok(()) - } - - #[test] - fn migrate_with_test_registry_chains_migrations() { - let registry: &[&'static Migration] = - &[&TEST_MIG_1_TO_2, &TEST_MIG_2_TO_3]; - let mut m: Mapping = serde_yaml::from_str("a: 1\n").unwrap(); - let 
report = migrate_front_matter_with(&mut m, registry).unwrap(); - assert!(report.changed()); - assert_eq!(report.from_version, 1); - assert_eq!(report.to_version, 3); - assert_eq!(report.applied_ids(), vec!["test_v1_to_v2", "test_v2_to_v3"]); - assert_eq!(read_schema_version(&m).unwrap(), 3); - // The rename in the second migration moved a → b. - assert!(!m.contains_key(Value::String("a".into()))); - assert!(m.contains_key(Value::String("b".into()))); - } - - #[test] - fn migrate_starting_partway_through_chain() { - let registry: &[&'static Migration] = - &[&TEST_MIG_1_TO_2, &TEST_MIG_2_TO_3]; - // Source is already at v2, only the second migration should run. - let mut m: Mapping = - serde_yaml::from_str("schema-version: 2\na: 1\n").unwrap(); - let report = migrate_front_matter_with(&mut m, registry).unwrap(); - assert_eq!(report.from_version, 2); - assert_eq!(report.to_version, 3); - assert_eq!(report.applied_ids(), vec!["test_v2_to_v3"]); - } - - fn failing_apply(_fm: &mut Mapping, _ctx: &MigrationContext) -> Result<()> { - bail!("intentional test failure"); - } - - static TEST_MIG_FAIL: Migration = Migration { - from_version: 1, - to_version: 2, - id: "test_fail", - summary: "always fails", - introduced_in: "test", - apply: failing_apply, - }; - - #[test] - fn migrate_aborts_on_step_failure_with_context() { - let registry: &[&'static Migration] = &[&TEST_MIG_FAIL]; - let mut m: Mapping = serde_yaml::from_str("a: 1\n").unwrap(); - let err = migrate_front_matter_with(&mut m, registry).unwrap_err(); - let chain = format!("{:#}", err); - assert!( - chain.contains("test_fail (1 -> 2) failed"), - "expected migration context, got: {}", - chain - ); - assert!( - chain.contains("intentional test failure"), - "expected inner error, got: {}", - chain - ); - } -} diff --git a/src/compile/mod.rs b/src/compile/mod.rs index 3eb7dc55..8ff01646 100644 --- a/src/compile/mod.rs +++ b/src/compile/mod.rs @@ -11,8 +11,8 @@ pub mod extensions; pub(crate) mod filter_ir; mod 
gitattributes; #[cfg(test)] -mod migration_integration_test; -pub(crate) mod migrations; +mod codemod_integration_test; +pub(crate) mod codemods; mod onees; pub(crate) mod pr_filters; mod standalone; @@ -69,24 +69,24 @@ pub async fn compile_pipeline( skip_integrity, debug_pipeline, true, - migrations::MIGRATIONS, + codemods::CODEMODS, ) .await - .map(|_migrated| ()) + .map(|_rewrote| ()) } -/// Test-only entry point that injects a custom migration registry. +/// Test-only entry point that injects a custom codemod registry. /// /// Production code goes through [`compile_pipeline`]. Tests use this -/// to drive end-to-end migration scenarios without polluting the real -/// [`migrations::MIGRATIONS`] slice. +/// to drive end-to-end codemod scenarios without polluting the real +/// [`codemods::CODEMODS`] slice. #[cfg(test)] async fn compile_pipeline_with_registry( input_path: &str, output_path: Option<&str>, skip_integrity: bool, debug_pipeline: bool, - registry: &[&'static migrations::Migration], + registry: &[&'static codemods::Codemod], ) -> Result { compile_pipeline_inner( input_path, @@ -113,7 +113,7 @@ async fn compile_pipeline_inner( skip_integrity: bool, debug_pipeline: bool, sync_gitattributes: bool, - registry: &[&'static migrations::Migration], + registry: &[&'static codemods::Codemod], ) -> Result { let input_path = Path::new(input_path); info!("Compiling pipeline from: {}", input_path.display()); @@ -128,7 +128,7 @@ async fn compile_pipeline_inner( let parsed = common::parse_markdown_detailed_with_registry(&content, registry)?; let mut front_matter = parsed.front_matter; let markdown_body = parsed.markdown_body; - let migrations = parsed.migrations; + let codemod_report = parsed.codemods; let front_matter_mapping = parsed.front_matter_mapping; let leading_whitespace = parsed.leading_whitespace; let body_raw = parsed.body_raw; @@ -204,16 +204,16 @@ async fn compile_pipeline_inner( // state (rewritten source + stale lock file, or rewritten lock file // 
pointing at unmigrated source) cannot escape: if rewrite fails, // we abort before the lock file ever gets touched. - let mut migrated = false; - if migrations.changed() { - migrated = perform_source_rewrite_if_needed( + let mut rewrote = false; + if codemod_report.changed() { + rewrote = perform_source_rewrite_if_needed( input_path, &content, &leading_whitespace, &front_matter_mapping, &body_raw, &source_sha256, - &migrations, + &codemod_report, ) .await?; } @@ -246,15 +246,15 @@ async fn compile_pipeline_inner( } } - Ok(migrated) + Ok(rewrote) } -/// Reconstruct the migrated source, run the lost-update guard, and -/// atomically rewrite the source `.md` if the content actually -/// changed. Returns whether a write happened. +/// Reconstruct the codemod-rewritten source, run the lost-update +/// guard, and atomically rewrite the source `.md` if the content +/// actually changed. Returns whether a write happened. /// -/// On success, emits the documented stderr warning so users always see -/// when their source was migrated. This warning is *not* gated by +/// On success, emits the documented stderr warning so users always +/// see when codemods were applied. This warning is *not* gated by /// `--verbose`/`--debug`. async fn perform_source_rewrite_if_needed( input_path: &Path, @@ -263,13 +263,13 @@ async fn perform_source_rewrite_if_needed( front_matter_mapping: &serde_yaml::Mapping, body_raw: &str, source_sha256: &[u8; 32], - migrations: &migrations::MigrationReport, + codemods: &codemods::CodemodReport, ) -> Result { let new_content = common::reconstruct_source(leading_whitespace, front_matter_mapping, body_raw)?; if new_content == original_content { - // Migrations ran but their net effect is a no-op on disk — - // skip the rewrite to avoid gratuitous diffs. + // Codemods ran but their net effect is a no-op on disk — skip + // the rewrite to avoid gratuitous diffs. 
return Ok(false); } @@ -279,7 +279,7 @@ async fn perform_source_rewrite_if_needed( use sha2::Digest; let current_bytes = tokio::fs::read(input_path).await.with_context(|| { format!( - "Failed to re-read source for migration safety check: {}", + "Failed to re-read source for codemod safety check: {}", input_path.display() ) })?; @@ -288,7 +288,7 @@ async fn perform_source_rewrite_if_needed( let current_hash: [u8; 32] = hasher.finalize().into(); if ¤t_hash != source_sha256 { anyhow::bail!( - "source file {} changed during compilation; refusing to migrate. Re-run `ado-aw compile`.", + "source file {} changed during compilation; refusing to apply codemods. Re-run `ado-aw compile`.", input_path.display() ); } @@ -297,22 +297,17 @@ async fn perform_source_rewrite_if_needed( .await .with_context(|| { format!( - "Failed to atomically rewrite migrated source: {}", + "Failed to atomically rewrite source after codemods: {}", input_path.display() ) })?; - eprintln!( - "warning: migrated front matter in {}: schema-version {} -> {}", - input_path.display(), - migrations.from_version, - migrations.to_version - ); - for applied in &migrations.applied { + eprintln!("warning: applied codemods to {}:", input_path.display()); + for applied in &codemods.applied { eprintln!(" - {}: {}", applied.id, applied.summary); } eprintln!( - "note: front-matter comments are not preserved when migrations rewrite the source." + "note: front-matter comments are not preserved when codemods rewrite the source." 
); Ok(true) @@ -367,7 +362,7 @@ pub async fn compile_all_pipelines(skip_integrity: bool, debug_pipeline: bool) - let mut success_count = 0; let mut skip_count = 0; let mut fail_count = 0; - let mut migrated_count = 0; + let mut rewrote_count = 0; for pipeline in &detected { let source_path = root.join(&pipeline.source); @@ -386,11 +381,11 @@ pub async fn compile_all_pipelines(skip_integrity: bool, debug_pipeline: bool) - let source_str = source_path.to_string_lossy(); let output_str = yaml_output_path.to_string_lossy(); - match compile_pipeline_inner(&source_str, Some(&output_str), skip_integrity, debug_pipeline, false, migrations::MIGRATIONS).await { - Ok(migrated) => { + match compile_pipeline_inner(&source_str, Some(&output_str), skip_integrity, debug_pipeline, false, codemods::CODEMODS).await { + Ok(rewrote) => { success_count += 1; - if migrated { - migrated_count += 1; + if rewrote { + rewrote_count += 1; } } Err(e) => { @@ -415,10 +410,10 @@ pub async fn compile_all_pipelines(skip_integrity: bool, debug_pipeline: bool) - } println!(); - if migrated_count > 0 { + if rewrote_count > 0 { println!( - "Done: {} compiled, {} skipped, {} failed; {} source file(s) migrated.", - success_count, skip_count, fail_count, migrated_count + "Done: {} compiled, {} skipped, {} failed; {} source file(s) rewritten by codemods.", + success_count, skip_count, fail_count, rewrote_count ); } else { println!( @@ -518,13 +513,11 @@ pub async fn check_pipeline(pipeline_path: &str) -> Result<()> { // here only happens locally / in CI when a developer runs a // newer ado-aw against an older source. That is exactly when // we want to fail loudly so `ado-aw compile` is run. 
- if parsed.migrations.changed() { + if parsed.codemods.changed() { anyhow::bail!( - "{} is at schema-version {}; this compiler requires schema-version {}.\n \ - hint: run `ado-aw compile {}` to migrate the source in place.", + "{} contains deprecated front-matter shapes that need codemod fixes.\n \ + hint: run `ado-aw compile {}` to apply the codemods in place.", source_path.display(), - parsed.migrations.from_version, - parsed.migrations.to_version, source_path.display(), ); } diff --git a/src/compile/types.rs b/src/compile/types.rs index a5730283..46ddb8c0 100644 --- a/src/compile/types.rs +++ b/src/compile/types.rs @@ -7,15 +7,6 @@ use serde::{Deserialize, Serialize}; use std::collections::HashMap; use crate::sanitize::SanitizeConfig as SanitizeConfigTrait; -/// Default `schema-version` used when the field is absent from the -/// front matter. Defined here so serde's `#[serde(default = "...")]` -/// can refer to it. Note: source files that simply omit the field -/// continue to behave as schema-version 1; this default is not the -/// "current" schema version. -fn default_schema_version() -> u32 { - 1 -} - /// Target platform for compiled pipeline #[derive(Debug, Deserialize, Clone, Default, PartialEq)] #[serde(rename_all = "lowercase")] @@ -589,18 +580,6 @@ pub struct PipelineParameter { #[derive(Debug, Deserialize)] #[serde(deny_unknown_fields)] pub struct FrontMatter { - /// Schema version of the front-matter grammar. Missing field defaults - /// to 1. The compiler bumps this in place when migrations apply - /// during compilation; users typically don't write it by hand. - /// See [`crate::compile::migrations`] for the migration framework. - /// - /// Field is intentionally read-only on the typed view: it exists - /// solely so `deny_unknown_fields` accepts a stamped source. The - /// authoritative version lives in the untyped front-matter mapping - /// and is set by the migration runner before typed deserialization. 
- #[serde(default = "default_schema_version", rename = "schema-version")] - #[allow(dead_code)] - pub schema_version: u32, /// Agent name (required) pub name: String, /// One-line description (required) @@ -1164,36 +1143,6 @@ impl SanitizeConfigTrait for LabelFilter { mod tests { use super::*; - // ─── schema_version field ─────────────────────────────────────────────── - - #[test] - fn schema_version_defaults_to_one_when_absent() { - let yaml = "name: x\ndescription: y\n"; - let fm: FrontMatter = serde_yaml::from_str(yaml).unwrap(); - assert_eq!(fm.schema_version, 1); - } - - #[test] - fn schema_version_accepts_explicit_integer() { - let yaml = "schema-version: 5\nname: x\ndescription: y\n"; - let fm: FrontMatter = serde_yaml::from_str(yaml).unwrap(); - assert_eq!(fm.schema_version, 5); - } - - #[test] - fn schema_version_string_value_fails_typed_deserialize() { - // The migration runner is the first line of defense (rejects with a - // friendly message before typed deserialization). This test is a - // defense-in-depth check that the typed FrontMatter would also - // reject a non-integer `schema-version`. - let yaml = "schema-version: notanumber\nname: x\ndescription: y\n"; - let result: Result = serde_yaml::from_str(yaml); - assert!( - result.is_err(), - "expected non-integer schema-version to fail typed deserialize" - ); - } - // ─── PoolConfig deserialization ────────────────────────────────────────── #[test] diff --git a/src/main.rs b/src/main.rs index 6a3b0d84..e046b4d6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -225,9 +225,9 @@ async fn run_execute( ) -> Result<()> { // Read and parse source markdown to get tool configs. // Use parse_markdown_detailed so Stage 3 benefits from in-memory - // migration when a source is mid-migration. Stage 3 must NOT - // rewrite the source file (the executor's working tree is not the - // source-of-truth tree), so we just emit a log warning. + // codemod fixes when a source has deprecated shapes. 
Stage 3 must + // NOT rewrite the source file (the executor's working tree is not + // the source-of-truth tree), so we just emit a log warning. let content = tokio::fs::read_to_string(&source) .await .with_context(|| format!("Failed to read source file: {}", source.display()))?; @@ -235,12 +235,10 @@ async fn run_execute( let parsed = compile::parse_markdown_detailed(&content) .with_context(|| format!("Failed to parse source file: {}", source.display()))?; - if parsed.migrations.changed() { + if parsed.codemods.changed() { log::warn!( - "front matter at {} is at schema-version {}; running with in-memory migration to {}. Run `ado-aw compile {}` to update the source.", + "front matter at {} contains deprecated shapes; running with in-memory codemod fixes applied. Run `ado-aw compile {}` to update the source.", source.display(), - parsed.migrations.from_version, - parsed.migrations.to_version, source.display(), ); } diff --git a/tests/codemod_tests.rs b/tests/codemod_tests.rs new file mode 100644 index 00000000..57f5cb6c --- /dev/null +++ b/tests/codemod_tests.rs @@ -0,0 +1,182 @@ +//! Integration tests for the front-matter codemod framework. +//! +//! These tests spawn the compiled `ado-aw` binary as a subprocess +//! (matching the pattern used in `tests/compiler_tests.rs`) and +//! assert on the user-visible behavior of `compile` and `check` for +//! sources with various front-matter shapes. +//! +//! The codemod registry shipped with this binary is intentionally +//! empty; the rewrite path is exercised by the white-box tests in +//! `src/compile/codemod_integration_test.rs`, which can inject a +//! stub registry. These tests cover the user-facing CLI behaviors +//! that don't require codemod registration: +//! +//! - Healthy current sources compile and `check` cleanly without +//! rewriting the source. +//! - Non-mapping front matter is rejected with a clear message. +//! - The full `compile` -> `check` round-trip succeeds. 
+ +use std::fs; +use std::path::PathBuf; +use std::process::Command; + +/// Set up a unique temp directory for each test run. +fn fresh_temp_dir(label: &str) -> PathBuf { + let dir = std::env::temp_dir().join(format!( + "ado-aw-codemod-tests-{}-{}-{}", + label, + std::process::id(), + rand_suffix(), + )); + fs::create_dir_all(&dir).expect("create temp dir"); + dir +} + +/// Same as [`fresh_temp_dir`] but also creates an empty `.git/` +/// directory at the root so `ado-aw check` (which walks up to the +/// repo root) can resolve a source path from the compiled lock +/// file's `@ado-aw` header. +fn fresh_git_temp_dir(label: &str) -> PathBuf { + let dir = fresh_temp_dir(label); + fs::create_dir(dir.join(".git")).expect("create .git dir"); + dir +} + +/// Suffix that's unique within the process for the lifetime of a +/// single test binary run. Uses a wall-clock nanosecond timestamp +/// combined with a monotonic atomic counter so two parallel tests +/// scheduled in the same nanosecond still get distinct directories. +fn rand_suffix() -> String { + use std::sync::atomic::{AtomicU64, Ordering}; + use std::time::{SystemTime, UNIX_EPOCH}; + static SEQ: AtomicU64 = AtomicU64::new(0); + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_nanos()) + .unwrap_or(0); + let seq = SEQ.fetch_add(1, Ordering::Relaxed); + format!("{:x}-{:x}", nanos, seq) +} + +fn ado_aw_binary() -> PathBuf { + PathBuf::from(env!("CARGO_BIN_EXE_ado-aw")) +} + +/// Run `ado-aw compile `, returning the captured output. +fn run_compile(source: &PathBuf) -> std::process::Output { + Command::new(ado_aw_binary()) + .args(["compile", source.to_str().unwrap()]) + .output() + .expect("Failed to run ado-aw compile") +} + +/// Run `ado-aw check `, returning the captured output. 
+fn run_check(pipeline: &PathBuf) -> std::process::Output { + Command::new(ado_aw_binary()) + .args(["check", pipeline.to_str().unwrap()]) + .output() + .expect("Failed to run ado-aw check") +} + +/// Write a source file to `dir/agent.md` and return its path. +fn write_source(dir: &PathBuf, content: &str) -> PathBuf { + let path = dir.join("agent.md"); + fs::write(&path, content).expect("write source"); + path +} + +// ─── Healthy compile (no codemods needed) ────────────────────────────────── + +#[test] +fn compile_succeeds_on_current_source() { + let dir = fresh_temp_dir("current-source"); + let original = "---\nname: smoketest\ndescription: smoketest description\n---\n## Body\n\nHello.\n"; + let source = write_source(&dir, original); + + let output = run_compile(&source); + + assert!( + output.status.success(), + "compile should succeed on healthy source: stdout={:?} stderr={:?}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + + // Lock file must be generated. + let lock = source.with_extension("lock.yml"); + assert!( + lock.exists(), + "expected compiled YAML at {}", + lock.display() + ); + + // Empty registry + healthy source must NOT rewrite — verify + // byte-identity. + let after = fs::read_to_string(&source).expect("re-read source"); + assert_eq!( + after, original, + "source must be byte-identical after compile when no codemods apply" + ); + + // Stderr should NOT contain a codemod warning. 
+ let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + !stderr.contains("warning: applied codemods"), + "no codemod warning expected, got stderr: {}", + stderr + ); + + let _ = fs::remove_dir_all(&dir); +} + +#[test] +fn compile_then_check_round_trip_passes() { + let dir = fresh_git_temp_dir("round-trip"); + let source = write_source( + &dir, + "---\nname: round-trip-agent\ndescription: round-trip\n---\n## Body\n", + ); + + let compile_output = run_compile(&source); + assert!( + compile_output.status.success(), + "compile should succeed: {}", + String::from_utf8_lossy(&compile_output.stderr) + ); + + let lock = source.with_extension("lock.yml"); + assert!(lock.exists(), "expected lock file at {}", lock.display()); + + let check_output = run_check(&lock); + assert!( + check_output.status.success(), + "check should succeed: stdout={:?} stderr={:?}", + String::from_utf8_lossy(&check_output.stdout), + String::from_utf8_lossy(&check_output.stderr) + ); + + let _ = fs::remove_dir_all(&dir); +} + +// ─── Non-mapping front matter ────────────────────────────────────────────── + +#[test] +fn compile_rejects_non_mapping_top_level_yaml() { + let dir = fresh_temp_dir("non-mapping"); + let source = write_source(&dir, "---\n- a\n- b\n---\nbody\n"); + + let output = run_compile(&source); + + assert!( + !output.status.success(), + "compile should fail when front matter is a sequence not a mapping" + ); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.contains("must be a mapping"), + "stderr should report non-mapping error, got: {}", + stderr + ); + + let _ = fs::remove_dir_all(&dir); +} diff --git a/tests/migration_tests.rs b/tests/migration_tests.rs deleted file mode 100644 index be21e11f..00000000 --- a/tests/migration_tests.rs +++ /dev/null @@ -1,386 +0,0 @@ -//! Integration tests for the front-matter migration framework. -//! -//! These tests spawn the compiled `ado-aw` binary as a subprocess -//! 
(matching the pattern used in `tests/compiler_tests.rs`) and assert -//! on the user-visible behavior of `compile` and `check` for sources -//! with various `schema-version` shapes. -//! -//! The migration registry shipped with this binary is intentionally -//! empty (`CURRENT_SCHEMA_VERSION == 1`); the rewrite path is exercised -//! by the white-box tests in `src/compile/migration_integration_test.rs`, -//! which can inject a stub registry. These tests cover the user-facing -//! CLI behaviors that don't require migration registration: -//! -//! - Future-version sources are rejected with a clear message. -//! - Non-integer / zero / negative `schema-version` is rejected. -//! - Healthy v1 sources (no `schema-version` field, or explicit `1`) -//! compile and `check` cleanly without rewriting the source. -//! - The full `compile` -> `check` round-trip succeeds. - -use std::fs; -use std::path::PathBuf; -use std::process::Command; - -/// Set up a unique temp directory for each test run. -fn fresh_temp_dir(label: &str) -> PathBuf { - let dir = std::env::temp_dir().join(format!( - "ado-aw-migration-tests-{}-{}-{}", - label, - std::process::id(), - rand_suffix(), - )); - fs::create_dir_all(&dir).expect("create temp dir"); - dir -} - -/// Same as [`fresh_temp_dir`] but also creates an empty `.git/` -/// directory at the root so `ado-aw check` (which walks up to the -/// repo root) can resolve a source path from the compiled lock file's -/// `@ado-aw` header. -fn fresh_git_temp_dir(label: &str) -> PathBuf { - let dir = fresh_temp_dir(label); - fs::create_dir(dir.join(".git")).expect("create .git dir"); - dir -} - -/// Suffix that's unique within the process for the lifetime of a -/// single test binary run. Uses a wall-clock nanosecond timestamp -/// combined with a monotonic atomic counter so two parallel tests -/// scheduled in the same nanosecond still get distinct directories. 
-fn rand_suffix() -> String { - use std::sync::atomic::{AtomicU64, Ordering}; - use std::time::{SystemTime, UNIX_EPOCH}; - static SEQ: AtomicU64 = AtomicU64::new(0); - let nanos = SystemTime::now() - .duration_since(UNIX_EPOCH) - .map(|d| d.as_nanos()) - .unwrap_or(0); - let seq = SEQ.fetch_add(1, Ordering::Relaxed); - format!("{:x}-{:x}", nanos, seq) -} - -fn ado_aw_binary() -> PathBuf { - PathBuf::from(env!("CARGO_BIN_EXE_ado-aw")) -} - -/// Run `ado-aw compile `, returning the captured output. -fn run_compile(source: &PathBuf) -> std::process::Output { - Command::new(ado_aw_binary()) - .args(["compile", source.to_str().unwrap()]) - .output() - .expect("Failed to run ado-aw compile") -} - -/// Run `ado-aw check `, returning the captured output. -fn run_check(pipeline: &PathBuf) -> std::process::Output { - Command::new(ado_aw_binary()) - .args(["check", pipeline.to_str().unwrap()]) - .output() - .expect("Failed to run ado-aw check") -} - -/// Write a source file to `dir/agent.md` and return its path. 
-fn write_source(dir: &PathBuf, content: &str) -> PathBuf { - let path = dir.join("agent.md"); - fs::write(&path, content).expect("write source"); - path -} - -// ─── Future-version rejection ────────────────────────────────────────────── - -#[test] -fn compile_rejects_future_schema_version() { - let dir = fresh_temp_dir("future-version"); - let source = write_source( - &dir, - "---\nschema-version: 99\nname: x\ndescription: y\n---\nbody\n", - ); - - let output = run_compile(&source); - - assert!( - !output.status.success(), - "compile should fail on future schema-version: stdout={:?} stderr={:?}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - - let stderr = String::from_utf8_lossy(&output.stderr); - assert!( - stderr.contains("only supports up to"), - "stderr should mention compiler's supported schema-version range, got: {}", - stderr - ); - assert!( - stderr.contains("99"), - "stderr should mention the offending version 99, got: {}", - stderr - ); - - let _ = fs::remove_dir_all(&dir); -} - -// ─── Invalid schema-version values ───────────────────────────────────────── - -#[test] -fn compile_rejects_zero_schema_version() { - let dir = fresh_temp_dir("zero-version"); - let source = write_source( - &dir, - "---\nschema-version: 0\nname: x\ndescription: y\n---\nbody\n", - ); - - let output = run_compile(&source); - - assert!( - !output.status.success(), - "compile should fail on zero schema-version" - ); - let stderr = String::from_utf8_lossy(&output.stderr); - assert!( - stderr.contains("must be a positive integer"), - "stderr should reject zero with `must be a positive integer`, got: {}", - stderr - ); - - let _ = fs::remove_dir_all(&dir); -} - -#[test] -fn compile_rejects_negative_schema_version() { - let dir = fresh_temp_dir("negative-version"); - let source = write_source( - &dir, - "---\nschema-version: -1\nname: x\ndescription: y\n---\nbody\n", - ); - - let output = run_compile(&source); - - assert!( - 
!output.status.success(), - "compile should fail on negative schema-version" - ); - let stderr = String::from_utf8_lossy(&output.stderr); - assert!( - stderr.contains("must be a positive integer"), - "stderr should reject negative with `must be a positive integer`, got: {}", - stderr - ); - - let _ = fs::remove_dir_all(&dir); -} - -#[test] -fn compile_rejects_non_integer_schema_version() { - let dir = fresh_temp_dir("non-integer-version"); - let source = write_source( - &dir, - "---\nschema-version: \"not-a-number\"\nname: x\ndescription: y\n---\nbody\n", - ); - - let output = run_compile(&source); - - assert!( - !output.status.success(), - "compile should fail on non-integer schema-version" - ); - let stderr = String::from_utf8_lossy(&output.stderr); - assert!( - stderr.contains("must be a positive integer"), - "stderr should reject non-integer with `must be a positive integer`, got: {}", - stderr - ); - - let _ = fs::remove_dir_all(&dir); -} - -#[test] -fn compile_rejects_float_schema_version() { - let dir = fresh_temp_dir("float-version"); - let source = write_source( - &dir, - "---\nschema-version: 1.5\nname: x\ndescription: y\n---\nbody\n", - ); - - let output = run_compile(&source); - - assert!( - !output.status.success(), - "compile should fail on float schema-version" - ); - let stderr = String::from_utf8_lossy(&output.stderr); - assert!( - stderr.contains("must be a positive integer"), - "stderr should reject float with `must be a positive integer`, got: {}", - stderr - ); - - let _ = fs::remove_dir_all(&dir); -} - -// ─── Healthy v1 round-trip ───────────────────────────────────────────────── - -#[test] -fn compile_succeeds_on_unstamped_v1_source() { - let dir = fresh_temp_dir("unstamped-v1"); - let original = "---\nname: smoketest\ndescription: smoketest description\n---\n## Body\n\nHello.\n"; - let source = write_source(&dir, original); - - let output = run_compile(&source); - - assert!( - output.status.success(), - "compile should succeed on healthy 
unstamped source: stdout={:?} stderr={:?}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - ); - - // Lock file must be generated. - let lock = source.with_extension("lock.yml"); - assert!( - lock.exists(), - "expected compiled YAML at {}", - lock.display() - ); - - // With CURRENT_SCHEMA_VERSION == 1 and no migrations registered, - // the source must NOT be rewritten — verify byte-identity. - let after = fs::read_to_string(&source).expect("re-read source"); - assert_eq!( - after, original, - "source must be byte-identical after compile when no migrations apply" - ); - - // Stderr should NOT contain a migration warning. - let stderr = String::from_utf8_lossy(&output.stderr); - assert!( - !stderr.contains("warning: migrated front matter"), - "no migration warning expected, got stderr: {}", - stderr - ); - - let _ = fs::remove_dir_all(&dir); -} - -#[test] -fn compile_succeeds_on_explicitly_stamped_v1_source() { - let dir = fresh_temp_dir("stamped-v1"); - let original = "---\nschema-version: 1\nname: x\ndescription: y\n---\n## Body\n"; - let source = write_source(&dir, original); - - let output = run_compile(&source); - - assert!( - output.status.success(), - "compile should succeed on explicitly stamped v1 source: {}", - String::from_utf8_lossy(&output.stderr) - ); - - let after = fs::read_to_string(&source).expect("re-read source"); - assert_eq!( - after, original, - "explicitly stamped v1 source must be byte-identical" - ); - - let _ = fs::remove_dir_all(&dir); -} - -#[test] -fn compile_then_check_round_trip_passes() { - let dir = fresh_git_temp_dir("round-trip"); - let source = write_source( - &dir, - "---\nname: round-trip-agent\ndescription: round-trip\n---\n## Body\n", - ); - - let compile_output = run_compile(&source); - assert!( - compile_output.status.success(), - "compile should succeed: {}", - String::from_utf8_lossy(&compile_output.stderr) - ); - - let lock = source.with_extension("lock.yml"); - 
assert!(lock.exists(), "expected lock file at {}", lock.display()); - - // `check` reads the source path from the lock file's @ado-aw header. - // The header records an absolute or relative path from the compile - // invocation; since we passed an absolute path, that's what we get. - let check_output = run_check(&lock); - assert!( - check_output.status.success(), - "check should succeed: stdout={:?} stderr={:?}", - String::from_utf8_lossy(&check_output.stdout), - String::from_utf8_lossy(&check_output.stderr) - ); - - let _ = fs::remove_dir_all(&dir); -} - -// ─── check rejects future-version sources ────────────────────────────────── - -#[test] -fn check_rejects_future_schema_version() { - // Setup: compile a healthy source so we have a valid lock file with - // a header pointing at our source path. Then mutate the source to - // claim a future schema version, and confirm `check` fails. - let dir = fresh_git_temp_dir("check-future"); - let source = write_source( - &dir, - "---\nname: x\ndescription: y\n---\n## Body\n", - ); - - let compile_output = run_compile(&source); - assert!( - compile_output.status.success(), - "initial compile should succeed: {}", - String::from_utf8_lossy(&compile_output.stderr) - ); - - // Mutate the source to claim a future version. Note: this mutation - // would also fail the lock-file integrity check, but the migration - // runner runs first so we observe the schema-version error. 
- fs::write( - &source, - "---\nschema-version: 99\nname: x\ndescription: y\n---\n## Body\n", - ) - .expect("rewrite source"); - - let lock = source.with_extension("lock.yml"); - let check_output = run_check(&lock); - assert!( - !check_output.status.success(), - "check should fail when source is at a future schema-version" - ); - let stderr = String::from_utf8_lossy(&check_output.stderr); - assert!( - stderr.contains("only supports up to") || stderr.contains("Failed to migrate"), - "stderr should report the future-version error, got: {}", - stderr - ); - - let _ = fs::remove_dir_all(&dir); -} - -// ─── Non-mapping front matter ────────────────────────────────────────────── - -#[test] -fn compile_rejects_non_mapping_top_level_yaml() { - let dir = fresh_temp_dir("non-mapping"); - let source = write_source(&dir, "---\n- a\n- b\n---\nbody\n"); - - let output = run_compile(&source); - - assert!( - !output.status.success(), - "compile should fail when front matter is a sequence not a mapping" - ); - let stderr = String::from_utf8_lossy(&output.stderr); - assert!( - stderr.contains("must be a mapping"), - "stderr should report non-mapping error, got: {}", - stderr - ); - - let _ = fs::remove_dir_all(&dir); -} From 5b6e265c0f177a15b8e2f0235af319bb8842da0c Mon Sep 17 00:00:00 2001 From: James Devine Date: Fri, 8 May 2026 23:05:09 +0100 Subject: [PATCH 6/8] fix(compile): address codemod-refactor PR review Two findings + two suggestions from the latest Rust PR Reviewer pass: 1. Naming clash in `perform_source_rewrite_if_needed`: the parameter `codemods: &codemods::CodemodReport` shadowed the sibling `codemods` module. Functionally fine (the module isn't used inside the function body) but a future caller adding a `codemods::something()` reference would hit a confusing type error. Renamed to `report`. 2. `docs/codemods.md` claimed "Front-matter key order is preserved" without qualification. 
Renamed keys move to the END of the front-matter block because `Mapping::insert` appends new keys when absent. Updated the doc to call this out, and added the same note to the stderr warning emitted when codemods rewrite the source. 3. Marked the two registry-health tests (`registry_ids_are_unique`, `codemod_filenames_match_registry_count`) as vacuously true while CODEMODS is empty, so future reviewers don't mistake the green test for a meaningful invariant today. 4. Added a comment on the `pub use helpers::*` re-export and on the helper-level `#[allow(dead_code)]`s explaining they're defensive against the empty-registry warning surface and should be stripped once the first real codemod lands. Plus, while in the docs, added an "unsanitized input" invariant to the codemod author contract: the runner sanitizes the typed `FrontMatter` AFTER codemods run, so the raw `Mapping` codemods receive is whatever the user wrote. Codemods should treat values as opaque rather than parse/interpolate them. All 1349 tests pass. No behavior changes outside the docs and the warning text. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- docs/codemods.md | 18 ++++++++++++++++-- src/compile/codemods/mod.rs | 16 ++++++++++++++-- src/compile/mod.rs | 7 ++++--- 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/docs/codemods.md b/docs/codemods.md index edae8456..7c91e17e 100644 --- a/docs/codemods.md +++ b/docs/codemods.md @@ -43,8 +43,13 @@ every compile; codemods that don't match are essentially free. the closing `---`). - **Leading whitespace** before the opening `---` is preserved byte-for-byte (BOM-strippers and editor blank lines). -- **Front-matter key order** is preserved by `serde_yaml`'s - insertion-ordered mapping. +- **Front-matter key order** is preserved for keys the codemod + doesn't touch (`serde_yaml`'s mapping is insertion-ordered). 
+ Renamed keys, however, move to the **end** of the front-matter + block: `Mapping::insert` appends new keys, so when a codemod + removes `old-key` and inserts `new-key`, the new key lands at + the bottom regardless of where the old one was. The compile + warning calls this out so users aren't surprised. - **Front-matter comments** are NOT preserved. `serde_yaml` round-trip drops them. The warning emitted on rewrite calls this out so it isn't a surprise. If you have important context in a @@ -191,6 +196,15 @@ review + per-codemod tests: 6. **Order-aware.** If codemod B depends on shapes produced by codemod A, A must precede B in the registry. Document the ordering requirement in B's doc comment. +7. **Receives unsanitized input.** The compiler runs sanitization + (`##vso[` neutralization, control-character stripping, length + limits) on the typed `FrontMatter` *after* codemods run, but the + raw `Mapping` you receive is whatever the user wrote — including + any pipeline-injection attempts, control characters, or + over-length strings. Codemods should therefore treat values as + opaque (move them around, wrap them in objects, etc.) rather + than parse or interpolate them. If a codemod must inspect a + value, treat it defensively. ### Use the helpers diff --git a/src/compile/codemods/mod.rs b/src/compile/codemods/mod.rs index 6be5eca0..4c9618c0 100644 --- a/src/compile/codemods/mod.rs +++ b/src/compile/codemods/mod.rs @@ -33,6 +33,13 @@ use anyhow::{Context, Result}; use serde_yaml::Mapping; mod helpers; +// `#[allow(unused_imports)]` here mirrors the per-item +// `#[allow(dead_code)]` annotations in `helpers.rs`. While the +// CODEMODS registry is empty, no in-crate caller exercises these +// re-exports — the lint warns. Once the first real codemod lands +// and uses one of these helpers, both the re-export attribute and +// the per-item `dead_code` allows on `helpers.rs` should be +// removed. 
#[allow(unused_imports)]
 pub use helpers::{insert_no_overwrite, rename_key, take_key, ConflictPolicy};
 
@@ -159,6 +166,9 @@ mod tests {
 
     #[test]
     fn registry_ids_are_unique() {
+        // Vacuously true while CODEMODS is empty; the assertion
+        // machinery still compiles so this test guards against
+        // duplicate ids the moment a real codemod ships.
         let mut seen = std::collections::BTreeSet::new();
         for c in CODEMODS {
             assert!(
@@ -171,8 +181,10 @@
 
     #[test]
     fn codemod_filenames_match_registry_count() {
-        // Scan src/compile/codemods/*.rs and check that each numeric
-        // codemod file is present exactly once in the registry.
+        // Vacuously true while CODEMODS is empty (the directory
+        // contains only `mod.rs` and `helpers.rs`, which are
+        // skipped). Once a numeric `_.rs` file lands, this
+        // test asserts the registry was updated to match.
         let dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
             .join("src/compile/codemods");
         let mut numeric_files: Vec<String> = Vec::new();
diff --git a/src/compile/mod.rs b/src/compile/mod.rs
index 8ff01646..63bfa15a 100644
--- a/src/compile/mod.rs
+++ b/src/compile/mod.rs
@@ -263,7 +263,7 @@ async fn perform_source_rewrite_if_needed(
     front_matter_mapping: &serde_yaml::Mapping,
     body_raw: &str,
     source_sha256: &[u8; 32],
-    codemods: &codemods::CodemodReport,
+    report: &codemods::CodemodReport,
 ) -> Result<bool> {
     let new_content =
         common::reconstruct_source(leading_whitespace, front_matter_mapping, body_raw)?;
@@ -303,11 +303,12 @@ async fn perform_source_rewrite_if_needed(
     eprintln!("warning: applied codemods to {}:", input_path.display());
-    for applied in &codemods.applied {
+    for applied in &report.applied {
         eprintln!("  - {}: {}", applied.id, applied.summary);
     }
     eprintln!(
-        "note: front-matter comments are not preserved when codemods rewrite the source."
+        "note: front-matter comments are not preserved when codemods rewrite the source. \
+         Renamed keys may move to the end of the front-matter block."
); Ok(true) From 279527436e2476257a1d5e8681ff54662051b434 Mon Sep 17 00:00:00 2001 From: James Devine Date: Fri, 8 May 2026 23:17:26 +0100 Subject: [PATCH 7/8] fix(compile): address fourth-round PR review on codemod refactor Two findings + three suggestions from the latest Rust PR Reviewer pass: 1. Stage-3 typed-deserialize errors after codemods reused the same "Failed to parse YAML front matter" context as the Stage-1 raw parse, so a codemod that produced an invalid shape was indistinguishable from a user typo. Switch to with_context that surfaces the applied codemod ids when codemods ran: "Failed to parse YAML front matter after applying codemods (id1, id2); a codemod likely produced an invalid shape". 2. rename_key with ConflictPolicy::PreferNew returns Ok(true) when it drops a stale `old` key even though `new` is unchanged. That triggers a source-rewrite warning saying "we migrated" when really we "cleaned up a remnant". The behavior is correct (the mapping did mutate); the doc was misleading. Rewrote the rename_key doc comment to enumerate exactly when Ok(true) fires and call out the PreferNew/cleanup case. 3. Added `// TODO(codemods): remove when the first real codemod is registered.` markers next to every #[allow(dead_code)] / #[allow(unused_imports)] in src/compile/codemods/{mod,helpers}.rs so search-based cleanup is trivial when the registry stops being empty. 4. tests/codemod_tests.rs leaked temp dirs on assertion failure because `let _ = fs::remove_dir_all(&dir)` only ran on the happy path. Refactored to return tempfile::TempDir from the helpers and rely on RAII cleanup, matching the pattern already used by src/compile/codemod_integration_test.rs. Skipped: the empty-mapping concern in reconstruct_source (a mapping reduced to empty wouldn't deserialize as FrontMatter anyway because of required name/description fields, so the existing typed-deserialize error path catches it; #1 makes that error informative). All 1349 tests pass. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/compile/codemods/helpers.rs | 18 ++++++++-
 src/compile/codemods/mod.rs | 5 +++
 src/compile/common.rs | 21 ++++++++--
 tests/codemod_tests.rs | 68 +++++++++++----------------------
 4 files changed, 60 insertions(+), 52 deletions(-)

diff --git a/src/compile/codemods/helpers.rs b/src/compile/codemods/helpers.rs
index 74c5803f..59331fa2 100644
--- a/src/compile/codemods/helpers.rs
+++ b/src/compile/codemods/helpers.rs
@@ -9,6 +9,7 @@ use serde_yaml::{Mapping, Value};
 
 /// Conflict policy used by [`rename_key`] when the destination key is
 /// already present.
+// TODO(codemods): remove when the first real codemod is registered.
 #[allow(dead_code)]
 #[derive(Debug, Clone, Copy)]
 pub enum ConflictPolicy {
@@ -21,6 +22,7 @@ pub enum ConflictPolicy {
 }
 
 /// Remove and return the value at `key`, or `None` if absent.
+// TODO(codemods): remove when the first real codemod is registered.
 #[allow(dead_code)]
 pub fn take_key(m: &mut Mapping, key: &str) -> Option<Value> {
     m.remove(Value::String(key.to_string()))
@@ -30,6 +32,7 @@
 ///
 /// Prefer this over `Mapping::insert` in migrations: silent overwrite is
 /// almost always a bug when transforming user data.
+// TODO(codemods): remove when the first real codemod is registered.
 #[allow(dead_code)]
 pub fn insert_no_overwrite(
     m: &mut Mapping,
+/// Returns `Ok(true)` when the mapping was mutated in any way: +/// +/// - the `old` key was moved to `new` (the typical rename), OR +/// - both keys were present with [`ConflictPolicy::PreferOld`] and +/// `new` was overwritten, OR +/// - both keys were present with [`ConflictPolicy::PreferNew`] and +/// the stale `old` key was dropped (note: the `new` value did not +/// change, but the mapping still mutated — codemod authors using +/// `PreferNew` should be aware that `Ok(true)` here means +/// "cleaned up a remnant," not "migrated semantic content"). +/// +/// Returns `Ok(false)` when `old` was absent (no-op). /// /// The mapping is left **unchanged** on the error path. Callers can /// rely on this invariant when chaining migrations: a failed rename /// won't leave the mapping in a half-mutated state for the next call /// to inspect. +// TODO(codemods): remove when the first real codemod is registered. #[allow(dead_code)] pub fn rename_key( m: &mut Mapping, diff --git a/src/compile/codemods/mod.rs b/src/compile/codemods/mod.rs index 4c9618c0..6e308eca 100644 --- a/src/compile/codemods/mod.rs +++ b/src/compile/codemods/mod.rs @@ -40,6 +40,7 @@ mod helpers; // and uses one of these helpers, both the re-export attribute and // the per-item `dead_code` allows on `helpers.rs` should be // removed. +// TODO(codemods): remove when the first real codemod is registered. #[allow(unused_imports)] pub use helpers::{insert_no_overwrite, rename_key, take_key, ConflictPolicy}; @@ -73,6 +74,7 @@ pub struct Codemod { pub summary: &'static str, /// Compiler version that introduced this codemod (e.g. "0.27.0"). /// Provenance only; not consumed by the runner. + // TODO(codemods): remove when the first real codemod is registered. #[allow(dead_code)] pub introduced_in: &'static str, /// The transformation. Returns `Ok(true)` when the codemod @@ -107,6 +109,7 @@ pub struct AppliedCodemod { impl CodemodReport { /// An empty report (no codemod fired). 
+    // TODO(codemods): remove when the first real codemod is registered.
     #[allow(dead_code)]
     pub fn empty() -> Self {
         Self {
@@ -120,6 +123,7 @@
     }
 
     /// IDs of codemods that ran, in order. Helpful for tests.
+    // TODO(codemods): remove when the first real codemod is registered.
     #[allow(dead_code)]
     pub fn applied_ids(&self) -> Vec<&'static str> {
         self.applied.iter().map(|a| a.id).collect()
@@ -130,6 +134,7 @@
 ///
 /// Equivalent to [`apply_codemods_with`] called with the global
 /// [`CODEMODS`] registry.
+// TODO(codemods): remove when the first real codemod is registered.
 #[allow(dead_code)]
 pub fn apply_codemods(fm: &mut Mapping) -> Result<CodemodReport> {
     apply_codemods_with(fm, CODEMODS)
diff --git a/src/compile/common.rs b/src/compile/common.rs
index af3838d4..9a23f74d 100644
--- a/src/compile/common.rs
+++ b/src/compile/common.rs
@@ -182,10 +182,23 @@ pub(crate) fn parse_markdown_detailed_with_registry(
 
     // Stage 3: deserialize the (possibly modified) mapping into the
     // typed FrontMatter. Errors here mean either the user wrote an
-    // unsupported shape or a codemod produced invalid output.
-    let front_matter: FrontMatter =
-        serde_yaml::from_value(serde_yaml::Value::Mapping(mapping.clone()))
-            .context("Failed to parse YAML front matter")?;
+    // unsupported shape or a codemod produced invalid output. The
+    // error context differs by case so the user can tell which.
+ let front_matter: FrontMatter = serde_yaml::from_value( + serde_yaml::Value::Mapping(mapping.clone()), + ) + .with_context(|| { + if report.changed() { + let ids = report.applied_ids().join(", "); + format!( + "Failed to parse YAML front matter after applying codemods ({}); \ + a codemod likely produced an invalid shape", + ids + ) + } else { + "Failed to parse YAML front matter".to_string() + } + })?; Ok(ParsedSource { front_matter, diff --git a/tests/codemod_tests.rs b/tests/codemod_tests.rs index 57f5cb6c..eadc6499 100644 --- a/tests/codemod_tests.rs +++ b/tests/codemod_tests.rs @@ -17,53 +17,35 @@ //! - The full `compile` -> `check` round-trip succeeds. use std::fs; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::process::Command; - -/// Set up a unique temp directory for each test run. -fn fresh_temp_dir(label: &str) -> PathBuf { - let dir = std::env::temp_dir().join(format!( - "ado-aw-codemod-tests-{}-{}-{}", - label, - std::process::id(), - rand_suffix(), - )); - fs::create_dir_all(&dir).expect("create temp dir"); - dir +use tempfile::TempDir; + +/// Set up a unique temp directory for each test run. Returned as a +/// `TempDir` so RAII cleans the directory up even if a test panics. +fn fresh_temp_dir() -> TempDir { + tempfile::Builder::new() + .prefix("ado-aw-codemod-tests-") + .tempdir() + .expect("create temp dir") } /// Same as [`fresh_temp_dir`] but also creates an empty `.git/` /// directory at the root so `ado-aw check` (which walks up to the /// repo root) can resolve a source path from the compiled lock /// file's `@ado-aw` header. -fn fresh_git_temp_dir(label: &str) -> PathBuf { - let dir = fresh_temp_dir(label); - fs::create_dir(dir.join(".git")).expect("create .git dir"); +fn fresh_git_temp_dir() -> TempDir { + let dir = fresh_temp_dir(); + fs::create_dir(dir.path().join(".git")).expect("create .git dir"); dir } -/// Suffix that's unique within the process for the lifetime of a -/// single test binary run. 
Uses a wall-clock nanosecond timestamp
-/// combined with a monotonic atomic counter so two parallel tests
-/// scheduled in the same nanosecond still get distinct directories.
-fn rand_suffix() -> String {
-    use std::sync::atomic::{AtomicU64, Ordering};
-    use std::time::{SystemTime, UNIX_EPOCH};
-    static SEQ: AtomicU64 = AtomicU64::new(0);
-    let nanos = SystemTime::now()
-        .duration_since(UNIX_EPOCH)
-        .map(|d| d.as_nanos())
-        .unwrap_or(0);
-    let seq = SEQ.fetch_add(1, Ordering::Relaxed);
-    format!("{:x}-{:x}", nanos, seq)
-}
-
 fn ado_aw_binary() -> PathBuf {
     PathBuf::from(env!("CARGO_BIN_EXE_ado-aw"))
 }
 
 /// Run `ado-aw compile <source>`, returning the captured output.
-fn run_compile(source: &PathBuf) -> std::process::Output {
+fn run_compile(source: &Path) -> std::process::Output {
     Command::new(ado_aw_binary())
         .args(["compile", source.to_str().unwrap()])
         .output()
@@ -71,7 +53,7 @@
 }
 
 /// Run `ado-aw check <pipeline>`, returning the captured output.
-fn run_check(pipeline: &PathBuf) -> std::process::Output {
+fn run_check(pipeline: &Path) -> std::process::Output {
     Command::new(ado_aw_binary())
         .args(["check", pipeline.to_str().unwrap()])
         .output()
@@ -79,7 +61,7 @@
 }
 
 /// Write a source file to `dir/agent.md` and return its path.
-fn write_source(dir: &PathBuf, content: &str) -> PathBuf { +fn write_source(dir: &Path, content: &str) -> PathBuf { let path = dir.join("agent.md"); fs::write(&path, content).expect("write source"); path @@ -89,9 +71,9 @@ fn write_source(dir: &PathBuf, content: &str) -> PathBuf { #[test] fn compile_succeeds_on_current_source() { - let dir = fresh_temp_dir("current-source"); + let dir = fresh_temp_dir(); let original = "---\nname: smoketest\ndescription: smoketest description\n---\n## Body\n\nHello.\n"; - let source = write_source(&dir, original); + let source = write_source(dir.path(), original); let output = run_compile(&source); @@ -125,15 +107,13 @@ fn compile_succeeds_on_current_source() { "no codemod warning expected, got stderr: {}", stderr ); - - let _ = fs::remove_dir_all(&dir); } #[test] fn compile_then_check_round_trip_passes() { - let dir = fresh_git_temp_dir("round-trip"); + let dir = fresh_git_temp_dir(); let source = write_source( - &dir, + dir.path(), "---\nname: round-trip-agent\ndescription: round-trip\n---\n## Body\n", ); @@ -154,16 +134,14 @@ fn compile_then_check_round_trip_passes() { String::from_utf8_lossy(&check_output.stdout), String::from_utf8_lossy(&check_output.stderr) ); - - let _ = fs::remove_dir_all(&dir); } // ─── Non-mapping front matter ────────────────────────────────────────────── #[test] fn compile_rejects_non_mapping_top_level_yaml() { - let dir = fresh_temp_dir("non-mapping"); - let source = write_source(&dir, "---\n- a\n- b\n---\nbody\n"); + let dir = fresh_temp_dir(); + let source = write_source(dir.path(), "---\n- a\n- b\n---\nbody\n"); let output = run_compile(&source); @@ -177,6 +155,4 @@ fn compile_rejects_non_mapping_top_level_yaml() { "stderr should report non-mapping error, got: {}", stderr ); - - let _ = fs::remove_dir_all(&dir); } From 5c6f72b03a25bc9380b69c998d3e00e6fb3f269a Mon Sep 17 00:00:00 2001 From: James Devine Date: Fri, 8 May 2026 23:34:25 +0100 Subject: [PATCH 8/8] fix(compile): atomic_write tempfile 
lands on same FS for bare filenames Two fixes from the latest Rust PR Reviewer pass: 1. atomic_write created the tempfile in std::env::temp_dir() when the destination path had no directory component (e.g. a bare "agent.md" run from cwd). On Linux when /tmp is a tmpfs mount, the subsequent persist() rename fails with EXDEV because rename cannot cross filesystems. Fix: when path.parent() returns None or Some("") (the bare-filename case), fall back to "." so the tempfile lives on the same filesystem as the destination. Adds a pure-logic regression test for the parent-dir derivation that doesn't mutate process cwd (would race with parallel tests). 2. reconstruct_source's `format!("---\n{}---{}", ...)` template relies on serde_yaml::to_string ending the output with `\n`. This holds in practice but is an undocumented invariant. Add a defensive `ensure!` so a future serde_yaml change surfaces a loud, actionable error rather than silently producing malformed YAML. Skipped: the redundant `'static` annotation on `CODEMODS` (cosmetic, reviewer marked as minor). All 1350 tests pass. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/compile/common.rs | 61 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 51 insertions(+), 10 deletions(-) diff --git a/src/compile/common.rs b/src/compile/common.rs index 9a23f74d..32e882b4 100644 --- a/src/compile/common.rs +++ b/src/compile/common.rs @@ -42,17 +42,23 @@ pub async fn atomic_write(path: &Path, contents: &str) -> Result<()> { fn atomic_write_blocking(path: &Path, contents: &str) -> Result<()> { use std::io::Write; + // Determine the directory to create the tempfile in. We MUST use + // a path on the same filesystem as the destination so that the + // final `persist` rename is atomic (otherwise it fails with + // EXDEV on Linux when /tmp is a separate tmpfs mount). + // + // - `path.parent() == Some(non-empty)` -> use that parent. 
+ // - `path.parent() == Some("")` (bare filename like "agent.md") + // or `None` -> use the current directory ("."), which is the + // same filesystem as where the file will land. let parent = path.parent().filter(|p| !p.as_os_str().is_empty()); - let mut tmp = match parent { - Some(dir) => tempfile::NamedTempFile::new_in(dir).with_context(|| { - format!( - "failed to create temporary file in {}", - dir.display() - ) - })?, - None => tempfile::NamedTempFile::new() - .context("failed to create temporary file in current directory")?, - }; + let parent_dir: &Path = parent.unwrap_or_else(|| Path::new(".")); + let mut tmp = tempfile::NamedTempFile::new_in(parent_dir).with_context(|| { + format!( + "failed to create temporary file in {}", + parent_dir.display() + ) + })?; tmp.write_all(contents.as_bytes()) .with_context(|| format!("failed to write temporary file for {}", path.display()))?; @@ -233,6 +239,16 @@ pub fn reconstruct_source( ) -> Result { let yaml_serialized = serde_yaml::to_string(front_matter_mapping) .context("Failed to serialize codemod-rewritten front matter")?; + // Defensive: the format string assumes the serialized YAML ends + // with `\n` so the closing `---` lands on a new line. This is + // serde_yaml's documented behavior for non-empty mappings, but + // hard-fail loudly if a future version breaks the assumption + // rather than silently producing malformed YAML. 
+ anyhow::ensure!( + yaml_serialized.ends_with('\n'), + "serde_yaml::to_string produced output without trailing newline; \ + cannot reconstruct front-matter block safely" + ); Ok(format!( "{}---\n{}---{}", leading_whitespace, yaml_serialized, body_raw @@ -2573,6 +2589,31 @@ mod tests { assert_eq!(tokio::fs::read_to_string(&target).await.unwrap(), "target"); } + #[test] + fn atomic_write_parent_derivation_handles_bare_filename() { + // Regression test for the EXDEV bug: a bare filename (no + // directory component) used to fall through to + // `NamedTempFile::new()` which creates the tempfile in the + // system temp dir, breaking persist() across filesystem + // boundaries (e.g. tmpfs `/tmp` on Linux). Verify the + // derivation logic now picks ".". Pure-logic test — no + // filesystem I/O so it's parallel-safe. + let bare = Path::new("agent.md"); + let parent = bare.parent().filter(|p| !p.as_os_str().is_empty()); + let parent_dir: &Path = parent.unwrap_or_else(|| Path::new(".")); + assert_eq!(parent_dir, Path::new(".")); + + let with_dir = Path::new("subdir/agent.md"); + let parent = with_dir.parent().filter(|p| !p.as_os_str().is_empty()); + let parent_dir: &Path = parent.unwrap_or_else(|| Path::new(".")); + assert_eq!(parent_dir, Path::new("subdir")); + + let absolute = Path::new("/tmp/agent.md"); + let parent = absolute.parent().filter(|p| !p.as_os_str().is_empty()); + let parent_dir: &Path = parent.unwrap_or_else(|| Path::new(".")); + assert_eq!(parent_dir, Path::new("/tmp")); + } + // ─── parse_markdown_detailed ────────────────────────────────────────────── fn reconstruct(parsed: &ParsedSource) -> String {