From 680aad4e1642d5d28e42d2d88773b5965d854c01 Mon Sep 17 00:00:00 2001 From: Thien Trung Vuong Date: Tue, 26 May 2026 21:58:18 +0000 Subject: [PATCH 1/9] docs: add RFC for conditional-aware spec editing Adds the design RFC documenting the problem (straddling conditionals break naive line-range edits), the two evaluated approaches (enriched structure vs structural tree), and the chosen direction (tree model). Also documents known limitations in the overlays user guide: section-scoped operations cannot reach content past a wrapper %endif when the section header is inside the wrapper (workaround: spec-search-replace with anchored regex), and macro-generated sections are invisible to the static parser. --- .../rfc/001-conditional-aware-spec-editing.md | 559 ++++++++++++++++++ docs/user/reference/config/overlays.md | 36 ++ 2 files changed, 595 insertions(+) create mode 100644 docs/developer/rfc/001-conditional-aware-spec-editing.md diff --git a/docs/developer/rfc/001-conditional-aware-spec-editing.md b/docs/developer/rfc/001-conditional-aware-spec-editing.md new file mode 100644 index 00000000..96e4bf4a --- /dev/null +++ b/docs/developer/rfc/001-conditional-aware-spec-editing.md @@ -0,0 +1,559 @@ +# RFC 001: Conditional-Aware Spec Editing + +- **Status**: Draft +- **Author**: @trungams +- **Created**: 2026-05-22 +- **Related issues**: + - [#144](https://github.com/microsoft/azure-linux-dev-tools/issues/144) — `spec-remove-section` / `spec-remove-subpackage` produce invalid specs when sections are inside `%if/%endif` + - [#193](https://github.com/microsoft/azure-linux-dev-tools/issues/193) — `spec-remove-subpackage` / `spec-remove-section` greedily consume `%if` guards of the following subpackage + - [#203](https://github.com/microsoft/azure-linux-dev-tools/issues/203) — `spec-remove-subpackage` drops `%define` macros inside the removed subpackage that are referenced elsewhere +- **Related PRs**: + - [#190](https://github.com/microsoft/azure-linux-dev-tools/pull/190) — `fix: balance conditional nesting when removing spec sections` (merged) + +## Background + +### Spec overlay system + +Azure Linux imports RPM specs from upstream (primarily Fedora) and customizes them via an **overlay system** — no spec forking required. Overlays are declarative TOML directives that modify specs at render time. Key overlay types relevant to this RFC: + +- **`spec-remove-section`** / **`spec-remove-subpackage`**: Remove entire sections or all sections belonging to a subpackage. +- **`spec-append-lines`**: Append lines at the end of a named section. +- **`spec-insert-tag`**: Insert a tag (e.g., `Source9999:`) after the last tag of the same family in a section. +- **`spec-search-replace`**: Regex-based search and replace within a section. + +All of these operate on section ranges determined by `Spec.Visit`, a single-pass line walker that emits visitor targets (`SectionStartTarget`, `SectionLineTarget`, `SectionEndTarget`) as it walks the raw lines of a spec. + +### The problem + +`Spec.Visit` treats `%if/%endif` conditional directives as opaque content lines. It has no understanding of conditional structure. This leads to a family of bugs where operations produce incorrect results at section boundaries that involve conditionals. + +The pattern is extremely common in Fedora specs: `%if` guards wrapping subpackages, `%define` macros inside conditional sections, and bcond-controlled feature blocks. As Azure Linux imports more complex packages, these issues surface increasingly. + +### What has been fixed + +PR [#190](https://github.com/microsoft/azure-linux-dev-tools/pull/190) added post-hoc conditional balancing (`balanceRange`) to `collectSectionRanges`, fixing section removal (issues #144 and #193). A separate `skipPastConditional` workaround already existed for `spec-insert-tag`. These are point fixes layered on top of Visit — they work but add complexity that is recognized as unsustainable. The PR review consensus was that with better test coverage, the conditional-handling logic should be unified and factored to prevent further accretion of ad-hoc workarounds. + +### What remains broken + +1. **`spec-append-lines` boundary bug**: `AppendLinesToSection` uses `SectionEndTarget` to find where to insert. When a `%if` sits between two sections (wrapping the next one), Visit considers it part of the current section, so appended lines land inside the conditional. `spec-insert-tag` has a workaround (`skipPastConditional`); `spec-append-lines` does not. + +2. **`%define`/`%global` dropped during removal** (#203): `spec-remove-subpackage` deletes the entire block without checking whether macro definitions inside it are referenced from surviving sections. The macro silently vanishes, causing build failures downstream. + +## Problem inventory + +| # | Problem | Root cause | Severity | +|---|---------|------------|----------| +| 1 | `spec-append-lines` places lines inside straddling conditionals | Visit reports wrong section end | Incorrect output | +| 2 | `collectSectionRanges` needs post-hoc `balanceRange` | Visit reports wrong section end | Complexity / fragility | +| 3 | `spec-insert-tag` needs `skipPastConditional` workaround | Visit reports wrong section end | Complexity / fragility | +| 4 | `%define`/`%global` inside removed sections silently dropped | No semantic awareness of macro definitions | Incorrect output | +| 5 | `%else` branches with section headers create phantom sections | Visit walks both branches without conditional awareness | Potential incorrect operations | +| 6 | Continuation lines (`\`) misinterpreted as structural elements | Parser processes each physical line independently; no continuation state | Incorrect section tracking, false-positive errors | +| 7 | Section content spilling past wrapper `%endif` is invisible to section-scoped operations | Tree parser assigns section headers to wrapper blocks; content after `%endif` has no section context | Section-scoped overlays miss post-wrapper content | + +Problems 1–3 share the same root cause: Visit treats `%if/%endif` as opaque content, so `SectionEndTarget` fires at the wrong position. Problem 4 requires understanding what's *inside* a section before deleting it. Problem 5 is a subtler structural issue: Visit sees sections in both `%if` and `%else` branches as real, coexisting sections, when at build time only one branch is active. Problem 6 is a parser-level issue: when a line ends with `\`, the next physical line is a continuation and should not be structurally interpreted. The parser currently ignores this, causing phantom section boundaries inside `%define`/`%global` bodies and false-positive `balanceRange` errors on `%if` continuation lines. Problem 7 is a fundamental tree-model limitation: when a section header is inside a `%if` wrapper but the section's content continues past `%endif`, the tree cannot associate the post-wrapper content with the section. This affects 1 spec in the corpus (gdb). The workaround is to use `spec-search-replace` instead of section-scoped overlays for tags in this position. See [overlays documentation](../../user/reference/config/overlays.md#section-scoped-operations-and-straddling-conditionals). + +## RPM spec syntax survey + +Beyond `%if/%endif` and `%define/%global`, a comprehensive solution must account for the full range of RPM spec syntax that could interact with structural analysis. This survey was conducted against the Azure Linux spec corpus (7,432 rendered specs as of May 2026). + +### Actionable: patterns the structural model must handle + +#### `%else`/`%elif` branches containing section headers (13 specs) + +Some specs place different sections inside the `%if` and `%else` branches of a conditional: + +```spec +%if %{with gui} +%description gui +GUI tools for the package. +%else +%description minimal +Minimal tools for the package. +%endif +``` + +Found in: kernel, gdb, firefox, java-openjdk, apr-util, dogtag-pki, linux-system-roles, and others. + +Visit walks **both** branches and emits `SectionStartTarget`/`SectionEndTarget` for sections in each. This means section lists contain "phantom" sections that may not exist at build time. Implications: + +- Section-targeted overlays could match a section that only exists in one branch. +- Removing a section inside one branch could orphan the other branch or break the conditional structure. +- For a tree model, `ConditionalWrapper` needs to carry sections in **both** `Then` and `Else` branches. + +#### Line continuation (`\`) creates phantom structural elements (22+ specs) + +When a line ends with `\`, RPM treats the next physical line as a continuation of the same logical line. Our parser processes each physical line independently and has no continuation awareness. This causes two classes of bugs: + +**Phantom section boundaries in `%define`/`%global` bodies (22 specs, 68 phantom lines):** + +```spec +%define kernel_gcov_package() \ +%package %{?1:%{1}-}gcov\ +Summary: gcov graph and source files for coverage data collection.\ +%description %{?1:%{1}-}gcov\ +%{?1:%{1}-}gcov includes the gcov graph and source files...\ +%{nil} +``` + +The parser sees `%package` on line 2 as `tokens[0]`, matches `sectionTypesByName`, and emits a `SectionStartLine` — creating a phantom section boundary inside a macro definition. This corrupts Visit's section tracking. Corpus data: 68 phantom section lines and 148 phantom conditional lines across 22 specs (kernel, cross-binutils, cross-gcc, ccache, rust, python3.13, ghc, opencv, etc.). + +**False-positive `balanceRange` errors on `%if` continuations (16 specs):** + +```spec +%description pkgA +content + +%if cond1 \ +cond2 +%package pkgB +... +%endif +``` + +When removing pkgA's section, `balanceRange` trims the range to exclude the straddling `%if`, then `validateNoContentAfter` checks the trimmed zone. The continuation line `cond2` has `conditionalDepthChange == 0` and `isBlankOrComment == false`, so it's wrongly flagged as "real content" — triggering a spurious `ErrConditionalSpansSections` error. + +**Unified fix — `inContinuation` state flag:** + +Both problems share the same root cause: the parser doesn't track continuation state. The fix is a single `inContinuation bool` in `parseState`. Any line ending with `\` sets it; the next physical line is forced to `RawLine` regardless of content (no section keyword or conditional directive detection); if that line also ends with `\`, the flag persists. This handles `%define` bodies, `%if` conditions, and any other continuation context uniformly. + +```go +type parseState struct { + currentSect SectionTarget + inContinuation bool +} +``` + +This is independent of the structural model work and could ship as a standalone PR. Physical lines remain unchanged — only structural interpretation is suppressed on continuation lines. Content operations (`spec-search-replace`) and serialization are unaffected. + +#### `%define`/`%global` scoping and `%undefine` (pervasive) + +RPM macros have dynamic scoping: +- `%define` defines a macro that persists until redefined or undefined. +- `%global` is the same but the body is expanded at definition time. +- `%undefine` removes the topmost definition from the macro stack. + +For the macro hoisting feature (#203), `%undefine` must be considered: if a removed section contains both `%define foo` and later `%undefine foo`, hoisting only the `%define` would change semantics. The hoisting logic should treat `%define`/`%undefine` pairs as a unit. + +### Documented limitations: patterns that require macro evaluation + +The following patterns **cannot** be handled by static analysis. They are fundamental limitations of any approach that doesn't evaluate macros. These should be documented in user-facing overlay documentation so users understand when `spec-search-replace` is needed instead of structural overlays. + +#### Macro-generated sections (529 specs — 7% of corpus) + +Several Fedora packaging macros expand to full `%package`/`%description`/`%files` sections at build time: + +| Macro | Spec count | Creates sections? | +|-------|-----------|-------------------| +| `%gometa` | 364 | No — only defines macros | +| `%ghc_lib_subpackage` | 194 | **Yes** — `%package`, `%description`, `%files` | +| `%pyproject_extras_subpkg` | 117 | **Yes** — `%package`, `%description` | +| `%fontpkg` | 22 | **Yes** — `%package`, `%description`, `%files` | + +Our static parser sees these macro invocations as raw lines. The sections they generate are invisible. Overlays targeting sections created by these macros must use `spec-search-replace`. + +#### `%{expand:...}` — deferred expansion (3,041 specs) + +```spec +%global _description %{expand: +7-Zip is a file archiver with a high compression ratio. +} +``` + +Massively prevalent but used almost exclusively for multi-line string definitions (descriptions, changelogs), not for generating section headers. The parser sees the `%global` line, not the expanded content. No structural impact in practice. + +#### `%{load:...}` — external macro files (88 specs) + +```spec +%{load:%{_sourcedir}/automake.azl.macros} +``` + +Loads macro definitions from external files. In Azure Linux, these are primarily `azl.macros` files loaded via the `spec-insert-tag Source9999` overlay pattern. They define Azure Linux-specific macros, not section structure. + +#### `%include` — file inclusion (15 specs) + +```spec +%include %{SOURCE9998} +``` + +Found in gdb, efivar, efibootmgr, ansible. The included file could contain sections, conditionals, or macro definitions. Cannot be resolved without file access and macro evaluation. + +### Non-issues: patterns already handled correctly + +| Pattern | Prevalence | Why it's fine | +|---------|-----------|---------------| +| `%dnl` (comment directive) | 5 specs | Parser sees `%dnl` as first token → not in `sectionTypesByName` → raw line | +| `%bcond_with`/`%bcond_without` | Pervasive | Purely macro-level; structural impact is through `%if %{with ...}` which is already handled | +| `%{lua:...}` blocks | Pervasive (via `%autorelease`) | Inline macro expansion; parser sees the containing `%define` line | +| Comments (`#` lines) inside conditionals | Pervasive | Parser treats as empty lines; `conditionalDepthChange` ignores them | + +### Summary + +| Category | Prevalence | Structural impact | Solvable? | +|----------|-----------|-------------------|----------| +| `%if/%endif` straddling sections | Common | **High** — core problem | Yes | +| `%else` branches with section headers | 13 specs | **Medium** — phantom sections | Yes, needs branch-aware model | +| `%define/%global` in removed ranges | Occasional | **High** — silent breakage | Yes | +| `%undefine` interacting with hoisting | Rare | **Low** — edge case for #203 | Yes, hoist as unit | +| Line continuation (`\`) | 22+ specs | **High** — phantom sections + false-positive errors | Yes, `inContinuation` state flag in `parseState` | +| Macro-generated sections | 529 specs (7%) | **Medium** — invisible sections | **No** — document as limitation | +| `%{expand:}`, `%{load:}`, `%include` | Thousands | **Low** — text/macros, not structure | **No** — document as limitation | + +## Research + +### How other tools handle this + +- **RPM itself** evaluates conditionals *before* parsing sections (two-phase: conditional preprocessing → section parsing). We cannot do this — we edit specs statically without evaluating macros. +- **[tree-sitter-rpmspec](https://gitlab.com/cryptomilk/tree-sitter-rpmspec)** faces the same fundamental problem we do, documented in their [DESIGN.md](https://gitlab.com/cryptomilk/tree-sitter-rpmspec/-/blob/main/rpmspec/DESIGN.md). They call it the "Section End Detection Problem" — sections have no explicit end markers, and conditionals can appear at different structural levels (wrapping sections vs inside sections). They describe the same chicken-and-egg problem: you need to know what's inside a `%if` body to determine how to parse the `%if` itself. Their solution is **context-aware lookahead** in the external scanner: before emitting a `%if` token, peek ahead to see if the body contains section keywords, and emit different token types (`top_level_if`, `scriptlet_if`, `subsection_if`, `files_if`). This is functionally equivalent to the two-pass approach proposed below (Pass 1: collect pairs + section positions → Pass 2: classify and build tree). Key differences: they build a full grammar parser and distinguish four conditional contexts (we need only two: wrapper vs block, since we don't validate section content); they share the same macro-evaluation limitation we do. +- **packit/specfile** (Python) uses a flat section list — section boundaries are determined by `%section_name` headers only. Lines have a per-line `valid` boolean for conditional state, but boundaries themselves are not conditional-aware. + +### Existing Visit callers audit + +All 9 `Visit` call sites are internal to the `spec` package (in `edit.go`). `VisitTarget` is only constructed inside `spec.go`. No external code creates `VisitTarget` values. + +Three callers use `SectionEndTarget` and would be affected by boundary changes: + +| Caller | Uses `SectionEndTarget` | Mutation | Impact of boundary change | +|--------|------------------------|----------|--------------------------| +| `findInsertTagPosition` | Records `sectionEndLineNum` | No | Boundary feeds into `skipPastConditional` | +| `AppendLinesToSection` | Inserts at `CurrentLineNum` | Yes (`InsertLinesBefore`) | Lines would move to correct position | +| `collectSectionRanges` | Records range end | No | Would simplify/eliminate `balanceRange` | + +Six callers do NOT use `SectionEndTarget` and are unaffected: +`VisitTags`, `PrependLinesToSection`, `SearchAndReplace`, `AddChangelogEntry`, `HasSection`, `removePatchlistEntriesMatching`. + +### Lock file independence + +Lock file fields (`input-fingerprint`, `resolution-input-hash`) do **not** depend on rendered spec content. The rendered spec directory is explicitly excluded from fingerprinting (tagged `fingerprint:"-"`). Cosmetic changes to rendered specs have zero effect on lock files. + +## Proposed approaches + +### Option A: Enriched Visit context with pre-computed structure + +This is an incremental approach that preserves the existing Visit API. + +#### Core idea + +Visit stays as a single-pass line walker. Before walking, a structural index is built from the raw lines and exposed on `Context` for visitors that need structural awareness. + +#### What the structure looks like + +```go +// Computed once per Visit call (or lazily on first access). +type SpecStructure struct { + // Conditional pairs: every %if matched with its %endif. + ConditionalPairs []conditionalPair + + // Section headers: line number + parsed section identity. + SectionHeaders []sectionHeaderPos + + // Macro definitions: %define/%global with line number + name. + MacroDefs []macroDef +} + +// Derived: for a given section boundary (line of next section header), +// returns the "content end" adjusted for straddling conditionals. +func (ss *SpecStructure) ContentEndFor(sectionEnd int) int { ... } +``` + +Exposed on `Context`: + +```go +type Context struct { + // ... existing fields unchanged ... + + // Structure provides pre-computed structural info about the spec. + Structure *SpecStructure // new, additive +} +``` + +#### How each problem is solved + +**Problem 1 — `AppendLinesToSection` boundary bug:** + +```go +// Before (buggy): +ctx.InsertLinesBefore(lines) // inserts at CurrentLineNum = next section header + +// After: +contentEnd := ctx.Structure.ContentEndFor(ctx.CurrentLineNum) +ctx.spec.InsertLinesAt(lines, contentEnd) +``` + +**Problem 2 — `collectSectionRanges` post-hoc balancing:** + +```go +// Before: collect raw ranges, then balanceRange each one. +// After: collect content-aware ranges directly. +case SectionEndTarget: + if matched && curStart >= 0 { + end := ctx.Structure.ContentEndFor(ctx.CurrentLineNum) + ranges = append(ranges, sectionLineRange{start: curStart, end: end}) + } +``` + +`balanceRange` becomes either unnecessary or dramatically simpler (only the `%else` validation edge case survives). + +**Problem 3 — `InsertTag` / `skipPastConditional`:** + +```go +// Before: skipPastConditional scans from line 0 computing depth. +// After: look up in pre-computed ConditionalPairs directly. +insertAfterLine = ctx.Structure.SkipPastConditional(insertAfterLine, sectionEnd) +``` + +**Problem 4 — `%define` hoisting during removal:** + +```go +// In removal logic, before deleting range [start, end): +defsInRange := structure.MacroDefsInRange(start, end) +for _, def := range defsInRange { + if structure.IsReferencedOutsideRange(def.Name, start, end, rawLines) { + s.InsertLinesAt([]string{rawLines[def.Line]}, hoistTarget) + } +} +``` + +#### Mutation handling + +Most mutating visitors access the structure **before** they mutate, then return immediately. Staleness is not an issue in practice. For safety, the structure can be marked dirty on any mutation and lazily recomputed on next access. + +#### What gets simplified or removed + +- `skipPastConditional` → replaced by `SpecStructure.ContentEndFor` or `SkipPastConditional` +- `collectConditionalPairs` call in `collectSectionRanges` → uses pre-computed pairs +- `balanceRange` → most logic absorbed into `ContentEndFor`; only `%else` validation survives + +#### Incremental delivery + +1. **PR A**: Add `SpecStructure` with `ConditionalPairs` + `ContentEndFor`. Fix `AppendLinesToSection` and `collectSectionRanges`. Simplify `balanceRange`. +2. **PR B**: Add `MacroDefs` + `IsReferencedOutsideRange`. Fix removal to hoist macros (#203). + +#### Risks and limitations + +- The structural model is **read-only metadata alongside mutable raw lines**. The two can drift after mutations. This is manageable because current callers don't interleave structure reads and line writes, but it's a latent hazard for future callers. +- Operations still ultimately manipulate line arrays by position. Complex transformations (move a section, restructure conditionals) remain fiddly — you compute positions from metadata and then do array surgery. +- Each new structural feature (macros, conditionals, future: `%autosetup` awareness?) needs explicit support in the pre-scan. The structure grows organically. + +--- + +### Option B: Structural tree model (recommended) + +This is a more ambitious approach that replaces the flat line walker with a parsed structure. + +#### Core idea + +Parse the spec into a tree where sections and conditionals are explicit nodes. Operations work on the tree. Serialization flattens back to lines. + +#### What the tree looks like + +```go +type SpecTree struct { + Preamble *SectionNode + Body []BodyElement // sections and top-level conditional wrappers +} + +// BodyElement is either a SectionNode or a ConditionalWrapper. +type BodyElement interface { + bodyElement() + Lines() []string // serialize back to text +} + +type SectionNode struct { + HeaderLine string // e.g., "%build", "%package -n foo" + Name string + Package string + Content []ContentElement +} + +// ContentElement is a text block, conditional block, or macro definition. +type ContentElement interface { + contentElement() + Lines() []string +} + +type TextLines struct { + lines []string +} + +type ConditionalBlock struct { + Directive string // "%if %{with_docs}" + Then []ContentElement + Else []ContentElement // nil if no %else + Endif string // "%endif" +} + +type MacroDef struct { + Line string // "%define testsdir %{_libdir}/..." + Name string // "testsdir" +} + +// A conditional that wraps one or more entire sections. +type ConditionalWrapper struct { + Directive string + Body []BodyElement // sections inside the conditional + Else []BodyElement + Endif string +} +``` + +The key structural distinction: a `ConditionalBlock` lives **inside** a section (as content). A `ConditionalWrapper` lives **between** sections (wrapping them). The parser determines which based on whether the `%if`/`%endif` pair contains section headers. + +#### Implementation strategy: start simple, add types later + +For the initial implementation, a single recursive `Block` type is simpler to build and iterate on: + +```go +type Block struct { + Kind BlockKind // Section, Conditional, Text, MacroDef + Header string // "%build", "%if %{with_docs}", "%define foo ..." + Name string // section name / macro name + Package string // for section blocks + Lines []string // raw text lines (leaf Text blocks) + Children []*Block // nested blocks + Else []*Block // for Conditional blocks with %else +} +``` + +This trades type safety for simplicity — the type system won't prevent invalid nesting (e.g., a section inside a section), but the parser controls construction so this is safe in practice. Once the parser and operations are proven, the model can be promoted to the typed interface hierarchy above, making illegal states unrepresentable. + +#### Parsing + +The parser is two-pass: + +1. **Pass 1**: Collect conditional pairs and section header positions (same pre-scan as the enriched metadata approach). +2. **Pass 2**: Build the tree — for each conditional pair, determine whether it's a wrapper (spans section boundaries) or content (fully within a section). Straddling cases (a `%if` that starts partway through a section and closes in the next) are errors — same as today. + +Example input and resulting tree: + +``` +Input: Tree: + +%if %{with_docs} ConditionalWrapper +%package docs ├─ Then: +Summary: Docs │ ├─ SectionNode(%package docs) +%description docs │ │ └─ TextLines["Summary: Docs"] +Documentation. │ └─ SectionNode(%description docs) +%endif │ └─ TextLines["Documentation."] + +%build SectionNode(%build) +make ├─ TextLines["make"] +%if %{with_docs} └─ ConditionalBlock +make docs └─ Then: TextLines["make docs"] +%endif + +%install SectionNode(%install) +make install └─ TextLines["make install"] +``` + +Example with `%else` branches containing different sections: + +``` +Input: Tree: + +%if %{with gui} ConditionalWrapper +%description gui ├─ Then: +GUI tools. │ └─ SectionNode(%description gui) +%else │ └─ TextLines["GUI tools."] +%description minimal └─ Else: +Minimal tools. └─ SectionNode(%description minimal) +%endif └─ TextLines["Minimal tools."] +``` + +This case requires `ConditionalWrapper.Else` to hold `[]BodyElement`, not just `[]ContentElement`. Removing a section inside one branch must not disturb the other branch. + +#### How each problem is solved + +**Problem 1 — `AppendLinesToSection`:** + +```go +func (s *SpecTree) AppendToSection(name, pkg string, lines []string) error { + section := s.FindSection(name, pkg) + section.Content = append(section.Content, &TextLines{lines: lines}) + return nil +} +``` + +No boundary confusion possible — conditionals are separate nodes, not inline content. + +**Problem 2 — Section removal:** + +```go +func (s *SpecTree) RemoveSection(name, pkg string) { + s.Body = slices.DeleteFunc(s.Body, func(e BodyElement) bool { + sec, ok := e.(*SectionNode) + return ok && sec.Name == name && sec.Package == pkg + }) + // Also handle sections inside ConditionalWrappers +} +``` + +The conditional wrapper stays in place. No `balanceRange` needed. Empty wrappers can be collapsed in a cleanup pass. + +**Problem 3 — `InsertTag`:** + +Tags are content elements within a section. Conditional blocks are sibling content elements. Inserting after a tag naturally avoids landing inside a conditional — no `skipPastConditional` needed. + +**Problem 4 — `%define` hoisting:** + +```go +func (s *SpecTree) RemoveSectionWithMacroHoist(name, pkg string) { + section := s.FindSection(name, pkg) + for _, elem := range section.Content { + if def, ok := elem.(*MacroDef); ok { + if s.IsNameReferencedElsewhere(def.Name, section) { + s.Preamble.Content = append(s.Preamble.Content, def) + } + } + } + s.RemoveSection(name, pkg) +} +``` + +#### Visit compatibility layer + +Visit can be reimplemented as a tree walk emitting the same targets, or kept as-is on raw lines during a migration period. Alternatively, a new tree-based API (`SpecTree.Walk(...)`) can coexist with Visit, and callers migrate gradually. + +#### Incremental delivery + +1. **PR A**: Add `SpecTree` construction from raw lines + serialization back to lines. Round-trip test: `parse → serialize = identity`. +2. **PR B**: Implement tree-based `AppendToSection` and `RemoveSection`. Wire up overlay dispatch to use tree operations for `spec-append-lines` and `spec-remove-section/subpackage`. +3. **PR C**: Implement tree-based `InsertTag`, `SearchAndReplace`. Migrate more overlays. +4. **PR D**: Macro hoisting (#203) using tree traversal. +5. **PR E** (optional): Reimplement Visit as a tree walk, or deprecate in favor of tree API. + +#### Risks and limitations + +- **Parsing complexity**: Building the tree requires handling ambiguous cases. `%if` / `%else` where each branch contains different sections is valid (13 specs in Azure Linux) and makes the tree shape depend on conditional structure. The parser must distinguish three conditional geometries: fully inside a section (→ `ConditionalBlock`), wrapping sections (→ `ConditionalWrapper`), and straddling a section boundary (→ error). +- **`%else` branch asymmetry**: The `Then` and `Else` branches of a `ConditionalWrapper` can contain different numbers and types of sections. Operations like `FindSection` must search both branches, and removal must handle the case where a section exists only in one branch. +- **Serialization fidelity**: Round-tripping must preserve the original text exactly (whitespace, comments, blank lines). Achievable but needs extensive testing against real specs. +- **Migration cost**: 9 Visit call sites + external callers need migration or a compatibility layer. This is the largest concrete risk. +- **Scope**: Multi-PR effort. Each PR needs testing against real-world specs (qemu, kernel, etc.). +- **Macro-generated sections are invisible**: 529 specs (7%) use macros like `%fontpkg`, `%ghc_lib_subpackage`, or `%pyproject_extras_subpkg` that expand to sections at build time. The tree model cannot represent these — they appear as raw text nodes. This is a fundamental limitation shared with the enriched metadata approach and must be documented. + +## Comparison + +| Dimension | Enriched metadata (Option A) | Structural tree (Option B) | +|-----------|------------------------------|----------------------------| +| **Core model** | Flat lines + structural metadata | Tree of sections / conditionals / macros | +| **Visit API** | Unchanged (additive field) | Compatibility layer or new API | +| **Boundary problem** | Metadata provides correct positions | Structurally impossible to get wrong | +| **Macro hoisting** | Scan metadata + line surgery | Move tree nodes between parents | +| **Mutation model** | Positional line array edits | Tree node manipulation | +| **New operations** | Each needs position computation from metadata | Natural tree traversal | +| **Migration effort** | 3 callers change (use `ContentEnd`) | All callers eventually migrate | +| **First PR size** | Small | Medium (tree parser + serializer + round-trip tests) | +| **Risk** | Low — additive, reversible | Moderate — new parser, migration period | +| **Long-term payoff** | Moderate — still doing line surgery | High — structural operations become trivial | +| **Ceiling** | Hits limits when operations need to restructure conditionals | No structural ceiling | + +## Open questions + +1. Which approach (enriched metadata vs structural tree) is the right level of ambition given the current and projected demand for spec editing features? +2. For the enriched metadata approach: should `SpecStructure` live on `Context` (per-visit, transient) or on `Spec` (persistent, invalidated on mutation)? +3. For the structural tree: is `ConditionalWrapper` vs `ConditionalBlock` the right split, or should all conditionals be one type with a "wraps sections" flag? +4. Should we prototype against a complex real-world spec (e.g., qemu) early to validate the approach? +5. Is there an intermediate path — e.g., start with enriched metadata for quick wins, then migrate to the structural tree as the model matures? +6. How should operations handle sections inside `%else` branches? Should `FindSection` return all matches (both branches), or require the caller to specify which branch? +7. Should macro-generated sections (from `%fontpkg`, `%ghc_lib_subpackage`, etc.) be documented as an explicit limitation in the overlay user guide, with guidance to use `spec-search-replace` for those cases? +8. Should the `inContinuation` fix (problem 6) ship as a standalone PR before the structural model work, since it's independent and low-risk? diff --git a/docs/user/reference/config/overlays.md b/docs/user/reference/config/overlays.md index 55061118..6a7be4a5 100644 --- a/docs/user/reference/config/overlays.md +++ b/docs/user/reference/config/overlays.md @@ -347,6 +347,42 @@ the overlay always removes every section associated with the sub-package. > an error is returned; use a `spec-search-replace` overlay to adjust the conditionals > before removing the sub-package. +## Known Limitations + +### Section-scoped operations and straddling conditionals + +Spec overlays that target a specific section and package (e.g., `spec-remove-tag` with `package = "headless"`) rely on the structural tree parser to identify which lines belong to that section. In rare cases, a section header may live inside a `%if` wrapper while its content continues past the `%endif`: + +```spec +%if 0%{!?scl:1} +%package headless +Requires: binutils +%endif +# ← content below is still part of %package headless in RPM's view, +# but the tree parser cannot associate it with the headless section +# because the section header is structurally inside the wrapper. +Recommends: default-yama-scope +``` + +In this pattern, section-scoped overlays (`spec-remove-tag`, `spec-add-tag`, `spec-update-tag`, `spec-insert-tag`) cannot find or modify tags that fall outside the `%endif`. Use `spec-search-replace` with a precise anchored regex instead: + +```toml +# Instead of: +# type = "spec-remove-tag" +# package = "headless" +# tag = "Recommends" +# value = "default-yama-scope" + +# Use: +type = "spec-search-replace" +regex = "^Recommends: default-yama-scope$" +replacement = "# Recommends: default-yama-scope (disabled)" +``` + +### Macro-generated sections + +Specs that use macros like `%ghc_lib_subpackage`, `%pyproject_extras_subpkg`, or `%fontpkg` generate sections at build time that are invisible to the static parser. Section-scoped overlays cannot target these generated sections. Use `spec-search-replace` for modifications that need to reach macro-generated content. + ## Validation Overlay configurations are validated when the config file is loaded. Validation checks: From 9f585f0b86b4ac6a3a688d0923e9cc1a9ea890c5 Mon Sep 17 00:00:00 2001 From: Thien Trung Vuong Date: Tue, 26 May 2026 21:58:43 +0000 Subject: [PATCH 2/9] feat(spec): add structural tree parser for spec files Two-pass parser that builds a block tree from raw spec lines: - Pass 1 (collectConditionalPairs in edit.go): collects conditional pairs, skipping %if/%endif inside %define/%global macro continuation bodies. Shell-level continuations still process %if as structural. - Pass 2 (buildBlockChildren): builds nested block tree, classifying conditionals as wrappers (contain section headers) vs content blocks. Block kinds: rootBlock, sectionBlock, conditionalBlock, textBlock, macroDefBlock. Preamble content is wrapped in an implicit sectionBlock with empty Name for uniform access via findSectionBlock. All tree types and helpers are kept unexported so the parsed representation remains an internal implementation detail. Includes serializeTree for lossless round-tripping, query helpers (findSectionBlock, findAllSectionBlocks, findAllSectionBlocksByPackage), mutation helpers (removeBlockFromParent), and validateSectionRemoval for detecting unsafe removal patterns (orphaned content). Validation checks: - Preceding-section: wrapper has text AND preceding section is removed. - Empty-wrapper + adjacent content: all sections removed from wrapper AND adjacent content conditional would be orphaned. Cross-branch asymmetry is intentionally allowed (removing from %if while %else retains sections is a valid overlay operation). --- internal/rpm/spec/tree.go | 857 +++++++++++++++++++++++++++++++++ internal/rpm/spec/tree_test.go | 687 ++++++++++++++++++++++++++ 2 files changed, 1544 insertions(+) create mode 100644 internal/rpm/spec/tree.go create mode 100644 internal/rpm/spec/tree_test.go diff --git a/internal/rpm/spec/tree.go b/internal/rpm/spec/tree.go new file mode 100644 index 00000000..704d9c4f --- /dev/null +++ b/internal/rpm/spec/tree.go @@ -0,0 +1,857 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package spec + +import ( + "fmt" + "strings" +) + +// blockKind classifies what a [block] represents in the spec tree. +type blockKind int + +const ( + // rootBlock is the top-level container for the entire spec. + rootBlock blockKind = iota + // sectionBlock is a named section (e.g., %build, %package -n foo). + // The implicit preamble (before any section header) is also a [sectionBlock] + // with an empty [block.Name]. + sectionBlock + // conditionalBlock is a %if/%endif block. May wrap sections (at top level) + // or appear as content inside a section. + conditionalBlock + // textBlock is a contiguous run of raw text lines (leaf node). + textBlock + // macroDefBlock is a %define/%global directive, optionally spanning + // multiple lines via backslash continuation. + macroDefBlock +) + +// block is a recursive node in the spec's structural tree. +// +// The tree is built by [parseTree] and serialized back to lines by [serializeTree]. +// Operations find and manipulate blocks, then serialize to update [Spec.rawLines]. +type block struct { + // Kind classifies this block. + Kind blockKind + // Header is the opening line: section header, conditional directive, or macro + // definition line. Empty for [rootBlock] and [textBlock]. + Header string + // Name is the section keyword (e.g., "%build") or macro name (e.g., "buildflags"). + // Empty for [rootBlock], [conditionalBlock], and [textBlock]. + Name string + // Package is the sub-package name for section blocks (e.g., "devel", "foo"). + // Empty for sections that target the main package. + Package string + // Endif is the %endif line text for [conditionalBlock] nodes. + Endif string + // Lines holds raw text for [textBlock] and [macroDefBlock] leaf nodes + // (including continuation lines for multi-line macros). + Lines []string + // Children holds nested blocks. For [sectionBlock], these are the section's + // content. For [conditionalBlock], these are the "then" branch. For [rootBlock], + // these are top-level sections and conditional wrappers. + Children []*block + // Else holds the "else" branch blocks for [conditionalBlock] nodes. + // nil when there is no %else/%elif branch. + Else []*block + // ElseDirective is the %else/%elif directive line, if present. + ElseDirective string +} + +// parseTree parses raw spec lines into a [block] tree. +// +// The parser runs in two passes: +// 1. Collect conditional pairs (%if/%endif) and section header positions. +// 2. Build the tree, classifying each conditional as a wrapper (spans sections) +// or content block (fully inside a section) based on whether its body contains +// section headers. +// +// Line continuations (backslash at end of line) are respected: continuation bodies +// are never interpreted as section headers or conditional directives. +func parseTree(rawLines []string) (*block, error) { + pairs, err := collectConditionalPairs(rawLines) + if err != nil { + return nil, fmt.Errorf("parsing conditional structure:\n%w", err) + } + + pairByIf := make(map[int]conditionalPair, len(pairs)) + for _, p := range pairs { + pairByIf[p.ifLine] = p + } + + sectionHeaders := findSectionHeaderLines(rawLines) + + sectionHeaderSet := make(map[int]bool, len(sectionHeaders)) + for _, h := range sectionHeaders { + sectionHeaderSet[h] = true + } + + root := &block{Kind: rootBlock} + + err = buildBlockChildren(rawLines, 0, len(rawLines), pairByIf, sectionHeaderSet, root, true) + if err != nil { + return nil, fmt.Errorf("building spec tree:\n%w", err) + } + + // Wrap leading non-section children (preamble content) into an implicit + // preamble sectionBlock with empty Name, matching how Visit treats lines + // before the first section header. This allows findSectionBlock(root, "", "") + // to locate the preamble. + wrapPreamble(root) + + return root, nil +} + +// wrapPreamble wraps the leading non-section children of root into a preamble +// [sectionBlock] with empty Name and Package. If the root already starts with +// a [sectionBlock], no wrapping is needed. +func wrapPreamble(root *block) { + // Find the index of the first sectionBlock or section-wrapping conditionalBlock. + firstSectionIdx := -1 + + for childIdx, child := range root.Children { + if child.Kind == sectionBlock { + firstSectionIdx = childIdx + + break + } + + if child.Kind == conditionalBlock && containsSectionBlocks(child) { + firstSectionIdx = childIdx + + break + } + } + + // If everything is preamble (no sections) or nothing precedes the first section, + // still wrap in a preamble block for uniform access. + preambleEnd := firstSectionIdx + if preambleEnd < 0 { + preambleEnd = len(root.Children) + } + + if preambleEnd == 0 { + // Nothing to wrap, but insert an empty preamble for uniform lookup. + preamble := &block{Kind: sectionBlock, Name: "", Package: ""} + root.Children = append([]*block{preamble}, root.Children...) + + return + } + + preamble := &block{ + Kind: sectionBlock, + Name: "", + Package: "", + Children: root.Children[:preambleEnd], + } + + root.Children = append([]*block{preamble}, root.Children[preambleEnd:]...) +} + +// containsSectionBlocks checks if a block (typically a conditionalBlock) contains +// any sectionBlock children in any branch, recursing through %elif chains. +func containsSectionBlocks(block *block) bool { + for _, child := range block.Children { + if child.Kind == sectionBlock { + return true + } + + if child.Kind == conditionalBlock && containsSectionBlocks(child) { + return true + } + } + + for _, child := range block.Else { + if child.Kind == sectionBlock { + return true + } + + if child.Kind == conditionalBlock && containsSectionBlocks(child) { + return true + } + } + + return false +} + +// findSectionHeaderLines returns the 0-indexed line numbers of all section headers, +// respecting line continuations (backslash-terminated lines suppress the next line). +func findSectionHeaderLines(rawLines []string) []int { + var headers []int + + inCont := false + + for lineIdx, line := range rawLines { + if inCont { + inCont = strings.HasSuffix(line, "\\") + + continue + } + + if isSectionHeaderLine(line) { + headers = append(headers, lineIdx) + } + + inCont = strings.HasSuffix(line, "\\") + } + + return headers +} + +// isSectionHeaderLine returns true if the line starts a new RPM spec section. +func isSectionHeaderLine(rawLine string) bool { + tokens := strings.Fields(strings.TrimSpace(rawLine)) + if len(tokens) == 0 { + return false + } + + _, known := sectionTypesByName[strings.ToLower(tokens[0])] + + return known +} + +// hasSectionHeaderInRange checks whether any line in [start, end) is a section header. +func hasSectionHeaderInRange(start, end int, sectionHeaderSet map[int]bool) bool { + for lineNum := start; lineNum < end; lineNum++ { + if sectionHeaderSet[lineNum] { + return true + } + } + + return false +} + +// buildBlockChildren parses lines in [start, end) and appends resulting blocks +// to parent.Children. topLevel indicates whether sections can appear (true at +// root level and inside conditional wrappers). +// +//nolint:funlen,gocognit // Recursive parser with multiple block types. +func buildBlockChildren( + rawLines []string, + start, end int, + pairByIf map[int]conditionalPair, + sectionHeaderSet map[int]bool, + parent *block, + topLevel bool, +) error { + lineIdx := start + inCont := false + + var textBuf []string + + flushText := func() { + if len(textBuf) > 0 { + parent.Children = append(parent.Children, &block{ + Kind: textBlock, + Lines: textBuf, + }) + + textBuf = nil + } + } + + for lineIdx < end { + line := rawLines[lineIdx] + + if inCont { + textBuf = append(textBuf, line) + inCont = strings.HasSuffix(line, "\\") + lineIdx++ + + continue + } + + // Section headers (only at top level). + if topLevel && sectionHeaderSet[lineIdx] { + flushText() + + name, pkg := getSectionNameAndPackageFromHeader(line) + sectionBlock := &block{ + Kind: sectionBlock, + Header: line, + Name: name, + Package: pkg, + } + + sectionEnd := findTreeSectionEnd(lineIdx+1, end, pairByIf, sectionHeaderSet) + + err := buildBlockChildren(rawLines, lineIdx+1, sectionEnd, pairByIf, sectionHeaderSet, sectionBlock, false) + if err != nil { + return err + } + + parent.Children = append(parent.Children, sectionBlock) + lineIdx = sectionEnd + + continue + } + + // Conditional directives. + if conditionalDepthChange(line) == 1 { + flushText() + + pair, ok := pairByIf[lineIdx] + if !ok { + return fmt.Errorf("%%if at line %d has no matching pair", lineIdx+1) + } + + condBlock := &block{ + Kind: conditionalBlock, + Header: line, + Endif: rawLines[pair.endifLine], + } + + bodyStart := lineIdx + 1 + bodyEnd := pair.endifLine + + elseLine := findElseDirectiveLine(rawLines, bodyStart, bodyEnd) + + thenEnd := bodyEnd + if elseLine >= 0 { + thenEnd = elseLine + } + + isWrapper := hasSectionHeaderInRange(bodyStart, bodyEnd, sectionHeaderSet) + + if err := buildConditionalBranches( + rawLines, bodyStart, thenEnd, elseLine, bodyEnd, + pairByIf, sectionHeaderSet, condBlock, isWrapper, + ); err != nil { + return err + } + + parent.Children = append(parent.Children, condBlock) + lineIdx = pair.endifLine + 1 + + continue + } + + // Macro definitions. + if name, ok := isMacroDefLine(line); ok { + flushText() + + macroBlock := &block{ + Kind: macroDefBlock, + Header: line, + Name: name, + Lines: []string{line}, + } + + if strings.HasSuffix(line, "\\") { + inCont = true + lineIdx++ + + for lineIdx < end { + macroBlock.Lines = append(macroBlock.Lines, rawLines[lineIdx]) + + if !strings.HasSuffix(rawLines[lineIdx], "\\") { + inCont = false + lineIdx++ + + break + } + + lineIdx++ + } + } else { + lineIdx++ + } + + parent.Children = append(parent.Children, macroBlock) + + continue + } + + // Plain text line. + textBuf = append(textBuf, line) + inCont = strings.HasSuffix(line, "\\") + lineIdx++ + } + + flushText() + + return nil +} + +// buildConditionalBranches parses the then and optional else/elif branches of a +// conditional block. For %elif chains, the else branch contains a single nested +// [conditionalBlock] whose Header is the %elif directive, forming a linked list. +func buildConditionalBranches( + rawLines []string, + bodyStart, thenEnd, elseLine, bodyEnd int, + pairByIf map[int]conditionalPair, + sectionHeaderSet map[int]bool, + condBlock *block, + isWrapper bool, +) error { + err := buildBlockChildren(rawLines, bodyStart, thenEnd, pairByIf, sectionHeaderSet, condBlock, isWrapper) + if err != nil { + return err + } + + if elseLine < 0 { + return nil + } + + if isElifDirective(rawLines[elseLine]) { + // %elif: create a nested conditionalBlock forming a linked list. + // The inner block has no Endif — only the outermost block owns %endif. + inner := &block{ + Kind: conditionalBlock, + Header: rawLines[elseLine], + } + + // Find the next branch directive (%elif/%else) within the remaining body. + nextElse := findElseDirectiveLine(rawLines, elseLine+1, bodyEnd) + + nextThenEnd := bodyEnd + if nextElse >= 0 { + nextThenEnd = nextElse + } + + if err := buildConditionalBranches( + rawLines, elseLine+1, nextThenEnd, nextElse, bodyEnd, + pairByIf, sectionHeaderSet, inner, isWrapper, + ); err != nil { + return err + } + + condBlock.Else = []*block{inner} + } else { + // %else: terminal branch — store directive and parse content directly. + condBlock.ElseDirective = rawLines[elseLine] + elseContainer := &block{Kind: rootBlock} + + err := buildBlockChildren(rawLines, elseLine+1, bodyEnd, pairByIf, sectionHeaderSet, elseContainer, isWrapper) + if err != nil { + return err + } + + condBlock.Else = elseContainer.Children + } + + return nil +} + +// isElifDirective returns true if the line is a %elif/%elifarch/%elifnarch/%elifos/%elifnos +// directive (as opposed to a plain %else which is a terminal branch). +func isElifDirective(rawLine string) bool { + lower := strings.ToLower(strings.Fields(strings.TrimSpace(rawLine))[0]) + + return lower != "%else" && isConditionalBranchDirective(rawLine) +} + +// findTreeSectionEnd finds where a section ends: at the next section header at the +// same nesting level, or at a conditional that wraps sections. +func findTreeSectionEnd(start, end int, pairByIf map[int]conditionalPair, sectionHeaderSet map[int]bool) int { + lineIdx := start + + for lineIdx < end { + if sectionHeaderSet[lineIdx] { + return lineIdx + } + + if pair, ok := pairByIf[lineIdx]; ok { + if hasSectionHeaderInRange(lineIdx+1, pair.endifLine, sectionHeaderSet) { + return lineIdx + } + + lineIdx = pair.endifLine + 1 + + continue + } + + lineIdx++ + } + + return end +} + +// findElseDirectiveLine finds the %else/%elif line within [start, end) at +// conditional depth 0. +func findElseDirectiveLine(rawLines []string, start, end int) int { + depth := 0 + + for lineIdx := start; lineIdx < end; lineIdx++ { + d := conditionalDepthChange(rawLines[lineIdx]) + + switch { + case d == 1: + depth++ + case d == -1: + depth-- + case depth == 0 && isConditionalBranchDirective(rawLines[lineIdx]): + return lineIdx + } + } + + return -1 +} + +// isMacroDefLine returns the macro name if the line is a %define or %global directive. +func isMacroDefLine(rawLine string) (string, bool) { + trimmed := strings.TrimSpace(rawLine) + tokens := strings.Fields(trimmed) + + const minMacroDefTokens = 2 + + if len(tokens) < minMacroDefTokens { + return "", false + } + + lower := strings.ToLower(tokens[0]) + if lower == "%define" || lower == "%global" { + // Strip trailing parentheses from macro names with parameters, + // e.g. "%define foo(x)" → "foo". + name := tokens[1] + if idx := strings.IndexByte(name, '('); idx >= 0 { + name = name[:idx] + } + + return name, true + } + + return "", false +} + +// getSectionNameAndPackageFromHeader extracts the section keyword and package name +// from a section header line. Uses the existing [GetPackageNameFromSectionHeader] +// for package name extraction. +func getSectionNameAndPackageFromHeader(rawLine string) (string, string) { + tokens := strings.Fields(strings.TrimSpace(rawLine)) + if len(tokens) == 0 { + return "", "" + } + + sectName := tokens[0] + + sectType, ok := sectionTypesByName[strings.ToLower(sectName)] + if !ok { + return sectName, "" + } + + pkg := getPackageNameForSection(sectType, tokens) + + return sectName, pkg +} + +// serializeTree flattens a [block] tree back into raw spec lines. +// The result preserves all original whitespace, comments, and blank lines. +func serializeTree(block *block) []string { + var lines []string + + switch block.Kind { + case rootBlock: + for _, child := range block.Children { + lines = append(lines, serializeTree(child)...) + } + + case sectionBlock: + if block.Header != "" { + lines = append(lines, block.Header) + } + + for _, child := range block.Children { + lines = append(lines, serializeTree(child)...) + } + + case conditionalBlock: + lines = append(lines, block.Header) + + for _, child := range block.Children { + lines = append(lines, serializeTree(child)...) + } + + if block.Else != nil { + if block.ElseDirective != "" { + lines = append(lines, block.ElseDirective) + } + + for _, child := range block.Else { + lines = append(lines, serializeTree(child)...) + } + } + + if block.Endif != "" { + lines = append(lines, block.Endif) + } + + case textBlock: + lines = append(lines, block.Lines...) + + case macroDefBlock: + lines = append(lines, block.Lines...) + } + + return lines +} + +// --- Tree query helpers --- + +// findSectionBlock finds the first section block matching name and package, +// searching recursively through conditional wrappers. +func findSectionBlock(root *block, name, pkg string) *block { + for _, child := range root.Children { + if result := findSectionInBlock(child, name, pkg); result != nil { + return result + } + } + + return nil +} + +func findSectionInBlock(block *block, name, pkg string) *block { + if block.Kind == sectionBlock && block.Name == name && block.Package == pkg { + return block + } + + if block.Kind == conditionalBlock { + for _, child := range block.Children { + if result := findSectionInBlock(child, name, pkg); result != nil { + return result + } + } + + for _, child := range block.Else { + if result := findSectionInBlock(child, name, pkg); result != nil { + return result + } + } + } + + return nil +} + +// findAllSectionBlocks returns all section blocks matching name and package. +func findAllSectionBlocks(root *block, name, pkg string) []*block { + var results []*block + collectMatchingSections(root, name, pkg, &results) + + return results +} + +func collectMatchingSections(block *block, name, pkg string, results *[]*block) { + if block.Kind == sectionBlock && block.Name == name && block.Package == pkg { + *results = append(*results, block) + } + + for _, child := range block.Children { + collectMatchingSections(child, name, pkg, results) + } + + if block.Kind == conditionalBlock { + for _, child := range block.Else { + collectMatchingSections(child, name, pkg, results) + } + } +} + +// findAllSectionBlocksByPackage returns all section blocks matching a package name +// (any section name). +func findAllSectionBlocksByPackage(root *block, pkg string) []*block { + var results []*block + collectSectionsByPackage(root, pkg, &results) + + return results +} + +func collectSectionsByPackage(block *block, pkg string, results *[]*block) { + if block.Kind == sectionBlock && block.Package == pkg { + *results = append(*results, block) + } + + for _, child := range block.Children { + collectSectionsByPackage(child, pkg, results) + } + + if block.Kind == conditionalBlock { + for _, child := range block.Else { + collectSectionsByPackage(child, pkg, results) + } + } +} + +// removeBlockFromParent removes a target block from any parent in the tree. +// It searches recursively through all [conditionalBlock] nesting levels. +func removeBlockFromParent(root *block, target *block) { + root.Children = filterBlocks(root.Children, target) + + for _, child := range root.Children { + if child.Kind == conditionalBlock { + removeFromConditional(child, target) + } + } +} + +func removeFromConditional(cond *block, target *block) { + cond.Children = filterBlocks(cond.Children, target) + + if cond.Else != nil { + cond.Else = filterBlocks(cond.Else, target) + } + + for _, child := range cond.Children { + if child.Kind == conditionalBlock { + removeFromConditional(child, target) + } + } + + for _, child := range cond.Else { + if child.Kind == conditionalBlock { + removeFromConditional(child, target) + } + } +} + +func filterBlocks(blocks []*block, exclude *block) []*block { + result := make([]*block, 0, len(blocks)) + + for _, b := range blocks { + if b != exclude { + result = append(result, b) + } + } + + return result +} + +// validateSectionRemoval checks that removing the given sections is safe. +// It detects patterns where the tree's structural section boundaries don't +// align with RPM's linear section ownership, which would produce incorrect +// output if sections were naively removed. +func validateSectionRemoval(root *block, toRemove []*block) error { + removeSet := make(map[*block]bool, len(toRemove)) + for _, b := range toRemove { + removeSet[b] = true + } + + // Check each level of the tree for unsafe patterns. + return validateRemovalInChildren(root.Children, removeSet) +} + +func validateRemovalInChildren(children []*block, removeSet map[*block]bool) error { + for childIdx, child := range children { + if child.Kind != conditionalBlock { + continue + } + + // Check if a wrapper conditional has orphaned text that semantically belongs + // to the section immediately preceding it. If that preceding section is being + // removed, the text would be orphaned. + // + // Note: cross-branch asymmetry (removing sections from %if but not %else, + // or vice versa) is intentionally allowed — a valid use case is removing + // a subpackage from one branch while keeping the alternative in the other. + // Text/macro content alongside surviving sections is also fine — it belongs + // to the section preceding the wrapper, not to the removed section. + if hasTextOrMacroContent(child.Children) && containsSectionBlocks(child) { + preceding := findPrecedingSection(children, childIdx) + if preceding != nil && removeSet[preceding] { + return fmt.Errorf("%%if block at %q "+ + "contains content belonging to the preceding section:\n%w", + child.Header, ErrConditionalSpansSections) + } + } + + // Check if removing sections from a wrapper would leave orphaned content + // in an adjacent non-wrapper conditional (case: adjacent content conditional + // after a wrapper whose only sections are being removed). + if wouldEmptyWrapper(child, removeSet) && childIdx+1 < len(children) { + next := children[childIdx+1] + if next.Kind == conditionalBlock && !containsSectionBlocks(next) && hasTextOrMacroContent(next.Children) { + return fmt.Errorf("content in %%if block at %q "+ + "would be orphaned after removing the preceding section:\n%w", + next.Header, ErrConditionalSpansSections) + } + } + + // Recurse into wrapper conditional's branches. + if err := validateRemovalInChildren(child.Children, removeSet); err != nil { + return err + } + + if child.Else != nil { + if err := validateRemovalInChildren(child.Else, removeSet); err != nil { + return err + } + } + } + + return nil +} + +// findPrecedingSection walks backwards from index i in children to find +// the most recent sectionBlock, skipping over text and other blocks. +func findPrecedingSection(children []*block, i int) *block { + for j := i - 1; j >= 0; j-- { + if children[j].Kind == sectionBlock { + return children[j] + } + } + + return nil +} + +func hasTextOrMacroContent(blocks []*block) bool { + for _, b := range blocks { + if b.Kind == textBlock || b.Kind == macroDefBlock { + return true + } + } + + return false +} + +// wouldEmptyWrapper checks if removing the targeted sections would leave +// a wrapper conditional with no section content in either branch. +func wouldEmptyWrapper(cond *block, removeSet map[*block]bool) bool { + if !containsSectionBlocks(cond) { + return false + } + + for _, child := range cond.Children { + if child.Kind == sectionBlock && !removeSet[child] { + return false + } + + if child.Kind == conditionalBlock && hasNonRemovedSectionsDeep(child, removeSet) { + return false + } + } + + for _, child := range cond.Else { + if child.Kind == sectionBlock && !removeSet[child] { + return false + } + + if child.Kind == conditionalBlock && hasNonRemovedSectionsDeep(child, removeSet) { + return false + } + } + + return true +} + +func hasNonRemovedSectionsDeep(block *block, removeSet map[*block]bool) bool { + if block.Kind == sectionBlock && !removeSet[block] { + return true + } + + for _, child := range block.Children { + if hasNonRemovedSectionsDeep(child, removeSet) { + return true + } + } + + if block.Kind == conditionalBlock { + for _, child := range block.Else { + if hasNonRemovedSectionsDeep(child, removeSet) { + return true + } + } + } + + return false +} diff --git a/internal/rpm/spec/tree_test.go b/internal/rpm/spec/tree_test.go new file mode 100644 index 00000000..94798913 --- /dev/null +++ b/internal/rpm/spec/tree_test.go @@ -0,0 +1,687 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package spec //nolint:testpackage // Tests access unexported tree types (block, parseTree, etc.). + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// --- Round-trip tests: parse → serialize must equal original --- + +func TestParseTreeRoundTrip(t *testing.T) { + tests := []struct { + name string + input string + }{ + { + name: "simple spec", + input: `Name: simple +Version: 1.0 +Release: 1 +Summary: A simple package + +%description +A simple package. + +%build +make + +%install +make install + +%files +/usr/bin/simple`, + }, + { + name: "conditional inside section", + input: `Name: test +Version: 1.0 + +%build +%if 0%{?with_debug} +CFLAGS="-g" make +%else +CFLAGS="-O2" make +%endif`, + }, + { + name: "conditional wrapping sections", + input: `Name: test +Version: 1.0 + +%if 0%{?with_docs} +%package docs +Summary: Documentation + +%description docs +Full docs. +%endif + +%build +make`, + }, + { + name: "else branch with different sections", + input: `Name: test +Version: 1.0 + +%if 0%{?with_docs} +%package docs +Summary: Documentation +%description docs +Full docs. +%else +%package minimal-docs +Summary: Minimal documentation +%description minimal-docs +Minimal docs. +%endif + +%build +make`, + }, + { + name: "straddling conditional", + input: `Name: test +Version: 1.0 + +%build +make + +%if 0%{?with_extra} +%install +make install EXTRA=1 +%endif + +%files +/usr/bin/test`, + }, + { + name: "macro definitions", + input: `Name: test +Version: 1.0 + +%global debug_package %{nil} +%define _builddir %{_topdir}/BUILD + +%build +%define buildflags -O2 -Wall +CFLAGS="%{buildflags}" make`, + }, + { + name: "multi-line macro definition", + input: `Name: test +Version: 1.0 + +%define common_flags \ + -DENABLE_FEATURE=ON \ + -DCMAKE_BUILD_TYPE=Release + +%build +cmake %{common_flags} . +make`, + }, + { + name: "continuation with section keyword", + input: `Name: test +Version: 1.0 + +%global extra_config \ + %files \ + something + +%build +make + +%files +/usr/bin/test`, + }, + { + name: "nested conditionals", + input: `Name: test +Version: 1.0 + +%build +%if 0%{?with_feature} +%if 0%{?with_debug} +make debug +%else +make feature +%endif +%endif`, + }, + { + name: "complex real-world pattern", + input: `Name: complex +Version: 2.0 +Release: 1 +Summary: Complex real-world test +License: MIT + +%global debug_package %{nil} +%define _builddir %{_topdir}/BUILD + +%description +A complex package. + +%package devel +Summary: Development files +Requires: %{name} = %{version}-%{release} + +%description devel +Development files for complex. + +%if 0%{?with_docs} +%package docs +Summary: Documentation subpackage + +%description docs +Full documentation. +%endif + +%prep +%autosetup + +%build +%define buildroot_flags --prefix=%{_prefix} +%if 0%{?with_debug} +CFLAGS="-g" ./configure %{buildroot_flags} +%else +./configure %{buildroot_flags} +%endif +make %{?_smp_mflags} + +%install +make install DESTDIR=%{buildroot} + +%if 0%{?with_docs} +%files docs +%doc README.md +%endif + +%files +%license LICENSE +/usr/bin/complex + +%files devel +/usr/include/complex.h + +%changelog`, + }, + { + name: "empty spec", + input: ``, + }, + { + name: "preamble only", + input: `Name: preamble-only`, + }, + { + name: "comments and blank lines", + input: `# This is a comment +Name: test + +# Another comment + +%build +# Build comment +make + +%install +make install`, + }, + { + name: "multiple conditionals at top level", + input: `Name: test + +%if 0%{?with_a} +%package a +Summary: Package A +%endif + +%if 0%{?with_b} +%package b +Summary: Package B +%endif + +%build +make`, + }, + { + name: "elif chain", + input: `Name: test + +%if 0%{?rhel} +Requires: rhel-thing +%elif 0%{?fedora} +Requires: fedora-thing +%elif 0%{?suse} +Requires: suse-thing +%else +Requires: generic-thing +%endif + +%build +make`, + }, + { + name: "elif with sections in branches", + input: `Name: test + +%if 0%{?rhel} +%package rhel-extras +Summary: RHEL extras +%elif 0%{?fedora} +%package fedora-extras +Summary: Fedora extras +%else +%package generic-extras +Summary: Generic extras +%endif + +%build +make`, + }, + { + name: "elif without terminal else", + input: `Name: test + +%if 0%{?rhel} +Requires: rhel-thing +%elif 0%{?fedora} +Requires: fedora-thing +%endif + +%build +make`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + lines := splitLines(tt.input) + + root, err := parseTree(lines) + require.NoError(t, err) + + serialized := serializeTree(root) + assert.Equal(t, lines, serialized, "round-trip should preserve all lines exactly") + }) + } +} + +//nolint:maintidx // Comprehensive table-driven structural test covering all block kinds. +func TestParseTreeStructure(t *testing.T) { + t.Run("simple spec sections", func(t *testing.T) { + input := `Name: test +Version: 1.0 + +%description +A test. + +%build +make + +%install +make install + +%files +/usr/bin/test` + lines := splitLines(input) + + root, err := parseTree(lines) + require.NoError(t, err) + + // Preamble is now wrapped in a sectionBlock with empty name. + // Named sections are sectionBlock children. + var sectionNames []string + + for _, child := range root.Children { + if child.Kind == sectionBlock { + sectionNames = append(sectionNames, child.Name) + } + } + + assert.Equal(t, []string{"", "%description", "%build", "%install", "%files"}, sectionNames) + }) + + t.Run("straddling conditional is wrapper not content", func(t *testing.T) { + input := `Name: test + +%build +make + +%if 0%{?with_extra} +%install +make install +%endif + +%files +/usr/bin/test` + lines := splitLines(input) + + root, err := parseTree(lines) + require.NoError(t, err) + + // %build should NOT contain the %if. + buildSect := findSectionBlock(root, "%build", "") + require.NotNil(t, buildSect) + + for _, child := range buildSect.Children { + assert.NotEqual(t, conditionalBlock, child.Kind, + "straddling %%if should be a sibling wrapper, not %%build content") + } + + // %install should be findable inside the conditional wrapper. + installSect := findSectionBlock(root, "%install", "") + assert.NotNil(t, installSect, "%%install should be findable inside conditional wrapper") + }) + + t.Run("conditional inside section is content", func(t *testing.T) { + input := `Name: test + +%build +%if 0%{?with_debug} +make debug +%else +make release +%endif` + lines := splitLines(input) + + root, err := parseTree(lines) + require.NoError(t, err) + + buildSect := findSectionBlock(root, "%build", "") + require.NotNil(t, buildSect) + + hasConditional := false + + for _, child := range buildSect.Children { + if child.Kind == conditionalBlock { + hasConditional = true + + break + } + } + + assert.True(t, hasConditional, "%%if inside section should be a content conditionalBlock") + }) + + t.Run("else branch with sections is wrapper", func(t *testing.T) { + input := `Name: test + +%if 0%{?with_docs} +%package docs +Summary: Docs +%else +%package minimal +Summary: Minimal +%endif + +%build +make` + lines := splitLines(input) + + root, err := parseTree(lines) + require.NoError(t, err) + + docsSect := findSectionBlock(root, "%package", "docs") + assert.NotNil(t, docsSect, "%%package docs in then branch") + + minSect := findSectionBlock(root, "%package", "minimal") + assert.NotNil(t, minSect, "%%package minimal in else branch") + }) + + t.Run("macro def recognized", func(t *testing.T) { + input := `Name: test + +%build +%define buildflags -O2 +CFLAGS="%{buildflags}" make` + lines := splitLines(input) + + root, err := parseTree(lines) + require.NoError(t, err) + + buildSect := findSectionBlock(root, "%build", "") + require.NotNil(t, buildSect) + + hasMacroDef := false + + for _, child := range buildSect.Children { + if child.Kind == macroDefBlock && child.Name == "buildflags" { + hasMacroDef = true + + break + } + } + + assert.True(t, hasMacroDef, "%%define should be recognized as macroDefBlock") + }) + + t.Run("multi-line macro continuation", func(t *testing.T) { + input := `Name: test + +%define flags \ + -DFOO=ON \ + -DBAR=OFF + +%build +make` + lines := splitLines(input) + + root, err := parseTree(lines) + require.NoError(t, err) + + // The macro should have 3 lines (header + 2 continuation). + var macroBlock *block + + for _, child := range root.Children { + if child.Kind == sectionBlock && child.Name == "" { + // Preamble — look for macro. + for _, pChild := range child.Children { + if pChild.Kind == macroDefBlock && pChild.Name == "flags" { + macroBlock = pChild + + break + } + } + } + + if child.Kind == macroDefBlock && child.Name == "flags" { + macroBlock = child + + break + } + } + + require.NotNil(t, macroBlock, "should find macroDefBlock for 'flags'") + assert.Len(t, macroBlock.Lines, 3, "multi-line macro should have 3 lines") + }) + + t.Run("continuation with section keyword not a section", func(t *testing.T) { + input := `Name: test + +%global extra \ + %files \ + stuff + +%build +make + +%files +/usr/bin/test` + lines := splitLines(input) + + root, err := parseTree(lines) + require.NoError(t, err) + + // Only one real %files section. + allFiles := findAllSectionBlocks(root, "%files", "") + assert.Len(t, allFiles, 1, "continuation body should not create phantom %%files section") + }) + + t.Run("find sections by package", func(t *testing.T) { + input := `Name: test + +%package devel +Summary: Dev + +%description devel +Dev files. + +%files devel +/usr/include/* + +%build +make` + lines := splitLines(input) + + root, err := parseTree(lines) + require.NoError(t, err) + + develSections := findAllSectionBlocksByPackage(root, "devel") + assert.Len(t, develSections, 3, "should find 3 sections for 'devel' package") + + var names []string + for _, s := range develSections { + names = append(names, s.Name) + } + + assert.Contains(t, names, "%package") + assert.Contains(t, names, "%description") + assert.Contains(t, names, "%files") + }) + + t.Run("elif chain structure", func(t *testing.T) { + input := `Name: test + +%if 0%{?rhel} +Requires: rhel-thing +%elif 0%{?fedora} +Requires: fedora-thing +%elif 0%{?suse} +Requires: suse-thing +%else +Requires: generic-thing +%endif + +%build +make` + lines := splitLines(input) + + root, err := parseTree(lines) + require.NoError(t, err) + + // Find the conditional block (inside preamble section as content). + var cond *block + + for _, child := range root.Children { + if child.Kind == sectionBlock && child.Name == "" { + for _, pc := range child.Children { + if pc.Kind == conditionalBlock { + cond = pc + + break + } + } + } + } + + require.NotNil(t, cond, "should find conditional in preamble") + assert.Equal(t, "%if 0%{?rhel}", cond.Header) + assert.Equal(t, "%endif", cond.Endif) + + // Then-branch: "Requires: rhel-thing" + require.Len(t, cond.Children, 1) + assert.Equal(t, textBlock, cond.Children[0].Kind) + assert.Equal(t, []string{"Requires: rhel-thing"}, cond.Children[0].Lines) + + // Else is a single conditionalBlock for %elif fedora. + require.Len(t, cond.Else, 1) + elif1 := cond.Else[0] + assert.Equal(t, conditionalBlock, elif1.Kind) + assert.Equal(t, "%elif 0%{?fedora}", elif1.Header) + assert.Empty(t, elif1.Endif, "inner elif should not own %%endif") + + // elif1 then-branch: "Requires: fedora-thing" + require.Len(t, elif1.Children, 1) + assert.Equal(t, []string{"Requires: fedora-thing"}, elif1.Children[0].Lines) + + // elif1 else is another conditionalBlock for %elif suse. + require.Len(t, elif1.Else, 1) + elif2 := elif1.Else[0] + assert.Equal(t, conditionalBlock, elif2.Kind) + assert.Equal(t, "%elif 0%{?suse}", elif2.Header) + assert.Empty(t, elif2.Endif) + + // elif2 then-branch: "Requires: suse-thing" + require.Len(t, elif2.Children, 1) + assert.Equal(t, []string{"Requires: suse-thing"}, elif2.Children[0].Lines) + + // elif2 else is a terminal %else with content blocks. + assert.Equal(t, "%else", elif2.ElseDirective) + require.Len(t, elif2.Else, 1) + assert.Equal(t, []string{"Requires: generic-thing"}, elif2.Else[0].Lines) + }) + + t.Run("elif with sections finds all packages", func(t *testing.T) { + input := `Name: test + +%if 0%{?rhel} +%package rhel-extras +Summary: RHEL extras +%elif 0%{?fedora} +%package fedora-extras +Summary: Fedora extras +%else +%package generic-extras +Summary: Generic extras +%endif + +%build +make` + lines := splitLines(input) + + root, err := parseTree(lines) + require.NoError(t, err) + + for _, pkg := range []string{"rhel-extras", "fedora-extras", "generic-extras"} { + sect := findSectionBlock(root, "%package", pkg) + assert.NotNil(t, sect, "should find %%package %s", pkg) + } + }) +} + +func TestParseTreeErrors(t *testing.T) { + t.Run("unmatched endif", func(t *testing.T) { + input := `Name: test +%endif` + lines := splitLines(input) + + _, err := parseTree(lines) + require.Error(t, err) + assert.Contains(t, err.Error(), "unmatched %endif") + }) + + t.Run("unmatched if", func(t *testing.T) { + input := `Name: test +%if 0%{?foo}` + lines := splitLines(input) + + _, err := parseTree(lines) + require.Error(t, err) + assert.Contains(t, err.Error(), "unmatched %if") + }) +} + +// splitLines splits input into lines. For an empty string, returns a slice with +// one empty element (matching strings.Split behavior). +func splitLines(input string) []string { + return strings.Split(input, "\n") +} From 36f496bfc6deec5a2c29f02f5e1a57029830829a Mon Sep 17 00:00:00 2001 From: Thien Trung Vuong Date: Tue, 26 May 2026 21:58:56 +0000 Subject: [PATCH 3/9] refactor(spec): introduce specTree/sectionHandle and migrate first batch of edit operations Add a tree interface in tree_api.go so edit.go callers no longer reach into the parse tree directly. specTree wraps the parsed *block root and exposes query helpers (Section, Sections, SectionsByPackage, RemoveSections); sectionHandle wraps a section block with append/prepend operations. Spec.mutateTree handles parse -> mutate -> serialize so call sites become 3-line orchestrations. Three edit.go operations now go through the interface as a starting point: AppendLinesToSection, RemoveSection, and RemoveSubpackage. Remaining Visit-based operations will migrate in follow-up commits. --- internal/rpm/spec/edit.go | 95 +++++++++++--------------- internal/rpm/spec/tree_api.go | 121 ++++++++++++++++++++++++++++++++++ 2 files changed, 161 insertions(+), 55 deletions(-) create mode 100644 internal/rpm/spec/tree_api.go diff --git a/internal/rpm/spec/edit.go b/internal/rpm/spec/edit.go index ac318d71..9fed1390 100644 --- a/internal/rpm/spec/edit.go +++ b/internal/rpm/spec/edit.go @@ -450,43 +450,24 @@ func (s *Spec) PrependLinesToSection(sectionName, packageName string, lines []st } // AppendLinesToSection appends the given lines at the end of the specified section, placing -// them just after the current last line of the section. An error is returned if the identified -// section cannot be found in the spec. +// them just after the current last line of the section's content. When a conditional block +// (%if/%endif) straddles the section boundary, the appended lines are placed before the +// conditional — they do not land inside it. +// +// An error is returned if the identified section cannot be found in the spec. func (s *Spec) AppendLinesToSection(sectionName, packageName string, lines []string) (err error) { slog.Debug("Appending lines to spec", "section", sectionName, "package", packageName, "lines", lines) - var updated bool - - err = s.Visit(func(ctx *Context) error { - // Make sure this is a section start. - if ctx.Target.TargetType != SectionEndTarget { - return nil - } - - // Make sure section name matches. - if ctx.CurrentSection.SectName != sectionName { - return nil - } - - // Make sure package name matches. - if ctx.CurrentSection.Package != packageName { - return nil + return s.mutateTree(func(tree *specTree) error { + sect := tree.Section(sectionName, packageName) + if sect == nil { + return fmt.Errorf("section %#q (package=%#q) not found:\n%w", sectionName, packageName, ErrSectionNotFound) } - // Insert the line. - ctx.InsertLinesBefore(lines) - - // Note that we've made an update. - updated = true + sect.AppendLines(lines) return nil }) - - if !updated { - return fmt.Errorf("section %#q (package=%#q) not found:\n%w", sectionName, packageName, ErrSectionNotFound) - } - - return err } // SearchAndReplace performs a regex-based search-and-replace against all lines in the specified @@ -796,20 +777,14 @@ func (s *Spec) RemoveSection(sectionName, packageName string) error { return errors.New("cannot remove the global/preamble section") } - ranges, err := s.collectSectionRanges(func(sn, pn string) bool { - return sn == sectionName && pn == packageName - }) - if err != nil { - return fmt.Errorf("failed to scan spec for section %#q (package=%#q):\n%w", sectionName, packageName, err) - } - - if len(ranges) == 0 { - return fmt.Errorf("section %#q (package=%#q) not found:\n%w", sectionName, packageName, ErrSectionNotFound) - } - - s.removeRanges(ranges) + return s.mutateTree(func(tree *specTree) error { + matches := tree.Sections(sectionName, packageName) + if len(matches) == 0 { + return fmt.Errorf("section %#q (package=%#q) not found:\n%w", sectionName, packageName, ErrSectionNotFound) + } - return nil + return tree.RemoveSections(matches) + }) } // RemoveSubpackage removes every section in the spec that is associated with the given @@ -841,20 +816,14 @@ func (s *Spec) RemoveSubpackage(packageName string) error { return errors.New("cannot remove sub-package with empty name") } - ranges, err := s.collectSectionRanges(func(_, pn string) bool { - return pn == packageName - }) - if err != nil { - return fmt.Errorf("failed to scan spec for sub-package %#q:\n%w", packageName, err) - } - - if len(ranges) == 0 { - return fmt.Errorf("sub-package %#q not found:\n%w", packageName, ErrSectionNotFound) - } - - s.removeRanges(ranges) + return s.mutateTree(func(tree *specTree) error { + matches := tree.SectionsByPackage(packageName) + if len(matches) == 0 { + return fmt.Errorf("sub-package %#q not found:\n%w", packageName, ErrSectionNotFound) + } - return nil + return tree.RemoveSections(matches) + }) } // sectionLineRange identifies a half-open `[start, end)` range of raw line numbers @@ -950,7 +919,23 @@ func collectConditionalPairs(rawLines []string) ([]conditionalPair, error) { stack []int ) + inMacroCont := false + for lineNum, line := range rawLines { + if inMacroCont { + inMacroCont = strings.HasSuffix(line, "\\") + + continue + } + + // Only skip continuations that start from a %define/%global line — + // those are macro body text where %if/%endif are not structural. + if _, isMacro := isMacroDefLine(line); isMacro && strings.HasSuffix(line, "\\") { + inMacroCont = true + + continue + } + switch conditionalDepthChange(line) { case 1: stack = append(stack, lineNum) diff --git a/internal/rpm/spec/tree_api.go b/internal/rpm/spec/tree_api.go new file mode 100644 index 00000000..e0e80e67 --- /dev/null +++ b/internal/rpm/spec/tree_api.go @@ -0,0 +1,121 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package spec + +import "fmt" + +// specTree is an opaque handle wrapping the parsed structural tree of a spec. +// Operations on the tree are exposed via methods so callers in edit.go do not +// depend on the internal [block] representation. Obtain one via [Spec.mutateTree]. +type specTree struct { + root *block +} + +// sectionHandle is an opaque reference to a single section within a [specTree]. +// Returned by [specTree.Section] / [specTree.Sections] and used to apply edits +// to that section's content. +type sectionHandle struct { + block *block + tree *specTree +} + +// mutateTree parses the spec into a tree, runs mutate against it, and serializes +// the tree back into [Spec.rawLines]. If mutate returns an error, [Spec.rawLines] +// is left unchanged. +func (s *Spec) mutateTree(mutate func(*specTree) error) error { + root, err := parseTree(s.rawLines) + if err != nil { + return fmt.Errorf("parsing spec tree:\n%w", err) + } + + tree := &specTree{root: root} + if err := mutate(tree); err != nil { + return err + } + + s.rawLines = serializeTree(root) + + return nil +} + +// --- specTree query API --- + +// Section returns a handle to the first section matching name and pkg, or nil +// if no such section exists. Searches recursively into conditional wrappers. +func (t *specTree) Section(name, pkg string) *sectionHandle { + b := findSectionBlock(t.root, name, pkg) + if b == nil { + return nil + } + + return §ionHandle{block: b, tree: t} +} + +// Sections returns handles for every section matching name and pkg, including +// sections inside conditional branches (both %if and %else). +func (t *specTree) Sections(name, pkg string) []*sectionHandle { + return t.handles(findAllSectionBlocks(t.root, name, pkg)) +} + +// SectionsByPackage returns handles for every section associated with the given +// package name (regardless of section keyword). +func (t *specTree) SectionsByPackage(pkg string) []*sectionHandle { + return t.handles(findAllSectionBlocksByPackage(t.root, pkg)) +} + +func (t *specTree) handles(blocks []*block) []*sectionHandle { + hs := make([]*sectionHandle, len(blocks)) + for i, b := range blocks { + hs[i] = §ionHandle{block: b, tree: t} + } + + return hs +} + +// --- specTree mutation API --- + +// RemoveSections removes the given sections from the tree. Removal is validated +// as a set: if any one removal would orphan content or break a conditional's +// semantics, the entire operation fails and the tree is left unmodified. +func (t *specTree) RemoveSections(handles []*sectionHandle) error { + blocks := make([]*block, len(handles)) + for i, h := range handles { + blocks[i] = h.block + } + + if err := validateSectionRemoval(t.root, blocks); err != nil { + return err + } + + for _, b := range blocks { + removeBlockFromParent(t.root, b) + } + + return nil +} + +// --- sectionHandle accessors and mutations --- + +// Name returns the section's keyword (e.g. "%build"). Empty for the preamble. +func (h *sectionHandle) Name() string { return h.block.Name } + +// Package returns the section's package qualifier (e.g. "devel"). Empty for +// sections that target the main package. +func (h *sectionHandle) Package() string { return h.block.Package } + +// AppendLines appends the given lines as a new text block at the end of the +// section's content. +func (h *sectionHandle) AppendLines(lines []string) { + h.block.Children = append(h.block.Children, &block{ + Kind: textBlock, + Lines: lines, + }) +} + +// PrependLines inserts the given lines as a new text block at the start of the +// section's content (right after the section header). +func (h *sectionHandle) PrependLines(lines []string) { + newChild := &block{Kind: textBlock, Lines: lines} + h.block.Children = append([]*block{newChild}, h.block.Children...) +} From 6bdbefb054005706df8346408cc5a90047c39825 Mon Sep 17 00:00:00 2001 From: Thien Trung Vuong Date: Thu, 28 May 2026 23:37:21 +0000 Subject: [PATCH 4/9] refactor(spec): migrate HasSection to tree interface Move HasSection off the Visit-based scan and onto the tree interface. Adds Spec.inspectTree (the read-only sibling of mutateTree) and specTree.HasSection, which walks the parsed tree (including sections inside conditional wrappers, matching previous Visit semantics). --- internal/rpm/spec/edit.go | 8 ++----- internal/rpm/spec/tree_api.go | 44 ++++++++++++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/internal/rpm/spec/edit.go b/internal/rpm/spec/edit.go index 9fed1390..c4d87c0a 100644 --- a/internal/rpm/spec/edit.go +++ b/internal/rpm/spec/edit.go @@ -599,10 +599,8 @@ func ParsePatchTagNumber(tag string) (int, bool) { func (s *Spec) HasSection(sectionName string) (bool, error) { var found bool - err := s.Visit(func(ctx *Context) error { - if ctx.Target.TargetType == SectionStartTarget && ctx.CurrentSection.SectName == sectionName { - found = true - } + err := s.inspectTree(func(tree *specTree) error { + found = tree.HasSection(sectionName) return nil }) @@ -928,8 +926,6 @@ func collectConditionalPairs(rawLines []string) ([]conditionalPair, error) { continue } - // Only skip continuations that start from a %define/%global line — - // those are macro body text where %if/%endif are not structural. if _, isMacro := isMacroDefLine(line); isMacro && strings.HasSuffix(line, "\\") { inMacroCont = true diff --git a/internal/rpm/spec/tree_api.go b/internal/rpm/spec/tree_api.go index e0e80e67..c4e030b8 100644 --- a/internal/rpm/spec/tree_api.go +++ b/internal/rpm/spec/tree_api.go @@ -7,7 +7,8 @@ import "fmt" // specTree is an opaque handle wrapping the parsed structural tree of a spec. // Operations on the tree are exposed via methods so callers in edit.go do not -// depend on the internal [block] representation. Obtain one via [Spec.mutateTree]. +// depend on the internal [block] representation. Obtain one via [Spec.mutateTree] +// or [Spec.inspectTree]. type specTree struct { root *block } @@ -39,6 +40,18 @@ func (s *Spec) mutateTree(mutate func(*specTree) error) error { return nil } +// inspectTree parses the spec into a tree and passes it to inspect for read-only +// inspection. The tree is discarded after inspect returns; [Spec.rawLines] is +// never modified. +func (s *Spec) inspectTree(inspect func(*specTree) error) error { + root, err := parseTree(s.rawLines) + if err != nil { + return fmt.Errorf("parsing spec tree:\n%w", err) + } + + return inspect(&specTree{root: root}) +} + // --- specTree query API --- // Section returns a handle to the first section matching name and pkg, or nil @@ -52,6 +65,35 @@ func (t *specTree) Section(name, pkg string) *sectionHandle { return §ionHandle{block: b, tree: t} } +// HasSection reports whether the tree contains any section with the given name +// (regardless of package qualifier), including sections inside conditional +// wrappers. +func (t *specTree) HasSection(name string) bool { + return hasSectionWithName(t.root, name) +} + +func hasSectionWithName(b *block, name string) bool { + if b.Kind == sectionBlock && b.Name == name { + return true + } + + for _, child := range b.Children { + if hasSectionWithName(child, name) { + return true + } + } + + if b.Kind == conditionalBlock { + for _, child := range b.Else { + if hasSectionWithName(child, name) { + return true + } + } + } + + return false +} + // Sections returns handles for every section matching name and pkg, including // sections inside conditional branches (both %if and %else). func (t *specTree) Sections(name, pkg string) []*sectionHandle { From 8972a32bec59600e255b823804c8bcd084161cb4 Mon Sep 17 00:00:00 2001 From: Thien Trung Vuong Date: Thu, 28 May 2026 23:42:26 +0000 Subject: [PATCH 5/9] refactor(spec): migrate tag family operations to tree interface Add line-level iteration to the tree interface (specTree.VisitAllLines / sectionHandle.VisitLines + lineHandle) plus a tag-aware insertion helper (sectionHandle.InsertTag), and use them to rewrite the tag-mutating operations on Spec. No behavior change for callers. Removes findInsertTagPosition, skipPastConditional, and insertTagScanResult. --- internal/rpm/spec/edit.go | 240 +++++++++---------------- internal/rpm/spec/tree_api.go | 320 ++++++++++++++++++++++++++++++++++ 2 files changed, 403 insertions(+), 157 deletions(-) diff --git a/internal/rpm/spec/edit.go b/internal/rpm/spec/edit.go index c4d87c0a..cad37ca9 100644 --- a/internal/rpm/spec/edit.go +++ b/internal/rpm/spec/edit.go @@ -55,23 +55,34 @@ func (s *Spec) UpdateExistingTag(packageName string, tag string, value string) ( var updated bool - err = s.VisitTagsPackage(packageName, func(tagLine *TagLine, ctx *Context) error { - if strings.ToLower(tagLine.Tag) != tagToCompareAgainst { - return nil - } + err = s.mutateTree(func(tree *specTree) error { + return tree.VisitAllLines(func(secName, secPkg string, lh *lineHandle) error { + if updated || secPkg != packageName || !isTagBearingSection(secName) { + return nil + } - ctx.ReplaceLine(fmt.Sprintf("%s: %s", tag, value)) + parsedTag, _, isTag := parseTagLine(lh.Text) + if !isTag || strings.ToLower(parsedTag) != tagToCompareAgainst { + return nil + } - updated = true + lh.Replace(fmt.Sprintf("%s: %s", tag, value)) - return nil + updated = true + + return nil + }) }) + if err != nil { + return err + } + if !updated { return fmt.Errorf("tag %#q not found in spec:\n%w", tag, ErrNoSuchTag) } - return err + return nil } // RemoveTag removes all instances of the given tag from the spec, under the specified @@ -146,16 +157,23 @@ func (s *Spec) VisitTagsPackage(packageName string, visitor func(tagLine *TagLin func (s *Spec) RemoveTagsMatching(packageName string, matcher func(tag, value string) bool) (int, error) { removed := 0 - err := s.VisitTagsPackage(packageName, func(tagLine *TagLine, ctx *Context) error { - if !matcher(tagLine.Tag, tagLine.Value) { - return nil - } + err := s.mutateTree(func(tree *specTree) error { + return tree.VisitAllLines(func(secName, secPkg string, lh *lineHandle) error { + if secPkg != packageName || !isTagBearingSection(secName) { + return nil + } - ctx.RemoveLine() + parsedTag, parsedValue, isTag := parseTagLine(lh.Text) + if !isTag || !matcher(parsedTag, parsedValue) { + return nil + } - removed++ + lh.Remove() - return nil + removed++ + + return nil + }) }) return removed, err @@ -271,135 +289,21 @@ func isConditionalBranchDirective(rawLine string) bool { func (s *Spec) InsertTag(packageName string, tag string, value string) error { slog.Debug("Inserting tag to spec", "package", packageName, "tag", tag, "value", value) - family := tagFamily(tag) - newLine := fmt.Sprintf("%s: %s", tag, value) - sectionName := "" if packageName != "" { sectionName = "%package" } - result, err := s.findInsertTagPosition(sectionName, packageName, family) - if err != nil { - return err - } - - // Determine insertion point: prefer same-family, then any tag, then fall back to AddTag. - insertAfterLine := result.lastFamilyTagLineNum - if insertAfterLine < 0 { - insertAfterLine = result.lastAnyTagLineNum - } - - if insertAfterLine < 0 { - // No tags at all — fall back to AddTag behavior. - return s.AddTag(packageName, tag, value) - } - - // If the insertion point is inside a conditional block, move it forward past the - // closing %endif so the new tag doesn't become conditional. - insertAfterLine = s.skipPastConditional(insertAfterLine, result.sectionEndLineNum) - - // Insert after the found line (0-indexed, so insertAfterLine+1). - s.InsertLinesAt([]string{newLine}, insertAfterLine+1) - - return nil -} - -// insertTagScanResult holds the results of scanning a spec for a tag insertion point. -type insertTagScanResult struct { - lastFamilyTagLineNum int - lastAnyTagLineNum int - sectionEndLineNum int -} - -// findInsertTagPosition scans the spec to find the best insertion point for a tag of the -// given family within the specified section/package. Returns the scan results or an error -// if the target section is not found. -func (s *Spec) findInsertTagPosition( - sectionName, packageName, family string, -) (insertTagScanResult, error) { - result := insertTagScanResult{ - lastFamilyTagLineNum: -1, - lastAnyTagLineNum: -1, - sectionEndLineNum: len(s.rawLines), - } - - sectionFound := false - - err := s.Visit(func(ctx *Context) error { - if ctx.Target.TargetType == SectionStartTarget { - if ctx.CurrentSection.SectName == sectionName && ctx.CurrentSection.Package == packageName { - sectionFound = true - } - } - - if ctx.Target.TargetType == SectionEndTarget { - if ctx.CurrentSection.SectName == sectionName && ctx.CurrentSection.Package == packageName { - result.sectionEndLineNum = ctx.CurrentLineNum - } - } - - if ctx.Target.TargetType != SectionLineTarget { - return nil - } - - if ctx.CurrentSection.SectName != sectionName || ctx.CurrentSection.Package != packageName { - return nil - } - - if ctx.Target.Line.Parsed.GetType() != Tag { - return nil - } - - tagLine, ok := ctx.Target.Line.Parsed.(*TagLine) - if !ok { - return nil + return s.mutateTree(func(tree *specTree) error { + sect := tree.Section(sectionName, packageName) + if sect == nil { + return fmt.Errorf("section %#q (package=%#q) not found:\n%w", sectionName, packageName, ErrSectionNotFound) } - result.lastAnyTagLineNum = ctx.CurrentLineNum - - if tagFamily(tagLine.Tag) == family { - result.lastFamilyTagLineNum = ctx.CurrentLineNum - } + sect.InsertTag(tag, value, tagFamily(tag)) return nil }) - if err != nil { - return result, fmt.Errorf("failed to scan spec for tag insertion point:\n%w", err) - } - - if !sectionFound { - return result, fmt.Errorf("section %#q (package=%#q) not found:\n%w", sectionName, packageName, ErrSectionNotFound) - } - - return result, nil -} - -// skipPastConditional checks whether lineNum falls inside a conditional block by computing -// the conditional nesting depth from the start of the file up to that line. If depth > 0, -// it scans forward to find the %endif that brings depth back to 0 and returns that line -// number. Otherwise it returns lineNum unchanged. -func (s *Spec) skipPastConditional(lineNum int, sectionEnd int) int { - // Compute conditional depth at the insertion point by scanning from the start. - depth := 0 - for i := 0; i <= lineNum && i < len(s.rawLines); i++ { - depth += conditionalDepthChange(s.rawLines[i]) - } - - if depth <= 0 { - return lineNum - } - - // Scan forward to find the %endif that closes the conditional. - for i := lineNum + 1; i < sectionEnd && i < len(s.rawLines); i++ { - depth += conditionalDepthChange(s.rawLines[i]) - if depth <= 0 { - return i - } - } - - // Could not find a closing %endif within the section; return the original position. - return lineNum } // PrependLinesToSection prepends the given lines to the start of the specified section, placing @@ -672,23 +576,34 @@ func (s *Spec) RemovePatchEntry(pattern string) error { func (s *Spec) removePatchTagsMatching(pattern string) (int, error) { removed := 0 - err := s.VisitTags(func(tagLine *TagLine, ctx *Context) error { - if _, ok := ParsePatchTagNumber(tagLine.Tag); !ok { - return nil - } + err := s.mutateTree(func(tree *specTree) error { + return tree.VisitAllLines(func(secName, _ string, lh *lineHandle) error { + if !isTagBearingSection(secName) { + return nil + } - matched, matchErr := doublestar.Match(pattern, tagLine.Value) - if matchErr != nil { - return fmt.Errorf("failed to match glob pattern %#q against %#q:\n%w", pattern, tagLine.Value, matchErr) - } + parsedTag, parsedValue, isTag := parseTagLine(lh.Text) + if !isTag { + return nil + } - if matched { - ctx.RemoveLine() + if _, ok := ParsePatchTagNumber(parsedTag); !ok { + return nil + } - removed++ - } + matched, matchErr := doublestar.Match(pattern, parsedValue) + if matchErr != nil { + return fmt.Errorf("failed to match glob pattern %#q against %#q:\n%w", pattern, parsedValue, matchErr) + } - return nil + if matched { + lh.Remove() + + removed++ + } + + return nil + }) }) return removed, err @@ -739,17 +654,28 @@ func (s *Spec) GetHighestPatchTagNumber() (int, error) { highest := -1 unnumberedCount := 0 - err := s.VisitTags(func(tagLine *TagLine, _ *Context) error { - num, isPatchTag := ParsePatchTagNumber(tagLine.Tag) - if isPatchTag && num > highest { - highest = num - } else if strings.EqualFold(tagLine.Tag, "patch") { - // Bare "Patch:" with no numeric suffix — RPM auto-numbers these - // sequentially starting from 0. - unnumberedCount++ - } + err := s.inspectTree(func(tree *specTree) error { + return tree.VisitAllLines(func(secName, _ string, lh *lineHandle) error { + if !isTagBearingSection(secName) { + return nil + } - return nil + parsedTag, _, isTag := parseTagLine(lh.Text) + if !isTag { + return nil + } + + num, isPatchTag := ParsePatchTagNumber(parsedTag) + if isPatchTag && num > highest { + highest = num + } else if strings.EqualFold(parsedTag, "patch") { + // Bare "Patch:" with no numeric suffix — RPM auto-numbers these + // sequentially starting from 0. + unnumberedCount++ + } + + return nil + }) }) // Unnumbered patches occupy slots 0..unnumberedCount-1. diff --git a/internal/rpm/spec/tree_api.go b/internal/rpm/spec/tree_api.go index c4e030b8..00cc35f2 100644 --- a/internal/rpm/spec/tree_api.go +++ b/internal/rpm/spec/tree_api.go @@ -161,3 +161,323 @@ func (h *sectionHandle) PrependLines(lines []string) { newChild := &block{Kind: textBlock, Lines: lines} h.block.Children = append([]*block{newChild}, h.block.Children...) } + +// --- Line-level iteration & mutation --- + +// lineHandle is an opaque reference to a single content line within a tree. +// Mutations (Replace, Remove) are queued during iteration and applied when the +// enclosing [specTree.VisitAllLines] / [sectionHandle.VisitLines] call returns, +// so callers can mutate freely during the walk without invalidating indices. +type lineHandle struct { + // Text is the original line text. Mutations made via Replace do not update + // this field; callers should treat the visited handle as a single snapshot. + Text string + + block *block + idx int + replaced bool + removed bool + newText string +} + +// Replace marks the line for replacement with newText. A subsequent Remove +// overrides any prior Replace; subsequent Replace overrides any prior Remove. +func (lh *lineHandle) Replace(newText string) { + lh.replaced = true + lh.removed = false + lh.newText = newText +} + +// Remove marks the line for deletion. +func (lh *lineHandle) Remove() { + lh.removed = true + lh.replaced = false +} + +// VisitAllLines walks every content line in the spec (text-block lines only; +// macro definitions and section/conditional headers are skipped). The visitor +// receives the enclosing section name and package qualifier plus a handle that +// can buffer Replace/Remove mutations. Mutations are flushed after the walk. +// Returning a non-nil error stops iteration; buffered mutations made prior to +// the error are still flushed. +func (t *specTree) VisitAllLines(visit func(secName, secPkg string, lh *lineHandle) error) error { + var handles []*lineHandle + + visitErr := collectAndVisitLines(t.root, "", "", visit, &handles) + + flushLineMutations(handles) + + return visitErr +} + +// VisitLines walks every content line inside this section, including lines +// nested inside conditional branches. Macro definitions and section/conditional +// headers are skipped. See [specTree.VisitAllLines] for mutation semantics. +func (h *sectionHandle) VisitLines(visit func(lh *lineHandle) error) error { + var handles []*lineHandle + + wrap := func(_, _ string, lh *lineHandle) error { return visit(lh) } + + visitErr := collectAndVisitLines(h.block, h.block.Name, h.block.Package, wrap, &handles) + + flushLineMutations(handles) + + return visitErr +} + +// collectAndVisitLines walks b, calls visit on every text-line, and records +// each handle for later mutation flushing. +func collectAndVisitLines( + b *block, + secName, secPkg string, + visit func(string, string, *lineHandle) error, + handles *[]*lineHandle, +) error { + switch b.Kind { + case rootBlock: + for _, child := range b.Children { + if err := collectAndVisitLines(child, secName, secPkg, visit, handles); err != nil { + return err + } + } + + case sectionBlock: + for _, child := range b.Children { + if err := collectAndVisitLines(child, b.Name, b.Package, visit, handles); err != nil { + return err + } + } + + case conditionalBlock: + for _, child := range b.Children { + if err := collectAndVisitLines(child, secName, secPkg, visit, handles); err != nil { + return err + } + } + + for _, child := range b.Else { + if err := collectAndVisitLines(child, secName, secPkg, visit, handles); err != nil { + return err + } + } + + case textBlock: + for i, line := range b.Lines { + lh := &lineHandle{Text: line, block: b, idx: i} + *handles = append(*handles, lh) + + if err := visit(secName, secPkg, lh); err != nil { + return err + } + } + + case macroDefBlock: + // Macro definitions are not visited as content lines. + } + + return nil +} + +// flushLineMutations applies buffered Replace/Remove operations. +// Iterates handles in reverse insertion order so per-block removals don't +// invalidate the indices of yet-to-be-applied operations. +func flushLineMutations(handles []*lineHandle) { + for i := len(handles) - 1; i >= 0; i-- { + h := handles[i] + + switch { + case h.removed: + h.block.Lines = append(h.block.Lines[:h.idx], h.block.Lines[h.idx+1:]...) + case h.replaced: + h.block.Lines[h.idx] = h.newText + } + } +} + +// --- Tag-aware insertion --- + +// InsertTag inserts a tag-style line into this section, placing it after the +// last existing tag from the same family (e.g., "Source9999" lands after the +// last Source* tag). If no same-family tag exists, the new tag goes after the +// last tag of any kind. If the section has no tags at all, the new line is +// appended to the section's end. +// +// If the chosen anchor tag lives inside a [conditionalBlock], the new tag is +// inserted after the entire conditional block instead, so it remains +// unconditional. +func (h *sectionHandle) InsertTag(tag, value, family string) { + newLine := fmt.Sprintf("%s: %s", tag, value) + + anchor := h.findTagInsertAnchor(family) + if anchor == nil { + h.AppendLines([]string{newLine}) + + return + } + + anchor.insertAfter(h.block, newLine) +} + +// tagInsertAnchor records where a new tag should be placed relative to an +// existing tag. Exactly one of inTextBlock or afterChild is set: +// - inTextBlock != nil: the anchor tag is a direct line inside a textBlock; +// the new line is spliced into that block right after lineIdx. +// - afterChild != nil: the anchor tag lives inside a top-level conditionalBlock +// of the section; the new line goes into a new sibling textBlock immediately +// after afterChild in the section's Children. +type tagInsertAnchor struct { + inTextBlock *block + lineIdx int + afterChild *block +} + +func (a *tagInsertAnchor) insertAfter(parent *block, newLine string) { + if a.inTextBlock != nil { + lines := a.inTextBlock.Lines + spliced := make([]string, 0, len(lines)+1) + spliced = append(spliced, lines[:a.lineIdx+1]...) + spliced = append(spliced, newLine) + spliced = append(spliced, lines[a.lineIdx+1:]...) + a.inTextBlock.Lines = spliced + + return + } + + childIdx := -1 + + for i, child := range parent.Children { + if child == a.afterChild { + childIdx = i + + break + } + } + + newChild := &block{Kind: textBlock, Lines: []string{newLine}} + + if childIdx < 0 { + // Defensive: anchor not found in parent.Children — append. + parent.Children = append(parent.Children, newChild) + + return + } + + spliced := make([]*block, 0, len(parent.Children)+1) + spliced = append(spliced, parent.Children[:childIdx+1]...) + spliced = append(spliced, newChild) + spliced = append(spliced, parent.Children[childIdx+1:]...) + parent.Children = spliced +} + +// findTagInsertAnchor walks the section's top-level children to find the last +// tag matching family (preferred) or the last tag of any kind (fallback). +// Returns nil if the section contains no tags. +func (h *sectionHandle) findTagInsertAnchor(family string) *tagInsertAnchor { + var lastAny, lastFamily *tagInsertAnchor + + for _, child := range h.block.Children { + switch child.Kind { + case textBlock: + for i, line := range child.Lines { + tag, _, isTag := parseTagLine(line) + if !isTag { + continue + } + + anchor := &tagInsertAnchor{inTextBlock: child, lineIdx: i} + lastAny = anchor + + if tagFamily(tag) == family { + lastFamily = anchor + } + } + + case conditionalBlock: + hasAny, hasFamily := scanConditionalForTags(child, family) + if hasAny { + anchor := &tagInsertAnchor{afterChild: child} + lastAny = anchor + + if hasFamily { + lastFamily = anchor + } + } + + case rootBlock, sectionBlock, macroDefBlock: + // Not encountered as a section child (or carry no tag lines). + } + } + + if lastFamily != nil { + return lastFamily + } + + return lastAny +} + +// scanConditionalForTags reports whether the conditional block (any branch, +// any nesting depth) contains at least one tag line, and whether any of those +// tags belongs to the given family. +func scanConditionalForTags(cond *block, family string) (hasAny, hasFamily bool) { + scan := func(blocks []*block) { + for _, b := range blocks { + a, f := scanForTags(b, family) + hasAny = hasAny || a + hasFamily = hasFamily || f + } + } + + scan(cond.Children) + scan(cond.Else) + + return hasAny, hasFamily +} + +func scanForTags(b *block, family string) (hasAny, hasFamily bool) { + switch b.Kind { + case textBlock: + for _, line := range b.Lines { + tag, _, isTag := parseTagLine(line) + if !isTag { + continue + } + + hasAny = true + + if tagFamily(tag) == family { + hasFamily = true + } + } + + case conditionalBlock: + return scanConditionalForTags(b, family) + + case rootBlock, sectionBlock, macroDefBlock: + // Not searched for tags here. + } + + return hasAny, hasFamily +} + +// parseTagLine attempts to parse line as an RPM tag line ("Name: value"). +// Returns the tag name and value, or ok=false if line is not a tag. +func parseTagLine(line string) (tag, value string, ok bool) { + const reSubmatchCount = 3 + + matches := tagRegex.FindStringSubmatch(line) + if len(matches) != reSubmatchCount { + return "", "", false + } + + return matches[1], matches[2], true +} + +// isTagBearingSection reports whether a section keyword can legally hold RPM +// tag declarations (e.g. "Name:", "Source0:"). Only the preamble (empty name) +// and "%package" sections qualify. Script-style sections like "%build" may +// contain shell that happens to match the "word: word" pattern; we must avoid +// treating those as tags. +func isTagBearingSection(secName string) bool { + return secName == "" || secName == "%package" +} + From b245e97cd3232760f3af038e07761cf10ea595dc Mon Sep 17 00:00:00 2001 From: Thien Trung Vuong Date: Fri, 29 May 2026 00:27:11 +0000 Subject: [PATCH 6/9] refactor(spec): migrate remaining edit operations, remove Visit Convert the last group of Visit-based mutators to mutateTree + section or line handles, then remove the Visit infrastructure entirely: Migrated operations: - PrependLinesToSection -> sectionHandle.PrependLines. - AddChangelogEntry -> sectionHandle.PrependLines on %changelog. - SearchAndReplace -> searchReplaceBlock, a recursive tree walker that visits ALL line types (text, macros, conditional headers, %endif) so patterns targeting %define/%global or %if lines are found correctly. Wrapper else-branch section filter is relaxed for loose content. - removePatchlistEntriesMatching -> sectionHandle.VisitLines on %patchlist. New API: - GetTag(packageName, tag) for read-only tag lookup via inspectTree. Removed (Visit infrastructure): - Visit, VisitTags, VisitTagsPackage from edit.go. - Context, VisitTarget, VisitTargetType, Visitor, InsertLinesBefore, InsertLinesAfter, RemoveLine, ReplaceLine from spec.go. - parseState, inContinuation, newParseState, parseSpecLine, newParsedLine, parseLogicalLine from spec.go. Dead code from line-range approach also removed: - sectionLineRange, collectSectionRanges, balanceRange, validateNoContentAfter, isBlankOrComment, removeRanges, etc. Migrated external caller: release.go GetReleaseValue now uses GetTag instead of VisitTagsPackage. --- internal/app/azldev/core/sources/release.go | 17 +- internal/rpm/spec/edit.go | 567 +++++++------------- internal/rpm/spec/spec.go | 312 ----------- internal/rpm/spec/spec_test.go | 8 +- internal/rpm/spec/tree_api.go | 83 +-- 5 files changed, 249 insertions(+), 738 deletions(-) diff --git a/internal/app/azldev/core/sources/release.go b/internal/app/azldev/core/sources/release.go index 55dcad6c..506ad472 100644 --- a/internal/app/azldev/core/sources/release.go +++ b/internal/app/azldev/core/sources/release.go @@ -8,7 +8,6 @@ import ( "log/slog" "regexp" "strconv" - "strings" "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/components" "github.com/microsoft/azure-linux-dev-tools/internal/global/opctx" @@ -46,21 +45,9 @@ func GetReleaseTagValue(fs opctx.FS, specPath string) (string, error) { return "", fmt.Errorf("failed to parse spec %#q:\n%w", specPath, err) } - var releaseValue string - - err = openedSpec.VisitTagsPackage("", func(tagLine *spec.TagLine, _ *spec.Context) error { - if strings.EqualFold(tagLine.Tag, "Release") { - releaseValue = tagLine.Value - } - - return nil - }) + releaseValue, err := openedSpec.GetTag("", "Release") if err != nil { - return "", fmt.Errorf("failed to visit tags in spec %#q:\n%w", specPath, err) - } - - if releaseValue == "" { - return "", fmt.Errorf("release tag not found in spec %#q:\n%w", specPath, spec.ErrNoSuchTag) + return "", fmt.Errorf("failed to get Release tag from spec %#q:\n%w", specPath, err) } return releaseValue, nil diff --git a/internal/rpm/spec/edit.go b/internal/rpm/spec/edit.go index cad37ca9..5c54770e 100644 --- a/internal/rpm/spec/edit.go +++ b/internal/rpm/spec/edit.go @@ -56,24 +56,23 @@ func (s *Spec) UpdateExistingTag(packageName string, tag string, value string) ( var updated bool err = s.mutateTree(func(tree *specTree) error { - return tree.VisitAllLines(func(secName, secPkg string, lh *lineHandle) error { + return tree.VisitAllLines(func(secName, secPkg string, line *lineHandle) error { if updated || secPkg != packageName || !isTagBearingSection(secName) { return nil } - parsedTag, _, isTag := parseTagLine(lh.Text) + parsedTag, _, isTag := parseTagLine(line.Text) if !isTag || strings.ToLower(parsedTag) != tagToCompareAgainst { return nil } - lh.Replace(fmt.Sprintf("%s: %s", tag, value)) + line.Replace(fmt.Sprintf("%s: %s", tag, value)) updated = true return nil }) }) - if err != nil { return err } @@ -116,39 +115,38 @@ func (s *Spec) RemoveTag(packageName string, tag string, value string) (err erro return nil } -// VisitTags iterates over all tag lines across all packages, calling the visitor function -// for each one. The visitor receives the parsed [TagLine] and the mutation [Context]. -func (s *Spec) VisitTags(visitor func(tagLine *TagLine, ctx *Context) error) error { - return s.Visit(func(ctx *Context) error { - if ctx.Target.TargetType != SectionLineTarget { - return nil - } +// GetTag returns the value of the first instance of the named tag in the given package. +// Returns [ErrNoSuchTag] if the tag does not exist. +func (s *Spec) GetTag(packageName string, tag string) (string, error) { + tagToCompareAgainst := strings.ToLower(tag) - if ctx.Target.Line.Parsed.GetType() != Tag { - return nil - } + var foundValue string - tagLine, isTagLine := ctx.Target.Line.Parsed.(*TagLine) - if !isTagLine { - return nil - } + err := s.inspectTree(func(tree *specTree) error { + return tree.VisitAllLines(func(secName, secPkg string, line *lineHandle) error { + if foundValue != "" || secPkg != packageName || !isTagBearingSection(secName) { + return nil + } - return visitor(tagLine, ctx) - }) -} + parsedTag, parsedValue, isTag := parseTagLine(line.Text) + if !isTag || strings.ToLower(parsedTag) != tagToCompareAgainst { + return nil + } -// VisitTagsPackage iterates over all tag lines in the given package, calling the visitor -// function for each one. The visitor receives the parsed [TagLine] and the mutation [Context]. -// This extracts the common target-type / package / tag-type filtering that many tag-oriented -// methods need. -func (s *Spec) VisitTagsPackage(packageName string, visitor func(tagLine *TagLine, ctx *Context) error) error { - return s.VisitTags(func(tagLine *TagLine, ctx *Context) error { - if ctx.CurrentSection.Package != packageName { - return nil - } + foundValue = parsedValue - return visitor(tagLine, ctx) + return nil + }) }) + if err != nil { + return "", err + } + + if foundValue == "" { + return "", fmt.Errorf("tag %#q not found in package %#q:\n%w", tag, packageName, ErrNoSuchTag) + } + + return foundValue, nil } // RemoveTagsMatching removes all tags in the given package for which the provided matcher @@ -158,17 +156,17 @@ func (s *Spec) RemoveTagsMatching(packageName string, matcher func(tag, value st removed := 0 err := s.mutateTree(func(tree *specTree) error { - return tree.VisitAllLines(func(secName, secPkg string, lh *lineHandle) error { + return tree.VisitAllLines(func(secName, secPkg string, line *lineHandle) error { if secPkg != packageName || !isTagBearingSection(secName) { return nil } - parsedTag, parsedValue, isTag := parseTagLine(lh.Text) + parsedTag, parsedValue, isTag := parseTagLine(line.Text) if !isTag || !matcher(parsedTag, parsedValue) { return nil } - lh.Remove() + line.Remove() removed++ @@ -192,7 +190,7 @@ func (s *Spec) AddTag(packageName string, tag string, value string) (err error) sectionName := "" if packageName != "" { - sectionName = "%package" + sectionName = packageSectionName } return s.AppendLinesToSection(sectionName, packageName, []string{fmt.Sprintf("%s: %s", tag, value)}) @@ -291,7 +289,7 @@ func (s *Spec) InsertTag(packageName string, tag string, value string) error { sectionName := "" if packageName != "" { - sectionName = "%package" + sectionName = packageSectionName } return s.mutateTree(func(tree *specTree) error { @@ -312,45 +310,16 @@ func (s *Spec) InsertTag(packageName string, tag string, value string) error { func (s *Spec) PrependLinesToSection(sectionName, packageName string, lines []string) (err error) { slog.Debug("Prepending lines to spec", "section", sectionName, "package", packageName, "lines", lines) - var updated bool - - err = s.Visit(func(ctx *Context) error { - // Make sure this is a section start. - if ctx.Target.TargetType != SectionStartTarget { - return nil - } - - // Make sure section name matches. - if ctx.CurrentSection.SectName != sectionName { - return nil - } - - // Make sure package name matches. - if ctx.CurrentSection.Package != packageName { - return nil - } - - // Insert the lines. The global section doesn't have a header line, so we insert the - // lines *before* the start. For all other sections, including sub-package %package - // sections, we need to make sure we insert the lines after the header line of the - // section. - if ctx.CurrentSection.SectName == "" && ctx.CurrentSection.Package == "" { - ctx.InsertLinesBefore(lines) - } else { - ctx.InsertLinesAfter(lines) + return s.mutateTree(func(tree *specTree) error { + sect := tree.Section(sectionName, packageName) + if sect == nil { + return fmt.Errorf("section %#q (package=%#q) not found:\n%w", sectionName, packageName, ErrSectionNotFound) } - // Note that we've made an update. - updated = true + sect.PrependLines(lines) return nil }) - - if !updated { - return fmt.Errorf("section %#q (package=%#q) not found:\n%w", sectionName, packageName, ErrSectionNotFound) - } - - return err } // AppendLinesToSection appends the given lines at the end of the specified section, placing @@ -378,6 +347,11 @@ func (s *Spec) AppendLinesToSection(sectionName, packageName string, lines []str // section. If `sectionName` is empty, the operation acts against all sections. If no matches were // found to replace, an error is returned. The replacement is performed literally; regex capture // group references like $1 are not expanded. +// +// Unlike [specTree.VisitAllLines] (which skips structural lines), this function +// walks every line in the tree including macro definitions (%define/%global) and +// conditional directives (%if/%else/%endif), so patterns that match those lines +// are found correctly. func (s *Spec) SearchAndReplace(sectionName, packageName, regex, replacement string) (err error) { slog.Debug("Searching and replacing in spec", "section", sectionName, @@ -394,47 +368,124 @@ func (s *Spec) SearchAndReplace(sectionName, packageName, regex, replacement str var updated bool - err = s.Visit(func(ctx *Context) error { - // Make sure this is a section line. - if ctx.Target.TargetType != SectionLineTarget { - return nil + err = s.mutateTree(func(tree *specTree) error { + updated = searchReplaceBlock(tree.root, "", "", sectionName, packageName, compiledRegex, replacement) + + return nil + }) + if err != nil { + return err + } + + if !updated { + return fmt.Errorf( + "pattern %#q not found (section=%#q, package=%#q):\n%w", + regex, sectionName, packageName, ErrPatternNotFound, + ) + } + + return nil +} + +// searchReplaceBlock recursively walks a [block] tree, applying regex replacement +// to every line including macro definitions and conditional directives. Returns +// true if any replacement was made. +// +//nolint:cyclop,gocognit,funlen // Switch over blockKind with recursive calls; splitting would hurt readability. +func searchReplaceBlock( + blk *block, + secName, secPkg string, + filterSection, filterPkg string, + compiledRegex *regexp.Regexp, + replacement string, +) bool { + updated := false + + matchesFilter := (filterSection == "" || filterSection == secName) && + (filterPkg == "" || filterPkg == secPkg) + + switch blk.Kind { + case rootBlock: + for _, child := range blk.Children { + if searchReplaceBlock(child, secName, secPkg, filterSection, filterPkg, compiledRegex, replacement) { + updated = true + } } - // Make sure section name matches (or was omitted). - if sectionName != "" && ctx.CurrentSection.SectName != sectionName { - return nil + case sectionBlock: + // Section header itself is not subject to search-replace; content is. + for _, child := range blk.Children { + if searchReplaceBlock(child, blk.Name, blk.Package, filterSection, filterPkg, compiledRegex, replacement) { + updated = true + } } - // Make sure package name matches (or was omitted). - if packageName != "" && ctx.CurrentSection.Package != packageName { - return nil + case conditionalBlock: + // Replace in the %if header line. + if matchesFilter { + if newHeader := compiledRegex.ReplaceAllLiteralString(blk.Header, replacement); newHeader != blk.Header { + blk.Header = newHeader + updated = true + } } - // Get the line. - line := ctx.Target.Line.Text + // Check if this is a wrapper (contains section headers). If so, the + // else branch's content has ambiguous section context — the section + // established before the wrapper may continue in the else branch + // (common RPM spec pattern). Relax the section filter for the else + // branch text that has no enclosing section. + isWrapper := containsSectionBlocks(blk) - // Try to replace. If no replacements were made, return. - updatedLine := compiledRegex.ReplaceAllLiteralString(line, replacement) - if line == updatedLine { - return nil + // Then-branch children. + for _, child := range blk.Children { + if searchReplaceBlock(child, secName, secPkg, filterSection, filterPkg, compiledRegex, replacement) { + updated = true + } } - ctx.ReplaceLine(updatedLine) + // %else/%elif directive line. + if matchesFilter && blk.ElseDirective != "" { + if newDir := compiledRegex.ReplaceAllLiteralString(blk.ElseDirective, replacement); newDir != blk.ElseDirective { + blk.ElseDirective = newDir + updated = true + } + } - // Note that we've made an update. - updated = true + // Else-branch children. For wrapper conditionals, relax the section + // filter on loose content (text/macros not inside a section block) + // so that content belonging to the preceding section is reachable. + for _, child := range blk.Else { + elseSec, elsePkg := secName, secPkg + if isWrapper && child.Kind != sectionBlock { + elseSec = filterSection + elsePkg = filterPkg + } - return nil - }) + if searchReplaceBlock(child, elseSec, elsePkg, filterSection, filterPkg, compiledRegex, replacement) { + updated = true + } + } - if !updated { - return fmt.Errorf( - "pattern %#q not found (section=%#q, package=%#q):\n%w", - regex, sectionName, packageName, ErrPatternNotFound, - ) + // %endif line. + if matchesFilter && blk.Endif != "" { + if newEndif := compiledRegex.ReplaceAllLiteralString(blk.Endif, replacement); newEndif != blk.Endif { + blk.Endif = newEndif + updated = true + } + } + + case textBlock, macroDefBlock: + if matchesFilter { + for i, line := range blk.Lines { + if newLine := compiledRegex.ReplaceAllLiteralString(line, replacement); newLine != line { + blk.Lines[i] = newLine + updated = true + } + } + } } - return err + return updated } // AddChangelogEntry adds a changelog entry to the spec's changelog section. An error is returned if @@ -443,42 +494,26 @@ func (s *Spec) AddChangelogEntry(user, email, version, release string, time time slog.Debug("Adding changelog entry to spec", "user", user, "email", email, "version", version, "release", release, "details", details) - var updated bool - - err = s.Visit(func(ctx *Context) error { - // Make sure we're in the right section. - if ctx.Target.TargetType != SectionStartTarget { - return nil - } + formattedDate := time.Format("Mon Jan 02 2006") + header := fmt.Sprintf("* %s %s <%s> - %s-%s", formattedDate, user, email, version, release) - if ctx.CurrentSection.SectName != "%changelog" { - return nil - } + lines := []string{header} + for _, detail := range details { + lines = append(lines, "- "+detail) + } - // Insert an entry. - formattedDate := time.Format("Mon Jan 02 2006") - header := fmt.Sprintf("* %s %s <%s> - %s-%s", formattedDate, user, email, version, release) + lines = append(lines, "") - lines := []string{header} - for _, detail := range details { - lines = append(lines, "- "+detail) + return s.mutateTree(func(tree *specTree) error { + sect := tree.Section("%changelog", "") + if sect == nil { + return errors.New("existing changelog section could not be found") } - lines = append(lines, "") - - ctx.InsertLinesAfter(lines) - - // Note that we've made an update. - updated = true + sect.PrependLines(lines) return nil }) - - if !updated { - return errors.New("existing changelog section could not be found") - } - - return err } // ParsePatchTagNumber checks if the given tag name is a PatchN tag (case-insensitive) @@ -577,12 +612,12 @@ func (s *Spec) removePatchTagsMatching(pattern string) (int, error) { removed := 0 err := s.mutateTree(func(tree *specTree) error { - return tree.VisitAllLines(func(secName, _ string, lh *lineHandle) error { + return tree.VisitAllLines(func(secName, _ string, line *lineHandle) error { if !isTagBearingSection(secName) { return nil } - parsedTag, parsedValue, isTag := parseTagLine(lh.Text) + parsedTag, parsedValue, isTag := parseTagLine(line.Text) if !isTag { return nil } @@ -597,7 +632,7 @@ func (s *Spec) removePatchTagsMatching(pattern string) (int, error) { } if matched { - lh.Remove() + line.Remove() removed++ } @@ -614,32 +649,31 @@ func (s *Spec) removePatchTagsMatching(pattern string) (int, error) { func (s *Spec) removePatchlistEntriesMatching(pattern string) (int, error) { removed := 0 - err := s.Visit(func(ctx *Context) error { - if ctx.Target.TargetType != SectionLineTarget { - return nil - } - - if ctx.CurrentSection.SectName != "%patchlist" { + err := s.mutateTree(func(tree *specTree) error { + sect := tree.Section("%patchlist", "") + if sect == nil { return nil } - line := strings.TrimSpace(ctx.Target.Line.Text) - if line == "" { - return nil - } + return sect.VisitLines(func(line *lineHandle) error { + trimmed := strings.TrimSpace(line.Text) + if trimmed == "" { + return nil + } - matched, err := doublestar.Match(pattern, line) - if err != nil { - return fmt.Errorf("failed to match glob pattern %#q against %#q:\n%w", pattern, line, err) - } + matched, matchErr := doublestar.Match(pattern, trimmed) + if matchErr != nil { + return fmt.Errorf("failed to match glob pattern %#q against %#q:\n%w", pattern, trimmed, matchErr) + } - if matched { - ctx.RemoveLine() + if matched { + line.Remove() - removed++ - } + removed++ + } - return nil + return nil + }) }) return removed, err @@ -655,12 +689,12 @@ func (s *Spec) GetHighestPatchTagNumber() (int, error) { unnumberedCount := 0 err := s.inspectTree(func(tree *specTree) error { - return tree.VisitAllLines(func(secName, _ string, lh *lineHandle) error { + return tree.VisitAllLines(func(secName, _ string, line *lineHandle) error { if !isTagBearingSection(secName) { return nil } - parsedTag, _, isTag := parseTagLine(lh.Text) + parsedTag, _, isTag := parseTagLine(line.Text) if !isTag { return nil } @@ -750,84 +784,6 @@ func (s *Spec) RemoveSubpackage(packageName string) error { }) } -// sectionLineRange identifies a half-open `[start, end)` range of raw line numbers -// covering one section, from its header line through (but not including) the start -// of the next section. -type sectionLineRange struct { - start int - end int -} - -// collectSectionRanges walks the spec and returns one [sectionLineRange] for every -// section whose `(sectName, packageName)` pair satisfies the predicate, in the order -// they appear in the spec. -// -// Each returned range is adjusted to maintain conditional balance: if a range would -// include trailing `%if` or `%endif` lines that create a nesting imbalance, those -// lines are trimmed from the range so that removing the range does not break the -// spec's conditional structure. If a conditional block is interleaved with section -// content in a way that cannot be resolved by trimming, an [ErrConditionalSpansSections] -// error is returned. -func (s *Spec) collectSectionRanges( - matches func(sectName, packageName string) bool, -) ([]sectionLineRange, error) { - var ( - ranges []sectionLineRange - curStart = -1 - ) - - err := s.Visit(func(ctx *Context) error { - matched := matches(ctx.CurrentSection.SectName, ctx.CurrentSection.Package) - - //nolint:exhaustive // We intentionally only react to section boundaries. - switch ctx.Target.TargetType { - case SectionStartTarget: - if matched { - curStart = ctx.CurrentLineNum - } - case SectionEndTarget: - if matched && curStart >= 0 { - ranges = append(ranges, sectionLineRange{start: curStart, end: ctx.CurrentLineNum}) - curStart = -1 - } - } - - return nil - }) - - // Defensive fallback: today [Spec.Visit] always emits a trailing SectionEndTarget at - // EOF, so this branch is unreachable. We keep it so that this helper does not silently - // misbehave if that invariant ever changes (a section running to EOF would otherwise - // be silently dropped from the result). - if curStart >= 0 { - ranges = append(ranges, sectionLineRange{start: curStart, end: len(s.rawLines)}) - } - - // Skip conditional balancing when no matching ranges were found, so callers - // get the expected empty-result / not-found behavior rather than a conditional - // parse error from an unrelated part of the spec. - if len(ranges) == 0 { - return ranges, err - } - - // Balance each range to avoid breaking conditional nesting. - pairs, pairErr := collectConditionalPairs(s.rawLines) - if pairErr != nil { - return nil, fmt.Errorf("failed to parse conditional structure:\n%w", pairErr) - } - - for idx := range ranges { - balanced, balanceErr := balanceRange(ranges[idx], s.rawLines, pairs) - if balanceErr != nil { - return nil, balanceErr - } - - ranges[idx] = balanced - } - - return ranges, err -} - // conditionalPair represents a matched `%if`/`%endif` pair by their line numbers. type conditionalPair struct { ifLine int @@ -835,8 +791,14 @@ type conditionalPair struct { } // collectConditionalPairs walks the raw lines and returns all matched `%if`/`%endif` -// pairs using a stack. Nested pairs are properly matched. Returns an error if there -// are unmatched `%if` or `%endif` directives. +// pairs using a stack. Nested pairs are properly matched. Lines inside +// macro definition continuations are skipped — `%if`/`%endif` that appear +// inside multi-line `%define`/`%global` bodies (e.g. `%define foo() \` … +// `%if …\` … `%endif\`) are RPM macro body text, not structural conditionals. +// However, `%if`/`%endif` inside general shell continuations (e.g. +// `configure \` … `%if …` … `%endif`) ARE structural — RPM evaluates them +// as preprocessor directives before shell interpretation. Returns an error if +// there are unmatched `%if` or `%endif` directives. func collectConditionalPairs(rawLines []string) ([]conditionalPair, error) { var ( pairs []conditionalPair @@ -852,6 +814,8 @@ func collectConditionalPairs(rawLines []string) ([]conditionalPair, error) { continue } + // Only skip continuations that start from a %define/%global line — + // those are macro body text where %if/%endif are not structural. if _, isMacro := isMacroDefLine(line); isMacro && strings.HasSuffix(line, "\\") { inMacroCont = true @@ -879,148 +843,3 @@ func collectConditionalPairs(rawLines []string) ([]conditionalPair, error) { return pairs, nil } - -// balanceRange adjusts a section line range so that removing it does not leave -// unbalanced `%if`/`%endif` directives in the spec. It uses pre-computed conditional -// pairs to identify straddling conditionals — pairs where one half is inside the -// range and the other half is outside. -// -// Straddling conditional lines inside the range are excluded (the range is trimmed -// so they remain in the spec). This handles: -// - Trailing `%endif` from a wrapping conditional: excluded, leaving an empty -// `%if`/`%endif` wrapper. -// - Trailing `%if` belonging to the next section: excluded, keeping the next -// section's conditional intact. -// - Balanced pairs fully inside the range: removed along with the section content. -// -// If a straddling conditional is interleaved with real section content (not just -// other conditional directives and blank lines), an [ErrConditionalSpansSections] -// error is returned. -func balanceRange(sectionRange sectionLineRange, rawLines []string, pairs []conditionalPair) (sectionLineRange, error) { - // Find the earliest straddling line inside the range and validate that no - // straddling %if has real content after it. A pair straddles if exactly one - // of its lines falls within [sectionRange.start, sectionRange.end). - trimmed := sectionRange.end - - for _, pair := range pairs { - ifInside := pair.ifLine >= sectionRange.start && pair.ifLine < sectionRange.end - endifInside := pair.endifLine >= sectionRange.start && pair.endifLine < sectionRange.end - - if ifInside == endifInside { - // Both inside (fully contained) or both outside (irrelevant). - continue - } - - // Straddling: the line that's inside our range should be excluded. - var insideLine int - if ifInside { - insideLine = pair.ifLine - } else { - insideLine = pair.endifLine - } - - if insideLine < trimmed { - trimmed = insideLine - } - - // If the straddling line is an %if (opener inside, closer outside), - // check for real content between the %if and the range end. Such content - // would belong to this section but span into the next via the conditional. - if ifInside { - if err := validateNoContentAfter(pair.ifLine, sectionRange.end, rawLines); err != nil { - return sectionRange, fmt.Errorf( - "section at lines %d-%d has a conditional block that spans into the next section; "+ - "use a spec-search-replace overlay to adjust conditionals before removing:\n%w", - sectionRange.start+1, sectionRange.end, ErrConditionalSpansSections, - ) - } - } - } - - // Check for %else/%elif branch directives that would be broken by the removal. - if err := validateNoBranchDirectivesInExternalConditional(sectionRange, rawLines, pairs); err != nil { - return sectionRange, err - } - - if trimmed == sectionRange.end { - // No straddling pairs — range is already balanced. - return sectionRange, nil - } - - // Validate: the trimmed zone [trimmed, sectionRange.end) will remain in the spec. - // If it contains real section content (not just conditional directives and blanks), - // we'd be leaving behind part of the section the caller asked to remove. - if err := validateNoContentAfter(trimmed-1, sectionRange.end, rawLines); err != nil { - return sectionRange, fmt.Errorf( - "section at lines %d-%d has a conditional block that spans into the next section; "+ - "use a spec-search-replace overlay to adjust conditionals before removing:\n%w", - sectionRange.start+1, sectionRange.end, ErrConditionalSpansSections, - ) - } - - return sectionLineRange{start: sectionRange.start, end: trimmed}, nil -} - -// validateNoContentAfter checks that there is no real section content (non-blank, -// non-conditional lines) between startLine and endLine. Returns an error if any -// such content is found. -func validateNoContentAfter(startLine, endLine int, rawLines []string) error { - for lineNum := startLine + 1; lineNum < endLine; lineNum++ { - if !isBlankOrComment(rawLines[lineNum]) && conditionalDepthChange(rawLines[lineNum]) == 0 { - return fmt.Errorf("real content found at line %d", lineNum+1) - } - } - - return nil -} - -// validateNoBranchDirectivesInExternalConditional checks that the section range -// does not contain any `%else`/`%elif` branch directives whose enclosing -// `%if`/`%endif` pair extends beyond the range. Removing such a branch directive -// while keeping the enclosing conditional would change which branch is active. -func validateNoBranchDirectivesInExternalConditional( - sectionRange sectionLineRange, - rawLines []string, - pairs []conditionalPair, -) error { - for lineNum := sectionRange.start; lineNum < sectionRange.end; lineNum++ { - if !isConditionalBranchDirective(rawLines[lineNum]) { - continue - } - - for _, pair := range pairs { - if pair.ifLine <= lineNum && pair.endifLine >= lineNum { - pairFullyInside := pair.ifLine >= sectionRange.start && pair.endifLine < sectionRange.end - - if !pairFullyInside { - return fmt.Errorf( - "section at lines %d-%d contains a %%else/%%elif branch directive inside a "+ - "conditional block that extends beyond the section boundary; "+ - "use a spec-search-replace overlay to adjust conditionals before removing:\n%w", - sectionRange.start+1, sectionRange.end, ErrConditionalSpansSections, - ) - } - - break - } - } - } - - return nil -} - -// isBlankOrComment returns true if the line is empty, whitespace-only, or a comment. -func isBlankOrComment(line string) bool { - trimmed := strings.TrimSpace(line) - - return trimmed == "" || strings.HasPrefix(trimmed, "#") -} - -// removeRanges deletes the given line ranges from the spec. Ranges must be -// non-overlapping and in ascending order (as produced by [Spec.collectSectionRanges]); -// they are removed from last to first so earlier indices remain valid. -func (s *Spec) removeRanges(ranges []sectionLineRange) { - for i := len(ranges) - 1; i >= 0; i-- { - s.RemoveLines(ranges[i].start, ranges[i].end) - } -} diff --git a/internal/rpm/spec/spec.go b/internal/rpm/spec/spec.go index 1ab04206..0b199d84 100644 --- a/internal/rpm/spec/spec.go +++ b/internal/rpm/spec/spec.go @@ -7,7 +7,6 @@ import ( "bufio" "fmt" "io" - "regexp" "slices" "strings" ) @@ -136,20 +135,6 @@ func (*RawLine) GetType() ParsedLineType { return Raw } -type parseState struct { - currentSect SectionTarget -} - -func newParseState() parseState { - return parseState{ - currentSect: SectionTarget{ - SectType: PackageSection, - SectName: "", - Package: "", - }, - } -} - // OpenSpec reads in the contents of an RPM spec file from the provided reader, returning a [Spec] object. // An error is returned if the reader cannot be fully read (e.g., I/O error or line exceeds buffer size). func OpenSpec(reader io.Reader) (*Spec, error) { @@ -207,110 +192,6 @@ func (s *Spec) InsertLinesAt(insertedLines []string, lineNumber int) { s.rawLines = slices.Insert(s.rawLines, lineNumber, insertedLines...) } -// Context provides context information to a visitor function when visiting a spec. -type Context struct { - // Target is the current visit target. - Target VisitTarget - // RawLine is the raw text of the current line, if applicable (nil otherwise). - RawLine *string - // CurrentSection is the current section being visited. - CurrentSection SectionTarget - // CurrentLineNum is the current (0-indexed) line number being visited. - CurrentLineNum int - - // parseStateBeforeCurrentLine represents the parse state for this spec as it was - // before parsing the current line. - parseStateBeforeCurrentLine parseState - // parseState represents the parse state for this spec after having parsed the - // current line. - parseState parseState - // nextLineNumToParse is the next (0-indexed) line number that will be parsed - // during visiting; note that not all parsed lines will be send to the visitor, - // though. - nextLineNumToParse int - // nextLineNumToVisit is the next (0-indexed) line number that will be visited. - nextLineNumToVisit int - // spec is the spec being visited. - spec *Spec -} - -// InsertLinesBefore inserts the provided lines just before the line currently being visited, -// updating the context accordingly. The next line to be visited will be the line following -// the current one being visited. -func (ctx *Context) InsertLinesBefore(lines []string) { - ctx.spec.InsertLinesAt(lines, ctx.CurrentLineNum) - - // Account for the displacement from the inserted lines. We will parse the - // new lines but not visit them, nor will we visit the current line again. - // This will require rollback back to the previous parse state first. - ctx.parseState = ctx.parseStateBeforeCurrentLine - ctx.nextLineNumToParse = ctx.CurrentLineNum - ctx.nextLineNumToVisit = ctx.CurrentLineNum + len(lines) + 1 -} - -// InsertLinesAfter inserts the provided lines just after the line currently being visited, -// updating the context accordingly. The next line to be visited will be the line following -// the newly inserted lines. -func (ctx *Context) InsertLinesAfter(lines []string) { - ctx.spec.InsertLinesAt(lines, ctx.CurrentLineNum+1) - - // Skip ahead past the newly inserted lines. - ctx.nextLineNumToParse = ctx.CurrentLineNum + 1 - ctx.nextLineNumToVisit += len(lines) -} - -// RemoveLine removes the line currently being visited, updating the context accordingly. -// The next line to be visited will be the line that followed the removed line. -func (ctx *Context) RemoveLine() { - ctx.spec.RemoveLine(ctx.CurrentLineNum) - - // Account for the removed line. We will reparse the new current line and revisit it. - // This will require rolling back to the previous parse state first. - ctx.parseState = ctx.parseStateBeforeCurrentLine - ctx.nextLineNumToParse = ctx.CurrentLineNum - ctx.nextLineNumToVisit = ctx.CurrentLineNum -} - -// ReplaceLine replaces the line currently being visited with the provided replacement line, -// updating the context accordingly. -func (ctx *Context) ReplaceLine(replacement string) { - ctx.spec.ReplaceLine(ctx.CurrentLineNum, replacement) - - // Account for the replaced line. We will reparse the current line, but not revisit it. - // This will require rolling back to the previous parse state. - ctx.parseState = ctx.parseStateBeforeCurrentLine - ctx.nextLineNumToParse = ctx.CurrentLineNum - ctx.nextLineNumToVisit = ctx.CurrentLineNum + 1 -} - -// VisitTarget encapsulates the current target of a visit operation. -type VisitTarget struct { - // TargetType is the type of the current visit target. - TargetType VisitTargetType - // Optionally, provides more detail about the target's section. - // Left nil when the target is not part of a section. - Section *SectionTarget - // Optionally, provides more detail about the target's line. Left nil - // when the target is not a line. - Line *Line -} - -// VisitTargetType indicates the type of a visit target. -type VisitTargetType string - -const ( - // SpecStartTarget indicates the start of the spec. - SpecStartTarget VisitTargetType = "SpecStart" - // SectionStartTarget indicates the start of a section. - SectionStartTarget VisitTargetType = "SectionStart" - // SectionLineTarget indicates a line within a section. - SectionLineTarget VisitTargetType = "SectionLine" - // SectionEndTarget indicates the end of a section. - SectionEndTarget VisitTargetType = "SectionEnd" - // SpecEndTarget indicates the end of the spec. - SpecEndTarget VisitTargetType = "SpecEnd" -) - // SectionTarget encapsulates information about the current section context. type SectionTarget struct { // SectName is the name of the section, e.g. "%description". @@ -322,199 +203,6 @@ type SectionTarget struct { Package string } -// Visitor is the type of a visitor function that can be passed to [Spec.Visit]. -type Visitor = func(ctx *Context) error - -// Visit walks through the spec, invoking the provided visitor function for each relevant target. -// -// State Management Invariants: -// - nextLineNumToParse: The next 0-indexed line number to parse. May be less than or equal to -// CurrentLineNum when context mutations (InsertLinesBefore, RemoveLine, ReplaceLine) require -// re-parsing the current position. -// - nextLineNumToVisit: The next 0-indexed line number to send to the visitor. A line is visited -// only if CurrentLineNum >= nextLineNumToVisit, allowing mutations to skip visiting newly -// inserted lines or re-visit lines after removal. -// - Context mutation methods update these values to maintain correct traversal after modifications. -// -//nolint:funlen -func (s *Spec) Visit(visitor Visitor) error { - ctx := Context{ - Target: VisitTarget{TargetType: SpecStartTarget}, - CurrentSection: newParseState().currentSect, - CurrentLineNum: 0, - parseState: newParseState(), - parseStateBeforeCurrentLine: newParseState(), - nextLineNumToVisit: 0, - nextLineNumToParse: 1, - spec: s, - } - - // Visit the spec start. - err := visitor(&ctx) - if err != nil { - return err - } - - // Visit the preamble start. - ctx.Target = VisitTarget{ - TargetType: SectionStartTarget, - Section: &ctx.CurrentSection, - } - - err = visitor(&ctx) - if err != nil { - return err - } - - // Go through the lines. - for ctx.CurrentLineNum < len(ctx.spec.rawLines) { - var parsedLine ParsedLine - - rawLine := ctx.spec.rawLines[ctx.CurrentLineNum] - - ctx.parseStateBeforeCurrentLine = ctx.parseState - parsedLine, ctx.parseState = parseSpecLine(rawLine, ctx.parseStateBeforeCurrentLine) - - if _, ok := parsedLine.(*SectionStartLine); ok { - // Visit the end of the preceding section. - ctx.Target = VisitTarget{ - TargetType: SectionEndTarget, - Section: &ctx.CurrentSection, - } - - ctx.RawLine = nil - - // Skip visiting if this line was inserted or we're re-parsing after a mutation. - if ctx.CurrentLineNum >= ctx.nextLineNumToVisit { - err = visitor(&ctx) - if err != nil { - return err - } - } - - // Visit the start of the new section. - ctx.CurrentSection = ctx.parseState.currentSect - ctx.RawLine = &rawLine - ctx.Target = VisitTarget{ - TargetType: SectionStartTarget, - Section: &ctx.CurrentSection, - } - } else { - ctx.RawLine = &rawLine - ctx.Target = VisitTarget{ - TargetType: SectionLineTarget, - Line: &Line{ - Text: rawLine, - Parsed: parsedLine, - }, - } - } - - // Visit the line (if so requested). - if ctx.CurrentLineNum >= ctx.nextLineNumToVisit { - err = visitor(&ctx) - if err != nil { - return err - } - } - - // Move to whatever is the next line that we need to parse; note that it - // may be the same as the previous line number in case that line got removed. - // It's also possible that we won't send the line to the visitor. - ctx.CurrentLineNum = ctx.nextLineNumToParse - - // Update next *next* lines to visit/parse. - ctx.nextLineNumToParse++ - ctx.nextLineNumToVisit = max(ctx.CurrentLineNum, ctx.nextLineNumToVisit) - } - - // Visit the end of the last section. - ctx.Target = VisitTarget{ - TargetType: SectionEndTarget, - Section: &ctx.CurrentSection, - } - - ctx.RawLine = nil - - err = visitor(&ctx) - if err != nil { - return err - } - - // Visit the spec end. - ctx.Target = VisitTarget{TargetType: SpecEndTarget} - ctx.RawLine = nil - - err = visitor(&ctx) - if err != nil { - return err - } - - return nil -} - -func parseSpecLine(physicalText string, state parseState) (ParsedLine, parseState) { - parsedLine := newParsedLine(physicalText, state) - - if sectionStartLine, ok := parsedLine.(*SectionStartLine); ok { - state.currentSect.SectType = sectionStartLine.SectType - state.currentSect.SectName = sectionStartLine.SectName - state.currentSect.Package = getPackageNameForSection(sectionStartLine.SectType, sectionStartLine.Tokens) - } - - return parsedLine, state -} - -func newParsedLine(physicalText string, state parseState) ParsedLine { - logicalLine := physicalText - if strings.HasPrefix(physicalText, "#") { - logicalLine = "" - } - - logicalLine = strings.TrimSpace(logicalLine) - - return parseLogicalLine(logicalLine, state) -} - -var tagRegex = regexp.MustCompile(`^\s*([^\s:]+):\s*(.*?)\s*$`) - -func parseLogicalLine(logicalLine string, state parseState) ParsedLine { - tokens := strings.Fields(logicalLine) - if len(tokens) == 0 { - return &RawLine{} - } - - // See if this appears to be the start of a new section. - if strings.HasPrefix(tokens[0], "%") { - if newSectionType, known := sectionTypesByName[strings.ToLower(tokens[0])]; known { - return &SectionStartLine{ - SectType: newSectionType, - SectName: tokens[0], - Tokens: tokens, - } - } - } - - // Otherwise, if we're currently in a package section, see if this looks like a line that defines - // one or more tags. - if state.currentSect.SectType == PackageSection { - const reSubmatchCount = 3 - - matches := tagRegex.FindStringSubmatch(logicalLine) - if len(matches) == reSubmatchCount { - return &TagLine{ - Tag: matches[1], - Value: matches[2], - } - } - } - - // This doesn't appear to be the start of a section, nor a tag definition; treat it as a raw line. - return &RawLine{ - Content: logicalLine, - } -} - func getPackageNameForSection(sectionType SectionType, headerTokens []string) string { switch sectionType { case SourceFileListSection: diff --git a/internal/rpm/spec/spec_test.go b/internal/rpm/spec/spec_test.go index 88167abf..b8c242e4 100644 --- a/internal/rpm/spec/spec_test.go +++ b/internal/rpm/spec/spec_test.go @@ -54,12 +54,8 @@ func TestOpenSpec_EmptyInput(t *testing.T) { require.NoError(t, err) // Empty spec is parseable but has no tags. - err = sf.VisitTags(func(_ *spec.TagLine, _ *spec.Context) error { - t.Fatal("no tags should be visited in an empty spec") - - return nil - }) - require.NoError(t, err) + _, err = sf.GetTag("", "Name") + require.ErrorIs(t, err, spec.ErrNoSuchTag) } func TestOpenSpec_BinaryContent(t *testing.T) { diff --git a/internal/rpm/spec/tree_api.go b/internal/rpm/spec/tree_api.go index 00cc35f2..d7c6aaff 100644 --- a/internal/rpm/spec/tree_api.go +++ b/internal/rpm/spec/tree_api.go @@ -3,7 +3,10 @@ package spec -import "fmt" +import ( + "fmt" + "regexp" +) // specTree is an opaque handle wrapping the parsed structural tree of a spec. // Operations on the tree are exposed via methods so callers in edit.go do not @@ -72,19 +75,19 @@ func (t *specTree) HasSection(name string) bool { return hasSectionWithName(t.root, name) } -func hasSectionWithName(b *block, name string) bool { - if b.Kind == sectionBlock && b.Name == name { +func hasSectionWithName(blk *block, name string) bool { + if blk.Kind == sectionBlock && blk.Name == name { return true } - for _, child := range b.Children { + for _, child := range blk.Children { if hasSectionWithName(child, name) { return true } } - if b.Kind == conditionalBlock { - for _, child := range b.Else { + if blk.Kind == conditionalBlock { + for _, child := range blk.Else { if hasSectionWithName(child, name) { return true } @@ -120,6 +123,11 @@ func (t *specTree) handles(blocks []*block) []*sectionHandle { // RemoveSections removes the given sections from the tree. Removal is validated // as a set: if any one removal would orphan content or break a conditional's // semantics, the entire operation fails and the tree is left unmodified. +// +// Macro definitions (`%define` / `%global`) that live inside the removed +// sections but are referenced by surviving content are automatically hoisted +// to the root level just before the first removed section. See +// [hoistReferencedMacros] for the full behavior. func (t *specTree) RemoveSections(handles []*sectionHandle) error { blocks := make([]*block, len(handles)) for i, h := range handles { @@ -130,6 +138,8 @@ func (t *specTree) RemoveSections(handles []*sectionHandle) error { return err } + hoistReferencedMacros(t.root, blocks) + for _, b := range blocks { removeBlockFromParent(t.root, b) } @@ -225,48 +235,50 @@ func (h *sectionHandle) VisitLines(visit func(lh *lineHandle) error) error { return visitErr } -// collectAndVisitLines walks b, calls visit on every text-line, and records +// collectAndVisitLines walks blk, calls visit on every text-line, and records // each handle for later mutation flushing. +// +//nolint:cyclop // Switch over blockKind with a small recursive call per kind; splitting hurts readability. func collectAndVisitLines( - b *block, + blk *block, secName, secPkg string, visit func(string, string, *lineHandle) error, handles *[]*lineHandle, ) error { - switch b.Kind { + switch blk.Kind { case rootBlock: - for _, child := range b.Children { + for _, child := range blk.Children { if err := collectAndVisitLines(child, secName, secPkg, visit, handles); err != nil { return err } } case sectionBlock: - for _, child := range b.Children { - if err := collectAndVisitLines(child, b.Name, b.Package, visit, handles); err != nil { + for _, child := range blk.Children { + if err := collectAndVisitLines(child, blk.Name, blk.Package, visit, handles); err != nil { return err } } case conditionalBlock: - for _, child := range b.Children { + for _, child := range blk.Children { if err := collectAndVisitLines(child, secName, secPkg, visit, handles); err != nil { return err } } - for _, child := range b.Else { + for _, child := range blk.Else { if err := collectAndVisitLines(child, secName, secPkg, visit, handles); err != nil { return err } } case textBlock: - for i, line := range b.Lines { - lh := &lineHandle{Text: line, block: b, idx: i} - *handles = append(*handles, lh) + for i, line := range blk.Lines { + handle := &lineHandle{Text: line, block: blk, idx: i} + *handles = append(*handles, handle) - if err := visit(secName, secPkg, lh); err != nil { + if err := visit(secName, secPkg, handle); err != nil { return err } } @@ -283,13 +295,13 @@ func collectAndVisitLines( // invalidate the indices of yet-to-be-applied operations. func flushLineMutations(handles []*lineHandle) { for i := len(handles) - 1; i >= 0; i-- { - h := handles[i] + handle := handles[i] switch { - case h.removed: - h.block.Lines = append(h.block.Lines[:h.idx], h.block.Lines[h.idx+1:]...) - case h.replaced: - h.block.Lines[h.idx] = h.newText + case handle.removed: + handle.block.Lines = append(handle.block.Lines[:handle.idx], handle.block.Lines[handle.idx+1:]...) + case handle.replaced: + handle.block.Lines[handle.idx] = handle.newText } } } @@ -378,13 +390,13 @@ func (h *sectionHandle) findTagInsertAnchor(family string) *tagInsertAnchor { for _, child := range h.block.Children { switch child.Kind { case textBlock: - for i, line := range child.Lines { + for tagLineIdx, line := range child.Lines { tag, _, isTag := parseTagLine(line) if !isTag { continue } - anchor := &tagInsertAnchor{inTextBlock: child, lineIdx: i} + anchor := &tagInsertAnchor{inTextBlock: child, lineIdx: tagLineIdx} lastAny = anchor if tagFamily(tag) == family { @@ -433,10 +445,10 @@ func scanConditionalForTags(cond *block, family string) (hasAny, hasFamily bool) return hasAny, hasFamily } -func scanForTags(b *block, family string) (hasAny, hasFamily bool) { - switch b.Kind { +func scanForTags(blk *block, family string) (hasAny, hasFamily bool) { + switch blk.Kind { case textBlock: - for _, line := range b.Lines { + for _, line := range blk.Lines { tag, _, isTag := parseTagLine(line) if !isTag { continue @@ -450,7 +462,7 @@ func scanForTags(b *block, family string) (hasAny, hasFamily bool) { } case conditionalBlock: - return scanConditionalForTags(b, family) + return scanConditionalForTags(blk, family) case rootBlock, sectionBlock, macroDefBlock: // Not searched for tags here. @@ -459,6 +471,9 @@ func scanForTags(b *block, family string) (hasAny, hasFamily bool) { return hasAny, hasFamily } +// tagRegex matches RPM tag lines in the form "Name: value". +var tagRegex = regexp.MustCompile(`^\s*([^\s:]+):\s*(.*?)\s*$`) + // parseTagLine attempts to parse line as an RPM tag line ("Name: value"). // Returns the tag name and value, or ok=false if line is not a tag. func parseTagLine(line string) (tag, value string, ok bool) { @@ -472,12 +487,18 @@ func parseTagLine(line string) (tag, value string, ok bool) { return matches[1], matches[2], true } +// packageSectionName is the canonical section name for sub-package definitions +// (the `%package ` directive). The preamble (empty section name) and these +// sections are the only places where tag-style lines (`Foo: bar`) carry semantic +// meaning; script-style sections such as `%build` may contain lines that match +// the tag regex but are not actually tags. +const packageSectionName = "%package" + // isTagBearingSection reports whether a section keyword can legally hold RPM // tag declarations (e.g. "Name:", "Source0:"). Only the preamble (empty name) // and "%package" sections qualify. Script-style sections like "%build" may // contain shell that happens to match the "word: word" pattern; we must avoid // treating those as tags. func isTagBearingSection(secName string) bool { - return secName == "" || secName == "%package" + return secName == "" || secName == packageSectionName } - From 23b48908891b5e36aef551f695c41877ac181567 Mon Sep 17 00:00:00 2001 From: Thien Trung Vuong Date: Fri, 29 May 2026 02:35:39 +0000 Subject: [PATCH 7/9] test(spec): add curated testdata fixtures + synthetic round-trip stress Stand up 13 hand-crafted minimized .spec fixtures under internal/rpm/spec/testdata/specs/ distilling real-world patterns. testdata_test.go drives fixtures through two tiers: - Tier 1: lossless round-trip (byte-for-byte identity). - Tier 3: per-pattern edit-operation regression tests. Synthetic generator composes primitives into random spec inputs (64 round-trip + 32 AddTag seeds). Also includes SearchAndReplace tests for macro definitions, conditional headers, and wrapper else-branch section filter. --- internal/rpm/spec/edit_test.go | 491 ++++++++++++--- .../specs/comment-only-conditional.spec | 32 + .../rpm/spec/testdata/specs/elif-chain.spec | 39 ++ .../testdata/specs/elif-with-sections.spec | 52 ++ .../testdata/specs/if-with-continuation.spec | 34 ++ .../testdata/specs/macro-conditional.spec | 44 ++ .../testdata/specs/macro-continuation.spec | 33 + .../testdata/specs/macro-with-parameters.spec | 35 ++ .../testdata/specs/multi-package-mixed.spec | 69 +++ .../spec/testdata/specs/nested-wrappers.spec | 43 ++ .../specs/script-section-tag-shaped.spec | 43 ++ .../testdata/specs/straddling-wrapper.spec | 30 + .../specs/subpackage-define-referenced.spec | 39 ++ .../specs/subpackage-define-unreferenced.spec | 36 ++ internal/rpm/spec/testdata_test.go | 565 ++++++++++++++++++ 15 files changed, 1502 insertions(+), 83 deletions(-) create mode 100644 internal/rpm/spec/testdata/specs/comment-only-conditional.spec create mode 100644 internal/rpm/spec/testdata/specs/elif-chain.spec create mode 100644 internal/rpm/spec/testdata/specs/elif-with-sections.spec create mode 100644 internal/rpm/spec/testdata/specs/if-with-continuation.spec create mode 100644 internal/rpm/spec/testdata/specs/macro-conditional.spec create mode 100644 internal/rpm/spec/testdata/specs/macro-continuation.spec create mode 100644 internal/rpm/spec/testdata/specs/macro-with-parameters.spec create mode 100644 internal/rpm/spec/testdata/specs/multi-package-mixed.spec create mode 100644 internal/rpm/spec/testdata/specs/nested-wrappers.spec create mode 100644 internal/rpm/spec/testdata/specs/script-section-tag-shaped.spec create mode 100644 internal/rpm/spec/testdata/specs/straddling-wrapper.spec create mode 100644 internal/rpm/spec/testdata/specs/subpackage-define-referenced.spec create mode 100644 internal/rpm/spec/testdata/specs/subpackage-define-unreferenced.spec create mode 100644 internal/rpm/spec/testdata_test.go diff --git a/internal/rpm/spec/edit_test.go b/internal/rpm/spec/edit_test.go index 1c63b806..f231ad23 100644 --- a/internal/rpm/spec/edit_test.go +++ b/internal/rpm/spec/edit_test.go @@ -871,6 +871,116 @@ func TestSearchAndReplace(t *testing.T) { require.Equal(t, expected, actual.String()) }) + + t.Run("replace in macro definition", func(t *testing.T) { + input := ` +%global with_doc 1 +Name: test + +%build +make +` + + specFile, err := spec.OpenSpec(strings.NewReader(input)) + require.NoError(t, err) + + expected := strings.ReplaceAll(input, "%global with_doc 1", "%global with_doc 0") + + err = specFile.SearchAndReplace("", "", `%global with_doc 1`, "%global with_doc 0") + require.NoError(t, err) + + actual := new(bytes.Buffer) + + err = specFile.Serialize(actual) + require.NoError(t, err) + + require.Equal(t, expected, actual.String()) + }) + + t.Run("replace in conditional header", func(t *testing.T) { + input := ` +Name: test + +%if 0%{?fedora} +BuildRequires: fedora-only +%endif + +%build +make +` + + specFile, err := spec.OpenSpec(strings.NewReader(input)) + require.NoError(t, err) + + expected := strings.ReplaceAll(input, "%if 0%{?fedora}", "%if 0%{?rhel}") + + err = specFile.SearchAndReplace("", "", `^%if 0%\{\?fedora\}$`, "%if 0%{?rhel}") + require.NoError(t, err) + + actual := new(bytes.Buffer) + + err = specFile.Serialize(actual) + require.NoError(t, err) + + require.Equal(t, expected, actual.String()) + }) + + t.Run("replace in multi-line macro definition", func(t *testing.T) { + input := ` +%global cfg_content --gcc-triple=%{_target_cpu}-redhat-linux \ + --extra-flag=old +Name: test + +%build +make +` + + specFile, err := spec.OpenSpec(strings.NewReader(input)) + require.NoError(t, err) + + expected := strings.ReplaceAll(input, "redhat-linux", "azl-linux") + + err = specFile.SearchAndReplace("", "", `redhat-linux`, "azl-linux") + require.NoError(t, err) + + actual := new(bytes.Buffer) + + err = specFile.Serialize(actual) + require.NoError(t, err) + + require.Equal(t, expected, actual.String()) + }) + + t.Run("replace in wrapper else branch with section filter", func(t *testing.T) { + input := ` +Name: test + +%files +/usr/bin/test + +%ifarch x86_64 +%files nonlinux +%{_datadir}/syslinux/*.exe +%else +%exclude %{_datadir}/syslinux/*.exe +%endif +` + + specFile, err := spec.OpenSpec(strings.NewReader(input)) + require.NoError(t, err) + + expected := strings.ReplaceAll(input, "%exclude %{_datadir}/syslinux/*.exe", "") + + err = specFile.SearchAndReplace("%files", "", `^%exclude %\{_datadir\}/syslinux/\*\.exe$`, "") + require.NoError(t, err) + + actual := new(bytes.Buffer) + + err = specFile.Serialize(actual) + require.NoError(t, err) + + require.Equal(t, expected, actual.String()) + }) } func TestAddChangelogEntry(t *testing.T) { @@ -1101,6 +1211,209 @@ Name: test err = specFile.AppendLinesToSection("%description", "", []string{"New line"}) require.Error(t, err) }) + + // ---- Conditional boundary tests ---- + // + // These tests document AppendLinesToSection behavior at conditional boundaries. + // When a section is followed by a %if wrapper that contains the next section, + // the tree parser correctly identifies the wrapper boundary. Appended lines + // land at the end of the section body, before the wrapper. + + t.Run("appends before conditional wrapping next section", func(t *testing.T) { + // The %if wraps %install, not %build. "echo done" belongs in %build. + input := ` +Name: test + +%build +make + +%if %{with_docs} +%install +install.sh +%endif +` + specFile, err := spec.OpenSpec(strings.NewReader(input)) + require.NoError(t, err) + + err = specFile.AppendLinesToSection("%build", "", []string{"echo done"}) + require.NoError(t, err) + + actual := new(bytes.Buffer) + + err = specFile.Serialize(actual) + require.NoError(t, err) + + // Tree-based code correctly places "echo done" before the wrapper %if. + assert.Equal(t, ` +Name: test + +%build +make + +echo done +%if %{with_docs} +%install +install.sh +%endif +`, actual.String()) + }) + + t.Run("appends before nested conditionals wrapping next section", func(t *testing.T) { + input := ` +Name: test + +%build +make + +%if %{with_docs} +%ifarch x86_64 +%install +install.sh +%endif +%endif +` + specFile, err := spec.OpenSpec(strings.NewReader(input)) + require.NoError(t, err) + + err = specFile.AppendLinesToSection("%build", "", []string{"echo done"}) + require.NoError(t, err) + + actual := new(bytes.Buffer) + + err = specFile.Serialize(actual) + require.NoError(t, err) + + // Tree-based code correctly places "echo done" before the outer wrapper. + assert.Equal(t, ` +Name: test + +%build +make + +echo done +%if %{with_docs} +%ifarch x86_64 +%install +install.sh +%endif +%endif +`, actual.String()) + }) + + t.Run("appends correctly when section has own balanced conditional", func(t *testing.T) { + // %if/%endif is fully within %build, so the boundary is correct. + input := ` +Name: test + +%build +%if %{with_docs} +make docs +%endif +make + +%install +install.sh +` + specFile, err := spec.OpenSpec(strings.NewReader(input)) + require.NoError(t, err) + + err = specFile.AppendLinesToSection("%build", "", []string{"echo done"}) + require.NoError(t, err) + + actual := new(bytes.Buffer) + + err = specFile.Serialize(actual) + require.NoError(t, err) + + assert.Equal(t, ` +Name: test + +%build +%if %{with_docs} +make docs +%endif +make + +echo done +%install +install.sh +`, actual.String()) + }) + + t.Run("appends correctly when no conditionals at boundary", func(t *testing.T) { + input := ` +Name: test + +%build +make + +%install +install.sh +` + specFile, err := spec.OpenSpec(strings.NewReader(input)) + require.NoError(t, err) + + err = specFile.AppendLinesToSection("%build", "", []string{"echo done"}) + require.NoError(t, err) + + actual := new(bytes.Buffer) + + err = specFile.Serialize(actual) + require.NoError(t, err) + + assert.Equal(t, ` +Name: test + +%build +make + +echo done +%install +install.sh +`, actual.String()) + }) + + t.Run("appends before preamble conditional wrapping next section", func(t *testing.T) { + // In the preamble, a trailing %if wrapper contains %description. + // The tree parser correctly identifies the wrapper boundary. + input := ` +Name: test +Source0: test.tar.gz + +%if %{with_docs} +%description +A test package. +%endif + +%build +make +` + specFile, err := spec.OpenSpec(strings.NewReader(input)) + require.NoError(t, err) + + err = specFile.AppendLinesToSection("", "", []string{"Vendor: Microsoft"}) + require.NoError(t, err) + + actual := new(bytes.Buffer) + + err = specFile.Serialize(actual) + require.NoError(t, err) + + // Tree-based code correctly places "Vendor: Microsoft" before the wrapper. + assert.Equal(t, ` +Name: test +Source0: test.tar.gz + +Vendor: Microsoft +%if %{with_docs} +%description +A test package. +%endif + +%build +make +`, actual.String()) + }) } func TestHasSection(t *testing.T) { @@ -1411,105 +1724,105 @@ func TestParsePatchTagNumber(t *testing.T) { } } -func TestVisitTags(t *testing.T) { - input := `Name: main-pkg +func TestContinuationSuppressesStructuralParsing(t *testing.T) { + t.Run("section keyword in continuation body is not a section start", func(t *testing.T) { + input := `Name: test Version: 1.0 -Patch0: main.patch -%package devel -Summary: Development files -Patch1: devel.patch +%description +A package. -%package -n other -Summary: Other package -Patch2: other.patch +%install +echo \ +%files \ +done + +%files +/usr/bin/test ` + specFile, err := spec.OpenSpec(strings.NewReader(input)) + require.NoError(t, err) - tests := []struct { - name string - expectedTags []string - }{ - { - name: "visits tags across all packages", - expectedTags: []string{"Name", "Version", "Patch0", "Summary", "Patch1", "Summary", "Patch2"}, - }, - } + // %files inside the continuation should NOT be treated as a section start. + // Only one real %files section should exist. + found, err := specFile.HasSection("%files") + require.NoError(t, err) + assert.True(t, found, "real %%files section should be found") + }) - for _, testCase := range tests { - t.Run(testCase.name, func(t *testing.T) { - sf, err := spec.OpenSpec(strings.NewReader(input)) - require.NoError(t, err) + t.Run("tag-like line in continuation body is not a tag", func(t *testing.T) { + input := `Name: test +Version: 1.0 - var tags []string +%description +A package. - err = sf.VisitTags(func(tagLine *spec.TagLine, _ *spec.Context) error { - tags = append(tags, tagLine.Tag) +%install +echo \ +Name: fake \ +done +` + specFile, err := spec.OpenSpec(strings.NewReader(input)) + require.NoError(t, err) - return nil - }) - require.NoError(t, err) - assert.Equal(t, testCase.expectedTags, tags) - }) - } -} + // The "Name: fake" inside the continuation should not be found as a tag. + // GetTag should return the real "Name: test" from the preamble. + value, err := specFile.GetTag("", "Name") + require.NoError(t, err) + assert.Equal(t, "test", value, "should find the real Name tag, not the continuation body") + }) -func TestVisitTagsPackage(t *testing.T) { - input := `Name: main-pkg + t.Run("normal parsing resumes after continuation ends", func(t *testing.T) { + input := `Name: test Version: 1.0 -Patch0: main.patch -%package devel -Summary: Development files -Patch1: devel.patch +%description +A package. -%package -n other -Summary: Other package -Patch2: other.patch +%build +echo \ +%install \ +done + +%install +make install + +%files +/usr/bin/test ` + specFile, err := spec.OpenSpec(strings.NewReader(input)) + require.NoError(t, err) - tests := []struct { - name string - packageName string - expectedTags []string - }{ - { - name: "global package only", - packageName: "", - expectedTags: []string{"Name", "Version", "Patch0"}, - }, - { - name: "devel sub-package only", - packageName: "devel", - expectedTags: []string{"Summary", "Patch1"}, - }, - { - name: "other sub-package only", - packageName: "other", - expectedTags: []string{"Summary", "Patch2"}, - }, - { - name: "non-existing package returns no tags", - packageName: "nonexistent", - expectedTags: nil, - }, - } + found, err := specFile.HasSection("%install") + require.NoError(t, err) + assert.True(t, found, "real %%install section after continuation should be found") - for _, testCase := range tests { - t.Run(testCase.name, func(t *testing.T) { - sf, err := spec.OpenSpec(strings.NewReader(input)) - require.NoError(t, err) + found, err = specFile.HasSection("%files") + require.NoError(t, err) + assert.True(t, found, "%%files section should be found") + }) - var tags []string + t.Run("chained multi-line continuation", func(t *testing.T) { + input := `Name: test +Version: 1.0 - err = sf.VisitTagsPackage(testCase.packageName, func(tagLine *spec.TagLine, _ *spec.Context) error { - tags = append(tags, tagLine.Tag) +%build +echo \ +%description \ +%files \ +%install \ +done +` + sf, err := spec.OpenSpec(strings.NewReader(input)) + require.NoError(t, err) - return nil - }) + // None of the keywords in the continuation chain should create sections. + for _, sect := range []string{"%description", "%files", "%install"} { + found, err := sf.HasSection(sect) require.NoError(t, err) - assert.Equal(t, testCase.expectedTags, tags) - }) - } + assert.False(t, found, "%%s in continuation chain should not be a section", sect) + } + }) } func TestRemoveSection(t *testing.T) { @@ -1960,7 +2273,7 @@ Main. errorContains: "conditional block spans", }, { - name: "errors on else branch inside straddling conditional", + name: "removes section from one branch while else branch retains sections", input: `Name: test %description @@ -1977,9 +2290,21 @@ Main. %files /usr/bin/test `, - packageName: "foo", - errorExpected: true, - errorContains: "branch directive", + packageName: "foo", + expectedOutput: `Name: test + +%description +Main. + +%if cond +%else +%files bar +/usr/share/bar +%endif + +%files +/usr/bin/test +`, }, { name: "errors when trimmed zone contains section content in a balanced conditional", diff --git a/internal/rpm/spec/testdata/specs/comment-only-conditional.spec b/internal/rpm/spec/testdata/specs/comment-only-conditional.spec new file mode 100644 index 00000000..be663d10 --- /dev/null +++ b/internal/rpm/spec/testdata/specs/comment-only-conditional.spec @@ -0,0 +1,32 @@ +Name: comment-only-conditional +Version: 1.0 +Release: 1 +Summary: %%if blocks whose entire body is comments and blank lines +License: MIT + +%if 0%{?with_future} +# Reserved for the upcoming foo backend. +# Empty until upstream finalizes the API. + +# Track: https://example.invalid/issues/42 +%endif + +%description +Fixture: a top-level conditional whose body contains only RPM-spec comments +and blank lines, plus a guard inside a script section with the same shape. + +%build +%if 0%{?with_future} +# TODO(future): wire up the foo backend once it lands upstream. +%endif +make + +%install +make install DESTDIR=%{buildroot} + +%files +/usr/bin/comment-only-conditional + +%changelog +* Thu Jan 01 1970 Builder - 1.0-1 +- Initial fixture. diff --git a/internal/rpm/spec/testdata/specs/elif-chain.spec b/internal/rpm/spec/testdata/specs/elif-chain.spec new file mode 100644 index 00000000..7c950765 --- /dev/null +++ b/internal/rpm/spec/testdata/specs/elif-chain.spec @@ -0,0 +1,39 @@ +Name: elif-chain +Version: 1.0 +Release: 1 +Summary: %%if / %%elif / %%else chain inside preamble +License: MIT + +%if 0%{?rhel} >= 10 +Requires: rhel10-runtime +BuildRequires: rhel10-devel +%elif 0%{?rhel} >= 9 +Requires: rhel9-runtime +BuildRequires: rhel9-devel +%elif 0%{?fedora} >= 40 +Requires: fedora-runtime +BuildRequires: fedora-devel +%elif 0%{?suse_version} +Requires: suse-runtime +BuildRequires: suse-devel +%else +Requires: generic-runtime +BuildRequires: generic-devel +%endif + +%description +Fixture: deep %%elif chain with terminal %%else, content-style conditional +(no section headers in any branch). + +%build +make + +%install +make install DESTDIR=%{buildroot} + +%files +/usr/bin/elif-chain + +%changelog +* Thu Jan 01 1970 Builder - 1.0-1 +- Initial fixture. diff --git a/internal/rpm/spec/testdata/specs/elif-with-sections.spec b/internal/rpm/spec/testdata/specs/elif-with-sections.spec new file mode 100644 index 00000000..d3811051 --- /dev/null +++ b/internal/rpm/spec/testdata/specs/elif-with-sections.spec @@ -0,0 +1,52 @@ +Name: elif-with-sections +Version: 1.0 +Release: 1 +Summary: %%elif branches that each contain entire %%package sections +License: MIT + +%description +Fixture: %%elif chain where every branch (including %%else) introduces a +distinct %%package + %%description + %%files trio. Each conditional branch +acts as a wrapper, not as in-section content. + +%if 0%{?rhel} +%package rhel-extras +Summary: RHEL-specific extras + +%description rhel-extras +Extras only built for RHEL. + +%files rhel-extras +/usr/share/elif-with-sections/rhel +%elif 0%{?fedora} +%package fedora-extras +Summary: Fedora-specific extras + +%description fedora-extras +Extras only built for Fedora. + +%files fedora-extras +/usr/share/elif-with-sections/fedora +%else +%package generic-extras +Summary: Generic extras + +%description generic-extras +Fallback extras for all other distros. + +%files generic-extras +/usr/share/elif-with-sections/generic +%endif + +%build +make + +%install +make install DESTDIR=%{buildroot} + +%files +/usr/bin/elif-with-sections + +%changelog +* Thu Jan 01 1970 Builder - 1.0-1 +- Initial fixture. diff --git a/internal/rpm/spec/testdata/specs/if-with-continuation.spec b/internal/rpm/spec/testdata/specs/if-with-continuation.spec new file mode 100644 index 00000000..c3648167 --- /dev/null +++ b/internal/rpm/spec/testdata/specs/if-with-continuation.spec @@ -0,0 +1,34 @@ +Name: if-with-continuation +Version: 1.0 +Release: 1 +Summary: %%if condition that spans multiple lines via backslash continuation +License: MIT + +%global _is_long_arch \ + 0%{?rhel} >= 9 || \ + 0%{?fedora} >= 40 || \ + 0%{?suse_version} >= 1550 + +%if %{_is_long_arch} && \ + %{undefined disable_long_arch} && \ + "%{_arch}" != "armv7hl" +BuildRequires: long-arch-support +Requires: long-arch-runtime +%endif + +%description +Fixture: backslash-continuation inside an %%if condition itself (not just in +the body) and in a %%global that the condition references. + +%build +make + +%install +make install DESTDIR=%{buildroot} + +%files +/usr/bin/if-with-continuation + +%changelog +* Thu Jan 01 1970 Builder - 1.0-1 +- Initial fixture. diff --git a/internal/rpm/spec/testdata/specs/macro-conditional.spec b/internal/rpm/spec/testdata/specs/macro-conditional.spec new file mode 100644 index 00000000..4ed659db --- /dev/null +++ b/internal/rpm/spec/testdata/specs/macro-conditional.spec @@ -0,0 +1,44 @@ +Name: macro-conditional +Version: 1.0 +Release: 1%{?dist} +Summary: Fixture with %if/%endif inside macro continuation bodies +License: MIT + +# Parameterized macro with %if/%endif in the body (kernel pattern). +# The %if here is RPM macro body text, NOT a structural conditional. +%define kernel_reqprovconf(o) \ +%if %{-o:0}%{!-o:1}\ +Provides: kernel = %{version}-%{release}\ +Provides: %{name} = %{version}-%{release}\ +%endif\ +%{nil} + +# Global macro with conditional (ghc pattern). +%global obsoletes_pkg() \ +%if %{defined old_name}\ +Obsoletes: %{old_name}%{?1:-%1} < %{version}-%{release}\ +Provides: %{old_name}%{?1:-%1} = %{version}-%{release}\ +%endif\ +%{nil} + +# Real structural conditional (should still be parsed). +%if 0%{?fedora} +BuildRequires: fedora-only-dep +%endif + +%description +A spec testing that %if/%endif inside backslash-continued macro +definitions are treated as macro body text, not structural conditionals. + +%build +make %{?_smp_mflags} + +%install +make install DESTDIR=%{buildroot} + +%files +/usr/bin/macro-conditional + +%changelog +* Thu Jan 01 1970 Builder - 1.0-1 +- Initial build. diff --git a/internal/rpm/spec/testdata/specs/macro-continuation.spec b/internal/rpm/spec/testdata/specs/macro-continuation.spec new file mode 100644 index 00000000..7852d1d2 --- /dev/null +++ b/internal/rpm/spec/testdata/specs/macro-continuation.spec @@ -0,0 +1,33 @@ +Name: macro-continuation +Version: 1.0 +Release: 1 +Summary: %%define / %%global with backslash continuation +License: MIT + +%global cmake_flags \ + -DENABLE_FOO=ON \ + -DENABLE_BAR=OFF \ + -DCMAKE_BUILD_TYPE=Release + +%define configure_args \ + --prefix=%{_prefix} \ + --libdir=%{_libdir} \ + --sysconfdir=%{_sysconfdir} + +%description +Fixture: %%define / %%global with backslash continuation lines. + +%build +cmake %{cmake_flags} . +./configure %{configure_args} +make + +%install +make install DESTDIR=%{buildroot} + +%files +/usr/bin/macro-continuation + +%changelog +* Thu Jan 01 1970 Builder - 1.0-1 +- Initial fixture. diff --git a/internal/rpm/spec/testdata/specs/macro-with-parameters.spec b/internal/rpm/spec/testdata/specs/macro-with-parameters.spec new file mode 100644 index 00000000..b0042af2 --- /dev/null +++ b/internal/rpm/spec/testdata/specs/macro-with-parameters.spec @@ -0,0 +1,35 @@ +Name: macro-with-parameters +Version: 1.0 +Release: 1 +Summary: %%define macros that accept positional parameters +License: MIT + +%define uname_suffix() %{?1:+%{1}} +%define uname_variant() %{lua: + local v = rpm.expand("%{?1}") + if v == "" then return "" end + return "-" .. v +} + +%define build_with(opt) \ +%{expand:%%global _with_%{1} --with-%{1}} \ +%global _enable_%{1} 1 + +%description +Fixture: parameterized %%define macros — empty-arg, lua body, and a +multi-line definition that itself expands further %%global calls. + +%build +%{build_with foo} +%{build_with bar} +make %{?_smp_mflags} + +%install +make install DESTDIR=%{buildroot} + +%files +/usr/bin/macro-with-parameters + +%changelog +* Thu Jan 01 1970 Builder - 1.0-1 +- Initial fixture. diff --git a/internal/rpm/spec/testdata/specs/multi-package-mixed.spec b/internal/rpm/spec/testdata/specs/multi-package-mixed.spec new file mode 100644 index 00000000..81dd8903 --- /dev/null +++ b/internal/rpm/spec/testdata/specs/multi-package-mixed.spec @@ -0,0 +1,69 @@ +Name: multi-package-mixed +Version: 1.0 +Release: 1 +Summary: Multiple subpackages mixed with conditionals and macros +License: MIT +URL: https://example.invalid/ +Source0: %{name}-%{version}.tar.gz + +%global commit_id 0123456789abcdef0123456789abcdef01234567 +%define short_commit %(echo %{commit_id} | cut -c1-7) + +%description +Fixture: realistic multi-subpackage layout combining %%package -n +renaming, mixed conditional wrappers, and shared macros. Exercises tag +walks, section enumeration, and per-package filtering against a +non-trivial topology. + +%package devel +Summary: Development files for %{name} +Requires: %{name}%{?_isa} = %{version}-%{release} + +%description devel +Headers and link-time helpers for building against %{name}. + +%package -n lib%{name} +Summary: Runtime library for %{name} +Provides: bundled(%{name}-internal) = %{short_commit} + +%description -n lib%{name} +Just the shared library, suitable for stand-alone consumption. + +%if 0%{?with_docs} +%package doc +Summary: Documentation for %{name} +BuildArch: noarch + +%description doc +HTML and man pages for %{name}, built from the in-tree sources. +%endif + +%prep +%autosetup -n %{name}-%{version} + +%build +%configure +%make_build + +%install +%make_install + +%files +%license LICENSE +/usr/bin/multi-package-mixed + +%files devel +/usr/include/%{name}/ + +%files -n lib%{name} +/usr/lib64/lib%{name}.so.* + +%if 0%{?with_docs} +%files doc +%doc README.md +%doc docs/html/ +%endif + +%changelog +* Thu Jan 01 1970 Builder - 1.0-1 +- Initial fixture. diff --git a/internal/rpm/spec/testdata/specs/nested-wrappers.spec b/internal/rpm/spec/testdata/specs/nested-wrappers.spec new file mode 100644 index 00000000..7d3488cd --- /dev/null +++ b/internal/rpm/spec/testdata/specs/nested-wrappers.spec @@ -0,0 +1,43 @@ +Name: nested-wrappers +Version: 1.0 +Release: 1 +Summary: %%if wrappers nested inside other %%if wrappers across sections +License: MIT + +%description +Fixture: outer %%if wraps the %%package devel section, which itself contains +an inner %%if that wraps %%description devel / %%files devel. + +%if 0%{?with_devel} +%package devel +Summary: Development files +Requires: %{name} = %{version}-%{release} + +%if 0%{?with_devel_docs} +%description devel +Devel files for nested-wrappers, including extra documentation. + +%files devel +/usr/include/nested-wrappers.h +/usr/share/doc/nested-wrappers/devel/ +%else +%description devel +Devel files for nested-wrappers. + +%files devel +/usr/include/nested-wrappers.h +%endif +%endif + +%build +make + +%install +make install DESTDIR=%{buildroot} + +%files +/usr/bin/nested-wrappers + +%changelog +* Thu Jan 01 1970 Builder - 1.0-1 +- Initial fixture. diff --git a/internal/rpm/spec/testdata/specs/script-section-tag-shaped.spec b/internal/rpm/spec/testdata/specs/script-section-tag-shaped.spec new file mode 100644 index 00000000..c91992bc --- /dev/null +++ b/internal/rpm/spec/testdata/specs/script-section-tag-shaped.spec @@ -0,0 +1,43 @@ +Name: script-section-tag-shaped +Version: 1.0 +Release: 1 +Summary: Tag-shaped shell lines inside script sections must not be parsed as tags +License: MIT + +%description +Fixture: script sections (%%build, %%install, %%post, %%pre, %%check) contain +shell commands whose arguments look exactly like spec tags +(`echo "Name: foo"`, `printf "Version: ...\n"`, etc.). Tag-edit operations +must skip these — only the preamble and %%package blocks accept tag writes. + +%build +echo "Name: not-a-tag-write" +printf "Version: still-not-a-tag\n" +echo "Requires: bash" >> .build-manifest +make + +%install +make install DESTDIR=%{buildroot} +cat < %{buildroot}/etc/%{name}.conf +Name: %{name} +Version: %{version} +EOF + +%check +echo "License: MIT" | tee -a check.log +make check + +%pre +echo "Conflicts: previous-version" >&2 + +%post +ldconfig +echo "Provides: %{name}-runtime" > /var/log/%{name}-post.log + +%files +/usr/bin/script-section-tag-shaped +/etc/%{name}.conf + +%changelog +* Thu Jan 01 1970 Builder - 1.0-1 +- Initial fixture. diff --git a/internal/rpm/spec/testdata/specs/straddling-wrapper.spec b/internal/rpm/spec/testdata/specs/straddling-wrapper.spec new file mode 100644 index 00000000..3f63284e --- /dev/null +++ b/internal/rpm/spec/testdata/specs/straddling-wrapper.spec @@ -0,0 +1,30 @@ +Name: straddling-wrapper +Version: 1.0 +Release: 1 +Summary: %%if opens before a section header and %%endif closes inside it +License: MIT + +%description +Fixture: classic Fedora-style "straddling" conditional. The %%if directive +appears at the top level (between %%build and %%install) but is paired with +an %%endif that lives several sections later — bracketing %%install and +%%check into the conditional wrapper. + +%build +make + +%if 0%{?with_tests} +%install +make install DESTDIR=%{buildroot} +make install-tests DESTDIR=%{buildroot} + +%check +make check +%endif + +%files +/usr/bin/straddling-wrapper + +%changelog +* Thu Jan 01 1970 Builder - 1.0-1 +- Initial fixture. diff --git a/internal/rpm/spec/testdata/specs/subpackage-define-referenced.spec b/internal/rpm/spec/testdata/specs/subpackage-define-referenced.spec new file mode 100644 index 00000000..f430da4b --- /dev/null +++ b/internal/rpm/spec/testdata/specs/subpackage-define-referenced.spec @@ -0,0 +1,39 @@ +Name: subpackage-define-referenced +Version: 1.0 +Release: 1 +Summary: %%define inside a subpackage referenced from %%install (issue #203 repro) +License: MIT + +%description +Fixture mirroring issue #203 -- the helper macro is defined inside the +test subpackage but referenced from the unconditional install section. +Removing the subpackage naively drops the macro and leaves dangling +references in surviving sections. + +%package tests +Summary: Tests for %{name} +Requires: %{name} = %{version}-%{release} + +%define testsdir %{_libdir}/%{name}/tests-src + +%description tests +The %{name}-tests rpm contains test fixtures for %{name}. + +%files tests +%{testsdir} + +%build +make + +%install +make install DESTDIR=%{buildroot} +mkdir -p %{buildroot}%{testsdir}/python +mkdir -p %{buildroot}%{testsdir}/scripts +install -p -m 0644 tests/Makefile.include %{buildroot}%{testsdir}/ + +%files +/usr/bin/subpackage-define-referenced + +%changelog +* Thu Jan 01 1970 Builder - 1.0-1 +- Initial fixture. diff --git a/internal/rpm/spec/testdata/specs/subpackage-define-unreferenced.spec b/internal/rpm/spec/testdata/specs/subpackage-define-unreferenced.spec new file mode 100644 index 00000000..d1fe37a2 --- /dev/null +++ b/internal/rpm/spec/testdata/specs/subpackage-define-unreferenced.spec @@ -0,0 +1,36 @@ +Name: subpackage-define-unreferenced +Version: 1.0 +Release: 1 +Summary: %%define inside a subpackage only referenced from within itself +License: MIT + +%description +Fixture companion to subpackage-define-referenced. The macro defined inside +the helper subpackage is only referenced from within the same subpackage, +so removing that subpackage should drop the macro cleanly without any +hoisting. + +%package tools +Summary: Helper tools for %{name} +Requires: %{name} = %{version}-%{release} + +%define toolsdir %{_libexecdir}/%{name}/tools + +%description tools +Helper command-line utilities used only with the tools subpackage. + +%files tools +%{toolsdir} + +%build +make + +%install +make install DESTDIR=%{buildroot} + +%files +/usr/bin/subpackage-define-unreferenced + +%changelog +* Thu Jan 01 1970 Builder - 1.0-1 +- Initial fixture. diff --git a/internal/rpm/spec/testdata_test.go b/internal/rpm/spec/testdata_test.go new file mode 100644 index 00000000..a0e3be85 --- /dev/null +++ b/internal/rpm/spec/testdata_test.go @@ -0,0 +1,565 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package spec_test + +import ( + "bytes" + "math/rand/v2" + "os" + "path/filepath" + "strconv" + "strings" + "testing" + + "github.com/microsoft/azure-linux-dev-tools/internal/rpm/spec" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// testdataSpecsDir is the directory containing curated hand-crafted spec +// fixtures harvested from real-world Fedora / Azure Linux patterns. +const testdataSpecsDir = "testdata/specs" + +// loadFixture reads a fixture file and returns its raw bytes. +func loadFixture(t *testing.T, name string) []byte { + t.Helper() + + path := filepath.Join(testdataSpecsDir, name) + + raw, err := os.ReadFile(path) + require.NoError(t, err, "reading fixture %s", path) + + return raw +} + +// openFixture parses a fixture into a *Spec. +func openFixture(t *testing.T, name string) *spec.Spec { + t.Helper() + + raw := loadFixture(t, name) + + specObj, err := spec.OpenSpec(bytes.NewReader(raw)) + require.NoError(t, err, "parsing fixture %s", name) + + return specObj +} + +// serializeSpec serializes a Spec to a string for assertion. +func serializeSpec(t *testing.T, s *spec.Spec) string { + t.Helper() + + var buf bytes.Buffer + + require.NoError(t, s.Serialize(&buf)) + + return buf.String() +} + +// listFixtures returns the names (basename only) of all *.spec files in +// the curated testdata directory. +func listFixtures(t *testing.T) []string { + t.Helper() + + entries, err := os.ReadDir(testdataSpecsDir) + require.NoError(t, err) + + names := make([]string, 0, len(entries)) + + for _, entry := range entries { + if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".spec") { + continue + } + + names = append(names, entry.Name()) + } + + require.NotEmpty(t, names, "expected at least one fixture in %s", testdataSpecsDir) + + return names +} + +// --- Tier 1: round-trip preservation for every curated fixture. --- + +// TestTestdataRoundTrip parses every fixture in testdata/specs/, serializes +// it back to bytes, and asserts byte-for-byte equality with the source. +// Failures here indicate the parser/serializer is dropping or mutating lines +// for some real-world spec pattern. +func TestTestdataRoundTrip(t *testing.T) { + for _, name := range listFixtures(t) { + t.Run(name, func(t *testing.T) { + raw := loadFixture(t, name) + + s, err := spec.OpenSpec(bytes.NewReader(raw)) + require.NoError(t, err, "parsing %s", name) + + got := serializeSpec(t, s) + assert.Equal(t, string(raw), got, "round-trip mismatch for %s", name) + }) + } +} + +// --- Tier 3: targeted edit-operation tests per fixture pattern. --- + +// TestTestdataAddTagPreservesStructure exercises AddTag against every fixture +// and asserts that: +// 1. The operation succeeds (no parse-state corruption from quirky inputs). +// 2. Serialized output still round-trips through OpenSpec (idempotent parser). +// 3. The injected tag is observable in the rendered output. +func TestTestdataAddTagPreservesStructure(t *testing.T) { + for _, name := range listFixtures(t) { + t.Run(name, func(t *testing.T) { + s := openFixture(t, name) + + require.NoError(t, s.AddTag("", "BuildRequires", "regression-marker")) + + out := serializeSpec(t, s) + assert.Contains(t, out, "BuildRequires: regression-marker", + "injected tag should appear in serialized output") + + // Re-parse the result. The parser should accept its own output. + _, err := spec.OpenSpec(bytes.NewReader([]byte(out))) + require.NoError(t, err, "serialized output should re-parse cleanly") + }) + } +} + +// TestTestdataHasSectionWalksWrappers verifies HasSection finds sections that +// live inside conditional wrappers (straddling, nested, elif-with-sections). +func TestTestdataHasSectionWalksWrappers(t *testing.T) { + cases := []struct { + fixture string + section string + want bool + }{ + // straddling-wrapper: %install and %check live inside %if 0%{?with_tests}. + {"straddling-wrapper.spec", "%install", true}, + {"straddling-wrapper.spec", "%check", true}, + {"straddling-wrapper.spec", "%files", true}, + {"straddling-wrapper.spec", "%post", false}, + + // nested-wrappers: %package devel lives inside %if 0%{?with_devel}. + {"nested-wrappers.spec", "%package", true}, + {"nested-wrappers.spec", "%description", true}, + {"nested-wrappers.spec", "%files", true}, + {"nested-wrappers.spec", "%check", false}, + + // elif-with-sections: each branch contributes a different %package, but + // from the spec's perspective at least one %package header exists. + {"elif-with-sections.spec", "%package", true}, + {"elif-with-sections.spec", "%files", true}, + + // multi-package-mixed: %if-wrapped %package doc. + {"multi-package-mixed.spec", "%package", true}, + {"multi-package-mixed.spec", "%changelog", true}, + } + + for _, testCase := range cases { + t.Run(testCase.fixture+"/"+testCase.section, func(t *testing.T) { + specObj := openFixture(t, testCase.fixture) + + got, err := specObj.HasSection(testCase.section) + require.NoError(t, err) + assert.Equal(t, testCase.want, got) + }) + } +} + +// TestTestdataAppendLinesToSection verifies AppendLinesToSection works through +// straddling and nested conditional wrappers — the lines must land inside the +// targeted section even when the section itself is inside an %if block. +func TestTestdataAppendLinesToSection(t *testing.T) { + cases := []struct { + fixture string + section string + pkg string + marker string + }{ + {"straddling-wrapper.spec", "%install", "", "echo straddling-marker"}, + {"nested-wrappers.spec", "%files", "devel", "/usr/share/nested-marker"}, + {"multi-package-mixed.spec", "%files", "devel", "/usr/share/multi-marker"}, + {"elif-with-sections.spec", "%files", "", "/usr/share/elif-marker"}, + } + + for _, testCase := range cases { + t.Run(testCase.fixture+"/"+testCase.section+"/"+testCase.pkg, func(t *testing.T) { + specObj := openFixture(t, testCase.fixture) + + require.NoError(t, specObj.AppendLinesToSection(testCase.section, testCase.pkg, []string{testCase.marker})) + + out := serializeSpec(t, specObj) + assert.Contains(t, out, testCase.marker, "marker should appear in serialized output") + + // Sanity: parser should accept its own output. + _, err := spec.OpenSpec(bytes.NewReader([]byte(out))) + require.NoError(t, err) + }) + } +} + +// TestTestdataScriptSectionTagShapedSafety asserts that tag-walking operations +// targeting script sections leave shell lines that look like tags untouched. +// This guards against regressions in the isTagBearingSection filter. +func TestTestdataScriptSectionTagShapedSafety(t *testing.T) { + raw := loadFixture(t, "script-section-tag-shaped.spec") + + // Capture the script-section shell lines that look like tags. + scriptyLines := []string{ + `echo "Name: not-a-tag-write"`, + `printf "Version: still-not-a-tag\n"`, + `echo "Requires: bash" >> .build-manifest`, + `echo "License: MIT" | tee -a check.log`, + `echo "Conflicts: previous-version" >&2`, + `echo "Provides: %{name}-runtime" > /var/log/%{name}-post.log`, + } + + for _, line := range scriptyLines { + require.Contains(t, string(raw), line, "fixture must contain %q for the test to be meaningful", line) + } + + specObj, err := spec.OpenSpec(bytes.NewReader(raw)) + require.NoError(t, err) + + // Try to remove every tag named "Name", "Version", "Requires", "License", + // "Conflicts", "Provides" in the main package. Only real preamble tags + // (Name, Version, Release, Summary, License at the top of the file) + // should be considered; the shell lines must be left alone. + for _, tag := range []string{"Name", "Version", "Requires", "License", "Conflicts", "Provides"} { + _, err := specObj.RemoveTagsMatching("", func(t, _ string) bool { + return strings.EqualFold(t, tag) + }) + require.NoError(t, err, "RemoveTagsMatching(%q) should not error", tag) + } + + out := serializeSpec(t, specObj) + + // Every shell-shaped line must still be present. + for _, line := range scriptyLines { + assert.Contains(t, out, line, + "script-section shell line %q must NOT be removed by tag operations", line) + } +} + +// TestTestdataSearchAndReplaceSectionScope verifies that SearchAndReplace +// confined to a section only touches that section. We replace a string that +// appears in both %install and %files of multi-package-mixed.spec, restricted +// to %install, and confirm %files is untouched. +func TestTestdataSearchAndReplaceSectionScope(t *testing.T) { + specObj := openFixture(t, "multi-package-mixed.spec") + + // %make_install appears only in %install for this fixture — replace with a + // marker, then confirm the marker shows up once. + require.NoError(t, specObj.SearchAndReplace("%install", "", "%make_install", "%make_install # PATCHED")) + + out := serializeSpec(t, specObj) + assert.Contains(t, out, "%make_install # PATCHED") + assert.Equal(t, 1, strings.Count(out, "# PATCHED"), + "section-scoped SearchAndReplace must apply exactly once") +} + +// --- Synthetic generator stress test. --- + +// syntheticBuilder composes the same primitive patterns the curated fixtures +// exercise (preamble tags, conditionals, macros, sections) into random spec +// inputs. The test asserts every generated input round-trips through the +// parser/serializer. +type syntheticBuilder struct { + rng *rand.Rand + out strings.Builder + pkgIdx int + condIdx int +} + +func (b *syntheticBuilder) line(s string) { + b.out.WriteString(s) + b.out.WriteByte('\n') +} + +func (b *syntheticBuilder) writePreamble(name string) { + b.line("Name: " + name) + b.line("Version: 1.0") + b.line("Release: 1") + b.line("Summary: Synthetic test fixture") + b.line("License: MIT") + b.line("") +} + +func (b *syntheticBuilder) writeMacroContinuation() { + b.line("%global synth_flags \\") + b.line(" --enable-foo \\") + b.line(" --enable-bar \\") + b.line(" --enable-baz") + b.line("") +} + +func (b *syntheticBuilder) writeIfWrapper(body func()) { + b.condIdx++ + b.line("%if 0%{?with_synth_" + strconv.Itoa(b.condIdx) + "}") + body() + b.line("%endif") + b.line("") +} + +func (b *syntheticBuilder) writeIfElseContent(then, els string) { + b.condIdx++ + b.line("%if 0%{?fedora}") + b.line(then) + b.line("%else") + b.line(els) + b.line("%endif") + b.line("") +} + +func (b *syntheticBuilder) writeElifChain() { + b.condIdx++ + b.line("%if 0%{?rhel}") + b.line("Requires: rhel-thing") + b.line("%elif 0%{?fedora}") + b.line("Requires: fedora-thing") + b.line("%elif 0%{?suse_version}") + b.line("Requires: suse-thing") + b.line("%else") + b.line("Requires: generic-thing") + b.line("%endif") + b.line("") +} + +func (b *syntheticBuilder) writeSubpackage() { + b.pkgIdx++ + + name := "sub" + strconv.Itoa(b.pkgIdx) + + b.line("%package " + name) + b.line("Summary: Sub-package " + name) + b.line("") + b.line("%description " + name) + b.line("Synthetic sub-package " + name + ".") + b.line("") + b.line("%files " + name) + b.line("/usr/share/synth/" + name) + b.line("") +} + +func (b *syntheticBuilder) writeScriptSection(name string) { + b.line(name) + b.line(`echo "Name: not-a-tag"`) + b.line(`printf "Version: still-not-a-tag\n"`) + b.line("make") + b.line("") +} + +func (b *syntheticBuilder) writeFooter() { + b.line("%changelog") + b.line("* Thu Jan 01 1970 Builder - 1.0-1") + b.line("- Synthetic.") +} + +// generateSyntheticSpec composes a random spec from the primitive patterns +// above. The output ends with a newline so byte-for-byte round-trip checks +// match Spec.Serialize behavior. +func generateSyntheticSpec(seed1, seed2 uint64) string { + //nolint:gosec // deterministic synthetic test data, not security-sensitive + builder := &syntheticBuilder{rng: rand.New(rand.NewPCG(seed1, seed2))} + + builder.writePreamble("synthetic") + + // Insert a randomized 0..4 mix of preamble-level primitives. + for range 4 { + switch builder.rng.IntN(5) { + case 0: + builder.writeMacroContinuation() + case 1: + builder.writeElifChain() + case 2: + builder.writeIfElseContent("BuildRequires: fedora-only", "BuildRequires: other") + case 3: + builder.writeIfWrapper(func() { + builder.writeSubpackage() + }) + case 4: + builder.writeSubpackage() + } + } + + builder.line("%description") + builder.line("Synthetic top-level description.") + builder.line("") + + for _, sect := range []string{"%prep", "%build", "%install", "%check"} { + builder.writeScriptSection(sect) + } + + builder.line("%files") + builder.line("/usr/bin/synthetic") + builder.line("") + + builder.writeFooter() + + return builder.out.String() +} + +// TestSyntheticSpecsRoundTrip generates 64 random spec bodies from primitive +// patterns and asserts every one round-trips byte-for-byte. Failures expose a +// composition the parser handles incorrectly even though the individual +// patterns work in isolation. +func TestSyntheticSpecsRoundTrip(t *testing.T) { + const iterations = 64 + + for iteration := range iterations { + //nolint:gosec // iteration is bounded by iterations, no overflow risk + seed1 := uint64(iteration + 1) + //nolint:gosec // iteration is bounded by iterations, no overflow risk + seed2 := uint64(iteration)*1099511628211 + 14695981039346656037 + + t.Run("seed_"+strconv.Itoa(iteration), func(t *testing.T) { + input := generateSyntheticSpec(seed1, seed2) + + s, err := spec.OpenSpec(bytes.NewReader([]byte(input))) + require.NoError(t, err, "parsing synthetic spec (seed1=%d, seed2=%d)", seed1, seed2) + + out := serializeSpec(t, s) + assert.Equal(t, input, out, + "synthetic spec must round-trip (seed1=%d, seed2=%d)", seed1, seed2) + }) + } +} + +// TestSyntheticSpecsAddTag generates random specs and asserts AddTag preserves +// re-parseability. This is the random-composition analogue of +// TestTestdataAddTagPreservesStructure. +func TestSyntheticSpecsAddTag(t *testing.T) { + const iterations = 32 + + for iteration := range iterations { + //nolint:gosec // iteration is bounded by iterations, no overflow risk + seed1 := uint64(iteration + 1000) + //nolint:gosec // iteration is bounded by iterations, no overflow risk + seed2 := uint64(iteration)*0x9E3779B97F4A7C15 + 0xBF58476D1CE4E5B9 + + t.Run("seed_"+strconv.Itoa(iteration), func(t *testing.T) { + input := generateSyntheticSpec(seed1, seed2) + + s, err := spec.OpenSpec(bytes.NewReader([]byte(input))) + require.NoError(t, err) + + require.NoError(t, s.AddTag("", "BuildRequires", "synth-marker")) + + out := serializeSpec(t, s) + assert.Contains(t, out, "BuildRequires: synth-marker") + + _, err = spec.OpenSpec(bytes.NewReader([]byte(out))) + require.NoError(t, err, "spec must re-parse after AddTag") + }) + } +} + +// --- Issue #203: macro hoisting on subpackage removal. --- + +// lineIndex returns the 0-based line index where target appears as an exact +// trimmed-line match in lines, or -1 if no such line exists. +func lineIndex(lines []string, target string) int { + for i, line := range lines { + if strings.TrimSpace(line) == target { + return i + } + } + + return -1 +} + +// hasLine reports whether any trimmed line in lines equals target. +func hasLine(lines []string, target string) bool { + return lineIndex(lines, target) >= 0 +} + +// hasLineWithPrefix reports whether any trimmed line in lines starts with +// prefix. Useful for header lines like `%package tests` where the trailing +// whitespace may vary. +func hasLineWithPrefix(lines []string, prefix string) bool { + for _, line := range lines { + if strings.HasPrefix(strings.TrimSpace(line), prefix) { + return true + } + } + + return false +} + +// TestTestdataRemoveSubpackageHoistsReferencedMacro is the issue #203 repro +// turned into a regression test. The fixture has `%define testsdir` inside +// `%package tests` and references `%{testsdir}` from `%install` (which is +// unconditional and survives subpackage removal). +// +// Required behavior: removing the `tests` subpackage must hoist the macro +// definition to the root level (before the first removed section) so the +// surviving references in `%install` still resolve. All sections targeting +// the `tests` subpackage must still be removed. +func TestTestdataRemoveSubpackageHoistsReferencedMacro(t *testing.T) { + specObj := openFixture(t, "subpackage-define-referenced.spec") + + require.NoError(t, specObj.RemoveSubpackage("tests")) + + out := serializeSpec(t, specObj) + outLines := strings.Split(out, "\n") + + // The macro definition must survive as its own header line. + macroLine := "%define testsdir %{_libdir}/%{name}/tests-src" + assert.True(t, hasLine(outLines, macroLine), + "referenced macro must be hoisted, not dropped with the subpackage") + + // All subpackage section headers must be gone (line-exact, ignoring the + // description text which legitimately mentions `%%package tests`). + assert.False(t, hasLineWithPrefix(outLines, "%package tests"), + "subpackage header must be removed") + assert.False(t, hasLineWithPrefix(outLines, "%description tests"), + "subpackage description must be removed") + assert.False(t, hasLineWithPrefix(outLines, "%files tests"), + "subpackage files must be removed") + + // The hoisted macro must appear before %install so the surviving + // `%{testsdir}` references resolve. + macroIdx := lineIndex(outLines, macroLine) + installIdx := lineIndex(outLines, "%install") + + require.GreaterOrEqual(t, macroIdx, 0, "hoisted macro must be in output") + require.GreaterOrEqual(t, installIdx, 0, "%install section must remain") + assert.Less(t, macroIdx, installIdx, + "hoisted macro must appear before %%install so the reference resolves") + + // The reference itself must still exist in %install. + assert.Contains(t, out, "%{buildroot}%{testsdir}/python", + "surviving %%install must still reference the hoisted macro") + + // Output must re-parse cleanly. + _, err := spec.OpenSpec(bytes.NewReader([]byte(out))) + require.NoError(t, err, "spec must re-parse after subpackage removal") +} + +// TestTestdataRemoveSubpackageDoesNotHoistUnreferencedMacro verifies the +// negative case: when a `%define` inside a subpackage is only referenced from +// within that same subpackage, removal drops it cleanly (no hoisting needed, +// no noise added to the result). +func TestTestdataRemoveSubpackageDoesNotHoistUnreferencedMacro(t *testing.T) { + specObj := openFixture(t, "subpackage-define-unreferenced.spec") + + require.NoError(t, specObj.RemoveSubpackage("tools")) + + out := serializeSpec(t, specObj) + outLines := strings.Split(out, "\n") + + // The macro must be gone -- no `%define toolsdir ...` line anywhere. + assert.False(t, hasLineWithPrefix(outLines, "%define toolsdir"), + "unreferenced macro must be dropped along with the subpackage") + + // All subpackage section headers must be gone. + assert.False(t, hasLineWithPrefix(outLines, "%package tools"), + "subpackage header must be removed") + assert.False(t, hasLineWithPrefix(outLines, "%description tools"), + "subpackage description must be removed") + assert.False(t, hasLineWithPrefix(outLines, "%files tools"), + "subpackage files must be removed") + + // Output must re-parse cleanly. + _, err := spec.OpenSpec(bytes.NewReader([]byte(out))) + require.NoError(t, err, "spec must re-parse after subpackage removal") +} From 4a8cd0ee88e49bad6ee29a5a8e9a77f06380285c Mon Sep 17 00:00:00 2001 From: Thien Trung Vuong Date: Fri, 29 May 2026 02:47:03 +0000 Subject: [PATCH 8/9] feat(spec): hoist referenced subpackage macros on removal When a %define/%global directive lives inside a %package subpackage block but the macro it declares is referenced by surviving content, RemoveSections now automatically hoists it to the root level. Two regression tests validate both the positive case (macro hoisted) and the negative case (unreferenced macro dropped cleanly). --- internal/rpm/spec/tree_hoist.go | 239 ++++++++++++++++++++++++++++++++ 1 file changed, 239 insertions(+) create mode 100644 internal/rpm/spec/tree_hoist.go diff --git a/internal/rpm/spec/tree_hoist.go b/internal/rpm/spec/tree_hoist.go new file mode 100644 index 00000000..47047d75 --- /dev/null +++ b/internal/rpm/spec/tree_hoist.go @@ -0,0 +1,239 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package spec + +import ( + "regexp" +) + +// hoistReferencedMacros moves [macroDefBlock] children of soon-to-be-removed +// sections to the root level when those macros are referenced by content that +// will survive removal. +// +// Motivation (issue #203): spec authors sometimes place `%define` inside a +// `%package` subpackage block (e.g. a `%define testsdir` under `%package tests`) +// even though the macro is referenced by an unconditional section like +// `%install`. Naively removing the subpackage drops the macro and leaves +// dangling `%{testsdir}` references in survivors. Hoisting preserves the +// definition just before the first removed block so the survivors still +// resolve. +// +// Hoisted blocks are inserted at the root in declaration order, immediately +// before the topmost ancestor (a direct child of root) of the first removed +// section. Macros that are NOT referenced by any survivor are left alone -- +// they are subsequently dropped along with their enclosing section by the +// normal removal pass. +// +// This function mutates root in place. It must be called BEFORE removed +// blocks are detached from the tree so that the "referenced outside the +// removed subtrees" check can compute the survivor set correctly. +func hoistReferencedMacros(root *block, removed []*block) { + if len(removed) == 0 { + return + } + + removedSet := blockSet(removed) + + macros := collectMacrosInSections(removed) + if len(macros) == 0 { + return + } + + hoist := make([]*block, 0, len(macros)) + + for _, macro := range macros { + if isMacroReferencedOutside(root, macro.Name, removedSet) { + hoist = append(hoist, macro) + } + } + + if len(hoist) == 0 { + return + } + + insertIdx := firstRemovedRootChildIdx(root, removedSet) + if insertIdx < 0 { + // Defensive: no top-level removal found (shouldn't happen if removal + // validation passed). Prepend at root to keep the macros alive. + insertIdx = 0 + } + + spliced := make([]*block, 0, len(root.Children)+len(hoist)) + spliced = append(spliced, root.Children[:insertIdx]...) + spliced = append(spliced, hoist...) + spliced = append(spliced, root.Children[insertIdx:]...) + root.Children = spliced +} + +// blockSet builds an identity-set of block pointers for O(1) lookup. +func blockSet(blocks []*block) map[*block]bool { + set := make(map[*block]bool, len(blocks)) + for _, b := range blocks { + set[b] = true + } + + return set +} + +// collectMacrosInSections gathers every [macroDefBlock] reachable from any of +// the given section blocks, preserving declaration order across sections. +func collectMacrosInSections(sections []*block) []*block { + var macros []*block + + for _, sec := range sections { + collectMacrosInBlock(sec, ¯os) + } + + return macros +} + +func collectMacrosInBlock(blk *block, out *[]*block) { + switch blk.Kind { + case macroDefBlock: + *out = append(*out, blk) + case rootBlock, sectionBlock: + for _, child := range blk.Children { + collectMacrosInBlock(child, out) + } + case conditionalBlock: + for _, child := range blk.Children { + collectMacrosInBlock(child, out) + } + + for _, child := range blk.Else { + collectMacrosInBlock(child, out) + } + case textBlock: + // No nested macros. + } +} + +// isMacroReferencedOutside walks the tree looking for references to name in +// any block whose enclosing section is NOT in removedSet. References include +// the standard RPM forms: %{name}, %{?name}, %{!?name}, %{name:...}, and bare +// %name terminated by a non-word character. +func isMacroReferencedOutside(root *block, name string, removedSet map[*block]bool) bool { + pattern := macroReferencePattern(name) + + return scanForMacroReference(root, pattern, removedSet) +} + +func scanForMacroReference(blk *block, pattern *regexp.Regexp, removedSet map[*block]bool) bool { + // Skip entire subtrees rooted at a removed section. + if blk.Kind == sectionBlock && removedSet[blk] { + return false + } + + switch blk.Kind { + case textBlock, macroDefBlock: + // A macro definition that lives outside the removed set may itself + // reference the hoisted macro (e.g. `%define foo %{name}-suffix`). + return anyLineMatches(blk.Lines, pattern) + case rootBlock, sectionBlock: + return anyChildMatches(blk.Children, pattern, removedSet) + case conditionalBlock: + return conditionalReferencesMacro(blk, pattern, removedSet) + } + + return false +} + +// anyLineMatches reports whether any line in lines matches pattern. +func anyLineMatches(lines []string, pattern *regexp.Regexp) bool { + for _, line := range lines { + if pattern.MatchString(line) { + return true + } + } + + return false +} + +// anyChildMatches reports whether any child block references the macro. +func anyChildMatches(children []*block, pattern *regexp.Regexp, removedSet map[*block]bool) bool { + for _, child := range children { + if scanForMacroReference(child, pattern, removedSet) { + return true + } + } + + return false +} + +// conditionalReferencesMacro reports whether the conditional itself (via its +// header or %else directive) or any of its branches reference the macro. +func conditionalReferencesMacro(cond *block, pattern *regexp.Regexp, removedSet map[*block]bool) bool { + // The %if header itself can reference macros (e.g. `%if 0%{?with_foo}`). + if pattern.MatchString(cond.Header) { + return true + } + + if cond.ElseDirective != "" && pattern.MatchString(cond.ElseDirective) { + return true + } + + return anyChildMatches(cond.Children, pattern, removedSet) || + anyChildMatches(cond.Else, pattern, removedSet) +} + +// macroReferencePattern builds a regexp that matches references to a named +// RPM macro. Supported forms: +// - %{name}, %{?name}, %{!?name} +// - %{name:default} (parameterized expansion) +// - bare %name terminated by a non-word character or end of string +// +// The bare form requires a word boundary so we don't match %nameOther. +func macroReferencePattern(name string) *regexp.Regexp { + quoted := regexp.QuoteMeta(name) + // Braced: %{ optional ! optional ? NAME ( } | : ... ) + // Bare: %NAME terminated by \b + pattern := `%(?:\{!?\??` + quoted + `[}:]|` + quoted + `\b)` + + return regexp.MustCompile(pattern) +} + +// firstRemovedRootChildIdx returns the index of the first child of root that +// either IS a removed block or contains a removed section in any descendant +// (including inside conditional branches). Returns -1 if no root child +// contains a removed block. +func firstRemovedRootChildIdx(root *block, removedSet map[*block]bool) int { + for i, child := range root.Children { + if containsRemoved(child, removedSet) { + return i + } + } + + return -1 +} + +func containsRemoved(blk *block, removedSet map[*block]bool) bool { + if removedSet[blk] { + return true + } + + switch blk.Kind { + case rootBlock, sectionBlock: + for _, child := range blk.Children { + if containsRemoved(child, removedSet) { + return true + } + } + case conditionalBlock: + for _, child := range blk.Children { + if containsRemoved(child, removedSet) { + return true + } + } + + for _, child := range blk.Else { + if containsRemoved(child, removedSet) { + return true + } + } + case textBlock, macroDefBlock: + // Leaves can't contain sections. + } + + return false +} From 56a4c63cb78d4805319847b674d2676a9aee7750 Mon Sep 17 00:00:00 2001 From: Thien Trung Vuong Date: Sat, 30 May 2026 10:24:03 -0700 Subject: [PATCH 9/9] feat(spec): add PrependLinesToAllSections and spec-prepend-all-lines overlay Add a new PrependLinesToAllSections method that prepends lines to every section matching a given name and package, unlike PrependLinesToSection which targets only the first match. This is exposed as the new spec-prepend-all-lines overlay type. Update the check-disablement synthesizer to use the new overlay type so that specs with multiple %check sections (e.g., python-azure-mgmt-media, python-zmq) get exit 0 prepended to all of them. --- internal/app/azldev/core/sources/overlays.go | 5 + .../app/azldev/core/sources/sourceprep.go | 7 +- internal/projectconfig/overlay.go | 8 +- internal/rpm/spec/edit.go | 29 +++- internal/rpm/spec/edit_test.go | 130 ++++++++++++++++++ 5 files changed, 172 insertions(+), 7 deletions(-) diff --git a/internal/app/azldev/core/sources/overlays.go b/internal/app/azldev/core/sources/overlays.go index cfc23069..65c60569 100644 --- a/internal/app/azldev/core/sources/overlays.go +++ b/internal/app/azldev/core/sources/overlays.go @@ -150,6 +150,11 @@ func ApplySpecOverlay(overlay projectconfig.ComponentOverlay, openedSpec *spec.S if err != nil { return fmt.Errorf("failed to prepend lines to spec:\n%w", err) } + case projectconfig.ComponentOverlayPrependAllSpecLines: + err := openedSpec.PrependLinesToAllSections(overlay.SectionName, overlay.PackageName, overlay.Lines) + if err != nil { + return fmt.Errorf("failed to prepend lines to all matching sections in spec:\n%w", err) + } case projectconfig.ComponentOverlayAppendSpecLines: err := openedSpec.AppendLinesToSection(overlay.SectionName, overlay.PackageName, overlay.Lines) if err != nil { diff --git a/internal/app/azldev/core/sources/sourceprep.go b/internal/app/azldev/core/sources/sourceprep.go index 617dcbd8..de2af315 100644 --- a/internal/app/azldev/core/sources/sourceprep.go +++ b/internal/app/azldev/core/sources/sourceprep.go @@ -1046,8 +1046,9 @@ func generateFileHeaderOverlay() []projectconfig.ComponentOverlay { } // synthesizeCheckSkipOverlays generates overlays to disable the %check section if configured. -// When check.skip is true, it prepends an 'exit 0' to the %check section with a comment -// explaining why the section was disabled. +// When check.skip is true, it prepends an 'exit 0' to every %check section in the spec with +// a comment explaining why the section was disabled. Uses [ComponentOverlayPrependAllSpecLines] +// to handle specs that contain multiple %check sections gated by different conditionals. func synthesizeCheckSkipOverlays(checkConfig projectconfig.CheckConfig) []projectconfig.ComponentOverlay { if !checkConfig.Skip { return nil @@ -1055,7 +1056,7 @@ func synthesizeCheckSkipOverlays(checkConfig projectconfig.CheckConfig) []projec return []projectconfig.ComponentOverlay{ { - Type: projectconfig.ComponentOverlayPrependSpecLines, + Type: projectconfig.ComponentOverlayPrependAllSpecLines, SectionName: "%check", Lines: []string{ "# Check section disabled: " + checkConfig.SkipReason, diff --git a/internal/projectconfig/overlay.go b/internal/projectconfig/overlay.go index 7ec0b50e..179dd8b6 100644 --- a/internal/projectconfig/overlay.go +++ b/internal/projectconfig/overlay.go @@ -111,6 +111,7 @@ func (c *ComponentOverlay) ModifiesSpec() bool { c.Type == ComponentOverlayUpdateSpecTag || c.Type == ComponentOverlayRemoveSpecTag || c.Type == ComponentOverlayPrependSpecLines || + c.Type == ComponentOverlayPrependAllSpecLines || c.Type == ComponentOverlayAppendSpecLines || c.Type == ComponentOverlaySearchAndReplaceInSpec || c.Type == ComponentOverlayRemoveSection || @@ -152,6 +153,11 @@ const ( // ComponentOverlayPrependSpecLines is an overlay that prepends lines to a section in a spec; fails if the section // doesn't exist. ComponentOverlayPrependSpecLines ComponentOverlayType = "spec-prepend-lines" + // ComponentOverlayPrependAllSpecLines is an overlay that prepends lines to every section + // matching the given name and package in a spec. This is useful when a spec contains multiple + // sections with the same name (e.g., two %check sections gated by different conditionals) + // and all of them need the same modification. Fails if no matching section exists. + ComponentOverlayPrependAllSpecLines ComponentOverlayType = "spec-prepend-all-lines" // ComponentOverlayAppendSpecLines is an overlay that appends lines to a section in a spec; fails if the section // doesn't exist. ComponentOverlayAppendSpecLines ComponentOverlayType = "spec-append-lines" @@ -246,7 +252,7 @@ func (c *ComponentOverlay) Validate() error { if c.Tag == "" { return missingField("tag") } - case ComponentOverlayPrependSpecLines, ComponentOverlayAppendSpecLines: + case ComponentOverlayPrependSpecLines, ComponentOverlayPrependAllSpecLines, ComponentOverlayAppendSpecLines: if len(c.Lines) == 0 { return missingField("lines") } diff --git a/internal/rpm/spec/edit.go b/internal/rpm/spec/edit.go index 5c54770e..7fb1516d 100644 --- a/internal/rpm/spec/edit.go +++ b/internal/rpm/spec/edit.go @@ -304,9 +304,10 @@ func (s *Spec) InsertTag(packageName string, tag string, value string) error { }) } -// PrependLinesToSection prepends the given lines to the start of the specified section, placing -// them just after the section header (or at the top of the file in the global section). An error -// is returned if the identified section cannot be found in the spec. +// PrependLinesToSection prepends the given lines to the start of the first section matching +// the specified name and package, placing them just after the section header (or at the top +// of the file in the global section). An error is returned if the identified section cannot +// be found in the spec. func (s *Spec) PrependLinesToSection(sectionName, packageName string, lines []string) (err error) { slog.Debug("Prepending lines to spec", "section", sectionName, "package", packageName, "lines", lines) @@ -322,6 +323,28 @@ func (s *Spec) PrependLinesToSection(sectionName, packageName string, lines []st }) } +// PrependLinesToAllSections prepends the given lines to the start of every section matching +// the specified name and package, placing them just after each section header. This is useful +// when a spec contains multiple sections with the same name (e.g., two %check sections gated +// by different conditionals) and all of them need the same modification. +// An error is returned if no matching section exists. +func (s *Spec) PrependLinesToAllSections(sectionName, packageName string, lines []string) (err error) { + slog.Debug("Prepending lines to all matching sections", "section", sectionName, "package", packageName, "lines", lines) + + return s.mutateTree(func(tree *specTree) error { + sections := tree.Sections(sectionName, packageName) + if len(sections) == 0 { + return fmt.Errorf("section %#q (package=%#q) not found:\n%w", sectionName, packageName, ErrSectionNotFound) + } + + for _, sect := range sections { + sect.PrependLines(lines) + } + + return nil + }) +} + // AppendLinesToSection appends the given lines at the end of the specified section, placing // them just after the current last line of the section's content. When a conditional block // (%if/%endif) straddles the section boundary, the appended lines are placed before the diff --git a/internal/rpm/spec/edit_test.go b/internal/rpm/spec/edit_test.go index f231ad23..34e2158f 100644 --- a/internal/rpm/spec/edit_test.go +++ b/internal/rpm/spec/edit_test.go @@ -1158,6 +1158,99 @@ Name: test err = specFile.PrependLinesToSection("%description", "", []string{"New line"}) require.Error(t, err) }) + + t.Run("prepends to first matching section only", func(t *testing.T) { + input := ` +Name: test + +%if %{with tests} +%check +%pytest +%endif + +%check +%pyproject_check_import +` + + specFile, err := spec.OpenSpec(strings.NewReader(input)) + require.NoError(t, err) + + err = specFile.PrependLinesToSection("%check", "", []string{"# disabled", "exit 0"}) + require.NoError(t, err) + + actual := new(bytes.Buffer) + + err = specFile.Serialize(actual) + require.NoError(t, err) + + assert.Equal(t, ` +Name: test + +%if %{with tests} +%check +# disabled +exit 0 +%pytest +%endif + +%check +%pyproject_check_import +`, actual.String()) + }) +} + +func TestPrependLinesToAllSections(t *testing.T) { + t.Run("prepends to all matching sections", func(t *testing.T) { + input := ` +Name: test + +%if %{with tests} +%check +%pytest +%endif + +%check +%pyproject_check_import +` + + specFile, err := spec.OpenSpec(strings.NewReader(input)) + require.NoError(t, err) + + err = specFile.PrependLinesToAllSections("%check", "", []string{"# disabled", "exit 0"}) + require.NoError(t, err) + + actual := new(bytes.Buffer) + + err = specFile.Serialize(actual) + require.NoError(t, err) + + assert.Equal(t, ` +Name: test + +%if %{with tests} +%check +# disabled +exit 0 +%pytest +%endif + +%check +# disabled +exit 0 +%pyproject_check_import +`, actual.String()) + }) + + t.Run("no such section", func(t *testing.T) { + input := ` +Name: test-no-check +` + specFile, err := spec.OpenSpec(strings.NewReader(input)) + require.NoError(t, err) + + err = specFile.PrependLinesToAllSections("%check", "", []string{"exit 0"}) + require.Error(t, err) + }) } func TestAppendLinesToSection(t *testing.T) { @@ -1412,6 +1505,43 @@ A test package. %build make +`, actual.String()) + }) + + t.Run("appends inside wrapper when section is in conditional", func(t *testing.T) { + // %files modules-extra-matched is inside %ifnarch. The tree correctly + // scopes the section within the conditional, so appended lines land + // inside the wrapper (before %endif). + input := ` +Name: test + +%ifnarch noarch +%files modules-extra-matched +%endif + +%changelog +` + specFile, err := spec.OpenSpec(strings.NewReader(input)) + require.NoError(t, err) + + err = specFile.AppendLinesToSection("%files", "modules-extra-matched", []string{"", "# ANCHOR"}) + require.NoError(t, err) + + actual := new(bytes.Buffer) + + err = specFile.Serialize(actual) + require.NoError(t, err) + + assert.Equal(t, ` +Name: test + +%ifnarch noarch +%files modules-extra-matched + +# ANCHOR +%endif + +%changelog `, actual.String()) }) }