Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions docs/developer/reference/component-identity.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Component Identity & Change Detection

The `component identity` and `component diff-identity` subcommands compute deterministic fingerprints of component build inputs. For example, CI can compute fingerprints for the base and head commits of a PR, then diff them to determine exactly which components have changed and need to be rebuilt/tested.

```bash
# Typical CI workflow
git checkout $BASE_REF && azldev component identity -a -O json > base.json
git checkout $HEAD_REF && azldev component identity -a -O json > head.json
azldev component diff-identity base.json head.json -O json -c
# → {"changed": ["curl"], "added": ["wget"], "removed": [], "unchanged": []}
```

## Fingerprint Inputs

A component's fingerprint is a SHA-256 hash that combines the following inputs:

1. **Config hash** — `hashstructure.Hash()` of the resolved `ComponentConfig` (after all merging). Fields tagged `fingerprint:"-"` are excluded.
2. **Source identity** — content hash for local specs (all files in the spec directory), commit hash for upstream.
3. **Overlay file hashes** — SHA256 of each file referenced by overlay `Source` fields.
4. **Distro name + version**
5. **Affects commit count** — number of `Affects: <component>` commits in the project repo.

Global change propagation works automatically: the fingerprint operates on the fully-merged config, so a change to a distro or group default changes the resolved config of every inheriting component.

## `fingerprint:"-"` Tag System

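The config hash is computed with the `hashstructure` library configured with `TagName: "fingerprint"`, so `fingerprint:"..."` struct tags control which fields are hashed. Untagged fields are **included by default**. This is the safe default: a false positive (an unnecessary rebuild) is preferable to a false negative (a missed rebuild).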

A guard test (`TestAllFingerprintedFieldsHaveDecision`) reflects over all fingerprinted structs and maintains a bi-directional allowlist of exclusions. It fails if a `fingerprint:"-"` tag is added without registering it, or if a registered exclusion's tag is removed.

### Adding a New Config Field

1. Add the field to the struct in `internal/projectconfig/`.
2. **If NOT a build input**: add `fingerprint:"-"` to the struct tag and register it in `expectedExclusions` in `internal/projectconfig/fingerprint_test.go`.
3. **If a build input**: do nothing — included by default.
4. Run `mage unit`.

### Adding a New Source Type

1. Implement `SourceIdentityProvider` on your provider (see `ResolveLocalSourceIdentity` in `localidentity.go` for a simple example).
2. Add a case to `sourceManager.ResolveSourceIdentity()` in `sourcemanager.go`.
3. Add tests in `identityprovider_test.go`.

## CLI

### `azldev component identity`

Compute fingerprints. Uses standard component filter flags (`-a`, `-p`, `-g`, `-s`). Exposed as an MCP tool.

### `azldev component diff-identity`

Compare two identity JSON files. The `--changed-only` / `-c` flag restricts the result to changed and added components (the build queue). The flag applies to both table and JSON output; in JSON output the `removed` and `unchanged` lists are still present but empty.

## Known Limitations

- It is difficult to determine WHY a diff occurred (e.g., which specific field changed) since the fingerprint is a single opaque hash. The JSON output includes an `inputs` breakdown (`configHash`, `sourceIdentity`, `overlayFileHashes`, etc.) that can help narrow it down by comparing the two identity files manually.
1 change: 1 addition & 0 deletions docs/user/reference/cli/azldev_component.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

54 changes: 54 additions & 0 deletions docs/user/reference/cli/azldev_component_diff-identity.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions internal/app/azldev/cmds/component/component.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ components defined in the project configuration.`,
app.AddTopLevelCommand(cmd)
addOnAppInit(app, cmd)
buildOnAppInit(app, cmd)
diffIdentityOnAppInit(app, cmd)
diffSourcesOnAppInit(app, cmd)
listOnAppInit(app, cmd)
prepareOnAppInit(app, cmd)
Expand Down
197 changes: 197 additions & 0 deletions internal/app/azldev/cmds/component/diffidentity.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

package component

import (
"encoding/json"
"fmt"
"sort"

"github.com/microsoft/azure-linux-dev-tools/internal/app/azldev"
"github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils"
"github.com/spf13/cobra"
)

// diffIdentityOnAppInit attaches the "diff-identity" subcommand to parentCmd.
// The app argument is not used.
func diffIdentityOnAppInit(_ *azldev.App, parentCmd *cobra.Command) {
	diffIdentityCmd := NewDiffIdentityCommand()
	parentCmd.AddCommand(diffIdentityCmd)
}

// diffIdentityArgCount is the number of positional arguments required by the diff-identity
// command: the base identity file followed by the head identity file.
const diffIdentityArgCount = 2

// NewDiffIdentityCommand builds the [cobra.Command] behind "component diff-identity".
//
// The command requires exactly two positional arguments (the base and head identity files)
// and offers a --changed-only/-c flag whose value is forwarded to [DiffIdentities].
func NewDiffIdentityCommand() *cobra.Command {
	// Bound to --changed-only below; the run closure reads it when the command executes.
	var changedOnly bool

	run := func(env *azldev.Env, args []string) (interface{}, error) {
		basePath, headPath := args[0], args[1]

		return DiffIdentities(env, basePath, headPath, changedOnly)
	}

	diffCmd := &cobra.Command{
		Use:   "diff-identity <base.json> <head.json>",
		Short: "Compare two identity files and report changed components",
		Long: `Compare two component identity JSON files (produced by 'component identity -a -O json')
and report which components have changed, been added, or been removed.

CI uses the 'changed' and 'added' lists to determine the build queue.`,
		Example: ` # Compare base and head identity files
azldev component diff-identity base-identity.json head-identity.json

# JSON output for CI
azldev component diff-identity base.json head.json -O json`,
		Args: cobra.ExactArgs(diffIdentityArgCount),
		RunE: azldev.RunFuncWithoutRequiredConfigWithExtraArgs(run),
	}

	diffCmd.Flags().BoolVarP(&changedOnly, "changed-only", "c", false,
		"Only show changed and added components (the build queue)")

	return diffCmd
}

// IdentityDiffStatus represents the change status of a component when a base identity
// file is compared against a head identity file. The string value is what appears in the
// "status" field of [IdentityDiffResult].
type IdentityDiffStatus string

const (
	// IdentityDiffChanged indicates the component is present in both base and head but its
	// fingerprint differs.
	IdentityDiffChanged IdentityDiffStatus = "changed"
	// IdentityDiffAdded indicates the component is new in the head (absent from the base).
	IdentityDiffAdded IdentityDiffStatus = "added"
	// IdentityDiffRemoved indicates the component was removed in the head (present only in
	// the base).
	IdentityDiffRemoved IdentityDiffStatus = "removed"
	// IdentityDiffUnchanged indicates the component's fingerprint is identical in base and head.
	IdentityDiffUnchanged IdentityDiffStatus = "unchanged"
)

// IdentityDiffResult is the per-component row in table output: one component name paired
// with its diff status.
type IdentityDiffResult struct {
	// Component is the component name.
	// NOTE(review): the `table:",sortkey"` tag presumably makes this the sort column for
	// table rendering — confirm against the table reporter.
	Component string `json:"component" table:",sortkey"`
	// Status is the component's change status between base and head.
	Status IdentityDiffStatus `json:"status"`
}

// IdentityDiffReport is the structured output for JSON format. Each field lists component
// names grouped by diff status.
type IdentityDiffReport struct {
	// Changed lists components present in both files whose fingerprints differ.
	Changed []string `json:"changed"`
	// Added lists components present only in the head file.
	Added []string `json:"added"`
	// Removed lists components present only in the base file.
	Removed []string `json:"removed"`
	// Unchanged lists components whose fingerprints are identical in both files.
	Unchanged []string `json:"unchanged"`
}

// DiffIdentities loads the identity files at basePath and headPath and reports how the
// component fingerprints differ between them.
//
// The shape of the result depends on the environment's default report format: the grouped
// [IdentityDiffReport] for JSON, otherwise a flat []IdentityDiffResult with one row per
// component. The changedOnly flag is passed through to [ComputeDiff]. An error is returned
// if either file cannot be read or parsed.
func DiffIdentities(env *azldev.Env, basePath string, headPath string, changedOnly bool) (interface{}, error) {
	baseFingerprints, baseErr := readIdentityFile(env, basePath)
	if baseErr != nil {
		return nil, fmt.Errorf("reading base identity file %#q:\n%w", basePath, baseErr)
	}

	headFingerprints, headErr := readIdentityFile(env, headPath)
	if headErr != nil {
		return nil, fmt.Errorf("reading head identity file %#q:\n%w", headPath, headErr)
	}

	diff := ComputeDiff(baseFingerprints, headFingerprints, changedOnly)

	// Row-oriented formats get one entry per component; JSON gets the grouped report.
	if env.DefaultReportFormat() != azldev.ReportFormatJSON {
		return buildTableResults(diff), nil
	}

	return diff, nil
}

// readIdentityFile reads and parses a component identity JSON file into a map of
// component name to fingerprint.
func readIdentityFile(
env *azldev.Env, filePath string,
) (map[string]string, error) {
data, err := fileutils.ReadFile(env.FS(), filePath)
if err != nil {
return nil, fmt.Errorf("reading file:\n%w", err)
}

var entries []ComponentIdentityResult

Check failure on line 115 in internal/app/azldev/cmds/component/diffidentity.go

View workflow job for this annotation

GitHub Actions / Publish prep check

undefined: ComponentIdentityResult

Check failure on line 115 in internal/app/azldev/cmds/component/diffidentity.go

View workflow job for this annotation

GitHub Actions / Analyze

undefined: ComponentIdentityResult

Check failure on line 115 in internal/app/azldev/cmds/component/diffidentity.go

View workflow job for this annotation

GitHub Actions / Unit tests

undefined: ComponentIdentityResult

Check failure on line 115 in internal/app/azldev/cmds/component/diffidentity.go

View workflow job for this annotation

GitHub Actions / Scenario tests (ubuntu-latest)

undefined: ComponentIdentityResult

Check failure on line 115 in internal/app/azldev/cmds/component/diffidentity.go

View workflow job for this annotation

GitHub Actions / Coverage

undefined: ComponentIdentityResult

err = json.Unmarshal(data, &entries)
if err != nil {
return nil, fmt.Errorf("parsing JSON:\n%w", err)
}

result := make(map[string]string, len(entries))
for _, entry := range entries {
result[entry.Component] = entry.Fingerprint
}
Comment on lines +110 to +125
Copy link

Copilot AI Mar 31, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

readIdentityFile unmarshals into []ComponentIdentityResult, but that type is not defined anywhere in the repo (searching the whole tree only finds this reference). This will fail to compile. Define a local struct matching the on-disk JSON schema (e.g., fields for component and fingerprint) or import/rename to the actual identity entry type used by component identity output.

Copilot uses AI. Check for mistakes.

return result, nil
}

// ComputeDiff classifies every component found in base or head (maps of component name to
// fingerprint) as changed, added, removed, or unchanged, and returns the grouped report.
//
// When changedOnly is true, removed and unchanged components are not recorded, so those two
// lists stay empty. Every list in the report is non-nil and sorted alphabetically.
func ComputeDiff(base map[string]string, head map[string]string, changedOnly bool) *IdentityDiffReport {
	// Start from non-nil empty slices so JSON serialization emits [] rather than null.
	changed, added := []string{}, []string{}
	removed, unchanged := []string{}, []string{}

	// Classify everything the base knows about.
	for component, oldFingerprint := range base {
		newFingerprint, stillPresent := head[component]

		if !stillPresent {
			if !changedOnly {
				removed = append(removed, component)
			}

			continue
		}

		if oldFingerprint != newFingerprint {
			changed = append(changed, component)

			continue
		}

		if !changedOnly {
			unchanged = append(unchanged, component)
		}
	}

	// Anything in head that the base never had is an addition.
	for component := range head {
		if _, known := base[component]; known {
			continue
		}

		added = append(added, component)
	}

	// Map iteration order is random; sort in place for deterministic output.
	for _, names := range [][]string{changed, added, removed, unchanged} {
		sort.Strings(names)
	}

	return &IdentityDiffReport{
		Changed:   changed,
		Added:     added,
		Removed:   removed,
		Unchanged: unchanged,
	}
}

// buildTableResults flattens a diff report into one row per component for table output.
// Rows are emitted group by group — changed, added, removed, unchanged — and keep the
// order each list already has in the report. The returned slice is never nil.
func buildTableResults(report *IdentityDiffReport) []IdentityDiffResult {
	groups := []struct {
		status IdentityDiffStatus
		names  []string
	}{
		{status: IdentityDiffChanged, names: report.Changed},
		{status: IdentityDiffAdded, names: report.Added},
		{status: IdentityDiffRemoved, names: report.Removed},
		{status: IdentityDiffUnchanged, names: report.Unchanged},
	}

	// Size the result up front so appending never reallocates.
	total := 0
	for _, group := range groups {
		total += len(group.names)
	}

	rows := make([]IdentityDiffResult, 0, total)

	for _, group := range groups {
		for _, component := range group.names {
			rows = append(rows, IdentityDiffResult{Component: component, Status: group.status})
		}
	}

	return rows
}
Loading
Loading