From 1b0642422cf645a8acb13888bfa1214fcc68aace Mon Sep 17 00:00:00 2001 From: Daniel McIlvaney Date: Mon, 30 Mar 2026 14:24:33 -0700 Subject: [PATCH 01/12] refactor(env): Add a WithCancel option to the Env context --- internal/app/azldev/env.go | 14 ++++++++++++++ internal/app/azldev/env_test.go | 17 +++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/internal/app/azldev/env.go b/internal/app/azldev/env.go index 7305a88..d947384 100644 --- a/internal/app/azldev/env.go +++ b/internal/app/azldev/env.go @@ -211,6 +211,20 @@ func (env *Env) Context() context.Context { return env.ctx } +// WithCancel returns a shallow copy of the [Env] with a child [context.Context] +// derived from [context.WithCancel]. The returned [Env] shares all features +// (FS, config, event listener, cmd factory, etc.) with the original but has an +// independently cancellable context. The caller must call the returned +// [context.CancelFunc] when done. Useful when performing parallel operations +// that benefit from early cancellation on error. +func (env *Env) WithCancel() (*Env, context.CancelFunc) { + childCtx, cancel := context.WithCancel(env.ctx) + childEnv := *env + childEnv.ctx = childCtx + + return &childEnv, cancel +} + // ConfirmAutoResolution prompts the user to confirm auto-resolution of a problem. The provided // text is displayed to the user as explanation. func (env *Env) ConfirmAutoResolution(text string) bool { diff --git a/internal/app/azldev/env_test.go b/internal/app/azldev/env_test.go index 2314e66..d7ace41 100644 --- a/internal/app/azldev/env_test.go +++ b/internal/app/azldev/env_test.go @@ -115,3 +115,20 @@ func TestEnvConstructionTime(t *testing.T) { // Make sure it appears to be a valid time that's before or at *now*. 
assert.LessOrEqual(t, testEnv.Env.ConstructionTime(), time.Now()) } + +func TestEnvWithCancel(t *testing.T) { + testEnv := testutils.NewTestEnv(t) + original := testEnv.Env + + child, cancel := original.WithCancel() + defer cancel() + + // Child should share config and project dir with original. + assert.Equal(t, original.ProjectDir(), child.ProjectDir()) + assert.Equal(t, original.Config(), child.Config()) + + // Cancelling the child should not cancel the original. + cancel() + require.Error(t, child.Err()) + assert.NoError(t, original.Err()) +} From 05270b098e441c052ebaafde709c03b1fb0c4ddd Mon Sep 17 00:00:00 2001 From: Daniel McIlvaney Date: Mon, 30 Mar 2026 15:20:56 -0700 Subject: [PATCH 02/12] feat(git): Add GetCurrentCommit function and WithMetadataOnly option. --- internal/utils/git/git.go | 79 ++++++++++++++++++++---- internal/utils/git/git_test.go | 68 ++++++++++++++++++++ internal/utils/git/git_test/git_mocks.go | 15 +++++ 3 files changed, 149 insertions(+), 13 deletions(-) diff --git a/internal/utils/git/git.go b/internal/utils/git/git.go index 82fdcb8..c826929 100644 --- a/internal/utils/git/git.go +++ b/internal/utils/git/git.go @@ -24,6 +24,8 @@ type GitProvider interface { Checkout(ctx context.Context, repoDir string, commitHash string) error // GetCommitHashBeforeDate returns the commit hash at or before the specified date in the repository. GetCommitHashBeforeDate(ctx context.Context, repoDir string, dateTime time.Time) (string, error) + // GetCurrentCommit returns the current commit hash of the repository at the given directory, regardless of the date. + GetCurrentCommit(ctx context.Context, repoDir string) (string, error) } type GitProviderImpl struct { @@ -33,7 +35,20 @@ type GitProviderImpl struct { var _ GitProvider = (*GitProviderImpl)(nil) -type GitOptions func() []string +// GitOptions is a functional option that configures a clone operation. +// Options may add CLI arguments and/or request post-clone actions. 
+type GitOptions func(opts *cloneOptions) + +// cloneOptions holds the resolved configuration for a clone operation, +// including any post-clone actions. +type cloneOptions struct { + // args are the CLI arguments to pass to 'git clone'. + args []string + // quiet suppresses event emission during the clone. Use this for + // internal clones (e.g., identity resolution) that run concurrently + // and would otherwise produce misleading nested output. + quiet bool +} func NewGitProviderImpl(eventListener opctx.EventListener, cmdFactory opctx.CmdFactory) (*GitProviderImpl, error) { if eventListener == nil { @@ -64,13 +79,10 @@ func (g *GitProviderImpl) Clone(ctx context.Context, repoURL, destDir string, op return errors.New("destination directory cannot be empty") } - args := []string{"clone"} - - // Add options before URL and destination - for _, opt := range options { - args = append(args, opt()...) - } + // Resolve options into args and post-clone actions. + resolved := resolveCloneOptions(options) + args := append([]string{"clone"}, resolved.args...) args = append(args, repoURL, destDir) cmd := exec.CommandContext(ctx, "git", args...) @@ -80,9 +92,10 @@ func (g *GitProviderImpl) Clone(ctx context.Context, repoURL, destDir string, op return fmt.Errorf("failed to create git command:\n%w", err) } - event := g.eventListener.StartEvent("Cloning git repo", "repoURL", repoURL) - - defer event.End() + if !resolved.quiet { + event := g.eventListener.StartEvent("Cloning git repo", "repoURL", repoURL) + defer event.End() + } err = wrappedCmd.Run(ctx) if err != nil { @@ -163,9 +176,49 @@ func (g *GitProviderImpl) GetCommitHashBeforeDate( return output, nil } -// WithGitBranch returns a GitOptions that specifies the branch to clone. +// GetCurrentCommit returns the current commit hash of the repository at the given directory, regardless of the date. 
+func (g *GitProviderImpl) GetCurrentCommit(ctx context.Context, repoDir string) (string, error) { + // Pass zero time to get the current commit + return g.GetCommitHashBeforeDate(ctx, repoDir, time.Time{}) +} + +// resolveCloneOptions collects all [GitOptions] into a [cloneOptions] struct. +func resolveCloneOptions(options []GitOptions) cloneOptions { + var resolved cloneOptions + + for _, opt := range options { + if opt == nil { + continue + } + + opt(&resolved) + } + + return resolved +} + +// WithGitBranch returns a [GitOptions] that specifies the branch to clone. func WithGitBranch(branch string) GitOptions { - return func() []string { - return []string{"--branch", branch} + return func(opts *cloneOptions) { + opts.args = append(opts.args, "--branch", branch) + } +} + +// WithQuiet returns a [GitOptions] that suppresses event emission during +// the clone. Use this for internal operations (e.g., identity resolution) +// that run concurrently and would produce misleading nested log output. +func WithQuiet() GitOptions { + return func(opts *cloneOptions) { + opts.quiet = true + } +} + +// WithMetadataOnly returns a [GitOptions] that performs a blobless partial clone +// (--filter=blob:none --no-checkout). Only git metadata is fetched; no working-tree +// files are checked out. 
+func WithMetadataOnly() GitOptions { + return func(opts *cloneOptions) { + opts.args = append(opts.args, "--filter=blob:none") + opts.args = append(opts.args, "--no-checkout") } } diff --git a/internal/utils/git/git_test.go b/internal/utils/git/git_test.go index 01ebe8b..7620dfe 100644 --- a/internal/utils/git/git_test.go +++ b/internal/utils/git/git_test.go @@ -161,3 +161,71 @@ func TestCloneNonExistentRepo(t *testing.T) { require.Error(t, err) assert.Contains(t, err.Error(), errMsgCloneFailed) } + +func TestGetCurrentCommit(t *testing.T) { + ctrl := gomock.NewController(t) + + cmdFactory, err := externalcmd.NewCmdFactory( + opctx_test.NewNoOpMockDryRunnable(ctrl), + opctx_test.NewNoOpMockEventListener(ctrl), + ) + require.NoError(t, err) + + provider, err := git.NewGitProviderImpl(opctx_test.NewNoOpMockEventListener(ctrl), cmdFactory) + require.NoError(t, err) + + destDir := filepath.Join(t.TempDir(), testRepoSubDir) + + err = provider.Clone(context.Background(), testRepoURL, destDir) + require.NoError(t, err) + + commitHash, err := provider.GetCurrentCommit(context.Background(), destDir) + require.NoError(t, err) + + // A full SHA-1 hash is 40 hex characters. 
+ assert.Len(t, commitHash, 40) + assert.Regexp(t, `^[0-9a-f]{40}$`, commitHash) +} + +func TestGetCurrentCommitEmptyRepoDir(t *testing.T) { + ctrl := gomock.NewController(t) + + provider, err := git.NewGitProviderImpl( + opctx_test.NewMockEventListener(ctrl), + opctx_test.NewMockCmdFactory(ctrl), + ) + require.NoError(t, err) + + _, err = provider.GetCurrentCommit(context.Background(), "") + require.Error(t, err) + assert.Contains(t, err.Error(), "repository directory cannot be empty") +} + +func TestCloneWithMetadataOnly(t *testing.T) { + ctrl := gomock.NewController(t) + + cmdFactory, err := externalcmd.NewCmdFactory( + opctx_test.NewNoOpMockDryRunnable(ctrl), + opctx_test.NewNoOpMockEventListener(ctrl), + ) + require.NoError(t, err) + + provider, err := git.NewGitProviderImpl(opctx_test.NewNoOpMockEventListener(ctrl), cmdFactory) + require.NoError(t, err) + + destDir := filepath.Join(t.TempDir(), testRepoSubDir) + + err = provider.Clone( + context.Background(), + testRepoURL, + destDir, + git.WithMetadataOnly(), + ) + + require.NoError(t, err) + + // Git metadata should exist. + assert.DirExists(t, filepath.Join(destDir, testGitDir)) + // --no-checkout means the working tree file should NOT be present. + assert.NoFileExists(t, filepath.Join(destDir, testRepoReadmeFile)) +} diff --git a/internal/utils/git/git_test/git_mocks.go b/internal/utils/git/git_test/git_mocks.go index af98047..008a1a5 100644 --- a/internal/utils/git/git_test/git_mocks.go +++ b/internal/utils/git/git_test/git_mocks.go @@ -93,3 +93,18 @@ func (mr *MockGitProviderMockRecorder) GetCommitHashBeforeDate(ctx, repoDir, dat mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetCommitHashBeforeDate", reflect.TypeOf((*MockGitProvider)(nil).GetCommitHashBeforeDate), ctx, repoDir, dateTime) } + +// GetCurrentCommit mocks base method. 
+func (m *MockGitProvider) GetCurrentCommit(ctx context.Context, repoDir string) (string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetCurrentCommit", ctx, repoDir) + ret0, _ := ret[0].(string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetCurrentCommit indicates an expected call of GetCurrentCommit. +func (mr *MockGitProviderMockRecorder) GetCurrentCommit(ctx, repoDir any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetCurrentCommit", reflect.TypeOf((*MockGitProvider)(nil).GetCurrentCommit), ctx, repoDir) +} From 695fb2451a88b19df40f4132111d9ff5109acabe Mon Sep 17 00:00:00 2001 From: Daniel McIlvaney Date: Mon, 30 Mar 2026 15:31:57 -0700 Subject: [PATCH 03/12] feat(Provider): Add ResolveSourceIdentity() to the source provider interface --- .../sourceproviders/fedorasourceprovider.go | 90 ++++++ .../sourceproviders/identityprovider_test.go | 289 ++++++++++++++++++ .../sourceproviders/localidentity.go | 74 +++++ .../sourceproviders/rpmcontentsprovider.go | 28 ++ .../sourceproviders/sourcemanager.go | 69 ++++- .../sourcemanager_mocks.go | 15 + 6 files changed, 564 insertions(+), 1 deletion(-) create mode 100644 internal/providers/sourceproviders/identityprovider_test.go create mode 100644 internal/providers/sourceproviders/localidentity.go diff --git a/internal/providers/sourceproviders/fedorasourceprovider.go b/internal/providers/sourceproviders/fedorasourceprovider.go index d86117f..36d7da6 100644 --- a/internal/providers/sourceproviders/fedorasourceprovider.go +++ b/internal/providers/sourceproviders/fedorasourceprovider.go @@ -272,3 +272,93 @@ func (g *FedoraSourcesProviderImpl) checkoutTargetCommit( return nil } + +// ResolveSourceIdentity implements [SourceIdentityProvider] by resolving the upstream +// commit hash for the component. Resolution priority matches [checkoutTargetCommit]: +// 1. Explicit upstream commit hash (pinned per-component) — returned directly. +// 2. 
Snapshot time — metadata-only clone, then look up the commit at or before the snapshot date. +// 3. Default — metadata-only clone of the dist-git branch, then read its current commit. +func (g *FedoraSourcesProviderImpl) ResolveSourceIdentity( + ctx context.Context, + component components.Component, +) (string, error) { + // Case 1: Explicit upstream commit hash — no network call needed. + if pinnedCommit := component.GetConfig().Spec.UpstreamCommit; pinnedCommit != "" { + slog.Debug("Using pinned upstream commit for identity", + "component", component.GetName(), + "commit", pinnedCommit) + + return pinnedCommit, nil + } + + // Case 2: Need to resolve the commit for the snapshot time or current HEAD + upstreamName := component.GetConfig().Spec.UpstreamName + if upstreamName == "" { + upstreamName = component.GetName() + } + + gitRepoURL := strings.ReplaceAll(g.distroGitBaseURI, "$pkg", upstreamName) + + return g.resolveCommit(ctx, gitRepoURL, upstreamName) +} + +// resolveCommit clones the branch and determines the effective commit, either +// at the snapshot time, or at the latest commit if no snapshot time is configured. +func (g *FedoraSourcesProviderImpl) resolveCommit( + ctx context.Context, gitRepoURL string, upstreamName string, +) (string, error) { + tempDir, err := fileutils.MkdirTempInTempDir(g.fs, "azldev-identity-snapshot-") + if err != nil { + return "", fmt.Errorf("creating temp directory for snapshot clone:\n%w", err) + } + + defer func() { + if removeErr := g.fs.RemoveAll(tempDir); removeErr != nil { + slog.Debug("Failed to clean up snapshot clone temp directory", + "path", tempDir, "error", removeErr) + } + }() + + // Clone the configured dist-git branch (metadata only) to resolve the commit. The + // clone is non-shallow because not all git servers support --shallow-since + // (e.g., Pagure returns "the remote end hung up unexpectedly").
+ err = retry.Do(ctx, g.retryConfig, func() error { + _ = g.fs.RemoveAll(tempDir) + _ = fileutils.MkdirAll(g.fs, tempDir) + + return g.gitProvider.Clone(ctx, gitRepoURL, tempDir, + git.WithGitBranch(g.distroGitBranch), + git.WithMetadataOnly(), + git.WithQuiet(), + ) + }) + if err != nil { + return "", fmt.Errorf("partial clone for identity of %#q:\n%w", upstreamName, err) + } + + var commitHash string + if g.snapshotTime != "" { + snapshotDateTime, parseErr := time.Parse(time.RFC3339, g.snapshotTime) + if parseErr != nil { + return "", fmt.Errorf("invalid snapshot time %#q:\n%w", g.snapshotTime, parseErr) + } + + commitHash, err = g.gitProvider.GetCommitHashBeforeDate(ctx, tempDir, snapshotDateTime) + if err != nil { + return "", fmt.Errorf("resolving snapshot commit for %#q at %s:\n%w", + upstreamName, snapshotDateTime.Format(time.RFC3339), err) + } + } else { + commitHash, err = g.gitProvider.GetCurrentCommit(ctx, tempDir) + if err != nil { + return "", fmt.Errorf("resolving current commit for %#q:\n%w", upstreamName, err) + } + } + + slog.Debug("Resolved snapshot commit for identity", + "component", upstreamName, + "snapshot", g.snapshotTime, + "commit", commitHash) + + return commitHash, nil +} diff --git a/internal/providers/sourceproviders/identityprovider_test.go b/internal/providers/sourceproviders/identityprovider_test.go new file mode 100644 index 0000000..2b3d24c --- /dev/null +++ b/internal/providers/sourceproviders/identityprovider_test.go @@ -0,0 +1,289 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +package sourceproviders_test + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "errors" + "fmt" + "io" + "strings" + "testing" + "time" + + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/components/components_testutils" + "github.com/microsoft/azure-linux-dev-tools/internal/projectconfig" + "github.com/microsoft/azure-linux-dev-tools/internal/providers/rpmprovider/rpmprovider_test" + "github.com/microsoft/azure-linux-dev-tools/internal/providers/sourceproviders" + "github.com/microsoft/azure-linux-dev-tools/internal/rpm/rpm_test" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileperms" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/git/git_test" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/retry" + "github.com/spf13/afero" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/mock/gomock" +) + +// --- ResolveLocalSourceIdentity tests --- + +func TestResolveLocalSourceIdentity_EmptyDir(t *testing.T) { + identity, err := sourceproviders.ResolveLocalSourceIdentity(afero.NewMemMapFs(), "") + require.NoError(t, err) + assert.Empty(t, identity) +} + +func TestResolveLocalSourceIdentity_Deterministic(t *testing.T) { + filesystem := afero.NewMemMapFs() + require.NoError(t, fileutils.WriteFile(filesystem, "/specs/test.spec", + []byte("Name: test\nVersion: 1.0"), fileperms.PublicFile)) + + identity1, err := sourceproviders.ResolveLocalSourceIdentity(filesystem, "/specs") + require.NoError(t, err) + + identity2, err := sourceproviders.ResolveLocalSourceIdentity(filesystem, "/specs") + require.NoError(t, err) + + assert.Equal(t, identity1, identity2) + assert.NotEmpty(t, identity1) + assert.Contains(t, identity1, "sha256:", "identity should have sha256: prefix") +} + +func TestResolveLocalSourceIdentity_ContentChange(t *testing.T) { + fs1 := afero.NewMemMapFs() + require.NoError(t, 
fileutils.WriteFile(fs1, "/specs/test.spec", []byte("Version: 1.0"), fileperms.PublicFile)) + + fs2 := afero.NewMemMapFs() + require.NoError(t, fileutils.WriteFile(fs2, "/specs/test.spec", []byte("Version: 2.0"), fileperms.PublicFile)) + + identity1, err := sourceproviders.ResolveLocalSourceIdentity(fs1, "/specs") + require.NoError(t, err) + + identity2, err := sourceproviders.ResolveLocalSourceIdentity(fs2, "/specs") + require.NoError(t, err) + + assert.NotEqual(t, identity1, identity2) +} + +func TestResolveLocalSourceIdentity_SidecarFileChangesIdentity(t *testing.T) { + fsSpecOnly := afero.NewMemMapFs() + require.NoError(t, fileutils.WriteFile(fsSpecOnly, "/specs/test.spec", []byte("spec"), fileperms.PublicFile)) + + fsWithPatch := afero.NewMemMapFs() + require.NoError(t, fileutils.WriteFile(fsWithPatch, "/specs/test.spec", []byte("spec"), fileperms.PublicFile)) + require.NoError(t, fileutils.WriteFile(fsWithPatch, "/specs/fix.patch", []byte("patch"), fileperms.PublicFile)) + + identity1, err := sourceproviders.ResolveLocalSourceIdentity(fsSpecOnly, "/specs") + require.NoError(t, err) + + identity2, err := sourceproviders.ResolveLocalSourceIdentity(fsWithPatch, "/specs") + require.NoError(t, err) + + assert.NotEqual(t, identity1, identity2, "adding a sidecar file must change identity") +} + +// --- FedoraSourcesProviderImpl.ResolveSourceIdentity tests --- + +func TestFedoraProvider_ResolveSourceIdentity(t *testing.T) { + ctrl := gomock.NewController(t) + mockGitProvider := git_test.NewMockGitProvider(ctrl) + + provider, err := sourceproviders.NewFedoraSourcesProviderImpl( + afero.NewMemMapFs(), + newNoOpDryRunnable(), + mockGitProvider, + newNoOpDownloader(), + testResolvedDistro(), + retry.Disabled(), + ) + require.NoError(t, err) + + t.Run("resolves commit via clone", func(t *testing.T) { + expectedCommit := "abc123def456" + + // Expect: metadata-only clone, then GetCurrentCommit. + mockGitProvider.EXPECT(). 
+ Clone(gomock.Any(), repoURL, gomock.Any(), gomock.Any()). + Return(nil) + mockGitProvider.EXPECT(). + GetCurrentCommit(gomock.Any(), gomock.Any()). + Return(expectedCommit, nil) + + comp := newMockComp(ctrl, testPackageName) + identity, resolveErr := provider.ResolveSourceIdentity(t.Context(), comp) + require.NoError(t, resolveErr) + assert.Equal(t, expectedCommit, identity) + }) + + t.Run("returns error on clone failure", func(t *testing.T) { + mockGitProvider.EXPECT(). + Clone(gomock.Any(), repoURL, gomock.Any(), gomock.Any()). + Return(errors.New("network error")) + + comp := newMockComp(ctrl, testPackageName) + _, resolveErr := provider.ResolveSourceIdentity(t.Context(), comp) + require.Error(t, resolveErr) + assert.Contains(t, resolveErr.Error(), testPackageName) + }) + + t.Run("returns pinned commit without network call", func(t *testing.T) { + pinnedCommit := "deadbeef12345678" + comp := newMockCompWithConfig(ctrl, testPackageName, &projectconfig.ComponentConfig{ + Name: testPackageName, + Spec: projectconfig.SpecSource{ + SourceType: projectconfig.SpecSourceTypeUpstream, + UpstreamCommit: pinnedCommit, + }, + }) + + // No Clone/GetCurrentCommit expectations — the pinned commit should be returned directly.
+ identity, resolveErr := provider.ResolveSourceIdentity(t.Context(), comp) + require.NoError(t, resolveErr) + assert.Equal(t, pinnedCommit, identity) + }) +} + +func TestFedoraProvider_ResolveSourceIdentity_Snapshot(t *testing.T) { + ctrl := gomock.NewController(t) + mockGitProvider := git_test.NewMockGitProvider(ctrl) + + snapshotTimeStr := "2025-06-15T00:00:00Z" + snapshotTime, _ := time.Parse(time.RFC3339, snapshotTimeStr) + + provider, err := sourceproviders.NewFedoraSourcesProviderImpl( + afero.NewMemMapFs(), + newNoOpDryRunnable(), + mockGitProvider, + newNoOpDownloader(), + testResolvedDistroWithSnapshot(snapshotTimeStr), + retry.Disabled(), + ) + require.NoError(t, err) + + t.Run("resolves commit via clone for snapshot", func(t *testing.T) { + expectedCommit := "snapshot123abc" + + // Expect: metadata-only clone, then GetCommitHashBeforeDate at the snapshot time. + mockGitProvider.EXPECT(). + Clone(gomock.Any(), repoURL, gomock.Any(), + gomock.Any()). // clone options (branch, metadata-only, quiet) + Return(nil) + mockGitProvider.EXPECT(). + GetCommitHashBeforeDate(gomock.Any(), gomock.Any(), snapshotTime). + Return(expectedCommit, nil) + + comp := newMockComp(ctrl, testPackageName) + identity, resolveErr := provider.ResolveSourceIdentity(t.Context(), comp) + require.NoError(t, resolveErr) + assert.Equal(t, expectedCommit, identity) + }) + + t.Run("pinned commit takes priority over snapshot", func(t *testing.T) { + pinnedCommit := "pinned999" + comp := newMockCompWithConfig(ctrl, testPackageName, &projectconfig.ComponentConfig{ + Name: testPackageName, + Spec: projectconfig.SpecSource{ + SourceType: projectconfig.SpecSourceTypeUpstream, + UpstreamCommit: pinnedCommit, + }, + }) + + // No Clone/GetCommitHashBeforeDate expectations — pinned commit is returned directly.
+ identity, resolveErr := provider.ResolveSourceIdentity(t.Context(), comp) + require.NoError(t, resolveErr) + assert.Equal(t, pinnedCommit, identity) + }) +} + +// --- RPMContentsProviderImpl.ResolveSourceIdentity tests --- + +func TestRPMProvider_ResolveSourceIdentity(t *testing.T) { + ctrl := gomock.NewController(t) + + t.Run("hashes downloaded RPM", func(t *testing.T) { + rpmContent := "test-rpm-file-content" + mockRPMProvider := rpmprovider_test.NewMockRPMProvider(ctrl) + mockRPMProvider.EXPECT(). + GetRPM(gomock.Any(), "test-pkg", nil). + Return(io.NopCloser(strings.NewReader(rpmContent)), nil) + + provider, provErr := sourceproviders.NewRPMContentsProviderImpl( + rpm_test.NewMockRPMExtractor(ctrl), mockRPMProvider) + require.NoError(t, provErr) + + comp := newMockComp(ctrl, "test-pkg") + identity, resolveErr := provider.ResolveSourceIdentity(t.Context(), comp) + require.NoError(t, resolveErr) + assert.Equal(t, "sha256:"+sha256Hex(rpmContent), identity) + }) + + t.Run("returns error on RPM download failure", func(t *testing.T) { + mockRPMProvider := rpmprovider_test.NewMockRPMProvider(ctrl) + mockRPMProvider.EXPECT(). + GetRPM(gomock.Any(), "test-pkg", nil). + Return(nil, errors.New("download failed")) + + provider, provErr := sourceproviders.NewRPMContentsProviderImpl( + rpm_test.NewMockRPMExtractor(ctrl), mockRPMProvider) + require.NoError(t, provErr) + + comp := newMockComp(ctrl, "test-pkg") + _, resolveErr := provider.ResolveSourceIdentity(t.Context(), comp) + require.Error(t, resolveErr) + assert.Contains(t, resolveErr.Error(), "test-pkg") + }) +} + +// --- Helpers --- + +// newMockComp creates a mock component with the given name and an empty upstream config. 
+func newMockComp(ctrl *gomock.Controller, name string) *components_testutils.MockComponent { + return newMockCompWithConfig(ctrl, name, &projectconfig.ComponentConfig{ + Name: name, + Spec: projectconfig.SpecSource{}, + }) +} + +// newMockCompWithConfig creates a mock component with the given name and a custom config. +func newMockCompWithConfig( + ctrl *gomock.Controller, name string, config *projectconfig.ComponentConfig, +) *components_testutils.MockComponent { + comp := components_testutils.NewMockComponent(ctrl) + comp.EXPECT().GetName().AnyTimes().Return(name) + comp.EXPECT().GetConfig().AnyTimes().Return(config) + + return comp +} + +func sha256Hex(content string) string { + hasher := sha256.New() + fmt.Fprint(hasher, content) + + return hex.EncodeToString(hasher.Sum(nil)) +} + +// newNoOpDryRunnable returns a mock that reports dry-run as false. +func newNoOpDryRunnable() *opctxNoOpDryRunnable { + return &opctxNoOpDryRunnable{} +} + +type opctxNoOpDryRunnable struct{} + +func (d *opctxNoOpDryRunnable) DryRun() bool { return false } + +// newNoOpDownloader returns a stub FedoraSourceDownloader that does nothing. +func newNoOpDownloader() *noOpDownloader { + return &noOpDownloader{} +} + +type noOpDownloader struct{} + +func (d *noOpDownloader) ExtractSourcesFromRepo( + _ context.Context, _, _, _ string, _ []string, +) error { + return nil +} diff --git a/internal/providers/sourceproviders/localidentity.go b/internal/providers/sourceproviders/localidentity.go new file mode 100644 index 0000000..2b9f4ec --- /dev/null +++ b/internal/providers/sourceproviders/localidentity.go @@ -0,0 +1,74 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +package sourceproviders + +import ( + "crypto/sha256" + "encoding/hex" + "fmt" + "io/fs" + "path/filepath" + "sort" + + "github.com/microsoft/azure-linux-dev-tools/internal/global/opctx" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" + "github.com/spf13/afero" +) + +// ResolveLocalSourceIdentity computes a SHA256 hash over all files in the given +// spec directory (spec file + sidecar files like patches and scripts). +// Files are sorted by path for determinism. Returns an empty string if specDir +// is empty or contains no files. +func ResolveLocalSourceIdentity(filesystem opctx.FS, specDir string) (string, error) { + if specDir == "" { + return "", nil + } + + // Collect all files in the spec directory. + var filePaths []string + + err := afero.Walk(filesystem, specDir, + func(path string, info fs.FileInfo, walkErr error) error { + if walkErr != nil { + return walkErr + } + + if !info.IsDir() { + filePaths = append(filePaths, path) + } + + return nil + }) + if err != nil { + return "", fmt.Errorf("walking spec directory %#q:\n%w", specDir, err) + } + + if len(filePaths) == 0 { + return "", nil + } + + // Sort for determinism across runs. + sort.Strings(filePaths) + + // Hash each file and combine into a single digest. 
+ combinedHasher := sha256.New() + + for _, filePath := range filePaths { + fileHash, hashErr := fileutils.ComputeFileHash( + filesystem, fileutils.HashTypeSHA256, filePath, + ) + if hashErr != nil { + return "", fmt.Errorf("hashing file %#q:\n%w", filePath, hashErr) + } + + relPath, relErr := filepath.Rel(specDir, filePath) + if relErr != nil { + return "", fmt.Errorf("computing relative path for %#q:\n%w", filePath, relErr) + } + + fmt.Fprintf(combinedHasher, "%s=%s\n", relPath, fileHash) + } + + return "sha256:" + hex.EncodeToString(combinedHasher.Sum(nil)), nil +} diff --git a/internal/providers/sourceproviders/rpmcontentsprovider.go b/internal/providers/sourceproviders/rpmcontentsprovider.go index a02525c..d66658e 100644 --- a/internal/providers/sourceproviders/rpmcontentsprovider.go +++ b/internal/providers/sourceproviders/rpmcontentsprovider.go @@ -5,8 +5,11 @@ package sourceproviders import ( "context" + "crypto/sha256" + "encoding/hex" "errors" "fmt" + "io" "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/components" "github.com/microsoft/azure-linux-dev-tools/internal/providers/rpmprovider" @@ -73,3 +76,28 @@ func (r *RPMContentsProviderImpl) GetComponent( return nil } + +// ResolveSourceIdentity implements [SourceIdentityProvider] by downloading the source RPM +// and computing its SHA256 hash. This is a heavyweight operation since it requires a full +// RPM download. 
+func (r *RPMContentsProviderImpl) ResolveSourceIdentity( + ctx context.Context, + component components.Component, +) (identity string, err error) { + rpmReader, err := r.rpmProvider.GetRPM(ctx, component.GetName(), nil) + if err != nil { + return "", fmt.Errorf("failed to get RPM for identity of component %#q:\n%w", + component.GetName(), err) + } + + defer defers.HandleDeferError(rpmReader.Close, &err) + + hasher := sha256.New() + + if _, err := io.Copy(hasher, rpmReader); err != nil { + return "", fmt.Errorf("failed to hash RPM for component %#q:\n%w", + component.GetName(), err) + } + + return "sha256:" + hex.EncodeToString(hasher.Sum(nil)), nil +} diff --git a/internal/providers/sourceproviders/sourcemanager.go b/internal/providers/sourceproviders/sourcemanager.go index 51fe503..c738136 100644 --- a/internal/providers/sourceproviders/sourcemanager.go +++ b/internal/providers/sourceproviders/sourcemanager.go @@ -37,6 +37,18 @@ type FileSourceProvider interface { GetFiles(ctx context.Context, fileRefs []projectconfig.SourceFileReference, destDirPath string) error } +// SourceIdentityProvider resolves a reproducible identity string for a component's source. +// The identity changes whenever the source content would change — the exact representation +// depends on the source type (e.g., a commit hash for dist-git, a content hash for local files). +// +// Consumers should treat the returned string as opaque; it is only meaningful for equality +// comparison between two runs. +type SourceIdentityProvider interface { + // ResolveSourceIdentity returns a deterministic identity string for the component's source. + // Returns an error if the identity cannot be determined (e.g., network failure for upstream sources). + ResolveSourceIdentity(ctx context.Context, component components.Component) (string, error) +} + // FetchComponentOptions holds optional parameters for component fetching operations. 
type FetchComponentOptions struct { // PreserveGitDir, when true, instructs the provider to keep the upstream .git directory @@ -72,9 +84,10 @@ func resolveFetchComponentOptions(opts []FetchComponentOption) FetchComponentOpt } // ComponentSourceProvider is an abstract interface implemented by a source provider that can retrieve the -// full file contents of a given component. +// full file contents of a given component or calculate an identity. type ComponentSourceProvider interface { Provider + SourceIdentityProvider // GetComponent retrieves the `.spec` for the specified component along with any sidecar // files stored along with it, placing the fetched files in the provided directory. @@ -96,6 +109,11 @@ type SourceManager interface { ctx context.Context, component components.Component, destDirPath string, opts ...FetchComponentOption, ) error + + // ResolveSourceIdentity returns a deterministic identity string for the component's source. + // For local components, this is a content hash of the spec directory. + // For upstream components, this is the resolved commit hash from the dist-git provider. + ResolveSourceIdentity(ctx context.Context, component components.Component) (string, error) } // ResolvedDistro holds the fully resolved distro configuration for a component. 
@@ -443,6 +461,55 @@ func (m *sourceManager) FetchComponent( component.GetName()) } +func (m *sourceManager) ResolveSourceIdentity( + ctx context.Context, component components.Component, +) (string, error) { + if component.GetName() == "" { + return "", errors.New("component name is empty") + } + + sourceType := component.GetConfig().Spec.SourceType + + switch sourceType { + case projectconfig.SpecSourceTypeLocal, projectconfig.SpecSourceTypeUnspecified: + specDir := "" + if component.GetConfig().Spec.Path != "" { + specDir = filepath.Dir(component.GetConfig().Spec.Path) + } + + return ResolveLocalSourceIdentity(m.fs, specDir) + + case projectconfig.SpecSourceTypeUpstream: + return m.resolveUpstreamSourceIdentity(ctx, component) + } + + return "", fmt.Errorf("no identity provider for source type %#q on component %#q", + sourceType, component.GetName()) +} + +func (m *sourceManager) resolveUpstreamSourceIdentity( + ctx context.Context, component components.Component, +) (string, error) { + if len(m.upstreamComponentProviders) == 0 { + return "", fmt.Errorf("no upstream providers configured for component %#q", + component.GetName()) + } + + var lastError error + + for _, provider := range m.upstreamComponentProviders { + identity, err := provider.ResolveSourceIdentity(ctx, component) + if err == nil { + return identity, nil + } + + lastError = err + } + + return "", fmt.Errorf("failed to resolve source identity for upstream component %#q:\n%w", + component.GetName(), lastError) +} + func (m *sourceManager) fetchLocalComponent( ctx context.Context, component components.Component, destDirPath string, ) error { diff --git a/internal/providers/sourceproviders/sourceproviders_test/sourcemanager_mocks.go b/internal/providers/sourceproviders/sourceproviders_test/sourcemanager_mocks.go index db09ac8..6707ba0 100644 --- a/internal/providers/sourceproviders/sourceproviders_test/sourcemanager_mocks.go +++ 
b/internal/providers/sourceproviders/sourceproviders_test/sourcemanager_mocks.go @@ -74,3 +74,18 @@ func (mr *MockSourceManagerMockRecorder) FetchFiles(ctx, component, destDirPath mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "FetchFiles", reflect.TypeOf((*MockSourceManager)(nil).FetchFiles), ctx, component, destDirPath) } + +// ResolveSourceIdentity mocks base method. +func (m *MockSourceManager) ResolveSourceIdentity(ctx context.Context, component components.Component) (string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ResolveSourceIdentity", ctx, component) + ret0, _ := ret[0].(string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// ResolveSourceIdentity indicates an expected call of ResolveSourceIdentity. +func (mr *MockSourceManagerMockRecorder) ResolveSourceIdentity(ctx, component any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ResolveSourceIdentity", reflect.TypeOf((*MockSourceManager)(nil).ResolveSourceIdentity), ctx, component) +} From bc000d02054483920b1c250f3bee11a33f1a676f Mon Sep 17 00:00:00 2001 From: Daniel McIlvaney Date: Mon, 30 Mar 2026 15:55:21 -0700 Subject: [PATCH 04/12] feat(config): Add fingerprint ignore tags to some config fields --- internal/projectconfig/build.go | 12 +-- internal/projectconfig/component.go | 8 +- internal/projectconfig/distro.go | 2 +- internal/projectconfig/fingerprint_test.go | 113 +++++++++++++++++++++ internal/projectconfig/overlay.go | 2 +- 5 files changed, 125 insertions(+), 12 deletions(-) create mode 100644 internal/projectconfig/fingerprint_test.go diff --git a/internal/projectconfig/build.go b/internal/projectconfig/build.go index 50ee2c7..67c7ffa 100644 --- a/internal/projectconfig/build.go +++ b/internal/projectconfig/build.go @@ -13,7 +13,7 @@ type CheckConfig struct { // Skip indicates whether the %check section should be disabled for this component. 
Skip bool `toml:"skip,omitempty" json:"skip,omitempty" jsonschema:"title=Skip check,description=Disables the %check section by prepending 'exit 0' when set to true"` // SkipReason provides a required justification when Skip is true. - SkipReason string `toml:"skip_reason,omitempty" json:"skipReason,omitempty" jsonschema:"title=Skip reason,description=Required justification for skipping the %check section"` + SkipReason string `toml:"skip_reason,omitempty" json:"skipReason,omitempty" jsonschema:"title=Skip reason,description=Required justification for skipping the %check section" fingerprint:"-"` } // Validate checks that required fields are set when Skip is true. @@ -43,9 +43,9 @@ type ComponentBuildConfig struct { // Check section configuration. Check CheckConfig `toml:"check,omitempty" json:"check,omitempty" jsonschema:"title=Check configuration,description=Configuration for the %check section"` // Failure configuration and policy for this component's build. - Failure ComponentBuildFailureConfig `toml:"failure,omitempty" json:"failure,omitempty" jsonschema:"title=Build failure configuration,description=Configuration and policy regarding build failures for this component."` + Failure ComponentBuildFailureConfig `toml:"failure,omitempty" json:"failure,omitempty" jsonschema:"title=Build failure configuration,description=Configuration and policy regarding build failures for this component." fingerprint:"-"` // Hints for how or when to build the component; must not be required for correctness of builds. - Hints ComponentBuildHints `toml:"hints,omitempty" json:"hints,omitempty" jsonschema:"title=Build hints,description=Non-essential hints for how or when to build the component."` + Hints ComponentBuildHints `toml:"hints,omitempty" json:"hints,omitempty" jsonschema:"title=Build hints,description=Non-essential hints for how or when to build the component." 
fingerprint:"-"` } // ComponentBuildFailureConfig encapsulates configuration and policy regarding a component's @@ -53,9 +53,9 @@ type ComponentBuildConfig struct { type ComponentBuildFailureConfig struct { // Expected indicates that this component is expected to fail building. This is intended to be used as a temporary // marker for components that are expected to fail until they can be fixed. - Expected bool `toml:"expected,omitempty" json:"expected,omitempty" jsonschema:"title=Expected failure,description=Indicates that this component is expected to fail building."` + Expected bool `toml:"expected,omitempty" json:"expected,omitempty" jsonschema:"title=Expected failure,description=Indicates that this component is expected to fail building." fingerprint:"-"` // ExpectedReason provides a required justification when Expected is true. - ExpectedReason string `toml:"expected-reason,omitempty" json:"expectedReason,omitempty" jsonschema:"title=Expected failure reason,description=Required justification for why this component is expected to fail building."` + ExpectedReason string `toml:"expected-reason,omitempty" json:"expectedReason,omitempty" jsonschema:"title=Expected failure reason,description=Required justification for why this component is expected to fail building." fingerprint:"-"` } // ComponentBuildHints encapsulates non-essential hints for how or when to build a component. @@ -63,7 +63,7 @@ type ComponentBuildFailureConfig struct { // or optimizations. type ComponentBuildHints struct { // Expensive indicates that building this component is relatively expensive compared to the rest of the distro. 
- Expensive bool `toml:"expensive,omitempty" json:"expensive,omitempty" jsonschema:"title=Expensive to build,description=Indicates that building this component is expensive and should be carefully considered when scheduling."` + Expensive bool `toml:"expensive,omitempty" json:"expensive,omitempty" jsonschema:"title=Expensive to build,description=Indicates that building this component is expensive and should be carefully considered when scheduling." fingerprint:"-"` } // Validate checks that the build configuration is valid. diff --git a/internal/projectconfig/component.go b/internal/projectconfig/component.go index 6910021..b005282 100644 --- a/internal/projectconfig/component.go +++ b/internal/projectconfig/component.go @@ -49,7 +49,7 @@ type Origin struct { // SourceFileReference encapsulates a reference to a specific source file artifact. type SourceFileReference struct { // Reference to the component to which the source file belongs. - Component ComponentReference `toml:"-" json:"-"` + Component ComponentReference `toml:"-" json:"-" fingerprint:"-"` // Name of the source file; must be non-empty. Filename string `toml:"filename" json:"filename"` @@ -61,7 +61,7 @@ type SourceFileReference struct { HashType fileutils.HashType `toml:"hash-type,omitempty" json:"hashType,omitempty"` // Origin for this source file. When omitted, the file is resolved via the lookaside cache. - Origin Origin `toml:"origin,omitempty" json:"origin,omitempty"` + Origin Origin `toml:"origin,omitempty" json:"origin,omitempty" fingerprint:"-"` } // Defines a component group. Component groups are logical groupings of components (see [ComponentConfig]). @@ -111,11 +111,11 @@ func (g ComponentGroupConfig) WithAbsolutePaths(referenceDir string) ComponentGr // Defines a component. type ComponentConfig struct { // The component's name; not actually present in serialized files. 
- Name string `toml:"-" json:"name" table:",sortkey"` + Name string `toml:"-" json:"name" table:",sortkey" fingerprint:"-"` // Reference to the source config file that this definition came from; not present // in serialized files. - SourceConfigFile *ConfigFile `toml:"-" json:"-" table:"-"` + SourceConfigFile *ConfigFile `toml:"-" json:"-" table:"-" fingerprint:"-"` // Where to get its spec and adjacent files from. Spec SpecSource `toml:"spec,omitempty" json:"spec,omitempty" jsonschema:"title=Spec,description=Identifies where to find the spec for this component"` diff --git a/internal/projectconfig/distro.go b/internal/projectconfig/distro.go index 5409411..89fe7e2 100644 --- a/internal/projectconfig/distro.go +++ b/internal/projectconfig/distro.go @@ -18,7 +18,7 @@ type DistroReference struct { // Version of the referenced distro. Version string `toml:"version,omitempty" json:"version,omitempty" jsonschema:"title=Version,description=Version of the referenced distro"` // Snapshot date/time for source code if specified components will use source as it existed at this time. - Snapshot string `toml:"snapshot,omitempty" json:"snapshot,omitempty" jsonschema:"format=date-time,title=Snapshot,description=If specified use source code as it existed at this date/time"` + Snapshot string `toml:"snapshot,omitempty" json:"snapshot,omitempty" jsonschema:"format=date-time,title=Snapshot,description=If specified use source code as it existed at this date/time" fingerprint:"-"` } // Implements the [Stringer] interface for [DistroReference]. diff --git a/internal/projectconfig/fingerprint_test.go b/internal/projectconfig/fingerprint_test.go new file mode 100644 index 0000000..c6a8b71 --- /dev/null +++ b/internal/projectconfig/fingerprint_test.go @@ -0,0 +1,113 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +package projectconfig_test + +import ( + "reflect" + "testing" + + "github.com/microsoft/azure-linux-dev-tools/internal/projectconfig" + "github.com/stretchr/testify/assert" +) + +// TestAllFingerprintedFieldsHaveDecision verifies that every field in every +// fingerprinted struct has been consciously categorized as either included +// (no fingerprint tag) or excluded (`fingerprint:"-"`). +// +// This test serves two purposes: +// 1. It ensures that newly added fields default to **included** in the fingerprint +// (the safe default — you get a false positive, never a false negative). +// 2. It catches accidental removal of `fingerprint:"-"` tags from excluded fields, +// since all exclusions are tracked in expectedExclusions. +func TestAllFingerprintedFieldsHaveDecision(t *testing.T) { + // All struct types whose fields participate in component fingerprinting. + // When adding a new struct that feeds into the fingerprint, add it here. + fingerprintedStructs := []reflect.Type{ + reflect.TypeFor[projectconfig.ComponentConfig](), + reflect.TypeFor[projectconfig.ComponentBuildConfig](), + reflect.TypeFor[projectconfig.CheckConfig](), + reflect.TypeFor[projectconfig.ComponentBuildFailureConfig](), + reflect.TypeFor[projectconfig.ComponentBuildHints](), + reflect.TypeFor[projectconfig.ComponentOverlay](), + reflect.TypeFor[projectconfig.SpecSource](), + reflect.TypeFor[projectconfig.DistroReference](), + reflect.TypeFor[projectconfig.SourceFileReference](), + reflect.TypeFor[projectconfig.Origin](), + } + + // Maps "StructName.FieldName" for every field that should carry a + // `fingerprint:"-"` tag. Catches accidental tag removal. + // + // Each entry documents WHY the field is excluded from the fingerprint: + expectedExclusions := map[string]bool{ + // ComponentConfig.Name — metadata, already the map key in project config. + "ComponentConfig.Name": true, + // ComponentConfig.SourceConfigFile — internal bookkeeping reference, not a build input. 
+ "ComponentConfig.SourceConfigFile": true, + + // ComponentBuildConfig.Failure — CI policy (expected failure tracking), not a build input. + "ComponentBuildConfig.Failure": true, + // ComponentBuildConfig.Hints — scheduling hints (e.g. expensive), not a build input. + "ComponentBuildConfig.Hints": true, + + // CheckConfig.SkipReason — human documentation for why check is skipped, not a build input. + "CheckConfig.SkipReason": true, + + // ComponentBuildFailureConfig — entire struct excluded via parent, but individual + // fields are also tagged so reflection on the struct alone is consistent. + // ComponentBuildFailureConfig.Expected — CI decision about expected failures. + "ComponentBuildFailureConfig.Expected": true, + // ComponentBuildFailureConfig.ExpectedReason — documentation for expected failure. + "ComponentBuildFailureConfig.ExpectedReason": true, + + // ComponentBuildHints — entire struct excluded via parent, fields also tagged. + // ComponentBuildHints.Expensive — scheduling hint, does not affect build output. + "ComponentBuildHints.Expensive": true, + + // ComponentOverlay.Description — human-readable documentation for the overlay. + "ComponentOverlay.Description": true, + + // SourceFileReference.Component — back-reference to parent, not a build input. + "SourceFileReference.Component": true, + + // DistroReference.Snapshot — snapshot timestamp is not a build input; the resolved + // upstream commit hash (captured separately via SourceIdentity) is what matters. + // Excluding this prevents a snapshot bump from marking all upstream components as changed. + "DistroReference.Snapshot": true, + + // SourceFileReference.Origin — download location metadata (URI, type), not a build input. + // The file content is already captured by Filename + Hash; changing a CDN URL should not + // trigger a rebuild. + "SourceFileReference.Origin": true, + } + + // Collect all actual exclusions found via reflection. 
+ actualExclusions := make(map[string]bool) + + for _, st := range fingerprintedStructs { + for i := range st.NumField() { + field := st.Field(i) + key := st.Name() + "." + field.Name + + tag := field.Tag.Get("fingerprint") + if tag == "-" { + actualExclusions[key] = true + } + } + } + + // Verify every expected exclusion is actually present. + for key := range expectedExclusions { + assert.Truef(t, actualExclusions[key], + "expected field %q to have `fingerprint:\"-\"` tag, but it does not — "+ + "was the tag accidentally removed?", key) + } + + // Verify no unexpected exclusions exist. + for key := range actualExclusions { + assert.Truef(t, expectedExclusions[key], + "field %q has `fingerprint:\"-\"` tag but is not in expectedExclusions — "+ + "add it to expectedExclusions if the exclusion is intentional", key) + } +} diff --git a/internal/projectconfig/overlay.go b/internal/projectconfig/overlay.go index 7613ecf..1bb1937 100644 --- a/internal/projectconfig/overlay.go +++ b/internal/projectconfig/overlay.go @@ -17,7 +17,7 @@ type ComponentOverlay struct { // The type of overlay to apply. Type ComponentOverlayType `toml:"type" json:"type" validate:"required" jsonschema:"enum=spec-add-tag,enum=spec-insert-tag,enum=spec-set-tag,enum=spec-update-tag,enum=spec-remove-tag,enum=spec-prepend-lines,enum=spec-append-lines,enum=spec-search-replace,enum=patch-add,enum=patch-remove,enum=file-prepend-lines,enum=file-search-replace,enum=file-add,enum=file-remove,enum=file-rename,title=Overlay type,description=The type of overlay to apply"` // Human readable description of overlay; primarily present to document the need for the change. 
- Description string `toml:"description,omitempty" json:"description,omitempty" jsonschema:"title=Description,description=Human readable description of overlay"` + Description string `toml:"description,omitempty" json:"description,omitempty" jsonschema:"title=Description,description=Human readable description of overlay" fingerprint:"-"` // For overlays that apply to non-spec files, indicates the filename. For overlays that can // apply to multiple files, supports glob patterns (including globstar). From 115fa912758903ada8c8413446aef84198ab3884 Mon Sep 17 00:00:00 2001 From: Daniel McIlvaney Date: Mon, 30 Mar 2026 15:59:33 -0700 Subject: [PATCH 05/12] feat(component): Add deterministic component fingerprints --- go.mod | 1 + go.sum | 2 + .../app/azldev/core/sources/synthistory.go | 4 +- internal/fingerprint/doc.go | 13 + internal/fingerprint/fingerprint.go | 159 +++++ internal/fingerprint/fingerprint_test.go | 597 ++++++++++++++++++ 6 files changed, 774 insertions(+), 2 deletions(-) create mode 100644 internal/fingerprint/doc.go create mode 100644 internal/fingerprint/fingerprint.go create mode 100644 internal/fingerprint/fingerprint_test.go diff --git a/go.mod b/go.mod index b989b4d..405463b 100644 --- a/go.mod +++ b/go.mod @@ -36,6 +36,7 @@ require ( github.com/magefile/mage v1.16.1 github.com/mark3labs/mcp-go v0.45.0 github.com/mattn/go-isatty v0.0.20 + github.com/mitchellh/hashstructure/v2 v2.0.2 github.com/muesli/termenv v0.16.0 github.com/nxadm/tail v1.4.11 github.com/opencontainers/selinux v1.13.1 diff --git a/go.sum b/go.sum index 0529d58..f3e3227 100644 --- a/go.sum +++ b/go.sum @@ -229,6 +229,8 @@ github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/mattn/go-runewidth v0.0.21 h1:jJKAZiQH+2mIinzCJIaIG9Be1+0NR+5sz/lYEEjdM8w= github.com/mattn/go-runewidth v0.0.21/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs= 
+github.com/mitchellh/hashstructure/v2 v2.0.2 h1:vGKWl0YJqUNxE8d+h8f6NJLcCJrgbhC4NcD46KavDd4= +github.com/mitchellh/hashstructure/v2 v2.0.2/go.mod h1:MG3aRVU/N29oo/V/IhBX8GR/zz4kQkprJgF2EVszyDE= github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo= github.com/moby/go-archive v0.2.0 h1:zg5QDUM2mi0JIM9fdQZWC7U8+2ZfixfTYoHL7rWUcP8= diff --git a/internal/app/azldev/core/sources/synthistory.go b/internal/app/azldev/core/sources/synthistory.go index 0c41f2e..96f606d 100644 --- a/internal/app/azldev/core/sources/synthistory.go +++ b/internal/app/azldev/core/sources/synthistory.go @@ -140,7 +140,7 @@ func buildSyntheticCommits( return nil, nil } - projectRepo, err := openProjectRepo(configFilePath) + projectRepo, err := OpenProjectRepo(configFilePath) if err != nil { return nil, err } @@ -208,7 +208,7 @@ func resolveConfigFilePath(config *projectconfig.ComponentConfig, componentName -// openProjectRepo finds and opens the git repository containing configFilePath by +// OpenProjectRepo finds and opens the git repository containing configFilePath by // walking up the directory tree. -func openProjectRepo(configFilePath string) (*gogit.Repository, error) { +func OpenProjectRepo(configFilePath string) (*gogit.Repository, error) { repo, err := gogit.PlainOpenWithOptions(filepath.Dir(configFilePath), &gogit.PlainOpenOptions{ DetectDotGit: true, }) diff --git a/internal/fingerprint/doc.go b/internal/fingerprint/doc.go new file mode 100644 index 0000000..45a7fb4 --- /dev/null +++ b/internal/fingerprint/doc.go @@ -0,0 +1,13 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +// Package fingerprint computes deterministic identity fingerprints for components. +// A fingerprint captures all resolved build inputs so that changes to any input +// (config fields, spec content, overlay files, distro context, upstream refs, or +// Affects commit count) produce a different fingerprint.
+// +// The primary entry point is [ComputeIdentity], which takes a resolved +// [projectconfig.ComponentConfig] and additional context, and returns a +// [ComponentIdentity] containing the overall fingerprint hash plus a breakdown +// of individual input hashes for debugging. +package fingerprint diff --git a/internal/fingerprint/fingerprint.go b/internal/fingerprint/fingerprint.go new file mode 100644 index 0000000..18c853b --- /dev/null +++ b/internal/fingerprint/fingerprint.go @@ -0,0 +1,159 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package fingerprint + +import ( + "crypto/sha256" + "encoding/hex" + "fmt" + "io" + "sort" + "strconv" + + "github.com/microsoft/azure-linux-dev-tools/internal/global/opctx" + "github.com/microsoft/azure-linux-dev-tools/internal/projectconfig" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" + "github.com/mitchellh/hashstructure/v2" +) + +// hashstructureTagName is the struct tag name used by hashstructure to determine +// field inclusion. Fields tagged with `fingerprint:"-"` are excluded. +const hashstructureTagName = "fingerprint" + +// ComponentIdentity holds the computed fingerprint for a single component plus +// a breakdown of individual input hashes for debugging. +type ComponentIdentity struct { + // Fingerprint is the overall SHA256 hash combining all inputs. + Fingerprint string `json:"fingerprint"` + // Inputs provides the individual input hashes that were combined. + Inputs ComponentInputs `json:"inputs"` +} + +// ComponentInputs contains the individual input hashes that comprise a component's +// fingerprint. +type ComponentInputs struct { + // ConfigHash is the hash of the resolved component config fields (uint64 from hashstructure). + ConfigHash uint64 `json:"configHash"` + // SourceIdentity is the opaque identity string for the component's source. + // For local specs this is a content hash; for upstream specs this is a commit hash. 
+ SourceIdentity string `json:"sourceIdentity,omitempty"` + // OverlayFileHashes maps overlay source file paths to their SHA256 hashes. + OverlayFileHashes map[string]string `json:"overlayFileHashes,omitempty"` + // AffectsCommitCount is the number of "Affects: " commits in the project repo. + AffectsCommitCount int `json:"affectsCommitCount"` + // Distro is the effective distro name. + Distro string `json:"distro"` + // DistroVersion is the effective distro version. + DistroVersion string `json:"distroVersion"` +} + +// IdentityOptions holds additional inputs for computing a component's identity +// that are not part of the component config itself. +type IdentityOptions struct { + // AffectsCommitCount is the number of "Affects: " commits. + AffectsCommitCount int + // SourceIdentity is the opaque identity string from a [sourceproviders.SourceIdentityProvider]. + SourceIdentity string +} + +// ComputeIdentity computes the fingerprint for a component from its resolved config +// and additional context. The fs parameter is used to read overlay source file +// contents for hashing; the spec source is represented by opts.SourceIdentity. +func ComputeIdentity( + fs opctx.FS, + component projectconfig.ComponentConfig, + distroRef projectconfig.DistroReference, + opts IdentityOptions, +) (*ComponentIdentity, error) { + inputs := ComponentInputs{ + AffectsCommitCount: opts.AffectsCommitCount, + SourceIdentity: opts.SourceIdentity, + Distro: distroRef.Name, + DistroVersion: distroRef.Version, + } + + // 1. Hash the resolved config struct (excluding fingerprint:"-" fields). + configHash, err := hashstructure.Hash(component, hashstructure.FormatV2, &hashstructure.HashOptions{ + TagName: hashstructureTagName, + }) + if err != nil { + return nil, fmt.Errorf("hashing component config:\n%w", err) + } + + inputs.ConfigHash = configHash + + // 2. Hash overlay source file contents.
+ overlayHashes, err := hashOverlayFiles(fs, component.Overlays) + if err != nil { + return nil, fmt.Errorf("hashing overlay files:\n%w", err) + } + + inputs.OverlayFileHashes = overlayHashes + + // 3. Combine all inputs into the overall fingerprint. + return &ComponentIdentity{ + Fingerprint: combineInputs(inputs), + Inputs: inputs, + }, nil +} + +// hashOverlayFiles computes SHA256 hashes for all overlay source files that reference +// local files. Returns a map of source path to hex hash, or an empty map if no overlay +// source files exist. +func hashOverlayFiles( + fs opctx.FS, + overlays []projectconfig.ComponentOverlay, +) (map[string]string, error) { + hashes := make(map[string]string) + + for _, overlay := range overlays { + if overlay.Source == "" { + continue + } + + fileHash, err := fileutils.ComputeFileHash(fs, fileutils.HashTypeSHA256, overlay.Source) + if err != nil { + return nil, fmt.Errorf("hashing overlay source %#q:\n%w", overlay.Source, err) + } + + hashes[overlay.Source] = fileHash + } + + return hashes, nil +} + +// combineInputs deterministically combines all input hashes into a single SHA256 fingerprint. +func combineInputs(inputs ComponentInputs) string { + hasher := sha256.New() + + // Write each input in a fixed order with field labels for domain separation. + writeField(hasher, "config_hash", strconv.FormatUint(inputs.ConfigHash, 10)) + writeField(hasher, "source_identity", inputs.SourceIdentity) + writeField(hasher, "affects_commit_count", strconv.Itoa(inputs.AffectsCommitCount)) + writeField(hasher, "distro", inputs.Distro) + writeField(hasher, "distro_version", inputs.DistroVersion) + + // Overlay file hashes in sorted key order for determinism. 
+ if len(inputs.OverlayFileHashes) > 0 { + keys := make([]string, 0, len(inputs.OverlayFileHashes)) + for key := range inputs.OverlayFileHashes { + keys = append(keys, key) + } + + sort.Strings(keys) + + for _, key := range keys { + writeField(hasher, "overlay:"+key, inputs.OverlayFileHashes[key]) + } + } + + return "sha256:" + hex.EncodeToString(hasher.Sum(nil)) +} + +// writeField writes a labeled value to the hasher for domain separation. +func writeField(writer io.Writer, label string, value string) { + // Use <len(label)>:<label>=<value>\n format. Length-prefixing the label prevents + // collisions between field names that are prefixes of each other. + fmt.Fprintf(writer, "%d:%s=%s\n", len(label), label, value) +} diff --git a/internal/fingerprint/fingerprint_test.go b/internal/fingerprint/fingerprint_test.go new file mode 100644 index 0000000..748c414 --- /dev/null +++ b/internal/fingerprint/fingerprint_test.go @@ -0,0 +1,597 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License.
+ +package fingerprint_test + +import ( + "testing" + + "github.com/microsoft/azure-linux-dev-tools/internal/fingerprint" + "github.com/microsoft/azure-linux-dev-tools/internal/global/testctx" + "github.com/microsoft/azure-linux-dev-tools/internal/projectconfig" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" + "github.com/spf13/afero" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func newTestFS(t *testing.T, files map[string]string) *testctx.TestCtx { + t.Helper() + + ctx := testctx.NewCtx() + + for path, content := range files { + err := afero.WriteFile(ctx.FS(), path, []byte(content), 0o644) + require.NoError(t, err) + } + + return ctx +} + +func baseDistroRef() projectconfig.DistroReference { + return projectconfig.DistroReference{ + Name: "azl", + Version: "3.0", + } +} + +func baseComponent() projectconfig.ComponentConfig { + return projectconfig.ComponentConfig{ + Name: "testpkg", + Spec: projectconfig.SpecSource{ + SourceType: projectconfig.SpecSourceTypeLocal, + Path: "/specs/test.spec", + }, + } +} + +func computeFingerprint( + t *testing.T, + ctx *testctx.TestCtx, + comp projectconfig.ComponentConfig, + distro projectconfig.DistroReference, + affects int, +) string { + t.Helper() + + identity, err := fingerprint.ComputeIdentity(ctx.FS(), comp, distro, fingerprint.IdentityOptions{ + AffectsCommitCount: affects, + }) + require.NoError(t, err) + + return identity.Fingerprint +} + +func TestComputeIdentity_Deterministic(t *testing.T) { + ctx := newTestFS(t, map[string]string{ + "/specs/test.spec": "Name: testpkg\nVersion: 1.0", + }) + + comp := baseComponent() + distro := baseDistroRef() + + fp1 := computeFingerprint(t, ctx, comp, distro, 0) + fp2 := computeFingerprint(t, ctx, comp, distro, 0) + + assert.Equal(t, fp1, fp2, "identical inputs must produce identical fingerprints") + assert.Contains(t, fp1, "sha256:", "fingerprint should have sha256: prefix") +} + +func 
TestComputeIdentity_SourceIdentityChange(t *testing.T) { + ctx := newTestFS(t, map[string]string{ + "/specs/test.spec": "Name: testpkg\nVersion: 1.0", + }) + + comp := baseComponent() + distro := baseDistroRef() + + identity1, err := fingerprint.ComputeIdentity(ctx.FS(), comp, distro, fingerprint.IdentityOptions{ + SourceIdentity: "abc123", + }) + require.NoError(t, err) + + identity2, err := fingerprint.ComputeIdentity(ctx.FS(), comp, distro, fingerprint.IdentityOptions{ + SourceIdentity: "def456", + }) + require.NoError(t, err) + + assert.NotEqual(t, identity1.Fingerprint, identity2.Fingerprint, + "different source identity must produce different fingerprints") +} + +func TestComputeIdentity_BuildWithChange(t *testing.T) { + ctx := newTestFS(t, map[string]string{ + "/specs/test.spec": "Name: testpkg\nVersion: 1.0", + }) + + comp1 := baseComponent() + comp2 := baseComponent() + comp2.Build.With = []string{"feature_x"} + + distro := baseDistroRef() + + fp1 := computeFingerprint(t, ctx, comp1, distro, 0) + fp2 := computeFingerprint(t, ctx, comp2, distro, 0) + + assert.NotEqual(t, fp1, fp2, "adding build.with must change fingerprint") +} + +func TestComputeIdentity_BuildWithoutChange(t *testing.T) { + ctx := newTestFS(t, map[string]string{ + "/specs/test.spec": "Name: testpkg\nVersion: 1.0", + }) + + comp1 := baseComponent() + comp2 := baseComponent() + comp2.Build.Without = []string{"docs"} + + distro := baseDistroRef() + + fp1 := computeFingerprint(t, ctx, comp1, distro, 0) + fp2 := computeFingerprint(t, ctx, comp2, distro, 0) + + assert.NotEqual(t, fp1, fp2, "adding build.without must change fingerprint") +} + +func TestComputeIdentity_BuildDefinesChange(t *testing.T) { + ctx := newTestFS(t, map[string]string{ + "/specs/test.spec": "Name: testpkg\nVersion: 1.0", + }) + + comp1 := baseComponent() + comp2 := baseComponent() + comp2.Build.Defines = map[string]string{"debug": "1"} + + distro := baseDistroRef() + + fp1 := computeFingerprint(t, ctx, comp1, distro, 0) + 
fp2 := computeFingerprint(t, ctx, comp2, distro, 0) + + assert.NotEqual(t, fp1, fp2, "adding build.defines must change fingerprint") +} + +func TestComputeIdentity_CheckSkipChange(t *testing.T) { + ctx := newTestFS(t, map[string]string{ + "/specs/test.spec": "Name: testpkg\nVersion: 1.0", + }) + + comp1 := baseComponent() + comp2 := baseComponent() + comp2.Build.Check.Skip = true + + distro := baseDistroRef() + + fp1 := computeFingerprint(t, ctx, comp1, distro, 0) + fp2 := computeFingerprint(t, ctx, comp2, distro, 0) + + assert.NotEqual(t, fp1, fp2, "changing check.skip must change fingerprint") +} + +func TestComputeIdentity_ExcludedFieldsDoNotChange(t *testing.T) { + ctx := newTestFS(t, map[string]string{ + "/specs/test.spec": "Name: testpkg\nVersion: 1.0", + }) + distro := baseDistroRef() + + // Base component. + comp := baseComponent() + fpBase := computeFingerprint(t, ctx, comp, distro, 0) + + // Changing Name (fingerprint:"-") should NOT change fingerprint. + compName := baseComponent() + compName.Name = "different-name" + fpName := computeFingerprint(t, ctx, compName, distro, 0) + assert.Equal(t, fpBase, fpName, "changing Name must NOT change fingerprint") + + // Changing Build.Failure.Expected (fingerprint:"-") should NOT change fingerprint. + compFailure := baseComponent() + compFailure.Build.Failure.Expected = true + compFailure.Build.Failure.ExpectedReason = "known issue" + fpFailure := computeFingerprint(t, ctx, compFailure, distro, 0) + assert.Equal(t, fpBase, fpFailure, "changing failure.expected must NOT change fingerprint") + + // Changing Build.Hints.Expensive (fingerprint:"-") should NOT change fingerprint. + compHints := baseComponent() + compHints.Build.Hints.Expensive = true + fpHints := computeFingerprint(t, ctx, compHints, distro, 0) + assert.Equal(t, fpBase, fpHints, "changing hints.expensive must NOT change fingerprint") + + // Changing Build.Check.SkipReason (fingerprint:"-") should NOT change fingerprint. 
+ compReason := baseComponent() + compReason.Build.Check.SkipReason = "tests require network" + fpReason := computeFingerprint(t, ctx, compReason, distro, 0) + assert.Equal(t, fpBase, fpReason, "changing check.skip_reason must NOT change fingerprint") +} + +func TestComputeIdentity_OverlayDescriptionExcluded(t *testing.T) { + ctx := newTestFS(t, map[string]string{ + "/specs/test.spec": "Name: testpkg\nVersion: 1.0", + }) + distro := baseDistroRef() + + comp1 := baseComponent() + comp1.Overlays = []projectconfig.ComponentOverlay{ + {Type: "spec-set-tag", Tag: "Release", Value: "2%{?dist}"}, + } + + comp2 := baseComponent() + comp2.Overlays = []projectconfig.ComponentOverlay{ + {Type: "spec-set-tag", Tag: "Release", Value: "2%{?dist}", Description: "bumped release"}, + } + + fp1 := computeFingerprint(t, ctx, comp1, distro, 0) + fp2 := computeFingerprint(t, ctx, comp2, distro, 0) + + assert.Equal(t, fp1, fp2, "overlay description must NOT change fingerprint") +} + +func TestComputeIdentity_OverlaySourceFileChange(t *testing.T) { + ctx1 := newTestFS(t, map[string]string{ + "/specs/test.spec": "Name: testpkg\nVersion: 1.0", + "/patches/fix.patch": "--- a/file\n+++ b/file\n@@ original @@", + }) + ctx2 := newTestFS(t, map[string]string{ + "/specs/test.spec": "Name: testpkg\nVersion: 1.0", + "/patches/fix.patch": "--- a/file\n+++ b/file\n@@ modified @@", + }) + distro := baseDistroRef() + + comp := baseComponent() + comp.Overlays = []projectconfig.ComponentOverlay{ + {Type: "patch-add", Source: "/patches/fix.patch"}, + } + + fp1 := computeFingerprint(t, ctx1, comp, distro, 0) + fp2 := computeFingerprint(t, ctx2, comp, distro, 0) + + assert.NotEqual(t, fp1, fp2, "different overlay source content must produce different fingerprints") +} + +func TestComputeIdentity_DistroChange(t *testing.T) { + ctx := newTestFS(t, map[string]string{ + "/specs/test.spec": "Name: testpkg\nVersion: 1.0", + }) + + comp := baseComponent() + + fp1 := computeFingerprint(t, ctx, comp, 
projectconfig.DistroReference{Name: "azl", Version: "3.0"}, 0) + fp2 := computeFingerprint(t, ctx, comp, projectconfig.DistroReference{Name: "azl", Version: "4.0"}, 0) + + assert.NotEqual(t, fp1, fp2, "different distro version must produce different fingerprints") +} + +func TestComputeIdentity_DistroNameChange(t *testing.T) { + ctx := newTestFS(t, map[string]string{ + "/specs/test.spec": "Name: testpkg\nVersion: 1.0", + }) + + comp := baseComponent() + + fp1 := computeFingerprint(t, ctx, comp, projectconfig.DistroReference{Name: "azl", Version: "3.0"}, 0) + fp2 := computeFingerprint(t, ctx, comp, projectconfig.DistroReference{Name: "fedora", Version: "3.0"}, 0) + + assert.NotEqual(t, fp1, fp2, "different distro name must produce different fingerprints") +} + +func TestComputeIdentity_AffectsCountChange(t *testing.T) { + ctx := newTestFS(t, map[string]string{ + "/specs/test.spec": "Name: testpkg\nVersion: 1.0", + }) + + comp := baseComponent() + distro := baseDistroRef() + + fp1 := computeFingerprint(t, ctx, comp, distro, 0) + fp2 := computeFingerprint(t, ctx, comp, distro, 1) + + assert.NotEqual(t, fp1, fp2, "different affects commit count must produce different fingerprints") +} + +func TestComputeIdentity_UpstreamCommitChange(t *testing.T) { + ctx := newTestFS(t, nil) + + comp1 := projectconfig.ComponentConfig{ + Spec: projectconfig.SpecSource{ + SourceType: projectconfig.SpecSourceTypeUpstream, + UpstreamName: "curl", + UpstreamCommit: "abc1234", + UpstreamDistro: projectconfig.DistroReference{Name: "fedora", Version: "41"}, + }, + } + comp2 := projectconfig.ComponentConfig{ + Spec: projectconfig.SpecSource{ + SourceType: projectconfig.SpecSourceTypeUpstream, + UpstreamName: "curl", + UpstreamCommit: "def5678", + UpstreamDistro: projectconfig.DistroReference{Name: "fedora", Version: "41"}, + }, + } + distro := baseDistroRef() + + fp1 := computeFingerprint(t, ctx, comp1, distro, 0) + fp2 := computeFingerprint(t, ctx, comp2, distro, 0) + + assert.NotEqual(t, fp1, 
fp2, "different upstream commit must produce different fingerprints") +} + +func TestComputeIdentity_SourceFilesChange(t *testing.T) { + ctx := newTestFS(t, map[string]string{ + "/specs/test.spec": "Name: testpkg\nVersion: 1.0", + }) + + comp1 := baseComponent() + comp1.SourceFiles = []projectconfig.SourceFileReference{ + {Filename: "source.tar.gz", Hash: "aaa111", HashType: fileutils.HashTypeSHA256}, + } + + comp2 := baseComponent() + comp2.SourceFiles = []projectconfig.SourceFileReference{ + {Filename: "source.tar.gz", Hash: "bbb222", HashType: fileutils.HashTypeSHA256}, + } + distro := baseDistroRef() + + fp1 := computeFingerprint(t, ctx, comp1, distro, 0) + fp2 := computeFingerprint(t, ctx, comp2, distro, 0) + + assert.NotEqual(t, fp1, fp2, "different source file hash must produce different fingerprints") +} + +func TestComputeIdentity_SourceFileOriginExcluded(t *testing.T) { + ctx := newTestFS(t, map[string]string{ + "/specs/test.spec": "Name: testpkg\nVersion: 1.0", + }) + + comp1 := baseComponent() + comp1.SourceFiles = []projectconfig.SourceFileReference{ + { + Filename: "source.tar.gz", + Hash: "aaa111", + HashType: fileutils.HashTypeSHA256, + Origin: projectconfig.Origin{Type: "download", Uri: "https://old-cdn.example.com/source.tar.gz"}, + }, + } + + comp2 := baseComponent() + comp2.SourceFiles = []projectconfig.SourceFileReference{ + { + Filename: "source.tar.gz", + Hash: "aaa111", + HashType: fileutils.HashTypeSHA256, + Origin: projectconfig.Origin{Type: "download", Uri: "https://new-cdn.example.com/source.tar.gz"}, + }, + } + distro := baseDistroRef() + + fp1 := computeFingerprint(t, ctx, comp1, distro, 0) + fp2 := computeFingerprint(t, ctx, comp2, distro, 0) + + assert.Equal(t, fp1, fp2, "changing source file origin URL must NOT change fingerprint") +} + +func TestComputeIdentity_InputsBreakdown(t *testing.T) { + ctx := newTestFS(t, map[string]string{ + "/specs/test.spec": "Name: testpkg\nVersion: 1.0", + "/patches/fix.patch": "patch content here", + 
}) + + comp := baseComponent() + comp.Overlays = []projectconfig.ComponentOverlay{ + {Type: "patch-add", Source: "/patches/fix.patch"}, + } + distro := baseDistroRef() + + identity, err := fingerprint.ComputeIdentity(ctx.FS(), comp, distro, fingerprint.IdentityOptions{ + AffectsCommitCount: 3, + SourceIdentity: "test-source-identity-hash", + }) + require.NoError(t, err) + + assert.NotEmpty(t, identity.Fingerprint) + assert.NotZero(t, identity.Inputs.ConfigHash) + assert.Equal(t, "test-source-identity-hash", identity.Inputs.SourceIdentity) + assert.Equal(t, 3, identity.Inputs.AffectsCommitCount) + assert.Equal(t, "azl", identity.Inputs.Distro) + assert.Equal(t, "3.0", identity.Inputs.DistroVersion) + assert.Contains(t, identity.Inputs.OverlayFileHashes, "/patches/fix.patch") +} + +func TestComputeIdentity_NoSpecPath(t *testing.T) { + ctx := newTestFS(t, nil) + + comp := projectconfig.ComponentConfig{ + Spec: projectconfig.SpecSource{ + SourceType: projectconfig.SpecSourceTypeLocal, + }, + } + distro := baseDistroRef() + + identity, err := fingerprint.ComputeIdentity(ctx.FS(), comp, distro, fingerprint.IdentityOptions{}) + require.NoError(t, err) + + assert.Empty(t, identity.Inputs.SourceIdentity) +} + +func TestComputeIdentity_OverlayFunctionalFieldChange(t *testing.T) { + ctx := newTestFS(t, map[string]string{ + "/specs/test.spec": "Name: testpkg\nVersion: 1.0", + }) + distro := baseDistroRef() + + comp1 := baseComponent() + comp1.Overlays = []projectconfig.ComponentOverlay{ + {Type: "spec-set-tag", Tag: "Release", Value: "2%{?dist}"}, + } + + comp2 := baseComponent() + comp2.Overlays = []projectconfig.ComponentOverlay{ + {Type: "spec-set-tag", Tag: "Release", Value: "3%{?dist}"}, + } + + fp1 := computeFingerprint(t, ctx, comp1, distro, 0) + fp2 := computeFingerprint(t, ctx, comp2, distro, 0) + + assert.NotEqual(t, fp1, fp2, "changing overlay value must change fingerprint") +} + +func TestComputeIdentity_AddingOverlay(t *testing.T) { + ctx := newTestFS(t, 
map[string]string{ + "/specs/test.spec": "Name: testpkg\nVersion: 1.0", + }) + distro := baseDistroRef() + + comp1 := baseComponent() + + comp2 := baseComponent() + comp2.Overlays = []projectconfig.ComponentOverlay{ + {Type: "spec-set-tag", Tag: "Release", Value: "2%{?dist}"}, + } + + fp1 := computeFingerprint(t, ctx, comp1, distro, 0) + fp2 := computeFingerprint(t, ctx, comp2, distro, 0) + + assert.NotEqual(t, fp1, fp2, "adding an overlay must change fingerprint") +} + +func TestComputeIdentity_BuildUndefinesChange(t *testing.T) { + ctx := newTestFS(t, map[string]string{ + "/specs/test.spec": "Name: testpkg\nVersion: 1.0", + }) + distro := baseDistroRef() + + comp1 := baseComponent() + comp2 := baseComponent() + comp2.Build.Undefines = []string{"_debuginfo"} + + fp1 := computeFingerprint(t, ctx, comp1, distro, 0) + fp2 := computeFingerprint(t, ctx, comp2, distro, 0) + + assert.NotEqual(t, fp1, fp2, "adding build.undefines must change fingerprint") +} + +// Tests below verify global change propagation: changes to shared config +// (distro defaults, group defaults) must fan out to all inheriting components. + +func TestComputeIdentity_DistroDefaultPropagation(t *testing.T) { + ctx := newTestFS(t, map[string]string{ + "/specs/curl.spec": "Name: curl\nVersion: 1.0", + "/specs/openssl.spec": "Name: openssl\nVersion: 3.0", + }) + + // Simulate two components that both inherit from a distro default. + // First, compute fingerprints with no distro-level build options. 
+ curl := projectconfig.ComponentConfig{ + Spec: projectconfig.SpecSource{SourceType: projectconfig.SpecSourceTypeLocal, Path: "/specs/curl.spec"}, + } + openssl := projectconfig.ComponentConfig{ + Spec: projectconfig.SpecSource{SourceType: projectconfig.SpecSourceTypeLocal, Path: "/specs/openssl.spec"}, + } + distro := baseDistroRef() + + fpCurl1 := computeFingerprint(t, ctx, curl, distro, 0) + fpOpenssl1 := computeFingerprint(t, ctx, openssl, distro, 0) + + // Now simulate a distro default adding build.with — after config merging, + // both components would have this option in their resolved config. + curl.Build.With = []string{"distro_feature"} + openssl.Build.With = []string{"distro_feature"} + + fpCurl2 := computeFingerprint(t, ctx, curl, distro, 0) + fpOpenssl2 := computeFingerprint(t, ctx, openssl, distro, 0) + + assert.NotEqual(t, fpCurl1, fpCurl2, + "distro default change must propagate to curl's fingerprint") + assert.NotEqual(t, fpOpenssl1, fpOpenssl2, + "distro default change must propagate to openssl's fingerprint") +} + +func TestComputeIdentity_GroupDefaultPropagation(t *testing.T) { + ctx := newTestFS(t, map[string]string{ + "/specs/a.spec": "Name: a\nVersion: 1.0", + "/specs/b.spec": "Name: b\nVersion: 1.0", + "/specs/c.spec": "Name: c\nVersion: 1.0", + }) + + distro := baseDistroRef() + + // Three components: a and b are in a group, c is not. 
+ compA := projectconfig.ComponentConfig{ + Spec: projectconfig.SpecSource{SourceType: projectconfig.SpecSourceTypeLocal, Path: "/specs/a.spec"}, + } + compB := projectconfig.ComponentConfig{ + Spec: projectconfig.SpecSource{SourceType: projectconfig.SpecSourceTypeLocal, Path: "/specs/b.spec"}, + } + compC := projectconfig.ComponentConfig{ + Spec: projectconfig.SpecSource{SourceType: projectconfig.SpecSourceTypeLocal, Path: "/specs/c.spec"}, + } + + fpA1 := computeFingerprint(t, ctx, compA, distro, 0) + fpB1 := computeFingerprint(t, ctx, compB, distro, 0) + fpC1 := computeFingerprint(t, ctx, compC, distro, 0) + + // Simulate a group default adding check.skip — after merging, only a and b have it. + compA.Build.Check.Skip = true + compB.Build.Check.Skip = true + // compC is not in the group, remains unchanged. + + fpA2 := computeFingerprint(t, ctx, compA, distro, 0) + fpB2 := computeFingerprint(t, ctx, compB, distro, 0) + fpC2 := computeFingerprint(t, ctx, compC, distro, 0) + + assert.NotEqual(t, fpA1, fpA2, "group default must propagate to member A") + assert.NotEqual(t, fpB1, fpB2, "group default must propagate to member B") + assert.Equal(t, fpC1, fpC2, "non-group member C must NOT be affected") +} + +func TestComputeIdentity_MergeUpdatesFromPropagation(t *testing.T) { + ctx := newTestFS(t, map[string]string{ + "/specs/test.spec": "Name: testpkg\nVersion: 1.0", + }) + distro := baseDistroRef() + + // Start with a base component. + comp := baseComponent() + fpBefore := computeFingerprint(t, ctx, comp, distro, 0) + + // Simulate applying a distro default via MergeUpdatesFrom. 
+ distroDefault := &projectconfig.ComponentConfig{ + Build: projectconfig.ComponentBuildConfig{ + Defines: map[string]string{"vendor": "azl"}, + }, + } + + err := comp.MergeUpdatesFrom(distroDefault) + require.NoError(t, err) + + fpAfter := computeFingerprint(t, ctx, comp, distro, 0) + + assert.NotEqual(t, fpBefore, fpAfter, + "merged distro default must change the fingerprint") +} + +func TestComputeIdentity_SnapshotChangeDoesNotAffectFingerprint(t *testing.T) { + ctx := newTestFS(t, nil) + + comp := projectconfig.ComponentConfig{ + Spec: projectconfig.SpecSource{ + SourceType: projectconfig.SpecSourceTypeUpstream, + UpstreamName: "curl", + UpstreamCommit: "abc1234", + UpstreamDistro: projectconfig.DistroReference{ + Name: "fedora", + Version: "41", + Snapshot: "2025-01-01T00:00:00Z", + }, + }, + } + distro := baseDistroRef() + + fp1 := computeFingerprint(t, ctx, comp, distro, 0) + + // Change only the snapshot timestamp. + comp.Spec.UpstreamDistro.Snapshot = "2026-06-15T00:00:00Z" + fp2 := computeFingerprint(t, ctx, comp, distro, 0) + + assert.Equal(t, fp1, fp2, + "changing upstream distro snapshot must NOT change fingerprint "+ + "(snapshot is excluded; resolved commit hash is what matters)") +} From f3130bde3d6733f5385803a03dc6d4083f799914 Mon Sep 17 00:00:00 2001 From: Daniel McIlvaney Date: Mon, 30 Mar 2026 16:13:59 -0700 Subject: [PATCH 06/12] feat(cli): Add component identity command --- docs/user/reference/cli/azldev_component.md | 1 + .../cli/azldev_component_identity.md | 64 ++++ .../app/azldev/cmds/component/component.go | 1 + .../app/azldev/cmds/component/identity.go | 304 ++++++++++++++++++ 4 files changed, 370 insertions(+) create mode 100644 docs/user/reference/cli/azldev_component_identity.md create mode 100644 internal/app/azldev/cmds/component/identity.go diff --git a/docs/user/reference/cli/azldev_component.md b/docs/user/reference/cli/azldev_component.md index c1a8d4e..2bfe59b 100644 --- a/docs/user/reference/cli/azldev_component.md +++ 
b/docs/user/reference/cli/azldev_component.md @@ -41,6 +41,7 @@ components defined in the project configuration. * [azldev component add](azldev_component_add.md) - Add component(s) to this project * [azldev component build](azldev_component_build.md) - Build packages for components * [azldev component diff-sources](azldev_component_diff-sources.md) - Show the diff that overlays apply to a component's sources +* [azldev component identity](azldev_component_identity.md) - Compute identity fingerprints for components * [azldev component list](azldev_component_list.md) - List components in this project * [azldev component prepare-sources](azldev_component_prepare-sources.md) - Prepare buildable sources for components * [azldev component query](azldev_component_query.md) - Query info for components in this project diff --git a/docs/user/reference/cli/azldev_component_identity.md b/docs/user/reference/cli/azldev_component_identity.md new file mode 100644 index 0000000..e7d55a4 --- /dev/null +++ b/docs/user/reference/cli/azldev_component_identity.md @@ -0,0 +1,64 @@ + + +## azldev component identity + +Compute identity fingerprints for components + +### Synopsis + +Compute a deterministic identity fingerprint for each selected component. + +The fingerprint captures all resolved build inputs (config fields, spec file +content, overlay source files, distro context, and Affects commit count). +A change to any input produces a different fingerprint. + +Use this with 'component diff-identity' to determine which components need +rebuilding between two commits. 
+ +``` +azldev component identity [flags] +``` + +### Examples + +``` + # All components, JSON output for CI + azldev component identity -a -O json > identity.json + + # Single component, table output for dev + azldev component identity -p curl + + # Components in a group + azldev component identity -g core +``` + +### Options + +``` + -a, --all-components Include all components + -p, --component stringArray Component name pattern + -g, --component-group stringArray Component group name + -h, --help help for identity + -s, --spec-path stringArray Spec path +``` + +### Options inherited from parent commands + +``` + -y, --accept-all accept all prompts + --color mode output colorization mode {always, auto, never} (default auto) + --config-file stringArray additional TOML config file(s) to merge (may be repeated) + -n, --dry-run dry run only (do not take action) + --network-retries int maximum number of attempts for network operations (minimum 1) (default 3) + --no-default-config disable default configuration + -O, --output-format fmt output format {csv, json, markdown, table} (default table) + --permissive-config do not fail on unknown fields in TOML config files + -C, --project string path to Azure Linux project + -q, --quiet only enable minimal output + -v, --verbose enable verbose output +``` + +### SEE ALSO + +* [azldev component](azldev_component.md) - Manage components + diff --git a/internal/app/azldev/cmds/component/component.go b/internal/app/azldev/cmds/component/component.go index 6713b64..ad47ba1 100644 --- a/internal/app/azldev/cmds/component/component.go +++ b/internal/app/azldev/cmds/component/component.go @@ -26,6 +26,7 @@ components defined in the project configuration.`, addOnAppInit(app, cmd) buildOnAppInit(app, cmd) diffSourcesOnAppInit(app, cmd) + identityOnAppInit(app, cmd) listOnAppInit(app, cmd) prepareOnAppInit(app, cmd) queryOnAppInit(app, cmd) diff --git a/internal/app/azldev/cmds/component/identity.go 
b/internal/app/azldev/cmds/component/identity.go new file mode 100644 index 0000000..25d8454 --- /dev/null +++ b/internal/app/azldev/cmds/component/identity.go @@ -0,0 +1,304 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package component + +import ( + "fmt" + "log/slog" + "sync" + + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev" + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/components" + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/sources" + "github.com/microsoft/azure-linux-dev-tools/internal/fingerprint" + "github.com/microsoft/azure-linux-dev-tools/internal/projectconfig" + "github.com/microsoft/azure-linux-dev-tools/internal/providers/sourceproviders" + "github.com/spf13/cobra" +) + +// Options for computing component identity fingerprints. +type IdentityComponentOptions struct { + // Standard filter for selecting components. + ComponentFilter components.ComponentFilter +} + +func identityOnAppInit(_ *azldev.App, parentCmd *cobra.Command) { + parentCmd.AddCommand(NewComponentIdentityCommand()) +} + +// NewComponentIdentityCommand constructs a [cobra.Command] for "component identity" CLI subcommand. +func NewComponentIdentityCommand() *cobra.Command { + options := &IdentityComponentOptions{} + + cmd := &cobra.Command{ + Use: "identity", + Short: "Compute identity fingerprints for components", + Long: `Compute a deterministic identity fingerprint for each selected component. + +The fingerprint captures all resolved build inputs (config fields, spec file +content, overlay source files, distro context, and Affects commit count). +A change to any input produces a different fingerprint. 
+ +Use this with 'component diff-identity' to determine which components need +rebuilding between two commits.`, + Example: ` # All components, JSON output for CI + azldev component identity -a -O json > identity.json + + # Single component, table output for dev + azldev component identity -p curl + + # Components in a group + azldev component identity -g core`, + RunE: azldev.RunFuncWithExtraArgs(func(env *azldev.Env, args []string) (interface{}, error) { + options.ComponentFilter.ComponentNamePatterns = append( + args, options.ComponentFilter.ComponentNamePatterns..., + ) + + return ComputeComponentIdentities(env, options) + }), + ValidArgsFunction: components.GenerateComponentNameCompletions, + } + + components.AddComponentFilterOptionsToCommand(cmd, &options.ComponentFilter) + + return cmd +} + +// ComponentIdentityResult is the per-component output for the identity command. +type ComponentIdentityResult struct { + // Component is the component name. + Component string `json:"component" table:",sortkey"` + // Fingerprint is the overall identity hash string. + Fingerprint string `json:"fingerprint"` + // Inputs provides the individual input hashes (shown in JSON output). + Inputs fingerprint.ComponentInputs `json:"inputs" table:"-"` +} + +// ComputeComponentIdentities computes fingerprints for all selected components. +func ComputeComponentIdentities( + env *azldev.Env, options *IdentityComponentOptions, +) ([]ComponentIdentityResult, error) { + resolver := components.NewResolver(env) + + comps, err := resolver.FindComponents(&options.ComponentFilter) + if err != nil { + return nil, fmt.Errorf("failed to resolve components:\n%w", err) + } + + distroRef := env.Config().Project.DefaultDistro + + // Resolve the distro definition (fills in default version for the fingerprint). 
+ distroRef, err = resolveDistroForIdentity(env, distroRef) + if err != nil { + slog.Debug("Could not resolve distro", "error", err) + } + + return computeIdentitiesParallel( + env, comps.Components(), distroRef, + ) +} + +// maxConcurrentIdentity limits the number of concurrent identity computations. +// This bounds both git ls-remote calls and file I/O. +const maxConcurrentIdentity = 32 + +// computeIdentitiesParallel computes fingerprints for all components concurrently, +// including source identity resolution, affects count, and overlay file hashing. +func computeIdentitiesParallel( + env *azldev.Env, + comps []components.Component, + distroRef projectconfig.DistroReference, +) ([]ComponentIdentityResult, error) { + progressEvent := env.StartEvent("Computing component identities", + "count", len(comps)) + defer progressEvent.End() + + // Create a cancellable child env so we can stop remaining goroutines on first error. + workerEnv, cancel := env.WithCancel() + defer cancel() + + type indexedResult struct { + index int + result ComponentIdentityResult + err error + } + + resultsChan := make(chan indexedResult, len(comps)) + semaphore := make(chan struct{}, maxConcurrentIdentity) + + var waitGroup sync.WaitGroup + + for compIdx, comp := range comps { + waitGroup.Add(1) + + go func() { + defer waitGroup.Done() + + // Context-aware semaphore acquisition. + select { + case semaphore <- struct{}{}: + defer func() { <-semaphore }() + case <-workerEnv.Done(): + resultsChan <- indexedResult{index: compIdx, err: workerEnv.Err()} + + return + } + + result, computeErr := computeSingleIdentity( + workerEnv, comp, distroRef, + ) + + resultsChan <- indexedResult{index: compIdx, result: result, err: computeErr} + }() + } + + // Close channel when all goroutines complete. + go func() { waitGroup.Wait(); close(resultsChan) }() + + // Collect results in order. 
+ results := make([]ComponentIdentityResult, len(comps)) + total := int64(len(comps)) + + var ( + completed int64 + firstErr error + ) + + for indexed := range resultsChan { + if indexed.err != nil { + if firstErr == nil { + firstErr = indexed.err + + cancel() + } + + // Drain remaining results so the closer goroutine can finish. + continue + } + + if firstErr == nil { + results[indexed.index] = indexed.result + completed++ + progressEvent.SetProgress(completed, total) + } + } + + if firstErr != nil { + return nil, firstErr + } + + return results, nil +} + +// computeSingleIdentity computes the identity for a single component, including +// source identity resolution, affects commit counting, and overlay file hashing. +func computeSingleIdentity( + env *azldev.Env, + comp components.Component, + distroRef projectconfig.DistroReference, +) (ComponentIdentityResult, error) { + config := comp.GetConfig() + componentName := comp.GetName() + + identityOpts := fingerprint.IdentityOptions{ + AffectsCommitCount: countAffectsCommits(config, componentName), + } + + // Resolve source identity, selecting the appropriate method based on source type (local vs. upstream etc.). + sourceIdentity, err := resolveSourceIdentityForComponent(env, comp) + if err != nil { + return ComponentIdentityResult{}, fmt.Errorf( + "source identity resolution failed for %#q:\n%w", + componentName, err) + } + + identityOpts.SourceIdentity = sourceIdentity + + identity, err := fingerprint.ComputeIdentity(env.FS(), *config, distroRef, identityOpts) + if err != nil { + return ComponentIdentityResult{}, fmt.Errorf("computing identity for component %#q:\n%w", + componentName, err) + } + + return ComponentIdentityResult{ + Component: componentName, + Fingerprint: identity.Fingerprint, + Inputs: identity.Inputs, + }, nil +} + +// resolveDistroForIdentity resolves the default distro reference, filling in the +// default version when unspecified. 
+func resolveDistroForIdentity( + env *azldev.Env, distroRef projectconfig.DistroReference, +) (projectconfig.DistroReference, error) { + distroDef, _, err := env.ResolveDistroRef(distroRef) + if err != nil { + return distroRef, + fmt.Errorf("resolving distro %#q:\n%w", distroRef.Name, err) + } + + // Fill in the resolved version if the ref didn't specify one. + if distroRef.Version == "" { + distroRef.Version = distroDef.DefaultVersion + } + + return distroRef, nil +} + +// countAffectsCommits counts the number of "Affects: <component>" commits in the +// project repo. Returns 0 if the count cannot be determined (e.g., no git repo). +func countAffectsCommits(config *projectconfig.ComponentConfig, componentName string, +) int { + configFile := config.SourceConfigFile + if configFile == nil || configFile.SourcePath() == "" { + return 0 + } + + repo, err := sources.OpenProjectRepo(configFile.SourcePath()) + if err != nil { + slog.Debug("Could not open project repo for Affects commits; defaulting to 0", + "component", componentName, "error", err) + + return 0 + } + + commits, err := sources.FindAffectsCommits(repo, componentName) + if err != nil { + slog.Debug("Could not count Affects commits; defaulting to 0", + "component", componentName, "error", err) + + return 0 + } + + return len(commits) +} + +// resolveSourceIdentityForComponent returns a deterministic identity string for the +// component's source by delegating to [sourceproviders.SourceManager.ResolveSourceIdentity]. +func resolveSourceIdentityForComponent( + env *azldev.Env, comp components.Component, +) (string, error) { + distro, err := sourceproviders.ResolveDistro(env, comp) + if err != nil { + return "", fmt.Errorf("resolving distro for component %#q:\n%w", + comp.GetName(), err) + } + + // A new source manager is created per component because each may reference a different + // upstream distro. 
+ srcManager, err := sourceproviders.NewSourceManager(env, distro) + if err != nil { + return "", fmt.Errorf("creating source manager for component %#q:\n%w", + comp.GetName(), err) + } + + identity, err := srcManager.ResolveSourceIdentity(env.Context(), comp) + if err != nil { + return "", fmt.Errorf("resolving source identity for %#q:\n%w", + comp.GetName(), err) + } + + return identity, nil +} From 8326413ddc6792e76043f8321662f32125498ab6 Mon Sep 17 00:00:00 2001 From: Daniel McIlvaney Date: Mon, 30 Mar 2026 16:15:03 -0700 Subject: [PATCH 07/12] feat(cli): Add component identity-diff command, scenario test --- .../developer/reference/component-identity.md | 56 +++++ docs/user/reference/cli/azldev_component.md | 1 + .../cli/azldev_component_diff-identity.md | 54 ++++ .../app/azldev/cmds/component/component.go | 1 + .../app/azldev/cmds/component/diffidentity.go | 197 +++++++++++++++ .../cmds/component/diffidentity_test.go | 235 ++++++++++++++++++ ...tySnapshots_diff-identity_help_1.snap.json | 3 + ...Snapshots_diff-identity_help_stderr_1.snap | 1 + ...Snapshots_diff-identity_help_stdout_1.snap | 32 +++ ...dentitySnapshots_identity_help_1.snap.json | 3 + ...ntitySnapshots_identity_help_stderr_1.snap | 1 + ...ntitySnapshots_identity_help_stdout_1.snap | 42 ++++ scenario/component_identity_test.go | 168 +++++++++++++ 13 files changed, 794 insertions(+) create mode 100644 docs/developer/reference/component-identity.md create mode 100644 docs/user/reference/cli/azldev_component_diff-identity.md create mode 100644 internal/app/azldev/cmds/component/diffidentity.go create mode 100644 internal/app/azldev/cmds/component/diffidentity_test.go create mode 100755 scenario/__snapshots__/TestComponentIdentitySnapshots_diff-identity_help_1.snap.json create mode 100755 scenario/__snapshots__/TestComponentIdentitySnapshots_diff-identity_help_stderr_1.snap create mode 100755 scenario/__snapshots__/TestComponentIdentitySnapshots_diff-identity_help_stdout_1.snap create mode 100755 
scenario/__snapshots__/TestComponentIdentitySnapshots_identity_help_1.snap.json create mode 100755 scenario/__snapshots__/TestComponentIdentitySnapshots_identity_help_stderr_1.snap create mode 100755 scenario/__snapshots__/TestComponentIdentitySnapshots_identity_help_stdout_1.snap create mode 100644 scenario/component_identity_test.go diff --git a/docs/developer/reference/component-identity.md b/docs/developer/reference/component-identity.md new file mode 100644 index 0000000..07ee91f --- /dev/null +++ b/docs/developer/reference/component-identity.md @@ -0,0 +1,56 @@ +# Component Identity & Change Detection + +The `component identity` subcommand computes a deterministic fingerprint of each component's build inputs, and `component diff-identity` compares two sets of fingerprints. For example, CI can compute fingerprints for the base and head commits of a PR, then diff them to determine exactly which components have changed and need to be rebuilt/tested. + +```bash +# Typical CI workflow +git checkout $BASE_REF && azldev component identity -a -O json > base.json +git checkout $HEAD_REF && azldev component identity -a -O json > head.json +azldev component diff-identity base.json head.json -O json -c +# → {"changed": ["curl"], "added": ["wget"], "removed": [], "unchanged": []} +``` + +## Fingerprint Inputs + +A component's fingerprint is a SHA256 combining: + +1. **Config hash** — `hashstructure.Hash()` of the resolved `ComponentConfig` (after all merging). Fields tagged `fingerprint:"-"` are excluded. +2. **Source identity** — content hash for local specs (all files in the spec directory), commit hash for upstream. +3. **Overlay file hashes** — SHA256 of each file referenced by overlay `Source` fields. +4. **Distro name + version** +5. **Affects commit count** — number of `Affects: <component>` commits in the project repo. 
+ +Global change propagation works automatically: the fingerprint operates on the fully-merged config, so a change to a distro or group default changes the resolved config of every inheriting component. + +## `fingerprint:"-"` Tag System + +The `hashstructure` library uses `TagName: "fingerprint"`. Untagged fields are **included by default** (safe default: a false positive, i.e. an unnecessary rebuild, is preferable to a false negative, i.e. a missed rebuild). + +A guard test (`TestAllFingerprintedFieldsHaveDecision`) reflects over all fingerprinted structs and maintains a bi-directional allowlist of exclusions. It fails if a `fingerprint:"-"` tag is added without registering it, or if a registered exclusion's tag is removed. + +### Adding a New Config Field + +1. Add the field to the struct in `internal/projectconfig/`. +2. **If NOT a build input**: add `fingerprint:"-"` to the struct tag and register it in `expectedExclusions` in `internal/projectconfig/fingerprint_test.go`. +3. **If a build input**: do nothing — included by default. +4. Run `mage unit`. + +### Adding a New Source Type + +1. Implement `SourceIdentityProvider` on your provider (see `ResolveLocalSourceIdentity` in `localidentity.go` for a simple example). +2. Add a case to `sourceManager.ResolveSourceIdentity()` in `sourcemanager.go`. +3. Add tests in `identityprovider_test.go`. + +## CLI + +### `azldev component identity` + +Compute fingerprints. Uses standard component filter flags (`-a`, `-p`, `-g`, `-s`). Exposed as an MCP tool. + +### `azldev component diff-identity` + +Compare two identity JSON files. The `--changed-only` / `-c` flag filters to only changed and added components (the build queue). Applies to both table and JSON output. + +## Known Limitations + +- It is difficult to determine WHY a diff occurred (e.g., which specific field changed) since the fingerprint is a single opaque hash. The JSON output includes an `inputs` breakdown (`configHash`, `sourceIdentity`, `overlayFileHashes`, etc.) 
that can help narrow it down by comparing the two identity files manually. diff --git a/docs/user/reference/cli/azldev_component.md b/docs/user/reference/cli/azldev_component.md index 2bfe59b..ee6457f 100644 --- a/docs/user/reference/cli/azldev_component.md +++ b/docs/user/reference/cli/azldev_component.md @@ -40,6 +40,7 @@ components defined in the project configuration. * [azldev](azldev.md) - 🐧 Azure Linux Dev Tool * [azldev component add](azldev_component_add.md) - Add component(s) to this project * [azldev component build](azldev_component_build.md) - Build packages for components +* [azldev component diff-identity](azldev_component_diff-identity.md) - Compare two identity files and report changed components * [azldev component diff-sources](azldev_component_diff-sources.md) - Show the diff that overlays apply to a component's sources * [azldev component identity](azldev_component_identity.md) - Compute identity fingerprints for components * [azldev component list](azldev_component_list.md) - List components in this project diff --git a/docs/user/reference/cli/azldev_component_diff-identity.md b/docs/user/reference/cli/azldev_component_diff-identity.md new file mode 100644 index 0000000..59afbfb --- /dev/null +++ b/docs/user/reference/cli/azldev_component_diff-identity.md @@ -0,0 +1,54 @@ + + +## azldev component diff-identity + +Compare two identity files and report changed components + +### Synopsis + +Compare two component identity JSON files (produced by 'component identity -a -O json') +and report which components have changed, been added, or been removed. + +CI uses the 'changed' and 'added' lists to determine the build queue. 
+ +``` +azldev component diff-identity [flags] +``` + +### Examples + +``` + # Compare base and head identity files + azldev component diff-identity base-identity.json head-identity.json + + # JSON output for CI + azldev component diff-identity base.json head.json -O json +``` + +### Options + +``` + -c, --changed-only Only show changed and added components (the build queue) + -h, --help help for diff-identity +``` + +### Options inherited from parent commands + +``` + -y, --accept-all accept all prompts + --color mode output colorization mode {always, auto, never} (default auto) + --config-file stringArray additional TOML config file(s) to merge (may be repeated) + -n, --dry-run dry run only (do not take action) + --network-retries int maximum number of attempts for network operations (minimum 1) (default 3) + --no-default-config disable default configuration + -O, --output-format fmt output format {csv, json, markdown, table} (default table) + --permissive-config do not fail on unknown fields in TOML config files + -C, --project string path to Azure Linux project + -q, --quiet only enable minimal output + -v, --verbose enable verbose output +``` + +### SEE ALSO + +* [azldev component](azldev_component.md) - Manage components + diff --git a/internal/app/azldev/cmds/component/component.go b/internal/app/azldev/cmds/component/component.go index ad47ba1..3166362 100644 --- a/internal/app/azldev/cmds/component/component.go +++ b/internal/app/azldev/cmds/component/component.go @@ -25,6 +25,7 @@ components defined in the project configuration.`, app.AddTopLevelCommand(cmd) addOnAppInit(app, cmd) buildOnAppInit(app, cmd) + diffIdentityOnAppInit(app, cmd) diffSourcesOnAppInit(app, cmd) identityOnAppInit(app, cmd) listOnAppInit(app, cmd) diff --git a/internal/app/azldev/cmds/component/diffidentity.go b/internal/app/azldev/cmds/component/diffidentity.go new file mode 100644 index 0000000..cfbb381 --- /dev/null +++ b/internal/app/azldev/cmds/component/diffidentity.go @@ -0,0 
+1,197 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package component + +import ( + "encoding/json" + "fmt" + "sort" + + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" + "github.com/spf13/cobra" +) + +func diffIdentityOnAppInit(_ *azldev.App, parentCmd *cobra.Command) { + parentCmd.AddCommand(NewDiffIdentityCommand()) +} + +// diffIdentityArgCount is the number of positional arguments required by the diff-identity command. +const diffIdentityArgCount = 2 + +// NewDiffIdentityCommand constructs a [cobra.Command] for "component diff-identity". +func NewDiffIdentityCommand() *cobra.Command { + var options struct { + ChangedOnly bool + } + + cmd := &cobra.Command{ + Use: "diff-identity ", + Short: "Compare two identity files and report changed components", + Long: `Compare two component identity JSON files (produced by 'component identity -a -O json') +and report which components have changed, been added, or been removed. + +CI uses the 'changed' and 'added' lists to determine the build queue.`, + Example: ` # Compare base and head identity files + azldev component diff-identity base-identity.json head-identity.json + + # JSON output for CI + azldev component diff-identity base.json head.json -O json`, + Args: cobra.ExactArgs(diffIdentityArgCount), + RunE: azldev.RunFuncWithoutRequiredConfigWithExtraArgs( + func(env *azldev.Env, args []string) (interface{}, error) { + return DiffIdentities(env, args[0], args[1], options.ChangedOnly) + }, + ), + } + + cmd.Flags().BoolVarP(&options.ChangedOnly, "changed-only", "c", false, + "Only show changed and added components (the build queue)") + + return cmd +} + +// IdentityDiffStatus represents the change status of a component. +type IdentityDiffStatus string + +const ( + // IdentityDiffChanged indicates the component's fingerprint changed. 
+ IdentityDiffChanged IdentityDiffStatus = "changed" + // IdentityDiffAdded indicates the component is new in the head. + IdentityDiffAdded IdentityDiffStatus = "added" + // IdentityDiffRemoved indicates the component was removed in the head. + IdentityDiffRemoved IdentityDiffStatus = "removed" + // IdentityDiffUnchanged indicates the component's fingerprint is identical. + IdentityDiffUnchanged IdentityDiffStatus = "unchanged" +) + +// IdentityDiffResult is the per-component row in table output. +type IdentityDiffResult struct { + Component string `json:"component" table:",sortkey"` + Status IdentityDiffStatus `json:"status"` +} + +// IdentityDiffReport is the structured output for JSON format. +type IdentityDiffReport struct { + Changed []string `json:"changed"` + Added []string `json:"added"` + Removed []string `json:"removed"` + Unchanged []string `json:"unchanged"` +} + +// DiffIdentities reads two identity JSON files and computes the diff. +func DiffIdentities(env *azldev.Env, basePath string, headPath string, changedOnly bool) (interface{}, error) { + baseIdentities, err := readIdentityFile(env, basePath) + if err != nil { + return nil, fmt.Errorf("reading base identity file %#q:\n%w", basePath, err) + } + + headIdentities, err := readIdentityFile(env, headPath) + if err != nil { + return nil, fmt.Errorf("reading head identity file %#q:\n%w", headPath, err) + } + + report := ComputeDiff(baseIdentities, headIdentities, changedOnly) + + // Return table-friendly results for table/CSV format, or the report for JSON. + if env.DefaultReportFormat() == azldev.ReportFormatJSON { + return report, nil + } + + return buildTableResults(report), nil +} + +// readIdentityFile reads and parses a component identity JSON file into a map of +// component name to fingerprint. 
+func readIdentityFile( + env *azldev.Env, filePath string, +) (map[string]string, error) { + data, err := fileutils.ReadFile(env.FS(), filePath) + if err != nil { + return nil, fmt.Errorf("reading file:\n%w", err) + } + + var entries []ComponentIdentityResult + + err = json.Unmarshal(data, &entries) + if err != nil { + return nil, fmt.Errorf("parsing JSON:\n%w", err) + } + + result := make(map[string]string, len(entries)) + for _, entry := range entries { + result[entry.Component] = entry.Fingerprint + } + + return result, nil +} + +// ComputeDiff compares base and head identity maps and produces a diff report. +// When changedOnly is true, the Removed and Unchanged lists are left empty. +func ComputeDiff(base map[string]string, head map[string]string, changedOnly bool) *IdentityDiffReport { + // Initialize all slices so JSON serialization produces [] instead of null. + report := &IdentityDiffReport{ + Changed: make([]string, 0), + Added: make([]string, 0), + Removed: make([]string, 0), + Unchanged: make([]string, 0), + } + + // Check base components against head. + for name, baseFP := range base { + headFP, exists := head[name] + + switch { + case !exists: + if !changedOnly { + report.Removed = append(report.Removed, name) + } + case baseFP != headFP: + report.Changed = append(report.Changed, name) + default: + if !changedOnly { + report.Unchanged = append(report.Unchanged, name) + } + } + } + + // Check for new components in head. + for name := range head { + if _, exists := base[name]; !exists { + report.Added = append(report.Added, name) + } + } + + // Sort all lists for deterministic output. + sort.Strings(report.Changed) + sort.Strings(report.Added) + sort.Strings(report.Removed) + sort.Strings(report.Unchanged) + + return report +} + +// buildTableResults converts the diff report into a slice for table output. 
+func buildTableResults(report *IdentityDiffReport) []IdentityDiffResult { + results := make([]IdentityDiffResult, 0, + len(report.Changed)+len(report.Added)+len(report.Removed)+len(report.Unchanged)) + + for _, name := range report.Changed { + results = append(results, IdentityDiffResult{Component: name, Status: IdentityDiffChanged}) + } + + for _, name := range report.Added { + results = append(results, IdentityDiffResult{Component: name, Status: IdentityDiffAdded}) + } + + for _, name := range report.Removed { + results = append(results, IdentityDiffResult{Component: name, Status: IdentityDiffRemoved}) + } + + for _, name := range report.Unchanged { + results = append(results, IdentityDiffResult{Component: name, Status: IdentityDiffUnchanged}) + } + + return results +} diff --git a/internal/app/azldev/cmds/component/diffidentity_test.go b/internal/app/azldev/cmds/component/diffidentity_test.go new file mode 100644 index 0000000..7c26fc3 --- /dev/null +++ b/internal/app/azldev/cmds/component/diffidentity_test.go @@ -0,0 +1,235 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +package component_test + +import ( + "encoding/json" + "testing" + + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev" + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/cmds/component" + "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/testutils" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileperms" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestComputeDiff(t *testing.T) { + t.Run("all categories", func(t *testing.T) { + base := map[string]string{ + "curl": "sha256:aaa", + "wget": "sha256:bbb", + "openssl": "sha256:ccc", + "libold": "sha256:fff", + } + head := map[string]string{ + "curl": "sha256:aaa", + "wget": "sha256:ddd", + "libfoo": "sha256:eee", + "openssl": "sha256:ccc", + } + + report := component.ComputeDiff(base, head, false) + + assert.Equal(t, []string{"wget"}, report.Changed) + assert.Equal(t, []string{"libfoo"}, report.Added) + assert.Equal(t, []string{"libold"}, report.Removed) + assert.Equal(t, []string{"curl", "openssl"}, report.Unchanged) + }) + + t.Run("removed component", func(t *testing.T) { + base := map[string]string{ + "curl": "sha256:aaa", + "libfoo": "sha256:bbb", + } + head := map[string]string{ + "curl": "sha256:aaa", + } + + report := component.ComputeDiff(base, head, false) + + assert.Empty(t, report.Changed) + assert.Empty(t, report.Added) + assert.Equal(t, []string{"libfoo"}, report.Removed) + assert.Equal(t, []string{"curl"}, report.Unchanged) + }) + + t.Run("empty base", func(t *testing.T) { + base := map[string]string{} + head := map[string]string{ + "curl": "sha256:aaa", + "wget": "sha256:bbb", + } + + report := component.ComputeDiff(base, head, false) + + assert.Empty(t, report.Changed) + assert.Equal(t, []string{"curl", "wget"}, report.Added) + assert.Empty(t, report.Removed) + assert.Empty(t, report.Unchanged) + }) + + t.Run("empty head", 
func(t *testing.T) { + base := map[string]string{ + "curl": "sha256:aaa", + } + head := map[string]string{} + + report := component.ComputeDiff(base, head, false) + + assert.Empty(t, report.Changed) + assert.Empty(t, report.Added) + assert.Equal(t, []string{"curl"}, report.Removed) + assert.Empty(t, report.Unchanged) + }) + + t.Run("both empty", func(t *testing.T) { + report := component.ComputeDiff(map[string]string{}, map[string]string{}, false) + + assert.Empty(t, report.Changed) + assert.Empty(t, report.Added) + assert.Empty(t, report.Removed) + assert.Empty(t, report.Unchanged) + }) + + t.Run("identical", func(t *testing.T) { + both := map[string]string{ + "curl": "sha256:aaa", + "openssl": "sha256:bbb", + } + + report := component.ComputeDiff(both, both, false) + + assert.Empty(t, report.Changed) + assert.Empty(t, report.Added) + assert.Empty(t, report.Removed) + assert.Equal(t, []string{"curl", "openssl"}, report.Unchanged) + }) + + t.Run("sorted output", func(t *testing.T) { + base := map[string]string{ + "zlib": "sha256:aaa", + "curl": "sha256:bbb", + "openssl": "sha256:ccc", + } + head := map[string]string{ + "zlib": "sha256:xxx", + "curl": "sha256:yyy", + "openssl": "sha256:ccc", + } + + report := component.ComputeDiff(base, head, false) + + assert.Equal(t, []string{"curl", "zlib"}, report.Changed, "changed list should be sorted") + }) + + t.Run("changed only", func(t *testing.T) { + base := map[string]string{ + "curl": "sha256:aaa", + "wget": "sha256:bbb", + "openssl": "sha256:ccc", + "libold": "sha256:fff", + } + head := map[string]string{ + "curl": "sha256:aaa", + "wget": "sha256:ddd", + "libfoo": "sha256:eee", + "openssl": "sha256:ccc", + } + + report := component.ComputeDiff(base, head, true) + + assert.Equal(t, []string{"wget"}, report.Changed) + assert.Equal(t, []string{"libfoo"}, report.Added) + assert.Empty(t, report.Removed, "removed should be empty with changedOnly") + assert.Empty(t, report.Unchanged, "unchanged should be empty with 
changedOnly") + }) +} + +func TestDiffIdentities_MissingFile(t *testing.T) { + testEnv := testutils.NewTestEnv(t) + + _, err := component.DiffIdentities(testEnv.Env, "/nonexistent/base.json", "/nonexistent/head.json", false) + require.Error(t, err) + assert.Contains(t, err.Error(), "base identity file") +} + +func TestDiffIdentities_MalformedJSON(t *testing.T) { + testEnv := testutils.NewTestEnv(t) + + require.NoError(t, fileutils.WriteFile(testEnv.TestFS, "/base.json", + []byte("not valid json"), fileperms.PublicFile)) + require.NoError(t, fileutils.WriteFile(testEnv.TestFS, "/head.json", + []byte(`[{"component":"a","fingerprint":"sha256:aaa"}]`), fileperms.PublicFile)) + + _, err := component.DiffIdentities(testEnv.Env, "/base.json", "/head.json", false) + require.Error(t, err) + assert.Contains(t, err.Error(), "base identity file") +} + +func TestDiffIdentities_ValidFiles(t *testing.T) { + testEnv := testutils.NewTestEnv(t) + + require.NoError(t, fileutils.WriteFile(testEnv.TestFS, "/base.json", + []byte(`[{"component":"curl","fingerprint":"sha256:aaa"}]`), fileperms.PublicFile)) + require.NoError(t, fileutils.WriteFile(testEnv.TestFS, "/head.json", + []byte(`[{"component":"curl","fingerprint":"sha256:bbb"},{"component":"wget","fingerprint":"sha256:ccc"}]`), + fileperms.PublicFile)) + + result, err := component.DiffIdentities(testEnv.Env, "/base.json", "/head.json", false) + require.NoError(t, err) + + // Default format is table, so we get []IdentityDiffResult. 
+ tableResults, ok := result.([]component.IdentityDiffResult) + require.True(t, ok, "expected table results for default report format") + require.Len(t, tableResults, 2) +} + +func TestDiffIdentities_EmptyArray(t *testing.T) { + testEnv := testutils.NewTestEnv(t) + + require.NoError(t, fileutils.WriteFile(testEnv.TestFS, "/base.json", + []byte(`[]`), fileperms.PublicFile)) + require.NoError(t, fileutils.WriteFile(testEnv.TestFS, "/head.json", + []byte(`[]`), fileperms.PublicFile)) + + result, err := component.DiffIdentities(testEnv.Env, "/base.json", "/head.json", false) + require.NoError(t, err) + + tableResults, ok := result.([]component.IdentityDiffResult) + require.True(t, ok) + assert.Empty(t, tableResults) +} + +func TestDiffIdentities_JSONFormat(t *testing.T) { + testEnv := testutils.NewTestEnv(t) + testEnv.Env.SetDefaultReportFormat(azldev.ReportFormatJSON) + + require.NoError(t, fileutils.WriteFile(testEnv.TestFS, "/base.json", + []byte(`[{"component":"curl","fingerprint":"sha256:aaa"}]`), fileperms.PublicFile)) + require.NoError(t, fileutils.WriteFile(testEnv.TestFS, "/head.json", + []byte(`[{"component":"curl","fingerprint":"sha256:bbb"},{"component":"wget","fingerprint":"sha256:ccc"}]`), + fileperms.PublicFile)) + + result, err := component.DiffIdentities(testEnv.Env, "/base.json", "/head.json", false) + require.NoError(t, err) + + report, ok := result.(*component.IdentityDiffReport) + require.True(t, ok, "expected IdentityDiffReport for JSON format") + + assert.Equal(t, []string{"curl"}, report.Changed) + assert.Equal(t, []string{"wget"}, report.Added) + assert.Empty(t, report.Removed) + assert.Empty(t, report.Unchanged) + + // Verify JSON serialization produces [] not null for empty arrays. 
+ jsonBytes, err := json.Marshal(report) + require.NoError(t, err) + + jsonStr := string(jsonBytes) + assert.Contains(t, jsonStr, `"removed":[]`) + assert.Contains(t, jsonStr, `"unchanged":[]`) + assert.NotContains(t, jsonStr, "null") +} diff --git a/scenario/__snapshots__/TestComponentIdentitySnapshots_diff-identity_help_1.snap.json b/scenario/__snapshots__/TestComponentIdentitySnapshots_diff-identity_help_1.snap.json new file mode 100755 index 0000000..dc0a9bb --- /dev/null +++ b/scenario/__snapshots__/TestComponentIdentitySnapshots_diff-identity_help_1.snap.json @@ -0,0 +1,3 @@ +{ + "ExitCode": 0 +} \ No newline at end of file diff --git a/scenario/__snapshots__/TestComponentIdentitySnapshots_diff-identity_help_stderr_1.snap b/scenario/__snapshots__/TestComponentIdentitySnapshots_diff-identity_help_stderr_1.snap new file mode 100755 index 0000000..b77b53d --- /dev/null +++ b/scenario/__snapshots__/TestComponentIdentitySnapshots_diff-identity_help_stderr_1.snap @@ -0,0 +1 @@ +##:##:## INF No Azure Linux project found; some commands will not be available. diff --git a/scenario/__snapshots__/TestComponentIdentitySnapshots_diff-identity_help_stdout_1.snap b/scenario/__snapshots__/TestComponentIdentitySnapshots_diff-identity_help_stdout_1.snap new file mode 100755 index 0000000..e0585ad --- /dev/null +++ b/scenario/__snapshots__/TestComponentIdentitySnapshots_diff-identity_help_stdout_1.snap @@ -0,0 +1,32 @@ +Compare two component identity JSON files (produced by 'component identity -a -O json') +and report which components have changed, been added, or been removed. + +CI uses the 'changed' and 'added' lists to determine the build queue. 
+ +Usage: + azldev component diff-identity [flags] + +Examples: + # Compare base and head identity files + azldev component diff-identity base-identity.json head-identity.json + + # JSON output for CI + azldev component diff-identity base.json head.json -O json + +Flags: + -c, --changed-only Only show changed and added components (the build queue) + -h, --help help for diff-identity + +Global Flags: + -y, --accept-all accept all prompts + --color mode output colorization mode {always, auto, never} (default auto) + --config-file stringArray additional TOML config file(s) to merge (may be repeated) + -n, --dry-run dry run only (do not take action) + --network-retries int maximum number of attempts for network operations (minimum 1) (default 3) + --no-default-config disable default configuration + -O, --output-format fmt output format {csv, json, markdown, table} (default table) + --permissive-config do not fail on unknown fields in TOML config files + -C, --project string path to Azure Linux project + -q, --quiet only enable minimal output + -v, --verbose enable verbose output + diff --git a/scenario/__snapshots__/TestComponentIdentitySnapshots_identity_help_1.snap.json b/scenario/__snapshots__/TestComponentIdentitySnapshots_identity_help_1.snap.json new file mode 100755 index 0000000..dc0a9bb --- /dev/null +++ b/scenario/__snapshots__/TestComponentIdentitySnapshots_identity_help_1.snap.json @@ -0,0 +1,3 @@ +{ + "ExitCode": 0 +} \ No newline at end of file diff --git a/scenario/__snapshots__/TestComponentIdentitySnapshots_identity_help_stderr_1.snap b/scenario/__snapshots__/TestComponentIdentitySnapshots_identity_help_stderr_1.snap new file mode 100755 index 0000000..b77b53d --- /dev/null +++ b/scenario/__snapshots__/TestComponentIdentitySnapshots_identity_help_stderr_1.snap @@ -0,0 +1 @@ +##:##:## INF No Azure Linux project found; some commands will not be available. 
diff --git a/scenario/__snapshots__/TestComponentIdentitySnapshots_identity_help_stdout_1.snap b/scenario/__snapshots__/TestComponentIdentitySnapshots_identity_help_stdout_1.snap new file mode 100755 index 0000000..6c3dae5 --- /dev/null +++ b/scenario/__snapshots__/TestComponentIdentitySnapshots_identity_help_stdout_1.snap @@ -0,0 +1,42 @@ +Compute a deterministic identity fingerprint for each selected component. + +The fingerprint captures all resolved build inputs (config fields, spec file +content, overlay source files, distro context, and Affects commit count). +A change to any input produces a different fingerprint. + +Use this with 'component diff-identity' to determine which components need +rebuilding between two commits. + +Usage: + azldev component identity [flags] + +Examples: + # All components, JSON output for CI + azldev component identity -a -O json > identity.json + + # Single component, table output for dev + azldev component identity -p curl + + # Components in a group + azldev component identity -g core + +Flags: + -a, --all-components Include all components + -p, --component stringArray Component name pattern + -g, --component-group stringArray Component group name + -h, --help help for identity + -s, --spec-path stringArray Spec path + +Global Flags: + -y, --accept-all accept all prompts + --color mode output colorization mode {always, auto, never} (default auto) + --config-file stringArray additional TOML config file(s) to merge (may be repeated) + -n, --dry-run dry run only (do not take action) + --network-retries int maximum number of attempts for network operations (minimum 1) (default 3) + --no-default-config disable default configuration + -O, --output-format fmt output format {csv, json, markdown, table} (default table) + --permissive-config do not fail on unknown fields in TOML config files + -C, --project string path to Azure Linux project + -q, --quiet only enable minimal output + -v, --verbose enable verbose output + diff --git 
a/scenario/component_identity_test.go b/scenario/component_identity_test.go new file mode 100644 index 0000000..d32b098 --- /dev/null +++ b/scenario/component_identity_test.go @@ -0,0 +1,168 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//go:build scenario + +package scenario_tests + +import ( + "encoding/json" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/microsoft/azure-linux-dev-tools/scenario/internal/cmdtest" + "github.com/microsoft/azure-linux-dev-tools/scenario/internal/projecttest" + "github.com/microsoft/azure-linux-dev-tools/scenario/internal/snapshot" + "github.com/microsoft/azure-linux-dev-tools/scenario/internal/testhelpers" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestComponentIdentitySnapshots tests basic CLI output snapshots for the identity commands. +func TestComponentIdentitySnapshots(t *testing.T) { + t.Parallel() + + if testing.Short() { + t.Skip("skipping long test") + } + + tests := map[string]testhelpers.ScenarioTest{ + "identity help": cmdtest.NewScenarioTest("component", "identity", "--help").Locally(), + "diff-identity help": cmdtest.NewScenarioTest("component", "diff-identity", "--help").Locally(), + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + t.Parallel() + snapshot.TestSnapshottableCmd(t, test) + }) + } +} + +// TestComponentIdentityInContainer runs the full identity pipeline in a container: +// creates a project with two components, computes identity, modifies one component, +// recomputes identity, and diffs the two. +func TestComponentIdentityInContainer(t *testing.T) { + t.Parallel() + + if testing.Short() { + t.Skip("skipping long test") + } + + // Create two specs for the project. 
+ specA := projecttest.NewSpec( + projecttest.WithName("component-a"), + projecttest.WithVersion("1.0.0"), + ) + specB := projecttest.NewSpec( + projecttest.WithName("component-b"), + projecttest.WithVersion("2.0.0"), + ) + + project := projecttest.NewDynamicTestProject( + projecttest.AddSpec(specA), + projecttest.AddSpec(specB), + projecttest.UseTestDefaultConfigs(), + ) + + // Script that: + // 1. Computes identity for all components → base.json + // 2. Modifies component-a's spec file (changes version) + // 3. Recomputes identity → head.json + // 4. Diffs the two → diff.json + testScript := ` +set -ex + +rm -rf project/build +ln -s /var/lib/mock project/build + +# Compute base identity +azldev -C project -v component identity -a --output-format json > base.json + +# Modify component-a's spec (change version) +sed -i 's/Version: 1.0.0/Version: 1.1.0/' project/specs/component-a/component-a.spec + +# Compute head identity +azldev -C project -v component identity -a --output-format json > head.json + +# Diff the two +azldev -v component diff-identity base.json head.json --output-format json > diff.json +` + + scenarioTest := cmdtest.NewScenarioTest(). + WithScript(strings.NewReader(testScript)) + + // Serialize the project and add it to the container. + projectStagingDir := t.TempDir() + project.Serialize(t, projectStagingDir) + scenarioTest.AddDirRecursive(t, "project", projectStagingDir) + + // Add test default configs. + scenarioTest.AddDirRecursive(t, projecttest.TestDefaultConfigsSubdir, projecttest.TestDefaultConfigsDir()) + + results, err := scenarioTest. + InContainer(). + WithPrivilege(). + WithNetwork(). + Run(t) + + require.NoError(t, err) + results.AssertZeroExitCode(t) + + t.Logf("stdout:\n%s", results.Stdout) + t.Logf("stderr:\n%s", results.Stderr) + + // Parse base identity. 
+ baseBytes, err := os.ReadFile(filepath.Join(results.Workdir, "base.json")) + require.NoError(t, err, "base.json should exist") + + var baseIdentities []map[string]interface{} + require.NoError(t, json.Unmarshal(baseBytes, &baseIdentities)) + require.Len(t, baseIdentities, 2, "should have 2 components in base identity") + + // Parse head identity. + headBytes, err := os.ReadFile(filepath.Join(results.Workdir, "head.json")) + require.NoError(t, err, "head.json should exist") + + var headIdentities []map[string]interface{} + require.NoError(t, json.Unmarshal(headBytes, &headIdentities)) + require.Len(t, headIdentities, 2, "should have 2 components in head identity") + + // Verify fingerprints differ for the modified component. + baseFPs := identityMap(baseIdentities) + headFPs := identityMap(headIdentities) + + assert.NotEqual(t, baseFPs["component-a"], headFPs["component-a"], + "component-a fingerprint should change after spec modification") + assert.Equal(t, baseFPs["component-b"], headFPs["component-b"], + "component-b fingerprint should NOT change") + + // Parse and validate the diff output. + diffBytes, err := os.ReadFile(filepath.Join(results.Workdir, "diff.json")) + require.NoError(t, err, "diff.json should exist") + + var diffReport map[string][]string + require.NoError(t, json.Unmarshal(diffBytes, &diffReport)) + + assert.Contains(t, diffReport["changed"], "component-a", + "diff should report component-a as changed") + assert.Contains(t, diffReport["unchanged"], "component-b", + "diff should report component-b as unchanged") + assert.Empty(t, diffReport["added"], "no components should be added") + assert.Empty(t, diffReport["removed"], "no components should be removed") +} + +// identityMap converts the JSON identity array to a map of component name → fingerprint. 
+func identityMap(identities []map[string]interface{}) map[string]string { + result := make(map[string]string, len(identities)) + + for _, entry := range identities { + name, _ := entry["component"].(string) + fingerprint, _ := entry["fingerprint"].(string) + result[name] = fingerprint + } + + return result +} From 057316c60fc6e74b0fa5dffc4f0acd0c86459885 Mon Sep 17 00:00:00 2001 From: Daniel McIlvaney Date: Mon, 30 Mar 2026 17:57:28 -0700 Subject: [PATCH 08/12] fix lint --- internal/providers/sourceproviders/fedorasourceprovider.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/providers/sourceproviders/fedorasourceprovider.go b/internal/providers/sourceproviders/fedorasourceprovider.go index 36d7da6..2fb6963 100644 --- a/internal/providers/sourceproviders/fedorasourceprovider.go +++ b/internal/providers/sourceproviders/fedorasourceprovider.go @@ -337,6 +337,7 @@ func (g *FedoraSourcesProviderImpl) resolveCommit( } var commitHash string + if g.snapshotTime != "" { snapshotDateTime, parseErr := time.Parse(time.RFC3339, g.snapshotTime) if parseErr != nil { From b823d76ab451bbe68a6cc6a833f83fc8a9c15874 Mon Sep 17 00:00:00 2001 From: Daniel McIlvaney Date: Mon, 30 Mar 2026 18:04:32 -0700 Subject: [PATCH 09/12] fix synthistory --- internal/app/azldev/core/sources/synthistory.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/app/azldev/core/sources/synthistory.go b/internal/app/azldev/core/sources/synthistory.go index 96f606d..403fcd4 100644 --- a/internal/app/azldev/core/sources/synthistory.go +++ b/internal/app/azldev/core/sources/synthistory.go @@ -206,7 +206,7 @@ func resolveConfigFilePath(config *projectconfig.ComponentConfig, componentName return configFilePath, nil } -// openProjectRepo finds and opens the git repository containing configFilePath by +// OpenProjectRepo finds and opens the git repository containing configFilePath by // walking up the directory tree. 
func OpenProjectRepo(configFilePath string) (*gogit.Repository, error) { repo, err := gogit.PlainOpenWithOptions(filepath.Dir(configFilePath), &gogit.PlainOpenOptions{ From 1cc16b71e8e4e3019d99ee1303e4c9fcb4fe6111 Mon Sep 17 00:00:00 2001 From: Daniel McIlvaney Date: Mon, 30 Mar 2026 18:04:52 -0700 Subject: [PATCH 10/12] fix computeidentity comment --- internal/fingerprint/fingerprint.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/fingerprint/fingerprint.go b/internal/fingerprint/fingerprint.go index 18c853b..45eb061 100644 --- a/internal/fingerprint/fingerprint.go +++ b/internal/fingerprint/fingerprint.go @@ -58,8 +58,8 @@ type IdentityOptions struct { } // ComputeIdentity computes the fingerprint for a component from its resolved config -// and additional context. The fs parameter is used to read spec file and overlay -// source file contents for hashing. +// and additional context. The fs parameter is used to read overlay source file +// contents for hashing; spec content identity is provided via opts.SourceIdentity. 
func ComputeIdentity( fs opctx.FS, component projectconfig.ComponentConfig, From 002e17f5765e066297c11d6a0f335f5f7bad05d2 Mon Sep 17 00:00:00 2001 From: Daniel McIlvaney Date: Mon, 30 Mar 2026 18:25:31 -0700 Subject: [PATCH 11/12] source manager feedback --- .../providers/sourceproviders/fedorasourceprovider.go | 9 +++++++-- .../providers/sourceproviders/identityprovider_test.go | 5 ++--- internal/providers/sourceproviders/localidentity.go | 7 ++++--- internal/providers/sourceproviders/sourcemanager.go | 8 ++++---- 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/internal/providers/sourceproviders/fedorasourceprovider.go b/internal/providers/sourceproviders/fedorasourceprovider.go index 2fb6963..9d659a3 100644 --- a/internal/providers/sourceproviders/fedorasourceprovider.go +++ b/internal/providers/sourceproviders/fedorasourceprovider.go @@ -276,12 +276,17 @@ func (g *FedoraSourcesProviderImpl) checkoutTargetCommit( // ResolveSourceIdentity implements [SourceIdentityProvider] by resolving the upstream // commit hash for the component. Resolution priority matches [checkoutTargetCommit]: // 1. Explicit upstream commit hash (pinned per-component) — returned directly. -// 2. Snapshot time — shallow clone + rev-list to find the commit at the snapshot date. -// 3. Default — query HEAD of the dist-git branch via ls-remote. +// 2. Snapshot time — perform a metadata-only clone of the dist-git branch and use the +// local git history to find the commit immediately before the snapshot date. +// 3. Default — perform a metadata-only clone of the dist-git branch and use its current HEAD. func (g *FedoraSourcesProviderImpl) ResolveSourceIdentity( ctx context.Context, component components.Component, ) (string, error) { + if component.GetName() == "" { + return "", errors.New("component name cannot be empty") + } + // Case 1: Explicit upstream commit hash — no network call needed. 
if pinnedCommit := component.GetConfig().Spec.UpstreamCommit; pinnedCommit != "" { slog.Debug("Using pinned upstream commit for identity", diff --git a/internal/providers/sourceproviders/identityprovider_test.go b/internal/providers/sourceproviders/identityprovider_test.go index 2b3d24c..2cf9a67 100644 --- a/internal/providers/sourceproviders/identityprovider_test.go +++ b/internal/providers/sourceproviders/identityprovider_test.go @@ -32,9 +32,8 @@ import ( // --- ResolveLocalSourceIdentity tests --- func TestResolveLocalSourceIdentity_EmptyDir(t *testing.T) { - identity, err := sourceproviders.ResolveLocalSourceIdentity(afero.NewMemMapFs(), "") - require.NoError(t, err) - assert.Empty(t, identity) + _, err := sourceproviders.ResolveLocalSourceIdentity(afero.NewMemMapFs(), "") + require.Error(t, err) } func TestResolveLocalSourceIdentity_Deterministic(t *testing.T) { diff --git a/internal/providers/sourceproviders/localidentity.go b/internal/providers/sourceproviders/localidentity.go index 2b9f4ec..1b0f67b 100644 --- a/internal/providers/sourceproviders/localidentity.go +++ b/internal/providers/sourceproviders/localidentity.go @@ -6,6 +6,7 @@ package sourceproviders import ( "crypto/sha256" "encoding/hex" + "errors" "fmt" "io/fs" "path/filepath" @@ -18,11 +19,11 @@ import ( // ResolveLocalSourceIdentity computes a SHA256 hash over all files in the given // spec directory (spec file + sidecar files like patches and scripts). -// Files are sorted by path for determinism. Returns an empty string if specDir -// is empty or contains no files. +// Files are sorted by path for determinism. Returns an error if specDir is +// empty, and an empty string if the directory contains no files. func ResolveLocalSourceIdentity(filesystem opctx.FS, specDir string) (string, error) { if specDir == "" { - return "", nil + return "", errors.New("spec directory cannot be empty") } // Collect all files in the spec directory.
diff --git a/internal/providers/sourceproviders/sourcemanager.go b/internal/providers/sourceproviders/sourcemanager.go index c738136..67c62d3 100644 --- a/internal/providers/sourceproviders/sourcemanager.go +++ b/internal/providers/sourceproviders/sourcemanager.go @@ -472,12 +472,12 @@ func (m *sourceManager) ResolveSourceIdentity( switch sourceType { case projectconfig.SpecSourceTypeLocal, projectconfig.SpecSourceTypeUnspecified: - specDir := "" - if component.GetConfig().Spec.Path != "" { - specDir = filepath.Dir(component.GetConfig().Spec.Path) + specPath := component.GetConfig().Spec.Path + if specPath == "" { + return "", fmt.Errorf("component %#q has no spec path configured", component.GetName()) } - return ResolveLocalSourceIdentity(m.fs, specDir) + return ResolveLocalSourceIdentity(m.fs, filepath.Dir(specPath)) case projectconfig.SpecSourceTypeUpstream: return m.resolveUpstreamSourceIdentity(ctx, component) From aa7df9c1a703ae70bb944627d5360b8de22ee707 Mon Sep 17 00:00:00 2001 From: Daniel McIlvaney Date: Mon, 30 Mar 2026 18:30:15 -0700 Subject: [PATCH 12/12] add srcmanager test, name validation --- .../sourceproviders/rpmcontentsprovider.go | 4 + .../sourceproviders/sourcemanager_test.go | 138 ++++++++++++++++++ 2 files changed, 142 insertions(+) diff --git a/internal/providers/sourceproviders/rpmcontentsprovider.go b/internal/providers/sourceproviders/rpmcontentsprovider.go index d66658e..9cbe75c 100644 --- a/internal/providers/sourceproviders/rpmcontentsprovider.go +++ b/internal/providers/sourceproviders/rpmcontentsprovider.go @@ -84,6 +84,10 @@ func (r *RPMContentsProviderImpl) ResolveSourceIdentity( ctx context.Context, component components.Component, ) (identity string, err error) { + if component.GetName() == "" { + return "", errors.New("component name cannot be empty") + } + rpmReader, err := r.rpmProvider.GetRPM(ctx, component.GetName(), nil) if err != nil { return "", fmt.Errorf("failed to get RPM for identity of component %#q:\n%w", diff --git 
a/internal/providers/sourceproviders/sourcemanager_test.go b/internal/providers/sourceproviders/sourcemanager_test.go index fa94707..0ec66a9 100644 --- a/internal/providers/sourceproviders/sourcemanager_test.go +++ b/internal/providers/sourceproviders/sourcemanager_test.go @@ -16,6 +16,7 @@ import ( "github.com/microsoft/azure-linux-dev-tools/internal/providers/sourceproviders" "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileperms" "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.uber.org/mock/gomock" ) @@ -369,3 +370,140 @@ func TestSourceManager_FetchFiles_Errors(t *testing.T) { }) } } + +func TestSourceManager_ResolveSourceIdentity_EmptyComponentName(t *testing.T) { + env := testutils.NewTestEnv(t) + ctrl := gomock.NewController(t) + component := components_testutils.NewMockComponent(ctrl) + + component.EXPECT().GetName().Return("") + + sourceManager, err := sourceproviders.NewSourceManager(env.Env, testDefaultDistro()) + require.NoError(t, err) + + _, err = sourceManager.ResolveSourceIdentity(t.Context(), component) + require.Error(t, err) + require.Contains(t, err.Error(), "component name is empty") +} + +func TestSourceManager_ResolveSourceIdentity_LocalNoSpecPath(t *testing.T) { + env := testutils.NewTestEnv(t) + ctrl := gomock.NewController(t) + component := components_testutils.NewMockComponent(ctrl) + + componentConfig := &projectconfig.ComponentConfig{ + Spec: projectconfig.SpecSource{ + SourceType: projectconfig.SpecSourceTypeLocal, + }, + } + + component.EXPECT().GetName().AnyTimes().Return("test-component") + component.EXPECT().GetConfig().AnyTimes().Return(componentConfig) + + sourceManager, err := sourceproviders.NewSourceManager(env.Env, testDefaultDistro()) + require.NoError(t, err) + + _, err = sourceManager.ResolveSourceIdentity(t.Context(), component) + require.Error(t, err) + require.Contains(t, err.Error(), "no spec path 
configured") +} + +func TestSourceManager_ResolveSourceIdentity_LocalSuccess(t *testing.T) { + env := testutils.NewTestEnv(t) + ctrl := gomock.NewController(t) + component := components_testutils.NewMockComponent(ctrl) + + specContent := []byte("Name: test\nVersion: 1.0\n") + require.NoError(t, fileutils.WriteFile(env.TestFS, "/specs/test.spec", specContent, fileperms.PrivateFile)) + + componentConfig := &projectconfig.ComponentConfig{ + Spec: projectconfig.SpecSource{ + SourceType: projectconfig.SpecSourceTypeLocal, + Path: "/specs/test.spec", + }, + } + + component.EXPECT().GetName().AnyTimes().Return("test-component") + component.EXPECT().GetConfig().AnyTimes().Return(componentConfig) + + sourceManager, err := sourceproviders.NewSourceManager(env.Env, testDefaultDistro()) + require.NoError(t, err) + + identity, err := sourceManager.ResolveSourceIdentity(t.Context(), component) + require.NoError(t, err) + assert.Contains(t, identity, "sha256:") +} + +func TestSourceManager_ResolveSourceIdentity_UpstreamNoProviders(t *testing.T) { + env := testutils.NewTestEnv(t) + ctrl := gomock.NewController(t) + component := components_testutils.NewMockComponent(ctrl) + + // Clear the distro so no upstream providers are registered. 
+ emptyDistro := sourceproviders.ResolvedDistro{} + + componentConfig := &projectconfig.ComponentConfig{ + Spec: projectconfig.SpecSource{ + SourceType: projectconfig.SpecSourceTypeUpstream, + }, + } + + component.EXPECT().GetName().AnyTimes().Return("test-component") + component.EXPECT().GetConfig().AnyTimes().Return(componentConfig) + + sourceManager, err := sourceproviders.NewSourceManager(env.Env, emptyDistro) + require.NoError(t, err) + + _, err = sourceManager.ResolveSourceIdentity(t.Context(), component) + require.Error(t, err) + require.Contains(t, err.Error(), "no upstream providers configured") +} + +func TestSourceManager_ResolveSourceIdentity_UpstreamAllProvidersFail(t *testing.T) { + env := testutils.NewTestEnv(t) + ctrl := gomock.NewController(t) + component := components_testutils.NewMockComponent(ctrl) + + componentConfig := &projectconfig.ComponentConfig{ + Spec: projectconfig.SpecSource{ + SourceType: projectconfig.SpecSourceTypeUpstream, + }, + } + + component.EXPECT().GetName().AnyTimes().Return("test-component") + component.EXPECT().GetConfig().AnyTimes().Return(componentConfig) + + // Make git commands fail so all providers return errors. 
+ env.CmdFactory.RunHandler = func(cmd *exec.Cmd) error { + return errors.New("simulated git failure") + } + + sourceManager, err := sourceproviders.NewSourceManager(env.Env, testDefaultDistro()) + require.NoError(t, err) + + _, err = sourceManager.ResolveSourceIdentity(t.Context(), component) + require.Error(t, err) + require.Contains(t, err.Error(), "failed to resolve source identity") +} + +func TestSourceManager_ResolveSourceIdentity_UnknownSourceType(t *testing.T) { + env := testutils.NewTestEnv(t) + ctrl := gomock.NewController(t) + component := components_testutils.NewMockComponent(ctrl) + + componentConfig := &projectconfig.ComponentConfig{ + Spec: projectconfig.SpecSource{ + SourceType: "unknown-type", + }, + } + + component.EXPECT().GetName().AnyTimes().Return("test-component") + component.EXPECT().GetConfig().AnyTimes().Return(componentConfig) + + sourceManager, err := sourceproviders.NewSourceManager(env.Env, testDefaultDistro()) + require.NoError(t, err) + + _, err = sourceManager.ResolveSourceIdentity(t.Context(), component) + require.Error(t, err) + require.Contains(t, err.Error(), "no identity provider for source type") +}