From 1517b21d3b2996a78a147a7a57f4e9e27688c1b1 Mon Sep 17 00:00:00 2001 From: Antonio Salinas Date: Mon, 18 May 2026 21:23:02 +0000 Subject: [PATCH 1/7] feat: Source-prep origin summary Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../cli/azldev_component_prepare-sources.md | 4 + .../azldev/cmds/component/preparesources.go | 33 +- internal/app/azldev/cmds/component/render.go | 2 +- .../cmds/downloadsources/downloadsources.go | 2 +- .../downloadsources/downloadsources_test.go | 6 +- .../core/componentbuilder/componentbuilder.go | 2 +- .../componentbuilder/componentbuilder_test.go | 6 +- .../app/azldev/core/sources/provenance.go | 16 + .../app/azldev/core/sources/sourceprep.go | 97 +++++- .../azldev/core/sources/sourceprep_test.go | 292 +++++++++++++++--- .../fedorasource/fedorasource.go | 65 ++-- .../fedorasource/fedorasource_test.go | 47 ++- .../fedorasource_test/fedorasource_mocks.go | 7 +- .../sourceproviders/fedorasourceprovider.go | 33 +- .../fedorasourceprovider_test.go | 62 ++-- .../sourceproviders/identityprovider_test.go | 4 +- .../providers/sourceproviders/provenance.go | 62 ++++ .../sourceproviders/rpmcontentsprovider.go | 12 +- .../rpmcontentsprovider_test.go | 10 +- .../sourceproviders/sourcemanager.go | 147 ++++++--- .../sourceproviders/sourcemanager_test.go | 20 +- .../sourcemanager_mocks.go | 14 +- .../sourcemanager_mocks_noop.go | 4 +- 23 files changed, 718 insertions(+), 229 deletions(-) create mode 100644 internal/app/azldev/core/sources/provenance.go create mode 100644 internal/providers/sourceproviders/provenance.go diff --git a/docs/user/reference/cli/azldev_component_prepare-sources.md b/docs/user/reference/cli/azldev_component_prepare-sources.md index 590286e7..43a46bbe 100644 --- a/docs/user/reference/cli/azldev_component_prepare-sources.md +++ b/docs/user/reference/cli/azldev_component_prepare-sources.md @@ -13,6 +13,10 @@ The result is a directory containing the spec file and all sources, ready for inspection or manual building. This is useful for verifying that overlays apply cleanly before running a full build. +The command output reports downloaded source provenance entries with +filename, origin type, URL, hash type, and hash. Only files actually +downloaded during this run are included. + Only one component may be selected at a time. ``` diff --git a/internal/app/azldev/cmds/component/preparesources.go b/internal/app/azldev/cmds/component/preparesources.go index 2f0ffcaa..7c642bfc 100644 --- a/internal/app/azldev/cmds/component/preparesources.go +++ b/internal/app/azldev/cmds/component/preparesources.go @@ -45,6 +45,10 @@ The result is a directory containing the spec file and all sources, ready for inspection or manual building. This is useful for verifying that overlays apply cleanly before running a full build. +The command output reports downloaded source provenance entries with +filename, origin type, URL, hash type, and hash. Only files actually +downloaded during this run are included. + Only one component may be selected at a time.`, Example: ` # Prepare sources for a component azldev component prep-sources -p curl -o ./build/work/scratch/curl --force @@ -54,7 +58,12 @@ Only one component may be selected at a time.`, RunE: azldev.RunFuncWithExtraArgs(func(env *azldev.Env, args []string) (interface{}, error) { options.ComponentFilter.ComponentNamePatterns = append(args, options.ComponentFilter.ComponentNamePatterns...) - return nil, PrepareComponentSources(env, &options) + report, err := PrepareComponentSources(env, &options) + if err != nil { + return nil, err + } + + return report.Sources, nil }), ValidArgsFunction: components.GenerateComponentNameCompletions, Annotations: map[string]string{ @@ -81,24 +90,24 @@ Only one component may be selected at a time.`, return cmd } -func PrepareComponentSources(env *azldev.Env, options *PrepareSourcesOptions) error { +func PrepareComponentSources(env *azldev.Env, options *PrepareSourcesOptions) (*sources.ProvenanceReport, error) { var comps *components.ComponentSet resolver := components.NewResolver(env) comps, err := resolver.FindComponents(&options.ComponentFilter) if err != nil { - return fmt.Errorf("failed to resolve components:\n%w", err) + return nil, fmt.Errorf("failed to resolve components:\n%w", err) } if comps.Len() == 0 { - return errors.New("no components were selected; " + + return nil, errors.New("no components were selected; " + "please use command-line options to indicate which components you would like to build", ) } if comps.Len() != 1 { - return fmt.Errorf("expected exactly one component, got %d", comps.Len()) + return nil, fmt.Errorf("expected exactly one component, got %d", comps.Len()) } component := comps.Components()[0] @@ -111,18 +120,18 @@ func PrepareComponentSources(env *azldev.Env, options *PrepareSourcesOptions) er // Resolve the effective distro for this component before creating the source manager. distro, err := sourceproviders.ResolveDistro(env, component) if err != nil { - return fmt.Errorf("failed to resolve distro for component %#q:\n%w", component.GetName(), err) + return nil, fmt.Errorf("failed to resolve distro for component %#q:\n%w", component.GetName(), err) } // Create source manager to handle all source fetching, both local and upstream. sourceManager, err = sourceproviders.NewSourceManager(env, distro) if err != nil { - return fmt.Errorf("failed to create source manager:\n%w", err) + return nil, fmt.Errorf("failed to create source manager:\n%w", err) } // Pre-flight check: detect non-empty output directory before any work. if err := CheckOutputDir(env, options); err != nil { - return err + return nil, err } if options.SkipOverlays && !options.WithoutGitRepo { @@ -148,15 +157,15 @@ func PrepareComponentSources(env *azldev.Env, options *PrepareSourcesOptions) er preparer, err := sources.NewPreparer(sourceManager, env.FS(), env, env, preparerOpts...) if err != nil { - return fmt.Errorf("failed to create source preparer:\n%w", err) + return nil, fmt.Errorf("failed to create source preparer:\n%w", err) } - err = preparer.PrepareSources(env, component, options.OutputDir, !options.SkipOverlays) + report, err := preparer.PrepareSources(env, component, options.OutputDir, !options.SkipOverlays) if err != nil { - return fmt.Errorf("failed to prepare sources for component %q:\n%w", component.GetName(), err) + return nil, fmt.Errorf("failed to prepare sources for component %q:\n%w", component.GetName(), err) } - return nil + return report, nil } // CheckOutputDir verifies the output directory state before source preparation. diff --git a/internal/app/azldev/cmds/component/render.go b/internal/app/azldev/cmds/component/render.go index 2f2cc54f..78e49994 100644 --- a/internal/app/azldev/cmds/component/render.go +++ b/internal/app/azldev/cmds/component/render.go @@ -534,7 +534,7 @@ func prepareComponentSources( return nil, fmt.Errorf("creating source preparer for %#q:\n%w", componentName, err) } - if prepErr := preparer.PrepareSources(env, comp, componentDir, true /*applyOverlays*/); prepErr != nil { + if _, prepErr := preparer.PrepareSources(env, comp, componentDir, true /*applyOverlays*/); prepErr != nil { return nil, fmt.Errorf("preparing sources for %#q:\n%w", componentName, prepErr) } diff --git a/internal/app/azldev/cmds/downloadsources/downloadsources.go b/internal/app/azldev/cmds/downloadsources/downloadsources.go index 8af63b2b..ce050e49 100644 --- a/internal/app/azldev/cmds/downloadsources/downloadsources.go +++ b/internal/app/azldev/cmds/downloadsources/downloadsources.go @@ -132,7 +132,7 @@ func DownloadSources(env *azldev.Env, options *DownloadSourcesOptions) error { for _, uri := range lookasideBaseURIs { slog.Info("Trying lookaside base URI", "uri", uri) - uriErr := lookasideDownloader.ExtractSourcesFromRepo( + _, uriErr := lookasideDownloader.ExtractSourcesFromRepo( env, options.Directory, packageName, uri, nil, extractOpts..., ) if uriErr == nil { diff --git a/internal/app/azldev/cmds/downloadsources/downloadsources_test.go b/internal/app/azldev/cmds/downloadsources/downloadsources_test.go index b723a458..b83fcf92 100644 --- a/internal/app/azldev/cmds/downloadsources/downloadsources_test.go +++ b/internal/app/azldev/cmds/downloadsources/downloadsources_test.go @@ -71,7 +71,7 @@ func TestDownloadSources_StandaloneMode(t *testing.T) { mockDownloader := fedorasource_test.NewMockFedoraSourceDownloader(ctrl) mockDownloader.EXPECT(). ExtractSourcesFromRepo(gomock.Any(), testPkgDir, "curl", testLookasideURI, gomock.Any()). - Return(nil) + Return(nil, nil) options := &downloadsources.DownloadSourcesOptions{ Directory: testPkgDir, @@ -94,7 +94,7 @@ func TestDownloadSources_StandaloneMode_NoSourcesFile(t *testing.T) { mockDownloader := fedorasource_test.NewMockFedoraSourceDownloader(ctrl) mockDownloader.EXPECT(). ExtractSourcesFromRepo(gomock.Any(), testPkgDir, "curl", testLookasideURI, gomock.Any()). - Return(nil) + Return(nil, nil) options := &downloadsources.DownloadSourcesOptions{ Directory: testPkgDir, @@ -129,7 +129,7 @@ func TestDownloadSources_ComponentMode(t *testing.T) { ExtractSourcesFromRepo( gomock.Any(), testPkgDir, "curl", expectedURI, gomock.Any(), ). - Return(nil) + Return(nil, nil) options := &downloadsources.DownloadSourcesOptions{ Directory: testPkgDir, diff --git a/internal/app/azldev/core/componentbuilder/componentbuilder.go b/internal/app/azldev/core/componentbuilder/componentbuilder.go index 8cb37619..22a94a68 100644 --- a/internal/app/azldev/core/componentbuilder/componentbuilder.go +++ b/internal/app/azldev/core/componentbuilder/componentbuilder.go @@ -125,7 +125,7 @@ func (b *Builder) prepSourcesForSRPM( return "", fmt.Errorf("failed to create work dir for source preparation:\n%w", err) } - err = b.sourcePreparer.PrepareSources(ctx, component, preparedSourcesDir, true /*applyOverlays?*/) + _, err = b.sourcePreparer.PrepareSources(ctx, component, preparedSourcesDir, true /*applyOverlays?*/) if err != nil { return "", fmt.Errorf("failed to prepare sources:\n%w", err) } diff --git a/internal/app/azldev/core/componentbuilder/componentbuilder_test.go b/internal/app/azldev/core/componentbuilder/componentbuilder_test.go index 7fdf7ec8..96c2cc6e 100644 --- a/internal/app/azldev/core/componentbuilder/componentbuilder_test.go +++ b/internal/app/azldev/core/componentbuilder/componentbuilder_test.go @@ -55,15 +55,15 @@ func setupBuilder(t *testing.T) *componentBuilderTestParams { func( _ context.Context, component components.Component, outputDir string, _ ...sourceproviders.FetchComponentOption, - ) error { + ) ([]sourceproviders.SourceProvenance, error) { // Create the expected spec file. specPath := filepath.Join(outputDir, component.GetName()+".spec") - return fileutils.WriteFile(testEnv.Env.FS(), specPath, []byte("# test spec"), fileperms.PublicFile) + return nil, fileutils.WriteFile(testEnv.Env.FS(), specPath, []byte("# test spec"), fileperms.PublicFile) }, ) - sourceManager.EXPECT().FetchFiles(gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes().Return(nil) + sourceManager.EXPECT().FetchFiles(gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes().Return(nil, nil) preparer, err := sources.NewPreparer(sourceManager, testEnv.Env.FS(), testEnv.Env, testEnv.Env) diff --git a/internal/app/azldev/core/sources/provenance.go b/internal/app/azldev/core/sources/provenance.go new file mode 100644 index 00000000..9030a6ac --- /dev/null +++ b/internal/app/azldev/core/sources/provenance.go @@ -0,0 +1,16 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package sources + +import "github.com/microsoft/azure-linux-dev-tools/internal/providers/sourceproviders" + +// ProvenanceReport is the output of a source preparation run, listing +// every file that was downloaded and where it came from. +type ProvenanceReport struct { + // ComponentName is the name of the component whose sources were prepared. + ComponentName string `json:"componentName" table:"Component"` + + // Sources lists the provenance of each downloaded source file. + Sources []sourceproviders.SourceProvenance `json:"sources" table:"-"` +} diff --git a/internal/app/azldev/core/sources/sourceprep.go b/internal/app/azldev/core/sources/sourceprep.go index 617dcbd8..8e048f81 100644 --- a/internal/app/azldev/core/sources/sourceprep.go +++ b/internal/app/azldev/core/sources/sourceprep.go @@ -48,7 +48,13 @@ type SourcePreparer interface { // within the output directory will be build-time dependencies on external packages (RPMs), including those // relied on to be present implicitly within the build root, or expressed via BuildRequires or DynamicBuildRequires // in the component's spec file and any defaults from the macros used to interpret the spec file. - PrepareSources(ctx context.Context, component components.Component, outputDir string, applyOverlays bool) error + // + // Returns a [ProvenanceReport] listing each file that was downloaded and where it came from. + // The report only includes files that were actually downloaded; pre-existing files are omitted. + PrepareSources( + ctx context.Context, component components.Component, + outputDir string, applyOverlays bool, + ) (*ProvenanceReport, error) // DiffSources computes a unified diff showing the changes that overlays apply to a component's sources. // The component's sources are fetched once into a subdirectory of baseDir, then copied to a second @@ -214,16 +220,20 @@ func NewPreparer( // PrepareSources implements the [SourcePreparer] interface. func (p *sourcePreparerImpl) PrepareSources( ctx context.Context, component components.Component, outputDir string, applyOverlays bool, -) error { +) (*ProvenanceReport, error) { + var allProvenance []sourceproviders.SourceProvenance + // Use the source manager to fetch source files (archives, patches, etc.) // Skip this step when skipLookaside is set — source tarballs are not needed // for rendering and are the most expensive download. if !p.skipLookaside { - err := p.sourceManager.FetchFiles(ctx, component, outputDir) + fileProv, err := p.sourceManager.FetchFiles(ctx, component, outputDir) if err != nil { - return fmt.Errorf("failed to fetch source files for component %#q:\n%w", + return nil, fmt.Errorf("failed to fetch source files for component %#q:\n%w", component.GetName(), err) } + + allProvenance = append(allProvenance, fileProv...) } // Preserve the upstream .git directory only when dist-git creation is @@ -239,20 +249,22 @@ func (p *sourcePreparerImpl) PrepareSources( } // Use the source manager to fetch the component (spec file and sidecar files). - err := p.sourceManager.FetchComponent(ctx, component, outputDir, fetchOpts...) + compProv, err := p.sourceManager.FetchComponent(ctx, component, outputDir, fetchOpts...) if err != nil { - return fmt.Errorf("failed to fetch sources for component %#q:\n%w", + return nil, fmt.Errorf("failed to fetch sources for component %#q:\n%w", component.GetName(), err) } + allProvenance = append(allProvenance, compProv...) + if applyOverlays { err := p.applyOverlaysToSources(ctx, component, outputDir) if err != nil { - return err + return nil, err } if err := p.updateSourcesFile(component, outputDir); err != nil { - return fmt.Errorf("failed to update 'sources' file for component %#q:\n%w", + return nil, fmt.Errorf("failed to update 'sources' file for component %#q:\n%w", component.GetName(), err) } } else { @@ -261,14 +273,79 @@ func (p *sourcePreparerImpl) PrepareSources( "component", component.GetName()) } + if applyOverlays { + if err := p.enrichProvenanceWithResolvedHashes(allProvenance, outputDir); err != nil { + return nil, fmt.Errorf("failed to resolve provenance hashes for component %#q:\n%w", + component.GetName(), err) + } + } + // Record the changes as synthetic git history when dist-git creation is enabled. if p.withGitRepo { if err := p.trySyntheticHistory(ctx, component, outputDir); err != nil { - return fmt.Errorf("failed to generate synthetic history for component %#q:\n%w", + return nil, fmt.Errorf("failed to generate synthetic history for component %#q:\n%w", component.GetName(), err) } } + return &ProvenanceReport{ + ComponentName: component.GetName(), + Sources: allProvenance, + }, nil +} + +// enrichProvenanceWithResolvedHashes fills missing hash fields in provenance entries +// using values from the finalized 'sources' file. +func (p *sourcePreparerImpl) enrichProvenanceWithResolvedHashes( + provenance []sourceproviders.SourceProvenance, + outputDir string, +) error { + if len(provenance) == 0 { + return nil + } + + sourcesFilePath := filepath.Join(outputDir, fedorasource.SourcesFileName) + + content, err := p.readSourcesFileIfExists(sourcesFilePath) + if err != nil { + return err + } + + if content == "" { + return nil + } + + parsedLines, err := fedorasource.ReadSourcesFile(content) + if err != nil { + return fmt.Errorf("failed to parse finalized 'sources' file %#q:\n%w", sourcesFilePath, err) + } + + hashByFilename := make(map[string]fedorasource.SourcesFileEntry, len(parsedLines)) + for _, line := range parsedLines { + if line.Entry != nil { + hashByFilename[line.Entry.Filename] = *line.Entry + } + } + + for provenanceIndex := range provenance { + if provenance[provenanceIndex].Hash != "" && provenance[provenanceIndex].HashType != "" { + continue + } + + entry, found := hashByFilename[provenance[provenanceIndex].Filename] + if !found { + continue + } + + if provenance[provenanceIndex].HashType == "" { + provenance[provenanceIndex].HashType = entry.HashType + } + + if provenance[provenanceIndex].Hash == "" { + provenance[provenanceIndex].Hash = entry.Hash + } + } + return nil } @@ -584,7 +661,7 @@ func (p *sourcePreparerImpl) DiffSources( defer fileutils.RemoveAllAndUpdateErrorIfNil(p.fs, originalDir, &err) // Prepare sources without applying overlays, to get the original tree. - if err := p.PrepareSources(ctx, component, originalDir, false /* applyOverlays */); err != nil { + if _, err := p.PrepareSources(ctx, component, originalDir, false /* applyOverlays */); err != nil { return nil, err } diff --git a/internal/app/azldev/core/sources/sourceprep_test.go b/internal/app/azldev/core/sources/sourceprep_test.go index 45a9d286..8238b390 100644 --- a/internal/app/azldev/core/sources/sourceprep_test.go +++ b/internal/app/azldev/core/sources/sourceprep_test.go @@ -71,17 +71,20 @@ func TestPrepareSources_Success(t *testing.T) { component.EXPECT().GetName().AnyTimes().Return("test-component") component.EXPECT().GetConfig().AnyTimes().Return(&projectconfig.ComponentConfig{}) - sourceManager.EXPECT().FetchFiles(gomock.Any(), component, testOutputDir).Return(nil) + sourceManager.EXPECT().FetchFiles(gomock.Any(), component, testOutputDir).Return(nil, nil) sourceManager.EXPECT().FetchComponent(gomock.Any(), component, testOutputDir, gomock.Any()).DoAndReturn( - func(_ interface{}, _ interface{}, outputDir string, _ ...sourceproviders.FetchComponentOption) error { + func( + _ interface{}, _ interface{}, outputDir string, + _ ...sourceproviders.FetchComponentOption, + ) ([]sourceproviders.SourceProvenance, error) { // Create the expected spec file. - return fileutils.WriteFile(ctx.FS(), outputSpecPath, []byte("# test spec"), fileperms.PublicFile) + return nil, fileutils.WriteFile(ctx.FS(), outputSpecPath, []byte("# test spec"), fileperms.PublicFile) }, ) preparer, err := sources.NewPreparer(sourceManager, ctx.FS(), ctx, ctx) require.NoError(t, err) - err = preparer.PrepareSources(ctx, component, testOutputDir, true /*applyOverlays?*/) + _, err = preparer.PrepareSources(ctx, component, testOutputDir, true /*applyOverlays?*/) require.NoError(t, err) macrosFileName := "test-component" + sources.MacrosFileExtension @@ -99,6 +102,178 @@ func TestPrepareSources_Success(t *testing.T) { assert.NotContains(t, string(specContents), "Source9999") } +func TestPrepareSources_ProvenanceReport(t *testing.T) { + const ( + testSpecName = "test-component.spec" + outputSpecPath = testOutputDir + "/" + testSpecName + ) + + ctrl := gomock.NewController(t) + component := components_testutils.NewMockComponent(ctrl) + sourceManager := sourceproviders_test.NewMockSourceManager(ctrl) + ctx := testctx.NewCtx() + + component.EXPECT().GetName().AnyTimes().Return("test-component") + component.EXPECT().GetConfig().AnyTimes().Return(&projectconfig.ComponentConfig{}) + + fileProv := []sourceproviders.SourceProvenance{ + { + Filename: "extra.tar.gz", + OriginType: sourceproviders.SourceOriginURL, + URL: "https://example.com/extra.tar.gz", + }, + } + sourceManager.EXPECT(). + FetchFiles(gomock.Any(), component, testOutputDir). + Return(fileProv, nil) + + compProv := []sourceproviders.SourceProvenance{ + { + Filename: "src.tar.gz", + OriginType: sourceproviders.SourceOriginLookaside, + URL: "https://lookaside.example.com/pkg/src.tar.gz/sha512/abc/src.tar.gz", + }, + } + sourceManager.EXPECT(). + FetchComponent(gomock.Any(), component, testOutputDir, gomock.Any()). + DoAndReturn(func( + _ interface{}, _ interface{}, _ string, + _ ...sourceproviders.FetchComponentOption, + ) ([]sourceproviders.SourceProvenance, error) { + err := fileutils.WriteFile( + ctx.FS(), outputSpecPath, + []byte("# test spec"), fileperms.PublicFile) + + return compProv, err + }) + + preparer, err := sources.NewPreparer(sourceManager, ctx.FS(), ctx, ctx) + require.NoError(t, err) + + report, err := preparer.PrepareSources( + ctx, component, testOutputDir, true) + require.NoError(t, err) + require.NotNil(t, report) + + assert.Equal(t, "test-component", report.ComponentName) + require.Len(t, report.Sources, 2) + + assert.Equal(t, "extra.tar.gz", report.Sources[0].Filename) + assert.Equal(t, sourceproviders.SourceOriginURL, report.Sources[0].OriginType) + assert.Equal(t, "https://example.com/extra.tar.gz", report.Sources[0].URL) + + assert.Equal(t, "src.tar.gz", report.Sources[1].Filename) + assert.Equal(t, sourceproviders.SourceOriginLookaside, report.Sources[1].OriginType) +} + +func TestPrepareSources_ProvenanceReportFillsResolvedHashes(t *testing.T) { + const ( + testSpecPath = testOutputDir + "/test-component.spec" + testFilePath = testOutputDir + "/test-file.tar.gz" + // Pre-computed SHA-512 hash of "hello world". + testFileSHA512 = "309ecc489c12d6eb4cc40f50c902f2b4d0ed77ee511a7c7a9bcd3ca86d4cd86f" + + "989dd35bc5ff499670da34255b45b0cfd830e81f605dcf7dc5542e93ae9cd76f" + ) + + ctrl := gomock.NewController(t) + component := components_testutils.NewMockComponent(ctrl) + sourceManager := sourceproviders_test.NewMockSourceManager(ctrl) + ctx := testctx.NewCtx() + + component.EXPECT().GetName().AnyTimes().Return("test-component") + component.EXPECT().GetConfig().AnyTimes().Return(&projectconfig.ComponentConfig{ + SourceFiles: []projectconfig.SourceFileReference{{ + Filename: "test-file.tar.gz", + Origin: projectconfig.Origin{ + Type: projectconfig.OriginTypeURI, + Uri: "https://example.com/test-file.tar.gz", + }, + }}, + }) + + sourceManager.EXPECT(). + FetchFiles(gomock.Any(), component, testOutputDir). + Return([]sourceproviders.SourceProvenance{{ + Filename: "test-file.tar.gz", + OriginType: sourceproviders.SourceOriginURL, + URL: "https://example.com/test-file.tar.gz", + }}, nil) + + sourceManager.EXPECT(). + FetchComponent(gomock.Any(), component, testOutputDir, gomock.Any()). + DoAndReturn(func( + _ interface{}, _ interface{}, outputDir string, + _ ...sourceproviders.FetchComponentOption, + ) ([]sourceproviders.SourceProvenance, error) { + if err := fileutils.WriteFile(ctx.FS(), testFilePath, []byte("hello world"), fileperms.PublicFile); err != nil { + return nil, err + } + + return nil, fileutils.WriteFile(ctx.FS(), testSpecPath, []byte("# test spec"), fileperms.PublicFile) + }) + + preparer, err := sources.NewPreparer(sourceManager, ctx.FS(), ctx, ctx, sources.WithAllowNoHashes()) + require.NoError(t, err) + + report, err := preparer.PrepareSources(ctx, component, testOutputDir, true) + require.NoError(t, err) + require.NotNil(t, report) + require.Len(t, report.Sources, 1) + assert.Equal(t, fileutils.HashTypeSHA512, report.Sources[0].HashType) + assert.Equal(t, testFileSHA512, report.Sources[0].Hash) +} + +func TestPrepareSources_SkipOverlays_DoesNotEnrichHashes(t *testing.T) { + const ( + testSpecPath = testOutputDir + "/test-component.spec" + testSourcePath = testOutputDir + "/sources" + ) + + ctrl := gomock.NewController(t) + component := components_testutils.NewMockComponent(ctrl) + sourceManager := sourceproviders_test.NewMockSourceManager(ctrl) + ctx := testctx.NewCtx() + + component.EXPECT().GetName().AnyTimes().Return("test-component") + component.EXPECT().GetConfig().AnyTimes().Return(&projectconfig.ComponentConfig{}) + + // FetchFiles returns provenance with empty hash (simulating --allow-no-hashes). + sourceManager.EXPECT(). + FetchFiles(gomock.Any(), component, testOutputDir). + Return([]sourceproviders.SourceProvenance{{ + Filename: "src.tar.gz", + OriginType: sourceproviders.SourceOriginURL, + URL: "https://example.com/src.tar.gz", + }}, nil) + + // FetchComponent creates a sources file (as the upstream clone would) and a spec. + sourceManager.EXPECT(). + FetchComponent(gomock.Any(), component, testOutputDir, gomock.Any()). + DoAndReturn(func( + _ interface{}, _ interface{}, outputDir string, + _ ...sourceproviders.FetchComponentOption, + ) ([]sourceproviders.SourceProvenance, error) { + sourcesContent := "SHA512 (src.tar.gz) = abc123def456" + if err := fileutils.WriteFile(ctx.FS(), testSourcePath, []byte(sourcesContent), fileperms.PublicFile); err != nil { + return nil, err + } + + return nil, fileutils.WriteFile(ctx.FS(), testSpecPath, []byte("# test spec"), fileperms.PublicFile) + }) + + preparer, err := sources.NewPreparer(sourceManager, ctx.FS(), ctx, ctx, sources.WithAllowNoHashes()) + require.NoError(t, err) + + // applyOverlays = false — hash enrichment must NOT run to avoid corrupting provenance. + report, err := preparer.PrepareSources(ctx, component, testOutputDir, false) + require.NoError(t, err) + require.NotNil(t, report) + require.Len(t, report.Sources, 1) + + assert.Empty(t, report.Sources[0].HashType, "hash should not be enriched without overlays") + assert.Empty(t, report.Sources[0].Hash, "hash should not be enriched without overlays") +} + func TestPrepareSources_SourceManagerError(t *testing.T) { ctrl := gomock.NewController(t) component := components_testutils.NewMockComponent(ctrl) @@ -108,12 +283,12 @@ func TestPrepareSources_SourceManagerError(t *testing.T) { expectedErr := errors.New("failed to fetch files") component.EXPECT().GetName().AnyTimes().Return("test-component") - sourceManager.EXPECT().FetchFiles(gomock.Any(), component, testOutputDir).Return(expectedErr) + sourceManager.EXPECT().FetchFiles(gomock.Any(), component, testOutputDir).Return(nil, expectedErr) preparer, err := sources.NewPreparer(sourceManager, ctx.FS(), ctx, ctx) require.NoError(t, err) - err = preparer.PrepareSources(ctx, component, testOutputDir, true /*applyOverlays?*/) + _, err = preparer.PrepareSources(ctx, component, testOutputDir, true /*applyOverlays?*/) require.Error(t, err) require.ErrorIs(t, err, expectedErr) } @@ -137,7 +312,10 @@ func TestPrepareSources_WithSkipLookaside_SkipsFetchFiles(t *testing.T) { // FetchComponent should still be called, with at least the SkipLookaside option. sourceManager.EXPECT().FetchComponent(gomock.Any(), component, testOutputDir, gomock.Any()).DoAndReturn( - func(_ interface{}, _ interface{}, outputDir string, opts ...sourceproviders.FetchComponentOption) error { + func( + _ interface{}, _ interface{}, outputDir string, + opts ...sourceproviders.FetchComponentOption, + ) ([]sourceproviders.SourceProvenance, error) { // Verify SkipLookaside is actually set by applying the received options. var resolved sourceproviders.FetchComponentOptions for _, opt := range opts { @@ -146,14 +324,14 @@ func TestPrepareSources_WithSkipLookaside_SkipsFetchFiles(t *testing.T) { assert.True(t, resolved.SkipLookaside, "FetchComponent should receive SkipLookaside option") - return fileutils.WriteFile(ctx.FS(), outputSpecPath, []byte("# test spec"), fileperms.PublicFile) + return nil, fileutils.WriteFile(ctx.FS(), outputSpecPath, []byte("# test spec"), fileperms.PublicFile) }, ) preparer, err := sources.NewPreparer(sourceManager, ctx.FS(), ctx, ctx, sources.WithSkipLookaside()) require.NoError(t, err) - err = preparer.PrepareSources(ctx, component, testOutputDir, true /*applyOverlays?*/) + _, err = preparer.PrepareSources(ctx, component, testOutputDir, true /*applyOverlays?*/) require.NoError(t, err) } @@ -172,17 +350,20 @@ func TestPrepareSources_WithoutSkipLookaside_CallsFetchFiles(t *testing.T) { component.EXPECT().GetConfig().AnyTimes().Return(&projectconfig.ComponentConfig{}) // Without WithSkipLookaside, FetchFiles MUST be called. - sourceManager.EXPECT().FetchFiles(gomock.Any(), component, testOutputDir).Return(nil) + sourceManager.EXPECT().FetchFiles(gomock.Any(), component, testOutputDir).Return(nil, nil) sourceManager.EXPECT().FetchComponent(gomock.Any(), component, testOutputDir, gomock.Any()).DoAndReturn( - func(_ interface{}, _ interface{}, outputDir string, _ ...sourceproviders.FetchComponentOption) error { - return fileutils.WriteFile(ctx.FS(), outputSpecPath, []byte("# test spec"), fileperms.PublicFile) + func( + _ interface{}, _ interface{}, outputDir string, + _ ...sourceproviders.FetchComponentOption, + ) ([]sourceproviders.SourceProvenance, error) { + return nil, fileutils.WriteFile(ctx.FS(), outputSpecPath, []byte("# test spec"), fileperms.PublicFile) }, ) preparer, err := sources.NewPreparer(sourceManager, ctx.FS(), ctx, ctx) require.NoError(t, err) - err = preparer.PrepareSources(ctx, component, testOutputDir, true /*applyOverlays?*/) + _, err = preparer.PrepareSources(ctx, component, testOutputDir, true /*applyOverlays?*/) require.NoError(t, err) } @@ -198,19 +379,22 @@ func TestPrepareSources_WritesMacrosFile(t *testing.T) { With: []string{"feature"}, }, }) - sourceManager.EXPECT().FetchFiles(gomock.Any(), component, testOutputDir).Return(nil) + sourceManager.EXPECT().FetchFiles(gomock.Any(), component, testOutputDir).Return(nil, nil) sourceManager.EXPECT().FetchComponent(gomock.Any(), component, testOutputDir, gomock.Any()).DoAndReturn( - func(_ interface{}, _ interface{}, outputDir string, _ ...sourceproviders.FetchComponentOption) error { + func( + _ interface{}, _ interface{}, outputDir string, + _ ...sourceproviders.FetchComponentOption, + ) ([]sourceproviders.SourceProvenance, error) { // Create the expected spec file. specPath := filepath.Join(outputDir, "my-package.spec") - return fileutils.WriteFile(ctx.FS(), specPath, []byte("# test spec"), fileperms.PublicFile) + return nil, fileutils.WriteFile(ctx.FS(), specPath, []byte("# test spec"), fileperms.PublicFile) }, ) preparer, err := sources.NewPreparer(sourceManager, ctx.FS(), ctx, ctx) require.NoError(t, err) - err = preparer.PrepareSources(ctx, component, testOutputDir, true /*applyOverlays?*/) + _, err = preparer.PrepareSources(ctx, component, testOutputDir, true /*applyOverlays?*/) require.NoError(t, err) // Verify file exists with expected name. @@ -448,9 +632,12 @@ func TestPrepareSources_CheckSkip(t *testing.T) { }, }, }) - sourceManager.EXPECT().FetchFiles(gomock.Any(), component, testOutputDir).Return(nil) + sourceManager.EXPECT().FetchFiles(gomock.Any(), component, testOutputDir).Return(nil, nil) sourceManager.EXPECT().FetchComponent(gomock.Any(), component, testOutputDir, gomock.Any()).DoAndReturn( - func(_ interface{}, _ interface{}, outputDir string, _ ...sourceproviders.FetchComponentOption) error { + func( + _ interface{}, _ interface{}, outputDir string, + _ ...sourceproviders.FetchComponentOption, + ) ([]sourceproviders.SourceProvenance, error) { // Create the expected spec file with a %check section. specContent := `Name: test-component Version: 1.0 @@ -461,13 +648,13 @@ Summary: Test component make test ` - return fileutils.WriteFile(ctx.FS(), outputSpecPath, []byte(specContent), fileperms.PublicFile) + return nil, fileutils.WriteFile(ctx.FS(), outputSpecPath, []byte(specContent), fileperms.PublicFile) }, ) preparer, err := sources.NewPreparer(sourceManager, ctx.FS(), ctx, ctx) require.NoError(t, err) - err = preparer.PrepareSources(ctx, component, testOutputDir, true /*applyOverlays?*/) + _, err = preparer.PrepareSources(ctx, component, testOutputDir, true /*applyOverlays?*/) require.NoError(t, err) // Verify spec has check skip prepended. @@ -505,9 +692,12 @@ func TestPrepareSources_CheckSkipDisabled(t *testing.T) { }, }, }) - sourceManager.EXPECT().FetchFiles(gomock.Any(), component, testOutputDir).Return(nil) + sourceManager.EXPECT().FetchFiles(gomock.Any(), component, testOutputDir).Return(nil, nil) sourceManager.EXPECT().FetchComponent(gomock.Any(), component, testOutputDir, gomock.Any()).DoAndReturn( - func(_ interface{}, _ interface{}, outputDir string, _ ...sourceproviders.FetchComponentOption) error { + func( + _ interface{}, _ interface{}, outputDir string, + _ ...sourceproviders.FetchComponentOption, + ) ([]sourceproviders.SourceProvenance, error) { // Create the expected spec file with a %check section. specContent := `Name: test-component Version: 1.0 @@ -518,13 +708,13 @@ Summary: Test component make test ` - return fileutils.WriteFile(ctx.FS(), outputSpecPath, []byte(specContent), fileperms.PublicFile) + return nil, fileutils.WriteFile(ctx.FS(), outputSpecPath, []byte(specContent), fileperms.PublicFile) }, ) preparer, err := sources.NewPreparer(sourceManager, ctx.FS(), ctx, ctx) require.NoError(t, err) - err = preparer.PrepareSources(ctx, component, testOutputDir, true /*applyOverlays?*/) + _, err = preparer.PrepareSources(ctx, component, testOutputDir, true /*applyOverlays?*/) require.NoError(t, err) // Verify spec does NOT have check skip prepended. @@ -551,12 +741,17 @@ func TestDiffSources_NoOverlays(t *testing.T) { component.EXPECT().GetConfig().AnyTimes().Return(&projectconfig.ComponentConfig{}) // DiffSources fetches sources once, then copies them for overlay application. - sourceManager.EXPECT().FetchFiles(gomock.Any(), component, gomock.Any()).Times(1).Return(nil) + sourceManager.EXPECT().FetchFiles(gomock.Any(), component, gomock.Any()).Times(1).Return(nil, nil) sourceManager.EXPECT().FetchComponent(gomock.Any(), component, gomock.Any()).Times(1).DoAndReturn( - func(_ interface{}, _ interface{}, outputDir string, _ ...sourceproviders.FetchComponentOption) error { + func( + _ interface{}, _ interface{}, outputDir string, + _ ...sourceproviders.FetchComponentOption, + ) ([]sourceproviders.SourceProvenance, error) { specPath := filepath.Join(outputDir, "test-component.spec") + specContent := "Name: test-component\nVersion: 1.0\n" - return fileutils.WriteFile(ctx.FS(), specPath, []byte("Name: test-component\nVersion: 1.0\n"), fileperms.PublicFile) + return nil, fileutils.WriteFile( + ctx.FS(), specPath, []byte(specContent), fileperms.PublicFile) }, ) @@ -594,12 +789,17 @@ func TestDiffSources_WithOverlays(t *testing.T) { }) // DiffSources fetches sources once, then copies them for overlay application. - sourceManager.EXPECT().FetchFiles(gomock.Any(), component, gomock.Any()).Times(1).Return(nil) + sourceManager.EXPECT().FetchFiles(gomock.Any(), component, gomock.Any()).Times(1).Return(nil, nil) sourceManager.EXPECT().FetchComponent(gomock.Any(), component, gomock.Any()).Times(1).DoAndReturn( - func(_ interface{}, _ interface{}, outputDir string, _ ...sourceproviders.FetchComponentOption) error { + func( + _ interface{}, _ interface{}, outputDir string, + _ ...sourceproviders.FetchComponentOption, + ) ([]sourceproviders.SourceProvenance, error) { specPath := filepath.Join(outputDir, "test-component.spec") + specContent := "Name: test-component\nVersion: 1.0\n" - return fileutils.WriteFile(ctx.FS(), specPath, []byte("Name: test-component\nVersion: 1.0\n"), fileperms.PublicFile) + return nil, fileutils.WriteFile( + ctx.FS(), specPath, []byte(specContent), fileperms.PublicFile) }, ) @@ -635,7 +835,7 @@ func TestDiffSources_FetchError(t *testing.T) { component.EXPECT().GetName().AnyTimes().Return("test-component") expectedErr := errors.New("network failure") - sourceManager.EXPECT().FetchFiles(gomock.Any(), component, gomock.Any()).Return(expectedErr) + sourceManager.EXPECT().FetchFiles(gomock.Any(), component, gomock.Any()).Return(nil, expectedErr) preparer, err := sources.NewPreparer(sourceManager, ctx.FS(), ctx, ctx) require.NoError(t, err) @@ -935,26 +1135,29 @@ func TestPrepareSources_UpdatesSourcesFile(t *testing.T) { component.EXPECT().GetConfig().AnyTimes().Return(&projectconfig.ComponentConfig{ SourceFiles: testCase.sourceFiles, }) - sourceManager.EXPECT().FetchFiles(gomock.Any(), component, testOutputDir).Return(nil) + sourceManager.EXPECT().FetchFiles(gomock.Any(), component, testOutputDir).Return(nil, nil) sourceManager.EXPECT().FetchComponent(gomock.Any(), component, testOutputDir, gomock.Any()).DoAndReturn( - func(_ interface{}, _ interface{}, outputDir string, _ ...sourceproviders.FetchComponentOption) error { + func( + _ interface{}, _ interface{}, outputDir string, + _ ...sourceproviders.FetchComponentOption, + ) ([]sourceproviders.SourceProvenance, error) { // Create existing 'sources' file if specified. if testCase.existingSourcesContent != "" { err := fileutils.WriteFile(ctx.FS(), filepath.Join(outputDir, fedorasource.SourcesFileName), []byte(testCase.existingSourcesContent), fileperms.PublicFile) if err != nil { - return err + return nil, err } } - return fileutils.WriteFile(ctx.FS(), outputSpecPath, []byte("# test spec"), fileperms.PublicFile) + return nil, fileutils.WriteFile(ctx.FS(), outputSpecPath, []byte("# test spec"), fileperms.PublicFile) }, ) preparer, err := sources.NewPreparer(sourceManager, ctx.FS(), ctx, ctx) require.NoError(t, err) - err = preparer.PrepareSources(ctx, component, testOutputDir, true /*applyOverlays?*/) + _, err = preparer.PrepareSources(ctx, component, testOutputDir, true /*applyOverlays?*/) if testCase.expectError { require.Error(t, err) @@ -1137,15 +1340,18 @@ func TestPrepareSources_AllowNoHashes(t *testing.T) { }) if !testCase.skipLookaside { - sourceManager.EXPECT().FetchFiles(gomock.Any(), comp, testOutputDir).Return(nil) + sourceManager.EXPECT().FetchFiles(gomock.Any(), comp, testOutputDir).Return(nil, nil) } sourceManager.EXPECT().FetchComponent(gomock.Any(), comp, testOutputDir, gomock.Any()).DoAndReturn( - func(_ interface{}, _ interface{}, outputDir string, _ ...sourceproviders.FetchComponentOption) error { + func( + _ interface{}, _ interface{}, outputDir string, + _ ...sourceproviders.FetchComponentOption, + ) ([]sourceproviders.SourceProvenance, error) { if testCase.existingSourcesContent != "" { if err := fileutils.WriteFile(ctx.FS(), filepath.Join(outputDir, fedorasource.SourcesFileName), []byte(testCase.existingSourcesContent), fileperms.PublicFile); err != nil { - return err + return nil, err } } @@ -1155,19 +1361,19 @@ func TestPrepareSources_AllowNoHashes(t *testing.T) { filePath := filepath.Join(outputDir, sf.Filename) if err := fileutils.WriteFile(ctx.FS(), filePath, []byte(testFileContent), fileperms.PublicFile); err != nil { - return err + return nil, err } } } - return fileutils.WriteFile(ctx.FS(), outputSpecPath, []byte("# test spec"), fileperms.PublicFile) + return nil, fileutils.WriteFile(ctx.FS(), outputSpecPath, []byte("# test spec"), fileperms.PublicFile) }, ) preparer, err := sources.NewPreparer(sourceManager, ctx.FS(), ctx, ctx, testCase.preparerOpts...) require.NoError(t, err) - err = preparer.PrepareSources(ctx, comp, testOutputDir, true /*applyOverlays?*/) + _, err = preparer.PrepareSources(ctx, comp, testOutputDir, true /*applyOverlays?*/) if testCase.expectError { require.Error(t, err) diff --git a/internal/providers/sourceproviders/fedorasource/fedorasource.go b/internal/providers/sourceproviders/fedorasource/fedorasource.go index 719a3759..b7e5776a 100644 --- a/internal/providers/sourceproviders/fedorasource/fedorasource.go +++ b/internal/providers/sourceproviders/fedorasource/fedorasource.go @@ -29,11 +29,25 @@ type FedoraSourceDownloader interface { // lookaside cache files into the repository directory. Files whose names appear // in skipFilenames are not downloaded (e.g., files already fetched separately). // Optional [ExtractOption] values can override default behavior. + // Returns [SourceDownload] entries describing each file that was actually downloaded. ExtractSourcesFromRepo( ctx context.Context, repoDir string, packageName string, lookasideBaseURI string, skipFilenames []string, opts ...ExtractOption, - ) error + ) ([]SourceDownload, error) +} + +// SourceDownload records the provenance of a single file downloaded +// from a lookaside cache during [FedoraSourceDownloader.ExtractSourcesFromRepo]. +type SourceDownload struct { + // Filename is the name of the downloaded file. + Filename string + // URL is the lookaside cache URL from which the file was downloaded. + URL string + // HashType is the hash algorithm used for validation (e.g., "sha512"). + HashType fileutils.HashType + // Hash is the hex-encoded expected hash value. + Hash string } // extractOptions holds optional configuration for [ExtractSourcesFromRepo]. @@ -154,16 +168,17 @@ func NewFedoraRepoExtractorImpl( // ExtractSourcesFromRepo processes the git repository by downloading any required // lookaside cache files into the repository directory. +// Returns [SourceDownload] entries for each file that was actually downloaded. func (g *FedoraSourceDownloaderImpl) ExtractSourcesFromRepo( ctx context.Context, repoDir string, packageName string, lookasideBaseURI string, skipFileNames []string, opts ...ExtractOption, -) error { +) ([]SourceDownload, error) { if repoDir == "" { - return errors.New("repository directory cannot be empty") + return nil, errors.New("repository directory cannot be empty") } if lookasideBaseURI == "" { - return errors.New("lookaside base URI cannot be empty") + return nil, errors.New("lookaside base URI cannot be empty") } // Apply functional options. @@ -179,35 +194,35 @@ func (g *FedoraSourceDownloaderImpl) ExtractSourcesFromRepo( repoDirExists, err := fileutils.Exists(g.fileSystem, repoDir) if err != nil { - return fmt.Errorf("failed to check if repository directory exists at %#q:\n%w", repoDir, err) + return nil, fmt.Errorf("failed to check if repository directory exists at %#q:\n%w", repoDir, err) } if !repoDirExists { - return fmt.Errorf("repository directory does not exist at %#q, cloning failed", repoDir) + return nil, fmt.Errorf("repository directory does not exist at %#q, cloning failed", repoDir) } sourcesFilePath := filepath.Join(repoDir, SourcesFileName) sourcesExists, err := fileutils.Exists(g.fileSystem, sourcesFilePath) if err != nil { - return fmt.Errorf("failed to check if 'sources' file exists at %#q:\n%w", sourcesFilePath, err) + return nil, fmt.Errorf("failed to check if 'sources' file exists at %#q:\n%w", sourcesFilePath, err) } // If the 'sources' file does not exist, there are no external sources to download. if !sourcesExists { slog.Info("No 'sources' file found, nothing to download", "dir", repoDir) - return nil + return nil, nil } sourcesContent, err := fileutils.ReadFile(g.fileSystem, sourcesFilePath) if err != nil { - return fmt.Errorf("failed to read 'sources' file at %#q:\n%w", sourcesFilePath, err) + return nil, fmt.Errorf("failed to read 'sources' file at %#q:\n%w", sourcesFilePath, err) } sourceFiles, err := parseSourcesFile(string(sourcesContent), packageName, lookasideBaseURI) if err != nil { - return fmt.Errorf("failed to parse 'sources' file at %#q:\n%w", sourcesFilePath, err) + return nil, fmt.Errorf("failed to parse 'sources' file at %#q:\n%w", sourcesFilePath, err) } skipSet := make(map[string]bool, len(skipFileNames)) @@ -221,16 +236,16 @@ func (g *FedoraSourceDownloaderImpl) ExtractSourcesFromRepo( destDir = options.outputDir if err := fileutils.MkdirAll(g.fileSystem, destDir); err != nil { - return fmt.Errorf("failed to create output directory %#q:\n%w", destDir, err) + return nil, fmt.Errorf("failed to create output directory %#q:\n%w", destDir, err) } } - err = g.downloadAndVerifySources(ctx, sourceFiles, destDir, skipSet) + downloads, err := g.downloadAndVerifySources(ctx, sourceFiles, destDir, skipSet) if err != nil { - return fmt.Errorf("failed to download sources:\n%w", err) + return nil, fmt.Errorf("failed to download sources:\n%w", err) } - return nil + return downloads, nil } func (g *FedoraSourceDownloaderImpl) downloadAndVerifySources( @@ -238,7 +253,9 @@ func (g *FedoraSourceDownloaderImpl) downloadAndVerifySources( sourceFiles []sourceFileInfo, repoDir string, skipSet map[string]bool, -) error { +) ([]SourceDownload, error) { + downloads := make([]SourceDownload, 0, len(sourceFiles)) + sourcesTotal := len(sourceFiles) for sourceIndex, sourceFile := range sourceFiles { if skipSet[sourceFile.fileName] { @@ -252,7 +269,7 @@ func (g *FedoraSourceDownloaderImpl) downloadAndVerifySources( exists, err := fileutils.Exists(g.fileSystem, destFilePath) if err != nil { - return fmt.Errorf("failed to check if file exists at %#q:\n%w", destFilePath, err) + return nil, fmt.Errorf("failed to check if file exists at %#q:\n%w", destFilePath, err) } if exists { @@ -287,11 +304,23 @@ func (g *FedoraSourceDownloaderImpl) downloadAndVerifySources( // callers (e.g. retrying with a different URI) don't see it as valid. _ = g.fileSystem.Remove(destFilePath) - return fmt.Errorf("failed to retrieve source file %#q:\n%w", sourceFile.fileName, err) + return nil, fmt.Errorf("failed to retrieve source file %#q:\n%w", sourceFile.fileName, err) } + + downloads = append(downloads, SourceDownload{ + Filename: sourceFile.fileName, + URL: sourceFile.uri, + HashType: sourceFile.hashType, + Hash: sourceFile.expectedHash, + }) } - return nil + // In dry-run mode the downloader no-ops, so no files were actually fetched. + if g.dryRunnable.DryRun() { + return nil, nil + } + + return downloads, nil } func (g *FedoraSourceDownloaderImpl) validateDownloadedFile( diff --git a/internal/providers/sourceproviders/fedorasource/fedorasource_test.go b/internal/providers/sourceproviders/fedorasource/fedorasource_test.go index d76aa0ea..4eb54857 100644 --- a/internal/providers/sourceproviders/fedorasource/fedorasource_test.go +++ b/internal/providers/sourceproviders/fedorasource/fedorasource_test.go @@ -38,7 +38,10 @@ SHA256 (patch-1.patch) = 67899aaa0f2f55e55e715cb65596449cb29bb0a76a764fe8f1e51bf ` // Expected URLs (must match the hashes in testSourcesContent). - testExpectedURL1 = "https://example.com/sha512/af5ae0eb4ad9c3f07b7d78ec9dfd03f6a00c257df6b829b21051d4ba2d106bf9d2f" + + testExpectedHash1 = "af5ae0eb4ad9c3f07b7d78ec9dfd03f6a00c257df6b829b21051d4ba2d106bf" + + "9d2f7563c0373b45e0ce5b1ad8a3bad9c05a2769547e67f4bc53692950db0ba37" + testExpectedHash2 = "67899aaa0f2f55e55e715cb65596449cb29bb0a76a764fe8f1e51bf4d0af648f" + testExpectedURL1 = "https://example.com/sha512/af5ae0eb4ad9c3f07b7d78ec9dfd03f6a00c257df6b829b21051d4ba2d106bf9d2f" + "7563c0373b45e0ce5b1ad8a3bad9c05a2769547e67f4bc53692950db0ba37/test-package/example-1.0.tar.gz" testExpectedURL2 = "https://example.com/sha256/67899aaa0f2f55e55e715cb65596449cb29bb0a76a764fe8f1e51bf4d0af648f/" + "test-package/patch-1.patch" @@ -91,7 +94,7 @@ func TestExtractSourcesFromRepo(t *testing.T) { require.NoError(t, err) require.NoError(t, ctx.FS().MkdirAll(testRepoDir, testDirPerms)) - setupSourcesFile(t, ctx.FS(), testRepoDir, testSourcesContent) + setupSourcesFile(t, ctx.FS(), testSourcesContent) // Mock downloader should create files with content matching the expected hashes mockDownloader.EXPECT().Download( @@ -112,8 +115,28 @@ func TestExtractSourcesFromRepo(t *testing.T) { return afero.WriteFile(ctx.FS(), destPath, []byte("test patch content"), testFilePerms) }) - err = extractor.ExtractSourcesFromRepo(context.Background(), testRepoDir, testPackageName, testLookasideURI, nil) + downloads, err := extractor.ExtractSourcesFromRepo( + context.Background(), + testRepoDir, + testPackageName, + testLookasideURI, + nil, + ) require.NoError(t, err) + require.Len(t, downloads, 2) + + assert.Equal(t, SourceDownload{ + Filename: testTarballFile, + URL: testExpectedURL1, + HashType: fileutils.HashTypeSHA512, + Hash: testExpectedHash1, + }, downloads[0]) + assert.Equal(t, SourceDownload{ + Filename: testPatchFile, + URL: testExpectedURL2, + HashType: fileutils.HashTypeSHA256, + Hash: testExpectedHash2, + }, downloads[1]) } func TestExtractSourcesFromRepoValidation(t *testing.T) { @@ -125,13 +148,13 @@ func TestExtractSourcesFromRepoValidation(t *testing.T) { require.NoError(t, err) t.Run("empty repo dir", func(t *testing.T) { - err := extractor.ExtractSourcesFromRepo(context.Background(), "", testPackageName, testLookasideURI, nil) + _, err := extractor.ExtractSourcesFromRepo(context.Background(), "", testPackageName, testLookasideURI, nil) require.Error(t, err) assert.Contains(t, err.Error(), "repository directory cannot be empty") }) t.Run("empty lookaside URI", func(t *testing.T) { - err := extractor.ExtractSourcesFromRepo(context.Background(), testRepoDir, testPackageName, "", nil) + _, err := extractor.ExtractSourcesFromRepo(context.Background(), testRepoDir, testPackageName, "", nil) require.Error(t, err) assert.Contains(t, err.Error(), "lookaside base URI cannot be empty") }) @@ -140,7 +163,7 @@ func TestExtractSourcesFromRepoValidation(t *testing.T) { require.NoError(t, ctx.FS().MkdirAll(testEmptyRepoDir, fileperms.PublicDir)) // Missing 'sources' file is valid - it means no external sources to download - err := extractor.ExtractSourcesFromRepo( + _, err := extractor.ExtractSourcesFromRepo( context.Background(), testEmptyRepoDir, testPackageName, testLookasideURI, nil, ) require.NoError(t, err) @@ -156,13 +179,13 @@ func TestExtractSourcesFromRepoDownloadFailure(t *testing.T) { require.NoError(t, err) require.NoError(t, ctx.FS().MkdirAll(testRepoDir, testDirPerms)) - setupSourcesFile(t, ctx.FS(), testRepoDir, testSingleSourceContent) + setupSourcesFile(t, ctx.FS(), testSingleSourceContent) downloadErr := assert.AnError mockDownloader.EXPECT().Download(gomock.Any(), gomock.Any(), gomock.Any()). Return(downloadErr) - err = extractor.ExtractSourcesFromRepo(context.Background(), testRepoDir, testPackageName, testLookasideURI, nil) + _, err = extractor.ExtractSourcesFromRepo(context.Background(), testRepoDir, testPackageName, testLookasideURI, nil) require.Error(t, err) require.ErrorIs(t, err, downloadErr) assert.Contains(t, err.Error(), "failed to download sources") @@ -177,7 +200,7 @@ func TestExtractSourcesFromRepoHashMismatch(t *testing.T) { require.NoError(t, err) require.NoError(t, ctx.FS().MkdirAll(testRepoDir, testDirPerms)) - setupSourcesFile(t, ctx.FS(), testRepoDir, testSingleSourceContent) + setupSourcesFile(t, ctx.FS(), testSingleSourceContent) // Mock downloader creates a file with WRONG content (hash mismatch) mockDownloader.EXPECT().Download(gomock.Any(), gomock.Any(), gomock.Any()). @@ -186,16 +209,16 @@ func TestExtractSourcesFromRepoHashMismatch(t *testing.T) { return afero.WriteFile(ctx.FS(), destPath, []byte("wrong content"), testFilePerms) }) - err = extractor.ExtractSourcesFromRepo(context.Background(), testRepoDir, testPackageName, testLookasideURI, nil) + _, err = extractor.ExtractSourcesFromRepo(context.Background(), testRepoDir, testPackageName, testLookasideURI, nil) require.Error(t, err) assert.Contains(t, err.Error(), "hash mismatch") } // setupSourcesFile creates a 'sources' file in the specified directory with the given content. -func setupSourcesFile(t *testing.T, fs afero.Fs, repoDir string, content string) { +func setupSourcesFile(t *testing.T, fs afero.Fs, content string) { t.Helper() - sourcesPath := filepath.Join(repoDir, testSourcesFile) + sourcesPath := filepath.Join(testRepoDir, testSourcesFile) require.NoError(t, afero.WriteFile(fs, sourcesPath, []byte(content), testFilePerms)) } diff --git a/internal/providers/sourceproviders/fedorasource/fedorasource_test/fedorasource_mocks.go b/internal/providers/sourceproviders/fedorasource/fedorasource_test/fedorasource_mocks.go index 0f3f07ec..d0a079ea 100644 --- a/internal/providers/sourceproviders/fedorasource/fedorasource_test/fedorasource_mocks.go +++ b/internal/providers/sourceproviders/fedorasource/fedorasource_test/fedorasource_mocks.go @@ -46,15 +46,16 @@ func (m *MockFedoraSourceDownloader) EXPECT() *MockFedoraSourceDownloaderMockRec } // ExtractSourcesFromRepo mocks base method. -func (m *MockFedoraSourceDownloader) ExtractSourcesFromRepo(ctx context.Context, repoDir, packageName, lookasideBaseURI string, skipFilenames []string, opts ...fedorasource.ExtractOption) error { +func (m *MockFedoraSourceDownloader) ExtractSourcesFromRepo(ctx context.Context, repoDir, packageName, lookasideBaseURI string, skipFilenames []string, opts ...fedorasource.ExtractOption) ([]fedorasource.SourceDownload, error) { m.ctrl.T.Helper() varargs := []any{ctx, repoDir, packageName, lookasideBaseURI, skipFilenames} for _, a := range opts { varargs = append(varargs, a) } ret := m.ctrl.Call(m, "ExtractSourcesFromRepo", varargs...) - ret0, _ := ret[0].(error) - return ret0 + ret0, _ := ret[0].([]fedorasource.SourceDownload) + ret1, _ := ret[1].(error) + return ret0, ret1 } // ExtractSourcesFromRepo indicates an expected call of ExtractSourcesFromRepo. diff --git a/internal/providers/sourceproviders/fedorasourceprovider.go b/internal/providers/sourceproviders/fedorasourceprovider.go index 344e90a9..fc59fc93 100644 --- a/internal/providers/sourceproviders/fedorasourceprovider.go +++ b/internal/providers/sourceproviders/fedorasourceprovider.go @@ -85,12 +85,12 @@ func NewFedoraSourcesProviderImpl( func (g *FedoraSourcesProviderImpl) GetComponent( ctx context.Context, component components.Component, destDirPath string, opts ...FetchComponentOption, -) (err error) { +) (_ []SourceProvenance, err error) { resolved := resolveFetchComponentOptions(opts) componentName := component.GetName() if componentName == "" { - return errors.New("component name cannot be empty") + return nil, errors.New("component name cannot be empty") } upstreamNameToUse := componentName @@ -102,12 +102,12 @@ func (g *FedoraSourcesProviderImpl) GetComponent( } if destDirPath == "" { - return errors.New("destination path cannot be empty") + return nil, errors.New("destination path cannot be empty") } gitRepoURL, err := fedorasource.BuildDistGitURL(g.distroGitBaseURI, upstreamNameToUse) if err != nil { - return fmt.Errorf("failed to build dist-git URL for %#q:\n%w", upstreamNameToUse, err) + return nil, fmt.Errorf("failed to build dist-git URL for %#q:\n%w", upstreamNameToUse, err) } // Get the calculated effective commit. @@ -123,7 +123,7 @@ func (g *FedoraSourcesProviderImpl) GetComponent( // Clone to a temp directory first, then copy files to destination. tempDir, err := fileutils.MkdirTempInTempDir(g.fs, "azldev-clone-") if err != nil { - return fmt.Errorf("failed to create temp directory for clone:\n%w", err) + return nil, fmt.Errorf("failed to create temp directory for clone:\n%w", err) } defer fileutils.RemoveAllAndUpdateErrorIfNil(g.fs, tempDir, &err) @@ -137,7 +137,7 @@ func (g *FedoraSourcesProviderImpl) GetComponent( return g.gitProvider.Clone(ctx, gitRepoURL, tempDir, git.WithGitBranch(g.distroGitBranch)) }) if err != nil { - return fmt.Errorf("failed to clone git repository %#q:\n%w", gitRepoURL, err) + return nil, fmt.Errorf("failed to clone git repository %#q:\n%w", gitRepoURL, err) } // Collect filenames from source-files config so the lookaside extractor can skip them. @@ -156,23 +156,24 @@ func (g *FedoraSourcesProviderImpl) GetComponent( // processClonedRepo handles the post-clone steps: checking out the target commit, // extracting lookaside sources, renaming spec files, and copying to the destination. +// Returns provenance entries for files downloaded from the lookaside cache. func (g *FedoraSourcesProviderImpl) processClonedRepo( ctx context.Context, upstreamCommit string, tempDir, upstreamName, componentName, destDirPath string, skipFilenames []string, opts FetchComponentOptions, -) error { +) ([]SourceProvenance, error) { // Checkout the appropriate commit based on component/distro config if err := g.checkoutTargetCommit(ctx, upstreamCommit, tempDir); err != nil { - return fmt.Errorf("failed to checkout target commit:\n%w", err) + return nil, fmt.Errorf("failed to checkout target commit:\n%w", err) } // Delete the .git directory so it's not copied to destination, unless the caller // requested that it be preserved (e.g., for synthetic history generation). if !opts.PreserveGitDir { if err := g.fs.RemoveAll(filepath.Join(tempDir, ".git")); err != nil { - return fmt.Errorf("failed to remove .git directory from cloned repository at %#q:\n%w", + return nil, fmt.Errorf("failed to remove .git directory from cloned repository at %#q:\n%w", tempDir, err) } } @@ -180,18 +181,22 @@ func (g *FedoraSourcesProviderImpl) processClonedRepo( // Extract sources from repo (downloads lookaside files into the temp dir). // Files in skipFilenames are not downloaded — they were already fetched by FetchFiles. // Skip this step entirely when SkipLookaside is set (e.g., during rendering). + var provenance []SourceProvenance + if !opts.SkipLookaside { - err := g.downloader.ExtractSourcesFromRepo( + downloads, err := g.downloader.ExtractSourcesFromRepo( ctx, tempDir, upstreamName, g.lookasideBaseURI, skipFilenames, ) if err != nil { - return fmt.Errorf("failed to extract sources from git repository:\n%w", err) + return nil, fmt.Errorf("failed to extract sources from git repository:\n%w", err) } + + provenance = ConvertDownloadsToProvenance(downloads) } // If the upstream name differs from the component name, rename the spec in temp dir. if err := g.renameSpecIfNeeded(tempDir, upstreamName, componentName); err != nil { - return err + return nil, err } // Copy files from temp dir to destination, skipping files that already exist. @@ -204,10 +209,10 @@ func (g *FedoraSourcesProviderImpl) processClonedRepo( } if err := fileutils.CopyDirRecursive(g.dryRunnable, g.fs, tempDir, destDirPath, copyOptions); err != nil { - return fmt.Errorf("failed to copy files to destination:\n%w", err) + return nil, fmt.Errorf("failed to copy files to destination:\n%w", err) } - return nil + return provenance, nil } // renameSpecIfNeeded renames the spec file in the given directory if the upstream name diff --git a/internal/providers/sourceproviders/fedorasourceprovider_test.go b/internal/providers/sourceproviders/fedorasourceprovider_test.go index 0d45e8fe..f92af29b 100644 --- a/internal/providers/sourceproviders/fedorasourceprovider_test.go +++ b/internal/providers/sourceproviders/fedorasourceprovider_test.go @@ -194,9 +194,17 @@ func TestGetComponentFromGit(t *testing.T) { }) // Execute the method under test - err = provider.GetComponent(context.Background(), mockComponent, destDir) + provenance, err := provider.GetComponent(context.Background(), mockComponent, destDir) require.NoError(t, err) + // Verify provenance returned from dist-git extraction + require.Len(t, provenance, 1, "should return provenance for the lookaside download") + assert.Equal(t, testFileName, provenance[0].Filename) + assert.Equal(t, sourceproviders.SourceOriginLookaside, provenance[0].OriginType) + assert.Equal(t, fileutils.HashType(testHashType), provenance[0].HashType) + assert.Equal(t, testHash, provenance[0].Hash) + assert.NotEmpty(t, provenance[0].URL, "lookaside URL should be populated") + // Verify the spec file was copied to destination specPath := destDir + "/" + testPackageName + ".spec" exists, err := fileutils.Exists(env.FS(), specPath) @@ -301,7 +309,7 @@ func TestGetComponentFromGit(t *testing.T) { require.NoError(t, err) // Should succeed — the file is in skipFilenames so the 404 lookaside download is skipped - err = provider.GetComponent(context.Background(), mockComponent, testDestDir) + _, err = provider.GetComponent(context.Background(), mockComponent, testDestDir) require.NoError(t, err) // Verify the pre-existing file was preserved (not overwritten by git repo version) @@ -363,7 +371,7 @@ func TestGetComponentFromGit(t *testing.T) { Name: testPackageName, }) - err = provider.GetComponent(context.Background(), mockComponent, destDir) + _, err = provider.GetComponent(context.Background(), mockComponent, destDir) require.NoError(t, err) // Verify only spec file exists (no lookaside downloads) @@ -391,7 +399,7 @@ func TestGetComponentFromGit(t *testing.T) { emptyNameComponent := components_testutils.NewMockComponent(ctrl) emptyNameComponent.EXPECT().GetName().AnyTimes().Return("") - err = provider.GetComponent(context.Background(), emptyNameComponent, destDir) + _, err = provider.GetComponent(context.Background(), emptyNameComponent, destDir) require.Error(t, err) assert.Contains(t, err.Error(), "component name cannot be empty") @@ -401,7 +409,7 @@ func TestGetComponentFromGit(t *testing.T) { Name: testPackageName, }) - err = provider.GetComponent(context.Background(), mockComponent, "") + _, err = provider.GetComponent(context.Background(), mockComponent, "") require.Error(t, err) assert.Contains(t, err.Error(), "destination path cannot be empty") }) @@ -430,7 +438,7 @@ func TestGetComponentFromGit(t *testing.T) { cloneError := errors.New("clone failed") mockGitProvider.EXPECT().Clone(gomock.Any(), repoURL, gomock.Any(), gomock.Any()).Return(cloneError) - err = provider.GetComponent(context.Background(), mockComponent, destDir) + _, err = provider.GetComponent(context.Background(), mockComponent, destDir) require.Error(t, err) assert.ErrorIs(t, err, cloneError) }) @@ -473,9 +481,9 @@ func TestGetComponentFromGit(t *testing.T) { extractorError := errors.New("extraction failed") mockExtractor.EXPECT(). ExtractSourcesFromRepo(gomock.Any(), gomock.Any(), testPackageName, gomock.Any(), gomock.Any()). - Return(extractorError) + Return(nil, extractorError) - err = provider.GetComponent(context.Background(), mockComponent, destDir) + _, err = provider.GetComponent(context.Background(), mockComponent, destDir) require.Error(t, err) assert.ErrorIs(t, err, extractorError) }) @@ -533,9 +541,9 @@ func TestGetComponentFromGit(t *testing.T) { // Extractor succeeds mockExtractor.EXPECT(). ExtractSourcesFromRepo(gomock.Any(), gomock.Any(), upstreamName, gomock.Any(), gomock.Any()). - Return(nil) + Return(nil, nil) - err = provider.GetComponent(context.Background(), mockComponent, destDir) + _, err = provider.GetComponent(context.Background(), mockComponent, destDir) require.NoError(t, err) // Verify spec file was renamed to component name @@ -595,9 +603,9 @@ func TestGetComponentFromGit(t *testing.T) { // Extractor succeeds mockExtractor.EXPECT(). ExtractSourcesFromRepo(gomock.Any(), gomock.Any(), upstreamName, gomock.Any(), gomock.Any()). - Return(nil) + Return(nil, nil) - err = provider.GetComponent(context.Background(), mockComponent, destDir) + _, err = provider.GetComponent(context.Background(), mockComponent, destDir) require.Error(t, err) assert.Contains(t, err.Error(), "failed to rename fetched spec file") }) @@ -643,7 +651,7 @@ func TestGetComponentFromGit(t *testing.T) { // ExtractSourcesFromRepo must NOT be called — no EXPECT set for it. // gomock will fail if it's called unexpectedly. - err = provider.GetComponent(context.Background(), mockComponent, destDir, sourceproviders.WithSkipLookaside()) + _, err = provider.GetComponent(context.Background(), mockComponent, destDir, sourceproviders.WithSkipLookaside()) require.NoError(t, err) // Verify spec file was still copied to destination. @@ -713,9 +721,9 @@ func TestCheckoutTargetCommit(t *testing.T) { // Extractor succeeds mockExtractor.EXPECT(). ExtractSourcesFromRepo(gomock.Any(), gomock.Any(), testPackageName, gomock.Any(), gomock.Any()). - Return(nil) + Return(nil, nil) - err = provider.GetComponent(context.Background(), mockComponent, destDir) + _, err = provider.GetComponent(context.Background(), mockComponent, destDir) require.NoError(t, err) }) @@ -766,9 +774,9 @@ func TestCheckoutTargetCommit(t *testing.T) { // Extractor succeeds mockExtractor.EXPECT(). ExtractSourcesFromRepo(gomock.Any(), gomock.Any(), testPackageName, gomock.Any(), gomock.Any()). - Return(nil) + Return(nil, nil) - err = provider.GetComponent(context.Background(), mockComponent, destDir) + _, err = provider.GetComponent(context.Background(), mockComponent, destDir) require.NoError(t, err) }) @@ -807,7 +815,7 @@ func TestCheckoutTargetCommit(t *testing.T) { GetCommitHashBeforeDate(gomock.Any(), gomock.Any(), snapshotTime). Return("", hashError) - err = provider.GetComponent(context.Background(), mockComponent, destDir) + _, err = provider.GetComponent(context.Background(), mockComponent, destDir) require.Error(t, err) assert.Contains(t, err.Error(), "resolving commit for snapshot time") assert.ErrorIs(t, err, hashError) @@ -854,7 +862,7 @@ func TestCheckoutTargetCommit(t *testing.T) { Checkout(gomock.Any(), gomock.Any(), snapshotCommitHash). Return(checkoutError) - err = provider.GetComponent(context.Background(), mockComponent, destDir) + _, err = provider.GetComponent(context.Background(), mockComponent, destDir) require.Error(t, err) assert.Contains(t, err.Error(), "failed to checkout commit") assert.ErrorIs(t, err, checkoutError) @@ -886,7 +894,7 @@ func TestCheckoutTargetCommit(t *testing.T) { Clone(gomock.Any(), repoURL, gomock.Any(), gomock.Any()). Return(nil) - err = provider.GetComponent(context.Background(), mockComponent, destDir) + _, err = provider.GetComponent(context.Background(), mockComponent, destDir) require.Error(t, err) assert.Contains(t, err.Error(), "invalid snapshot time") }) @@ -941,9 +949,9 @@ func TestCheckoutTargetCommit_UpstreamCommit(t *testing.T) { // Extractor succeeds mockExtractor.EXPECT(). ExtractSourcesFromRepo(gomock.Any(), gomock.Any(), testPackageName, gomock.Any(), gomock.Any()). - Return(nil) + Return(nil, nil) - err = provider.GetComponent(context.Background(), mockComponent, destDir) + _, err = provider.GetComponent(context.Background(), mockComponent, destDir) require.NoError(t, err) }) @@ -993,9 +1001,9 @@ func TestCheckoutTargetCommit_UpstreamCommit(t *testing.T) { // Extractor succeeds mockExtractor.EXPECT(). ExtractSourcesFromRepo(gomock.Any(), gomock.Any(), testPackageName, gomock.Any(), gomock.Any()). - Return(nil) + Return(nil, nil) - err = provider.GetComponent(context.Background(), mockComponent, destDir) + _, err = provider.GetComponent(context.Background(), mockComponent, destDir) require.NoError(t, err) }) @@ -1036,7 +1044,7 @@ func TestCheckoutTargetCommit_UpstreamCommit(t *testing.T) { Checkout(gomock.Any(), gomock.Any(), upstreamCommitHash). Return(checkoutError) - err = provider.GetComponent(context.Background(), mockComponent, destDir) + _, err = provider.GetComponent(context.Background(), mockComponent, destDir) require.Error(t, err) assert.Contains(t, err.Error(), "failed to checkout commit") assert.ErrorIs(t, err, checkoutError) @@ -1098,8 +1106,8 @@ func TestGetComponent_LockedCommitTakesPriorityOverConfigPin(t *testing.T) { mockExtractor.EXPECT(). ExtractSourcesFromRepo(gomock.Any(), gomock.Any(), testPackageName, gomock.Any(), gomock.Any()). - Return(nil) + Return(nil, nil) - err = provider.GetComponent(context.Background(), mockComponent, destDir) + _, err = provider.GetComponent(context.Background(), mockComponent, destDir) require.NoError(t, err) } diff --git a/internal/providers/sourceproviders/identityprovider_test.go b/internal/providers/sourceproviders/identityprovider_test.go index 8682090f..2c22103b 100644 --- a/internal/providers/sourceproviders/identityprovider_test.go +++ b/internal/providers/sourceproviders/identityprovider_test.go @@ -293,8 +293,8 @@ type noOpDownloader struct{} func (d *noOpDownloader) ExtractSourcesFromRepo( _ context.Context, _, _, _ string, _ []string, _ ...fedorasource.ExtractOption, -) error { - return nil +) ([]fedorasource.SourceDownload, error) { + return nil, nil } // --- ResolveIdentity always resolves from upstream --- diff --git a/internal/providers/sourceproviders/provenance.go b/internal/providers/sourceproviders/provenance.go new file mode 100644 index 00000000..c46c8ee1 --- /dev/null +++ b/internal/providers/sourceproviders/provenance.go @@ -0,0 +1,62 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package sourceproviders + +import ( + "github.com/microsoft/azure-linux-dev-tools/internal/providers/sourceproviders/fedorasource" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" +) + +// SourceOriginType describes how a source file was obtained during source preparation. +type SourceOriginType string + +const ( + // SourceOriginLookaside indicates the file was downloaded from a lookaside cache. + // The [SourceProvenance.URL] field contains the exact lookaside URL used. + SourceOriginLookaside SourceOriginType = "lookaside-url" + + // SourceOriginURL indicates the file was downloaded from an explicitly configured + // origin URL (the [projectconfig.SourceFileReference.Origin] field). + SourceOriginURL SourceOriginType = "configured-origin-url" +) + +// SourceProvenance records where a single downloaded source file came from. +type SourceProvenance struct { + // Filename is the name of the downloaded file. + Filename string `json:"filename" table:"Filename"` + + // OriginType describes how the file was obtained. + OriginType SourceOriginType `json:"originType" table:"Origin"` + + // URL is the actual download URL that was used to retrieve the file. + URL string `json:"url" table:"URL"` + + // HashType is the hash algorithm used to validate the download (e.g., "sha512"). + HashType fileutils.HashType `json:"hashType,omitempty" table:"Hash Type"` + + // Hash is the hex-encoded hash value used to validate the download. + Hash string `json:"hash,omitempty" table:"Hash"` +} + +// ConvertDownloadsToProvenance converts [fedorasource.SourceDownload] entries +// (returned by lookaside extraction) into [SourceProvenance] entries with the +// [SourceOriginLookaside] origin type. +func ConvertDownloadsToProvenance(downloads []fedorasource.SourceDownload) []SourceProvenance { + if len(downloads) == 0 { + return nil + } + + prov := make([]SourceProvenance, len(downloads)) + for i, download := range downloads { + prov[i] = SourceProvenance{ + Filename: download.Filename, + OriginType: SourceOriginLookaside, + URL: download.URL, + HashType: download.HashType, + Hash: download.Hash, + } + } + + return prov +} diff --git a/internal/providers/sourceproviders/rpmcontentsprovider.go b/internal/providers/sourceproviders/rpmcontentsprovider.go index 0807f7eb..eeada18c 100644 --- a/internal/providers/sourceproviders/rpmcontentsprovider.go +++ b/internal/providers/sourceproviders/rpmcontentsprovider.go @@ -50,19 +50,19 @@ func NewRPMContentsProviderImpl( // in the provided destination path. func (r *RPMContentsProviderImpl) GetComponent( ctx context.Context, component components.Component, destDirPath string, _ ...FetchComponentOption, -) (err error) { +) (_ []SourceProvenance, err error) { if component.GetName() == "" { - return errors.New("component name cannot be empty") + return nil, errors.New("component name cannot be empty") } if destDirPath == "" { - return errors.New("destination path cannot be empty") + return nil, errors.New("destination path cannot be empty") } // Get the RPM rpmReader, err := r.rpmProvider.GetRPM(ctx, component.GetName(), nil) if err != nil { - return fmt.Errorf("failed to get the RPM file for component %#q: %w", + return nil, fmt.Errorf("failed to get the RPM file for component %#q: %w", component.GetName(), err) } defer defers.HandleDeferError(rpmReader.Close, &err) @@ -70,11 +70,11 @@ func (r *RPMContentsProviderImpl) GetComponent( // Extract the RPM contents err = r.extractor.Extract(rpmReader, destDirPath) if err != nil { - return fmt.Errorf("failed to extract the RPM file of component %#q: %w", + return nil, fmt.Errorf("failed to extract the RPM file of component %#q: %w", component.GetName(), err) } - return nil + return nil, nil } // ResolveIdentity implements [SourceIdentityProvider] by downloading the source RPM diff --git a/internal/providers/sourceproviders/rpmcontentsprovider_test.go b/internal/providers/sourceproviders/rpmcontentsprovider_test.go index 41525e91..5b709a62 100644 --- a/internal/providers/sourceproviders/rpmcontentsprovider_test.go +++ b/internal/providers/sourceproviders/rpmcontentsprovider_test.go @@ -103,7 +103,7 @@ func TestGetComponent(t *testing.T) { Return(packageURL, nil). Times(1) - err = provider.GetComponent(t.Context(), mockComponent, testDestinationDir) + _, err = provider.GetComponent(t.Context(), mockComponent, testDestinationDir) require.NoError(t, err) entries, err := afero.ReadDir(localFS, testDestinationDir) @@ -119,13 +119,13 @@ func TestGetComponent(t *testing.T) { emptyNameComponent := components_testutils.NewMockComponent(ctrl) emptyNameComponent.EXPECT().GetName().AnyTimes().Return("") - err = provider.GetComponent(t.Context(), emptyNameComponent, testDestinationDir) + _, err = provider.GetComponent(t.Context(), emptyNameComponent, testDestinationDir) require.Error(t, err) assert.Contains(t, err.Error(), "component name cannot be empty") }) t.Run("empty destination path fails", func(t *testing.T) { - err = provider.GetComponent(t.Context(), mockComponent, "") + _, err = provider.GetComponent(t.Context(), mockComponent, "") require.Error(t, err) assert.Contains(t, err.Error(), "destination path cannot be empty") }) @@ -157,7 +157,7 @@ func TestGetComponentFailureSimulation(t *testing.T) { provider, err := sourceproviders.NewRPMContentsProviderImpl(mockExtractor, mockRPMProvider) require.NoError(t, err) - err = provider.GetComponent(t.Context(), mockComponent, testDestinationDir) + _, err = provider.GetComponent(t.Context(), mockComponent, testDestinationDir) require.Error(t, err) assert.ErrorIs(t, err, rpmProviderError) }) @@ -182,7 +182,7 @@ func TestGetComponentFailureSimulation(t *testing.T) { provider, err := sourceproviders.NewRPMContentsProviderImpl(mockExtractor, mockRPMProvider) require.NoError(t, err) - err = provider.GetComponent(t.Context(), mockComponent, testDestinationDir) + _, err = provider.GetComponent(t.Context(), mockComponent, testDestinationDir) require.Error(t, err) assert.ErrorIs(t, err, extractorError) }) diff --git a/internal/providers/sourceproviders/sourcemanager.go b/internal/providers/sourceproviders/sourcemanager.go index b5431d84..e7291bd5 100644 --- a/internal/providers/sourceproviders/sourcemanager.go +++ b/internal/providers/sourceproviders/sourcemanager.go @@ -106,24 +106,28 @@ type ComponentSourceProvider interface { // GetComponent retrieves the `.spec` for the specified component along with any sidecar // files stored along with it, placing the fetched files in the provided directory. + // Returns provenance entries for any files downloaded from a lookaside cache. GetComponent( ctx context.Context, component components.Component, destDirPath string, opts ...FetchComponentOption, - ) error + ) ([]SourceProvenance, error) } // SourceManager is an abstract interface for a facility that can fetch arbitrary component sources. type SourceManager interface { // FetchFiles fetches the given source files, placing the files in the provided directory. - FetchFiles(ctx context.Context, component components.Component, destDirPath string) error + // Returns provenance entries describing where each downloaded file came from. + // Files that already exist on disk are skipped and produce no provenance entry. + FetchFiles(ctx context.Context, component components.Component, destDirPath string) ([]SourceProvenance, error) // FetchComponent fetches an entire upstream component, including its `.spec` file and any sidecar files. // Optional [FetchComponentOption] values may be passed to control provider behavior (e.g., preserving - // the upstream .git directory). + // the upstream .git directory). Returns provenance entries for any files downloaded from a + // lookaside cache during the fetch. FetchComponent( ctx context.Context, component components.Component, destDirPath string, opts ...FetchComponentOption, - ) error + ) ([]SourceProvenance, error) // ResolveSourceIdentity returns a deterministic identity string for the component's source. // For local components, this is a content hash of the spec directory. @@ -264,50 +268,58 @@ func (m *sourceManager) FetchFiles( ctx context.Context, component components.Component, destDirPath string, -) error { +) ([]SourceProvenance, error) { sourceFiles := component.GetConfig().SourceFiles if len(sourceFiles) == 0 { slog.Debug("No source files to fetch for component", "component", component.GetName()) - return nil + return nil, nil } httpDownloader, err := downloader.NewHTTPDownloader(m.dryRunnable, m.eventListener, m.fs) if err != nil { - return fmt.Errorf("failed to create HTTP downloader:\n%w", err) + return nil, fmt.Errorf("failed to create HTTP downloader:\n%w", err) } + var provenance []SourceProvenance + for i := range sourceFiles { fileRef := &sourceFiles[i] - err := m.fetchSourceFile(ctx, httpDownloader, component, fileRef, destDirPath) + prov, err := m.fetchSourceFile(ctx, httpDownloader, component, fileRef, destDirPath) if err != nil { - return fmt.Errorf("failed to fetch source file %#q:\n%w", fileRef.Filename, err) + return nil, fmt.Errorf("failed to fetch source file %#q:\n%w", fileRef.Filename, err) + } + + if prov != nil { + provenance = append(provenance, *prov) } } - return nil + return provenance, nil } // fetchSourceFile downloads a source file, trying the lookaside cache first and falling // back to the configured origin. When disable-origins is set, fallback is disabled. +// Returns a [SourceProvenance] entry describing where the file was downloaded from, +// or nil if the file already existed on disk and was skipped. func (m *sourceManager) fetchSourceFile( ctx context.Context, httpDownloader downloader.Downloader, component components.Component, fileRef *projectconfig.SourceFileReference, destDirPath string, -) error { +) (*SourceProvenance, error) { // Validate filename to prevent path traversal vulnerabilities if err := fileutils.ValidateFilename(fileRef.Filename); err != nil { - return fmt.Errorf("invalid source file reference:\n%w", err) + return nil, fmt.Errorf("invalid source file reference:\n%w", err) } destPath := filepath.Join(destDirPath, fileRef.Filename) sourceExists, err := fileutils.Exists(m.fs, destPath) if err != nil { - return fmt.Errorf("failed to check existence of destination file %#q:\n%w", destPath, err) + return nil, fmt.Errorf("failed to check existence of destination file %#q:\n%w", destPath, err) } if sourceExists { @@ -315,14 +327,26 @@ func (m *sourceManager) fetchSourceFile( "filename", fileRef.Filename, "path", destPath) - return nil + return nil, nil //nolint:nilnil // nil provenance is intentional — file was not downloaded. + } + + // In dry-run mode the downloader no-ops without fetching files, so no provenance + // should be reported since nothing was actually downloaded. + if m.dryRunnable.DryRun() { + return nil, nil //nolint:nilnil // nil provenance — dry-run does not download. } // Phase 1: Try lookaside cache if hash info is available if fileRef.Hash != "" && fileRef.HashType != "" { - lookasideErr := m.tryLookasideDownload(ctx, httpDownloader, component, fileRef, destPath) + sourceURL, lookasideErr := m.tryLookasideDownload(ctx, httpDownloader, component, fileRef, destPath) if lookasideErr == nil { - return nil + return &SourceProvenance{ + Filename: fileRef.Filename, + OriginType: SourceOriginLookaside, + URL: sourceURL, + HashType: fileRef.HashType, + Hash: fileRef.Hash, + }, nil } slog.Debug("Lookaside cache download failed", @@ -332,29 +356,42 @@ func (m *sourceManager) fetchSourceFile( // Phase 2: Fall back to configured origin (not allowed when disable-origins is set) if m.disableOrigins { - return fmt.Errorf("source file %#q not found in lookaside cache and disable-origins is enabled in the distro config", + return nil, fmt.Errorf( + "source file %#q not found in lookaside cache"+ + " and disable-origins is enabled in the distro config", fileRef.Filename) } if fileRef.Origin.Type == "" { - return fmt.Errorf("source file %#q not found in lookaside cache and no origin configured", + return nil, fmt.Errorf("source file %#q not found in lookaside cache and no origin configured", fileRef.Filename) } - return m.downloadFromOrigin(ctx, httpDownloader, fileRef, destPath) + originURL, err := m.downloadFromOrigin(ctx, httpDownloader, fileRef, destPath) + if err != nil { + return nil, err + } + + return &SourceProvenance{ + Filename: fileRef.Filename, + OriginType: SourceOriginURL, + URL: originURL, + HashType: fileRef.HashType, + Hash: fileRef.Hash, + }, nil } // tryLookasideDownload attempts to download a source file from the lookaside cache. -// Returns nil on success, or an error if the download fails. +// Returns the resolved download URL on success, or an error if the download fails. func (m *sourceManager) tryLookasideDownload( ctx context.Context, httpDownloader downloader.Downloader, component components.Component, fileRef *projectconfig.SourceFileReference, destPath string, -) error { +) (string, error) { if m.lookasideBaseURI == "" { - return errors.New("no lookaside cache configured") + return "", errors.New("no lookaside cache configured") } packageName := resolvePackageName(component) @@ -362,7 +399,7 @@ func (m *sourceManager) tryLookasideDownload( sourceURL, err := fedorasource.BuildLookasideURL(m.lookasideBaseURI, packageName, fileRef.Filename, string(fileRef.HashType), fileRef.Hash) if err != nil { - return fmt.Errorf("failed to build lookaside URL for %#q:\n%w", fileRef.Filename, err) + return "", fmt.Errorf("failed to build lookaside URL for %#q:\n%w", fileRef.Filename, err) } slog.Info("Downloading source file from lookaside cache...", @@ -371,23 +408,24 @@ func (m *sourceManager) tryLookasideDownload( err = m.downloadAndValidate(ctx, httpDownloader, sourceURL, destPath, fileRef) if err != nil { - return fmt.Errorf("lookaside cache download failed for %#q:\n%w", fileRef.Filename, err) + return "", fmt.Errorf("lookaside cache download failed for %#q:\n%w", fileRef.Filename, err) } - return nil + return sourceURL, nil } // downloadFromOrigin downloads a source file using its configured origin. +// Returns the origin URL used for the download on success. func (m *sourceManager) downloadFromOrigin( ctx context.Context, httpDownloader downloader.Downloader, fileRef *projectconfig.SourceFileReference, destPath string, -) error { +) (string, error) { switch fileRef.Origin.Type { case projectconfig.OriginTypeURI: if fileRef.Origin.Uri == "" { - return fmt.Errorf("no URI configured for source file %#q with origin type %#q", + return "", fmt.Errorf("no URI configured for source file %#q with origin type %#q", fileRef.Filename, fileRef.Origin.Type) } @@ -398,13 +436,13 @@ func (m *sourceManager) downloadFromOrigin( err := m.downloadAndValidate(ctx, httpDownloader, fileRef.Origin.Uri, destPath, fileRef) if err != nil { - return fmt.Errorf("failed to retrieve source file %#q:\n%w", fileRef.Filename, err) + return "", fmt.Errorf("failed to retrieve source file %#q:\n%w", fileRef.Filename, err) } - return nil + return fileRef.Origin.Uri, nil default: - return fmt.Errorf("unsupported origin type %#q for source file %#q", + return "", fmt.Errorf("unsupported origin type %#q for source file %#q", fileRef.Origin.Type, fileRef.Filename) } } @@ -457,9 +495,9 @@ func resolvePackageName(component components.Component) string { func (m *sourceManager) FetchComponent( ctx context.Context, component components.Component, destDirPath string, opts ...FetchComponentOption, -) error { +) ([]SourceProvenance, error) { if component.GetName() == "" { - return errors.New("component name is empty") + return nil, errors.New("component name is empty") } sourceType := component.GetConfig().Spec.SourceType @@ -468,13 +506,18 @@ func (m *sourceManager) FetchComponent( switch sourceType { case projectconfig.SpecSourceTypeLocal, projectconfig.SpecSourceTypeUnspecified: - return m.fetchLocalComponent(ctx, component, destDirPath, resolved) + prov, err := m.fetchLocalComponent(ctx, component, destDirPath, resolved) + if err != nil { + return nil, err + } + + return prov, nil case projectconfig.SpecSourceTypeUpstream: return m.fetchUpstreamComponent(ctx, component, destDirPath, opts...) } - return fmt.Errorf("spec for component %#q not found in any configured provider", + return nil, fmt.Errorf("spec for component %#q not found in any configured provider", component.GetName()) } @@ -529,56 +572,58 @@ func (m *sourceManager) resolveUpstreamSourceIdentity( func (m *sourceManager) fetchLocalComponent( ctx context.Context, component components.Component, destDirPath string, opts FetchComponentOptions, -) error { +) ([]SourceProvenance, error) { err := FetchLocalComponent(m.dryRunnable, m.eventListener, m.fs, component, destDirPath, false) if err != nil { - return fmt.Errorf("failed to fetch local component %#q:\n%w", + return nil, fmt.Errorf("failed to fetch local component %#q:\n%w", component.GetName(), err) } // Download source files from lookaside cache if available. // Skip this step when SkipLookaside is set (e.g., during rendering). if !opts.SkipLookaside { - err = m.downloadLookasideSources(ctx, component, destDirPath) + prov, err := m.downloadLookasideSources(ctx, component, destDirPath) if err != nil { - return fmt.Errorf("failed to download lookaside sources for component %#q:\n%w", + return nil, fmt.Errorf("failed to download lookaside sources for component %#q:\n%w", component.GetName(), err) } + + return prov, nil } - return nil + return nil, nil } // downloadLookasideSources downloads source tarballs from a lookaside cache for the given component. // It resolves the appropriate lookaside URI from the distro configuration and uses the component's // upstream name (if set) as the package name for the lookaside lookup. -// Returns nil if no lookaside downloader or URI is available. +// Returns provenance entries for downloaded files, or nil if no lookaside downloader or URI is available. func (m *sourceManager) downloadLookasideSources( ctx context.Context, component components.Component, destDirPath string, -) error { +) ([]SourceProvenance, error) { if m.lookasideDownloader == nil { - return nil + return nil, nil } if m.lookasideBaseURI == "" { - return nil + return nil, nil } packageName := resolvePackageName(component) - err := m.lookasideDownloader.ExtractSourcesFromRepo(ctx, destDirPath, packageName, m.lookasideBaseURI, nil) + downloads, err := m.lookasideDownloader.ExtractSourcesFromRepo(ctx, destDirPath, packageName, m.lookasideBaseURI, nil) if err != nil { - return fmt.Errorf("failed to extract sources from lookaside cache:\n%w", err) + return nil, fmt.Errorf("failed to extract sources from lookaside cache:\n%w", err) } - return nil + return ConvertDownloadsToProvenance(downloads), nil } func (m *sourceManager) fetchUpstreamComponent( ctx context.Context, component components.Component, destDirPath string, opts ...FetchComponentOption, -) error { +) ([]SourceProvenance, error) { if len(m.upstreamComponentProviders) == 0 { - return fmt.Errorf("no upstream component origins configured for component %#q", + return nil, fmt.Errorf("no upstream component origins configured for component %#q", component.GetName()) } @@ -586,20 +631,20 @@ func (m *sourceManager) fetchUpstreamComponent( // Try each upstream component provider, until one succeeds for _, provider := range m.upstreamComponentProviders { - err := provider.GetComponent(ctx, component, destDirPath, opts...) + prov, err := provider.GetComponent(ctx, component, destDirPath, opts...) if err == nil { slog.Debug("Successfully fetched upstream component", "component", component.GetName(), "provider", fmt.Sprintf("%T", provider)) - return nil + return prov, nil } lastError = err } // If we tried providers but none succeeded - return fmt.Errorf("failed to fetch upstream component %#q:\n%w", + return nil, fmt.Errorf("failed to fetch upstream component %#q:\n%w", component.GetName(), lastError) } diff --git a/internal/providers/sourceproviders/sourcemanager_test.go b/internal/providers/sourceproviders/sourcemanager_test.go index 6f100792..26ff3b75 100644 --- a/internal/providers/sourceproviders/sourcemanager_test.go +++ b/internal/providers/sourceproviders/sourcemanager_test.go @@ -24,6 +24,7 @@ import ( const ( testDestDir = "/output" testSourceTarball = "source.tar.gz" + testSourceSHA256 = "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9" ) // testDefaultDistro returns a [sourceproviders.ResolvedDistro] matching the test @@ -74,7 +75,7 @@ func TestSourceManager_FetchComponent_EmptyComponentName(t *testing.T) { sourceManager, err := sourceproviders.NewSourceManager(env.Env, testDefaultDistro()) require.NoError(t, err) - err = sourceManager.FetchComponent(t.Context(), component, "/output") + _, err = sourceManager.FetchComponent(t.Context(), component, "/output") require.Error(t, err) require.Contains(t, err.Error(), "component name is empty") } @@ -103,7 +104,7 @@ func TestSourceManager_FetchComponent_LocalComponent_SpecError(t *testing.T) { sourceManager, err := sourceproviders.NewSourceManager(env.Env, testDefaultDistro()) require.NoError(t, err) - err = sourceManager.FetchComponent(t.Context(), component, "/output") + _, err = sourceManager.FetchComponent(t.Context(), component, "/output") require.Error(t, err) require.Contains(t, err.Error(), "failed to fetch local component") } @@ -184,7 +185,7 @@ func TestSourceManager_FetchComponent_LocalComponent_ProviderError(t *testing.T) sourceManager, err := sourceproviders.NewSourceManager(env.Env, testDefaultDistro()) require.NoError(t, err) - err = sourceManager.FetchComponent(t.Context(), component, "/output") + _, err = sourceManager.FetchComponent(t.Context(), component, "/output") require.Error(t, err) require.Contains(t, err.Error(), "failed to fetch local component") } @@ -213,7 +214,7 @@ func TestSourceManager_FetchComponent_UpstreamComponent_AllProvidersFail(t *test sourceManager, err := sourceproviders.NewSourceManager(env.Env, testDefaultDistro()) require.NoError(t, err) - err = sourceManager.FetchComponent(t.Context(), component, "/output") + _, err = sourceManager.FetchComponent(t.Context(), component, "/output") require.Error(t, err) require.Contains(t, err.Error(), "failed to fetch upstream component") } @@ -243,7 +244,7 @@ func TestSourceManager_FetchComponent_EmptyDestPath(t *testing.T) { require.NoError(t, err) // Empty destination path should be caught by the provider - err = sourceManager.FetchComponent(t.Context(), component, "") + _, err = sourceManager.FetchComponent(t.Context(), component, "") require.Error(t, err) require.Contains(t, err.Error(), "destination path cannot be empty") } @@ -263,7 +264,7 @@ func TestSourceManager_FetchFiles_NoSourceFiles(t *testing.T) { sourceManager, err := sourceproviders.NewSourceManager(env.Env, testDefaultDistro()) require.NoError(t, err) - err = sourceManager.FetchFiles(t.Context(), component, testDestDir) + _, err = sourceManager.FetchFiles(t.Context(), component, testDestDir) require.NoError(t, err) } @@ -291,8 +292,9 @@ func TestSourceManager_FetchFiles_ExistingFile(t *testing.T) { sourceManager, err := sourceproviders.NewSourceManager(env.Env, testDefaultDistro()) require.NoError(t, err) - err = sourceManager.FetchFiles(t.Context(), component, testDestDir) + provenance, err := sourceManager.FetchFiles(t.Context(), component, testDestDir) require.NoError(t, err) + assert.Empty(t, provenance, "existing files should be skipped from provenance") } func TestSourceManager_FetchFiles_Errors(t *testing.T) { @@ -364,7 +366,7 @@ func TestSourceManager_FetchFiles_Errors(t *testing.T) { sourceManager, err := sourceproviders.NewSourceManager(env.Env, distro) require.NoError(t, err) - err = sourceManager.FetchFiles(t.Context(), component, testDestDir) + _, err = sourceManager.FetchFiles(t.Context(), component, testDestDir) require.Error(t, err) require.Contains(t, err.Error(), testCase.expectedError) }) @@ -522,7 +524,7 @@ func TestSourceManager_FetchComponent_LocalComponent_WithSkipLookaside(t *testin require.NoError(t, err) // With SkipLookaside, downloadLookasideSources is not called and the fetch succeeds. - err = sourceManager.FetchComponent(t.Context(), component, testDestDir, sourceproviders.WithSkipLookaside()) + _, err = sourceManager.FetchComponent(t.Context(), component, testDestDir, sourceproviders.WithSkipLookaside()) require.NoError(t, err) // Spec was copied to destination (FetchLocalComponent still ran). diff --git a/internal/providers/sourceproviders/sourceproviders_test/sourcemanager_mocks.go b/internal/providers/sourceproviders/sourceproviders_test/sourcemanager_mocks.go index 6707ba0b..3c2917bd 100644 --- a/internal/providers/sourceproviders/sourceproviders_test/sourcemanager_mocks.go +++ b/internal/providers/sourceproviders/sourceproviders_test/sourcemanager_mocks.go @@ -43,15 +43,16 @@ func (m *MockSourceManager) EXPECT() *MockSourceManagerMockRecorder { } // FetchComponent mocks base method. -func (m *MockSourceManager) FetchComponent(ctx context.Context, component components.Component, destDirPath string, opts ...sourceproviders.FetchComponentOption) error { +func (m *MockSourceManager) FetchComponent(ctx context.Context, component components.Component, destDirPath string, opts ...sourceproviders.FetchComponentOption) ([]sourceproviders.SourceProvenance, error) { m.ctrl.T.Helper() varargs := []any{ctx, component, destDirPath} for _, a := range opts { varargs = append(varargs, a) } ret := m.ctrl.Call(m, "FetchComponent", varargs...) - ret0, _ := ret[0].(error) - return ret0 + ret0, _ := ret[0].([]sourceproviders.SourceProvenance) + ret1, _ := ret[1].(error) + return ret0, ret1 } // FetchComponent indicates an expected call of FetchComponent. @@ -62,11 +63,12 @@ func (mr *MockSourceManagerMockRecorder) FetchComponent(ctx, component, destDirP } // FetchFiles mocks base method. -func (m *MockSourceManager) FetchFiles(ctx context.Context, component components.Component, destDirPath string) error { +func (m *MockSourceManager) FetchFiles(ctx context.Context, component components.Component, destDirPath string) ([]sourceproviders.SourceProvenance, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "FetchFiles", ctx, component, destDirPath) - ret0, _ := ret[0].(error) - return ret0 + ret0, _ := ret[0].([]sourceproviders.SourceProvenance) + ret1, _ := ret[1].(error) + return ret0, ret1 } // FetchFiles indicates an expected call of FetchFiles. diff --git a/internal/providers/sourceproviders/sourceproviders_test/sourcemanager_mocks_noop.go b/internal/providers/sourceproviders/sourceproviders_test/sourcemanager_mocks_noop.go index 8eae2044..375bc596 100644 --- a/internal/providers/sourceproviders/sourceproviders_test/sourcemanager_mocks_noop.go +++ b/internal/providers/sourceproviders/sourceproviders_test/sourcemanager_mocks_noop.go @@ -18,8 +18,8 @@ import gomock "go.uber.org/mock/gomock" // that never errors and does not create any files on the filesystem. func NewNoOpMockSourceManager(ctrl *gomock.Controller) *MockSourceManager { mock := NewMockSourceManager(ctrl) - mock.EXPECT().FetchFiles(gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes().Return(nil) - mock.EXPECT().FetchComponent(gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes().Return(nil) + mock.EXPECT().FetchFiles(gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes().Return(nil, nil) + mock.EXPECT().FetchComponent(gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes().Return(nil, nil) return mock } From b0a4324941baa8189e4de7301db48c410c839cbe Mon Sep 17 00:00:00 2001 From: Antonio Salinas Date: Mon, 18 May 2026 23:41:34 +0000 Subject: [PATCH 2/7] fix: prevent local source copy from overwriting fetched files Add SkipExistingFiles filter to copySourceDirectory so that FetchLocalComponent does not overwrite source files already downloaded by FetchFiles, matching the upstream provider behavior. Also suppress provenance reporting during dry-run and restrict hash enrichment to the overlay path to avoid incorrect audit data. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- internal/providers/sourceproviders/localcontentsprovider.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/providers/sourceproviders/localcontentsprovider.go b/internal/providers/sourceproviders/localcontentsprovider.go index e17db5b5..145634e4 100644 --- a/internal/providers/sourceproviders/localcontentsprovider.go +++ b/internal/providers/sourceproviders/localcontentsprovider.go @@ -95,6 +95,7 @@ func copySourceDirectory(dryRunnable opctx.DryRunnable, fs opctx.FS, sourceDirPa CopyFileOptions: fileutils.CopyFileOptions{ PreserveFileMode: true, }, + FileFilter: fileutils.SkipExistingFiles, } err := fileutils.CopyDirRecursive(dryRunnable, fs, sourceDirPath, destDirPath, copyOptions) From eaa586b5fa71ebae6a6208fff52fb49fee9bba39 Mon Sep 17 00:00:00 2001 From: Antonio Salinas Date: Tue, 19 May 2026 00:07:33 +0000 Subject: [PATCH 3/7] fix: preserve validation in dry-run and add provenance tests Move dry-run guards after origin/disable-origins validation so configuration errors are still caught during dry runs. Add FetchFiles tests for lookaside and origin-fallback provenance to cover the returned metadata fields. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../app/azldev/core/sources/sourceprep.go | 2 +- .../fedorasource/fedorasource_test.go | 10 +- .../sourceproviders/sourcemanager.go | 22 +++-- .../sourceproviders/sourcemanager_test.go | 91 +++++++++++++++++++ 4 files changed, 113 insertions(+), 12 deletions(-) diff --git a/internal/app/azldev/core/sources/sourceprep.go b/internal/app/azldev/core/sources/sourceprep.go index 8e048f81..f858b707 100644 --- a/internal/app/azldev/core/sources/sourceprep.go +++ b/internal/app/azldev/core/sources/sourceprep.go @@ -221,7 +221,7 @@ func NewPreparer( func (p *sourcePreparerImpl) PrepareSources( ctx context.Context, component components.Component, outputDir string, applyOverlays bool, ) (*ProvenanceReport, error) { - var allProvenance []sourceproviders.SourceProvenance + allProvenance := []sourceproviders.SourceProvenance{} // Use the source manager to fetch source files (archives, patches, etc.) // Skip this step when skipLookaside is set — source tarballs are not needed diff --git a/internal/providers/sourceproviders/fedorasource/fedorasource_test.go b/internal/providers/sourceproviders/fedorasource/fedorasource_test.go index 4eb54857..1b3898e3 100644 --- a/internal/providers/sourceproviders/fedorasource/fedorasource_test.go +++ b/internal/providers/sourceproviders/fedorasource/fedorasource_test.go @@ -94,7 +94,7 @@ func TestExtractSourcesFromRepo(t *testing.T) { require.NoError(t, err) require.NoError(t, ctx.FS().MkdirAll(testRepoDir, testDirPerms)) - setupSourcesFile(t, ctx.FS(), testSourcesContent) + setupSourcesFile(t, ctx.FS(), testRepoDir, testSourcesContent) // Mock downloader should create files with content matching the expected hashes mockDownloader.EXPECT().Download( @@ -179,7 +179,7 @@ func TestExtractSourcesFromRepoDownloadFailure(t *testing.T) { require.NoError(t, err) require.NoError(t, ctx.FS().MkdirAll(testRepoDir, testDirPerms)) - setupSourcesFile(t, ctx.FS(), testSingleSourceContent) + setupSourcesFile(t, ctx.FS(), testRepoDir, testSingleSourceContent) downloadErr := assert.AnError mockDownloader.EXPECT().Download(gomock.Any(), gomock.Any(), gomock.Any()). @@ -200,7 +200,7 @@ func TestExtractSourcesFromRepoHashMismatch(t *testing.T) { require.NoError(t, err) require.NoError(t, ctx.FS().MkdirAll(testRepoDir, testDirPerms)) - setupSourcesFile(t, ctx.FS(), testSingleSourceContent) + setupSourcesFile(t, ctx.FS(), testRepoDir, testSingleSourceContent) // Mock downloader creates a file with WRONG content (hash mismatch) mockDownloader.EXPECT().Download(gomock.Any(), gomock.Any(), gomock.Any()). @@ -215,10 +215,10 @@ func TestExtractSourcesFromRepoHashMismatch(t *testing.T) { } // setupSourcesFile creates a 'sources' file in the specified directory with the given content. -func setupSourcesFile(t *testing.T, fs afero.Fs, content string) { +func setupSourcesFile(t *testing.T, fs afero.Fs, repoDir, content string) { t.Helper() - sourcesPath := filepath.Join(testRepoDir, testSourcesFile) + sourcesPath := filepath.Join(repoDir, testSourcesFile) require.NoError(t, afero.WriteFile(fs, sourcesPath, []byte(content), testFilePerms)) } diff --git a/internal/providers/sourceproviders/sourcemanager.go b/internal/providers/sourceproviders/sourcemanager.go index e7291bd5..b9eb19c1 100644 --- a/internal/providers/sourceproviders/sourcemanager.go +++ b/internal/providers/sourceproviders/sourcemanager.go @@ -330,16 +330,14 @@ func (m *sourceManager) fetchSourceFile( return nil, nil //nolint:nilnil // nil provenance is intentional — file was not downloaded. } - // In dry-run mode the downloader no-ops without fetching files, so no provenance - // should be reported since nothing was actually downloaded. - if m.dryRunnable.DryRun() { - return nil, nil //nolint:nilnil // nil provenance — dry-run does not download. - } - // Phase 1: Try lookaside cache if hash info is available if fileRef.Hash != "" && fileRef.HashType != "" { sourceURL, lookasideErr := m.tryLookasideDownload(ctx, httpDownloader, component, fileRef, destPath) if lookasideErr == nil { + if m.dryRunnable.DryRun() { + return nil, nil //nolint:nilnil // dry-run — lookaside URL validated but no download. + } + return &SourceProvenance{ Filename: fileRef.Filename, OriginType: SourceOriginLookaside, @@ -372,6 +370,10 @@ func (m *sourceManager) fetchSourceFile( return nil, err } + if m.dryRunnable.DryRun() { + return nil, nil //nolint:nilnil // dry-run — origin validated but no download. + } + return &SourceProvenance{ Filename: fileRef.Filename, OriginType: SourceOriginURL, @@ -402,6 +404,14 @@ func (m *sourceManager) tryLookasideDownload( return "", fmt.Errorf("failed to build lookaside URL for %#q:\n%w", fileRef.Filename, err) } + if m.dryRunnable.DryRun() { + slog.Info("Dry run: would download source file from lookaside cache", + "filename", fileRef.Filename, + "url", sourceURL) + + return sourceURL, nil + } + slog.Info("Downloading source file from lookaside cache...", "filename", fileRef.Filename, "url", sourceURL) diff --git a/internal/providers/sourceproviders/sourcemanager_test.go b/internal/providers/sourceproviders/sourcemanager_test.go index 26ff3b75..e651c3ce 100644 --- a/internal/providers/sourceproviders/sourcemanager_test.go +++ b/internal/providers/sourceproviders/sourcemanager_test.go @@ -5,6 +5,8 @@ package sourceproviders_test import ( "errors" + "net/http" + "net/http/httptest" "os/exec" "path/filepath" "testing" @@ -297,6 +299,95 @@ func TestSourceManager_FetchFiles_ExistingFile(t *testing.T) { assert.Empty(t, provenance, "existing files should be skipped from provenance") } +func TestSourceManager_FetchFiles_LookasideProvenance(t *testing.T) { + env := testutils.NewTestEnv(t) + ctrl := gomock.NewController(t) + component := components_testutils.NewMockComponent(ctrl) + + expectedPath := "/lookaside/test-component/source.tar.gz/sha256/" + testSourceSHA256 + "/source.tar.gz" + + server := httptest.NewServer(http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) { + if request.URL.Path != expectedPath { + http.NotFound(writer, request) + + return + } + + _, _ = writer.Write([]byte("hello world")) + })) + defer server.Close() + + distro := testDefaultDistro() + distro.Definition.LookasideBaseURI = server.URL + "/lookaside/$pkg/$filename/$hashtype/$hash/$filename" + + componentConfig := &projectconfig.ComponentConfig{ + SourceFiles: []projectconfig.SourceFileReference{{ + Filename: testSourceTarball, + Hash: testSourceSHA256, + HashType: fileutils.HashTypeSHA256, + }}, + } + + component.EXPECT().GetName().AnyTimes().Return("test-component") + component.EXPECT().GetConfig().AnyTimes().Return(componentConfig) + + sourceManager, err := sourceproviders.NewSourceManager(env.Env, distro) + require.NoError(t, err) + + provenance, err := sourceManager.FetchFiles(t.Context(), component, testDestDir) + require.NoError(t, err) + require.Len(t, provenance, 1) + + assert.Equal(t, testSourceTarball, provenance[0].Filename) + assert.Equal(t, sourceproviders.SourceOriginLookaside, provenance[0].OriginType) + assert.Equal(t, server.URL+expectedPath, provenance[0].URL) + assert.Equal(t, fileutils.HashTypeSHA256, provenance[0].HashType) + assert.Equal(t, testSourceSHA256, provenance[0].Hash) +} + +func TestSourceManager_FetchFiles_OriginFallbackProvenance(t *testing.T) { + env := testutils.NewTestEnv(t) + ctrl := gomock.NewController(t) + component := components_testutils.NewMockComponent(ctrl) + + server := httptest.NewServer(http.HandlerFunc(func(writer http.ResponseWriter, _ *http.Request) { + _, _ = writer.Write([]byte("hello world")) + })) + defer server.Close() + + distro := testDefaultDistro() + distro.Definition.LookasideBaseURI = "" + + originURL := server.URL + "/origin/source.tar.gz" + componentConfig := &projectconfig.ComponentConfig{ + SourceFiles: []projectconfig.SourceFileReference{{ + Filename: testSourceTarball, + Hash: testSourceSHA256, + HashType: fileutils.HashTypeSHA256, + Origin: projectconfig.Origin{ + Type: projectconfig.OriginTypeURI, + Uri: originURL, + }, + }}, + } + + component.EXPECT().GetName().AnyTimes().Return("test-component") + component.EXPECT().GetConfig().AnyTimes().Return(componentConfig) + + sourceManager, err := sourceproviders.NewSourceManager(env.Env, distro) + require.NoError(t, err) + + provenance, err := sourceManager.FetchFiles(t.Context(), component, testDestDir) + require.NoError(t, err) + require.Len(t, provenance, 1) + + assert.Equal(t, testSourceTarball, provenance[0].Filename) + assert.Equal(t, sourceproviders.SourceOriginURL, provenance[0].OriginType) + assert.Equal(t, originURL, provenance[0].URL) + assert.Equal(t, fileutils.HashTypeSHA256, provenance[0].HashType) + assert.Equal(t, testSourceSHA256, provenance[0].Hash) +} + func TestSourceManager_FetchFiles_Errors(t *testing.T) { tests := []struct { name string From a3e79d23d5261e51e6a43ace64528e9fde7d1315 Mon Sep 17 00:00:00 2001 From: Antonio Salinas Date: Fri, 22 May 2026 22:24:04 +0000 Subject: [PATCH 4/7] fix: address PR review comments for source provenance - Extend RPMProvider.GetRPM to return download URL and populate provenance in RPMContentsProviderImpl - Remove unnecessary length check in ConvertDownloadsToProvenance - Add explanatory comment for SkipExistingFiles in local provider - Name the provenance return value in FedoraSourcesProvider.GetComponent - Run hash enrichment unconditionally (not gated by applyOverlays) - Update enrichment to always sync hashes from the finalized sources file so provenance reflects the post-overlay state - Add example JSON output to prepare-sources command description - Update tests and regenerate mocks Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../azldev/cmds/component/preparesources.go | 12 ++++++++ .../app/azldev/core/sources/sourceprep.go | 29 +++++++------------ .../azldev/core/sources/sourceprep_test.go | 8 ++--- internal/providers/rpmprovider/rpmprovider.go | 21 ++++++++------ .../providers/rpmprovider/rpmprovider_test.go | 16 +++++----- .../rpmprovider_test/rpmprovider_mocks.go | 7 +++-- .../sourceproviders/fedorasourceprovider.go | 2 +- .../sourceproviders/identityprovider_test.go | 4 +-- .../sourceproviders/localcontentsprovider.go | 1 - .../providers/sourceproviders/provenance.go | 4 --- .../sourceproviders/rpmcontentsprovider.go | 15 ++++++++-- .../rpmcontentsprovider_test.go | 4 +-- 12 files changed, 69 insertions(+), 54 deletions(-) diff --git a/internal/app/azldev/cmds/component/preparesources.go b/internal/app/azldev/cmds/component/preparesources.go index 7c642bfc..99cca27f 100644 --- a/internal/app/azldev/cmds/component/preparesources.go +++ b/internal/app/azldev/cmds/component/preparesources.go @@ -49,6 +49,18 @@ The command output reports downloaded source provenance entries with filename, origin type, URL, hash type, and hash. Only files actually downloaded during this run are included. +Example JSON output (azldev component prep-sources -p curl -o /tmp/curl --force -O json): + + [ + { + "filename": "curl-8.12.1.tar.xz", + "originType": "lookaside-url", + "url": "https://src.fedoraproject.org/repo/pkgs/rpms/curl/sha512/.../curl-8.12.1.tar.xz", + "hashType": "sha512", + "hash": "a1b2c3..." + } + ] + Only one component may be selected at a time.`, Example: ` # Prepare sources for a component azldev component prep-sources -p curl -o ./build/work/scratch/curl --force diff --git a/internal/app/azldev/core/sources/sourceprep.go b/internal/app/azldev/core/sources/sourceprep.go index f858b707..69933b3d 100644 --- a/internal/app/azldev/core/sources/sourceprep.go +++ b/internal/app/azldev/core/sources/sourceprep.go @@ -273,11 +273,11 @@ func (p *sourcePreparerImpl) PrepareSources( "component", component.GetName()) } - if applyOverlays { - if err := p.enrichProvenanceWithResolvedHashes(allProvenance, outputDir); err != nil { - return nil, fmt.Errorf("failed to resolve provenance hashes for component %#q:\n%w", - component.GetName(), err) - } + // Back-fill missing hash fields in provenance entries from the 'sources' file. + // Entries that already carry hashes (the common case) are skipped. + if err := p.enrichProvenanceWithResolvedHashes(allProvenance, outputDir); err != nil { + return nil, fmt.Errorf("failed to resolve provenance hashes for component %#q:\n%w", + component.GetName(), err) } // Record the changes as synthetic git history when dist-git creation is enabled. @@ -294,8 +294,10 @@ func (p *sourcePreparerImpl) PrepareSources( }, nil } -// enrichProvenanceWithResolvedHashes fills missing hash fields in provenance entries -// using values from the finalized 'sources' file. +// enrichProvenanceWithResolvedHashes updates hash fields in provenance entries +// to match the finalized 'sources' file. This ensures the provenance report +// reflects the actual state of files after overlays have been applied and +// [updateSourcesFile] has computed the current hashes. func (p *sourcePreparerImpl) enrichProvenanceWithResolvedHashes( provenance []sourceproviders.SourceProvenance, outputDir string, @@ -328,22 +330,13 @@ func (p *sourcePreparerImpl) enrichProvenanceWithResolvedHashes( } for provenanceIndex := range provenance { - if provenance[provenanceIndex].Hash != "" && provenance[provenanceIndex].HashType != "" { - continue - } - entry, found := hashByFilename[provenance[provenanceIndex].Filename] if !found { continue } - if provenance[provenanceIndex].HashType == "" { - provenance[provenanceIndex].HashType = entry.HashType - } - - if provenance[provenanceIndex].Hash == "" { - provenance[provenanceIndex].Hash = entry.Hash - } + provenance[provenanceIndex].HashType = entry.HashType + provenance[provenanceIndex].Hash = entry.Hash } return nil diff --git a/internal/app/azldev/core/sources/sourceprep_test.go b/internal/app/azldev/core/sources/sourceprep_test.go index 8238b390..df6bb7ff 100644 --- a/internal/app/azldev/core/sources/sourceprep_test.go +++ b/internal/app/azldev/core/sources/sourceprep_test.go @@ -223,7 +223,7 @@ func TestPrepareSources_ProvenanceReportFillsResolvedHashes(t *testing.T) { assert.Equal(t, testFileSHA512, report.Sources[0].Hash) } -func TestPrepareSources_SkipOverlays_DoesNotEnrichHashes(t *testing.T) { +func TestPrepareSources_SkipOverlays_EnrichesFromUpstreamSources(t *testing.T) { const ( testSpecPath = testOutputDir + "/test-component.spec" testSourcePath = testOutputDir + "/sources" @@ -264,14 +264,14 @@ func TestPrepareSources_SkipOverlays_DoesNotEnrichHashes(t *testing.T) { preparer, err := sources.NewPreparer(sourceManager, ctx.FS(), ctx, ctx, sources.WithAllowNoHashes()) require.NoError(t, err) - // applyOverlays = false — hash enrichment must NOT run to avoid corrupting provenance. + // applyOverlays = false — enrichment still runs and back-fills from the upstream 'sources' file. report, err := preparer.PrepareSources(ctx, component, testOutputDir, false) require.NoError(t, err) require.NotNil(t, report) require.Len(t, report.Sources, 1) - assert.Empty(t, report.Sources[0].HashType, "hash should not be enriched without overlays") - assert.Empty(t, report.Sources[0].Hash, "hash should not be enriched without overlays") + assert.Equal(t, fileutils.HashTypeSHA512, report.Sources[0].HashType, "hash type should be enriched from sources file") + assert.Equal(t, "abc123def456", report.Sources[0].Hash, "hash should be enriched from sources file") } func TestPrepareSources_SourceManagerError(t *testing.T) { diff --git a/internal/providers/rpmprovider/rpmprovider.go b/internal/providers/rpmprovider/rpmprovider.go index 4882fb47..0d1870ba 100644 --- a/internal/providers/rpmprovider/rpmprovider.go +++ b/internal/providers/rpmprovider/rpmprovider.go @@ -17,9 +17,9 @@ import ( ) type RPMProvider interface { - // GetRPM retrieves an RPM file for the given package and version, and returns it in form of a closeable stream. - // The caller is responsible for closing the returned [io.ReadCloser]. - GetRPM(ctx context.Context, name string, version *rpm.Version) (io.ReadCloser, error) + // GetRPM retrieves an RPM file for the given package and version, and returns it in form of a closeable stream + // along with the URL the RPM was downloaded from. The caller is responsible for closing the returned [io.ReadCloser]. + GetRPM(ctx context.Context, name string, version *rpm.Version) (io.ReadCloser, string, error) } // RPMProviderImpl implements [RPMProvider]. @@ -56,10 +56,13 @@ func NewRPMProviderImpl( }, nil } -// GetRPM retrieves an RPM file for the given package and returns it in form of a closeable stream. -func (p *RPMProviderImpl) GetRPM(ctx context.Context, name string, version *rpm.Version) (io.ReadCloser, error) { +// GetRPM retrieves an RPM file for the given package and returns it in form of a closeable stream +// along with the URL the RPM was downloaded from. +func (p *RPMProviderImpl) GetRPM( + ctx context.Context, name string, version *rpm.Version, +) (io.ReadCloser, string, error) { if name == "" { - return nil, errors.New("package name cannot be empty") + return nil, "", errors.New("package name cannot be empty") } eventArgs := []any{"name", name} @@ -74,7 +77,7 @@ func (p *RPMProviderImpl) GetRPM(ctx context.Context, name string, version *rpm. // Get the RPM URL rpmURL, err := p.querier.GetRPMLocation(ctx, name, version) if err != nil { - return nil, fmt.Errorf("failed to get RPM location for package %#q, version %#q:\n%w", name, version, err) + return nil, "", fmt.Errorf("failed to get RPM location for package %#q, version %#q:\n%w", name, version, err) } evt = p.eventListener.StartEvent("Downloading RPM", "name", name, "url", rpmURL) @@ -83,8 +86,8 @@ func (p *RPMProviderImpl) GetRPM(ctx context.Context, name string, version *rpm. fileStream, err := p.downloader.FetchStream(ctx, rpmURL) if err != nil { - return nil, fmt.Errorf("failed to fetch RPM file for package %#q, version %#q:\n%w", name, version, err) + return nil, "", fmt.Errorf("failed to fetch RPM file for package %#q, version %#q:\n%w", name, version, err) } - return fileStream, nil + return fileStream, rpmURL, nil } diff --git a/internal/providers/rpmprovider/rpmprovider_test.go b/internal/providers/rpmprovider/rpmprovider_test.go index 208dd34b..e75bf5af 100644 --- a/internal/providers/rpmprovider/rpmprovider_test.go +++ b/internal/providers/rpmprovider/rpmprovider_test.go @@ -195,9 +195,10 @@ func TestGetRPM(t *testing.T) { Return(packageURL, nil). Times(1) - rpmStream, err := provider.GetRPM(t.Context(), packageName, packageVersion) + rpmStream, rpmURL, err := provider.GetRPM(t.Context(), packageName, packageVersion) require.NoError(t, err) require.NotNil(t, rpmStream) + assert.Equal(t, packageURL, rpmURL) rpmData, err := io.ReadAll(rpmStream) require.NoError(t, err) @@ -213,9 +214,10 @@ func TestGetRPM(t *testing.T) { Return(packageURL, nil). Times(1) - rpmStream, err := provider.GetRPM(t.Context(), packageName, nil) + rpmStream, rpmURL, err := provider.GetRPM(t.Context(), packageName, nil) require.NoError(t, err) require.NotNil(t, rpmStream) + assert.Equal(t, packageURL, rpmURL) rpmData, err := io.ReadAll(rpmStream) require.NoError(t, err) @@ -224,7 +226,7 @@ func TestGetRPM(t *testing.T) { }) t.Run("empty package name should fail", func(t *testing.T) { - _, err := provider.GetRPM(t.Context(), "", nil) + _, _, err := provider.GetRPM(t.Context(), "", nil) assert.Error(t, err) }) @@ -235,7 +237,7 @@ func TestGetRPM(t *testing.T) { Return("", errors.New(errorMessage)). Times(1) - _, err := provider.GetRPM(t.Context(), packageName, nil) + _, _, err := provider.GetRPM(t.Context(), packageName, nil) assert.Contains(t, err.Error(), errorMessage) assert.Contains(t, err.Error(), packageName) }) @@ -261,7 +263,7 @@ func TestGetRPMFailureSimulation(t *testing.T) { Return("", querierError). Times(1) - stream, err := provider.GetRPM(t.Context(), packageName, packageVersion) + stream, _, err := provider.GetRPM(t.Context(), packageName, packageVersion) require.Error(t, err) assert.Nil(t, stream) @@ -289,7 +291,7 @@ func TestGetRPMFailureSimulation(t *testing.T) { mockedDownloadProvider, err := rpmprovider.NewRPMProviderImpl(eventListener, mockDownloader, mockQuerier) require.NoError(t, err) - stream, err := mockedDownloadProvider.GetRPM(t.Context(), packageName, packageVersion) + stream, _, err := mockedDownloadProvider.GetRPM(t.Context(), packageName, packageVersion) require.Error(t, err) assert.Nil(t, stream) @@ -320,7 +322,7 @@ func TestGetRPMFailureSimulation(t *testing.T) { Return(test.url, nil). Times(1) - stream, err := provider.GetRPM(t.Context(), packageName, packageVersion) + stream, _, err := provider.GetRPM(t.Context(), packageName, packageVersion) require.Error(t, err) assert.Nil(t, stream) assert.Contains(t, err.Error(), test.url) diff --git a/internal/providers/rpmprovider/rpmprovider_test/rpmprovider_mocks.go b/internal/providers/rpmprovider/rpmprovider_test/rpmprovider_mocks.go index 7f1a4c1b..fec86621 100644 --- a/internal/providers/rpmprovider/rpmprovider_test/rpmprovider_mocks.go +++ b/internal/providers/rpmprovider/rpmprovider_test/rpmprovider_mocks.go @@ -47,12 +47,13 @@ func (m *MockRPMProvider) EXPECT() *MockRPMProviderMockRecorder { } // GetRPM mocks base method. -func (m *MockRPMProvider) GetRPM(ctx context.Context, name string, version *rpm.Version) (io.ReadCloser, error) { +func (m *MockRPMProvider) GetRPM(ctx context.Context, name string, version *rpm.Version) (io.ReadCloser, string, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "GetRPM", ctx, name, version) ret0, _ := ret[0].(io.ReadCloser) - ret1, _ := ret[1].(error) - return ret0, ret1 + ret1, _ := ret[1].(string) + ret2, _ := ret[2].(error) + return ret0, ret1, ret2 } // GetRPM indicates an expected call of GetRPM. diff --git a/internal/providers/sourceproviders/fedorasourceprovider.go b/internal/providers/sourceproviders/fedorasourceprovider.go index fc59fc93..6e09f733 100644 --- a/internal/providers/sourceproviders/fedorasourceprovider.go +++ b/internal/providers/sourceproviders/fedorasourceprovider.go @@ -85,7 +85,7 @@ func NewFedoraSourcesProviderImpl( func (g *FedoraSourcesProviderImpl) GetComponent( ctx context.Context, component components.Component, destDirPath string, opts ...FetchComponentOption, -) (_ []SourceProvenance, err error) { +) (provenance []SourceProvenance, err error) { resolved := resolveFetchComponentOptions(opts) componentName := component.GetName() diff --git a/internal/providers/sourceproviders/identityprovider_test.go b/internal/providers/sourceproviders/identityprovider_test.go index 2c22103b..384c0506 100644 --- a/internal/providers/sourceproviders/identityprovider_test.go +++ b/internal/providers/sourceproviders/identityprovider_test.go @@ -218,7 +218,7 @@ func TestRPMProvider_ResolveIdentity(t *testing.T) { mockRPMProvider := rpmprovider_test.NewMockRPMProvider(ctrl) mockRPMProvider.EXPECT(). GetRPM(gomock.Any(), "test-pkg", nil). - Return(io.NopCloser(strings.NewReader(rpmContent)), nil) + Return(io.NopCloser(strings.NewReader(rpmContent)), "", nil) provider, provErr := sourceproviders.NewRPMContentsProviderImpl( rpm_test.NewMockRPMExtractor(ctrl), mockRPMProvider) @@ -234,7 +234,7 @@ func TestRPMProvider_ResolveIdentity(t *testing.T) { mockRPMProvider := rpmprovider_test.NewMockRPMProvider(ctrl) mockRPMProvider.EXPECT(). GetRPM(gomock.Any(), "test-pkg", nil). - Return(nil, errors.New("download failed")) + Return(nil, "", errors.New("download failed")) provider, provErr := sourceproviders.NewRPMContentsProviderImpl( rpm_test.NewMockRPMExtractor(ctrl), mockRPMProvider) diff --git a/internal/providers/sourceproviders/localcontentsprovider.go b/internal/providers/sourceproviders/localcontentsprovider.go index 145634e4..e17db5b5 100644 --- a/internal/providers/sourceproviders/localcontentsprovider.go +++ b/internal/providers/sourceproviders/localcontentsprovider.go @@ -95,7 +95,6 @@ func copySourceDirectory(dryRunnable opctx.DryRunnable, fs opctx.FS, sourceDirPa CopyFileOptions: fileutils.CopyFileOptions{ PreserveFileMode: true, }, - FileFilter: fileutils.SkipExistingFiles, } err := fileutils.CopyDirRecursive(dryRunnable, fs, sourceDirPath, destDirPath, copyOptions) diff --git a/internal/providers/sourceproviders/provenance.go b/internal/providers/sourceproviders/provenance.go index c46c8ee1..fe667070 100644 --- a/internal/providers/sourceproviders/provenance.go +++ b/internal/providers/sourceproviders/provenance.go @@ -43,10 +43,6 @@ type SourceProvenance struct { // (returned by lookaside extraction) into [SourceProvenance] entries with the // [SourceOriginLookaside] origin type. func ConvertDownloadsToProvenance(downloads []fedorasource.SourceDownload) []SourceProvenance { - if len(downloads) == 0 { - return nil - } - prov := make([]SourceProvenance, len(downloads)) for i, download := range downloads { prov[i] = SourceProvenance{ diff --git a/internal/providers/sourceproviders/rpmcontentsprovider.go b/internal/providers/sourceproviders/rpmcontentsprovider.go index eeada18c..dd7923c3 100644 --- a/internal/providers/sourceproviders/rpmcontentsprovider.go +++ b/internal/providers/sourceproviders/rpmcontentsprovider.go @@ -60,7 +60,7 @@ func (r *RPMContentsProviderImpl) GetComponent( } // Get the RPM - rpmReader, err := r.rpmProvider.GetRPM(ctx, component.GetName(), nil) + rpmReader, rpmURL, err := r.rpmProvider.GetRPM(ctx, component.GetName(), nil) if err != nil { return nil, fmt.Errorf("failed to get the RPM file for component %#q: %w", component.GetName(), err) @@ -74,7 +74,16 @@ func (r *RPMContentsProviderImpl) GetComponent( component.GetName(), err) } - return nil, nil + var provenance []SourceProvenance + if rpmURL != "" { + provenance = []SourceProvenance{{ + Filename: component.GetName() + ".src.rpm", + OriginType: SourceOriginURL, + URL: rpmURL, + }} + } + + return provenance, nil } // ResolveIdentity implements [SourceIdentityProvider] by downloading the source RPM @@ -88,7 +97,7 @@ func (r *RPMContentsProviderImpl) ResolveIdentity( return "", errors.New("component name cannot be empty") } - rpmReader, err := r.rpmProvider.GetRPM(ctx, component.GetName(), nil) + rpmReader, _, err := r.rpmProvider.GetRPM(ctx, component.GetName(), nil) if err != nil { return "", fmt.Errorf("failed to get RPM for identity of component %#q:\n%w", component.GetName(), err) diff --git a/internal/providers/sourceproviders/rpmcontentsprovider_test.go b/internal/providers/sourceproviders/rpmcontentsprovider_test.go index 5b709a62..59a02cd2 100644 --- a/internal/providers/sourceproviders/rpmcontentsprovider_test.go +++ b/internal/providers/sourceproviders/rpmcontentsprovider_test.go @@ -151,7 +151,7 @@ func TestGetComponentFailureSimulation(t *testing.T) { mockRPMProvider := rpmprovider_test.NewMockRPMProvider(ctrl) mockRPMProvider.EXPECT(). GetRPM(gomock.Any(), packageName, nil). - Return(nil, rpmProviderError). + Return(nil, "", rpmProviderError). Times(1) provider, err := sourceproviders.NewRPMContentsProviderImpl(mockExtractor, mockRPMProvider) @@ -168,7 +168,7 @@ func TestGetComponentFailureSimulation(t *testing.T) { mockRPMProvider := rpmprovider_test.NewMockRPMProvider(ctrl) mockRPMProvider.EXPECT(). GetRPM(gomock.Any(), packageName, nil). - Return(dummyReadCloser, nil). + Return(dummyReadCloser, "", nil). Times(1) // Set up failing extractor call From bc731306755c0b8c5a4328ec87d47a862e29bb66 Mon Sep 17 00:00:00 2001 From: Antonio Salinas Date: Fri, 22 May 2026 22:43:31 +0000 Subject: [PATCH 5/7] fix: address Copilot review comments on provenance PR - Fix version suffix regex in doc generator to strip '0.0.0-devel' (matches versions with or without 'v' prefix) - Regenerate CLI docs without version churn - Derive RPM provenance filename from download URL instead of synthesizing from component name - Fix stale comment on enrichProvenanceWithResolvedHashes to reflect that hashes are always synced from the sources file Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../cli/azldev_component_prepare-sources.md | 12 ++++++++++ .../app/azldev/core/sources/sourceprep.go | 4 ++-- .../sourceproviders/rpmcontentsprovider.go | 22 ++++++++++++++++++- 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/docs/user/reference/cli/azldev_component_prepare-sources.md b/docs/user/reference/cli/azldev_component_prepare-sources.md index 43a46bbe..ab500f7c 100644 --- a/docs/user/reference/cli/azldev_component_prepare-sources.md +++ b/docs/user/reference/cli/azldev_component_prepare-sources.md @@ -17,6 +17,18 @@ The command output reports downloaded source provenance entries with filename, origin type, URL, hash type, and hash. Only files actually downloaded during this run are included. +Example JSON output (azldev component prep-sources -p curl -o /tmp/curl --force -O json): + + [ + { + "filename": "curl-8.12.1.tar.xz", + "originType": "lookaside-url", + "url": "https://src.fedoraproject.org/repo/pkgs/rpms/curl/sha512/.../curl-8.12.1.tar.xz", + "hashType": "sha512", + "hash": "a1b2c3..." + } + ] + Only one component may be selected at a time. ``` diff --git a/internal/app/azldev/core/sources/sourceprep.go b/internal/app/azldev/core/sources/sourceprep.go index 69933b3d..3ddea315 100644 --- a/internal/app/azldev/core/sources/sourceprep.go +++ b/internal/app/azldev/core/sources/sourceprep.go @@ -273,8 +273,8 @@ func (p *sourcePreparerImpl) PrepareSources( "component", component.GetName()) } - // Back-fill missing hash fields in provenance entries from the 'sources' file. - // Entries that already carry hashes (the common case) are skipped. + // Sync provenance hashes with the finalized 'sources' file so the report + // reflects the current on-disk state (including any overlay modifications). if err := p.enrichProvenanceWithResolvedHashes(allProvenance, outputDir); err != nil { return nil, fmt.Errorf("failed to resolve provenance hashes for component %#q:\n%w", component.GetName(), err) diff --git a/internal/providers/sourceproviders/rpmcontentsprovider.go b/internal/providers/sourceproviders/rpmcontentsprovider.go index dd7923c3..a93e7414 100644 --- a/internal/providers/sourceproviders/rpmcontentsprovider.go +++ b/internal/providers/sourceproviders/rpmcontentsprovider.go @@ -10,6 +10,8 @@ import ( "errors" "fmt" "io" + "net/url" + "path" "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/components" "github.com/microsoft/azure-linux-dev-tools/internal/providers/rpmprovider" @@ -75,9 +77,11 @@ func (r *RPMContentsProviderImpl) GetComponent( } var provenance []SourceProvenance + if rpmURL != "" { + filename := filenameFromURL(rpmURL, component.GetName()+".src.rpm") provenance = []SourceProvenance{{ - Filename: component.GetName() + ".src.rpm", + Filename: filename, OriginType: SourceOriginURL, URL: rpmURL, }} @@ -86,6 +90,22 @@ func (r *RPMContentsProviderImpl) GetComponent( return provenance, nil } +// filenameFromURL extracts the base filename from a URL path. +// Falls back to fallback if the URL cannot be parsed or the path is empty. +func filenameFromURL(rawURL, fallback string) string { + parsed, err := url.Parse(rawURL) + if err != nil { + return fallback + } + + base := path.Base(parsed.Path) + if base == "" || base == "." || base == "/" { + return fallback + } + + return base +} + // ResolveIdentity implements [SourceIdentityProvider] by downloading the source RPM // and computing its SHA256 hash. This is a heavyweight operation since it requires a full // RPM download. From f3b0424edea139abab3d9681551247273a8fbe64 Mon Sep 17 00:00:00 2001 From: Antonio Salinas Date: Wed, 27 May 2026 16:36:48 +0000 Subject: [PATCH 6/7] Fixed formatting mistakes --- internal/providers/rpmprovider/rpmprovider.go | 4 ++-- internal/providers/sourceproviders/rpmcontentsprovider.go | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/internal/providers/rpmprovider/rpmprovider.go b/internal/providers/rpmprovider/rpmprovider.go index 0d1870ba..3339616f 100644 --- a/internal/providers/rpmprovider/rpmprovider.go +++ b/internal/providers/rpmprovider/rpmprovider.go @@ -77,7 +77,7 @@ func (p *RPMProviderImpl) GetRPM( // Get the RPM URL rpmURL, err := p.querier.GetRPMLocation(ctx, name, version) if err != nil { - return nil, "", fmt.Errorf("failed to get RPM location for package %#q, version %#q:\n%w", name, version, err) + return nil, "", fmt.Errorf("failed to get RPM location for package %#q, version %v:\n%w", name, version, err) } evt = p.eventListener.StartEvent("Downloading RPM", "name", name, "url", rpmURL) @@ -86,7 +86,7 @@ func (p *RPMProviderImpl) GetRPM( fileStream, err := p.downloader.FetchStream(ctx, rpmURL) if err != nil { - return nil, "", fmt.Errorf("failed to fetch RPM file for package %#q, version %#q:\n%w", name, version, err) + return nil, "", fmt.Errorf("failed to fetch RPM file for package %#q, version %v:\n%w", name, version, err) } return fileStream, rpmURL, nil diff --git a/internal/providers/sourceproviders/rpmcontentsprovider.go b/internal/providers/sourceproviders/rpmcontentsprovider.go index a93e7414..4bbf58ae 100644 --- a/internal/providers/sourceproviders/rpmcontentsprovider.go +++ b/internal/providers/sourceproviders/rpmcontentsprovider.go @@ -64,7 +64,7 @@ func (r *RPMContentsProviderImpl) GetComponent( // Get the RPM rpmReader, rpmURL, err := r.rpmProvider.GetRPM(ctx, component.GetName(), nil) if err != nil { - return nil, fmt.Errorf("failed to get the RPM file for component %#q: %w", + return nil, fmt.Errorf("failed to get the RPM file for component %#q:\n%w", component.GetName(), err) } defer defers.HandleDeferError(rpmReader.Close, &err) @@ -72,7 +72,7 @@ func (r *RPMContentsProviderImpl) GetComponent( // Extract the RPM contents err = r.extractor.Extract(rpmReader, destDirPath) if err != nil { - return nil, fmt.Errorf("failed to extract the RPM file of component %#q: %w", + return nil, fmt.Errorf("failed to extract the RPM file of component %#q:\n%w", component.GetName(), err) } From 50243750e3d2a7e30e70e44c0fdee11174fe4622 Mon Sep 17 00:00:00 2001 From: Antonio Salinas Date: Fri, 29 May 2026 23:31:00 +0000 Subject: [PATCH 7/7] Minor fixes --- .../sourceproviders/fedorasource/fedorasource.go | 11 ++++++----- internal/providers/sourceproviders/provenance.go | 4 ++++ .../providers/sourceproviders/rpmcontentsprovider.go | 10 +++++++++- .../sourceproviders/rpmcontentsprovider_test.go | 2 +- internal/providers/sourceproviders/sourcemanager.go | 7 +------ 5 files changed, 21 insertions(+), 13 deletions(-) diff --git a/internal/providers/sourceproviders/fedorasource/fedorasource.go b/internal/providers/sourceproviders/fedorasource/fedorasource.go index b7e5776a..c01edd80 100644 --- a/internal/providers/sourceproviders/fedorasource/fedorasource.go +++ b/internal/providers/sourceproviders/fedorasource/fedorasource.go @@ -254,6 +254,12 @@ func (g *FedoraSourceDownloaderImpl) downloadAndVerifySources( repoDir string, skipSet map[string]bool, ) ([]SourceDownload, error) { + // In dry-run mode the downloader no-ops, so no files would actually be + // fetched. Short-circuit to avoid wasted allocations and iteration. + if g.dryRunnable.DryRun() { + return nil, nil + } + downloads := make([]SourceDownload, 0, len(sourceFiles)) sourcesTotal := len(sourceFiles) @@ -315,11 +321,6 @@ func (g *FedoraSourceDownloaderImpl) downloadAndVerifySources( }) } - // In dry-run mode the downloader no-ops, so no files were actually fetched. - if g.dryRunnable.DryRun() { - return nil, nil - } - return downloads, nil } diff --git a/internal/providers/sourceproviders/provenance.go b/internal/providers/sourceproviders/provenance.go index fe667070..c46c8ee1 100644 --- a/internal/providers/sourceproviders/provenance.go +++ b/internal/providers/sourceproviders/provenance.go @@ -43,6 +43,10 @@ type SourceProvenance struct { // (returned by lookaside extraction) into [SourceProvenance] entries with the // [SourceOriginLookaside] origin type. func ConvertDownloadsToProvenance(downloads []fedorasource.SourceDownload) []SourceProvenance { + if len(downloads) == 0 { + return nil + } + prov := make([]SourceProvenance, len(downloads)) for i, download := range downloads { prov[i] = SourceProvenance{ diff --git a/internal/providers/sourceproviders/rpmcontentsprovider.go b/internal/providers/sourceproviders/rpmcontentsprovider.go index 4bbf58ae..f1289e5b 100644 --- a/internal/providers/sourceproviders/rpmcontentsprovider.go +++ b/internal/providers/sourceproviders/rpmcontentsprovider.go @@ -17,6 +17,7 @@ import ( "github.com/microsoft/azure-linux-dev-tools/internal/providers/rpmprovider" "github.com/microsoft/azure-linux-dev-tools/internal/rpm" "github.com/microsoft/azure-linux-dev-tools/internal/utils/defers" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" ) // RPMContentsProviderImpl implements [ComponentSourceProvider]. It relies on @@ -69,8 +70,13 @@ func (r *RPMContentsProviderImpl) GetComponent( } defer defers.HandleDeferError(rpmReader.Close, &err) + // Hash the SRPM while extracting so provenance includes a checksum + // without requiring a second download. + hasher := sha256.New() + teeReader := io.TeeReader(rpmReader, hasher) + // Extract the RPM contents - err = r.extractor.Extract(rpmReader, destDirPath) + err = r.extractor.Extract(teeReader, destDirPath) if err != nil { return nil, fmt.Errorf("failed to extract the RPM file of component %#q:\n%w", component.GetName(), err) @@ -84,6 +90,8 @@ func (r *RPMContentsProviderImpl) GetComponent( Filename: filename, OriginType: SourceOriginURL, URL: rpmURL, + HashType: fileutils.HashTypeSHA256, + Hash: hex.EncodeToString(hasher.Sum(nil)), }} } diff --git a/internal/providers/sourceproviders/rpmcontentsprovider_test.go b/internal/providers/sourceproviders/rpmcontentsprovider_test.go index 59a02cd2..a6205b6c 100644 --- a/internal/providers/sourceproviders/rpmcontentsprovider_test.go +++ b/internal/providers/sourceproviders/rpmcontentsprovider_test.go @@ -175,7 +175,7 @@ func TestGetComponentFailureSimulation(t *testing.T) { extractorError := errors.New("extractor failed") mockExtractor := rpm_test.NewMockRPMExtractor(ctrl) mockExtractor.EXPECT(). - Extract(dummyReadCloser, testDestinationDir). + Extract(gomock.Any(), testDestinationDir). Return(extractorError). Times(1) diff --git a/internal/providers/sourceproviders/sourcemanager.go b/internal/providers/sourceproviders/sourcemanager.go index b9eb19c1..b036218a 100644 --- a/internal/providers/sourceproviders/sourcemanager.go +++ b/internal/providers/sourceproviders/sourcemanager.go @@ -516,12 +516,7 @@ func (m *sourceManager) FetchComponent( switch sourceType { case projectconfig.SpecSourceTypeLocal, projectconfig.SpecSourceTypeUnspecified: - prov, err := m.fetchLocalComponent(ctx, component, destDirPath, resolved) - if err != nil { - return nil, err - } - - return prov, nil + return m.fetchLocalComponent(ctx, component, destDirPath, resolved) case projectconfig.SpecSourceTypeUpstream: return m.fetchUpstreamComponent(ctx, component, destDirPath, opts...)