From 616961a4a9054ecdbc4a04eb4003cf7908606bf7 Mon Sep 17 00:00:00 2001 From: Daniel McIlvaney Date: Mon, 1 Jun 2026 13:07:42 -0700 Subject: [PATCH 1/4] feat: add historic git-backed filesystem When creating synthetic dist-gits, there are certain packages that use overlays which mutate the calculated release value for a package. If the overlays are only applied to the last commit in the dist-git, then when an overlay changes the 'version' of a package, the autorelease tool will see the version change ONLY on that last commit, and conclude the release should be reset back to '1'. If we then add another commit to the dist-git, the release should be '2', but the autorelease tool will still see the version bump happen only on the last commit, and again reset the release to '1'. (see kernel-headers for an example) To avoid this, we must apply the overlays to all commits in the dist-git, as they would have appeared at the time of the commit. azldev already abstracts filesystem access through 'fs', so we can implement a 'gitfs' that loads toml configs from the project's git history. A copy-on-write in-memory filesystem allows for writing if necessary. --- internal/projectconfig/historic.go | 117 +++++++ internal/projectconfig/historic_test.go | 207 ++++++++++++ internal/utils/fileperms/fileperms.go | 6 + internal/utils/gitfs/gitfs.go | 427 ++++++++++++++++++++++++ internal/utils/gitfs/gitfs_test.go | 179 ++++++++++ 5 files changed, 936 insertions(+) create mode 100644 internal/projectconfig/historic.go create mode 100644 internal/projectconfig/historic_test.go create mode 100644 internal/utils/gitfs/gitfs.go create mode 100644 internal/utils/gitfs/gitfs_test.go diff --git a/internal/projectconfig/historic.go b/internal/projectconfig/historic.go new file mode 100644 index 00000000..163dbf9e --- /dev/null +++ b/internal/projectconfig/historic.go @@ -0,0 +1,117 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +package projectconfig + +import ( + "fmt" + + gogit "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/gitfs" + "github.com/spf13/afero" +) + +// historicDryRunnable reports that we are not in dry-run mode: the historic +// loader genuinely writes the embedded default configs into its in-memory +// scratch overlay. +type historicDryRunnable struct{} + +func (historicDryRunnable) DryRun() bool { return false } + +// historicOSEnv is a deliberately inert OS environment. Historic config loading +// must depend only on what is in the git tree, never on the host's working +// directory or user-level XDG config. Returning empty values causes the +// user-config lookup to resolve to nothing. +type historicOSEnv struct{} + +func (historicOSEnv) Getwd() (string, error) { return "", nil } +func (historicOSEnv) Chdir(string) error { return nil } +func (historicOSEnv) Getenv(string) string { return "" } +func (historicOSEnv) IsCurrentUserMemberOf(string) (bool, error) { + return false, nil +} +func (historicOSEnv) LookupGroupID(string) (int, error) { return 0, nil } + +// LoadProjectConfigAtCommit loads the project configuration exactly as it +// existed at a specific commit in the project repository, without checking +// anything out to disk. +// +// It reads files through a read-only [gitfs.Fs] backed by the commit's tree, +// layered under an in-memory writable overlay so the loader can stage its +// embedded default configs. Only in-tree configuration participates: host +// working directory and user-level config are intentionally excluded. +// +// referenceDir is interpreted relative to the tree root (e.g. the project +// subdirectory containing azldev.toml). Both absolute ("/sub") and relative +// ("sub") forms are accepted. 
+func LoadProjectConfigAtCommit( + repo *gogit.Repository, + commitHash plumbing.Hash, + referenceDir string, + permissiveConfigParsing bool, +) (projectDir string, config *ProjectConfig, err error) { + base, err := gitfs.NewFromCommit(repo, commitHash) + if err != nil { + return "", nil, fmt.Errorf("failed to open git filesystem at commit %s:\n%w", commitHash, err) + } + + // Layer a writable in-memory overlay so the loader can stage its embedded + // default configs (and any other scratch writes) without touching the + // read-only git tree underneath. + fs := afero.NewCopyOnWriteFs(base, afero.NewMemMapFs()) + + return LoadProjectConfig( + historicDryRunnable{}, + fs, + historicOSEnv{}, + referenceDir, + false, // disableDefaultConfig: defaults are part of resolved overlays. + "", // tempDirPath: empty lets the loader pick a default temp dir. + nil, // extraConfigFilePaths: none for historic loads. + permissiveConfigParsing, + ) +} + +// ResolveComponentOverlaysAtCommit loads the project config as of the given +// commit and returns the resolved overlays for the named component, combining +// project-level defaults, component-group defaults, and the component's own +// overlays. +// +// Distro-level default overlays are intentionally excluded: resolving them +// requires distro/version selection (which depends on the live invocation, not +// the historic tree), and distro defaults are not used for version-setting +// overlays. This keeps historic resolution self-contained and deterministic. +// +// Returns (nil, nil) when the component is absent at that commit. 
+func ResolveComponentOverlaysAtCommit( + repo *gogit.Repository, + commitHash plumbing.Hash, + referenceDir string, + componentName string, + permissiveConfigParsing bool, +) ([]ComponentOverlay, error) { + _, config, err := LoadProjectConfigAtCommit(repo, commitHash, referenceDir, permissiveConfigParsing) + if err != nil { + return nil, err + } + + explicit, ok := config.Components[componentName] + if !ok { + return nil, nil + } + + resolved, err := ResolveComponentConfig( + explicit, + config.DefaultComponentConfig, + ComponentConfig{}, // distro defaults excluded; see doc comment. + config.ComponentGroups, + config.GroupsByComponent[componentName], + ) + if err != nil { + return nil, fmt.Errorf("resolving overlays for component %#q at commit %s:\n%w", + componentName, commitHash, err) + } + + return resolved.Overlays, nil +} diff --git a/internal/projectconfig/historic_test.go b/internal/projectconfig/historic_test.go new file mode 100644 index 00000000..02337065 --- /dev/null +++ b/internal/projectconfig/historic_test.go @@ -0,0 +1,207 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +package projectconfig_test + +import ( + "testing" + "time" + + "github.com/go-git/go-billy/v5" + "github.com/go-git/go-billy/v5/memfs" + gogit "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/object" + "github.com/go-git/go-git/v5/storage/memory" + "github.com/microsoft/azure-linux-dev-tools/internal/projectconfig" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func writeWorktreeFile(t *testing.T, fs billy.Filesystem, content string) { + t.Helper() + + file, err := fs.Create("azldev.toml") + require.NoError(t, err) + + _, err = file.Write([]byte(content)) + require.NoError(t, err) + require.NoError(t, file.Close()) +} + +func commitWorktree(t *testing.T, repo *gogit.Repository, msg string) plumbing.Hash { + t.Helper() + + worktree, err := repo.Worktree() + require.NoError(t, err) + require.NoError(t, worktree.AddGlob(".")) + + hash, err := worktree.Commit(msg, &gogit.CommitOptions{ + Author: &object.Signature{Name: "t", Email: "t@t.com", When: time.Now()}, + }) + require.NoError(t, err) + + return hash +} + +// TestLoadProjectConfigAtCommit verifies that a component's overlays defined in +// azldev.toml are recovered when loading the project config as of a historical +// commit, reading purely from the git tree (no checkout). 
+func TestLoadProjectConfigAtCommit(t *testing.T) { + bfs := memfs.New() + + repo, err := gogit.Init(memory.NewStorage(), bfs) + require.NoError(t, err) + + writeWorktreeFile(t, bfs, ` +[components.foo] +[[components.foo.overlays]] +type = "spec-search-replace" +regex = "1\\.0\\.0" +replacement = "2.0.0" +`) + + hash := commitWorktree(t, repo, "add foo overlay") + + projectDir, config, err := projectconfig.LoadProjectConfigAtCommit(repo, hash, "/", false) + require.NoError(t, err) + require.NotNil(t, config) + assert.Equal(t, "/", projectDir) + + comp, ok := config.Components["foo"] + require.True(t, ok, "component foo should be present") + require.Len(t, comp.Overlays, 1) + assert.Equal(t, projectconfig.ComponentOverlaySearchAndReplaceInSpec, comp.Overlays[0].Type) + assert.Equal(t, "2.0.0", comp.Overlays[0].Replacement) +} + +// TestResolveComponentOverlaysAtCommit verifies that overlays inherited from a +// component group default are merged with the component's own overlays when +// resolving historically. 
+func TestResolveComponentOverlaysAtCommit(t *testing.T) { + bfs := memfs.New() + + repo, err := gogit.Init(memory.NewStorage(), bfs) + require.NoError(t, err) + + writeWorktreeFile(t, bfs, ` +[component-groups.shared] +components = ["foo"] +[[component-groups.shared.default-component-config.overlays]] +type = "spec-search-replace" +regex = "from-group" +replacement = "group-applied" + +[components.foo] +[[components.foo.overlays]] +type = "spec-search-replace" +regex = "from-comp" +replacement = "comp-applied" +`) + + hash := commitWorktree(t, repo, "add group + component overlays") + + overlays, err := projectconfig.ResolveComponentOverlaysAtCommit(repo, hash, "/", "foo", false) + require.NoError(t, err) + require.Len(t, overlays, 2) + + replacements := []string{overlays[0].Replacement, overlays[1].Replacement} + assert.Contains(t, replacements, "group-applied") + assert.Contains(t, replacements, "comp-applied") +} + +// TestResolveComponentOverlaysAtCommit_MissingComponent verifies that a request +// for a component absent at the commit returns nil overlays without error. +func TestResolveComponentOverlaysAtCommit_MissingComponent(t *testing.T) { + bfs := memfs.New() + + repo, err := gogit.Init(memory.NewStorage(), bfs) + require.NoError(t, err) + + writeWorktreeFile(t, bfs, "[components.foo]\n") + + hash := commitWorktree(t, repo, "add foo") + + overlays, err := projectconfig.ResolveComponentOverlaysAtCommit(repo, hash, "/", "absent", false) + require.NoError(t, err) + assert.Nil(t, overlays) +} + +// TestResolveComponentOverlaysAtCommit_TracksHistory verifies that resolving +// overlays at an OLDER commit returns the overlay value as it existed at THAT +// commit — not the latest value. This is the core guarantee historical overlay +// replay relies on: each synthetic commit must see the version it actually +// carried at that point in history. If resolution leaked HEAD's config, every +// historic entry would show the current version. 
+func TestResolveComponentOverlaysAtCommit_TracksHistory(t *testing.T) { + bfs := memfs.New() + + repo, err := gogit.Init(memory.NewStorage(), bfs) + require.NoError(t, err) + + // Commit A: overlay replacement is 2.0.0. + writeWorktreeFile(t, bfs, ` +[components.foo] +[[components.foo.overlays]] +type = "spec-search-replace" +regex = "VERSION" +replacement = "2.0.0" +`) + hashA := commitWorktree(t, repo, "foo -> 2.0.0") + + // Commit B: same overlay, replacement bumped to 3.0.0. + writeWorktreeFile(t, bfs, ` +[components.foo] +[[components.foo.overlays]] +type = "spec-search-replace" +regex = "VERSION" +replacement = "3.0.0" +`) + hashB := commitWorktree(t, repo, "foo -> 3.0.0") + + overlaysA, err := projectconfig.ResolveComponentOverlaysAtCommit(repo, hashA, "/", "foo", false) + require.NoError(t, err) + require.Len(t, overlaysA, 1) + assert.Equal(t, "2.0.0", overlaysA[0].Replacement, "commit A must resolve its own (older) overlay value") + + overlaysB, err := projectconfig.ResolveComponentOverlaysAtCommit(repo, hashB, "/", "foo", false) + require.NoError(t, err) + require.Len(t, overlaysB, 1) + assert.Equal(t, "3.0.0", overlaysB[0].Replacement, "commit B must resolve its own (newer) overlay value") +} + +// TestResolveComponentOverlaysAtCommit_PermissiveToleratesUndefinedRef verifies +// that with permissive parsing enabled, a config whose component group references +// an undefined component still loads, so the target component's overlays can be +// recovered. Historical commits may legitimately reference components that were +// only defined in a later revision; a strict load would fail the entire resolve +// and mis-attribute the version for that commit. +func TestResolveComponentOverlaysAtCommit_PermissiveToleratesUndefinedRef(t *testing.T) { + bfs := memfs.New() + + repo, err := gogit.Init(memory.NewStorage(), bfs) + require.NoError(t, err) + + // "shared" group references "not-yet-defined", which has no [components] entry. 
+ writeWorktreeFile(t, bfs, ` +[component-groups.shared] +components = ["foo", "not-yet-defined"] + +[components.foo] +[[components.foo.overlays]] +type = "spec-search-replace" +regex = "VERSION" +replacement = "2.0.0" +`) + hash := commitWorktree(t, repo, "foo defined, dangling group ref") + + // Strict load fails on the undefined component reference. + _, err = projectconfig.ResolveComponentOverlaysAtCommit(repo, hash, "/", "foo", false) + require.Error(t, err) + + // Permissive load tolerates it and still returns foo's overlays. + overlays, err := projectconfig.ResolveComponentOverlaysAtCommit(repo, hash, "/", "foo", true) + require.NoError(t, err) + require.Len(t, overlays, 1) + assert.Equal(t, "2.0.0", overlays[0].Replacement) +} diff --git a/internal/utils/fileperms/fileperms.go b/internal/utils/fileperms/fileperms.go index 38c429a2..35e0660f 100644 --- a/internal/utils/fileperms/fileperms.go +++ b/internal/utils/fileperms/fileperms.go @@ -25,4 +25,10 @@ const ( PrivateDir os.FileMode = 0o700 // Directory permissions: user read/write/execute; group read/execute. PublicDir os.FileMode = 0o755 + + // Read-only permissions (no write bits), for immutable or synthetic views. + // ReadOnlyFile: all read. ReadOnlyExec: all read/execute (directories, + // executables, symlinks). + ReadOnlyFile os.FileMode = 0o444 + ReadOnlyExec os.FileMode = 0o555 ) diff --git a/internal/utils/gitfs/gitfs.go b/internal/utils/gitfs/gitfs.go new file mode 100644 index 00000000..7c52aefc --- /dev/null +++ b/internal/utils/gitfs/gitfs.go @@ -0,0 +1,427 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +// Package gitfs provides a read-only [afero.Fs] backed by a git tree at a +// specific commit. It lets code that already speaks [afero.Fs] (such as the +// project-config loader) read files as they existed at an arbitrary point in +// history, without checking anything out to disk. +// +// Paths are interpreted relative to the root of the git tree. 
Both absolute +// paths (e.g. "/base/comps/x.toml") and io/fs-style relative paths (e.g. +// "base/comps/x.toml") are accepted and normalized identically: the leading +// slash is stripped and the path is cleaned. This mirrors how an +// [afero.OsFs] rooted at the tree root would behave, which is what callers +// like the config loader (which pass absolute paths) and the doublestar glob +// adapter (which passes relative paths) expect. +// +// All mutating operations return an error: the filesystem is strictly +// read-only. To support callers that need to write scratch files (e.g. the +// loader copying embedded default configs to a temp dir), layer a writable +// filesystem on top with [afero.NewCopyOnWriteFs]. +package gitfs + +import ( + "bytes" + "errors" + "fmt" + "io" + "os" + "path" + "path/filepath" + "sort" + "time" + + gogit "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/filemode" + "github.com/go-git/go-git/v5/plumbing/object" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileperms" + "github.com/spf13/afero" +) + +// errReadOnly is returned by all mutating operations. +var errReadOnly = errors.New("gitfs: read-only filesystem") + +// Fs is a read-only [afero.Fs] backed by a git tree. +type Fs struct { + repo *gogit.Repository + tree *object.Tree +} + +// Compile-time assurance that Fs implements afero.Fs. +var _ afero.Fs = (*Fs)(nil) + +// NewFromCommit creates a read-only filesystem exposing the tree of the given +// commit. 
+func NewFromCommit(repo *gogit.Repository, commitHash plumbing.Hash) (*Fs, error) { + commit, err := repo.CommitObject(commitHash) + if err != nil { + return nil, fmt.Errorf("gitfs: resolve commit %s:\n%w", commitHash, err) + } + + tree, err := commit.Tree() + if err != nil { + return nil, fmt.Errorf("gitfs: read tree for commit %s:\n%w", commitHash, err) + } + + return &Fs{repo: repo, tree: tree}, nil +} + +// normalize converts an incoming afero path (absolute or relative) to a +// clean, slash-separated, tree-relative path. The tree root is "". +func normalize(name string) string { + cleaned := path.Clean("/" + filepath.ToSlash(name)) + + // Strip the leading slash; the root becomes "". + return cleaned[1:] +} + +// notExist builds a PathError that reports as "does not exist" so that helpers +// like afero.Exists and os.IsNotExist behave correctly. +func notExist(op, name string) error { + return &os.PathError{Op: op, Path: name, Err: os.ErrNotExist} +} + +// Open opens the named file or directory for reading. +func (f *Fs) Open(name string) (afero.File, error) { + rel := normalize(name) + + // Root of the tree is always a directory. + if rel == "" { + return newDirFile(f, name, "", f.tree), nil + } + + entry, err := f.tree.FindEntry(rel) + if err != nil { + return nil, notExist("open", name) + } + + if entry.Mode == filemode.Dir { + subtree, subErr := f.tree.Tree(rel) + if subErr != nil { + return nil, notExist("open", name) + } + + return newDirFile(f, name, rel, subtree), nil + } + + content, err := f.blobContents(entry.Hash) + if err != nil { + return nil, &os.PathError{Op: "open", Path: name, Err: err} + } + + return newRegularFile(name, entry, content), nil +} + +// OpenFile opens the named file for reading. Any flag requesting write access +// is rejected. 
+func (f *Fs) OpenFile(name string, flag int, _ os.FileMode) (afero.File, error) { + if flag&(os.O_WRONLY|os.O_RDWR|os.O_CREATE|os.O_APPEND|os.O_TRUNC) != 0 { + return nil, &os.PathError{Op: "open", Path: name, Err: errReadOnly} + } + + return f.Open(name) +} + +// Stat returns file info for the named file or directory. +func (f *Fs) Stat(name string) (os.FileInfo, error) { + rel := normalize(name) + + if rel == "" { + return &fileInfo{name: ".", isDir: true, mode: os.ModeDir | fileperms.ReadOnlyExec}, nil + } + + entry, err := f.tree.FindEntry(rel) + if err != nil { + return nil, notExist("stat", name) + } + + return f.entryInfo(path.Base(rel), entry) +} + +// Name identifies this filesystem implementation. +func (f *Fs) Name() string { return "gitfs" } + +// blobContents reads the full contents of a blob. +func (f *Fs) blobContents(hash plumbing.Hash) ([]byte, error) { + blob, err := f.repo.BlobObject(hash) + if err != nil { + return nil, fmt.Errorf("read blob %s:\n%w", hash, err) + } + + reader, err := blob.Reader() + if err != nil { + return nil, fmt.Errorf("open blob %s:\n%w", hash, err) + } + + defer reader.Close() + + content, err := io.ReadAll(reader) + if err != nil { + return nil, fmt.Errorf("read blob %s:\n%w", hash, err) + } + + return content, nil +} + +// entryInfo builds a FileInfo for a tree entry, fetching the blob size for +// regular files. +func (f *Fs) entryInfo(name string, entry *object.TreeEntry) (os.FileInfo, error) { + if entry.Mode == filemode.Dir { + return &fileInfo{name: name, isDir: true, mode: os.ModeDir | fileperms.ReadOnlyExec}, nil + } + + var size int64 + + if blob, err := f.repo.BlobObject(entry.Hash); err == nil { + size = blob.Size + } + + return &fileInfo{name: name, size: size, mode: entryFileMode(entry.Mode)}, nil +} + +// entryFileMode maps a git filemode to an os.FileMode for non-directory entries. 
+func entryFileMode(mode filemode.FileMode) os.FileMode { + switch mode { + case filemode.Executable: + return fileperms.ReadOnlyExec + case filemode.Symlink: + return os.ModeSymlink | fileperms.ReadOnlyExec + case filemode.Empty, filemode.Dir, filemode.Regular, filemode.Deprecated, filemode.Submodule: + return fileperms.ReadOnlyFile + default: + return fileperms.ReadOnlyFile + } +} + +// +// Mutating operations — all unsupported. +// + +func (f *Fs) Create(name string) (afero.File, error) { + return nil, &os.PathError{Op: "create", Path: name, Err: errReadOnly} +} + +func (f *Fs) Mkdir(name string, _ os.FileMode) error { + return &os.PathError{Op: "mkdir", Path: name, Err: errReadOnly} +} + +func (f *Fs) MkdirAll(path string, _ os.FileMode) error { + return &os.PathError{Op: "mkdir", Path: path, Err: errReadOnly} +} + +func (f *Fs) Remove(name string) error { + return &os.PathError{Op: "remove", Path: name, Err: errReadOnly} +} + +func (f *Fs) RemoveAll(path string) error { + return &os.PathError{Op: "removeall", Path: path, Err: errReadOnly} +} + +func (f *Fs) Rename(oldname, _ string) error { + return &os.PathError{Op: "rename", Path: oldname, Err: errReadOnly} +} + +func (f *Fs) Chmod(name string, _ os.FileMode) error { + return &os.PathError{Op: "chmod", Path: name, Err: errReadOnly} +} + +func (f *Fs) Chown(name string, _, _ int) error { + return &os.PathError{Op: "chown", Path: name, Err: errReadOnly} +} + +func (f *Fs) Chtimes(name string, _, _ time.Time) error { + return &os.PathError{Op: "chtimes", Path: name, Err: errReadOnly} +} + +// +// fileInfo +// + +type fileInfo struct { + name string + size int64 + mode os.FileMode + isDir bool +} + +func (i *fileInfo) Name() string { return i.name } +func (i *fileInfo) Size() int64 { return i.size } +func (i *fileInfo) Mode() os.FileMode { return i.mode } +func (i *fileInfo) ModTime() time.Time { return time.Time{} } +func (i *fileInfo) IsDir() bool { return i.isDir } +func (i *fileInfo) Sys() any { return nil } 
+ +// +// regularFile — a read-only view over blob contents. +// + +type regularFile struct { + name string + info os.FileInfo + reader *bytes.Reader +} + +var _ afero.File = (*regularFile)(nil) + +func newRegularFile(name string, entry *object.TreeEntry, content []byte) *regularFile { + return &regularFile{ + name: name, + info: &fileInfo{ + name: path.Base(normalize(name)), + size: int64(len(content)), + mode: entryFileMode(entry.Mode), + }, + reader: bytes.NewReader(content), + } +} + +func (f *regularFile) Close() error { return nil } +func (f *regularFile) Name() string { return f.name } +func (f *regularFile) Stat() (os.FileInfo, error) { return f.info, nil } +func (f *regularFile) Sync() error { return nil } + +func (f *regularFile) Read(p []byte) (int, error) { + n, err := f.reader.Read(p) + + return n, err //nolint:wrapcheck // pass through bytes.Reader io semantics (incl. io.EOF) unchanged +} + +func (f *regularFile) ReadAt(p []byte, off int64) (int, error) { + n, err := f.reader.ReadAt(p, off) + + return n, err //nolint:wrapcheck // pass through bytes.Reader io semantics unchanged +} + +func (f *regularFile) Seek(off int64, whence int) (int64, error) { + pos, err := f.reader.Seek(off, whence) + + return pos, err //nolint:wrapcheck // pass through bytes.Reader io semantics unchanged +} + +func (f *regularFile) Readdir(int) ([]os.FileInfo, error) { + return nil, &os.PathError{Op: "readdir", Path: f.name, Err: errors.New("not a directory")} +} + +func (f *regularFile) Readdirnames(int) ([]string, error) { + return nil, &os.PathError{Op: "readdir", Path: f.name, Err: errors.New("not a directory")} +} + +func (f *regularFile) Write([]byte) (int, error) { + return 0, &os.PathError{Op: "write", Path: f.name, Err: errReadOnly} +} + +func (f *regularFile) WriteAt([]byte, int64) (int, error) { + return 0, &os.PathError{Op: "write", Path: f.name, Err: errReadOnly} +} + +func (f *regularFile) WriteString(string) (int, error) { + return 0, &os.PathError{Op: "write", 
Path: f.name, Err: errReadOnly} +} + +func (f *regularFile) Truncate(int64) error { + return &os.PathError{Op: "truncate", Path: f.name, Err: errReadOnly} +} + +// +// dirFile — a read-only view over a tree's immediate entries. +// + +type dirFile struct { + fs *Fs + name string + relPath string + tree *object.Tree + offset int +} + +var _ afero.File = (*dirFile)(nil) + +func newDirFile(fs *Fs, name, relPath string, tree *object.Tree) *dirFile { + return &dirFile{fs: fs, name: name, relPath: relPath, tree: tree} +} + +func (d *dirFile) Close() error { return nil } +func (d *dirFile) Read([]byte) (int, error) { return 0, io.EOF } +func (d *dirFile) ReadAt([]byte, int64) (int, error) { return 0, io.EOF } +func (d *dirFile) Seek(int64, int) (int64, error) { return 0, nil } +func (d *dirFile) Name() string { return d.name } +func (d *dirFile) Sync() error { return nil } + +func (d *dirFile) Stat() (os.FileInfo, error) { + base := "." + if d.relPath != "" { + base = path.Base(d.relPath) + } + + return &fileInfo{name: base, isDir: true, mode: os.ModeDir | fileperms.ReadOnlyExec}, nil +} + +// Readdir returns the immediate children of the directory, sorted by name. +// It honors the offset/count semantics of os.File.Readdir. 
+func (d *dirFile) Readdir(count int) ([]os.FileInfo, error) { + entries := d.tree.Entries + + infos := make([]os.FileInfo, 0, len(entries)) + + for i := range entries { + entry := entries[i] + + info, err := d.fs.entryInfo(entry.Name, &entry) + if err != nil { + return nil, err + } + + infos = append(infos, info) + } + + sort.Slice(infos, func(a, b int) bool { return infos[a].Name() < infos[b].Name() }) + + if d.offset >= len(infos) { + if count > 0 { + return nil, io.EOF + } + + return []os.FileInfo{}, nil + } + + infos = infos[d.offset:] + + if count > 0 && count < len(infos) { + infos = infos[:count] + } + + d.offset += len(infos) + + return infos, nil +} + +func (d *dirFile) Readdirnames(n int) ([]string, error) { + infos, err := d.Readdir(n) + if err != nil { + return nil, err + } + + names := make([]string, len(infos)) + for i, info := range infos { + names[i] = info.Name() + } + + return names, nil +} + +func (d *dirFile) Write([]byte) (int, error) { + return 0, &os.PathError{Op: "write", Path: d.name, Err: errReadOnly} +} + +func (d *dirFile) WriteAt([]byte, int64) (int, error) { + return 0, &os.PathError{Op: "write", Path: d.name, Err: errReadOnly} +} + +func (d *dirFile) WriteString(string) (int, error) { + return 0, &os.PathError{Op: "write", Path: d.name, Err: errReadOnly} +} + +func (d *dirFile) Truncate(int64) error { + return &os.PathError{Op: "truncate", Path: d.name, Err: errReadOnly} +} diff --git a/internal/utils/gitfs/gitfs_test.go b/internal/utils/gitfs/gitfs_test.go new file mode 100644 index 00000000..741038ad --- /dev/null +++ b/internal/utils/gitfs/gitfs_test.go @@ -0,0 +1,179 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +package gitfs_test + +import ( + "io" + "os" + "testing" + "time" + + "github.com/bmatcuk/doublestar/v4" + "github.com/go-git/go-billy/v5" + "github.com/go-git/go-billy/v5/memfs" + gogit "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/object" + "github.com/go-git/go-git/v5/storage/memory" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/gitfs" + "github.com/spf13/afero" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// writeFile creates/overwrites a file in the in-memory worktree. +func writeFile(t *testing.T, fs billy.Filesystem, relPath, content string) { + t.Helper() + + file, err := fs.Create(relPath) + require.NoError(t, err) + + _, err = file.Write([]byte(content)) + require.NoError(t, err) + require.NoError(t, file.Close()) +} + +// commitAll stages everything and commits, returning the commit hash. +func commitAll(t *testing.T, repo *gogit.Repository, msg string) plumbing.Hash { + t.Helper() + + worktree, err := repo.Worktree() + require.NoError(t, err) + + require.NoError(t, worktree.AddGlob(".")) + + hash, err := worktree.Commit(msg, &gogit.CommitOptions{ + Author: &object.Signature{Name: "t", Email: "t@t.com", When: time.Now()}, + }) + require.NoError(t, err) + + return hash +} + +// newTestRepo builds an in-memory repo with a tiny project tree and returns the +// repo plus the single commit's hash. 
+func newTestRepo(t *testing.T) (*gogit.Repository, plumbing.Hash) { + t.Helper() + + bfs := memfs.New() + + repo, err := gogit.Init(memory.NewStorage(), bfs) + require.NoError(t, err) + + writeFile(t, bfs, "azldev.toml", "includes = [\"comps/**/*.toml\"]\n") + writeFile(t, bfs, "comps/foo.toml", "name = \"foo\"\n") + writeFile(t, bfs, "comps/sub/bar.toml", "name = \"bar\"\n") + + return repo, commitAll(t, repo, "init") +} + +func TestOpenAndReadFile(t *testing.T) { + repo, hash := newTestRepo(t) + + fs, err := gitfs.NewFromCommit(repo, hash) + require.NoError(t, err) + + for _, name := range []string{"comps/foo.toml", "/comps/foo.toml", "./comps/foo.toml"} { + file, openErr := fs.Open(name) + require.NoError(t, openErr, "open %q", name) + + content, readErr := io.ReadAll(file) + require.NoError(t, readErr) + require.NoError(t, file.Close()) + + assert.Equal(t, "name = \"foo\"\n", string(content), "content via %q", name) + } +} + +func TestStat(t *testing.T) { + repo, hash := newTestRepo(t) + + gitFS, err := gitfs.NewFromCommit(repo, hash) + require.NoError(t, err) + + fileInfo, err := gitFS.Stat("/comps/foo.toml") + require.NoError(t, err) + assert.False(t, fileInfo.IsDir()) + assert.Equal(t, int64(len("name = \"foo\"\n")), fileInfo.Size()) + + dirInfo, err := gitFS.Stat("/comps") + require.NoError(t, err) + assert.True(t, dirInfo.IsDir()) + + rootInfo, err := gitFS.Stat("/") + require.NoError(t, err) + assert.True(t, rootInfo.IsDir()) +} + +func TestStatMissing(t *testing.T) { + repo, hash := newTestRepo(t) + + gitFS, err := gitfs.NewFromCommit(repo, hash) + require.NoError(t, err) + + _, err = gitFS.Stat("/nope.toml") + assert.True(t, os.IsNotExist(err), "expected not-exist, got %v", err) + + exists, err := afero.Exists(gitFS, "/nope.toml") + require.NoError(t, err) + assert.False(t, exists) + + exists, err = afero.Exists(gitFS, "/comps/foo.toml") + require.NoError(t, err) + assert.True(t, exists) +} + +func TestReaddir(t *testing.T) { + repo, hash := 
newTestRepo(t) + + fs, err := gitfs.NewFromCommit(repo, hash) + require.NoError(t, err) + + dir, err := fs.Open("/comps") + require.NoError(t, err) + + infos, err := dir.Readdir(-1) + require.NoError(t, err) + require.NoError(t, dir.Close()) + + names := make([]string, len(infos)) + for i, info := range infos { + names[i] = info.Name() + } + + assert.ElementsMatch(t, []string{"foo.toml", "sub"}, names) +} + +// TestGlobThroughDoublestar is the load-bearing test: it proves the config +// loader's include-resolution path (fileutils.Glob → afero.IOFS → doublestar) +// works against the git-backed filesystem with an absolute pattern, including +// the writable CopyOnWriteFs overlay the loader needs for scratch writes. +func TestGlobThroughDoublestar(t *testing.T) { + repo, hash := newTestRepo(t) + + base, err := gitfs.NewFromCommit(repo, hash) + require.NoError(t, err) + + fs := afero.NewCopyOnWriteFs(base, afero.NewMemMapFs()) + + matches, err := fileutils.Glob(fs, "/comps/**/*.toml", + doublestar.WithFailOnIOErrors(), doublestar.WithFilesOnly()) + require.NoError(t, err) + + assert.ElementsMatch(t, []string{"/comps/foo.toml", "/comps/sub/bar.toml"}, matches) +} + +func TestReadOnly(t *testing.T) { + repo, hash := newTestRepo(t) + + gitFS, err := gitfs.NewFromCommit(repo, hash) + require.NoError(t, err) + + _, err = gitFS.Create("/x") + require.Error(t, err) + + require.Error(t, gitFS.Mkdir("/d", 0o755)) + require.Error(t, gitFS.Remove("/comps/foo.toml")) +} From aeca60fb85d9c056e554ea282f0030fe8a4358a4 Mon Sep 17 00:00:00 2001 From: Daniel McIlvaney Date: Mon, 1 Jun 2026 15:30:03 -0700 Subject: [PATCH 2/4] fixup! 
feat: add historic git-backed filesystem --- internal/projectconfig/historic.go | 14 +++- internal/projectconfig/historic_test.go | 32 +++++++++ internal/utils/gitfs/gitfs.go | 27 ++++++-- internal/utils/gitfs/gitfs_test.go | 92 +++++++++++++++++++++++++ 4 files changed, 158 insertions(+), 7 deletions(-) diff --git a/internal/projectconfig/historic.go b/internal/projectconfig/historic.go index 163dbf9e..681c464e 100644 --- a/internal/projectconfig/historic.go +++ b/internal/projectconfig/historic.go @@ -5,6 +5,7 @@ package projectconfig import ( "fmt" + "path" gogit "github.com/go-git/go-git/v5" "github.com/go-git/go-git/v5/plumbing" @@ -39,8 +40,11 @@ func (historicOSEnv) LookupGroupID(string) (int, error) { return 0, nil } // // It reads files through a read-only [gitfs.Fs] backed by the commit's tree, // layered under an in-memory writable overlay so the loader can stage its -// embedded default configs. Only in-tree configuration participates: host -// working directory and user-level config are intentionally excluded. +// embedded default configs. The resolved configuration therefore combines the +// commit's in-tree config with azldev's built-in embedded defaults; the latter +// are part of every load and are not drawn from the git tree. Host working +// directory and user-level config are intentionally excluded, so the only +// non-tree input is the embedded defaults baked into the binary. // // referenceDir is interpreted relative to the tree root (e.g. the project // subdirectory containing azldev.toml). Both absolute ("/sub") and relative @@ -61,6 +65,12 @@ func LoadProjectConfigAtCommit( // read-only git tree underneath. fs := afero.NewCopyOnWriteFs(base, afero.NewMemMapFs()) + // Interpret referenceDir relative to the git tree root, never the host + // process working directory. 
path.Join against "/" makes relative forms + // ("sub", "./sub") and absolute forms ("/sub") resolve identically; an + // empty referenceDir collapses to the tree root "/". + referenceDir = path.Join("/", referenceDir) + return LoadProjectConfig( historicDryRunnable{}, fs, diff --git a/internal/projectconfig/historic_test.go b/internal/projectconfig/historic_test.go index 02337065..798d6fdf 100644 --- a/internal/projectconfig/historic_test.go +++ b/internal/projectconfig/historic_test.go @@ -75,6 +75,38 @@ replacement = "2.0.0" assert.Equal(t, "2.0.0", comp.Overlays[0].Replacement) } +// TestLoadProjectConfigAtCommit_ReferenceDirIsTreeRelative verifies that a +// referenceDir naming a project subdirectory is interpreted relative to the git +// tree root, not the host process working directory. Both relative ("sub") and +// absolute ("/sub") forms must resolve to the same in-tree location. Without +// tree-relative normalization, a relative referenceDir resolves against the +// host CWD and the config file is never found in the git tree. 
+func TestLoadProjectConfigAtCommit_ReferenceDirIsTreeRelative(t *testing.T) { + bfs := memfs.New() + + repo, err := gogit.Init(memory.NewStorage(), bfs) + require.NoError(t, err) + + file, err := bfs.Create("sub/azldev.toml") + require.NoError(t, err) + + _, err = file.Write([]byte("[components.foo]\n")) + require.NoError(t, err) + require.NoError(t, file.Close()) + + hash := commitWorktree(t, repo, "add config under sub/") + + for _, referenceDir := range []string{"sub", "/sub", "./sub"} { + t.Run(referenceDir, func(t *testing.T) { + projectDir, config, err := projectconfig.LoadProjectConfigAtCommit(repo, hash, referenceDir, false) + require.NoError(t, err) + require.NotNil(t, config) + assert.Equal(t, "/sub", projectDir) + assert.Contains(t, config.Components, "foo") + }) + } +} + // TestResolveComponentOverlaysAtCommit verifies that overlays inherited from a // component group default are merged with the component's own overlays when // resolving historically. diff --git a/internal/utils/gitfs/gitfs.go b/internal/utils/gitfs/gitfs.go index 7c52aefc..6a2bc371 100644 --- a/internal/utils/gitfs/gitfs.go +++ b/internal/utils/gitfs/gitfs.go @@ -42,6 +42,11 @@ import ( // errReadOnly is returned by all mutating operations. var errReadOnly = errors.New("gitfs: read-only filesystem") +// ErrSubmodule is returned when a caller tries to read a submodule (gitlink) +// entry. A gitlink records a commit hash in another repository, not file +// content in this tree, so there is nothing to read through the filesystem. +var ErrSubmodule = errors.New("gitfs: submodule entries are not supported") + // Fs is a read-only [afero.Fs] backed by a git tree. 
type Fs struct { repo *gogit.Repository @@ -105,6 +110,10 @@ func (f *Fs) Open(name string) (afero.File, error) { return newDirFile(f, name, rel, subtree), nil } + if entry.Mode == filemode.Submodule { + return nil, &os.PathError{Op: "open", Path: name, Err: ErrSubmodule} + } + content, err := f.blobContents(entry.Hash) if err != nil { return nil, &os.PathError{Op: "open", Path: name, Err: err} @@ -171,13 +180,19 @@ func (f *Fs) entryInfo(name string, entry *object.TreeEntry) (os.FileInfo, error return &fileInfo{name: name, isDir: true, mode: os.ModeDir | fileperms.ReadOnlyExec}, nil } - var size int64 + // Submodule (gitlink) entries reference a commit in another repository, not + // a blob in this tree. Classify them as non-regular without a blob lookup + // so directory listings work and Open's submodule error stays authoritative. + if entry.Mode == filemode.Submodule { + return &fileInfo{name: name, mode: entryFileMode(entry.Mode)}, nil + } - if blob, err := f.repo.BlobObject(entry.Hash); err == nil { - size = blob.Size + blob, err := f.repo.BlobObject(entry.Hash) + if err != nil { + return nil, fmt.Errorf("gitfs: stat %#q: read blob %s:\n%w", name, entry.Hash, err) } - return &fileInfo{name: name, size: size, mode: entryFileMode(entry.Mode)}, nil + return &fileInfo{name: name, size: blob.Size, mode: entryFileMode(entry.Mode)}, nil } // entryFileMode maps a git filemode to an os.FileMode for non-directory entries. 
@@ -187,7 +202,9 @@ func entryFileMode(mode filemode.FileMode) os.FileMode { return fileperms.ReadOnlyExec case filemode.Symlink: return os.ModeSymlink | fileperms.ReadOnlyExec - case filemode.Empty, filemode.Dir, filemode.Regular, filemode.Deprecated, filemode.Submodule: + case filemode.Submodule: + return os.ModeIrregular | fileperms.ReadOnlyFile + case filemode.Empty, filemode.Dir, filemode.Regular, filemode.Deprecated: return fileperms.ReadOnlyFile default: return fileperms.ReadOnlyFile diff --git a/internal/utils/gitfs/gitfs_test.go b/internal/utils/gitfs/gitfs_test.go index 741038ad..d5cd91e2 100644 --- a/internal/utils/gitfs/gitfs_test.go +++ b/internal/utils/gitfs/gitfs_test.go @@ -14,6 +14,7 @@ import ( "github.com/go-git/go-billy/v5/memfs" gogit "github.com/go-git/go-git/v5" "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/filemode" "github.com/go-git/go-git/v5/plumbing/object" "github.com/go-git/go-git/v5/storage/memory" "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" @@ -177,3 +178,94 @@ func TestReadOnly(t *testing.T) { require.Error(t, gitFS.Mkdir("/d", 0o755)) require.Error(t, gitFS.Remove("/comps/foo.toml")) } + +// storeBlob writes a blob object directly to the repo store and returns its hash. +func storeBlob(t *testing.T, repo *gogit.Repository, content string) plumbing.Hash { + t.Helper() + + obj := repo.Storer.NewEncodedObject() + obj.SetType(plumbing.BlobObject) + + w, err := obj.Writer() + require.NoError(t, err) + + _, err = w.Write([]byte(content)) + require.NoError(t, err) + require.NoError(t, w.Close()) + + hash, err := repo.Storer.SetEncodedObject(obj) + require.NoError(t, err) + + return hash +} + +// newRepoWithSubmodule builds a repo whose root tree contains a gitlink +// (submodule) entry alongside a regular file, and returns the commit hash. The +// submodule entry's hash names a commit that is not stored as any object in +// this repo, mirroring a real gitlink. 
+func newRepoWithSubmodule(t *testing.T) (*gogit.Repository, plumbing.Hash) { + t.Helper() + + repo, err := gogit.Init(memory.NewStorage(), memfs.New()) + require.NoError(t, err) + + blobHash := storeBlob(t, repo, "name = \"foo\"\n") + submoduleHash := plumbing.NewHash("0123456789abcdef0123456789abcdef01234567") + + tree := &object.Tree{Entries: []object.TreeEntry{ + {Name: "azldev.toml", Mode: filemode.Regular, Hash: blobHash}, + {Name: "sub", Mode: filemode.Submodule, Hash: submoduleHash}, + }} + + treeObj := repo.Storer.NewEncodedObject() + require.NoError(t, tree.Encode(treeObj)) + + treeHash, err := repo.Storer.SetEncodedObject(treeObj) + require.NoError(t, err) + + commit := &object.Commit{ + Author: object.Signature{Name: "t", Email: "t@t.com", When: time.Now()}, + Committer: object.Signature{Name: "t", Email: "t@t.com", When: time.Now()}, + Message: "with submodule", + TreeHash: treeHash, + } + + commitObj := repo.Storer.NewEncodedObject() + require.NoError(t, commit.Encode(commitObj)) + + commitHash, err := repo.Storer.SetEncodedObject(commitObj) + require.NoError(t, err) + + return repo, commitHash +} + +// TestSubmoduleEntry verifies that submodule (gitlink) entries are handled +// explicitly: Open reports a clear, stable submodule error instead of a +// confusing "read blob" failure, and Stat/Readdir classify the entry as +// non-regular without silently falling back to a zero-size blob. +func TestSubmoduleEntry(t *testing.T) { + repo, hash := newRepoWithSubmodule(t) + + fs, err := gitfs.NewFromCommit(repo, hash) + require.NoError(t, err) + + // Open must fail with the stable submodule sentinel, not a blob-read error. + _, err = fs.Open("sub") + require.Error(t, err) + assert.ErrorIs(t, err, gitfs.ErrSubmodule) + assert.NotContains(t, err.Error(), "read blob") + + // Stat must succeed and classify the gitlink as non-regular (no silent + // zero-size blob fallback). 
+ info, err := fs.Stat("sub") + require.NoError(t, err) + assert.False(t, info.Mode().IsRegular(), "submodule entry must not look like a regular file") + + // Readdir must still list the submodule alongside the regular file. + root, err := fs.Open("/") + require.NoError(t, err) + + names, err := root.Readdirnames(-1) + require.NoError(t, err) + assert.ElementsMatch(t, []string{"azldev.toml", "sub"}, names) +} From 6df775579045e45c47d1a1655be675774e9e6e84 Mon Sep 17 00:00:00 2001 From: Daniel McIlvaney Date: Mon, 1 Jun 2026 15:34:16 -0700 Subject: [PATCH 3/4] fixup! feat: add historic git-backed filesystem --- internal/utils/gitfs/gitfs.go | 76 +++++++++++++++-------- internal/utils/gitfs/gitfs_test.go | 98 ++++++++++++++++++++++++++++++ 2 files changed, 149 insertions(+), 25 deletions(-) diff --git a/internal/utils/gitfs/gitfs.go b/internal/utils/gitfs/gitfs.go index 6a2bc371..953214fd 100644 --- a/internal/utils/gitfs/gitfs.go +++ b/internal/utils/gitfs/gitfs.go @@ -39,14 +39,22 @@ import ( "github.com/spf13/afero" ) -// errReadOnly is returned by all mutating operations. -var errReadOnly = errors.New("gitfs: read-only filesystem") +// ErrReadOnly is returned by all mutating operations: the filesystem is +// strictly read-only. +var ErrReadOnly = errors.New("gitfs: read-only filesystem") // ErrSubmodule is returned when a caller tries to read a submodule (gitlink) // entry. A gitlink records a commit hash in another repository, not file // content in this tree, so there is nothing to read through the filesystem. var ErrSubmodule = errors.New("gitfs: submodule entries are not supported") +// ErrSymlink is returned when a caller tries to read a symlink entry. git +// stores the link target as the blob body; reading it would hand the caller +// the target path string instead of the file it points at. In-tree symlink +// resolution is intentionally unsupported, so Open reports this rather than +// silently returning the target bytes. 
+var ErrSymlink = errors.New("gitfs: symlink entries are not supported") + // Fs is a read-only [afero.Fs] backed by a git tree. type Fs struct { repo *gogit.Repository @@ -114,6 +122,10 @@ func (f *Fs) Open(name string) (afero.File, error) { return nil, &os.PathError{Op: "open", Path: name, Err: ErrSubmodule} } + if entry.Mode == filemode.Symlink { + return nil, &os.PathError{Op: "open", Path: name, Err: ErrSymlink} + } + content, err := f.blobContents(entry.Hash) if err != nil { return nil, &os.PathError{Op: "open", Path: name, Err: err} @@ -126,7 +138,7 @@ func (f *Fs) Open(name string) (afero.File, error) { // is rejected. func (f *Fs) OpenFile(name string, flag int, _ os.FileMode) (afero.File, error) { if flag&(os.O_WRONLY|os.O_RDWR|os.O_CREATE|os.O_APPEND|os.O_TRUNC) != 0 { - return nil, &os.PathError{Op: "open", Path: name, Err: errReadOnly} + return nil, &os.PathError{Op: "open", Path: name, Err: ErrReadOnly} } return f.Open(name) @@ -180,10 +192,12 @@ func (f *Fs) entryInfo(name string, entry *object.TreeEntry) (os.FileInfo, error return &fileInfo{name: name, isDir: true, mode: os.ModeDir | fileperms.ReadOnlyExec}, nil } - // Submodule (gitlink) entries reference a commit in another repository, not - // a blob in this tree. Classify them as non-regular without a blob lookup - // so directory listings work and Open's submodule error stays authoritative. - if entry.Mode == filemode.Submodule { + // Submodule (gitlink) and symlink entries do not present readable file + // content in this tree (a gitlink points at a commit elsewhere; a symlink's + // blob is just the target path). Classify them as non-regular without a + // blob-size lookup so directory listings work and Open's error stays + // authoritative. 
+ if entry.Mode == filemode.Submodule || entry.Mode == filemode.Symlink { return &fileInfo{name: name, mode: entryFileMode(entry.Mode)}, nil } @@ -216,39 +230,39 @@ func entryFileMode(mode filemode.FileMode) os.FileMode { // func (f *Fs) Create(name string) (afero.File, error) { - return nil, &os.PathError{Op: "create", Path: name, Err: errReadOnly} + return nil, &os.PathError{Op: "create", Path: name, Err: ErrReadOnly} } func (f *Fs) Mkdir(name string, _ os.FileMode) error { - return &os.PathError{Op: "mkdir", Path: name, Err: errReadOnly} + return &os.PathError{Op: "mkdir", Path: name, Err: ErrReadOnly} } func (f *Fs) MkdirAll(path string, _ os.FileMode) error { - return &os.PathError{Op: "mkdir", Path: path, Err: errReadOnly} + return &os.PathError{Op: "mkdir", Path: path, Err: ErrReadOnly} } func (f *Fs) Remove(name string) error { - return &os.PathError{Op: "remove", Path: name, Err: errReadOnly} + return &os.PathError{Op: "remove", Path: name, Err: ErrReadOnly} } func (f *Fs) RemoveAll(path string) error { - return &os.PathError{Op: "removeall", Path: path, Err: errReadOnly} + return &os.PathError{Op: "removeall", Path: path, Err: ErrReadOnly} } func (f *Fs) Rename(oldname, _ string) error { - return &os.PathError{Op: "rename", Path: oldname, Err: errReadOnly} + return &os.PathError{Op: "rename", Path: oldname, Err: ErrReadOnly} } func (f *Fs) Chmod(name string, _ os.FileMode) error { - return &os.PathError{Op: "chmod", Path: name, Err: errReadOnly} + return &os.PathError{Op: "chmod", Path: name, Err: ErrReadOnly} } func (f *Fs) Chown(name string, _, _ int) error { - return &os.PathError{Op: "chown", Path: name, Err: errReadOnly} + return &os.PathError{Op: "chown", Path: name, Err: ErrReadOnly} } func (f *Fs) Chtimes(name string, _, _ time.Time) error { - return &os.PathError{Op: "chtimes", Path: name, Err: errReadOnly} + return &os.PathError{Op: "chtimes", Path: name, Err: ErrReadOnly} } // @@ -325,19 +339,19 @@ func (f *regularFile) Readdirnames(int) 
([]string, error) { } func (f *regularFile) Write([]byte) (int, error) { - return 0, &os.PathError{Op: "write", Path: f.name, Err: errReadOnly} + return 0, &os.PathError{Op: "write", Path: f.name, Err: ErrReadOnly} } func (f *regularFile) WriteAt([]byte, int64) (int, error) { - return 0, &os.PathError{Op: "write", Path: f.name, Err: errReadOnly} + return 0, &os.PathError{Op: "write", Path: f.name, Err: ErrReadOnly} } func (f *regularFile) WriteString(string) (int, error) { - return 0, &os.PathError{Op: "write", Path: f.name, Err: errReadOnly} + return 0, &os.PathError{Op: "write", Path: f.name, Err: ErrReadOnly} } func (f *regularFile) Truncate(int64) error { - return &os.PathError{Op: "truncate", Path: f.name, Err: errReadOnly} + return &os.PathError{Op: "truncate", Path: f.name, Err: ErrReadOnly} } // @@ -361,10 +375,22 @@ func newDirFile(fs *Fs, name, relPath string, tree *object.Tree) *dirFile { func (d *dirFile) Close() error { return nil } func (d *dirFile) Read([]byte) (int, error) { return 0, io.EOF } func (d *dirFile) ReadAt([]byte, int64) (int, error) { return 0, io.EOF } -func (d *dirFile) Seek(int64, int) (int64, error) { return 0, nil } func (d *dirFile) Name() string { return d.name } func (d *dirFile) Sync() error { return nil } +// Seek only supports rewinding the directory stream to the start +// (Seek(0, io.SeekStart)), which resets the Readdir paging offset. Any other +// seek is rejected rather than silently ignored. +func (d *dirFile) Seek(offset int64, whence int) (int64, error) { + if offset == 0 && whence == io.SeekStart { + d.offset = 0 + + return 0, nil + } + + return 0, &os.PathError{Op: "seek", Path: d.name, Err: errors.New("gitfs: unsupported directory seek")} +} + func (d *dirFile) Stat() (os.FileInfo, error) { base := "." 
if d.relPath != "" { @@ -428,17 +454,17 @@ func (d *dirFile) Readdirnames(n int) ([]string, error) { } func (d *dirFile) Write([]byte) (int, error) { - return 0, &os.PathError{Op: "write", Path: d.name, Err: errReadOnly} + return 0, &os.PathError{Op: "write", Path: d.name, Err: ErrReadOnly} } func (d *dirFile) WriteAt([]byte, int64) (int, error) { - return 0, &os.PathError{Op: "write", Path: d.name, Err: errReadOnly} + return 0, &os.PathError{Op: "write", Path: d.name, Err: ErrReadOnly} } func (d *dirFile) WriteString(string) (int, error) { - return 0, &os.PathError{Op: "write", Path: d.name, Err: errReadOnly} + return 0, &os.PathError{Op: "write", Path: d.name, Err: ErrReadOnly} } func (d *dirFile) Truncate(int64) error { - return &os.PathError{Op: "truncate", Path: d.name, Err: errReadOnly} + return &os.PathError{Op: "truncate", Path: d.name, Err: ErrReadOnly} } diff --git a/internal/utils/gitfs/gitfs_test.go b/internal/utils/gitfs/gitfs_test.go index d5cd91e2..cbdc6289 100644 --- a/internal/utils/gitfs/gitfs_test.go +++ b/internal/utils/gitfs/gitfs_test.go @@ -269,3 +269,101 @@ func TestSubmoduleEntry(t *testing.T) { require.NoError(t, err) assert.ElementsMatch(t, []string{"azldev.toml", "sub"}, names) } + +// TestSymlinkEntry verifies that symlink entries are not silently read as file +// content (git stores the link target as the blob body). Open must report a +// clear, stable symlink error rather than handing the target-path string to the +// caller, and Stat must classify the entry as a symlink. 
+func TestSymlinkEntry(t *testing.T) { + repo, hash := commitTreeWithSymlink(t) + + fs, err := gitfs.NewFromCommit(repo, hash) + require.NoError(t, err) + + _, err = fs.Open("link") + require.Error(t, err) + assert.ErrorIs(t, err, gitfs.ErrSymlink) + assert.NotContains(t, err.Error(), "azldev.toml", "must not leak the link target as content") + + info, err := fs.Stat("link") + require.NoError(t, err) + assert.False(t, info.Mode().IsRegular(), "symlink must not look like a regular file") + assert.NotZero(t, info.Mode()&os.ModeSymlink, "symlink mode bit must be set") +} + +// commitTreeWithSymlink builds a repo whose root tree contains a symlink entry +// (blob body = target path) next to the regular file it points at. +func commitTreeWithSymlink(t *testing.T) (*gogit.Repository, plumbing.Hash) { + t.Helper() + + repo, err := gogit.Init(memory.NewStorage(), memfs.New()) + require.NoError(t, err) + + targetBlob := storeBlob(t, repo, "name = \"foo\"\n") + linkBlob := storeBlob(t, repo, "azldev.toml") // symlink body is the target path + + entries := []object.TreeEntry{ + {Name: "azldev.toml", Mode: filemode.Regular, Hash: targetBlob}, + {Name: "link", Mode: filemode.Symlink, Hash: linkBlob}, + } + + tree := &object.Tree{Entries: entries} + + treeObj := repo.Storer.NewEncodedObject() + require.NoError(t, tree.Encode(treeObj)) + + treeHash, err := repo.Storer.SetEncodedObject(treeObj) + require.NoError(t, err) + + commit := &object.Commit{ + Author: object.Signature{Name: "t", Email: "t@t.com", When: time.Now()}, + Committer: object.Signature{Name: "t", Email: "t@t.com", When: time.Now()}, + Message: "with symlink", + TreeHash: treeHash, + } + + commitObj := repo.Storer.NewEncodedObject() + require.NoError(t, commit.Encode(commitObj)) + + commitHash, err := repo.Storer.SetEncodedObject(commitObj) + require.NoError(t, err) + + return repo, commitHash +} + +// TestDirSeekResetsOffset verifies that seeking a directory handle back to the +// start lets a subsequent Readdir 
return the full listing again, rather than an +// empty result because the internal offset was never reset. +func TestDirSeekResetsOffset(t *testing.T) { + repo, hash := newTestRepo(t) + + fs, err := gitfs.NewFromCommit(repo, hash) + require.NoError(t, err) + + dir, err := fs.Open("/comps") + require.NoError(t, err) + + first, err := dir.Readdir(-1) + require.NoError(t, err) + require.NotEmpty(t, first) + + _, err = dir.Seek(0, io.SeekStart) + require.NoError(t, err) + + again, err := dir.Readdir(-1) + require.NoError(t, err) + assert.Len(t, again, len(first), "rewound directory must list all entries again") +} + +// TestReadOnlyErrorIsExported verifies callers can identify read-only failures +// via errors.Is against the exported sentinel. +func TestReadOnlyErrorIsExported(t *testing.T) { + repo, hash := newTestRepo(t) + + fs, err := gitfs.NewFromCommit(repo, hash) + require.NoError(t, err) + + _, err = fs.Create("/x") + require.Error(t, err) + assert.ErrorIs(t, err, gitfs.ErrReadOnly) +} From 23b1ef4498824fd4ac93aa60c5f81a6a1c2cd4ce Mon Sep 17 00:00:00 2001 From: Daniel McIlvaney Date: Mon, 1 Jun 2026 15:37:07 -0700 Subject: [PATCH 4/4] fixup! feat: add historic git-backed filesystem --- internal/projectconfig/historic.go | 7 +++++++ internal/utils/gitfs/gitfs.go | 12 ++++++++++++ 2 files changed, 19 insertions(+) diff --git a/internal/projectconfig/historic.go b/internal/projectconfig/historic.go index 681c464e..68a558ea 100644 --- a/internal/projectconfig/historic.go +++ b/internal/projectconfig/historic.go @@ -93,6 +93,13 @@ func LoadProjectConfigAtCommit( // the historic tree), and distro defaults are not used for version-setting // overlays. This keeps historic resolution self-contained and deterministic. // +// Each call performs a full LoadProjectConfigAtCommit (fresh overlay, re-staged +// defaults, re-parsed config) to extract a single component, so resolving many +// components at one commit reloads the project repeatedly. 
This favors a simple, +// self-contained API over performance; the currently expected workflows resolve +// few components per commit. If a caller needs many-per-commit resolution, load +// the config once and resolve against the returned *ProjectConfig instead. +// // Returns (nil, nil) when the component is absent at that commit. func ResolveComponentOverlaysAtCommit( repo *gogit.Repository, diff --git a/internal/utils/gitfs/gitfs.go b/internal/utils/gitfs/gitfs.go index 953214fd..c8cdd07d 100644 --- a/internal/utils/gitfs/gitfs.go +++ b/internal/utils/gitfs/gitfs.go @@ -56,6 +56,12 @@ var ErrSubmodule = errors.New("gitfs: submodule entries are not supported") var ErrSymlink = errors.New("gitfs: symlink entries are not supported") // Fs is a read-only [afero.Fs] backed by a git tree. +// +// Fs is NOT safe for concurrent use. Reads go through [object.Tree.FindEntry], +// which lazily populates the tree's internal lookup maps on first access, so +// two goroutines sharing one *Fs (or its underlying *object.Tree) can race. +// This matters because callers replay commits in parallel: give each goroutine +// its own *Fs via a separate [NewFromCommit] rather than sharing one instance. type Fs struct { repo *gogit.Repository tree *object.Tree @@ -187,6 +193,12 @@ func (f *Fs) blobContents(hash plumbing.Hash) ([]byte, error) { // entryInfo builds a FileInfo for a tree entry, fetching the blob size for // regular files. +// +// For regular files this loads the blob object to report an accurate Size(). +// On a large tree a name-only directory scan (Readdirnames / doublestar) thus +// inflates every blob just for its size. We accept that cost: the expected +// workflow scans modest config trees, and the simplicity of always returning a +// correct size outweighs lazy-size bookkeeping for the current consumers. 
func (f *Fs) entryInfo(name string, entry *object.TreeEntry) (os.FileInfo, error) { if entry.Mode == filemode.Dir { return &fileInfo{name: name, isDir: true, mode: os.ModeDir | fileperms.ReadOnlyExec}, nil