diff --git a/internal/projectconfig/historic.go b/internal/projectconfig/historic.go new file mode 100644 index 00000000..68a558ea --- /dev/null +++ b/internal/projectconfig/historic.go @@ -0,0 +1,134 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package projectconfig + +import ( + "fmt" + "path" + + gogit "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/gitfs" + "github.com/spf13/afero" +) + +// historicDryRunnable reports that we are not in dry-run mode: the historic +// loader genuinely writes the embedded default configs into its in-memory +// scratch overlay. +type historicDryRunnable struct{} + +func (historicDryRunnable) DryRun() bool { return false } + +// historicOSEnv is a deliberately inert OS environment. Historic config loading +// must depend only on what is in the git tree, never on the host's working +// directory or user-level XDG config. Returning empty values causes the +// user-config lookup to resolve to nothing. +type historicOSEnv struct{} + +func (historicOSEnv) Getwd() (string, error) { return "", nil } +func (historicOSEnv) Chdir(string) error { return nil } +func (historicOSEnv) Getenv(string) string { return "" } +func (historicOSEnv) IsCurrentUserMemberOf(string) (bool, error) { + return false, nil +} +func (historicOSEnv) LookupGroupID(string) (int, error) { return 0, nil } + +// LoadProjectConfigAtCommit loads the project configuration exactly as it +// existed at a specific commit in the project repository, without checking +// anything out to disk. +// +// It reads files through a read-only [gitfs.Fs] backed by the commit's tree, +// layered under an in-memory writable overlay so the loader can stage its +// embedded default configs. 
The resolved configuration therefore combines the +// commit's in-tree config with azldev's built-in embedded defaults; the latter +// are part of every load and are not drawn from the git tree. Host working +// directory and user-level config are intentionally excluded, so the only +// per-invocation input is the embedded defaults baked into the binary. +// +// referenceDir is interpreted relative to the tree root (e.g. the project +// subdirectory containing azldev.toml). Both absolute ("/sub") and relative +// ("sub") forms are accepted. +func LoadProjectConfigAtCommit( + repo *gogit.Repository, + commitHash plumbing.Hash, + referenceDir string, + permissiveConfigParsing bool, +) (projectDir string, config *ProjectConfig, err error) { + base, err := gitfs.NewFromCommit(repo, commitHash) + if err != nil { + return "", nil, fmt.Errorf("failed to open git filesystem at commit %s:\n%w", commitHash, err) + } + + // Layer a writable in-memory overlay so the loader can stage its embedded + // default configs (and any other scratch writes) without touching the + // read-only git tree underneath. + fs := afero.NewCopyOnWriteFs(base, afero.NewMemMapFs()) + + // Interpret referenceDir relative to the git tree root, never the host + // process working directory. path.Join against "/" makes relative forms + // ("sub", "./sub") and absolute forms ("/sub") resolve identically; an + // empty referenceDir collapses to the tree root "/". + referenceDir = path.Join("/", referenceDir) + + return LoadProjectConfig( + historicDryRunnable{}, + fs, + historicOSEnv{}, + referenceDir, + false, // disableDefaultConfig: defaults are part of resolved overlays. + "", // tempDirPath: empty lets the loader pick a default temp dir. + nil, // extraConfigFilePaths: none for historic loads. 
+ permissiveConfigParsing, + ) +} + +// ResolveComponentOverlaysAtCommit loads the project config as of the given +// commit and returns the resolved overlays for the named component, combining +// project-level defaults, component-group defaults, and the component's own +// overlays. +// +// Distro-level default overlays are intentionally excluded: resolving them +// requires distro/version selection (which depends on the live invocation, not +// the historic tree), and distro defaults are not used for version-setting +// overlays. This keeps historic resolution self-contained and deterministic. +// +// Each call performs a full LoadProjectConfigAtCommit (fresh overlay, re-staged +// defaults, re-parsed config) to extract a single component, so resolving many +// components at one commit reloads the project repeatedly. This favors a simple, +// self-contained API over performance; the currently expected workflows resolve +// few components per commit. If a caller needs many-per-commit resolution, load +// the config once and resolve against the returned *ProjectConfig instead. +// +// Returns (nil, nil) when the component is absent at that commit. +func ResolveComponentOverlaysAtCommit( + repo *gogit.Repository, + commitHash plumbing.Hash, + referenceDir string, + componentName string, + permissiveConfigParsing bool, +) ([]ComponentOverlay, error) { + _, config, err := LoadProjectConfigAtCommit(repo, commitHash, referenceDir, permissiveConfigParsing) + if err != nil { + return nil, err + } + + explicit, ok := config.Components[componentName] + if !ok { + return nil, nil + } + + resolved, err := ResolveComponentConfig( + explicit, + config.DefaultComponentConfig, + ComponentConfig{}, // distro defaults excluded; see doc comment. 
+ config.ComponentGroups, + config.GroupsByComponent[componentName], + ) + if err != nil { + return nil, fmt.Errorf("resolving overlays for component %#q at commit %s:\n%w", + componentName, commitHash, err) + } + + return resolved.Overlays, nil +} diff --git a/internal/projectconfig/historic_test.go b/internal/projectconfig/historic_test.go new file mode 100644 index 00000000..798d6fdf --- /dev/null +++ b/internal/projectconfig/historic_test.go @@ -0,0 +1,239 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package projectconfig_test + +import ( + "testing" + "time" + + "github.com/go-git/go-billy/v5" + "github.com/go-git/go-billy/v5/memfs" + gogit "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/object" + "github.com/go-git/go-git/v5/storage/memory" + "github.com/microsoft/azure-linux-dev-tools/internal/projectconfig" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func writeWorktreeFile(t *testing.T, fs billy.Filesystem, content string) { + t.Helper() + + file, err := fs.Create("azldev.toml") + require.NoError(t, err) + + _, err = file.Write([]byte(content)) + require.NoError(t, err) + require.NoError(t, file.Close()) +} + +func commitWorktree(t *testing.T, repo *gogit.Repository, msg string) plumbing.Hash { + t.Helper() + + worktree, err := repo.Worktree() + require.NoError(t, err) + require.NoError(t, worktree.AddGlob(".")) + + hash, err := worktree.Commit(msg, &gogit.CommitOptions{ + Author: &object.Signature{Name: "t", Email: "t@t.com", When: time.Now()}, + }) + require.NoError(t, err) + + return hash +} + +// TestLoadProjectConfigAtCommit verifies that a component's overlays defined in +// azldev.toml are recovered when loading the project config as of a historical +// commit, reading purely from the git tree (no checkout). 
+func TestLoadProjectConfigAtCommit(t *testing.T) { + bfs := memfs.New() + + repo, err := gogit.Init(memory.NewStorage(), bfs) + require.NoError(t, err) + + writeWorktreeFile(t, bfs, ` +[components.foo] +[[components.foo.overlays]] +type = "spec-search-replace" +regex = "1\\.0\\.0" +replacement = "2.0.0" +`) + + hash := commitWorktree(t, repo, "add foo overlay") + + projectDir, config, err := projectconfig.LoadProjectConfigAtCommit(repo, hash, "/", false) + require.NoError(t, err) + require.NotNil(t, config) + assert.Equal(t, "/", projectDir) + + comp, ok := config.Components["foo"] + require.True(t, ok, "component foo should be present") + require.Len(t, comp.Overlays, 1) + assert.Equal(t, projectconfig.ComponentOverlaySearchAndReplaceInSpec, comp.Overlays[0].Type) + assert.Equal(t, "2.0.0", comp.Overlays[0].Replacement) +} + +// TestLoadProjectConfigAtCommit_ReferenceDirIsTreeRelative verifies that a +// referenceDir naming a project subdirectory is interpreted relative to the git +// tree root, not the host process working directory. Both relative ("sub") and +// absolute ("/sub") forms must resolve to the same in-tree location. Without +// tree-relative normalization, a relative referenceDir resolves against the +// host CWD and the config file is never found in the git tree. 
+func TestLoadProjectConfigAtCommit_ReferenceDirIsTreeRelative(t *testing.T) { + bfs := memfs.New() + + repo, err := gogit.Init(memory.NewStorage(), bfs) + require.NoError(t, err) + + file, err := bfs.Create("sub/azldev.toml") + require.NoError(t, err) + + _, err = file.Write([]byte("[components.foo]\n")) + require.NoError(t, err) + require.NoError(t, file.Close()) + + hash := commitWorktree(t, repo, "add config under sub/") + + for _, referenceDir := range []string{"sub", "/sub", "./sub"} { + t.Run(referenceDir, func(t *testing.T) { + projectDir, config, err := projectconfig.LoadProjectConfigAtCommit(repo, hash, referenceDir, false) + require.NoError(t, err) + require.NotNil(t, config) + assert.Equal(t, "/sub", projectDir) + assert.Contains(t, config.Components, "foo") + }) + } +} + +// TestResolveComponentOverlaysAtCommit verifies that overlays inherited from a +// component group default are merged with the component's own overlays when +// resolving historically. +func TestResolveComponentOverlaysAtCommit(t *testing.T) { + bfs := memfs.New() + + repo, err := gogit.Init(memory.NewStorage(), bfs) + require.NoError(t, err) + + writeWorktreeFile(t, bfs, ` +[component-groups.shared] +components = ["foo"] +[[component-groups.shared.default-component-config.overlays]] +type = "spec-search-replace" +regex = "from-group" +replacement = "group-applied" + +[components.foo] +[[components.foo.overlays]] +type = "spec-search-replace" +regex = "from-comp" +replacement = "comp-applied" +`) + + hash := commitWorktree(t, repo, "add group + component overlays") + + overlays, err := projectconfig.ResolveComponentOverlaysAtCommit(repo, hash, "/", "foo", false) + require.NoError(t, err) + require.Len(t, overlays, 2) + + replacements := []string{overlays[0].Replacement, overlays[1].Replacement} + assert.Contains(t, replacements, "group-applied") + assert.Contains(t, replacements, "comp-applied") +} + +// TestResolveComponentOverlaysAtCommit_MissingComponent verifies that a request 
+// for a component absent at the commit returns nil overlays without error. +func TestResolveComponentOverlaysAtCommit_MissingComponent(t *testing.T) { + bfs := memfs.New() + + repo, err := gogit.Init(memory.NewStorage(), bfs) + require.NoError(t, err) + + writeWorktreeFile(t, bfs, "[components.foo]\n") + + hash := commitWorktree(t, repo, "add foo") + + overlays, err := projectconfig.ResolveComponentOverlaysAtCommit(repo, hash, "/", "absent", false) + require.NoError(t, err) + assert.Nil(t, overlays) +} + +// TestResolveComponentOverlaysAtCommit_TracksHistory verifies that resolving +// overlays at an OLDER commit returns the overlay value as it existed at THAT +// commit — not the latest value. This is the core guarantee historical overlay +// replay relies on: each synthetic commit must see the version it actually +// carried at that point in history. If resolution leaked HEAD's config, every +// historic entry would show the current version. +func TestResolveComponentOverlaysAtCommit_TracksHistory(t *testing.T) { + bfs := memfs.New() + + repo, err := gogit.Init(memory.NewStorage(), bfs) + require.NoError(t, err) + + // Commit A: overlay replacement is 2.0.0. + writeWorktreeFile(t, bfs, ` +[components.foo] +[[components.foo.overlays]] +type = "spec-search-replace" +regex = "VERSION" +replacement = "2.0.0" +`) + hashA := commitWorktree(t, repo, "foo -> 2.0.0") + + // Commit B: same overlay, replacement bumped to 3.0.0. 
+ writeWorktreeFile(t, bfs, ` +[components.foo] +[[components.foo.overlays]] +type = "spec-search-replace" +regex = "VERSION" +replacement = "3.0.0" +`) + hashB := commitWorktree(t, repo, "foo -> 3.0.0") + + overlaysA, err := projectconfig.ResolveComponentOverlaysAtCommit(repo, hashA, "/", "foo", false) + require.NoError(t, err) + require.Len(t, overlaysA, 1) + assert.Equal(t, "2.0.0", overlaysA[0].Replacement, "commit A must resolve its own (older) overlay value") + + overlaysB, err := projectconfig.ResolveComponentOverlaysAtCommit(repo, hashB, "/", "foo", false) + require.NoError(t, err) + require.Len(t, overlaysB, 1) + assert.Equal(t, "3.0.0", overlaysB[0].Replacement, "commit B must resolve its own (newer) overlay value") +} + +// TestResolveComponentOverlaysAtCommit_PermissiveToleratesUndefinedRef verifies +// that with permissive parsing enabled, a config whose component group references +// an undefined component still loads, so the target component's overlays can be +// recovered. Historical commits may legitimately reference components that were +// only defined in a later revision; a strict load would fail the entire resolve +// and mis-attribute the version for that commit. +func TestResolveComponentOverlaysAtCommit_PermissiveToleratesUndefinedRef(t *testing.T) { + bfs := memfs.New() + + repo, err := gogit.Init(memory.NewStorage(), bfs) + require.NoError(t, err) + + // "shared" group references "not-yet-defined", which has no [components] entry. + writeWorktreeFile(t, bfs, ` +[component-groups.shared] +components = ["foo", "not-yet-defined"] + +[components.foo] +[[components.foo.overlays]] +type = "spec-search-replace" +regex = "VERSION" +replacement = "2.0.0" +`) + hash := commitWorktree(t, repo, "foo defined, dangling group ref") + + // Strict load fails on the undefined component reference. 
+ _, err = projectconfig.ResolveComponentOverlaysAtCommit(repo, hash, "/", "foo", false) + require.Error(t, err) + + // Permissive load tolerates it and still returns foo's overlays. + overlays, err := projectconfig.ResolveComponentOverlaysAtCommit(repo, hash, "/", "foo", true) + require.NoError(t, err) + require.Len(t, overlays, 1) + assert.Equal(t, "2.0.0", overlays[0].Replacement) +} diff --git a/internal/utils/fileperms/fileperms.go b/internal/utils/fileperms/fileperms.go index 38c429a2..35e0660f 100644 --- a/internal/utils/fileperms/fileperms.go +++ b/internal/utils/fileperms/fileperms.go @@ -25,4 +25,10 @@ const ( PrivateDir os.FileMode = 0o700 // Directory permissions: user read/write/execute; group read/execute. PublicDir os.FileMode = 0o755 + + // Read-only permissions (no write bits), for immutable or synthetic views. + // ReadOnlyFile: all read. ReadOnlyExec: all read/execute (directories, + // executables, symlinks). + ReadOnlyFile os.FileMode = 0o444 + ReadOnlyExec os.FileMode = 0o555 ) diff --git a/internal/utils/gitfs/gitfs.go b/internal/utils/gitfs/gitfs.go new file mode 100644 index 00000000..c8cdd07d --- /dev/null +++ b/internal/utils/gitfs/gitfs.go @@ -0,0 +1,482 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +// Package gitfs provides a read-only [afero.Fs] backed by a git tree at a +// specific commit. It lets code that already speaks [afero.Fs] (such as the +// project-config loader) read files as they existed at an arbitrary point in +// history, without checking anything out to disk. +// +// Paths are interpreted relative to the root of the git tree. Both absolute +// paths (e.g. "/base/comps/x.toml") and io/fs-style relative paths (e.g. +// "base/comps/x.toml") are accepted and normalized identically: the leading +// slash is stripped and the path is cleaned. 
This mirrors how an +// [afero.OsFs] rooted at the tree root would behave, which is what callers +// like the config loader (which pass absolute paths) and the doublestar glob +// adapter (which passes relative paths) expect. +// +// All mutating operations return an error: the filesystem is strictly +// read-only. To support callers that need to write scratch files (e.g. the +// loader copying embedded default configs to a temp dir), layer a writable +// filesystem on top with [afero.NewCopyOnWriteFs]. +package gitfs + +import ( + "bytes" + "errors" + "fmt" + "io" + "os" + "path" + "path/filepath" + "sort" + "time" + + gogit "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/filemode" + "github.com/go-git/go-git/v5/plumbing/object" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileperms" + "github.com/spf13/afero" +) + +// ErrReadOnly is returned by all mutating operations: the filesystem is +// strictly read-only. +var ErrReadOnly = errors.New("gitfs: read-only filesystem") + +// ErrSubmodule is returned when a caller tries to read a submodule (gitlink) +// entry. A gitlink records a commit hash in another repository, not file +// content in this tree, so there is nothing to read through the filesystem. +var ErrSubmodule = errors.New("gitfs: submodule entries are not supported") + +// ErrSymlink is returned when a caller tries to read a symlink entry. git +// stores the link target as the blob body; reading it would hand the caller +// the target path string instead of the file it points at. In-tree symlink +// resolution is intentionally unsupported, so Open reports this rather than +// silently returning the target bytes. +var ErrSymlink = errors.New("gitfs: symlink entries are not supported") + +// Fs is a read-only [afero.Fs] backed by a git tree. +// +// Fs is NOT safe for concurrent use. 
Reads go through [object.Tree.FindEntry], +// which lazily populates the tree's internal lookup maps on first access, so +// two goroutines sharing one *Fs (or its underlying *object.Tree) can race. +// This matters because callers replay commits in parallel: give each goroutine +// its own *Fs via a separate [NewFromCommit] rather than sharing one instance. +type Fs struct { + repo *gogit.Repository + tree *object.Tree +} + +// Compile-time assurance that Fs implements afero.Fs. +var _ afero.Fs = (*Fs)(nil) + +// NewFromCommit creates a read-only filesystem exposing the tree of the given +// commit. +func NewFromCommit(repo *gogit.Repository, commitHash plumbing.Hash) (*Fs, error) { + commit, err := repo.CommitObject(commitHash) + if err != nil { + return nil, fmt.Errorf("gitfs: resolve commit %s:\n%w", commitHash, err) + } + + tree, err := commit.Tree() + if err != nil { + return nil, fmt.Errorf("gitfs: read tree for commit %s:\n%w", commitHash, err) + } + + return &Fs{repo: repo, tree: tree}, nil +} + +// normalize converts an incoming afero path (absolute or relative) to a +// clean, slash-separated, tree-relative path. The tree root is "". +func normalize(name string) string { + cleaned := path.Clean("/" + filepath.ToSlash(name)) + + // Strip the leading slash; the root becomes "". + return cleaned[1:] +} + +// notExist builds a PathError that reports as "does not exist" so that helpers +// like afero.Exists and os.IsNotExist behave correctly. +func notExist(op, name string) error { + return &os.PathError{Op: op, Path: name, Err: os.ErrNotExist} +} + +// Open opens the named file or directory for reading. +func (f *Fs) Open(name string) (afero.File, error) { + rel := normalize(name) + + // Root of the tree is always a directory. 
+ if rel == "" { + return newDirFile(f, name, "", f.tree), nil + } + + entry, err := f.tree.FindEntry(rel) + if err != nil { + return nil, notExist("open", name) + } + + if entry.Mode == filemode.Dir { + subtree, subErr := f.tree.Tree(rel) + if subErr != nil { + return nil, notExist("open", name) + } + + return newDirFile(f, name, rel, subtree), nil + } + + if entry.Mode == filemode.Submodule { + return nil, &os.PathError{Op: "open", Path: name, Err: ErrSubmodule} + } + + if entry.Mode == filemode.Symlink { + return nil, &os.PathError{Op: "open", Path: name, Err: ErrSymlink} + } + + content, err := f.blobContents(entry.Hash) + if err != nil { + return nil, &os.PathError{Op: "open", Path: name, Err: err} + } + + return newRegularFile(name, entry, content), nil +} + +// OpenFile opens the named file for reading. Any flag requesting write access +// is rejected. +func (f *Fs) OpenFile(name string, flag int, _ os.FileMode) (afero.File, error) { + if flag&(os.O_WRONLY|os.O_RDWR|os.O_CREATE|os.O_APPEND|os.O_TRUNC) != 0 { + return nil, &os.PathError{Op: "open", Path: name, Err: ErrReadOnly} + } + + return f.Open(name) +} + +// Stat returns file info for the named file or directory. +func (f *Fs) Stat(name string) (os.FileInfo, error) { + rel := normalize(name) + + if rel == "" { + return &fileInfo{name: ".", isDir: true, mode: os.ModeDir | fileperms.ReadOnlyExec}, nil + } + + entry, err := f.tree.FindEntry(rel) + if err != nil { + return nil, notExist("stat", name) + } + + return f.entryInfo(path.Base(rel), entry) +} + +// Name identifies this filesystem implementation. +func (f *Fs) Name() string { return "gitfs" } + +// blobContents reads the full contents of a blob. 
+func (f *Fs) blobContents(hash plumbing.Hash) ([]byte, error) { + blob, err := f.repo.BlobObject(hash) + if err != nil { + return nil, fmt.Errorf("read blob %s:\n%w", hash, err) + } + + reader, err := blob.Reader() + if err != nil { + return nil, fmt.Errorf("open blob %s:\n%w", hash, err) + } + + defer reader.Close() + + content, err := io.ReadAll(reader) + if err != nil { + return nil, fmt.Errorf("read blob %s:\n%w", hash, err) + } + + return content, nil +} + +// entryInfo builds a FileInfo for a tree entry, fetching the blob size for +// regular files. +// +// For regular files this loads the blob object to report an accurate Size(). +// On a large tree a name-only directory scan (Readdirnames / doublestar) thus +// inflates every blob just for its size. We accept that cost: the expected +// workflow scans modest config trees, and the simplicity of always returning a +// correct size outweighs lazy-size bookkeeping for the current consumers. +func (f *Fs) entryInfo(name string, entry *object.TreeEntry) (os.FileInfo, error) { + if entry.Mode == filemode.Dir { + return &fileInfo{name: name, isDir: true, mode: os.ModeDir | fileperms.ReadOnlyExec}, nil + } + + // Submodule (gitlink) and symlink entries do not present readable file + // content in this tree (a gitlink points at a commit elsewhere; a symlink's + // blob is just the target path). Classify them as non-regular without a + // blob-size lookup so directory listings work and Open's error stays + // authoritative. + if entry.Mode == filemode.Submodule || entry.Mode == filemode.Symlink { + return &fileInfo{name: name, mode: entryFileMode(entry.Mode)}, nil + } + + blob, err := f.repo.BlobObject(entry.Hash) + if err != nil { + return nil, fmt.Errorf("gitfs: stat %#q: read blob %s:\n%w", name, entry.Hash, err) + } + + return &fileInfo{name: name, size: blob.Size, mode: entryFileMode(entry.Mode)}, nil +} + +// entryFileMode maps a git filemode to an os.FileMode for non-directory entries. 
+func entryFileMode(mode filemode.FileMode) os.FileMode { + switch mode { + case filemode.Executable: + return fileperms.ReadOnlyExec + case filemode.Symlink: + return os.ModeSymlink | fileperms.ReadOnlyExec + case filemode.Submodule: + return os.ModeIrregular | fileperms.ReadOnlyFile + case filemode.Empty, filemode.Dir, filemode.Regular, filemode.Deprecated: + return fileperms.ReadOnlyFile + default: + return fileperms.ReadOnlyFile + } +} + +// +// Mutating operations — all unsupported. +// + +func (f *Fs) Create(name string) (afero.File, error) { + return nil, &os.PathError{Op: "create", Path: name, Err: ErrReadOnly} +} + +func (f *Fs) Mkdir(name string, _ os.FileMode) error { + return &os.PathError{Op: "mkdir", Path: name, Err: ErrReadOnly} +} + +func (f *Fs) MkdirAll(path string, _ os.FileMode) error { + return &os.PathError{Op: "mkdir", Path: path, Err: ErrReadOnly} +} + +func (f *Fs) Remove(name string) error { + return &os.PathError{Op: "remove", Path: name, Err: ErrReadOnly} +} + +func (f *Fs) RemoveAll(path string) error { + return &os.PathError{Op: "removeall", Path: path, Err: ErrReadOnly} +} + +func (f *Fs) Rename(oldname, _ string) error { + return &os.PathError{Op: "rename", Path: oldname, Err: ErrReadOnly} +} + +func (f *Fs) Chmod(name string, _ os.FileMode) error { + return &os.PathError{Op: "chmod", Path: name, Err: ErrReadOnly} +} + +func (f *Fs) Chown(name string, _, _ int) error { + return &os.PathError{Op: "chown", Path: name, Err: ErrReadOnly} +} + +func (f *Fs) Chtimes(name string, _, _ time.Time) error { + return &os.PathError{Op: "chtimes", Path: name, Err: ErrReadOnly} +} + +// +// fileInfo +// + +type fileInfo struct { + name string + size int64 + mode os.FileMode + isDir bool +} + +func (i *fileInfo) Name() string { return i.name } +func (i *fileInfo) Size() int64 { return i.size } +func (i *fileInfo) Mode() os.FileMode { return i.mode } +func (i *fileInfo) ModTime() time.Time { return time.Time{} } +func (i *fileInfo) IsDir() bool { 
return i.isDir } +func (i *fileInfo) Sys() any { return nil } + +// +// regularFile — a read-only view over blob contents. +// + +type regularFile struct { + name string + info os.FileInfo + reader *bytes.Reader +} + +var _ afero.File = (*regularFile)(nil) + +func newRegularFile(name string, entry *object.TreeEntry, content []byte) *regularFile { + return &regularFile{ + name: name, + info: &fileInfo{ + name: path.Base(normalize(name)), + size: int64(len(content)), + mode: entryFileMode(entry.Mode), + }, + reader: bytes.NewReader(content), + } +} + +func (f *regularFile) Close() error { return nil } +func (f *regularFile) Name() string { return f.name } +func (f *regularFile) Stat() (os.FileInfo, error) { return f.info, nil } +func (f *regularFile) Sync() error { return nil } + +func (f *regularFile) Read(p []byte) (int, error) { + n, err := f.reader.Read(p) + + return n, err //nolint:wrapcheck // pass through bytes.Reader io semantics (incl. io.EOF) unchanged +} + +func (f *regularFile) ReadAt(p []byte, off int64) (int, error) { + n, err := f.reader.ReadAt(p, off) + + return n, err //nolint:wrapcheck // pass through bytes.Reader io semantics unchanged +} + +func (f *regularFile) Seek(off int64, whence int) (int64, error) { + pos, err := f.reader.Seek(off, whence) + + return pos, err //nolint:wrapcheck // pass through bytes.Reader io semantics unchanged +} + +func (f *regularFile) Readdir(int) ([]os.FileInfo, error) { + return nil, &os.PathError{Op: "readdir", Path: f.name, Err: errors.New("not a directory")} +} + +func (f *regularFile) Readdirnames(int) ([]string, error) { + return nil, &os.PathError{Op: "readdir", Path: f.name, Err: errors.New("not a directory")} +} + +func (f *regularFile) Write([]byte) (int, error) { + return 0, &os.PathError{Op: "write", Path: f.name, Err: ErrReadOnly} +} + +func (f *regularFile) WriteAt([]byte, int64) (int, error) { + return 0, &os.PathError{Op: "write", Path: f.name, Err: ErrReadOnly} +} + +func (f *regularFile) 
WriteString(string) (int, error) { + return 0, &os.PathError{Op: "write", Path: f.name, Err: ErrReadOnly} +} + +func (f *regularFile) Truncate(int64) error { + return &os.PathError{Op: "truncate", Path: f.name, Err: ErrReadOnly} +} + +// +// dirFile — a read-only view over a tree's immediate entries. +// + +type dirFile struct { + fs *Fs + name string + relPath string + tree *object.Tree + offset int +} + +var _ afero.File = (*dirFile)(nil) + +func newDirFile(fs *Fs, name, relPath string, tree *object.Tree) *dirFile { + return &dirFile{fs: fs, name: name, relPath: relPath, tree: tree} +} + +func (d *dirFile) Close() error { return nil } +func (d *dirFile) Read([]byte) (int, error) { return 0, io.EOF } +func (d *dirFile) ReadAt([]byte, int64) (int, error) { return 0, io.EOF } +func (d *dirFile) Name() string { return d.name } +func (d *dirFile) Sync() error { return nil } + +// Seek only supports rewinding the directory stream to the start +// (Seek(0, io.SeekStart)), which resets the Readdir paging offset. Any other +// seek is rejected rather than silently ignored. +func (d *dirFile) Seek(offset int64, whence int) (int64, error) { + if offset == 0 && whence == io.SeekStart { + d.offset = 0 + + return 0, nil + } + + return 0, &os.PathError{Op: "seek", Path: d.name, Err: errors.New("gitfs: unsupported directory seek")} +} + +func (d *dirFile) Stat() (os.FileInfo, error) { + base := "." + if d.relPath != "" { + base = path.Base(d.relPath) + } + + return &fileInfo{name: base, isDir: true, mode: os.ModeDir | fileperms.ReadOnlyExec}, nil +} + +// Readdir returns the immediate children of the directory, sorted by name. +// It honors the offset/count semantics of os.File.Readdir. 
+func (d *dirFile) Readdir(count int) ([]os.FileInfo, error) { + entries := d.tree.Entries + + infos := make([]os.FileInfo, 0, len(entries)) + + for i := range entries { + entry := entries[i] + + info, err := d.fs.entryInfo(entry.Name, &entry) + if err != nil { + return nil, err + } + + infos = append(infos, info) + } + + sort.Slice(infos, func(a, b int) bool { return infos[a].Name() < infos[b].Name() }) + + if d.offset >= len(infos) { + if count > 0 { + return nil, io.EOF + } + + return []os.FileInfo{}, nil + } + + infos = infos[d.offset:] + + if count > 0 && count < len(infos) { + infos = infos[:count] + } + + d.offset += len(infos) + + return infos, nil +} + +func (d *dirFile) Readdirnames(n int) ([]string, error) { + infos, err := d.Readdir(n) + if err != nil { + return nil, err + } + + names := make([]string, len(infos)) + for i, info := range infos { + names[i] = info.Name() + } + + return names, nil +} + +func (d *dirFile) Write([]byte) (int, error) { + return 0, &os.PathError{Op: "write", Path: d.name, Err: ErrReadOnly} +} + +func (d *dirFile) WriteAt([]byte, int64) (int, error) { + return 0, &os.PathError{Op: "write", Path: d.name, Err: ErrReadOnly} +} + +func (d *dirFile) WriteString(string) (int, error) { + return 0, &os.PathError{Op: "write", Path: d.name, Err: ErrReadOnly} +} + +func (d *dirFile) Truncate(int64) error { + return &os.PathError{Op: "truncate", Path: d.name, Err: ErrReadOnly} +} diff --git a/internal/utils/gitfs/gitfs_test.go b/internal/utils/gitfs/gitfs_test.go new file mode 100644 index 00000000..cbdc6289 --- /dev/null +++ b/internal/utils/gitfs/gitfs_test.go @@ -0,0 +1,369 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +package gitfs_test + +import ( + "io" + "os" + "testing" + "time" + + "github.com/bmatcuk/doublestar/v4" + "github.com/go-git/go-billy/v5" + "github.com/go-git/go-billy/v5/memfs" + gogit "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/filemode" + "github.com/go-git/go-git/v5/plumbing/object" + "github.com/go-git/go-git/v5/storage/memory" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/gitfs" + "github.com/spf13/afero" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// writeFile creates/overwrites a file in the in-memory worktree. +func writeFile(t *testing.T, fs billy.Filesystem, relPath, content string) { + t.Helper() + + file, err := fs.Create(relPath) + require.NoError(t, err) + + _, err = file.Write([]byte(content)) + require.NoError(t, err) + require.NoError(t, file.Close()) +} + +// commitAll stages everything and commits, returning the commit hash. +func commitAll(t *testing.T, repo *gogit.Repository, msg string) plumbing.Hash { + t.Helper() + + worktree, err := repo.Worktree() + require.NoError(t, err) + + require.NoError(t, worktree.AddGlob(".")) + + hash, err := worktree.Commit(msg, &gogit.CommitOptions{ + Author: &object.Signature{Name: "t", Email: "t@t.com", When: time.Now()}, + }) + require.NoError(t, err) + + return hash +} + +// newTestRepo builds an in-memory repo with a tiny project tree and returns the +// repo plus the single commit's hash. 
+func newTestRepo(t *testing.T) (*gogit.Repository, plumbing.Hash) { + t.Helper() + + bfs := memfs.New() + + repo, err := gogit.Init(memory.NewStorage(), bfs) + require.NoError(t, err) + + writeFile(t, bfs, "azldev.toml", "includes = [\"comps/**/*.toml\"]\n") + writeFile(t, bfs, "comps/foo.toml", "name = \"foo\"\n") + writeFile(t, bfs, "comps/sub/bar.toml", "name = \"bar\"\n") + + return repo, commitAll(t, repo, "init") +} + +func TestOpenAndReadFile(t *testing.T) { + repo, hash := newTestRepo(t) + + fs, err := gitfs.NewFromCommit(repo, hash) + require.NoError(t, err) + + for _, name := range []string{"comps/foo.toml", "/comps/foo.toml", "./comps/foo.toml"} { + file, openErr := fs.Open(name) + require.NoError(t, openErr, "open %q", name) + + content, readErr := io.ReadAll(file) + require.NoError(t, readErr) + require.NoError(t, file.Close()) + + assert.Equal(t, "name = \"foo\"\n", string(content), "content via %q", name) + } +} + +func TestStat(t *testing.T) { + repo, hash := newTestRepo(t) + + gitFS, err := gitfs.NewFromCommit(repo, hash) + require.NoError(t, err) + + fileInfo, err := gitFS.Stat("/comps/foo.toml") + require.NoError(t, err) + assert.False(t, fileInfo.IsDir()) + assert.Equal(t, int64(len("name = \"foo\"\n")), fileInfo.Size()) + + dirInfo, err := gitFS.Stat("/comps") + require.NoError(t, err) + assert.True(t, dirInfo.IsDir()) + + rootInfo, err := gitFS.Stat("/") + require.NoError(t, err) + assert.True(t, rootInfo.IsDir()) +} + +func TestStatMissing(t *testing.T) { + repo, hash := newTestRepo(t) + + gitFS, err := gitfs.NewFromCommit(repo, hash) + require.NoError(t, err) + + _, err = gitFS.Stat("/nope.toml") + assert.True(t, os.IsNotExist(err), "expected not-exist, got %v", err) + + exists, err := afero.Exists(gitFS, "/nope.toml") + require.NoError(t, err) + assert.False(t, exists) + + exists, err = afero.Exists(gitFS, "/comps/foo.toml") + require.NoError(t, err) + assert.True(t, exists) +} + +func TestReaddir(t *testing.T) { + repo, hash := 
newTestRepo(t) + + fs, err := gitfs.NewFromCommit(repo, hash) + require.NoError(t, err) + + dir, err := fs.Open("/comps") + require.NoError(t, err) + + infos, err := dir.Readdir(-1) + require.NoError(t, err) + require.NoError(t, dir.Close()) + + names := make([]string, len(infos)) + for i, info := range infos { + names[i] = info.Name() + } + + assert.ElementsMatch(t, []string{"foo.toml", "sub"}, names) +} + +// TestGlobThroughDoublestar is the load-bearing test: it proves the config +// loader's include-resolution path (fileutils.Glob → afero.IOFS → doublestar) +// works against the git-backed filesystem with an absolute pattern, including +// the writable CopyOnWriteFs overlay the loader needs for scratch writes. +func TestGlobThroughDoublestar(t *testing.T) { + repo, hash := newTestRepo(t) + + base, err := gitfs.NewFromCommit(repo, hash) + require.NoError(t, err) + + fs := afero.NewCopyOnWriteFs(base, afero.NewMemMapFs()) + + matches, err := fileutils.Glob(fs, "/comps/**/*.toml", + doublestar.WithFailOnIOErrors(), doublestar.WithFilesOnly()) + require.NoError(t, err) + + assert.ElementsMatch(t, []string{"/comps/foo.toml", "/comps/sub/bar.toml"}, matches) +} + +func TestReadOnly(t *testing.T) { + repo, hash := newTestRepo(t) + + gitFS, err := gitfs.NewFromCommit(repo, hash) + require.NoError(t, err) + + _, err = gitFS.Create("/x") + require.Error(t, err) + + require.Error(t, gitFS.Mkdir("/d", 0o755)) + require.Error(t, gitFS.Remove("/comps/foo.toml")) +} + +// storeBlob writes a blob object directly to the repo store and returns its hash. 
+func storeBlob(t *testing.T, repo *gogit.Repository, content string) plumbing.Hash { + t.Helper() + + obj := repo.Storer.NewEncodedObject() + obj.SetType(plumbing.BlobObject) + + w, err := obj.Writer() + require.NoError(t, err) + + _, err = w.Write([]byte(content)) + require.NoError(t, err) + require.NoError(t, w.Close()) + + hash, err := repo.Storer.SetEncodedObject(obj) + require.NoError(t, err) + + return hash +} + +// newRepoWithSubmodule builds a repo whose root tree contains a gitlink +// (submodule) entry alongside a regular file, and returns the commit hash. The +// submodule entry's hash points at a commit that does not exist as a blob in +// this repo, mirroring a real gitlink. +func newRepoWithSubmodule(t *testing.T) (*gogit.Repository, plumbing.Hash) { + t.Helper() + + repo, err := gogit.Init(memory.NewStorage(), memfs.New()) + require.NoError(t, err) + + blobHash := storeBlob(t, repo, "name = \"foo\"\n") + submoduleHash := plumbing.NewHash("0123456789abcdef0123456789abcdef01234567") + + tree := &object.Tree{Entries: []object.TreeEntry{ + {Name: "azldev.toml", Mode: filemode.Regular, Hash: blobHash}, + {Name: "sub", Mode: filemode.Submodule, Hash: submoduleHash}, + }} + + treeObj := repo.Storer.NewEncodedObject() + require.NoError(t, tree.Encode(treeObj)) + + treeHash, err := repo.Storer.SetEncodedObject(treeObj) + require.NoError(t, err) + + commit := &object.Commit{ + Author: object.Signature{Name: "t", Email: "t@t.com", When: time.Now()}, + Committer: object.Signature{Name: "t", Email: "t@t.com", When: time.Now()}, + Message: "with submodule", + TreeHash: treeHash, + } + + commitObj := repo.Storer.NewEncodedObject() + require.NoError(t, commit.Encode(commitObj)) + + commitHash, err := repo.Storer.SetEncodedObject(commitObj) + require.NoError(t, err) + + return repo, commitHash +} + +// TestSubmoduleEntry verifies that submodule (gitlink) entries are handled +// explicitly: Open reports a clear, stable submodule error instead of a +// confusing "read 
blob" failure, and Stat/Readdir classify the entry as +// non-regular without silently falling back to a zero-size blob. +func TestSubmoduleEntry(t *testing.T) { + repo, hash := newRepoWithSubmodule(t) + + fs, err := gitfs.NewFromCommit(repo, hash) + require.NoError(t, err) + + // Open must fail with the stable submodule sentinel, not a blob-read error. + _, err = fs.Open("sub") + require.Error(t, err) + assert.ErrorIs(t, err, gitfs.ErrSubmodule) + assert.NotContains(t, err.Error(), "read blob") + + // Stat must succeed and classify the gitlink as non-regular (no silent + // zero-size blob fallback). + info, err := fs.Stat("sub") + require.NoError(t, err) + assert.False(t, info.Mode().IsRegular(), "submodule entry must not look like a regular file") + + // Readdir must still list the submodule alongside the regular file. + root, err := fs.Open("/") + require.NoError(t, err) + + names, err := root.Readdirnames(-1) + require.NoError(t, err) + assert.ElementsMatch(t, []string{"azldev.toml", "sub"}, names) +} + +// TestSymlinkEntry verifies that symlink entries are not silently read as file +// content (git stores the link target as the blob body). Open must report a +// clear, stable symlink error rather than handing the target-path string to the +// caller, and Stat must classify the entry as a symlink. 
+func TestSymlinkEntry(t *testing.T) { + repo, hash := commitTreeWithSymlink(t) + + fs, err := gitfs.NewFromCommit(repo, hash) + require.NoError(t, err) + + _, err = fs.Open("link") + require.Error(t, err) + assert.ErrorIs(t, err, gitfs.ErrSymlink) + assert.NotContains(t, err.Error(), "azldev.toml", "must not leak the link target as content") + + info, err := fs.Stat("link") + require.NoError(t, err) + assert.False(t, info.Mode().IsRegular(), "symlink must not look like a regular file") + assert.NotZero(t, info.Mode()&os.ModeSymlink, "symlink mode bit must be set") +} + +// commitTreeWithSymlink builds a repo whose root tree contains a symlink entry +// (blob body = target path) next to the regular file it points at. +func commitTreeWithSymlink(t *testing.T) (*gogit.Repository, plumbing.Hash) { + t.Helper() + + repo, err := gogit.Init(memory.NewStorage(), memfs.New()) + require.NoError(t, err) + + targetBlob := storeBlob(t, repo, "name = \"foo\"\n") + linkBlob := storeBlob(t, repo, "azldev.toml") // symlink body is the target path + + entries := []object.TreeEntry{ + {Name: "azldev.toml", Mode: filemode.Regular, Hash: targetBlob}, + {Name: "link", Mode: filemode.Symlink, Hash: linkBlob}, + } + + tree := &object.Tree{Entries: entries} + + treeObj := repo.Storer.NewEncodedObject() + require.NoError(t, tree.Encode(treeObj)) + + treeHash, err := repo.Storer.SetEncodedObject(treeObj) + require.NoError(t, err) + + commit := &object.Commit{ + Author: object.Signature{Name: "t", Email: "t@t.com", When: time.Now()}, + Committer: object.Signature{Name: "t", Email: "t@t.com", When: time.Now()}, + Message: "with symlink", + TreeHash: treeHash, + } + + commitObj := repo.Storer.NewEncodedObject() + require.NoError(t, commit.Encode(commitObj)) + + commitHash, err := repo.Storer.SetEncodedObject(commitObj) + require.NoError(t, err) + + return repo, commitHash +} + +// TestDirSeekResetsOffset verifies that seeking a directory handle back to the +// start lets a subsequent Readdir 
return the full listing again, rather than an +// empty result because the internal offset was never reset. +func TestDirSeekResetsOffset(t *testing.T) { + repo, hash := newTestRepo(t) + + fs, err := gitfs.NewFromCommit(repo, hash) + require.NoError(t, err) + + dir, err := fs.Open("/comps") + require.NoError(t, err) + + first, err := dir.Readdir(-1) + require.NoError(t, err) + require.NotEmpty(t, first) + + _, err = dir.Seek(0, io.SeekStart) + require.NoError(t, err) + + again, err := dir.Readdir(-1) + require.NoError(t, err) + assert.Len(t, again, len(first), "rewound directory must list all entries again") +} + +// TestReadOnlyErrorIsExported verifies callers can identify read-only failures +// via errors.Is against the exported sentinel. +func TestReadOnlyErrorIsExported(t *testing.T) { + repo, hash := newTestRepo(t) + + fs, err := gitfs.NewFromCommit(repo, hash) + require.NoError(t, err) + + _, err = fs.Create("/x") + require.Error(t, err) + assert.ErrorIs(t, err, gitfs.ErrReadOnly) +}