From 87f41b78df6efe38b7623308eb2c04a83c207752 Mon Sep 17 00:00:00 2001 From: Paul Johnston Date: Mon, 18 May 2026 23:23:43 -0600 Subject: [PATCH] starlark_repository: capture upstream BUILD files as starlark_package targets Adds a new "preserve" build_file_generation mode that runs cmd/preserve_packages between fetch_repo and gazelle. The tool mirrors fetch_repo -clean semantics (deletes WORKSPACE / MODULE.bazel / etc.) but renames BUILD and BUILD.bazel to BUILD.package and BUILD.bazel.package so the upstream package layout is preserved as opaque files rather than discarded. The starlarkrepository gazelle extension grows a parallel pair of kinds alongside starlark_module / starlark_module_library: - starlark_package: one per BUILD*.package, src points at the preserved file. Name is the dot-sanitized filename (BUILD.package -> BUILD_package) to avoid clashing with starlark_module names derived from .bzl files. - starlark_package_library: emitted at the configured root, named "starlark_packages" (not "packages", which would clash with packages.bzl). Its `packages` attr is populated in Resolve via the RuleIndex. starlark_repository.archive/.local default build_file_generation to "preserve" so users get the new behavior automatically; they can still override. 
--- cmd/preserve_packages/BUILD.bazel | 14 ++ cmd/preserve_packages/main.go | 69 +++++++++ extensions/starlark_repository.bzl | 14 ++ language/starlarkrepository/language.go | 133 ++++++++++++++++-- rules/private/proto_repository_tools.bzl | 6 + rules/private/proto_repository_tools_srcs.bzl | 2 + rules/proto/proto_repository.bzl | 31 +++- rules/starlark_package.bzl | 28 ++++ rules/starlark_package_library.bzl | 43 ++++++ 9 files changed, 325 insertions(+), 15 deletions(-) create mode 100644 cmd/preserve_packages/BUILD.bazel create mode 100644 cmd/preserve_packages/main.go create mode 100644 rules/starlark_package.bzl create mode 100644 rules/starlark_package_library.bzl diff --git a/cmd/preserve_packages/BUILD.bazel b/cmd/preserve_packages/BUILD.bazel new file mode 100644 index 000000000..8bc1aa3d4 --- /dev/null +++ b/cmd/preserve_packages/BUILD.bazel @@ -0,0 +1,14 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library") + +go_library( + name = "preserve_packages_lib", + srcs = ["main.go"], + importpath = "github.com/stackb/rules_proto/v4/cmd/preserve_packages", + visibility = ["//visibility:private"], +) + +go_binary( + name = "preserve_packages", + embed = [":preserve_packages_lib"], + visibility = ["//visibility:public"], +) diff --git a/cmd/preserve_packages/main.go b/cmd/preserve_packages/main.go new file mode 100644 index 000000000..a27c1a16c --- /dev/null +++ b/cmd/preserve_packages/main.go @@ -0,0 +1,69 @@ +// preserve_packages walks a fetched external repository and rewrites the +// upstream files that fetch_repo -clean would otherwise delete. BUILD and +// BUILD.bazel are renamed to BUILD.package / BUILD.bazel.package so the +// starlarkrepository gazelle extension can capture them as starlark_package +// rules; everything else fetch_repo -clean removes (MODULE.bazel, +// WORKSPACE, …) is deleted outright. +// +// Intended to be invoked from rules/proto/proto_repository.bzl when +// build_file_generation = "preserve". 
+package main + +import ( + "flag" + "fmt" + "io/fs" + "log" + "os" + "path/filepath" +) + +var renameMap = map[string]string{ + "BUILD": "BUILD.package", + "BUILD.bazel": "BUILD.bazel.package", +} + +var deleteSet = map[string]bool{ + "MODULE.bazel": true, + "MODULE.bazel.lock": true, + "WORKSPACE": true, + "WORKSPACE.bazel": true, + "WORKSPACE.bzlmod": true, +} + +func main() { + root := flag.String("root", "", "repo root to walk (required)") + flag.Parse() + if *root == "" { + log.Fatal("preserve_packages: -root is required") + } + + if err := run(*root); err != nil { + log.Fatalf("preserve_packages: %v", err) + } +} + +func run(root string) error { + return filepath.Walk(root, func(path string, info fs.FileInfo, err error) error { + if err != nil { + return err + } + if info.IsDir() { + return nil + } + name := info.Name() + if dst, ok := renameMap[name]; ok { + target := filepath.Join(filepath.Dir(path), dst) + if err := os.Rename(path, target); err != nil { + return fmt.Errorf("rename %s -> %s: %w", path, target, err) + } + return nil + } + if deleteSet[name] { + if err := os.Remove(path); err != nil { + return fmt.Errorf("remove %s: %w", path, err) + } + } + return nil + }) +} diff --git a/extensions/starlark_repository.bzl b/extensions/starlark_repository.bzl index 12beb0e72..965832c8c 100644 --- a/extensions/starlark_repository.bzl +++ b/extensions/starlark_repository.bzl @@ -39,6 +39,18 @@ def _extension_metadata( **metadata_kwargs ) +def _default_preserve(kwargs): + """Sets build_file_generation = "preserve" by default. + + starlark_repository exists specifically to capture upstream module + contents for introspection. The "preserve" mode is the only mode that + produces the starlark_package_library aggregator, so it's the desired + default. Users can still override (e.g. to "on" or "clean") by passing + build_file_generation explicitly on the tag. 
+ """ + if not kwargs.get("build_file_generation"): + kwargs["build_file_generation"] = "preserve" + def _starlark_repository_impl(module_ctx): # named_archives / named_locals are dicts where V is the kwargs for # the underlying "starlark_repository" repo rule and K is the tag.name @@ -58,6 +70,7 @@ def _starlark_repository_impl(module_ctx): for attr in _starlark_repository_archive_attrs.keys() if hasattr(tag, attr) } + _default_preserve(kwargs) named_archives[tag.name] = kwargs for tag in module.tags.local: kwargs = { @@ -69,6 +82,7 @@ def _starlark_repository_impl(module_ctx): # The user-facing attr is "path"; the underlying repo rule expects # "local_path" (a sibling of "urls" / "commit" / "version"). kwargs["local_path"] = kwargs.pop("path") + _default_preserve(kwargs) named_locals[tag.name] = kwargs # declare a repository rule foreach one diff --git a/language/starlarkrepository/language.go b/language/starlarkrepository/language.go index 841f4931c..7de18fe8e 100644 --- a/language/starlarkrepository/language.go +++ b/language/starlarkrepository/language.go @@ -45,16 +45,20 @@ import ( ) const ( - languageName = "starlarkrepository" - repoNameDirectiveName = languageName + "_repo_name" - rootDirectiveName = languageName + "_root" - excludeDirectiveName = languageName + "_exclude" - logFileDirectiveName = languageName + "_log_file" - starlarkModuleKind = "starlark_module" - starlarkModuleLibraryKind = "starlark_module_library" - starlarkModuleLibraryName = "modules" - fileType = ".bzl" - visibilityPublic = "//visibility:public" + languageName = "starlarkrepository" + repoNameDirectiveName = languageName + "_repo_name" + rootDirectiveName = languageName + "_root" + excludeDirectiveName = languageName + "_exclude" + logFileDirectiveName = languageName + "_log_file" + starlarkModuleKind = "starlark_module" + starlarkModuleLibraryKind = "starlark_module_library" + starlarkModuleLibraryName = "modules" + starlarkPackageKind = "starlark_package" + 
starlarkPackageLibraryKind = "starlark_package_library" + starlarkPackageLibraryName = "starlark_packages" + bzlFileType = ".bzl" + packageFileType = ".package" + visibilityPublic = "//visibility:public" ) var ( @@ -73,6 +77,17 @@ var ( NonEmptyAttrs: map[string]bool{"src": true}, }, } + starlarkPackageLibraryKindInfo = map[string]rule.KindInfo{ + starlarkPackageLibraryKind: { + NonEmptyAttrs: map[string]bool{"packages": true}, + ResolveAttrs: map[string]bool{"packages": true}, + }, + } + starlarkPackageKindInfo = map[string]rule.KindInfo{ + starlarkPackageKind: { + NonEmptyAttrs: map[string]bool{"src": true}, + }, + } starlarkModuleLibraryLoadInfo = rule.LoadInfo{ Name: "@build_stack_rules_proto//rules:starlark_module_library.bzl", Symbols: []string{starlarkModuleLibraryKind}, @@ -81,6 +96,14 @@ var ( Name: "@build_stack_rules_proto//rules:starlark_module.bzl", Symbols: []string{starlarkModuleKind}, } + starlarkPackageLibraryLoadInfo = rule.LoadInfo{ + Name: "@build_stack_rules_proto//rules:starlark_package_library.bzl", + Symbols: []string{starlarkPackageLibraryKind}, + } + starlarkPackageLoadInfo = rule.LoadInfo{ + Name: "@build_stack_rules_proto//rules:starlark_package.bzl", + Symbols: []string{starlarkPackageKind}, + } ) type starlarkRepositoryLang struct { @@ -201,6 +224,8 @@ func (*starlarkRepositoryLang) Kinds() map[string]rule.KindInfo { kinds := map[string]rule.KindInfo{} maps.Copy(kinds, starlarkModuleLibraryKindInfo) maps.Copy(kinds, starlarkModuleKindInfo) + maps.Copy(kinds, starlarkPackageLibraryKindInfo) + maps.Copy(kinds, starlarkPackageKindInfo) return kinds } @@ -211,6 +236,8 @@ func (*starlarkRepositoryLang) Loads() []rule.LoadInfo { return []rule.LoadInfo{ starlarkModuleLibraryLoadInfo, starlarkModuleLoadInfo, + starlarkPackageLibraryLoadInfo, + starlarkPackageLoadInfo, } } @@ -229,6 +256,8 @@ func (ext *starlarkRepositoryLang) Imports(c *config.Config, r *rule.Rule, f *ru switch r.Kind() { case starlarkModuleKind: return 
ext.starlarkModuleImports(c, r, f) + case starlarkPackageKind: + return ext.starlarkPackageImports(c, r, f) default: return nil } @@ -242,6 +271,13 @@ func (ext *starlarkRepositoryLang) starlarkModuleImports(_ *config.Config, r *ru } } +func (ext *starlarkRepositoryLang) starlarkPackageImports(_ *config.Config, r *rule.Rule, f *rule.File) []resolve.ImportSpec { + return []resolve.ImportSpec{ + {Lang: languageName, Imp: fmt.Sprintf("//%s:%s", f.Pkg, r.AttrString("src"))}, + {Lang: languageName, Imp: starlarkPackageKind}, + } +} + // Embeds returns a list of labels of rules that the given rule embeds. If a // rule is embedded by another importable rule of the same language, only the // embedding rule will be indexed. The embedding rule will inherit the imports @@ -261,6 +297,8 @@ func (ext *starlarkRepositoryLang) Resolve(c *config.Config, ix *resolve.RuleInd switch r.Kind() { case starlarkModuleLibraryKind: ext.starlarkModuleLibraryResolve(c, ix, rc, r, importsRaw, from) + case starlarkPackageLibraryKind: + ext.starlarkPackageLibraryResolve(c, ix, rc, r, importsRaw, from) } } @@ -309,10 +347,24 @@ func (ext *starlarkRepositoryLang) GenerateRules(args language.GenerateArgs) (re log.Printf("generated %s %s/%s", r.Kind(), args.Rel, r.Name()) } + for _, f := range args.RegularFiles { + if !isBuildPackageFile(f) { + continue + } + r, imports := ext.starlarkPackageRule(args, f) + result.Gen = append(result.Gen, r) + result.Imports = append(result.Imports, imports) + log.Printf("generated %s %s/%s", r.Kind(), args.Rel, r.Name()) + } + if _, ok := getMatchingRoot(args.Rel, ext.roots); ok { r, imports := ext.starlarkModuleLibraryRule(args) result.Gen = append(result.Gen, r) result.Imports = append(result.Imports, imports) + + r, imports = ext.starlarkPackageLibraryRule(args) + result.Gen = append(result.Gen, r) + result.Imports = append(result.Imports, imports) } return @@ -344,7 +396,7 @@ func mustListFiles(logf LogFunc, dir string) []string { } func (ext 
*starlarkRepositoryLang) starlarkModuleRule(args language.GenerateArgs, src string, loadStmts []*build.LoadStmt) (*rule.Rule, []any) { - name := strings.TrimSuffix(src, fileType) + name := strings.TrimSuffix(src, bzlFileType) ext.logf("generating %s rule for %s //%s:%s", starlarkModuleKind, src, args.Rel, name) loads := make([]string, 0, len(loadStmts)) @@ -376,6 +428,59 @@ func (ext *starlarkRepositoryLang) starlarkModuleLibraryRule(_ language.Generate return r, []any{} } +func (ext *starlarkRepositoryLang) starlarkPackageRule(args language.GenerateArgs, src string) (*rule.Rule, []any) { + // Sanitize the filename into a target name: "BUILD.package" -> "BUILD_package", + // "BUILD.bazel.package" -> "BUILD_bazel_package". This avoids clashing with + // `pkg.bzl`-derived `starlark_module(name = "pkg")` targets that exist in many + // Starlark codebases. + name := strings.ReplaceAll(src, ".", "_") + ext.logf("generating %s rule for %s //%s:%s", starlarkPackageKind, src, args.Rel, name) + + r := rule.NewRule(starlarkPackageKind, name) + r.SetAttr("src", src) + r.SetAttr("visibility", []string{visibilityPublic}) + + return r, []any{} +} + +func (ext *starlarkRepositoryLang) starlarkPackageLibraryRule(_ language.GenerateArgs) (*rule.Rule, []any) { + r := rule.NewRule(starlarkPackageLibraryKind, starlarkPackageLibraryName) + if ext.bazelVersion != "" { + r.SetAttr("bazelversion", ext.bazelVersion) + } + if len(ext.bazelIgnore) > 0 { + r.SetAttr("bazelignore", ext.bazelIgnore) + } + r.SetAttr("visibility", []string{visibilityPublic}) + return r, []any{} +} + +func (ext *starlarkRepositoryLang) starlarkPackageLibraryResolve(c *config.Config, ix *resolve.RuleIndex, _ *repo.RemoteCache, r *rule.Rule, _ interface{}, from label.Label) { + root, isRoot := getMatchingRoot(from.Pkg, ext.roots) + if !isRoot { + ext.logf("skipping packages resolution for %v (not a root: %v)", from, ext.roots) + return + } + + var packages []string + + matches := ix.FindRulesByImportWithConfig(c, 
resolve.ImportSpec{ + Lang: languageName, + Imp: starlarkPackageKind, + }, languageName) + for _, m := range matches { + depLabel := m.Label.Rel(from.Repo, from.Pkg) + if strings.HasPrefix(depLabel.Pkg, root) { + packages = append(packages, depLabel.String()) + } + } + + if len(packages) > 0 { + sort.Strings(packages) + r.SetAttr("packages", packages) + } +} + func (ext *starlarkRepositoryLang) starlarkModuleLibraryResolve(c *config.Config, ix *resolve.RuleIndex, _ *repo.RemoteCache, r *rule.Rule, _ interface{}, from label.Label) { // only perform resolve if this is one of the roots root, isRoot := getMatchingRoot(from.Pkg, ext.roots) @@ -511,7 +616,11 @@ func readFileLines(filePath string, logf LogFunc) ([]string, error) { } func isBzlSourceFile(f string) bool { - return strings.HasSuffix(f, fileType) && !ignoreSuffix.Matches(f) + return strings.HasSuffix(f, bzlFileType) && !ignoreSuffix.Matches(f) +} + +func isBuildPackageFile(f string) bool { + return f == "BUILD.package" || f == "BUILD.bazel.package" } func getBzlFileLoadsStmts(path, rel string, logf LogFunc) (*build.File, []*build.LoadStmt, error) { diff --git a/rules/private/proto_repository_tools.bzl b/rules/private/proto_repository_tools.bzl index 957e01c90..c0fd00564 100644 --- a/rules/private/proto_repository_tools.bzl +++ b/rules/private/proto_repository_tools.bzl @@ -24,6 +24,11 @@ filegroup( name = "gazelle", srcs = ["bin/gazelle{extension}"], ) + +filegroup( + name = "preserve_packages", + srcs = ["bin/preserve_packages{extension}"], +) """ def _proto_repository_tools_impl(ctx): @@ -90,6 +95,7 @@ def _proto_repository_tools_impl(ctx): "-asmflags", "all=-trimpath=" + env["GOPATH"], "github.com/stackb/rules_proto/v4/cmd/gazelle", + "github.com/stackb/rules_proto/v4/cmd/preserve_packages", ] result = env_execute(ctx, args, environment = env) if result.return_code: diff --git a/rules/private/proto_repository_tools_srcs.bzl b/rules/private/proto_repository_tools_srcs.bzl index af796bcc2..8651c1557 100644 
--- a/rules/private/proto_repository_tools_srcs.bzl +++ b/rules/private/proto_repository_tools_srcs.bzl @@ -21,6 +21,8 @@ PROTO_REPOSITORY_TOOLS_SRCS = [ "@build_stack_rules_proto//cmd/gazelle:wspace.go", "@build_stack_rules_proto//cmd/gencopy:BUILD.bazel", "@build_stack_rules_proto//cmd/gencopy:gencopy.go", + "@build_stack_rules_proto//cmd/preserve_packages:BUILD.bazel", + "@build_stack_rules_proto//cmd/preserve_packages:main.go", "@build_stack_rules_proto//example:BUILD.bazel", "@build_stack_rules_proto//example/assets:BUILD.bazel", "@build_stack_rules_proto//example/assets:api.pb.go", diff --git a/rules/proto/proto_repository.bzl b/rules/proto/proto_repository.bzl index 071649c7e..a06dd8d3d 100644 --- a/rules/proto/proto_repository.bzl +++ b/rules/proto/proto_repository.bzl @@ -74,7 +74,7 @@ def _proto_repository_impl(ctx): watch(ctx, go_env_cache) fetch_repo = str(ctx.path(Label("@bazel_gazelle_go_repository_tools//:bin/fetch_repo{}".format(executable_extension(ctx))))) watch(ctx, fetch_repo) - generate = ctx.attr.build_file_generation in ["on", "clean"] + generate = ctx.attr.build_file_generation in ["on", "clean", "preserve"] _gazelle = "@proto_repository_tools//:bin/gazelle{}".format(executable_extension(ctx)) @@ -254,6 +254,7 @@ def _proto_repository_impl(ctx): fail("%s: %s" % (ctx.name, result.stderr)) _delete_files(ctx, ctx.attr.deleted_files) + _preserve_packages(ctx) if DEBUG: _find(ctx) @@ -422,6 +423,25 @@ def _delete_files(ctx, files_to_delete): # print("delete files result:", result.stdout) +def _preserve_packages(ctx): + """Renames BUILD files to .package suffix for build_file_generation = "preserve". + + Mirrors fetch_repo -clean semantics (deletes MODULE.bazel / WORKSPACE / + etc.) but renames BUILD / BUILD.bazel to BUILD.package / BUILD.bazel.package + so the starlarkrepository gazelle extension can capture them as + starlark_package rules. 
+ """ + if ctx.attr.build_file_generation != "preserve": + return + + preserve_packages = str(ctx.path(Label("@proto_repository_tools//:bin/preserve_packages{}".format(executable_extension(ctx))))) + result = env_execute( + ctx, + [preserve_packages, "-root", ctx.path("")], + ) + if result.return_code: + fail("preserve_packages failed for %s: %s" % (ctx.name, result.stderr)) + def _generate_package_info(*, importpath, version): package_name = importpath @@ -620,17 +640,22 @@ _go_repository_attrs = { ), "build_file_generation": attr.string( default = "auto", - doc = """One of `"auto"`, `"on"`, `"off"`, `"clean"`. + doc = """One of `"auto"`, `"on"`, `"off"`, `"clean"`, `"preserve"`. Whether Gazelle should generate build files in the repository. In `"auto"` mode, Gazelle will run if there is no build file in the repository root directory. In `"clean"` mode, Gazelle will first remove any existing build - files.""", + files. In `"preserve"` mode (used by `starlark_repository`), the upstream + `BUILD` / `BUILD.bazel` files are renamed to `BUILD.package` / + `BUILD.bazel.package` (and `MODULE.bazel` / `WORKSPACE*` files are deleted) + so the starlarkrepository gazelle extension can emit `starlark_package` + rules referencing the preserved files.""", values = [ "on", "auto", "off", "clean", + "preserve", ], ), "build_naming_convention": attr.string( diff --git a/rules/starlark_package.bzl b/rules/starlark_package.bzl new file mode 100644 index 000000000..84e736672 --- /dev/null +++ b/rules/starlark_package.bzl @@ -0,0 +1,28 @@ +"""starlark_package.bzl is similar to bzl_library but for a BUILD.package file.""" + +StarlarkPackageInfo = provider( + "Information about a single preserved BUILD*.package file.", + fields = { + "label": "Label: The label of the target rule", + "src": "File: The preserved .package file", + }, +) + +def _starlark_package_impl(ctx): + return [ + StarlarkPackageInfo( + label = ctx.label, + src = ctx.file.src, + ), + ] + +starlark_package = rule( + implementation = 
_starlark_package_impl, + attrs = { + "src": attr.label( + doc = "the .package source file", + allow_single_file = True, + ), + }, + provides = [StarlarkPackageInfo], +) diff --git a/rules/starlark_package_library.bzl b/rules/starlark_package_library.bzl new file mode 100644 index 000000000..d30406f7e --- /dev/null +++ b/rules/starlark_package_library.bzl @@ -0,0 +1,43 @@ +"""starlark_package_library.bzl is similar to bzl_library but aggregates starlark_package targets into a flat, non-transitive list.""" + +load("//rules:starlark_package.bzl", "StarlarkPackageInfo") + +StarlarkPackageLibraryInfo = provider( + "Information on a set of starlark packages. This is a flat list, non-transitive.", + fields = { + "label": "The label of the target rule", + "packages": "List[StarlarkPackageInfo]: package deps of this rule", + "srcs": "List[File]: source files for the packages, for convenience", + "bazelignore": "List[str] value of ctx.attr.bazelignore", + "bazelversion": "str: the value of ctx.attr.bazelversion", + }, +) + +def _starlark_package_library_impl(ctx): + packages = [m[StarlarkPackageInfo] for m in ctx.attr.packages] + return [ + StarlarkPackageLibraryInfo( + label = ctx.label, + bazelignore = ctx.attr.bazelignore, + bazelversion = ctx.attr.bazelversion, + packages = packages, + srcs = [m.src for m in packages], + ), + ] + +starlark_package_library = rule( + implementation = _starlark_package_library_impl, + attrs = { + "bazelignore": attr.string_list( + doc = "contents of the .bazelignore file, if present", + ), + "bazelversion": attr.string( + doc = "contents of the .bazelversion file, if present", + ), + "packages": attr.label_list( + doc = "list of starlark_package rule dependencies.", + providers = [StarlarkPackageInfo], + ), + }, + provides = [StarlarkPackageLibraryInfo], +)