-
Notifications
You must be signed in to change notification settings - Fork 310
fix(private-org-sync): batch ls-remote calls per repo instead of per branch #5027
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -307,6 +307,22 @@ func (l location) String() string { | |||||
| return fmt.Sprintf("%s/%s@%s", l.org, l.repo, l.branch) | ||||||
| } | ||||||
|
|
||||||
| func (l location) remoteName() string { | ||||||
| return fmt.Sprintf("%s-%s", l.org, l.repo) | ||||||
| } | ||||||
|
|
||||||
| func (g gitSyncer) remoteURL(l location) (string, error) { | ||||||
| raw := fmt.Sprintf("%s/%s/%s", g.prefix, l.org, l.repo) | ||||||
| parsed, err := url.Parse(raw) | ||||||
| if err != nil { | ||||||
| return "", fmt.Errorf("failed to parse remote URL %q: %w", raw, err) | ||||||
| } | ||||||
| if g.token != "" { | ||||||
| parsed.User = url.User(g.token) | ||||||
| } | ||||||
| return parsed.String(), nil | ||||||
| } | ||||||
|
|
||||||
| // makeGitDir creates a directory for a local git repo used for fetching content | ||||||
| // from the given location and pushing it to any other git repo | ||||||
| func (g gitSyncer) makeGitDir(org, repo string) (string, error) { | ||||||
|
|
@@ -343,75 +359,112 @@ func (g gitSyncer) initRepo(repoDir, org, repo string) error { | |||||
| return nil | ||||||
| } | ||||||
|
|
||||||
| // mirror syncs content from source location to destination one, using a local | ||||||
| // repository in the given path. The `repoDir` must have been previously | ||||||
| // initialized via initRepo(). The git content from the `src` location will | ||||||
| // be fetched to this local repository and then pushed to the `dst` location. | ||||||
| // Multiple `mirror` calls over the same `repoDir` will reuse the content | ||||||
| // fetched in previous calls, acting like a cache. | ||||||
| func (g gitSyncer) mirror(repoDir string, src, dst location) error { | ||||||
| mirrorFields := logrus.Fields{ | ||||||
| "source": src.String(), | ||||||
| "destination": dst.String(), | ||||||
| "local-repo": repoDir, | ||||||
| } | ||||||
| logger := g.logger.WithFields(mirrorFields) | ||||||
| logger.Info("Syncing content between locations") | ||||||
| // syncRepo initializes a local git repo, fetches branch heads from both source | ||||||
| // and destination via ls-remote, and mirrors each branch that needs syncing. | ||||||
| func (g gitSyncer) syncRepo(org, repo, targetOrg, dstRepo string, branches []location) error { | ||||||
| repoLogger := g.logger | ||||||
|
|
||||||
| // We ls-remote destination first thing because when it does not exist | ||||||
| // we do not need to do any of the remaining operations. | ||||||
| logger.Debug("Determining HEAD of destination branch") | ||||||
| destUrlRaw := fmt.Sprintf("%s/%s/%s", g.prefix, dst.org, dst.repo) | ||||||
| destUrl, err := url.Parse(destUrlRaw) | ||||||
| gitDir, err := g.makeGitDir(org, repo) | ||||||
| if err != nil { | ||||||
| logger.WithField("remote-url", destUrlRaw).WithError(err).Error("Failed to construct URL for the destination remote") | ||||||
| return fmt.Errorf("failed to construct URL for the destination remote") | ||||||
| return err | ||||||
| } | ||||||
| if g.token != "" { | ||||||
| destUrl.User = url.User(g.token) | ||||||
|
|
||||||
| if err := g.initRepo(gitDir, org, repo); err != nil { | ||||||
| return err | ||||||
| } | ||||||
|
|
||||||
| // ls-remote destination once per repo | ||||||
| // branches is guaranteed non-empty: the caller groups by (org, repo) via append | ||||||
| dstLocation := location{org: targetOrg, repo: dstRepo, branch: branches[0].branch} | ||||||
| destUrl, err := g.remoteURL(dstLocation) | ||||||
| if err != nil { | ||||||
| repoLogger.WithError(err).Error("Failed to construct URL for the destination remote") | ||||||
| return err | ||||||
| } | ||||||
|
|
||||||
| dstHeads, err := getRemoteBranchHeads(logger, g.git, repoDir, destUrl.String()) | ||||||
| dstHeads, err := getRemoteBranchHeads(repoLogger, g.git, gitDir, destUrl) | ||||||
| if err != nil { | ||||||
| message := "destination repository does not exist or we cannot access it" | ||||||
| if g.failOnNonexistentDst { | ||||||
| logger.Errorf("%s", message) | ||||||
| repoLogger.Errorf("%s", message) | ||||||
| return fmt.Errorf("%s", message) | ||||||
| } | ||||||
|
|
||||||
| logger.Warn(message) | ||||||
| repoLogger.Warn(message) | ||||||
| return nil | ||||||
| } | ||||||
| dstCommitHash := dstHeads[dst.branch] | ||||||
|
|
||||||
| srcRemote := fmt.Sprintf("%s-%s", src.org, src.repo) | ||||||
|
|
||||||
| logger.Debug("Determining HEAD of source branch") | ||||||
| srcHeads, err := getRemoteBranchHeads(logger, withRetryOnNonzero(g.git, 5), repoDir, srcRemote) | ||||||
| // ls-remote source once per repo | ||||||
| srcRemote := branches[0].remoteName() | ||||||
| srcHeads, err := getRemoteBranchHeads(repoLogger, withRetryOnNonzero(g.git, 5), gitDir, srcRemote) | ||||||
|
petr-muller marked this conversation as resolved.
|
||||||
| if err != nil { | ||||||
| logger.WithError(err).Error("Failed to determine branch HEADs in source") | ||||||
| return fmt.Errorf("failed to determine branch HEADs in source") | ||||||
| repoLogger.WithError(err).Error("Failed to determine branch HEADs in source") | ||||||
| return fmt.Errorf("failed to determine branch HEADs in source: %w", err) | ||||||
| } | ||||||
| srcCommitHash, ok := srcHeads[src.branch] | ||||||
| if !ok { | ||||||
| if api.FlavorForBranch(src.branch) == "misc" { | ||||||
| logger.Warn("Non-release branch does not exist in source remote, likely deleted; skipping") | ||||||
| return nil | ||||||
|
|
||||||
| initialDepth := startDepth | ||||||
| if len(dstHeads) == 0 { | ||||||
| repoLogger.Info("Destination is an empty repo: will do a full fetch right away") | ||||||
| initialDepth = fullFetch | ||||||
| } | ||||||
|
|
||||||
| var errs []error | ||||||
| for _, source := range branches { | ||||||
| branchSyncer := g | ||||||
| branchSyncer.logger = config.LoggerForInfo(config.Info{ | ||||||
| Metadata: api.Metadata{ | ||||||
| Org: source.org, | ||||||
| Repo: source.repo, | ||||||
| Branch: source.branch, | ||||||
| }, | ||||||
| }) | ||||||
|
|
||||||
| destination := location{org: targetOrg, repo: dstRepo, branch: source.branch} | ||||||
| srcCommitHash, srcFound := srcHeads[source.branch] | ||||||
| if !srcFound { | ||||||
| if api.FlavorForBranch(source.branch) == "misc" { | ||||||
| branchSyncer.logger.Warn("Non-release branch does not exist in source remote, likely deleted; skipping") | ||||||
| continue | ||||||
| } | ||||||
| branchSyncer.logger.Error("Release/main branch does not exist in source remote; this may indicate the branch was deleted") | ||||||
| errs = append(errs, fmt.Errorf("%s: branch does not exist in source remote", source.String())) | ||||||
| continue | ||||||
| } | ||||||
|
|
||||||
| if err := branchSyncer.mirror(gitDir, source, destination, srcCommitHash, dstHeads[source.branch], initialDepth); err != nil { | ||||||
| errs = append(errs, fmt.Errorf("%s->%s: %w", source.String(), destination.String(), err)) | ||||||
|
petr-muller marked this conversation as resolved.
|
||||||
| } | ||||||
| logger.Error("Release/main branch does not exist in source remote; this may indicate the branch was deleted") | ||||||
| return fmt.Errorf("branch does not exist in source remote") | ||||||
| } | ||||||
|
|
||||||
| return utilerrors.NewAggregate(errs) | ||||||
| } | ||||||
|
|
||||||
| // mirror syncs a single branch from source to destination. The `repoDir` must | ||||||
| // have been previously initialized with git init and remote setup. Multiple | ||||||
| // `mirror` calls over the same `repoDir` will reuse the content fetched in | ||||||
| // previous calls, acting like a cache. | ||||||
| func (g gitSyncer) mirror(repoDir string, src, dst location, srcCommitHash string, dstCommitHash string, initialDepth int) error { | ||||||
| mirrorFields := logrus.Fields{ | ||||||
| "source": src.String(), | ||||||
| "destination": dst.String(), | ||||||
| "local-repo": repoDir, | ||||||
| } | ||||||
| logger := g.logger.WithFields(mirrorFields) | ||||||
| logger.Info("Syncing content between locations") | ||||||
|
|
||||||
| destUrl, err := g.remoteURL(dst) | ||||||
| if err != nil { | ||||||
| logger.WithError(err).Error("Failed to construct URL for the destination remote") | ||||||
| return err | ||||||
| } | ||||||
|
|
||||||
| srcRemote := src.remoteName() | ||||||
|
|
||||||
| if srcCommitHash == dstCommitHash { | ||||||
| logger.Info("Branches are already in sync") | ||||||
| return nil | ||||||
| } | ||||||
|
|
||||||
| depth := startDepth | ||||||
| if len(dstHeads) == 0 { | ||||||
| logger.Info("Destination is an empty repo: will do a full fetch right away") | ||||||
| depth = fullFetch | ||||||
| } | ||||||
| depth := initialDepth | ||||||
|
|
||||||
| push := func() (retry func() error, err error) { | ||||||
| cmd := []string{"push", "--tags"} | ||||||
|
|
@@ -420,7 +473,7 @@ func (g gitSyncer) mirror(repoDir string, src, dst location) error { | |||||
| cmd = append(cmd, "--dry-run") | ||||||
| logDryRun = " (dry-run)" | ||||||
| } | ||||||
| cmd = append(cmd, destUrl.String(), fmt.Sprintf("FETCH_HEAD:refs/heads/%s", dst.branch)) | ||||||
| cmd = append(cmd, destUrl, fmt.Sprintf("FETCH_HEAD:refs/heads/%s", dst.branch)) | ||||||
| logger.Infof("Pushing to destination%s", logDryRun) | ||||||
|
|
||||||
| out, exitCode, err := g.git(logger, repoDir, cmd...) | ||||||
|
|
@@ -437,7 +490,7 @@ func (g gitSyncer) mirror(repoDir string, src, dst location) error { | |||||
|
|
||||||
| if depth == unshallow { | ||||||
| logger.Info("Trying to fetch source and destination full history and perform a merge") | ||||||
| if err := mergeRemotesAndPush(logger, g.git, repoDir, srcRemote, dst.branch, destUrl.String(), g.confirm, g.gitName, g.gitEmail); err != nil { | ||||||
| if err := mergeRemotesAndPush(logger, g.git, repoDir, srcRemote, dst.branch, destUrl, g.confirm, g.gitName, g.gitEmail); err != nil { | ||||||
| return nil, fmt.Errorf("failed to fetch remote and merge: %w", err) | ||||||
| } | ||||||
| return nil, nil | ||||||
|
|
@@ -453,7 +506,7 @@ func (g gitSyncer) mirror(repoDir string, src, dst location) error { | |||||
| switch strings.TrimSpace(shallowOut) { | ||||||
| case "false": | ||||||
| logger.Info("Trying to fetch source and destination full history and perform a merge") | ||||||
| if err := mergeRemotesAndPush(logger, g.git, repoDir, srcRemote, dst.branch, destUrl.String(), g.confirm, g.gitName, g.gitEmail); err != nil { | ||||||
| if err := mergeRemotesAndPush(logger, g.git, repoDir, srcRemote, dst.branch, destUrl, g.confirm, g.gitName, g.gitEmail); err != nil { | ||||||
| return nil, fmt.Errorf("failed to fetch remote and merge: %w", err) | ||||||
| } | ||||||
| return nil, nil | ||||||
|
|
@@ -664,41 +717,12 @@ func main() { | |||||
| } | ||||||
|
|
||||||
| for key, branches := range grouped { | ||||||
| gitDir, err := syncer.makeGitDir(key.org, key.repo) | ||||||
| if err != nil { | ||||||
| for _, source := range branches { | ||||||
| errs = append(errs, fmt.Errorf("%s: %w", source.String(), err)) | ||||||
| } | ||||||
| continue | ||||||
| } | ||||||
|
|
||||||
| syncer.logger = logrus.WithFields(logrus.Fields{ | ||||||
| "org": key.org, | ||||||
| "repo": key.repo, | ||||||
| }) | ||||||
| if err := syncer.initRepo(gitDir, key.org, key.repo); err != nil { | ||||||
| for _, source := range branches { | ||||||
| errs = append(errs, fmt.Errorf("%s: %w", source.String(), err)) | ||||||
| } | ||||||
| continue | ||||||
| } | ||||||
| syncer.logger = logrus.WithFields(logrus.Fields{"org": key.org, "repo": key.repo}) | ||||||
|
|
||||||
| for _, source := range branches { | ||||||
| syncer.logger = config.LoggerForInfo(config.Info{ | ||||||
| Metadata: api.Metadata{ | ||||||
| Org: source.org, | ||||||
| Repo: source.repo, | ||||||
| Branch: source.branch, | ||||||
| }, | ||||||
| }) | ||||||
| dstRepo := privateorg.MirroredRepoName(key.org, key.repo, flattenedOrgs) | ||||||
|
|
||||||
| destination := source | ||||||
| destination.org = o.targetOrg | ||||||
| destination.repo = privateorg.MirroredRepoName(source.org, source.repo, flattenedOrgs) | ||||||
|
|
||||||
| if err := syncer.mirror(gitDir, source, destination); err != nil { | ||||||
| errs = append(errs, fmt.Errorf("%s->%s: %w", source.String(), destination.String(), err)) | ||||||
| } | ||||||
| if err := syncer.syncRepo(key.org, key.repo, o.targetOrg, dstRepo, branches); err != nil { | ||||||
| errs = append(errs, err) | ||||||
|
||||||
| errs = append(errs, err) | |
| errs = append(errs, fmt.Errorf("%s/%s: %w", key.org, key.repo, err)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
syncRepo()runsmakeGitDir()+initRepo()before checking destination accessibility vials-remote. For the common “destination repo missing / no access and failOnNonexistentDst=false” case, this does unnecessary local work (and can leave behind initialized dirs when--git-diris persistent). Consider doing the destinationls-remotefirst (it can run without a local repo) and only initializing the local repo if the destination check succeeds.