Skip to content

Commit 136df2b

Browse files
petr-mullerclaude
andcommitted
fix(private-org-sync): batch ls-remote calls per repo instead of per branch
Previously, mirror() called ls-remote for both source and destination on every branch, resulting in 2*N network calls per repo where N is the number of branches. Since ls-remote --heads returns ALL branches at once, we can call it once per repo and pass the results to mirror(). This moves git init, remote setup, and ls-remote calls from mirror() into the main loop where repos are already grouped, reducing network calls from 2*N to 2 per repo. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 7c4fa01 commit 136df2b

2 files changed

Lines changed: 99 additions & 148 deletions

File tree

cmd/private-org-sync/main.go

Lines changed: 55 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -364,13 +364,13 @@ func (g gitSyncer) initRepo(repoDir, org, repo string) error {
364364
return nil
365365
}
366366

367-
// mirror syncs content from source location to destination one, using a local
368-
// repository in the given path. The `repoDir` must have been previously
369-
// initialized via initRepo(). The git content from the `src` location will
370-
// be fetched to this local repository and then pushed to the `dst` location.
367+
// mirror syncs a single branch from source to destination, using pre-fetched
368+
// branch head information. The `repoDir` must have been previously initialized
369+
// with git init and remote setup. The `srcHeads` and `dstHeads` must have been
370+
// obtained from ls-remote calls against the source and destination repos.
371371
// Multiple `mirror` calls over the same `repoDir` will reuse the content
372372
// fetched in previous calls, acting like a cache.
373-
func (g gitSyncer) mirror(repoDir string, src, dst location) error {
373+
func (g gitSyncer) mirror(repoDir string, src, dst location, srcHeads, dstHeads RemoteBranchHeads, destUrl *url.URL) error {
374374
mirrorFields := logrus.Fields{
375375
"source": src.String(),
376376
"destination": dst.String(),
@@ -379,43 +379,13 @@ func (g gitSyncer) mirror(repoDir string, src, dst location) error {
379379
logger := g.logger.WithFields(mirrorFields)
380380
logger.Info("Syncing content between locations")
381381

382-
// We ls-remote destination first thing because when it does not exist
383-
// we do not need to do any of the remaining operations.
384-
logger.Debug("Determining HEAD of destination branch")
385-
destUrlRaw := fmt.Sprintf("%s/%s/%s", g.prefix, dst.org, dst.repo)
386-
destUrl, err := url.Parse(destUrlRaw)
387-
if err != nil {
388-
logger.WithField("remote-url", destUrlRaw).WithError(err).Error("Failed to construct URL for the destination remote")
389-
return fmt.Errorf("failed to construct URL for the destination remote")
390-
}
391-
if g.token != "" {
392-
destUrl.User = url.User(g.token)
393-
}
394-
395-
dstHeads, err := getRemoteBranchHeads(logger, g.git, repoDir, destUrl.String())
396-
if err != nil {
397-
message := "destination repository does not exist or we cannot access it"
398-
if g.failOnNonexistentDst {
399-
logger.Errorf("%s", message)
400-
return fmt.Errorf("%s", message)
401-
}
402-
403-
logger.Warn(message)
404-
return nil
405-
}
406382
dstCommitHash := dstHeads[dst.branch]
407383

408384
srcRemote := fmt.Sprintf("%s-%s", src.org, src.repo)
409385

410-
logger.Debug("Determining HEAD of source branch")
411-
srcHeads, err := getRemoteBranchHeads(logger, withRetryOnNonzero(g.git, 5), repoDir, srcRemote)
412-
if err != nil {
413-
logger.WithError(err).Error("Failed to determine branch HEADs in source")
414-
return fmt.Errorf("failed to determine branch HEADs in source")
415-
}
416386
srcCommitHash, ok := srcHeads[src.branch]
417387
if !ok {
418-
logger.WithError(err).Error("Branch does not exist in source remote")
388+
logger.Error("Branch does not exist in source remote")
419389
return fmt.Errorf("branch does not exist in source remote")
420390
}
421391

@@ -681,6 +651,14 @@ func main() {
681651
}
682652

683653
for key, branches := range grouped {
654+
repoLogger := logrus.WithFields(logrus.Fields{"org": key.org, "repo": key.repo})
655+
syncer.logger = repoLogger
656+
657+
dstRepo := key.repo
658+
if !flattenedOrgs.Has(key.org) {
659+
dstRepo = fmt.Sprintf("%s-%s", key.org, key.repo)
660+
}
661+
684662
gitDir, err := syncer.makeGitDir(key.org, key.repo)
685663
if err != nil {
686664
for _, source := range branches {
@@ -689,17 +667,52 @@ func main() {
689667
continue
690668
}
691669

692-
syncer.logger = logrus.WithFields(logrus.Fields{
693-
"org": key.org,
694-
"repo": key.repo,
695-
})
696670
if err := syncer.initRepo(gitDir, key.org, key.repo); err != nil {
697671
for _, source := range branches {
698672
errs = append(errs, fmt.Errorf("%s: %w", source.String(), err))
699673
}
700674
continue
701675
}
702676

677+
// ls-remote destination once per repo
678+
destUrlRaw := fmt.Sprintf("%s/%s/%s", syncer.prefix, o.targetOrg, dstRepo)
679+
destUrl, err := url.Parse(destUrlRaw)
680+
if err != nil {
681+
repoLogger.WithField("remote-url", destUrlRaw).WithError(err).Error("Failed to construct URL for the destination remote")
682+
for _, source := range branches {
683+
errs = append(errs, fmt.Errorf("%s: failed to construct URL for the destination remote", source.String()))
684+
}
685+
continue
686+
}
687+
if syncer.token != "" {
688+
destUrl.User = url.User(syncer.token)
689+
}
690+
691+
dstHeads, err := getRemoteBranchHeads(repoLogger, syncer.git, gitDir, destUrl.String())
692+
if err != nil {
693+
message := "destination repository does not exist or we cannot access it"
694+
if syncer.failOnNonexistentDst {
695+
repoLogger.Errorf("%s", message)
696+
for _, source := range branches {
697+
errs = append(errs, fmt.Errorf("%s: %s", source.String(), message))
698+
}
699+
} else {
700+
repoLogger.Warn(message)
701+
}
702+
continue
703+
}
704+
705+
// ls-remote source once per repo
706+
srcRemote := fmt.Sprintf("%s-%s", key.org, key.repo)
707+
srcHeads, err := getRemoteBranchHeads(repoLogger, withRetryOnNonzero(syncer.git, 5), gitDir, srcRemote)
708+
if err != nil {
709+
repoLogger.WithError(err).Error("Failed to determine branch HEADs in source")
710+
for _, source := range branches {
711+
errs = append(errs, fmt.Errorf("%s: failed to determine branch HEADs in source", source.String()))
712+
}
713+
continue
714+
}
715+
703716
for _, source := range branches {
704717
syncer.logger = config.LoggerForInfo(config.Info{
705718
Metadata: api.Metadata{
@@ -709,13 +722,9 @@ func main() {
709722
},
710723
})
711724

712-
destination := source
713-
destination.org = o.targetOrg
714-
if !flattenedOrgs.Has(source.org) {
715-
destination.repo = fmt.Sprintf("%s-%s", source.org, source.repo)
716-
}
725+
destination := location{org: o.targetOrg, repo: dstRepo, branch: source.branch}
717726

718-
if err := syncer.mirror(gitDir, source, destination); err != nil {
727+
if err := syncer.mirror(gitDir, source, destination, srcHeads, dstHeads, destUrl); err != nil {
719728
errs = append(errs, fmt.Errorf("%s->%s: %w", source.String(), destination.String(), err))
720729
}
721730
}

0 commit comments

Comments
 (0)