From 9721e47232ace4a2dc349da14e0c54db46a44ea6 Mon Sep 17 00:00:00 2001 From: sfaynet Date: Fri, 17 Apr 2026 17:30:46 +0300 Subject: [PATCH 1/3] fix_list_duration_s3_api_walk --- pkg/backup/download.go | 53 ++++++++++++++++++++++++++++++++++++++++++ pkg/storage/general.go | 31 ++++++++++++++++++++++++ 2 files changed, 84 insertions(+) diff --git a/pkg/backup/download.go b/pkg/backup/download.go index e481ad01c..f69c8da3f 100644 --- a/pkg/backup/download.go +++ b/pkg/backup/download.go @@ -110,6 +110,11 @@ func (b *Backuper) Download(backupName string, tablePattern string, partitions [ } }() + // Prefetch metadata for incremental backup chain to populate cache + if err := b.prefetchBackupMetadataChain(ctx, backupName); err != nil { + log.Warn().Err(err).Msg("prefetchBackupMetadataChain failed, continuing with on-demand fetching") + } + remoteBackups, err := b.dst.BackupList(ctx, true, backupName) if err != nil { return errors.WithMessage(err, "BackupList") @@ -320,6 +325,10 @@ func (b *Backuper) Download(backupName string, tablePattern string, partitions [ "object_disk_size": utils.FormatBytes(backupMetadata.ObjectDiskSize), "version": backupVersion, }).Msg("done") + + // Clear backup list cache after download completes + storage.ClearBackupListCache() + return nil } @@ -1462,3 +1471,47 @@ func (b *Backuper) getDownloadDiskForNonExistsDisk(notExistsDiskType string, fil } return false, filteredDisks[leastUsedIdx].Name, filteredDisks[leastUsedIdx].FreeSpace - partSize, nil } + +// prefetchBackupMetadataChain - prefetch metadata for the entire incremental backup chain +// to populate the in-memory cache and avoid repeated S3 API calls +func (b *Backuper) prefetchBackupMetadataChain(ctx context.Context, backupName string) error { + start := time.Now() + visited := make(map[string]bool) + backupChain := []string{} + + // Discover the backup chain by walking RequiredBackup links + currentBackup := backupName + for currentBackup != "" && !visited[currentBackup] { + backupChain = append(backupChain, currentBackup) + visited[currentBackup] = true + + // Get metadata to find RequiredBackup + backupList, err := b.dst.BackupList(ctx, true, currentBackup) + if err != nil { + return errors.Wrapf(err, "BackupList for %s", currentBackup) + } + + var found bool + for _, backup := range backupList { + if backup.BackupName == currentBackup { + currentBackup = backup.RequiredBackup + found = true + break + } + } + + if !found { + break + } + } + + if len(backupChain) > 1 { + log.Info().Msgf("prefetchBackupMetadataChain: discovered chain of %d backups: %v (took %s)", + len(backupChain), backupChain, utils.HumanizeDuration(time.Since(start))) + } else { + log.Debug().Msgf("prefetchBackupMetadataChain: single backup, no chain (took %s)", + utils.HumanizeDuration(time.Since(start))) + } + + return nil +} diff --git a/pkg/storage/general.go b/pkg/storage/general.go index 5ee16dc15..8e35a50bf 100644 --- a/pkg/storage/general.go +++ b/pkg/storage/general.go @@ -53,6 +53,8 @@ type BackupDestination struct { } var metadataCacheLock sync.RWMutex +var backupListCache = make(map[string][]Backup) +var backupListCacheLock sync.RWMutex func (bd *BackupDestination) RemoveBackupRemote(ctx context.Context, backup Backup, cfg *config.Config, retrierClassifier retrier.Classifier) error { retry := retrier.New(retrier.ExponentialBackoff(cfg.General.RetriesOnFailure, common.AddRandomJitter(cfg.General.RetriesDuration, cfg.General.RetriesJitter)), retrierClassifier) @@ -218,6 +220,19 @@ func (bd *BackupDestination) saveMetadataCache(ctx context.Context, listCache ma func (bd *BackupDestination) BackupList(ctx context.Context, parseMetadata bool, parseMetadataOnly string) ([]Backup, error) { backupListStart := time.Now() + + // Check in-memory cache first for single backup requests + if parseMetadataOnly != "" { + backupListCacheLock.RLock() + cacheKey := bd.Kind() + ":" + parseMetadataOnly + if cachedList, ok := backupListCache[cacheKey]; ok { + backupListCacheLock.RUnlock() + log.Debug().Str("backup", parseMetadataOnly).Msg("BackupList: using in-memory cache") + return cachedList, nil + } + backupListCacheLock.RUnlock() + } + defer func() { log.Info().Dur("list_duration", time.Since(backupListStart)).Send() }() @@ -322,9 +337,25 @@ func (bd *BackupDestination) BackupList(ctx context.Context, parseMetadata bool, return nil, errors.Wrap(err, "bd.saveMetadataCache return error") } } + + // Save to in-memory cache for single backup requests + if parseMetadataOnly != "" && len(result) > 0 { + backupListCacheLock.Lock() + cacheKey := bd.Kind() + ":" + parseMetadataOnly + backupListCache[cacheKey] = result + backupListCacheLock.Unlock() + } + return result, nil } +// ClearBackupListCache clears the in-memory cache of backup lists +func ClearBackupListCache() { + backupListCacheLock.Lock() + defer backupListCacheLock.Unlock() + backupListCache = make(map[string][]Backup) +} + func (bd *BackupDestination) DownloadCompressedStream(ctx context.Context, remotePath string, localPath string, maxSpeed uint64) (int64, error) { if err := os.MkdirAll(localPath, 0750); err != nil { return 0, errors.WithMessage(err, "DownloadCompressedStream MkdirAll") From 64e7575c5156c8396bebcd265a4fc1eb6f27f213 Mon Sep 17 00:00:00 2001 From: slach Date: Fri, 15 May 2026 07:20:00 +0500 Subject: [PATCH 2/3] fix(restore): avoid per-table Walk("/") in BackupList for incremental chains Replace the in-memory backupListCache added in PR #1361 with a direct metadata.json fetch + on-disk cache reuse on the fast path. BackupList(parseMetadata=true, parseMetadataOnly=name) used to Walk the whole bucket root on every call. For incremental-chain restores this was invoked per table (downloadDiffParts -> ReadBackupMetadataRemote), giving N_tables * chain_length list calls (~17500 on a real workload). Now: - Fast path looks up name in the existing /tmp/.clickhouse-backup-metadata.cache.{kind} file; on hit, 0 S3 calls. - On miss: one HEAD + one GET on name/metadata.json (no Walk), then merge into the on-disk cache and atomically rewrite it. - metadataCacheLock removed; saveMetadataCache now writes via tempfile+rename so concurrent callers can't observe a torn file. - prefetchBackupMetadataChain and ClearBackupListCache deleted. Slow path (parseMetadataOnly="") unchanged, list_duration log preserved for TestLongListRemote. Verified: TestLongListRemote, TestS3, TestS3NoDeletePermission. --- pkg/backup/download.go | 51 ------------ pkg/storage/general.go | 176 ++++++++++++++++++++++++++++------------- 2 files changed, 119 insertions(+), 108 deletions(-) diff --git a/pkg/backup/download.go b/pkg/backup/download.go index f0374a243..b36573bc9 100644 --- a/pkg/backup/download.go +++ b/pkg/backup/download.go @@ -110,11 +110,6 @@ func (b *Backuper) Download(backupName string, tablePattern string, partitions [ } }() - // Prefetch metadata for incremental backup chain to populate cache - if err := b.prefetchBackupMetadataChain(ctx, backupName); err != nil { - log.Warn().Err(err).Msg("prefetchBackupMetadataChain failed, continuing with on-demand fetching") - } - remoteBackups, err := b.dst.BackupList(ctx, true, backupName) if err != nil { return errors.WithMessage(err, "BackupList") @@ -326,9 +321,6 @@ func (b *Backuper) Download(backupName string, tablePattern string, partitions [ "version": backupVersion, }).Msg("done") - // Clear backup list cache after download completes - storage.ClearBackupListCache() - return nil } @@ -1485,46 +1477,3 @@ func (b *Backuper) getDownloadDiskForNonExistsDisk(notExistsDiskType string, fil return false, filteredDisks[leastUsedIdx].Name, filteredDisks[leastUsedIdx].FreeSpace - partSize, nil } -// prefetchBackupMetadataChain - prefetch metadata for the entire incremental backup chain -// to populate the in-memory cache and avoid repeated S3 API calls -func (b *Backuper) prefetchBackupMetadataChain(ctx context.Context, backupName string) error { - start := time.Now() - visited := make(map[string]bool) - backupChain := []string{} - - // Discover the backup chain by walking RequiredBackup links - currentBackup := backupName - for currentBackup != "" && !visited[currentBackup] { - backupChain = append(backupChain, currentBackup) - visited[currentBackup] = true - - // Get metadata to find RequiredBackup - backupList, err := b.dst.BackupList(ctx, true, currentBackup) - if err != nil { - return errors.Wrapf(err, "BackupList for %s", currentBackup) - } - - var found bool - for _, backup := range backupList { - if backup.BackupName == currentBackup { - currentBackup = backup.RequiredBackup - found = true - break - } - } - - if !found { - break - } - } - - if len(backupChain) > 1 { - log.Info().Msgf("prefetchBackupMetadataChain: discovered chain of %d backups: %v (took %s)", - len(backupChain), backupChain, utils.HumanizeDuration(time.Since(start))) - } else { - log.Debug().Msgf("prefetchBackupMetadataChain: single backup, no chain (took %s)", - utils.HumanizeDuration(time.Since(start))) - } - - return nil -} diff --git a/pkg/storage/general.go b/pkg/storage/general.go index 8e35a50bf..ff48c0ed7 100644 --- a/pkg/storage/general.go +++ b/pkg/storage/general.go @@ -11,7 +11,6 @@ import ( "path/filepath" "sort" "strings" - "sync" "time" "github.com/Altinity/clickhouse-backup/v2/pkg/common" @@ -52,9 +51,6 @@ type BackupDestination struct { compressionLevel int } -var metadataCacheLock sync.RWMutex -var backupListCache = make(map[string][]Backup) -var backupListCacheLock sync.RWMutex func (bd *BackupDestination) RemoveBackupRemote(ctx context.Context, backup Backup, cfg *config.Config, retrierClassifier retrier.Classifier) error { retry := retrier.New(retrier.ExponentialBackoff(cfg.General.RetriesOnFailure, common.AddRandomJitter(cfg.General.RetriesDuration, cfg.General.RetriesJitter)), retrierClassifier) @@ -170,18 +166,48 @@ func (bd *BackupDestination) loadMetadataCache(ctx context.Context) (map[string] } } -func (bd *BackupDestination) saveMetadataCache(ctx context.Context, listCache map[string]Backup, actualList []Backup) error { +// writeMetadataCacheFile atomically writes the listCache map to the on-disk +// metadata cache. Safe to call concurrently — writers can't observe a partial +// file thanks to tempfile + rename. No pruning happens here. +func (bd *BackupDestination) writeMetadataCacheFile(ctx context.Context, listCache map[string]Backup) error { listCacheFile := path.Join(os.TempDir(), fmt.Sprintf(".clickhouse-backup-metadata.cache.%s", bd.Kind())) - f, err := os.OpenFile(listCacheFile, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644) + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + body, err := json.MarshalIndent(&listCache, "", "\t") if err != nil { - log.Warn().Msgf("can't open %s return error %v", listCacheFile, err) + log.Warn().Msgf("can't json marshal %s return error %v", listCacheFile, err) return nil } - defer func() { - if err := f.Close(); err != nil { - log.Warn().Msgf("can't close %s return error %v", listCacheFile, err) - } - }() + tmp, err := os.CreateTemp(os.TempDir(), filepath.Base(listCacheFile)+".tmp.*") + if err != nil { + log.Warn().Msgf("can't create temp for %s return error %v", listCacheFile, err) + return nil + } + tmpName := tmp.Name() + if _, err := tmp.Write(body); err != nil { + log.Warn().Msgf("can't write to %s return error %v", tmpName, err) + _ = tmp.Close() + _ = os.Remove(tmpName) + return nil + } + if err := tmp.Close(); err != nil { + log.Warn().Msgf("can't close %s return error %v", tmpName, err) + _ = os.Remove(tmpName) + return nil + } + if err := os.Rename(tmpName, listCacheFile); err != nil { + log.Warn().Msgf("can't rename %s -> %s return error %v", tmpName, listCacheFile, err) + _ = os.Remove(tmpName) + return nil + } + log.Debug().Msgf("%s save %d elements", listCacheFile, len(listCache)) + return nil +} + +func (bd *BackupDestination) saveMetadataCache(ctx context.Context, listCache map[string]Backup, actualList []Backup) error { for backupName := range listCache { select { case <-ctx.Done(): @@ -199,46 +225,98 @@ func (bd *BackupDestination) saveMetadataCache(ctx context.Context, listCache ma } } } - select { - case <-ctx.Done(): - return ctx.Err() - default: - body, err := json.MarshalIndent(&listCache, "", "\t") - if err != nil { - log.Warn().Msgf("can't json marshal %s return error %v", listCacheFile, err) - return nil - } - _, err = f.Write(body) - if err != nil { - log.Warn().Msgf("can't write to %s return error %v", listCacheFile, err) - return nil + return bd.writeMetadataCacheFile(ctx, listCache) +} + +// readBackupMetadataDirect fetches a single backup's metadata.json directly via +// StatFile+GetFileReader, without listing the whole bucket. Returns nil if the +// metadata.json does not exist. Returns a "broken" Backup entry on parse errors, +// mirroring the slow-path semantics of BackupList. +func (bd *BackupDestination) readBackupMetadataDirect(ctx context.Context, backupName string) (*Backup, error) { + metadataKey := path.Join(backupName, "metadata.json") + mf, err := bd.StatFile(ctx, metadataKey) + if err != nil { + if errors.Is(err, ErrNotFound) { + return nil, nil } - log.Debug().Msgf("%s save %d elements", listCacheFile, len(listCache)) - return nil + return &Backup{ + BackupMetadata: metadata.BackupMetadata{BackupName: backupName}, + Broken: "broken (can't stat metadata.json)", + }, nil + } + r, err := bd.GetFileReader(ctx, metadataKey) + if err != nil { + return &Backup{ + BackupMetadata: metadata.BackupMetadata{BackupName: backupName}, + Broken: "broken (can't open metadata.json)", + UploadDate: mf.LastModified(), + }, nil + } + body, err := io.ReadAll(r) + closeErr := r.Close() + if err != nil { + return &Backup{ + BackupMetadata: metadata.BackupMetadata{BackupName: backupName}, + Broken: "broken (can't read metadata.json)", + UploadDate: mf.LastModified(), + }, nil + } + if closeErr != nil { + return nil, errors.WithMessage(closeErr, "BackupList close metadata reader") + } + var m metadata.BackupMetadata + if err := json.Unmarshal(body, &m); err != nil { + return &Backup{ + BackupMetadata: metadata.BackupMetadata{BackupName: backupName}, + Broken: "broken (bad metadata.json)", + UploadDate: mf.LastModified(), + }, nil } + return &Backup{BackupMetadata: m, UploadDate: mf.LastModified()}, nil } func (bd *BackupDestination) BackupList(ctx context.Context, parseMetadata bool, parseMetadataOnly string) ([]Backup, error) { backupListStart := time.Now() + defer func() { + log.Info().Dur("list_duration", time.Since(backupListStart)).Send() + }() - // Check in-memory cache first for single backup requests - if parseMetadataOnly != "" { - backupListCacheLock.RLock() - cacheKey := bd.Kind() + ":" + parseMetadataOnly - if cachedList, ok := backupListCache[cacheKey]; ok { - backupListCacheLock.RUnlock() - log.Debug().Str("backup", parseMetadataOnly).Msg("BackupList: using in-memory cache") - return cachedList, nil + // Fast path: when the caller already knows which backup it wants, look it + // up in the on-disk metadata cache first; on miss, fetch metadata.json + // directly via StatFile+GetFileReader instead of listing the whole bucket + // root. This removes the per-table Walk("/") cost on incremental-chain + // restores (see https://github.com/Altinity/clickhouse-backup/pull/1361). + // Staleness on remote delete is healed by the next slow-path list (e.g. + // `clickhouse-backup list remote`), same as before. + if parseMetadata && parseMetadataOnly != "" { + listCache, loadErr := bd.loadMetadataCache(ctx) + if loadErr != nil && !os.IsNotExist(loadErr) { + return nil, errors.WithMessage(loadErr, "BackupList loadMetadataCache") + } + if cached, ok := listCache[parseMetadataOnly]; ok && cached.Broken == "" { + log.Debug().Str("backup", parseMetadataOnly).Msg("BackupList: using on-disk metadata cache") + return []Backup{cached}, nil + } + backup, err := bd.readBackupMetadataDirect(ctx, parseMetadataOnly) + if err != nil { + return nil, errors.WithMessage(err, "BackupList readBackupMetadataDirect") + } + if backup == nil { + return []Backup{}, nil + } + if backup.Broken == "" { + if listCache == nil { + listCache = map[string]Backup{} + } + listCache[parseMetadataOnly] = *backup + if writeErr := bd.writeMetadataCacheFile(ctx, listCache); writeErr != nil { + log.Warn().Err(writeErr).Msg("BackupList writeMetadataCacheFile (fast path)") + } } - backupListCacheLock.RUnlock() + return []Backup{*backup}, nil } - defer func() { - log.Info().Dur("list_duration", time.Since(backupListStart)).Send() - }() result := make([]Backup, 0) - metadataCacheLock.Lock() - defer metadataCacheLock.Unlock() listCache, err := bd.loadMetadataCache(ctx) if err != nil && !os.IsNotExist(err) { return nil, errors.WithMessage(err, "BackupList loadMetadataCache") @@ -337,25 +415,9 @@ func (bd *BackupDestination) BackupList(ctx context.Context, parseMetadata bool, return nil, errors.Wrap(err, "bd.saveMetadataCache return error") } } - - // Save to in-memory cache for single backup requests - if parseMetadataOnly != "" && len(result) > 0 { - backupListCacheLock.Lock() - cacheKey := bd.Kind() + ":" + parseMetadataOnly - backupListCache[cacheKey] = result - backupListCacheLock.Unlock() - } - return result, nil } -// ClearBackupListCache clears the in-memory cache of backup lists -func ClearBackupListCache() { - backupListCacheLock.Lock() - defer backupListCacheLock.Unlock() - backupListCache = make(map[string][]Backup) -} - func (bd *BackupDestination) DownloadCompressedStream(ctx context.Context, remotePath string, localPath string, maxSpeed uint64) (int64, error) { if err := os.MkdirAll(localPath, 0750); err != nil { return 0, errors.WithMessage(err, "DownloadCompressedStream MkdirAll") From 189d331418497659fa51e365c4419eb46ae9cc84 Mon Sep 17 00:00:00 2001 From: slach Date: Wed, 20 May 2026 15:44:26 +0500 Subject: [PATCH 3/3] fix(storage): detect broken backups with missing metadata.json in BackupList When metadata.json is absent, walk the backup prefix to determine whether the folder has content. If it does, return a broken backup entry with the last modified time. If the prefix is empty, return an empty list as before. --- pkg/storage/general.go | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/pkg/storage/general.go b/pkg/storage/general.go index ff48c0ed7..107b58ed6 100644 --- a/pkg/storage/general.go +++ b/pkg/storage/general.go @@ -302,7 +302,27 @@ func (bd *BackupDestination) BackupList(ctx context.Context, parseMetadata bool, return nil, errors.WithMessage(err, "BackupList readBackupMetadataDirect") } if backup == nil { - return []Backup{}, nil + // metadata.json not found — check if the backup prefix has any + // content at all. Walk with recursive=false returns top-level + // entries only; if we get at least one the folder exists and the + // backup is broken (missing metadata.json), otherwise the backup + // name doesn't exist on remote storage at all. + found := false + var lastModified time.Time + walkErr := bd.Walk(ctx, parseMetadataOnly+"/", false, func(_ context.Context, f RemoteFile) error { + found = true + lastModified = f.LastModified() + return io.EOF // stop after first entry + }) + _ = walkErr // Walk returns io.EOF, that's fine + if !found { + return []Backup{}, nil + } + return []Backup{{ + BackupMetadata: metadata.BackupMetadata{BackupName: parseMetadataOnly}, + Broken: "broken (can't stat metadata.json)", + UploadDate: lastModified, + }}, nil } if backup.Broken == "" { if listCache == nil {