From 58cd86c3d4ca817b17a4f1b5007f95cb870debc5 Mon Sep 17 00:00:00 2001
From: Mrityunjay Raj <mr.raj.earth@gmail.com>
Date: Tue, 23 Jun 2026 06:29:16 +0530
Subject: [PATCH 1/4] compact: free space per pack, honor --threshold

Group objects by their pack and act per pack: drop fully-unused packs, and
rewrite mixed packs whose unused bytes reach --threshold (default 40%) by
copying the survivors forward with compact_pack. Two index scans keep the
memory use bounded. refs #8572 #8514
---
 src/borg/archiver/compact_cmd.py              | 116 ++++++++++++++----
 .../testsuite/archiver/compact_cmd_test.py    |  49 +++++++-
 2 files changed, 137 insertions(+), 28 deletions(-)

diff --git a/src/borg/archiver/compact_cmd.py b/src/borg/archiver/compact_cmd.py
index 5fd19dc1a4..647c4af9ce 100644
--- a/src/borg/archiver/compact_cmd.py
+++ b/src/borg/archiver/compact_cmd.py
@@ -19,7 +19,7 @@
 
 
 class ArchiveGarbageCollector:
-    def __init__(self, repository, manifest, *, stats, iec):
+    def __init__(self, repository, manifest, *, stats, iec, threshold):
         self.repository = repository
         assert isinstance(repository, Repository)
         self.manifest = manifest
@@ -28,6 +28,7 @@ def __init__(self, repository, manifest, *, stats, iec):
         self.total_size = None  # overall size of source file content data written to all archives
         self.archives_count = None  # number of archives
         self.stats = stats  # compute repo space usage before/after - lists all repo objects, can be slow.
+        self.threshold = threshold  # rewrite a mixed pack only when its wasted-bytes fraction reaches this percent
         self.iec = iec  # formats statistics using IEC units (1KiB = 1024B)
 
     @property
@@ -165,30 +166,7 @@ def report_and_delete(self):
                 logger.warning(f"Soft-deleted archive {name} {hex_id} not found.")
 
         repo_size_before = self.repository_size
-        logger.info("Determining unused objects...")
-        unused = set()
-        for id, entry in self.chunks.iteritems():
-            if not (entry.flags & ChunkIndex.F_USED):
-                unused.add(id)
-        logger.info(f"Deleting {len(unused)} unused objects...")
-        if unused:
-            # Before deleting any repository object, invalidate all centrally cached chunk indexes.
-            # Otherwise, if we get interrupted within the deletion loop, the still-existing index/*
-            # would claim that already-deleted objects are still present. A later "borg create" would then
-            # trust that stale index, not re-upload the affected chunks and silently create an archive with
-            # dangling object references (see issue #9748). By removing the cached indexes first, an
-            # interruption is conservative: clients must rebuild the index from actual repository contents
-            # and will re-upload any deleted data. save_chunk_index() writes a fresh, valid index afterwards.
-            delete_chunkindex_from_repo(self.repository)
-        pi = ProgressIndicatorPercent(
-            total=len(unused), msg="Deleting unused objects %3.1f%%", step=0.1, msgid="compact.report_and_delete"
-        )
-        for i, id in enumerate(unused):
-            pi.show(i)
-            # N=1: the chunk is alone in its pack, so dropping the pack frees just it; N>1 needs compaction.
-            self.repository.store_delete("packs/" + bin_to_hex(self.chunks[id].pack_id))
-            del self.chunks[id]
-        pi.finish()
+        self.compact_packs()
         repo_size_after = self.repository_size
 
         count = len(self.chunks)
@@ -209,13 +187,91 @@ def report_and_delete(self):
         else:
             logger.info(f"Repository has data stored in {count} objects.")
 
+    def compact_packs(self):
+        """Free space one pack at a time (the store can only delete whole packs).
+
+        analyze_archives() has flagged the used objects F_USED. Per pack:
+
+        - all objects unused -> delete the pack.
+        - all objects used   -> keep it.
+        - mixed              -> rewrite only if the unused bytes reach --threshold percent: copy the
+                                used objects into a new pack via compact_pack and drop the old one.
+                                Below the threshold keep the pack, so we don't rewrite a large pack
+                                to reclaim little.
+
+        Two passes bound the memory use: the first keeps only per-pack byte counts to pick the packs
+        to change, the second collects object ids for just those packs, not the whole index.
+        """
+        # Pass 1: one index scan, keep only per-pack byte tallies (two ints per pack, no id lists).
+        pack_total, pack_unused = {}, {}
+        for id, entry in self.chunks.iteritems():
+            pid = entry.pack_id
+            pack_total[pid] = pack_total.get(pid, 0) + entry.obj_size
+            if not (entry.flags & ChunkIndex.F_USED):
+                pack_unused[pid] = pack_unused.get(pid, 0) + entry.obj_size
+
+        # decide each pack's fate from the tallies
+        drop_packs, rewrite_packs = set(), set()
+        for pid, total in pack_total.items():
+            unused = pack_unused.get(pid, 0)
+            if unused == 0:
+                continue  # all used -> leave alone
+            if unused == total:
+                drop_packs.add(pid)  # all unused -> drop the whole file
+            elif unused / total * 100 >= self.threshold:
+                rewrite_packs.add(pid)  # mixed and wasteful enough -> copy survivors forward
+            # else: mixed but below threshold -> leave alone
+
+        if not drop_packs and not rewrite_packs:
+            logger.info("Deleting 0 unused objects...")
+            return  # nothing to reclaim; do not touch the cached chunk indexes
+
+        # crash-safety (#9748): invalidate cached chunk indexes before the first store change
+        delete_chunkindex_from_repo(self.repository)
+
+        # Pass 2: collect object ids only for the affected packs (a subset, not the whole index)
+        keep = {pid: set() for pid in rewrite_packs}  # survivors to copy forward, per pack
+        drop = {pid: set() for pid in rewrite_packs}  # unused objects in those same packs
+        forget = []  # ids living in fully-unused packs we delete outright
+        for id, entry in self.chunks.iteritems():
+            pid = entry.pack_id
+            if pid in rewrite_packs:
+                (keep if entry.flags & ChunkIndex.F_USED else drop)[pid].add(id)
+            elif pid in drop_packs:
+                forget.append(id)
+
+        # count what we remove: every object of a dropped pack, plus the unused objects cut from
+        # rewritten packs. unused objects in below-threshold packs stay, so they don't count.
+        deleted = len(forget) + sum(len(ids) for ids in drop.values())
+        logger.info(f"Deleting {deleted} unused objects...")
+        pi = ProgressIndicatorPercent(
+            total=len(drop_packs) + len(rewrite_packs),
+            msg="Compacting packs %3.1f%%",
+            step=0.1,
+            msgid="compact.compact_packs",
+        )
+        progress = 0
+        for pid in drop_packs:
+            pi.show(progress)
+            progress += 1
+            self.repository.store_delete("packs/" + bin_to_hex(pid))
+        for id in forget:
+            del self.chunks[id]  # their pack file is gone, so drop their index entries too
+        for pid in rewrite_packs:
+            pi.show(progress)
+            progress += 1
+            self.repository.compact_pack(pid, keep_ids=keep[pid], drop_ids=drop[pid])  # helper owns index update
+        pi.finish()
+
 
 class CompactMixIn:
     @with_repository(exclusive=True, compatibility=(Manifest.Operation.DELETE,))
     def do_compact(self, args, repository, manifest):
         """Collects garbage in the repository."""
         if not args.dry_run:  # support --dry-run to simplify scripting
-            ArchiveGarbageCollector(repository, manifest, stats=args.stats, iec=args.iec).garbage_collect()
+            ArchiveGarbageCollector(
+                repository, manifest, stats=args.stats, iec=args.iec, threshold=args.threshold
+            ).garbage_collect()
 
     def build_parser_compact(self, subparsers, common_parser, mid_common_parser):
         from ._common import process_epilog
@@ -270,3 +326,11 @@ def build_parser_compact(self, subparsers, common_parser, mid_common_parser):
         subparser.add_argument(
             "-s", "--stats", dest="stats", action="store_true", help="print statistics (might be much slower)"
         )
+        subparser.add_argument(
+            "--threshold",
+            metavar="PERCENT",
+            dest="threshold",
+            type=float,
+            default=40.0,
+            help="rewrite a pack when at least PERCENT of its bytes are unused (default: 40)",
+        )
diff --git a/src/borg/testsuite/archiver/compact_cmd_test.py b/src/borg/testsuite/archiver/compact_cmd_test.py
index ffcc802c05..60d4e4f05c 100644
--- a/src/borg/testsuite/archiver/compact_cmd_test.py
+++ b/src/borg/testsuite/archiver/compact_cmd_test.py
@@ -4,11 +4,14 @@
 import pytest
 
 from ...constants import *  # NOQA
-from ...helpers import get_cache_dir
+from ...helpers import get_cache_dir, bin_to_hex
+from ...hashindex import ChunkIndex
+from ...repository import Repository
 from ...cache import files_cache_name, discover_files_cache_names, list_chunkindex_hashes
 from ...manifest import Manifest
 from . import cmd, create_regular_file, create_src_archive, generate_archiver_tests, open_repository, RK_ENCRYPTION
 from . import changedir
+from ..repository_test import H, fchunk, pdchunk, build_one_pack, reopen
 
 pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote,binary")  # NOQA
 
@@ -119,7 +122,7 @@ def test_compact_interrupted_does_not_poison_chunk_index(archivers, request, mon
     repository = open_repository(archiver)
     with repository:
         manifest = Manifest.load(repository, (Manifest.Operation.DELETE,))
-        gc = ArchiveGarbageCollector(repository, manifest, stats=stats, iec=False)
+        gc = ArchiveGarbageCollector(repository, manifest, stats=stats, iec=False, threshold=40.0)
 
         def interrupt():
             raise KeyboardInterrupt("simulated interruption before save_chunk_index")
@@ -143,6 +146,48 @@ def interrupt():
             assert fd.read() == content
 
 
+def test_compact_packs_respects_threshold(tmp_path):
+    # Build two multi-object packs, then run pack-level compaction with a 40% threshold. The pack that
+    # wastes 2/3 of its bytes is rewritten down to its single survivor (and its old file deleted); the
+    # pack that wastes only 1/3 stays untouched, because copying its survivors to reclaim that little
+    # is not worth it. This exercises the rewrite, leave-alone and keep/drop split that N=1 can't reach.
+    from ...archiver.compact_cmd import ArchiveGarbageCollector
+
+    location = os.fspath(tmp_path / "repo")
+    wasteful = [(H(i), fchunk(f"DATA{i}".encode(), chunk_id=H(i))) for i in range(3)]  # H0..H2 in one pack
+    frugal = [(H(i), fchunk(f"DATA{i}".encode(), chunk_id=H(i))) for i in range(3, 6)]  # H3..H5 in another
+
+    repository = Repository(location, exclusive=True, create=True)
+    build_one_pack(repository, wasteful)
+    repository = reopen(repository)
+    build_one_pack(repository, frugal)
+
+    with reopen(repository) as repository:
+        wasteful_pack = repository.chunks[H(0)].pack_id
+        frugal_pack = repository.chunks[H(3)].pack_id
+        # mark usage: wasteful pack keeps only H0 (2/3 wasted), frugal pack keeps H3 and H4 (1/3 wasted)
+        used = {H(0), H(3), H(4)}
+        for i in range(6):
+            entry = repository.chunks[H(i)]
+            flags = ChunkIndex.F_USED if H(i) in used else ChunkIndex.F_NONE
+            repository.chunks[H(i)] = entry._replace(flags=flags)
+
+        gc = ArchiveGarbageCollector(repository, manifest=None, stats=False, iec=False, threshold=40.0)
+        gc.chunks = repository.chunks
+        gc.compact_packs()
+
+        # wasteful pack was rewritten: the survivor reads back, the dropped objects and the old file are gone
+        assert pdchunk(repository.get(H(0))) == b"DATA0"
+        assert repository.get(H(1), raise_missing=False) is None
+        assert repository.get(H(2), raise_missing=False) is None
+        # frugal pack stayed below threshold: untouched, every object (even the unused H5) still present
+        for i in range(3, 6):
+            assert pdchunk(repository.get(H(i))) == f"DATA{i}".encode()
+        pack_names = [info.name for info in repository.store_list("packs")]
+        assert bin_to_hex(wasteful_pack) not in pack_names
+        assert bin_to_hex(frugal_pack) in pack_names
+
+
 def test_compact_files_cache_cleanup(archivers, request):
     """Test that files cache files for deleted archives are removed during compact."""
     archiver = request.getfixturevalue(archivers)

From 78b8bbab505be940c235f8b049b762288bd531b4 Mon Sep 17 00:00:00 2001
From: Mrityunjay Raj <mr.raj.earth@gmail.com>
Date: Wed, 24 Jun 2026 10:42:26 +0530
Subject: [PATCH 2/4] compact: pack-level compaction with --threshold and
 --dry-run

Rewrite a pack only when its unused bytes reach --threshold percent; --dry-run reports the space compact would free without changing the repository (#9379).
---
 src/borg/archiver/compact_cmd.py              | 75 +++++++++++--------
 .../testsuite/archiver/compact_cmd_test.py    | 43 +++++++----
 2 files changed, 73 insertions(+), 45 deletions(-)

diff --git a/src/borg/archiver/compact_cmd.py b/src/borg/archiver/compact_cmd.py
index 647c4af9ce..92915bb27d 100644
--- a/src/borg/archiver/compact_cmd.py
+++ b/src/borg/archiver/compact_cmd.py
@@ -1,3 +1,4 @@
+from collections import defaultdict
 from pathlib import Path
 
 from ._common import with_repository
@@ -19,7 +20,7 @@
 
 
 class ArchiveGarbageCollector:
-    def __init__(self, repository, manifest, *, stats, iec, threshold):
+    def __init__(self, repository, manifest, *, stats, iec, threshold, dry_run=False):
         self.repository = repository
         assert isinstance(repository, Repository)
         self.manifest = manifest
@@ -30,6 +31,7 @@ def __init__(self, repository, manifest, *, stats, iec, threshold):
         self.stats = stats  # compute repo space usage before/after - lists all repo objects, can be slow.
         self.threshold = threshold  # rewrite a mixed pack only when its wasted-bytes fraction reaches this percent
         self.iec = iec  # formats statistics using IEC units (1KiB = 1024B)
+        self.dry_run = dry_run
 
     @property
     def repository_size(self):
@@ -44,8 +46,9 @@ def garbage_collect(self):
         logger.info("Computing object IDs used by archives...")
         (self.missing_chunks, self.total_files, self.total_size, self.archives_count) = self.analyze_archives()
         self.report_and_delete()
-        self.save_chunk_index()
-        self.cleanup_files_cache()
+        if not self.dry_run:
+            self.save_chunk_index()
+            self.cleanup_files_cache()
         logger.info("Finished compaction / garbage collection...")
 
     def get_repository_chunks(self) -> ChunkIndex:
@@ -57,7 +60,7 @@ def get_repository_chunks(self) -> ChunkIndex:
             chunks = build_chunkindex_from_repo(self.repository, disable_caches=True, init_flags=ChunkIndex.F_NONE)
         else:  # faster: rely on existing chunks index (with flags F_NONE and size 0).
             logger.info("Getting object IDs from cached chunks index...")
-            chunks = build_chunkindex_from_repo(self.repository, cache_immediately=True)
+            chunks = build_chunkindex_from_repo(self.repository, cache_immediately=not self.dry_run)
         return chunks
 
     def save_chunk_index(self):
@@ -155,15 +158,15 @@ def report_and_delete(self):
             for id in sorted(self.missing_chunks):
                 logger.debug(f"Missing object {bin_to_hex(id)}")
             set_ec(EXIT_ERROR)
-
-        logger.info("Cleaning archives directory from soft-deleted archives...")
-        archive_infos = self.manifest.archives.list(sort_by=["ts"], deleted=True)
-        for archive_info in archive_infos:
-            name, id, hex_id = archive_info.name, archive_info.id, bin_to_hex(archive_info.id)
-            try:
-                self.manifest.archives.nuke_by_id(id)
-            except self.repository.ObjectNotFound:
-                logger.warning(f"Soft-deleted archive {name} {hex_id} not found.")
+        if not self.dry_run:  # nuking soft-deleted archives mutates the manifest; skip on a dry run
+            logger.info("Cleaning archives directory from soft-deleted archives...")
+            archive_infos = self.manifest.archives.list(sort_by=["ts"], deleted=True)
+            for archive_info in archive_infos:
+                name, id, hex_id = archive_info.name, archive_info.id, bin_to_hex(archive_info.id)
+                try:
+                    self.manifest.archives.nuke_by_id(id)
+                except self.repository.ObjectNotFound:
+                    logger.warning(f"Soft-deleted archive {name} {hex_id} not found.")
 
         repo_size_before = self.repository_size
         self.compact_packs()
@@ -180,10 +183,11 @@ def report_and_delete(self):
                 f"Repository size is {format_file_size(repo_size_after, precision=0, iec=self.iec)} "
                 f"in {count} objects."
             )
-            logger.info(
-                f"Compaction saved "
-                f"{format_file_size(repo_size_before - repo_size_after, precision=0, iec=self.iec)}."
-            )
+            if not self.dry_run:  # nothing was deleted on a dry run, so before == after
+                logger.info(
+                    f"Compaction saved "
+                    f"{format_file_size(repo_size_before - repo_size_after, precision=0, iec=self.iec)}."
+                )
         else:
             logger.info(f"Repository has data stored in {count} objects.")
 
@@ -203,25 +207,31 @@ def compact_packs(self):
         to change, the second collects object ids for just those packs, not the whole index.
         """
         # Pass 1: one index scan, keep only per-pack byte tallies (two ints per pack, no id lists).
-        pack_total, pack_unused = {}, {}
+        pack_total, pack_unused = defaultdict(int), defaultdict(int)
         for id, entry in self.chunks.iteritems():
             pid = entry.pack_id
-            pack_total[pid] = pack_total.get(pid, 0) + entry.obj_size
+            pack_total[pid] += entry.obj_size
             if not (entry.flags & ChunkIndex.F_USED):
-                pack_unused[pid] = pack_unused.get(pid, 0) + entry.obj_size
+                pack_unused[pid] += entry.obj_size
 
         # decide each pack's fate from the tallies
         drop_packs, rewrite_packs = set(), set()
         for pid, total in pack_total.items():
-            unused = pack_unused.get(pid, 0)
+            unused = pack_unused.get(pid, 0)  # .get, not [pid]: don't insert all-used packs into the dict
             if unused == 0:
                 continue  # all used -> leave alone
             if unused == total:
                 drop_packs.add(pid)  # all unused -> drop the whole file
-            elif unused / total * 100 >= self.threshold:
+            elif 100 * unused / total >= self.threshold:
                 rewrite_packs.add(pid)  # mixed and wasteful enough -> copy survivors forward
             # else: mixed but below threshold -> leave alone
-
+        if self.dry_run:
+            freed = sum(pack_unused[pid] for pid in drop_packs) + sum(pack_unused[pid] for pid in rewrite_packs)
+            logger.info(
+                f"Would free {format_file_size(freed, iec=self.iec)} "
+                f"by dropping {len(drop_packs)} packs and rewriting {len(rewrite_packs)} packs."
+            )
+            return  # dry run: report only, change nothing
         if not drop_packs and not rewrite_packs:
             logger.info("Deleting 0 unused objects...")
             return  # nothing to reclaim; do not touch the cached chunk indexes
@@ -268,10 +278,9 @@ class CompactMixIn:
     @with_repository(exclusive=True, compatibility=(Manifest.Operation.DELETE,))
     def do_compact(self, args, repository, manifest):
         """Collects garbage in the repository."""
-        if not args.dry_run:  # support --dry-run to simplify scripting
-            ArchiveGarbageCollector(
-                repository, manifest, stats=args.stats, iec=args.iec, threshold=args.threshold
-            ).garbage_collect()
+        ArchiveGarbageCollector(
+            repository, manifest, stats=args.stats, iec=args.iec, threshold=args.threshold, dry_run=args.dry_run
+        ).garbage_collect()
 
     def build_parser_compact(self, subparsers, common_parser, mid_common_parser):
         from ._common import process_epilog
@@ -321,7 +330,11 @@ def build_parser_compact(self, subparsers, common_parser, mid_common_parser):
         subparser = ArgumentParser(parents=[common_parser], description=self.do_compact.__doc__, epilog=compact_epilog)
         subparsers.add_subcommand("compact", subparser, help="compact the repository")
         subparser.add_argument(
-            "-n", "--dry-run", dest="dry_run", action="store_true", help="do not change the repository"
+            "-n",
+            "--dry-run",
+            dest="dry_run",
+            action="store_true",
+            help="do not change the repository, just show what compact would free",
         )
         subparser.add_argument(
             "-s", "--stats", dest="stats", action="store_true", help="print statistics (might be much slower)"
@@ -330,7 +343,7 @@ def build_parser_compact(self, subparsers, common_parser, mid_common_parser):
             "--threshold",
             metavar="PERCENT",
             dest="threshold",
-            type=float,
-            default=40.0,
-            help="rewrite a pack when at least PERCENT of its bytes are unused (default: 40)",
+            type=int,
+            default=10,
+            help="rewrite a pack when at least PERCENT of its bytes are unused (default: 10)",
         )
diff --git a/src/borg/testsuite/archiver/compact_cmd_test.py b/src/borg/testsuite/archiver/compact_cmd_test.py
index 60d4e4f05c..c0d3f94829 100644
--- a/src/borg/testsuite/archiver/compact_cmd_test.py
+++ b/src/borg/testsuite/archiver/compact_cmd_test.py
@@ -11,7 +11,7 @@
 from ...manifest import Manifest
 from . import cmd, create_regular_file, create_src_archive, generate_archiver_tests, open_repository, RK_ENCRYPTION
 from . import changedir
-from ..repository_test import H, fchunk, pdchunk, build_one_pack, reopen
+from ..repository_test import H, fchunk, pdchunk
 
 pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote,binary")  # NOQA
 
@@ -147,32 +147,32 @@ def interrupt():
 
 
 def test_compact_packs_respects_threshold(tmp_path):
-    # Build two multi-object packs, then run pack-level compaction with a 40% threshold. The pack that
+    # Two multi-object packs in one repo, then pack-level compaction at a 40% threshold. The pack that
     # wastes 2/3 of its bytes is rewritten down to its single survivor (and its old file deleted); the
-    # pack that wastes only 1/3 stays untouched, because copying its survivors to reclaim that little
-    # is not worth it. This exercises the rewrite, leave-alone and keep/drop split that N=1 can't reach.
+    # pack that wastes only 1/3 stays untouched, since copying its survivors to reclaim that little is
+    # not worth it. This covers the rewrite, leave-alone and keep/drop split that N=1 can't reach.
     from ...archiver.compact_cmd import ArchiveGarbageCollector
 
     location = os.fspath(tmp_path / "repo")
-    wasteful = [(H(i), fchunk(f"DATA{i}".encode(), chunk_id=H(i))) for i in range(3)]  # H0..H2 in one pack
-    frugal = [(H(i), fchunk(f"DATA{i}".encode(), chunk_id=H(i))) for i in range(3, 6)]  # H3..H5 in another
+    with Repository(location, exclusive=True, create=True) as repository:
+        repository._pack_writer.max_count = 4  # buffer several objects, so each flush() writes one pack
+        for i in range(3):  # H0..H2 -> wasteful pack
+            repository.put(H(i), fchunk(f"DATA{i}".encode(), chunk_id=H(i)))
+        repository.flush()
+        for i in range(3, 6):  # H3..H5 -> frugal pack
+            repository.put(H(i), fchunk(f"DATA{i}".encode(), chunk_id=H(i)))
+        repository.flush()
 
-    repository = Repository(location, exclusive=True, create=True)
-    build_one_pack(repository, wasteful)
-    repository = reopen(repository)
-    build_one_pack(repository, frugal)
-
-    with reopen(repository) as repository:
         wasteful_pack = repository.chunks[H(0)].pack_id
         frugal_pack = repository.chunks[H(3)].pack_id
-        # mark usage: wasteful pack keeps only H0 (2/3 wasted), frugal pack keeps H3 and H4 (1/3 wasted)
+        # wasteful pack keeps only H0 (2/3 wasted), frugal pack keeps H3 and H4 (1/3 wasted)
         used = {H(0), H(3), H(4)}
         for i in range(6):
             entry = repository.chunks[H(i)]
             flags = ChunkIndex.F_USED if H(i) in used else ChunkIndex.F_NONE
             repository.chunks[H(i)] = entry._replace(flags=flags)
 
-        gc = ArchiveGarbageCollector(repository, manifest=None, stats=False, iec=False, threshold=40.0)
+        gc = ArchiveGarbageCollector(repository, manifest=None, stats=False, iec=False, threshold=40)
         gc.chunks = repository.chunks
         gc.compact_packs()
 
@@ -188,6 +188,21 @@ def test_compact_packs_respects_threshold(tmp_path):
         assert bin_to_hex(frugal_pack) in pack_names
 
 
+def test_compact_dry_run_reports_and_changes_nothing(archivers, request):
+    # --dry-run prints the would-free estimate (issue #9379) and must not touch the repository.
+    archiver = request.getfixturevalue(archivers)
+    cmd(archiver, "repo-create", RK_ENCRYPTION)
+    create_src_archive(archiver, "archive")
+    cmd(archiver, "delete", "-a", "archive")
+
+    out = cmd(archiver, "compact", "--dry-run", "-v", exit_code=0)
+    assert "Would free" in out  # the estimate is reported
+
+    # proof nothing was removed: a real compact afterwards still finds the unused objects to delete
+    out2 = cmd(archiver, "compact", "-v", exit_code=0)
+    assert "Deleting 0 unused objects" not in out2
+
+
 def test_compact_files_cache_cleanup(archivers, request):
     """Test that files cache files for deleted archives are removed during compact."""
     archiver = request.getfixturevalue(archivers)

From 0586cc7b7b83752f9975e7eb9c504df4491fdc83 Mon Sep 17 00:00:00 2001
From: Mrityunjay Raj <mr.raj.earth@gmail.com>
Date: Wed, 24 Jun 2026 11:08:54 +0530
Subject: [PATCH 3/4] compact: report progress after each unit so it reaches
 100%

---
 src/borg/archiver/compact_cmd.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/borg/archiver/compact_cmd.py b/src/borg/archiver/compact_cmd.py
index 92915bb27d..553933eba8 100644
--- a/src/borg/archiver/compact_cmd.py
+++ b/src/borg/archiver/compact_cmd.py
@@ -131,7 +131,6 @@ def use_it(id):
         )
         total_size, total_files = 0, 0
         for i, info in enumerate(archive_infos):
-            pi.show(i)
             logger.info(
                 f"Analyzing archive {info.name} {info.ts.astimezone()} {bin_to_hex(info.id)} ({i + 1}/{num_archives})"
             )
@@ -149,6 +148,7 @@ def use_it(id):
                     for id, size in item.chunks:
                         total_size += size  # original, uncompressed file content size
                         use_it(id)
+            pi.show(i + 1)  # report after each archive, so the last one lands on 100%
         pi.finish()
         return missing_chunks, total_files, total_size, num_archives
 
@@ -158,7 +158,7 @@ def report_and_delete(self):
             for id in sorted(self.missing_chunks):
                 logger.debug(f"Missing object {bin_to_hex(id)}")
             set_ec(EXIT_ERROR)
-        if not self.dry_run:  # nuking soft-deleted archives mutates the manifest; skip on a dry run
+        if not self.dry_run:  # nuking removes the soft-deleted archives from the archives directory; skip on a dry run
             logger.info("Cleaning archives directory from soft-deleted archives...")
             archive_infos = self.manifest.archives.list(sort_by=["ts"], deleted=True)
             for archive_info in archive_infos:
@@ -262,15 +262,15 @@ def compact_packs(self):
         )
         progress = 0
         for pid in drop_packs:
-            pi.show(progress)
-            progress += 1
             self.repository.store_delete("packs/" + bin_to_hex(pid))
+            progress += 1
+            pi.show(progress)  # report after the work, so the final pack lands on 100%
         for id in forget:
             del self.chunks[id]  # their pack file is gone, so drop their index entries too
         for pid in rewrite_packs:
-            pi.show(progress)
-            progress += 1
             self.repository.compact_pack(pid, keep_ids=keep[pid], drop_ids=drop[pid])  # helper owns index update
+            progress += 1
+            pi.show(progress)
         pi.finish()
 
 

From 4a28995f04ae7fecf3fd5adfc77a3481f235ffed Mon Sep 17 00:00:00 2001
From: Mrityunjay Raj <mr.raj.earth@gmail.com>
Date: Wed, 24 Jun 2026 20:15:46 +0530
Subject: [PATCH 4/4] compact: --stats reports sizes from the cached index,
 drop the redundant full scan and fix the epilog

---
 src/borg/archiver/compact_cmd.py | 25 +++++++------------------
 1 file changed, 7 insertions(+), 18 deletions(-)

diff --git a/src/borg/archiver/compact_cmd.py b/src/borg/archiver/compact_cmd.py
index 553933eba8..8689c3894e 100644
--- a/src/borg/archiver/compact_cmd.py
+++ b/src/borg/archiver/compact_cmd.py
@@ -53,14 +53,10 @@ def garbage_collect(self):
 
     def get_repository_chunks(self) -> ChunkIndex:
         """return a chunks index"""
-        if self.stats:
-            # slow but thorough: scan the pack headers for real sizes/locations and to catch objects
-            # missing from the cached index. Start unused (F_NONE); analyze_archives marks used ones.
-            logger.info("Getting object IDs present in the repository...")
-            chunks = build_chunkindex_from_repo(self.repository, disable_caches=True, init_flags=ChunkIndex.F_NONE)
-        else:  # faster: rely on existing chunks index (with flags F_NONE and size 0).
-            logger.info("Getting object IDs from cached chunks index...")
-            chunks = build_chunkindex_from_repo(self.repository, cache_immediately=not self.dry_run)
+        # The cached index already has each object's obj_size and starts entries as F_NONE, so it
+        # serves both GC and --stats; no need to force the slow pack-header scan just to get sizes.
+        logger.info("Getting object IDs from the cached chunks index...")
+        chunks = build_chunkindex_from_repo(self.repository, cache_immediately=not self.dry_run)
         return chunks
 
     def save_chunk_index(self):
@@ -316,15 +312,8 @@ def build_parser_compact(self, subparsers, common_parser, mid_common_parser):
             seeing fatal errors when creating backups or when archives are missing in
             ``borg repo-list``).
 
-            When using the ``--stats`` option, borg will internally list all repository
-            objects to determine their existence and stored size. It will build a fresh
-            chunks index from that information and cache it in the repository. For some
-            types of repositories, this might be very slow. It will tell you the sum of
-            stored object sizes, before and after compaction.
-
-            Without ``--stats``, borg will rely on the cached chunks index to determine
-            existing object IDs (but there is no stored size information in the index,
-            thus it cannot compute before/after compaction size statistics).
+            With ``--stats``, borg additionally reports the sum of stored object sizes
+            before and after compaction.
             """
         )
         subparser = ArgumentParser(parents=[common_parser], description=self.do_compact.__doc__, epilog=compact_epilog)
@@ -337,7 +326,7 @@ def build_parser_compact(self, subparsers, common_parser, mid_common_parser):
             help="do not change the repository, just show what compact would free",
         )
         subparser.add_argument(
-            "-s", "--stats", dest="stats", action="store_true", help="print statistics (might be much slower)"
+            "-s", "--stats", dest="stats", action="store_true", help="print repository size statistics"
         )
         subparser.add_argument(
             "--threshold",