Skip to content
Merged
17 changes: 14 additions & 3 deletions src/borg/chunkers/reader.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,11 @@ class FileFMAPReader:
if self.try_sparse:
try:
fmap = list(sparsemap(self.fd, self.fh))
except OSError as err:
# seeking did not work
except (OSError, ValueError) as err:
# Building a sparse map failed:
# - OSError: low-level lseek with SEEK_HOLE/SEEK_DATA not supported by FS/OS.
# - ValueError: high-level file objects (e.g. io.BytesIO or some fd wrappers)
# don't accept SEEK_HOLE/SEEK_DATA as a valid "whence" and raise ValueError.
pass

if fmap is None:
Expand Down Expand Up @@ -170,6 +173,9 @@ class FileFMAPReader:
# read block from the range
data = dread(offset, wanted, self.fd, self.fh)
got = len(data)
# Detect zero-filled blocks regardless of sparse mode.
# Zero detection is important to avoid reading/storing allocated zeros
# even when we are not using sparse file handling based on SEEK_HOLE/SEEK_DATA.
if zeros.startswith(data):
data = None
allocation = CH_ALLOC
Expand Down Expand Up @@ -321,7 +327,12 @@ class FileReader:

# Determine the allocation type of the resulting chunk
if has_data:
# If any chunk was CH_DATA, the result is CH_DATA
# If any chunk was CH_DATA, check if the result is all zeros.
# This can happen when a large CH_DATA block (read at read_size granularity)
# contains both real data and zero-filled regions, and we are slicing out
# a zero-filled portion at the block_size granularity.
if zeros.startswith(result):
return Chunk(None, size=bytes_read, allocation=CH_ALLOC)
return Chunk(bytes(result), size=bytes_read, allocation=CH_DATA)
elif has_hole:
# If any chunk was CH_HOLE (and none were CH_DATA), the result is CH_HOLE
Expand Down
32 changes: 28 additions & 4 deletions src/borg/testsuite/chunkers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,38 @@ def fs_supports_sparse():
BS = 4096 # filesystem block size

# Some sparse files. X = content blocks, _ = sparse blocks.
# Block size must always be BS.
# NOTE(review): each map entry looks like an (offset, size, is_data) tuple -- confirm
# against make_sparsefile / make_content. In the pattern line above each map, one
# character stands for one BS-sized block: X for a True entry, _ for a False entry.

# X__XXX____
map_sparse1 = [(0 * BS, 1 * BS, True), (1 * BS, 2 * BS, False), (3 * BS, 3 * BS, True), (6 * BS, 4 * BS, False)]
map_sparse1 = [
(0, BS, True),
(1 * BS, BS, False),
(2 * BS, BS, False),
(3 * BS, BS, True),
(4 * BS, BS, True),
(5 * BS, BS, True),
(6 * BS, BS, False),
(7 * BS, BS, False),
(8 * BS, BS, False),
(9 * BS, BS, False),
]

# _XX___XXXX
map_sparse2 = [(0 * BS, 1 * BS, False), (1 * BS, 2 * BS, True), (3 * BS, 3 * BS, False), (6 * BS, 4 * BS, True)]
map_sparse2 = [
(0, BS, False),
(1 * BS, BS, True),
(2 * BS, BS, True),
(3 * BS, BS, False),
(4 * BS, BS, False),
(5 * BS, BS, False),
(6 * BS, BS, True),
(7 * BS, BS, True),
(8 * BS, BS, True),
(9 * BS, BS, True),
]

# XXX
map_notsparse = [(0 * BS, 3 * BS, True)]
map_notsparse = [(0, BS, True), (BS, BS, True), (2 * BS, BS, True)]

# ___
map_onlysparse = [(0 * BS, 3 * BS, False)]
map_onlysparse = [(0, BS, False), (BS, BS, False), (2 * BS, BS, False)]
43 changes: 39 additions & 4 deletions src/borg/testsuite/chunkers/fixed_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,42 @@

import pytest

from . import cf, cf_expand, make_sparsefile, make_content, fs_supports_sparse
from . import cf, cf_expand, make_sparsefile, make_content
from . import BS, map_sparse1, map_sparse2, map_onlysparse, map_notsparse
from ...chunkers import ChunkerFixed
from ...constants import * # NOQA


@pytest.mark.skipif(not fs_supports_sparse(), reason="filesystem does not support sparse files")
def pretty_print(msg, items):
    """
    Pretty-print the result of get_chunks.

    *msg* is printed as a heading and underlined with dashes. Then one line is
    printed per element of *items*:

    - bytes consisting solely of b"H": ``header(N)``
    - bytes consisting solely of b"X": ``body(N)``
    - bytes consisting solely of NUL bytes: ``zeros(N)``
    - any other bytes: ``other(N)``
    - an int: ``sparse(N)``, where N is the integer value itself
    - anything else: ``???(item)``

    For bytes, N is the length of the object. The rules are tried in the order
    given, so empty bytes are reported as ``header(0)``.
    """
    print(msg)
    print("-" * len(msg))
    # (fill byte, label) pairs, tried in order; the first fill byte that accounts
    # for the whole item decides the label.
    fills = ((b"H", "header"), (b"X", "body"), (b"\0", "zeros"))
    for item in items:
        if isinstance(item, bytes):
            # Stripping the fill byte leaves nothing iff the item consists solely of it.
            label = next((name for fill, name in fills if not item.strip(fill)), "other")
            print(f"{label}({len(item)})")
        elif isinstance(item, int):
            print(f"sparse({item})")
        else:
            # Unexpected element type, just print a generic line.
            print(f"???({item})")


@pytest.mark.parametrize(
"fname, sparse_map, header_size, sparse",
[
Expand All @@ -34,13 +63,19 @@
)
def test_chunkify_sparse(tmpdir, fname, sparse_map, header_size, sparse):
def get_chunks(fname, sparse, header_size):
chunker = ChunkerFixed(4096, header_size=header_size, sparse=sparse)
chunker = ChunkerFixed(BS, header_size=header_size, sparse=sparse)
with open(fname, "rb") as fd:
return cf(chunker.chunkify(fd))

# this only works if sparse map blocks are same size as fixed chunker blocks
fn = str(tmpdir / fname)
make_sparsefile(fn, sparse_map, header_size=header_size)
get_chunks(fn, sparse=sparse, header_size=header_size) == make_content(sparse_map, header_size=header_size)
Comment thread
ThomasWaldmann marked this conversation as resolved.
expected_content = make_content(sparse_map, header_size=header_size)
got_chunks = get_chunks(fn, sparse=sparse, header_size=header_size)
print(f"sparse: {sparse}")
pretty_print("expected", expected_content)
pretty_print("got", got_chunks)
assert expected_content == got_chunks


@pytest.mark.skipif("BORG_TESTS_SLOW" not in os.environ, reason="slow tests not enabled, use BORG_TESTS_SLOW=1")
Expand Down
23 changes: 21 additions & 2 deletions src/borg/testsuite/chunkers/reader_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,22 @@
from ...constants import * # NOQA


def coalesce_sparse_map(sparse_map):
    """Coalesce adjacent ranges with the same is_data flag, as the OS would report them.

    :param sparse_map: sequence of (start, size, is_data) tuples, ordered by start.
    :return: list of (start, size, is_data) tuples in which consecutive entries are
             merged into one when they share the same is_data flag and the second
             begins exactly where the first ends. Empty input gives an empty list.
    """
    if not sparse_map:
        return []
    result = []
    for start, size, is_data in sparse_map:
        if result:
            prev_start, prev_size, prev_is_data = result[-1]
            # Merge only ranges that really touch each other: summing the sizes of
            # two same-typed ranges separated by a gap would yield a range that
            # covers bytes neither of them described.
            if prev_is_data == is_data and prev_start + prev_size == start:
                result[-1] = (prev_start, prev_size + size, prev_is_data)
                continue
        result.append((start, size, is_data))
    return result


@pytest.mark.skipif(not fs_supports_sparse(), reason="filesystem does not support sparse files")
@pytest.mark.parametrize(
"fname, sparse_map",
Expand All @@ -28,8 +44,11 @@ def get_sparsemap_fd(fname):

fn = str(tmpdir / fname)
make_sparsefile(fn, sparse_map)
assert get_sparsemap_fh(fn) == sparse_map
assert get_sparsemap_fd(fn) == sparse_map
# The OS coalesces adjacent ranges of the same type (data or hole),
# so we compare against the coalesced version of the expected map.
expected = coalesce_sparse_map(sparse_map)
assert get_sparsemap_fh(fn) == expected
assert get_sparsemap_fd(fn) == expected


@pytest.mark.parametrize(
Expand Down
Loading