Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
282d70c
Add preliminary support for ISO-8601 timestamps (no timezones at the …
c-herz Apr 19, 2025
db46cdb
reformatted to pass style checks
degabe Apr 21, 2025
4363bf7
Applied recommended changes from ThomasWald, still working as intende…
degabe Apr 21, 2025
69e8608
fix bug with local timezone attachment not correctly respecting DST
c-herz Apr 21, 2025
5c20d8f
Reformatted for consistency with code style guide
c-herz Apr 22, 2025
6f1bcd4
Added basic test suite for ISO-8601 and Unix timestamp matching
c-herz Apr 22, 2025
4060e94
Merge remote-tracking branch 'origin/dateFilterImprov' into datefilter
c-herz Apr 22, 2025
e9a8c5f
add day-precision filter test for `date:YYYY-MM-DD`
c-herz Apr 22, 2025
470758d
support timezone suffixes in date: patterns and add tests
c-herz Apr 22, 2025
df2d33d
Wildcard working. Done some manual testing, will focus on more rigoro…
degabe Apr 23, 2025
870bf7a
add tests for wildcard support in date: archive match patterns; refor…
c-herz Apr 25, 2025
461df75
fix bug with wildcards in date: match patterns not respecting supplie…
c-herz Apr 25, 2025
9553c35
remove stray testfile.txt
c-herz Apr 25, 2025
409733b
refactor date: pattern parser to use structured bottom-up regex, per …
c-herz Apr 25, 2025
de03806
refactor date: pattern parsing to use helper functions for datetime c…
c-herz Apr 25, 2025
796981c
add explicit time interval matching in date: archive match pattern (w…
c-herz Apr 25, 2025
7b8a194
add duration-based interval support for date: archive match patterns;…
c-herz Apr 25, 2025
8e3f1e4
add support for keyword-based date intervals in archive date: matchin…
c-herz Apr 25, 2025
904853d
refactor time.py: rename internal functions for clarity and consistency
c-herz Apr 25, 2025
6032c4a
add support for ISO week-date and ordinal-date matching in date: arch…
c-herz Apr 25, 2025
9cb5e5f
enhance compile_date_pattern docstring: clarify TIMESTAMP and DURATIO…
c-herz Apr 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
183 changes: 112 additions & 71 deletions src/borg/helpers/time.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import re
from datetime import datetime, timezone, timedelta
from zoneinfo import ZoneInfo


def parse_timestamp(timestamp, tzinfo=timezone.utc):
Expand Down Expand Up @@ -191,96 +192,136 @@ class DatePatternError(ValueError):
"""Raised when a date: archive pattern cannot be parsed."""


def local(dt: datetime) -> datetime:
    """
    Return dt as a timezone-aware datetime.

    An aware dt is handed back untouched. A naive dt is taken to be local
    wall-clock time and gets the local timezone attached via astimezone().
    """
    if dt.tzinfo is not None:
        return dt
    return dt.astimezone()


def exact_predicate(dt: datetime):
    """
    Return a predicate matching archives whose timestamp equals dt.

    Both dt and the candidate timestamp are normalised to UTC before they are
    compared, so the same instant expressed in different timezones matches.

    :param dt: instant to match; a naive value is interpreted as local time.
    :return: callable ts -> bool, True only if ts is the very same instant.
    """
    # astimezone() already treats a naive datetime as local time, so a separate
    # "attach the local timezone first" step would be redundant.
    dt_utc = dt.astimezone(timezone.utc)
    return lambda ts: ts.astimezone(timezone.utc) == dt_utc


def interval_predicate(start: datetime, end: datetime):
    """
    Return a predicate matching archives whose timestamp lies in [start, end).

    The interval is half-open: start itself matches, end does not. All values
    are normalised to UTC before they are compared.

    :param start: inclusive lower bound; a naive value is interpreted as local time.
    :param end: exclusive upper bound; a naive value is interpreted as local time.
    :return: callable ts -> bool.
    """
    # astimezone() already treats naive datetimes as local time, so the bounds
    # can be converted directly.
    start_utc = start.astimezone(timezone.utc)
    end_utc = end.astimezone(timezone.utc)
    return lambda ts: start_utc <= ts.astimezone(timezone.utc) < end_utc


def compile_date_pattern(expr: str):
def parse_tz(tzstr: str):
    """
    Parse the timezone suffix of a date: pattern into a tzinfo object.

    Accepted forms:
      - empty string or None -> None (no timezone was given)
      - "Z"                  -> UTC
      - "+HH:MM" / "-HH:MM"  -> fixed UTC offset
      - "[Region/Name]"      -> named zone looked up via zoneinfo

    :param tzstr: the timezone suffix, possibly empty or None.
    :return: a tzinfo instance, or None if tzstr is empty.
    :raises DatePatternError: if the offset is malformed or out of range, or
        if the named zone cannot be loaded.
    """
    if not tzstr:
        return None
    if tzstr == "Z":
        return timezone.utc
    if tzstr[0] in "+-":
        sign = 1 if tzstr[0] == "+" else -1
        hours_text, sep, minutes_text = tzstr[1:].partition(":")
        # Only plain ASCII digits are allowed: int() on its own would also accept
        # an extra sign, surrounding whitespace or underscores (e.g. "+-5:30").
        well_formed = (
            bool(sep)
            and hours_text.isascii()
            and hours_text.isdigit()
            and minutes_text.isascii()
            and minutes_text.isdigit()
        )
        if not well_formed:
            raise DatePatternError("invalid UTC offset format")
        try:
            hh, mm = int(hours_text), int(minutes_text)
        except ValueError:
            raise DatePatternError("invalid UTC offset format") from None
        if not (0 <= mm < 60):
            raise DatePatternError("invalid UTC offset format")
        # Apply the sign to the whole offset so that, for example, -08:30 is
        # -8 hours and -30 minutes, not -8 hours and +30 minutes.
        total_minutes = sign * (hh * 60 + mm)
        # Only offsets between -12:00 and +14:00 are accepted.
        if not (-12 * 60 <= total_minutes <= 14 * 60):
            raise DatePatternError("UTC offset outside ISO-8601 bounds")
        return timezone(timedelta(minutes=total_minutes))
    # [Region/Name]
    try:
        return ZoneInfo(tzstr.strip("[]"))
    except (KeyError, ValueError, OSError):
        # ZoneInfoNotFoundError is a KeyError; malformed keys raise ValueError;
        # unreadable or unexpected tz database entries surface as OSError.
        raise DatePatternError("invalid timezone format") from None

# 1) Full timestamp (with fraction)
full_re = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+")
if full_re.match(expr):
dt = parse_local_timestamp(expr, tzinfo=timezone.utc)
return exact_predicate(dt) # no interval, since we have a fractional timestamp

# 2) Seconds-only
second_re = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}$")
if second_re.match(expr):
start = parse_local_timestamp(expr, tzinfo=timezone.utc)
# One regex for every supported form. The timestamp is built from nested optional
# groups, so each precision level simply extends the previous one and the
# innermost group that matched tells us which precision the user supplied.
_DATE_PATTERN_RE = re.compile(
    r"""
    ^
    (?:
        @(?P<epoch>\d+)                         # unix epoch
      |
        (?P<stamp>
            (?P<year>\d{4})                     # YYYY
            (?:-(?P<month>\d{2})                # -MM
            (?:-(?P<day>\d{2})                  # -DD
            (?:T(?P<hour>\d{2})                 # THH
            (?::(?P<minute>\d{2})               # :MM
            (?::(?P<second>\d{2})               # :SS
            (?:\.(?P<fraction>\d+)              # .fraction
            )?)?)?)?)?)?
        )
    )
    (?P<tz>Z|[+\-]\d{2}:\d{2}|\[[^\]]+\])?      # optional timezone or [Region/City]
    $
    """,
    re.VERBOSE,
)


def compile_date_pattern(expr: str):
    """
    Turn a date: expression into a predicate ts -> bool.

    Accepts any of:
        YYYY
        YYYY-MM
        YYYY-MM-DD
        YYYY-MM-DDTHH
        YYYY-MM-DDTHH:MM
        YYYY-MM-DDTHH:MM:SS
        YYYY-MM-DDTHH:MM:SS.fraction
        Unix epoch (@123456789)
    ...with an optional trailing timezone (Z or ±HH:MM or [Region/City]).
    Unix epochs are always UTC and must not carry a timezone suffix.

    A timestamp with fractional seconds matches that exact instant only. Every
    other form matches the half-open interval covering the given precision
    (one second, minute, hour, day, month or year; one second for an epoch).

    :param expr: the text following "date:"; surrounding whitespace is ignored.
    :return: predicate that is True for timestamps inside the interval.
    :raises DatePatternError: if expr has an unsupported shape, an invalid
        timezone, or names an impossible date/time (e.g. month 13).
    """
    expr = expr.strip()
    m = _DATE_PATTERN_RE.match(expr)
    if not m:
        raise DatePatternError(f"unrecognised date: {expr!r}")

    gd = m.groupdict()
    tz = parse_tz(gd["tz"])  # None if no suffix was given -> local timezone

    if gd["epoch"] is not None:
        # unix epoch and user-specified timezone are mutually exclusive
        if tz is not None:
            raise DatePatternError("unix‐epoch patterns (@123456789) are UTC and must not include a timezone suffix")
        try:
            start = datetime.fromtimestamp(int(gd["epoch"]), tz=timezone.utc)
            end = start + timedelta(seconds=1)
        except (OverflowError, OSError, ValueError):
            # epoch value is outside the range datetime can represent
            raise DatePatternError(f"invalid epoch: {expr!r}") from None
        # match within the second
        return interval_predicate(start, end)

    stamp = gd["stamp"]
    try:
        # Check groups from finest to coarsest precision; the matched text is
        # padded to a full timestamp so parse_timestamp can parse it.
        if gd["fraction"] is not None:
            # fractional seconds: exact match, no interval
            return exact_predicate(parse_timestamp(stamp, tzinfo=tz))
        if gd["second"] is not None:
            start = parse_timestamp(stamp, tzinfo=tz)
            end = start + timedelta(seconds=1)
        elif gd["minute"] is not None:
            start = parse_timestamp(stamp + ":00", tzinfo=tz)
            end = start + timedelta(minutes=1)
        elif gd["hour"] is not None:
            start = parse_timestamp(stamp + ":00:00", tzinfo=tz)
            end = start + timedelta(hours=1)
        elif gd["day"] is not None:
            start = parse_timestamp(stamp + "T00:00:00", tzinfo=tz)
            end = start + timedelta(days=1)
        elif gd["month"] is not None:
            start = parse_timestamp(stamp + "-01T00:00:00", tzinfo=tz)
            end = offset_n_months(start, 1)
        else:
            start = parse_timestamp(stamp + "-01-01T00:00:00", tzinfo=tz)
            end = offset_n_months(start, 12)
    except (ValueError, OverflowError) as e:
        # The regex only checks the shape; out-of-range fields (month 13,
        # hour 25, ...) are reported with the same error type as other bad patterns.
        raise DatePatternError(f"invalid date {expr!r}: {e}") from None

    return interval_predicate(start, end)
118 changes: 118 additions & 0 deletions src/borg/testsuite/archiver/match_archives_date_test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import pytest
from datetime import datetime, timezone

from ...constants import * # NOQA
from . import cmd, create_src_archive, generate_archiver_tests, RK_ENCRYPTION
from ...helpers.errors import CommandError

# pytest collection hook: delegates parametrization of this module's tests to
# generate_archiver_tests for the "local", "remote" and "binary" archiver kinds.
pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote,binary") # NOQA

Expand All @@ -19,6 +21,12 @@
("archive-mon-diff", "2025-01-31T23:59:59"),
]

# (archive name, creation timestamp) pairs for the day-precision test:
# the first and last second of 2025-01-02 plus the last second of 2025-01-01.
DAY_ARCHIVES = [
    ("archive-day-start", "2025-01-02T00:00:00"),
    ("archive-day-same", "2025-01-02T23:59:59"),
    ("archive-day-diff", "2025-01-01T23:59:59"),
]

HOUR_ARCHIVES = [
("archive-hour-start", "2025-01-01T14:00:00"),
("archive-hour-same", "2025-01-01T14:59:59"),
Expand Down Expand Up @@ -76,6 +84,25 @@ def test_match_archives_month(archivers, request):
assert "archive-mon-diff" not in out_feb


def test_match_archives_day(archivers, request):
    """A date:YYYY-MM-DD pattern selects exactly the archives created on that day."""
    archiver = request.getfixturevalue(archivers)
    cmd(archiver, "repo-create", RK_ENCRYPTION)
    for archive_name, created_at in DAY_ARCHIVES:
        create_src_archive(archiver, archive_name, ts=created_at)

    # Jan 1st: only the archive from the last second of that day qualifies.
    listing_jan1 = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-01-01", exit_code=0)
    assert "archive-day-diff" in listing_jan1
    for excluded in ("archive-day-start", "archive-day-same"):
        assert excluded not in listing_jan1

    # Jan 2nd: both the earliest and the latest possible time of the day qualify.
    listing_jan2 = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-01-02", exit_code=0)
    for included in ("archive-day-start", "archive-day-same"):
        assert included in listing_jan2
    assert "archive-day-diff" not in listing_jan2


def test_match_archives_hour(archivers, request):
archiver = request.getfixturevalue(archivers)
cmd(archiver, "repo-create", RK_ENCRYPTION)
Expand Down Expand Up @@ -143,3 +170,94 @@ def test_unix_timestamps(archivers, request):
assert "archive-sec-target" in output
assert "archive-sec-before" not in output
assert "archive-sec-after" not in output


# (archive name, creation timestamp) pairs with explicit UTC offsets:
# one archive stamped 12:01 at -08:00 on Jan 1st, one stamped 12:01 at +00:00 on Jan 2nd.
TIMEZONE_ARCHIVES = [("archive-la", "2025-01-01T12:01:00-08:00"), ("archive-utc", "2025-01-02T12:01:00+00:00")]


@pytest.mark.parametrize(
    "timezone_variant",
    [
        "2025-01-01T12:01:00-08:00",
        "2025-01-01T12:01:00[America/Los_Angeles]",
    ],
)
def test_match_la_equivalents(archivers, request, timezone_variant):
    """The -08:00 offset and the [America/Los_Angeles] zone name select the same archive."""
    archiver = request.getfixturevalue(archivers)
    cmd(archiver, "repo-create", RK_ENCRYPTION)
    for archive_name, created_at in TIMEZONE_ARCHIVES:
        create_src_archive(archiver, archive_name, ts=created_at)

    match_arg = f"--match-archives=date:{timezone_variant}"
    listing = cmd(archiver, "repo-list", "-v", match_arg, exit_code=0)
    assert "archive-la" in listing
    assert "archive-utc" not in listing


@pytest.mark.parametrize(
    "timezone_variant",
    [
        "2025-01-02T12:01:00+00:00",
        "2025-01-02T12:01:00Z",
        "2025-01-02T12:01:00[Etc/UTC]",
    ],
)
def test_match_utc_equivalents(archivers, request, timezone_variant):
    """The +00:00, Z and [Etc/UTC] spellings of UTC all select the same archive."""
    archiver = request.getfixturevalue(archivers)
    cmd(archiver, "repo-create", RK_ENCRYPTION)
    for archive_name, created_at in TIMEZONE_ARCHIVES:
        create_src_archive(archiver, archive_name, ts=created_at)

    match_arg = f"--match-archives=date:{timezone_variant}"
    listing = cmd(archiver, "repo-list", "-v", match_arg, exit_code=0)
    assert "archive-utc" in listing
    assert "archive-la" not in listing


# (archive name, creation timestamp) pairs, all in UTC: one minute before 10:00Z,
# exactly 10:00Z, and the last second of the 10:00Z hour.
HOUR_TZ_ARCHIVES = [
    ("archive-hour-diff", "2025-01-01T09:59:00Z"),
    ("archive-hour-start", "2025-01-01T10:00:00Z"),
    ("archive-hour-same", "2025-01-01T10:59:59Z"),
]


def test_match_hour_from_different_tz(archivers, request):
    """
    An hour-precision pattern carrying a UTC offset is matched against archive
    timestamps recorded in another timezone.
    """
    archiver = request.getfixturevalue(archivers)
    cmd(archiver, "repo-create", RK_ENCRYPTION)
    for archive_name, created_at in HOUR_TZ_ARCHIVES:
        create_src_archive(archiver, archive_name, ts=created_at)

    # 11:00 at +01:00 is the hour 10:00:00-10:59:59 in UTC.
    listing = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-01-01T11+01:00", exit_code=0)
    for included in ("archive-hour-start", "archive-hour-same"):
        assert included in listing
    assert "archive-hour-diff" not in listing


def test_match_day_from_different_tz(archivers, request):
    """
    Test that the date filter works for days with archives created in a different timezone.

    The archive is stamped just after midnight on 2025-03-02 at +02:00, which
    is still 2025-03-01 in UTC. It must therefore match the UTC day
    2025-03-01 and must not match the UTC day 2025-03-02.
    """
    archiver = request.getfixturevalue(archivers)
    cmd(archiver, "repo-create", RK_ENCRYPTION)

    # Local 2025-03-02T00:30:00+02:00 -> UTC 2025-03-01T22:30:00Z
    create_src_archive(archiver, "archive-utc-bound", ts="2025-03-02T00:30:00+02:00")

    out = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-03-01[Etc/UTC]", exit_code=0)
    assert "archive-utc-bound" in out

    # Complementary check: the date written in the archive's own offset (03-02)
    # must not match when the pattern is evaluated in UTC. Without it, a
    # filter that matched every archive would still pass this test.
    out_next_day = cmd(archiver, "repo-list", "-v", "--match-archives=date:2025-03-02[Etc/UTC]", exit_code=0)
    assert "archive-utc-bound" not in out_next_day


@pytest.mark.parametrize(
    "invalid_expr",
    [
        # offsets past the accepted -12:00 .. +14:00 range
        "2025-01-01T00:00:00+14:01",
        "2025-01-01T00:00:00-12:01",
        # minutes field out of range
        "2025-01-01T00:00:00+09:99",
        # bracketed names that are not valid zones
        "2025-01-01T00:00:00[garbage]",
        "2025-01-01T00:00:00[Not/AZone]",
    ],
)
def test_invalid_timezones_rejected(archivers, request, invalid_expr):
    """
    A date: pattern with an invalid timezone suffix makes the command fail with
    an error that quotes the offending expression.
    """
    archiver = request.getfixturevalue(archivers)
    cmd(archiver, "repo-create", RK_ENCRYPTION)

    match_arg = f"--match-archives=date:{invalid_expr}"
    with pytest.raises(CommandError) as excinfo:
        cmd(archiver, "repo-list", "-v", match_arg)

    error_text = str(excinfo.value)
    assert "Invalid date pattern" in error_text
    assert invalid_expr in error_text
Loading