From 0cefcd444c9a44353237537636f82725c76f2c26 Mon Sep 17 00:00:00 2001 From: Tobias Kerkering Date: Mon, 15 Jun 2026 20:46:44 +0000 Subject: [PATCH] Add 'z' redaction option to normalise timezone to UTC Adds 'z' to the reduction pattern: it converts author and committer timestamps to UTC (offset +00:00), removing the timezone offset as a location fingerprint. 'z' is applied before the existing M/d/h/m/s precision reductions, so those operate on the resulting UTC wall-clock time (and, with a limit, the working-hours window is interpreted in UTC). Naive datetimes are treated as UTC for a deterministic result regardless of the host's local timezone. Existing behaviour is unchanged when 'z' is not present in the pattern. Adds TimezoneTestCase covering conversion, instant preservation, ordering before reductions, and the naive-input case. Help text and README updated. Closes #39 --- README.md | 2 ++ gitprivacy/dateredacter/reduce.py | 23 ++++++++++++++--- gitprivacy/gitprivacy.py | 3 ++- tests/test_timestamp.py | 43 ++++++++++++++++++++++++++++++- 4 files changed, 66 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 27a805e..4761412 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,7 @@ To setup `git-privacy` for a _single Git repository_ do the following: - h: Sets the hour to midnight - m: Sets the minute to zero (full hour) - s: Sets the seconds to zero (full minute) + - z: Normalises the timezone to UTC (offset +00:00) $ git config privacy.pattern @@ -69,6 +70,7 @@ To setup `git-privacy` _globally for all new repositories_ do the following: - h: Sets the hour to midnight - m: Sets the minute to zero (full hour) - s: Sets the seconds to zero (full minute) + - z: Normalises the timezone to UTC (offset +00:00) $ git config --global privacy.pattern diff --git a/gitprivacy/dateredacter/reduce.py b/gitprivacy/dateredacter/reduce.py index fe68915..9487a20 100644 --- a/gitprivacy/dateredacter/reduce.py +++ b/gitprivacy/dateredacter/reduce.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import datetime, timezone import re from . import DateRedacter @@ -19,10 +19,18 @@ def __init__(self, pattern="s", limit=None, mode="reduce"): def redact(self, timestamp: datetime) -> datetime: """Reduces timestamp precision for the parts specifed by the pattern using - M: month, d: day, h: hour, m: minute, s: second. + M: month, d: day, h: hour, m: minute, s: second, z: timezone (to UTC). - Example: A pattern of 's' sets the seconds to 0.""" + Example: A pattern of 's' sets the seconds to 0. + 'z' converts the timestamp to UTC (offset +00:00) and thereby removes + the timezone offset as a location fingerprint. It is applied before the + precision reductions, so those operate on the resulting UTC wall-clock + time (and, with a 'limit', the working-hours window is interpreted in + UTC as well).""" + + if "z" in self.pattern: + timestamp = self._to_utc(timestamp) if "M" in self.pattern: timestamp = timestamp.replace(month=1) if "d" in self.pattern: @@ -36,6 +44,15 @@ def redact(self, timestamp: datetime) -> datetime: timestamp = self._enforce_limit(timestamp) return timestamp + @staticmethod + def _to_utc(timestamp: datetime) -> datetime: + """Convert an aware timestamp to UTC, preserving the instant. A naive + timestamp is assumed to already be UTC and merely tagged as such, so the + result is deterministic regardless of the host's local timezone.""" + if timestamp.tzinfo is None: + return timestamp.replace(tzinfo=timezone.utc) + return timestamp.astimezone(timezone.utc) + def _enforce_limit(self, timestamp: datetime) -> datetime: if not self.limit: return timestamp diff --git a/gitprivacy/gitprivacy.py b/gitprivacy/gitprivacy.py index d926aad..4f5a71f 100755 --- a/gitprivacy/gitprivacy.py +++ b/gitprivacy/gitprivacy.py @@ -91,7 +91,8 @@ def get_dateredacter(self) -> DateRedacter: "\n" "The pattern is a comma separated list that may contain the " "following time unit identifiers: " - "M: month, d: day, h: hour, m: minute, s: second.", + "M: month, d: day, h: hour, m: minute, s: second, " + "z: timezone (normalise to UTC).", preserve_paragraphs=True)) return ResolutionDateRedacter(self.pattern, self.limit, self.mode) diff --git a/tests/test_timestamp.py b/tests/test_timestamp.py index ad959df..4293aef 100644 --- a/tests/test_timestamp.py +++ b/tests/test_timestamp.py @@ -1,5 +1,5 @@ import unittest -from datetime import datetime +from datetime import datetime, timedelta, timezone from gitprivacy.dateredacter import ResolutionDateRedacter @@ -40,6 +40,47 @@ def test_month(self): self.assertEqual(ts.redact(self.full), expected) +class TimezoneTestCase(unittest.TestCase): + def setUp(self): + # 14:42:13 at UTC+02:00 == 12:42:13 UTC (same instant) + self.cet = datetime(year=2018, month=12, day=18, + hour=14, minute=42, second=13, + tzinfo=timezone(timedelta(hours=2))) + + def test_to_utc(self): + ts = ResolutionDateRedacter(mode="reduce", pattern="z") + result = ts.redact(self.cet) + expected = datetime(year=2018, month=12, day=18, + hour=12, minute=42, second=13, + tzinfo=timezone.utc) + self.assertEqual(result, expected) + self.assertEqual(result.utcoffset(), timedelta(0)) + + def test_preserves_instant(self): + ts = ResolutionDateRedacter(mode="reduce", pattern="z") + self.assertEqual(ts.redact(self.cet).timestamp(), self.cet.timestamp()) + + def test_applied_before_reductions(self): + # 01:30 +02:00 -> 2018-12-17 23:30 UTC -> 'h' zeroes the hour -> 00:30 UTC. + # The date rolling back to the 17th proves the UTC conversion ran first. + ts = ResolutionDateRedacter(mode="reduce", pattern="hz") + early = datetime(year=2018, month=12, day=18, + hour=1, minute=30, second=0, + tzinfo=timezone(timedelta(hours=2))) + expected = datetime(year=2018, month=12, day=17, + hour=0, minute=30, second=0, + tzinfo=timezone.utc) + self.assertEqual(ts.redact(early), expected) + + def test_naive_assumed_utc(self): + ts = ResolutionDateRedacter(mode="reduce", pattern="z") + naive = datetime(year=2018, month=12, day=18, + hour=14, minute=42, second=13) + result = ts.redact(naive) + self.assertEqual(result.utcoffset(), timedelta(0)) + self.assertEqual(result.replace(tzinfo=None), naive) + + class LimitTestCase(unittest.TestCase): def test_before(self): ts = ResolutionDateRedacter(limit="9-17")