diff --git a/README.md b/README.md
index 27a805e..4761412 100644
--- a/README.md
+++ b/README.md
@@ -38,6 +38,7 @@ To setup `git-privacy` for a _single Git repository_ do the following:
     - h: Sets the hour to midnight
     - m: Sets the minute to zero (full hour)
     - s: Sets the seconds to zero (full minute)
+    - z: Normalises the timezone to UTC (offset +00:00)
 
         $ git config privacy.pattern
 
@@ -69,6 +70,7 @@ To setup `git-privacy` _globally for all new repositories_ do the following:
     - h: Sets the hour to midnight
     - m: Sets the minute to zero (full hour)
     - s: Sets the seconds to zero (full minute)
+    - z: Normalises the timezone to UTC (offset +00:00)
 
         $ git config --global privacy.pattern
 
diff --git a/gitprivacy/dateredacter/reduce.py b/gitprivacy/dateredacter/reduce.py
index fe68915..9487a20 100644
--- a/gitprivacy/dateredacter/reduce.py
+++ b/gitprivacy/dateredacter/reduce.py
@@ -1,4 +1,4 @@
-from datetime import datetime
+from datetime import datetime, timezone
 import re
 
 from . import DateRedacter
@@ -19,10 +19,18 @@ def __init__(self, pattern="s", limit=None, mode="reduce"):
 
     def redact(self, timestamp: datetime) -> datetime:
         """Reduces timestamp precision for the parts specifed by the pattern using
-        M: month, d: day, h: hour, m: minute, s: second.
+        M: month, d: day, h: hour, m: minute, s: second, z: timezone (to UTC).
 
-        Example: A pattern of 's' sets the seconds to 0."""
+        Example: A pattern of 's' sets the seconds to 0.
+        'z' converts the timestamp to UTC (offset +00:00) and thereby removes
+        the timezone offset as a location fingerprint. It is applied before the
+        precision reductions, so those operate on the resulting UTC wall-clock
+        time (and, with a 'limit', the working-hours window is interpreted in
+        UTC as well)."""
+
+        if "z" in self.pattern:
+            timestamp = self._to_utc(timestamp)
 
         if "M" in self.pattern:
             timestamp = timestamp.replace(month=1)
         if "d" in self.pattern:
@@ -36,6 +44,15 @@ def redact(self, timestamp: datetime) -> datetime:
         timestamp = self._enforce_limit(timestamp)
         return timestamp
 
+    @staticmethod
+    def _to_utc(timestamp: datetime) -> datetime:
+        """Convert an aware timestamp to UTC, preserving the instant. A naive
+        timestamp is assumed to already be UTC and merely tagged as such, so the
+        result is deterministic regardless of the host's local timezone."""
+        if timestamp.tzinfo is None:
+            return timestamp.replace(tzinfo=timezone.utc)
+        return timestamp.astimezone(timezone.utc)
+
     def _enforce_limit(self, timestamp: datetime) -> datetime:
         if not self.limit:
             return timestamp
diff --git a/gitprivacy/gitprivacy.py b/gitprivacy/gitprivacy.py
index d926aad..4f5a71f 100755
--- a/gitprivacy/gitprivacy.py
+++ b/gitprivacy/gitprivacy.py
@@ -91,7 +91,8 @@ def get_dateredacter(self) -> DateRedacter:
                 "\n"
                 "The pattern is a comma separated list that may contain the "
                 "following time unit identifiers: "
-                "M: month, d: day, h: hour, m: minute, s: second.",
+                "M: month, d: day, h: hour, m: minute, s: second, "
+                "z: timezone (normalise to UTC).",
                 preserve_paragraphs=True))
         return ResolutionDateRedacter(self.pattern, self.limit, self.mode)
 
diff --git a/tests/test_timestamp.py b/tests/test_timestamp.py
index ad959df..4293aef 100644
--- a/tests/test_timestamp.py
+++ b/tests/test_timestamp.py
@@ -1,5 +1,5 @@
 import unittest
-from datetime import datetime
+from datetime import datetime, timedelta, timezone
 
 from gitprivacy.dateredacter import ResolutionDateRedacter
 
@@ -40,6 +40,47 @@ def test_month(self):
         self.assertEqual(ts.redact(self.full), expected)
 
 
+class TimezoneTestCase(unittest.TestCase):
+    def setUp(self):
+        # 14:42:13 at UTC+02:00 == 12:42:13 UTC (same instant)
+        self.cet = datetime(year=2018, month=12, day=18,
+                            hour=14, minute=42, second=13,
+                            tzinfo=timezone(timedelta(hours=2)))
+
+    def test_to_utc(self):
+        ts = ResolutionDateRedacter(mode="reduce", pattern="z")
+        result = ts.redact(self.cet)
+        expected = datetime(year=2018, month=12, day=18,
+                            hour=12, minute=42, second=13,
+                            tzinfo=timezone.utc)
+        self.assertEqual(result, expected)
+        self.assertEqual(result.utcoffset(), timedelta(0))
+
+    def test_preserves_instant(self):
+        ts = ResolutionDateRedacter(mode="reduce", pattern="z")
+        self.assertEqual(ts.redact(self.cet).timestamp(), self.cet.timestamp())
+
+    def test_applied_before_reductions(self):
+        # 01:30 +02:00 -> 2018-12-17 23:30 UTC -> 'h' zeroes the hour -> 00:30 UTC.
+        # The date rolling back to the 17th proves the UTC conversion ran first.
+        ts = ResolutionDateRedacter(mode="reduce", pattern="hz")
+        early = datetime(year=2018, month=12, day=18,
+                         hour=1, minute=30, second=0,
+                         tzinfo=timezone(timedelta(hours=2)))
+        expected = datetime(year=2018, month=12, day=17,
+                            hour=0, minute=30, second=0,
+                            tzinfo=timezone.utc)
+        self.assertEqual(ts.redact(early), expected)
+
+    def test_naive_assumed_utc(self):
+        ts = ResolutionDateRedacter(mode="reduce", pattern="z")
+        naive = datetime(year=2018, month=12, day=18,
+                         hour=14, minute=42, second=13)
+        result = ts.redact(naive)
+        self.assertEqual(result.utcoffset(), timedelta(0))
+        self.assertEqual(result.replace(tzinfo=None), naive)
+
+
 class LimitTestCase(unittest.TestCase):
     def test_before(self):
         ts = ResolutionDateRedacter(limit="9-17")