From 2d81900931e82b393925cea7ec3df0bc8aff91ca Mon Sep 17 00:00:00 2001 From: Daniel Song Date: Mon, 2 Mar 2026 00:04:57 -0800 Subject: [PATCH 1/4] fix: replace fragile datetime string slicing with robust parser (#6) Replace hardcoded `res[:23]` truncation with `_parse_datetime()` helper that preserves microsecond precision and handles variable-length fractional seconds, timezone suffixes, and trailing whitespace. Co-Authored-By: Claude Opus 4.6 --- data_diff/databases/base.py | 16 ++++++++++++++- tests/test_datetime_parsing.py | 37 ++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 tests/test_datetime_parsing.py diff --git a/data_diff/databases/base.py b/data_diff/databases/base.py index 6eaa7d8d..b7e197b2 100644 --- a/data_diff/databases/base.py +++ b/data_diff/databases/base.py @@ -81,6 +81,20 @@ logger = logging.getLogger("database") +def _parse_datetime(s: str) -> datetime: + """Parse a datetime string, truncating sub-microsecond precision if present.""" + s = s.strip() + dot = s.rfind(".") + if dot != -1: + frac_end = dot + 1 + while frac_end < len(s) and s[frac_end].isdigit(): + frac_end += 1 + frac_digits = frac_end - dot - 1 + if frac_digits > 6: + s = s[: dot + 7] + s[frac_end:] + return datetime.fromisoformat(s) + + class CompileError(Exception): pass @@ -987,7 +1001,7 @@ def query(self, sql_ast: Expr | Generator, res_type: type = None, log_message: s elif res_type is datetime: res = _one(_one(res)) if isinstance(res, str): - res = datetime.fromisoformat(res[:23]) # TODO use a better parsing method + res = _parse_datetime(res) return res elif res_type is tuple: if len(res) != 1: diff --git a/tests/test_datetime_parsing.py b/tests/test_datetime_parsing.py new file mode 100644 index 00000000..a62dee8b --- /dev/null +++ b/tests/test_datetime_parsing.py @@ -0,0 +1,37 @@ +import sys +from datetime import datetime, timezone + +import pytest + +from data_diff.databases.base import _parse_datetime + + +class TestParseDatetime: + def test_standard_microsecond_format(self): + result = _parse_datetime("2022-06-03 12:24:35.123456") + assert result == datetime(2022, 6, 3, 12, 24, 35, 123456) + + def test_millisecond_precision(self): + result = _parse_datetime("2022-06-03 12:24:35.123") + assert result == datetime(2022, 6, 3, 12, 24, 35, 123000) + + def test_no_fractional_seconds(self): + result = _parse_datetime("2022-06-03 12:24:35") + assert result == datetime(2022, 6, 3, 12, 24, 35) + + def test_nanosecond_precision_truncated(self): + result = _parse_datetime("2022-06-03 12:24:35.123456789") + assert result == datetime(2022, 6, 3, 12, 24, 35, 123456) + + def test_trailing_whitespace(self): + result = _parse_datetime("2022-06-03 12:24:35.123456 ") + assert result == datetime(2022, 6, 3, 12, 24, 35, 123456) + + @pytest.mark.skipif(sys.version_info < (3, 11), reason="fromisoformat timezone support requires 3.11+") + def test_timezone_suffix(self): + result = _parse_datetime("2022-06-03T12:24:35+00:00") + assert result == datetime(2022, 6, 3, 12, 24, 35, tzinfo=timezone.utc) + + def test_invalid_string_raises(self): + with pytest.raises(ValueError): + _parse_datetime("not-a-date") From 3d513b4f7252b076de2ddaeb4388eda4ca5efad4 Mon Sep 17 00:00:00 2001 From: Daniel Song Date: Mon, 2 Mar 2026 00:38:38 -0800 Subject: [PATCH 2/4] fix: address review feedback for datetime parsing - Normalize Z suffix to +00:00 for Python 3.10 compatibility - Add error logging at call site before ValueError propagates - Fix incorrect skipif guard on timezone test (+HH:MM works on 3.7+) - Add tests: Z suffix, 7-digit boundary, leading whitespace, nanoseconds with timezone, empty string Co-Authored-By: Claude Opus 4.6 --- data_diff/databases/base.py | 8 +++++++- tests/test_datetime_parsing.py | 29 ++++++++++++++++++++++++++--- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/data_diff/databases/base.py b/data_diff/databases/base.py index b7e197b2..68dfa05e 100644 --- a/data_diff/databases/base.py +++ b/data_diff/databases/base.py @@ -84,6 +84,8 @@ def _parse_datetime(s: str) -> datetime: """Parse a datetime string, truncating sub-microsecond precision if present.""" s = s.strip() + if s.endswith("Z"): + s = s[:-1] + "+00:00" dot = s.rfind(".") if dot != -1: frac_end = dot + 1 @@ -1001,7 +1003,11 @@ def query(self, sql_ast: Expr | Generator, res_type: type = None, log_message: s elif res_type is datetime: res = _one(_one(res)) if isinstance(res, str): - res = _parse_datetime(res) + try: + res = _parse_datetime(res) + except ValueError: + logger.error("Failed to parse datetime string returned by database: %r", res) + raise return res elif res_type is tuple: if len(res) != 1: diff --git a/tests/test_datetime_parsing.py b/tests/test_datetime_parsing.py index a62dee8b..9eb8b6d4 100644 --- a/tests/test_datetime_parsing.py +++ b/tests/test_datetime_parsing.py @@ -1,5 +1,5 @@ import sys -from datetime import datetime, timezone +from datetime import datetime, timedelta, timezone import pytest @@ -23,15 +23,38 @@ def test_nanosecond_precision_truncated(self): result = _parse_datetime("2022-06-03 12:24:35.123456789") assert result == datetime(2022, 6, 3, 12, 24, 35, 123456) + def test_seven_fractional_digits_truncated(self): + result = _parse_datetime("2022-06-03 12:24:35.1234567") + assert result == datetime(2022, 6, 3, 12, 24, 35, 123456) + def test_trailing_whitespace(self): result = _parse_datetime("2022-06-03 12:24:35.123456 ") assert result == datetime(2022, 6, 3, 12, 24, 35, 123456) - @pytest.mark.skipif(sys.version_info < (3, 11), reason="fromisoformat timezone support requires 3.11+") - def test_timezone_suffix(self): + def test_leading_whitespace(self): + result = _parse_datetime(" 2022-06-03 12:24:35.123456") + assert result == datetime(2022, 6, 3, 12, 24, 35, 123456) + + def test_timezone_offset(self): result = _parse_datetime("2022-06-03T12:24:35+00:00") assert result == datetime(2022, 6, 3, 12, 24, 35, tzinfo=timezone.utc) + def test_z_suffix_utc(self): + result = _parse_datetime("2022-06-03T12:24:35Z") + assert result == datetime(2022, 6, 3, 12, 24, 35, tzinfo=timezone.utc) + + @pytest.mark.skipif( + sys.version_info < (3, 11), reason="fromisoformat needs 3.11+ for fractional seconds with tz offset" + ) + def test_nanosecond_precision_with_timezone(self): + result = _parse_datetime("2022-06-03T12:24:35.123456789+05:30") + expected_tz = timezone(timedelta(hours=5, minutes=30)) + assert result == datetime(2022, 6, 3, 12, 24, 35, 123456, tzinfo=expected_tz) + def test_invalid_string_raises(self): with pytest.raises(ValueError): _parse_datetime("not-a-date") + + def test_empty_string_raises(self): + with pytest.raises(ValueError): + _parse_datetime("") From 7a685685331aeb57cd3cd96a595afabfd931e043 Mon Sep 17 00:00:00 2001 From: Daniel Song Date: Mon, 2 Mar 2026 00:42:31 -0800 Subject: [PATCH 3/4] fix: remove incorrect skipif and add missing edge case tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove skipif on test_nanosecond_precision_with_timezone — after truncation the string is valid on Python 3.10 - Add test_nanosecond_precision_with_z_suffix (combined Z + truncation) - Add test_trailing_dot_raises (malformed fractional seconds) - Remove unused sys import Co-Authored-By: Claude Opus 4.6 --- tests/test_datetime_parsing.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/test_datetime_parsing.py b/tests/test_datetime_parsing.py index 9eb8b6d4..5d2fe47a 100644 --- a/tests/test_datetime_parsing.py +++ b/tests/test_datetime_parsing.py @@ -1,4 +1,3 @@ -import sys from datetime import datetime, timedelta, timezone import pytest @@ -43,14 +42,19 @@ def test_z_suffix_utc(self): result = _parse_datetime("2022-06-03T12:24:35Z") assert result == datetime(2022, 6, 3, 12, 24, 35, tzinfo=timezone.utc) - @pytest.mark.skipif( - sys.version_info < (3, 11), reason="fromisoformat needs 3.11+ for fractional seconds with tz offset" - ) def test_nanosecond_precision_with_timezone(self): result = _parse_datetime("2022-06-03T12:24:35.123456789+05:30") expected_tz = timezone(timedelta(hours=5, minutes=30)) assert result == datetime(2022, 6, 3, 12, 24, 35, 123456, tzinfo=expected_tz) + def test_nanosecond_precision_with_z_suffix(self): + result = _parse_datetime("2022-06-03T12:24:35.123456789Z") + assert result == datetime(2022, 6, 3, 12, 24, 35, 123456, tzinfo=timezone.utc) + + def test_trailing_dot_raises(self): + with pytest.raises(ValueError): + _parse_datetime("2022-06-03T12:24:35.") + def test_invalid_string_raises(self): with pytest.raises(ValueError): _parse_datetime("not-a-date") From 42fc013715374f24eead85a75b20c81eccf4e9cc Mon Sep 17 00:00:00 2001 From: Daniel Song Date: Mon, 2 Mar 2026 00:48:01 -0800 Subject: [PATCH 4/4] fix: address remaining review suggestions - Expand docstring to document all normalizations (whitespace, Z suffix, truncation) - Add defensive guards for empty/missing datetime results, matching the existing int path - Enrich error log with database name and SQL context - Add tests: all-nines truncation boundary, negative timezone offset Co-Authored-By: Claude Opus 4.6 --- data_diff/databases/base.py | 21 ++++++++++++++++++--- tests/test_datetime_parsing.py | 9 +++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/data_diff/databases/base.py b/data_diff/databases/base.py index 68dfa05e..8d49548f 100644 --- a/data_diff/databases/base.py +++ b/data_diff/databases/base.py @@ -82,7 +82,12 @@ def _parse_datetime(s: str) -> datetime: - """Parse a datetime string, truncating sub-microsecond precision if present.""" + """Parse an ISO 8601 datetime string with the following normalizations: + + - Strips leading/trailing whitespace + - Converts 'Z' timezone suffix to '+00:00' for Python 3.10 compatibility + - Truncates sub-microsecond precision (>6 fractional digits) to microseconds + """ s = s.strip() if s.endswith("Z"): s = s[:-1] + "+00:00" @@ -1001,12 +1006,22 @@ def query(self, sql_ast: Expr | Generator, res_type: type = None, log_message: s return None return int(res) elif res_type is datetime: - res = _one(_one(res)) + if not res: + raise ValueError("Datetime query returned 0 rows, expected 1") + row = _one(res) + if not row: + raise ValueError("Datetime query row is empty, expected 1 column") + res = _one(row) if isinstance(res, str): try: res = _parse_datetime(res) except ValueError: - logger.error("Failed to parse datetime string returned by database: %r", res) + logger.error( + "Failed to parse datetime string returned by database %s: %r (sql: %s)", + self.name, + res, + sql_code, + ) raise return res elif res_type is tuple: diff --git a/tests/test_datetime_parsing.py b/tests/test_datetime_parsing.py index 5d2fe47a..0e83fdaa 100644 --- a/tests/test_datetime_parsing.py +++ b/tests/test_datetime_parsing.py @@ -51,6 +51,15 @@ def test_nanosecond_precision_with_z_suffix(self): result = _parse_datetime("2022-06-03T12:24:35.123456789Z") assert result == datetime(2022, 6, 3, 12, 24, 35, 123456, tzinfo=timezone.utc) + def test_all_nines_nanoseconds_truncates_not_rounds(self): + result = _parse_datetime("2022-06-03 23:59:59.999999999") + assert result == datetime(2022, 6, 3, 23, 59, 59, 999999) + + def test_negative_timezone_offset(self): + result = _parse_datetime("2022-06-03T12:24:35-05:00") + expected_tz = timezone(timedelta(hours=-5)) + assert result == datetime(2022, 6, 3, 12, 24, 35, tzinfo=expected_tz) + def test_trailing_dot_raises(self): with pytest.raises(ValueError): _parse_datetime("2022-06-03T12:24:35.")