diff --git a/data_diff/databases/base.py b/data_diff/databases/base.py
index 6eaa7d8d..8d49548f 100644
--- a/data_diff/databases/base.py
+++ b/data_diff/databases/base.py
@@ -81,6 +81,33 @@
 logger = logging.getLogger("database")
 
 
+def _parse_datetime(s: str) -> datetime:
+    """Parse an ISO 8601 datetime string with the following normalizations:
+
+    - Strips leading/trailing whitespace
+    - Converts 'Z' timezone suffix to '+00:00' for Python 3.10 compatibility
+    - Truncates sub-microsecond precision (>6 fractional digits) to microseconds
+    - Zero-pads short fractions (1-5 digits) to microseconds, because Python 3.10
+      only accepts exactly 3 or 6 fractional digits
+
+    Raises ValueError if the normalized string is not a valid ISO 8601 datetime.
+    """
+    s = s.strip()
+    if s.endswith("Z"):
+        s = s[:-1] + "+00:00"
+    dot = s.rfind(".")
+    if dot != -1:
+        frac_end = dot + 1
+        while frac_end < len(s) and s[frac_end].isdigit():
+            frac_end += 1
+        frac = s[dot + 1 : frac_end]
+        if frac:
+            # Slicing truncates (never rounds); ljust pads. An empty fraction
+            # (trailing dot) is left untouched so fromisoformat rejects it.
+            s = s[: dot + 1] + frac[:6].ljust(6, "0") + s[frac_end:]
+    return datetime.fromisoformat(s)
+
+
 class CompileError(Exception):
     pass
 
@@ -985,9 +1012,23 @@ def query(self, sql_ast: Expr | Generator, res_type: type = None, log_message: s
                 return None
             return int(res)
         elif res_type is datetime:
-            res = _one(_one(res))
+            if not res:
+                raise ValueError("Datetime query returned 0 rows, expected 1")
+            row = _one(res)
+            if not row:
+                raise ValueError("Datetime query row is empty, expected 1 column")
+            res = _one(row)
             if isinstance(res, str):
-                res = datetime.fromisoformat(res[:23])  # TODO use a better parsing method
+                try:
+                    res = _parse_datetime(res)
+                except ValueError:
+                    logger.error(
+                        "Failed to parse datetime string returned by database %s: %r (sql: %s)",
+                        self.name,
+                        res,
+                        sql_code,
+                    )
+                    raise
             return res
         elif res_type is tuple:
             if len(res) != 1:
diff --git a/tests/test_datetime_parsing.py b/tests/test_datetime_parsing.py
new file mode 100644
index 00000000..0e83fdaa
--- /dev/null
+++ b/tests/test_datetime_parsing.py
@@ -0,0 +1,81 @@
+from datetime import datetime, timedelta, timezone
+
+import pytest
+
+from data_diff.databases.base import _parse_datetime
+
+
+class TestParseDatetime:
+    def test_standard_microsecond_format(self):
+        result = _parse_datetime("2022-06-03 12:24:35.123456")
+        assert result == datetime(2022, 6, 3, 12, 24, 35, 123456)
+
+    def test_millisecond_precision(self):
+        result = _parse_datetime("2022-06-03 12:24:35.123")
+        assert result == datetime(2022, 6, 3, 12, 24, 35, 123000)
+
+    def test_no_fractional_seconds(self):
+        result = _parse_datetime("2022-06-03 12:24:35")
+        assert result == datetime(2022, 6, 3, 12, 24, 35)
+
+    def test_single_fractional_digit_padded(self):
+        result = _parse_datetime("2022-06-03 12:24:35.1")
+        assert result == datetime(2022, 6, 3, 12, 24, 35, 100000)
+
+    def test_five_fractional_digits_padded(self):
+        result = _parse_datetime("2022-06-03T12:24:35.12345+05:30")
+        assert result == datetime(2022, 6, 3, 12, 24, 35, 123450, tzinfo=timezone(timedelta(hours=5, minutes=30)))
+
+    def test_nanosecond_precision_truncated(self):
+        result = _parse_datetime("2022-06-03 12:24:35.123456789")
+        assert result == datetime(2022, 6, 3, 12, 24, 35, 123456)
+
+    def test_seven_fractional_digits_truncated(self):
+        result = _parse_datetime("2022-06-03 12:24:35.1234567")
+        assert result == datetime(2022, 6, 3, 12, 24, 35, 123456)
+
+    def test_trailing_whitespace(self):
+        result = _parse_datetime("2022-06-03 12:24:35.123456  ")
+        assert result == datetime(2022, 6, 3, 12, 24, 35, 123456)
+
+    def test_leading_whitespace(self):
+        result = _parse_datetime("  2022-06-03 12:24:35.123456")
+        assert result == datetime(2022, 6, 3, 12, 24, 35, 123456)
+
+    def test_timezone_offset(self):
+        result = _parse_datetime("2022-06-03T12:24:35+00:00")
+        assert result == datetime(2022, 6, 3, 12, 24, 35, tzinfo=timezone.utc)
+
+    def test_z_suffix_utc(self):
+        result = _parse_datetime("2022-06-03T12:24:35Z")
+        assert result == datetime(2022, 6, 3, 12, 24, 35, tzinfo=timezone.utc)
+
+    def test_nanosecond_precision_with_timezone(self):
+        result = _parse_datetime("2022-06-03T12:24:35.123456789+05:30")
+        expected_tz = timezone(timedelta(hours=5, minutes=30))
+        assert result == datetime(2022, 6, 3, 12, 24, 35, 123456, tzinfo=expected_tz)
+
+    def test_nanosecond_precision_with_z_suffix(self):
+        result = _parse_datetime("2022-06-03T12:24:35.123456789Z")
+        assert result == datetime(2022, 6, 3, 12, 24, 35, 123456, tzinfo=timezone.utc)
+
+    def test_all_nines_nanoseconds_truncates_not_rounds(self):
+        result = _parse_datetime("2022-06-03 23:59:59.999999999")
+        assert result == datetime(2022, 6, 3, 23, 59, 59, 999999)
+
+    def test_negative_timezone_offset(self):
+        result = _parse_datetime("2022-06-03T12:24:35-05:00")
+        expected_tz = timezone(timedelta(hours=-5))
+        assert result == datetime(2022, 6, 3, 12, 24, 35, tzinfo=expected_tz)
+
+    def test_trailing_dot_raises(self):
+        with pytest.raises(ValueError):
+            _parse_datetime("2022-06-03T12:24:35.")
+
+    def test_invalid_string_raises(self):
+        with pytest.raises(ValueError):
+            _parse_datetime("not-a-date")
+
+    def test_empty_string_raises(self):
+        with pytest.raises(ValueError):
+            _parse_datetime("")