Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 37 additions & 2 deletions data_diff/databases/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,27 @@
logger = logging.getLogger("database")


def _parse_datetime(s: str) -> datetime:
"""Parse an ISO 8601 datetime string with the following normalizations:

- Strips leading/trailing whitespace
- Converts 'Z' timezone suffix to '+00:00' for Python 3.10 compatibility
- Truncates sub-microsecond precision (>6 fractional digits) to microseconds
"""
s = s.strip()
if s.endswith("Z"):
s = s[:-1] + "+00:00"
dot = s.rfind(".")
if dot != -1:
frac_end = dot + 1
while frac_end < len(s) and s[frac_end].isdigit():
frac_end += 1
frac_digits = frac_end - dot - 1
if frac_digits > 6:
s = s[: dot + 7] + s[frac_end:]
return datetime.fromisoformat(s)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1: Normalize trailing Z suffix before parsing datetime

pyproject.toml supports Python 3.10, but datetime.fromisoformat() on 3.10 rejects RFC3339 UTC strings ending in Z. _parse_datetime() now keeps timezone suffixes after truncating fractional digits, so inputs like 2022-06-03T12:24:35.123456789Z raise ValueError in supported runtimes when a driver returns UTC timestamps in Z form; the previous res[:23] path dropped that suffix and parsed successfully. This turns previously accepted query results into runtime failures for query(..., datetime).

Useful? React with 👍 / 👎.



class CompileError(Exception):
    """Exception subclass that adds no behavior of its own.

    NOTE(review): presumably raised when compiling a query fails; the raise
    sites are outside this view, so confirm against callers.
    """

Expand Down Expand Up @@ -985,9 +1006,23 @@ def query(self, sql_ast: Expr | Generator, res_type: type = None, log_message: s
return None
return int(res)
elif res_type is datetime:
res = _one(_one(res))
if not res:
raise ValueError("Datetime query returned 0 rows, expected 1")
row = _one(res)
if not row:
raise ValueError("Datetime query row is empty, expected 1 column")
res = _one(row)
if isinstance(res, str):
res = datetime.fromisoformat(res[:23]) # TODO use a better parsing method
try:
res = _parse_datetime(res)
except ValueError:
logger.error(
"Failed to parse datetime string returned by database %s: %r (sql: %s)",
self.name,
res,
sql_code,
)
raise
return res
elif res_type is tuple:
if len(res) != 1:
Expand Down
73 changes: 73 additions & 0 deletions tests/test_datetime_parsing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from datetime import datetime, timedelta, timezone

import pytest

from data_diff.databases.base import _parse_datetime


class TestParseDatetime:
    """Checks for ``_parse_datetime``: precision, whitespace, offsets, errors."""

    # --- fractional-second precision -------------------------------------

    def test_standard_microsecond_format(self):
        """Six fractional digits are kept as microseconds."""
        expected = datetime(2022, 6, 3, 12, 24, 35, 123456)
        assert _parse_datetime("2022-06-03 12:24:35.123456") == expected

    def test_millisecond_precision(self):
        """Three fractional digits are read as milliseconds."""
        expected = datetime(2022, 6, 3, 12, 24, 35, 123000)
        assert _parse_datetime("2022-06-03 12:24:35.123") == expected

    def test_no_fractional_seconds(self):
        """A string without a fraction parses to whole seconds."""
        expected = datetime(2022, 6, 3, 12, 24, 35)
        assert _parse_datetime("2022-06-03 12:24:35") == expected

    def test_nanosecond_precision_truncated(self):
        """Nine fractional digits are cut down to six."""
        expected = datetime(2022, 6, 3, 12, 24, 35, 123456)
        assert _parse_datetime("2022-06-03 12:24:35.123456789") == expected

    def test_seven_fractional_digits_truncated(self):
        """Seven fractional digits are cut down to six."""
        expected = datetime(2022, 6, 3, 12, 24, 35, 123456)
        assert _parse_datetime("2022-06-03 12:24:35.1234567") == expected

    # --- surrounding whitespace ------------------------------------------

    def test_trailing_whitespace(self):
        """Whitespace after the value is ignored."""
        expected = datetime(2022, 6, 3, 12, 24, 35, 123456)
        assert _parse_datetime("2022-06-03 12:24:35.123456 ") == expected

    def test_leading_whitespace(self):
        """Whitespace before the value is ignored."""
        expected = datetime(2022, 6, 3, 12, 24, 35, 123456)
        assert _parse_datetime(" 2022-06-03 12:24:35.123456") == expected

    # --- timezone handling -----------------------------------------------

    def test_timezone_offset(self):
        """An explicit +00:00 offset yields an aware UTC datetime."""
        expected = datetime(2022, 6, 3, 12, 24, 35, tzinfo=timezone.utc)
        assert _parse_datetime("2022-06-03T12:24:35+00:00") == expected

    def test_z_suffix_utc(self):
        """A trailing 'Z' is treated as UTC."""
        expected = datetime(2022, 6, 3, 12, 24, 35, tzinfo=timezone.utc)
        assert _parse_datetime("2022-06-03T12:24:35Z") == expected

    def test_nanosecond_precision_with_timezone(self):
        """Truncation keeps the offset that follows the fraction."""
        plus_0530 = timezone(timedelta(hours=5, minutes=30))
        expected = datetime(2022, 6, 3, 12, 24, 35, 123456, tzinfo=plus_0530)
        assert _parse_datetime("2022-06-03T12:24:35.123456789+05:30") == expected

    def test_nanosecond_precision_with_z_suffix(self):
        """Truncation and 'Z' conversion work together."""
        expected = datetime(2022, 6, 3, 12, 24, 35, 123456, tzinfo=timezone.utc)
        assert _parse_datetime("2022-06-03T12:24:35.123456789Z") == expected

    def test_all_nines_nanoseconds_truncates_not_rounds(self):
        """All-nines input must not roll over into the next second."""
        expected = datetime(2022, 6, 3, 23, 59, 59, 999999)
        assert _parse_datetime("2022-06-03 23:59:59.999999999") == expected

    def test_negative_timezone_offset(self):
        """A negative offset is preserved."""
        minus_0500 = timezone(timedelta(hours=-5))
        expected = datetime(2022, 6, 3, 12, 24, 35, tzinfo=minus_0500)
        assert _parse_datetime("2022-06-03T12:24:35-05:00") == expected

    # --- invalid input ---------------------------------------------------

    def test_trailing_dot_raises(self):
        """A dot with no digits after it is rejected."""
        with pytest.raises(ValueError):
            _parse_datetime("2022-06-03T12:24:35.")

    def test_invalid_string_raises(self):
        """Text that is not a datetime is rejected."""
        with pytest.raises(ValueError):
            _parse_datetime("not-a-date")

    def test_empty_string_raises(self):
        """The empty string is rejected."""
        with pytest.raises(ValueError):
            _parse_datetime("")
Loading