Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions deepdiff/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,31 @@
PROGRESS_MSG = "DeepDiff {} seconds in progress. Pass #{}, Diff #{}"


def _items_are_type_equal(a: Any, b: Any) -> bool:
"""
Return True only when *a* and *b* are equal in both value AND type, recursively.

Python's ``==`` conflates int 1 and float 1.0 (``1 == 1.0`` is True), which
causes them to land in the same deephash bucket and be treated as identical.
This helper performs a stricter comparison: two items are "type-equal" only
when ``type(a) is type(b)`` at every level of the structure.

Used exclusively to detect hidden numeric-type differences inside items that
share a deephash bucket due to Python's numeric equality semantics.
"""
if type(a) is not type(b):
return False
if isinstance(a, dict):
if a.keys() != b.keys():
return False
return all(_items_are_type_equal(a[k], b[k]) for k in a)
if isinstance(a, (list, tuple)):
if len(a) != len(b):
return False
return all(_items_are_type_equal(x, y) for x, y in zip(a, b))
return a == b


def _report_progress(_stats: Dict[str, Any], progress_logger: Callable[[str], None], duration: float) -> None:
"""
Report the progress every few seconds.
Expand Down Expand Up @@ -1450,8 +1475,30 @@ def get_other_pair(hash_value, in_t1=True):
for hash_value in items_intersect:
t1_indexes = t1_hashtable[hash_value].indexes
t2_indexes = t2_hashtable[hash_value].indexes
t1_item = t1_hashtable[hash_value].item
t2_item = t2_hashtable[hash_value].item
t1_indexes_len = len(t1_indexes)
t2_indexes_len = len(t2_indexes)
# Python's numeric equality (1 == 1.0, hash(1) == hash(1.0)) can
# cause items that differ only in numeric type to land in the
# intersection. When ignore_numeric_type_changes is False, run
# a type-strict equality check and diff any pairs that are not
# truly equal.
if (
not self.ignore_numeric_type_changes
and t1_item is not t2_item
and not _items_are_type_equal(t1_item, t2_item)
):
for i, j in zip(t1_indexes, t2_indexes):
change_level = level.branch_deeper(
t1_item,
t2_item,
child_relationship_class=SubscriptableIterableRelationship,
child_relationship_param=i,
child_relationship_param2=j,
)
self._diff(change_level, parents_ids, local_tree=local_tree)
continue # handled — skip repetition/equality checks below
if t1_indexes_len != t2_indexes_len: # this is a repetition change!
# create "change" entry, keep current level untouched to handle further changes
repetition_change_level = level.branch_deeper(
Expand Down Expand Up @@ -1511,6 +1558,38 @@ def get_other_pair(hash_value, in_t1=True):
parents_ids_added = add_to_frozen_set(parents_ids, item_id) # pragma: no cover.
self._diff(change_level, parents_ids_added, local_tree=local_tree) # pragma: no cover.

# Python's numeric equality (1 == 1.0, hash(1) == hash(1.0)) can
# place items that differ only in numeric type into the hash
# intersection, causing type differences to be silently ignored.
# When ignore_numeric_type_changes is False, re-examine each
# intersection pair for hidden numeric-type changes. Pairs that are
# the same Python object (t1 is t2) are skipped to avoid redundant
# work; a full _diff is run only for pairs that share a hash value
# but fail the recursive type-strict equality check
# (_items_are_type_equal).
# NOTE: use full_*_hashtable (not the reduced t*_hashtable) because
# the reduced tables only contain added/removed hashes.
if not self.ignore_numeric_type_changes:
items_intersect = t2_hashes.intersection(t1_hashes)
for hash_value in items_intersect:
t1_item = full_t1_hashtable[hash_value].item
t2_item = full_t2_hashtable[hash_value].item
# Only re-examine if the objects are not the same Python
# object AND are not strictly equal as typed objects.
# We use `is not` first (cheap) then check for numeric-type
# discrepancy via a recursive type-strict equality walk.
if t1_item is not t2_item and not _items_are_type_equal(t1_item, t2_item):
t1_idx = full_t1_hashtable[hash_value].indexes[0]
t2_idx = full_t2_hashtable[hash_value].indexes[0]
change_level = level.branch_deeper(
t1_item,
t2_item,
child_relationship_class=SubscriptableIterableRelationship,
child_relationship_param=t1_idx,
child_relationship_param2=t2_idx,
)
self._diff(change_level, parents_ids, local_tree=local_tree)

def _diff_booleans(self, level, local_tree=None):
if level.t1 != level.t2:
self._report_result('values_changed', level, local_tree=local_tree)
Expand Down
57 changes: 57 additions & 0 deletions tests/test_ignore_order.py
Original file line number Diff line number Diff line change
Expand Up @@ -1396,3 +1396,60 @@ def test_error_messages_when_ignore_order(self, mock_logger):
assert {} == result

assert not mock_logger.error.called


class TestIgnoreOrderNumericTypeChange:
    """Regression coverage for GitHub issue #485.

    With ``ignore_order=True``, values that compare equal but have different
    numeric types (for example int ``1`` and float ``1.0``) must still be
    surfaced as ``type_changes``.

    Background: ``hash(1) == hash(1.0)`` and ``1 == 1.0`` in Python, so both
    values ended up in the same DeepHash bucket, were considered identical,
    and the difference never reached the diff result.
    """

    def test_int_vs_float_in_list_of_dicts(self):
        """An int -> float change nested in a dict inside a list is reported."""
        before = [{"a": 1}]
        after = [{"a": 1.0}]
        diff = DeepDiff(before, after, ignore_order=True)
        assert "type_changes" in diff, (
            "Expected type_changes between int 1 and float 1.0, got: %s" % diff
        )
        change = diff["type_changes"]["root[0]['a']"]
        assert change["old_type"] is int
        assert change["new_type"] is float

    def test_ignore_numeric_type_changes_suppresses_report(self):
        """``ignore_numeric_type_changes=True`` hides the int/float difference."""
        options = dict(ignore_order=True, ignore_numeric_type_changes=True)
        diff = DeepDiff([{"a": 1}], [{"a": 1.0}], **options)
        assert diff == {}, (
            "With ignore_numeric_type_changes=True there should be no diff, got: %s" % diff
        )

    def test_value_change_still_detected(self):
        """A plain value difference keeps producing a non-empty diff."""
        diff = DeepDiff([1], [2], ignore_order=True)
        assert diff != {}, "Expected a diff between [1] and [2]"

    def test_reorder_no_false_positive(self):
        """Shuffling identical ints yields an empty diff."""
        ascending = [1, 2, 3]
        descending = [3, 2, 1]
        diff = DeepDiff(ascending, descending, ignore_order=True)
        assert diff == {}, "Reordering identical ints must not produce a diff"

    def test_mixed_list_one_type_change(self):
        """The reordered item whose value changed type shows up in type_changes."""
        before = [{"a": 1}, {"b": 2}]
        after = [{"b": 2}, {"a": 1.0}]
        diff = DeepDiff(before, after, ignore_order=True)
        assert "type_changes" in diff
        assert "root[0]['a']" in diff["type_changes"]

    def test_ignore_order_false_unchanged(self):
        """The ordered (``ignore_order=False``) path still reports the type change."""
        diff = DeepDiff([{"a": 1}], [{"a": 1.0}], ignore_order=False)
        assert "type_changes" in diff