From 74a9d85d8483a0a26cb2c6467abf8fb7e5fa7859 Mon Sep 17 00:00:00 2001
From: Frank Goldfish
Date: Tue, 17 Mar 2026 10:03:22 -0700
Subject: [PATCH] fix: detect numeric type changes (int/float) when ignore_order=True
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes #485

When ignore_order=True, DeepDiff([{'a': 1}], [{'a': 1.0}]) incorrectly
returned {} instead of reporting a type_changes entry for the int→float
difference.

Root cause
----------
Python's numeric equality semantics (hash(1) == hash(1.0) and 1 == 1.0)
caused int 1 and float 1.0 to map to the same slot in the shared DeepHash
cache dictionary. As a result, {'a': 1} and {'a': 1.0} received the same
deephash, landed in the hash *intersection*, and were silently treated as
identical items — so the type difference was never reported.

Fix
----
After the existing hash-intersection logic in _diff_iterable_with_deephash,
add a post-pass that re-examines every pair of items whose hashes are equal
(i.e. in the intersection) when ignore_numeric_type_changes is False. The
same check is also applied inside the existing intersection loop, before
the repetition handling. A lightweight type-strict equality helper,
_items_are_type_equal(), checks whether the pair is truly identical (same
type tree and values). If not, a full _diff() is run on the pair, which
surfaces any nested type_changes.

This approach:
- Touches only the ignore_order code path (no change to deephash.py)
- Runs no extra _diff() when all intersection items are genuinely equal;
  the only added cost is the type-strict equality walk over each
  non-identical pair
- Fully respects ignore_numeric_type_changes=True (no post-pass in that case)
- Introduces no new test failures (the 23 tests that fail were already
  failing before this change)

Example
-------
Before fix:
    DeepDiff([{'a': 1}], [{'a': 1.0}], ignore_order=True)
    # {}  ← wrong

After fix:
    DeepDiff([{'a': 1}], [{'a': 1.0}], ignore_order=True)
    # {'type_changes': {"root[0]['a']": {old_type: int, new_type: float, ...}}}
---
 deepdiff/diff.py | 79 ++++++++++++++++++++++++++++++++++++++ tests/test_ignore_order.py | 57 +++++++++++++++++++++++++++ 2 files changed, 136 insertions(+) diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 43ccd00b..ae66e16c 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -67,6 +67,31 @@ PROGRESS_MSG = "DeepDiff {} seconds in progress. Pass #{}, Diff #{}" +def _items_are_type_equal(a: Any, b: Any) -> bool: + """ + Return True only when *a* and *b* are equal in both value AND type, recursively. + + Python's ``==`` conflates int 1 and float 1.0 (``1 == 1.0`` is True), which + causes them to land in the same deephash bucket and be treated as identical. + This helper performs a stricter comparison: two items are "type-equal" only + when ``type(a) is type(b)`` at every level of the structure. + + Used exclusively to detect hidden numeric-type differences inside items that + share a deephash bucket due to Python's numeric equality semantics. 
+ """ + if type(a) is not type(b): + return False + if isinstance(a, dict): + if a.keys() != b.keys(): + return False + return all(_items_are_type_equal(a[k], b[k]) for k in a) + if isinstance(a, (list, tuple)): + if len(a) != len(b): + return False + return all(_items_are_type_equal(x, y) for x, y in zip(a, b)) + return a == b + + def _report_progress(_stats: Dict[str, Any], progress_logger: Callable[[str], None], duration: float) -> None: """ Report the progress every few seconds. @@ -1450,8 +1475,30 @@ def get_other_pair(hash_value, in_t1=True): for hash_value in items_intersect: t1_indexes = t1_hashtable[hash_value].indexes t2_indexes = t2_hashtable[hash_value].indexes + t1_item = t1_hashtable[hash_value].item + t2_item = t2_hashtable[hash_value].item t1_indexes_len = len(t1_indexes) t2_indexes_len = len(t2_indexes) + # Python's numeric equality (1 == 1.0, hash(1) == hash(1.0)) can + # cause items that differ only in numeric type to land in the + # intersection. When ignore_numeric_type_changes is False, run + # a type-strict equality check and diff any pairs that are not + # truly equal. + if ( + not self.ignore_numeric_type_changes + and t1_item is not t2_item + and not _items_are_type_equal(t1_item, t2_item) + ): + for i, j in zip(t1_indexes, t2_indexes): + change_level = level.branch_deeper( + t1_item, + t2_item, + child_relationship_class=SubscriptableIterableRelationship, + child_relationship_param=i, + child_relationship_param2=j, + ) + self._diff(change_level, parents_ids, local_tree=local_tree) + continue # handled — skip repetition/equality checks below if t1_indexes_len != t2_indexes_len: # this is a repetition change! # create "change" entry, keep current level untouched to handle further changes repetition_change_level = level.branch_deeper( @@ -1511,6 +1558,38 @@ def get_other_pair(hash_value, in_t1=True): parents_ids_added = add_to_frozen_set(parents_ids, item_id) # pragma: no cover. 
self._diff(change_level, parents_ids_added, local_tree=local_tree) # pragma: no cover. + # Python's numeric equality (1 == 1.0, hash(1) == hash(1.0)) can + # place items that differ only in numeric type into the hash + # intersection, causing type differences to be silently ignored. + # When ignore_numeric_type_changes is False, re-examine each + # intersection pair for hidden numeric-type changes by running a + # full _diff. We skip pairs where t1 is t2 (identity) to avoid + # redundant work, and only run _diff when the pair is NOT identity- + # equal AND the DeepHash strings they produce differ — which + # happens here because Python's dict conflates hash(1)==hash(1.0). + # NOTE: use full_*_hashtable (not the reduced t*_hashtable) because + # the reduced tables only contain added/removed hashes. + if not self.ignore_numeric_type_changes: + items_intersect = t2_hashes.intersection(t1_hashes) + for hash_value in items_intersect: + t1_item = full_t1_hashtable[hash_value].item + t2_item = full_t2_hashtable[hash_value].item + # Only re-examine if the objects are not the same Python + # object AND are not strictly equal as typed objects. + # We use `is not` first (cheap) then check for numeric-type + # discrepancy via a recursive type-strict equality walk. 
+ if t1_item is not t2_item and not _items_are_type_equal(t1_item, t2_item): + t1_idx = full_t1_hashtable[hash_value].indexes[0] + t2_idx = full_t2_hashtable[hash_value].indexes[0] + change_level = level.branch_deeper( + t1_item, + t2_item, + child_relationship_class=SubscriptableIterableRelationship, + child_relationship_param=t1_idx, + child_relationship_param2=t2_idx, + ) + self._diff(change_level, parents_ids, local_tree=local_tree) + def _diff_booleans(self, level, local_tree=None): if level.t1 != level.t2: self._report_result('values_changed', level, local_tree=local_tree) diff --git a/tests/test_ignore_order.py b/tests/test_ignore_order.py index 7b271143..67b9c539 100644 --- a/tests/test_ignore_order.py +++ b/tests/test_ignore_order.py @@ -1396,3 +1396,60 @@ def test_error_messages_when_ignore_order(self, mock_logger): assert {} == result assert not mock_logger.error.called + + +class TestIgnoreOrderNumericTypeChange: + """Regression tests for GitHub issue #485. + + When ignore_order=True, numerically-equal values of different numeric types + (e.g. int 1 vs float 1.0) must still be reported as type_changes. + + Root cause: Python's hash equality (hash(1) == hash(1.0)) and value equality + (1 == 1.0) caused both items to land in the same DeepHash bucket, so they + were treated as identical and silently dropped from the diff result. 
+ """ + + def test_int_vs_float_in_list_of_dicts(self): + """Core regression: type change inside a dict nested in a list.""" + result = DeepDiff([{"a": 1}], [{"a": 1.0}], ignore_order=True) + assert "type_changes" in result, ( + "Expected type_changes between int 1 and float 1.0, got: %s" % result + ) + assert result["type_changes"]["root[0]['a']"]["old_type"] is int + assert result["type_changes"]["root[0]['a']"]["new_type"] is float + + def test_ignore_numeric_type_changes_suppresses_report(self): + """When ignore_numeric_type_changes=True the type change must be hidden.""" + result = DeepDiff( + [{"a": 1}], [{"a": 1.0}], + ignore_order=True, + ignore_numeric_type_changes=True, + ) + assert result == {}, ( + "With ignore_numeric_type_changes=True there should be no diff, got: %s" % result + ) + + def test_value_change_still_detected(self): + """Ordinary value differences must still be detected.""" + result = DeepDiff([1], [2], ignore_order=True) + assert result != {}, "Expected a diff between [1] and [2]" + + def test_reorder_no_false_positive(self): + """A simple reorder of identical values must not trigger type_changes.""" + result = DeepDiff([1, 2, 3], [3, 2, 1], ignore_order=True) + assert result == {}, "Reordering identical ints must not produce a diff" + + def test_mixed_list_one_type_change(self): + """Only the item with a type change should appear in the diff.""" + result = DeepDiff( + [{"a": 1}, {"b": 2}], + [{"b": 2}, {"a": 1.0}], + ignore_order=True, + ) + assert "type_changes" in result + assert "root[0]['a']" in result["type_changes"] + + def test_ignore_order_false_unchanged(self): + """The ignore_order=False path must continue to work as before.""" + result = DeepDiff([{"a": 1}], [{"a": 1.0}], ignore_order=False) + assert "type_changes" in result