Skip to content

Commit 71fde30

Browse files
authored
Merge pull request #454 from sf-tcalhoun/diff_for_lists_fix
Fix for bug when diffing two lists with ignore_order and providing compare_func
2 parents a08d550 + d705a4b commit 71fde30

File tree

2 files changed

+118
-1
lines changed

2 files changed

+118
-1
lines changed

deepdiff/diff.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1127,7 +1127,9 @@ def defaultdict_orderedset():
11271127
pre_calced_distances = self._precalculate_numpy_arrays_distance(
11281128
hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type)
11291129

1130-
if hashes_added and hashes_removed and self.iterable_compare_func and len(hashes_added) > 1 and len(hashes_removed) > 1:
1130+
if hashes_added and hashes_removed \
1131+
and self.iterable_compare_func \
1132+
and len(hashes_added) > 0 and len(hashes_removed) > 0:
11311133
pre_calced_distances = self._precalculate_distance_by_custom_compare_func(
11321134
hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type)
11331135

tests/test_ignore_order.py

+115
Original file line numberDiff line numberDiff line change
@@ -1136,6 +1136,121 @@ def compare_func(x, y, level=None):
11361136
assert expected_with_compare_func == ddiff2
11371137
assert ddiff != ddiff2
11381138

1139+
def test_ignore_order_with_compare_func_with_one_each_hashes_added_hashes_removed(self):
    """
    Check that iterable_compare_func is honoured when exactly one item is
    added and exactly one item is removed from a list.

    Scenario:
        t1 and t2 each hold a two-element ``individualNames`` list. The first
        element is the same on both sides; the second element differs in
        several fields, including ``nameIdentifier``.

        The custom ``compare_func`` below decides whether two elements are
        "the same item" purely by ``nameIdentifier``. Since the identifiers of
        the second elements differ ("00002" vs "00003"), the change is a
        REPLACEMENT of the element, not an UPDATE of its fields.

    Background:
        The condition guarding ``_precalculate_distance_by_custom_compare_func``
        used to require ``len(hashes_added) > 1 and len(hashes_removed) > 1``,
        so a single added / single removed pair did not take that path.
        The result then was reported as per-field ``values_changed`` entries
        (see ``old_invalid_diff``). The condition now uses ``> 0``.

    Expectation:
        One ``iterable_item_added`` and one ``iterable_item_removed`` entry,
        both at index 1 of ``individualNames``.
    """
    second_item_path = "root['individualNames'][1]"

    # Element shared (by value) between t1 and t2.
    unchanged_name = {
        "firstName": "Johnathan",
        "lastName": "Doe",
        "prefix": "COLONEL",
        "middleName": "A",
        "primaryIndicator": True,
        "professionalDesignation": "PHD",
        "suffix": "SR",
        "nameIdentifier": "00001",
    }
    # Second element of t1: expected to be reported as removed.
    removed_name = {
        "firstName": "John",
        "lastName": "Doe",
        "prefix": "",
        "middleName": "",
        "primaryIndicator": False,
        "professionalDesignation": "",
        "suffix": "SR",
        "nameIdentifier": "00002",
    }
    # Second element of t2: expected to be reported as added.
    added_name = {
        "firstName": "Johnny",
        "lastName": "Doe",
        "prefix": "",
        "middleName": "A",
        "primaryIndicator": False,
        "professionalDesignation": "",
        "suffix": "SR",
        "nameIdentifier": "00003",
    }

    # Use copies so that t1, t2 and the expected results never share objects.
    t1 = {"individualNames": [dict(unchanged_name), dict(removed_name)]}
    t2 = {"individualNames": [dict(unchanged_name), dict(added_name)]}

    def compare_func(item1, item2, level=None):
        # Matches "individualNames" entries by nameIdentifier; anything else
        # falls back to plain equality. Any failure while comparing is
        # surfaced as CannotCompare.
        print("*** inside compare ***")
        it1_keys = item1.keys()

        try:
            looks_like_name = 'nameIdentifier' in it1_keys and 'lastName' in it1_keys
            if not looks_like_name:
                print("Unknown list item...", "matching result:", item1 == item2)
                return item1 == item2

            # --- individualNames ---
            match_result = item1['nameIdentifier'] == item2['nameIdentifier']
            print("individualNames - matching result:", match_result)
            return match_result
        except Exception:
            raise CannotCompare() from None

    actual_diff = DeepDiff(
        t1,
        t2,
        report_repetition=True,
        ignore_order=True,
        iterable_compare_func=compare_func,
        cutoff_intersection_for_pairs=1,
    )

    # What was produced before the fix: the replaced element was treated as
    # the same item with three changed fields.
    old_invalid_diff = {
        'values_changed': {
            second_item_path + "['firstName']": {'new_value': 'Johnny', 'old_value': 'John'},
            second_item_path + "['middleName']": {'new_value': 'A', 'old_value': ''},
            second_item_path + "['nameIdentifier']": {'new_value': '00003', 'old_value': '00002'},
        },
    }
    # What the custom compare function implies: a whole-item swap.
    new_expected_diff = {
        'iterable_item_added': {second_item_path: dict(added_name)},
        'iterable_item_removed': {second_item_path: dict(removed_name)},
    }

    assert old_invalid_diff != actual_diff
    assert new_expected_diff == actual_diff
1253+
11391254

11401255
class TestDynamicIgnoreOrder:
11411256
def test_ignore_order_func(self):

0 commit comments

Comments
 (0)