Skip to content

Commit a62abc1

Browse files
authored
Merge pull request #460 from seperman/dev
8.0.0
2 parents be22027 + 8a7a004 commit a62abc1

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+818
-340
lines changed

.github/workflows/main.yaml

+26-14
Original file line numberDiff line numberDiff line change
@@ -12,19 +12,28 @@ jobs:
1212
runs-on: ubuntu-latest
1313
strategy:
1414
matrix:
15-
python-version: [3.8, 3.9, "3.10", "3.11", "3.12"]
15+
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
1616
architecture: ["x64"]
17-
include:
18-
- python-version: "3.10"
19-
numpy-version: "2.0.dev"
2017
steps:
2118
- uses: actions/checkout@v2
2219
- name: Setup Python ${{ matrix.python-version }} on ${{ matrix.architecture }}
2320
uses: actions/setup-python@v2
2421
with:
2522
python-version: ${{ matrix.python-version }}
2623
architecture: ${{ matrix.architecture }}
24+
- name: Cache pip 3.8
25+
if: matrix.python-version == 3.8
26+
uses: actions/cache@v2
27+
with:
28+
# This path is specific to Ubuntu
29+
path: ~/.cache/pip
30+
# Look to see if there is a cache hit for the corresponding requirements file
31+
key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }}-${{ hashFiles('requirements-dev3.8.txt') }}
32+
restore-keys: |
33+
${{ runner.os }}-pip-
34+
${{ runner.os }}-
2735
- name: Cache pip
36+
if: matrix.python-version != 3.8
2837
uses: actions/cache@v2
2938
with:
3039
# This path is specific to Ubuntu
@@ -40,28 +49,31 @@ jobs:
4049
# workaround for 3.12, SEE: https://github.com/pypa/setuptools/issues/3661#issuecomment-1813845177
4150
pip install --upgrade setuptools
4251
- name: Install dependencies
52+
if: matrix.python-version != 3.8
4353
run: pip install -r requirements-dev.txt
44-
- name: Install Numpy Dev
45-
if: ${{ matrix.numpy-version }}
46-
run: pip install -I --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple "numpy>=0.0.dev0"
54+
- name: Install dependencies
55+
if: matrix.python-version == 3.8
56+
run: pip install -r requirements-dev3.8.txt
4757
- name: Lint with flake8
48-
if: matrix.python-version == 3.11
58+
if: matrix.python-version == 3.12
4959
run: |
5060
# stop the build if there are Python syntax errors or undefined names
5161
flake8 deepdiff --count --select=E9,F63,F7,F82 --show-source --statistics
5262
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
5363
flake8 deepdiff --count --exit-zero --max-complexity=26 --max-line-lengt=250 --statistics
5464
- name: Test with pytest and get the coverage
55-
if: matrix.python-version == 3.11
65+
if: matrix.python-version == 3.12
5666
run: |
57-
pytest --cov-report=xml --cov=deepdiff tests/ --runslow
67+
pytest --benchmark-disable --cov-report=xml --cov=deepdiff tests/ --runslow
5868
- name: Test with pytest and no coverage report
59-
if: matrix.python-version != 3.11
69+
if: matrix.python-version != 3.12
6070
run: |
61-
pytest
71+
pytest --benchmark-disable
6272
- name: Upload coverage to Codecov
63-
uses: codecov/codecov-action@v3
64-
if: matrix.python-version == 3.11
73+
uses: codecov/codecov-action@v4
74+
if: matrix.python-version == 3.12
75+
env:
76+
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
6577
with:
6678
file: ./coverage.xml
6779
token: ${{ secrets.CODECOV_TOKEN }}

CITATION.cff

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,6 @@ authors:
55
given-names: "Sep"
66
orcid: "https://orcid.org/0009-0009-5828-4345"
77
title: "DeepDiff"
8-
version: 7.0.1
8+
version: 8.0.0
99
date-released: 2024
1010
url: "https://github.com/seperman/deepdiff"

README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# DeepDiff v 7.0.1
1+
# DeepDiff v 8.0.0
22

33
![Downloads](https://img.shields.io/pypi/dm/deepdiff.svg?style=flat)
44
![Python Versions](https://img.shields.io/pypi/pyversions/deepdiff.svg?style=flat)
@@ -17,7 +17,7 @@
1717

1818
Tested on Python 3.8+ and PyPy3.
1919

20-
- **[Documentation](https://zepworks.com/deepdiff/7.0.1/)**
20+
- **[Documentation](https://zepworks.com/deepdiff/8.0.0/)**
2121

2222
## What is new?
2323

deepdiff/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""This module offers the DeepDiff, DeepSearch, grep, Delta and DeepHash classes."""
22
# flake8: noqa
3-
__version__ = '7.0.1'
3+
__version__ = '8.0.0'
44
import logging
55

66
if __name__ == '__main__':

deepdiff/anyset.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
from ordered_set import OrderedSet
21
from deepdiff.deephash import DeepHash
3-
from deepdiff.helper import dict_
2+
from deepdiff.helper import dict_, SetOrdered
43

54

65
class AnySet:
@@ -11,7 +10,7 @@ class AnySet:
1110
However once the AnySet object is deleted, all those traces will be gone too.
1211
"""
1312
def __init__(self, items=None):
14-
self._set = OrderedSet()
13+
self._set = SetOrdered()
1514
self._hashes = dict_()
1615
self._hash_to_objects = dict_()
1716
if items:

deepdiff/base.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
from ordered_set import OrderedSet
2-
from deepdiff.helper import strings, numbers
1+
from deepdiff.helper import strings, numbers, SetOrdered
32

43

54
DEFAULT_SIGNIFICANT_DIGITS_WHEN_IGNORE_NUMERIC_TYPES = 12
@@ -31,18 +30,18 @@ def get_ignore_types_in_groups(self, ignore_type_in_groups,
3130

3231
result = []
3332
for item_group in ignore_type_in_groups:
34-
new_item_group = OrderedSet()
33+
new_item_group = SetOrdered()
3534
for item in item_group:
3635
item = type(item) if item is None or not isinstance(item, type) else item
3736
new_item_group.add(item)
3837
result.append(new_item_group)
3938
ignore_type_in_groups = result
4039

4140
if ignore_string_type_changes and self.strings not in ignore_type_in_groups:
42-
ignore_type_in_groups.append(OrderedSet(self.strings))
41+
ignore_type_in_groups.append(SetOrdered(self.strings))
4342

4443
if ignore_numeric_type_changes and self.numbers not in ignore_type_in_groups:
45-
ignore_type_in_groups.append(OrderedSet(self.numbers))
44+
ignore_type_in_groups.append(SetOrdered(self.numbers))
4645

4746
if not ignore_type_subclasses:
4847
# is_instance method needs tuples. When we look for subclasses, we need them to be tuples

deepdiff/commands.py

+1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ def cli():
4747
@click.option('--log-frequency-in-sec', required=False, default=0, type=int, show_default=True)
4848
@click.option('--max-passes', required=False, default=10000000, type=int, show_default=True)
4949
@click.option('--max_diffs', required=False, default=None, type=int, show_default=True)
50+
@click.option('--threshold-to-diff-deeper', required=False, default=0.33, type=float, show_default=False)
5051
@click.option('--number-format-notation', required=False, type=click.Choice(['f', 'e'], case_sensitive=True), show_default=True, default="f")
5152
@click.option('--progress-logger', required=False, type=click.Choice(['info', 'error'], case_sensitive=True), show_default=True, default="info")
5253
@click.option('--report-repetition', is_flag=True, show_default=True)

deepdiff/deephash.py

+36-7
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,17 @@
1414
number_to_string, datetime_normalize, KEY_TO_VAL_STR, short_repr,
1515
get_truncate_datetime, dict_, add_root_to_paths)
1616
from deepdiff.base import Base
17+
18+
try:
19+
import pandas
20+
except ImportError:
21+
pandas = False
22+
23+
try:
24+
import polars
25+
except ImportError:
26+
polars = False
27+
1728
logger = logging.getLogger(__name__)
1829

1930
UNPROCESSED_KEY = object()
@@ -139,6 +150,7 @@ def __init__(self,
139150
ignore_numeric_type_changes=False,
140151
ignore_type_subclasses=False,
141152
ignore_string_case=False,
153+
use_enum_value=False,
142154
exclude_obj_callback=None,
143155
number_to_string_func=None,
144156
ignore_private_variables=True,
@@ -154,7 +166,7 @@ def __init__(self,
154166
"exclude_paths, include_paths, exclude_regex_paths, hasher, ignore_repetition, "
155167
"number_format_notation, apply_hash, ignore_type_in_groups, ignore_string_type_changes, "
156168
"ignore_numeric_type_changes, ignore_type_subclasses, ignore_string_case "
157-
"number_to_string_func, ignore_private_variables, parent "
169+
"number_to_string_func, ignore_private_variables, parent, use_enum_value "
158170
"encodings, ignore_encoding_errors") % ', '.join(kwargs.keys()))
159171
if isinstance(hashes, MutableMapping):
160172
self.hashes = hashes
@@ -170,6 +182,7 @@ def __init__(self,
170182
self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths)
171183
self.hasher = default_hasher if hasher is None else hasher
172184
self.hashes[UNPROCESSED_KEY] = []
185+
self.use_enum_value = use_enum_value
173186

174187
self.significant_digits = self.get_significant_digits(significant_digits, ignore_numeric_type_changes)
175188
self.truncate_datetime = get_truncate_datetime(truncate_datetime)
@@ -206,10 +219,10 @@ def __init__(self,
206219
sha1hex = sha1hex
207220

208221
def __getitem__(self, obj, extract_index=0):
209-
return self._getitem(self.hashes, obj, extract_index=extract_index)
222+
return self._getitem(self.hashes, obj, extract_index=extract_index, use_enum_value=self.use_enum_value)
210223

211224
@staticmethod
212-
def _getitem(hashes, obj, extract_index=0):
225+
def _getitem(hashes, obj, extract_index=0, use_enum_value=False):
213226
"""
214227
extract_index is zero for hash and 1 for count and None to get them both.
215228
To keep it backward compatible, we only get the hash by default so it is set to zero by default.
@@ -220,6 +233,8 @@ def _getitem(hashes, obj, extract_index=0):
220233
key = BoolObj.TRUE
221234
elif obj is False:
222235
key = BoolObj.FALSE
236+
elif use_enum_value and isinstance(obj, Enum):
237+
key = obj.value
223238

224239
result_n_count = (None, 0)
225240

@@ -256,14 +271,14 @@ def get(self, key, default=None, extract_index=0):
256271
return self.get_key(self.hashes, key, default=default, extract_index=extract_index)
257272

258273
@staticmethod
259-
def get_key(hashes, key, default=None, extract_index=0):
274+
def get_key(hashes, key, default=None, extract_index=0, use_enum_value=False):
260275
"""
261276
get_key method for the hashes dictionary.
262277
It can extract the hash for a given key that is already calculated when extract_index=0
263278
or the count of items that went to building the object when extract_index=1.
264279
"""
265280
try:
266-
result = DeepHash._getitem(hashes, key, extract_index=extract_index)
281+
result = DeepHash._getitem(hashes, key, extract_index=extract_index, use_enum_value=use_enum_value)
267282
except KeyError:
268283
result = default
269284
return result
@@ -444,7 +459,6 @@ def _prep_path(self, obj):
444459
type_ = obj.__class__.__name__
445460
return KEY_TO_VAL_STR.format(type_, obj)
446461

447-
448462
def _prep_number(self, obj):
449463
type_ = "number" if self.ignore_numeric_type_changes else obj.__class__.__name__
450464
if self.significant_digits is not None:
@@ -475,12 +489,14 @@ def _prep_tuple(self, obj, parent, parents_ids):
475489
return result, counts
476490

477491
def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET):
478-
"""The main diff method"""
492+
"""The main hash method"""
479493
counts = 1
480494

481495
if isinstance(obj, bool):
482496
obj = self._prep_bool(obj)
483497
result = None
498+
elif self.use_enum_value and isinstance(obj, Enum):
499+
obj = obj.value
484500
else:
485501
result = not_hashed
486502
try:
@@ -523,6 +539,19 @@ def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET):
523539
elif isinstance(obj, tuple):
524540
result, counts = self._prep_tuple(obj=obj, parent=parent, parents_ids=parents_ids)
525541

542+
elif (pandas and isinstance(obj, pandas.DataFrame)):
543+
def gen():
544+
yield ('dtype', obj.dtypes)
545+
yield ('index', obj.index)
546+
yield from obj.items() # which contains (column name, series tuples)
547+
result, counts = self._prep_iterable(obj=gen(), parent=parent, parents_ids=parents_ids)
548+
elif (polars and isinstance(obj, polars.DataFrame)):
549+
def gen():
550+
yield from obj.columns
551+
yield from list(obj.schema.items())
552+
yield from obj.rows()
553+
result, counts = self._prep_iterable(obj=gen(), parent=parent, parents_ids=parents_ids)
554+
526555
elif isinstance(obj, Iterable):
527556
result, counts = self._prep_iterable(obj=obj, parent=parent, parents_ids=parents_ids)
528557

deepdiff/delta.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
from functools import partial, cmp_to_key
55
from collections.abc import Mapping
66
from copy import deepcopy
7-
from ordered_set import OrderedSet
87
from deepdiff import DeepDiff
98
from deepdiff.serialization import pickle_load, pickle_dump
109
from deepdiff.helper import (
@@ -14,6 +13,7 @@
1413
Opcode, FlatDeltaRow, UnkownValueCode, FlatDataAction,
1514
OPCODE_TAG_TO_FLAT_DATA_ACTION,
1615
FLAT_DATA_ACTION_TO_OPCODE_TAG,
16+
SetOrdered,
1717
)
1818
from deepdiff.path import (
1919
_path_to_elements, _get_nested_obj, _get_nested_obj_and_force,
@@ -744,7 +744,7 @@ def _do_ignore_order(self):
744744
"""
745745
fixed_indexes = self.diff.get('iterable_items_added_at_indexes', dict_())
746746
remove_indexes = self.diff.get('iterable_items_removed_at_indexes', dict_())
747-
paths = OrderedSet(fixed_indexes.keys()) | OrderedSet(remove_indexes.keys())
747+
paths = SetOrdered(fixed_indexes.keys()) | SetOrdered(remove_indexes.keys())
748748
for path in paths:
749749
# In the case of ignore_order reports, we are pointing to the container object.
750750
# Thus we add a [0] to the elements so we can get the required objects and discard what we don't need.

0 commit comments

Comments
 (0)