Skip to content

Add pickling of dict_keys, dict_values, dict_items #384

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Aug 11, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions cloudpickle/cloudpickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -828,3 +828,15 @@ def _get_bases(typ):
# For regular class objects
bases_attr = '__bases__'
return getattr(typ, bases_attr)


def _make_dict_keys(obj):
return dict.fromkeys(obj).keys()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure it's worth rebuilding a fake dict whose values are all None. Maybe we could just ship the list of keys (or values) directly?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One reason not to leave these as just a list of keys is that dict_keys objects are set-like and support set operations; see https://docs.python.org/3/tutorial/datastructures.html#dictionaries

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good point.



def _make_dict_values(obj):
return {i: _ for i, _ in enumerate(obj)}.values()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar comment here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While I am not sure of any particular special properties of dict_values objects, I think the approach taken here should mirror the approach taken with dict_keys.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree.



def _make_dict_items(obj):
return obj.items()
27 changes: 25 additions & 2 deletions cloudpickle/cloudpickle_fast.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
guards present in cloudpickle.py that were written to handle PyPy specificities
are not present in cloudpickle_fast.py
"""
import _collections_abc
import abc
import copyreg
import io
Expand All @@ -33,8 +34,8 @@
_typevar_reduce, _get_bases, _make_cell, _make_empty_cell, CellType,
_is_parametrized_type_hint, PYPY, cell_set,
parametrized_type_hint_getinitargs, _create_parametrized_type_hint,
builtin_code_type

builtin_code_type,
_make_dict_keys, _make_dict_values, _make_dict_items,
)


Expand Down Expand Up @@ -400,6 +401,24 @@ def _class_reduce(obj):
return NotImplemented


def _dict_keys_reduce(obj):
    """Reduce a ``dict_keys`` view to ``(_make_dict_keys, (keys_list,))``."""
    # Only the keys are serialized: shipping the whole backing dict may be
    # unintended and could leak sensitive information held in its values.
    keys_only = list(obj)
    return _make_dict_keys, (keys_only,)


def _dict_values_reduce(obj):
    """Reduce a ``dict_values`` view to ``(_make_dict_values, (values_list,))``."""
    # Only the values are serialized: shipping the whole backing dict may be
    # unintended and could leak sensitive information held in its keys.
    values_only = list(obj)
    return _make_dict_values, (values_only,)


def _dict_items_reduce(obj):
    """Reduce a ``dict_items`` view to ``(_make_dict_items, (dict_copy,))``."""
    # An items view exposes both keys and values, so a plain dict copy built
    # from its pairs carries exactly the information held by the view.
    mapping_copy = dict(obj)
    return _make_dict_items, (mapping_copy,)


# COLLECTIONS OF OBJECTS STATE SETTERS
# ------------------------------------
# state setters are called at unpickling time, once the object is created and
Expand Down Expand Up @@ -473,6 +492,10 @@ class CloudPickler(Pickler):
_dispatch_table[types.MappingProxyType] = _mappingproxy_reduce
_dispatch_table[weakref.WeakSet] = _weakset_reduce
_dispatch_table[typing.TypeVar] = _typevar_reduce
_dispatch_table[_collections_abc.dict_keys] = _dict_keys_reduce
_dispatch_table[_collections_abc.dict_values] = _dict_values_reduce
_dispatch_table[_collections_abc.dict_items] = _dict_items_reduce


dispatch_table = ChainMap(_dispatch_table, copyreg.dispatch_table)

Expand Down
21 changes: 20 additions & 1 deletion tests/cloudpickle_test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import division

import _collections_abc
import abc
import collections
import base64
Expand Down Expand Up @@ -31,7 +32,7 @@
# tests should be skipped if these modules are not available
import numpy as np
import scipy.special as spp
except ImportError:
except (ImportError, RuntimeError):
np = None
spp = None

Expand Down Expand Up @@ -207,6 +208,24 @@ def test_memoryview(self):
self.assertEqual(pickle_depickle(buffer_obj, protocol=self.protocol),
buffer_obj.tobytes())

def test_dict_keys(self):
    """A ``dict_keys`` view round-trips to an equal ``dict_keys`` view."""
    keys = {"a": 1, "b": 2}.keys()
    # Use the protocol under test, like the neighbouring tests, so that
    # every protocol variant of this test class exercises the reducer.
    results = pickle_depickle(keys, protocol=self.protocol)
    self.assertEqual(results, keys)
    # assertIsInstance is not stripped when Python runs with -O.
    self.assertIsInstance(results, _collections_abc.dict_keys)

def test_dict_values(self):
    """A ``dict_values`` view round-trips to a ``dict_values`` view.

    The contents are compared as sorted lists rather than view-to-view.
    """
    values = {"a": 1, "b": 2}.values()
    # Use the protocol under test, like the neighbouring tests, so that
    # every protocol variant of this test class exercises the reducer.
    results = pickle_depickle(values, protocol=self.protocol)
    self.assertEqual(sorted(results), sorted(values))
    # assertIsInstance is not stripped when Python runs with -O.
    self.assertIsInstance(results, _collections_abc.dict_values)

def test_dict_items(self):
    """A ``dict_items`` view round-trips to an equal ``dict_items`` view."""
    items = {"a": 1, "b": 2}.items()
    # Use the protocol under test, like the neighbouring tests, so that
    # every protocol variant of this test class exercises the reducer.
    results = pickle_depickle(items, protocol=self.protocol)
    self.assertEqual(results, items)
    # assertIsInstance is not stripped when Python runs with -O.
    self.assertIsInstance(results, _collections_abc.dict_items)

def test_sliced_and_non_contiguous_memoryview(self):
buffer_obj = memoryview(b"Hello!" * 3)[2:15:2]
self.assertEqual(pickle_depickle(buffer_obj, protocol=self.protocol),
Expand Down