Skip to content

Commit a11b718

Browse files
authored
Implement dynamic class provenance tracking to fix isinstance semantics and add support for dynamically defined enums (#246)
1 parent 6ab5eed commit a11b718

File tree

3 files changed

+412
-27
lines changed

3 files changed

+412
-27
lines changed

Diff for: CHANGES.md

+8
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
1.1.0
2+
=====
3+
4+
- Track the provenance of dynamic classes and enums so as to preserve the
5+
usual `isinstance` relationship between pickled objects and their
6+
original class definitions.
7+
([issue #246](https://github.com/cloudpipe/cloudpickle/pull/246))
8+
19
1.0.0
210
=====
311

Diff for: cloudpickle/cloudpickle.py

+165-12
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@
4444

4545
import dis
4646
from functools import partial
47-
import importlib
4847
import io
4948
import itertools
5049
import logging
@@ -56,12 +55,26 @@
5655
import traceback
5756
import types
5857
import weakref
58+
import uuid
59+
import threading
60+
61+
62+
try:
63+
from enum import Enum
64+
except ImportError:
65+
Enum = None
5966

6067
# cloudpickle is meant for inter process communication: we expect all
6168
# communicating processes to run the same Python version hence we favor
6269
# communication speed over compatibility:
6370
DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL
6471

72+
# Track the provenance of reconstructed dynamic classes to make it possible to
73+
# reconstruct instances from the matching singleton class definition when
74+
# appropriate and preserve the usual "isinstance" semantics of Python objects.
75+
_DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary()
76+
_DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary()
77+
_DYNAMIC_CLASS_TRACKER_LOCK = threading.Lock()
6578

6679
if sys.version_info[0] < 3: # pragma: no branch
6780
from pickle import Pickler
@@ -71,12 +84,37 @@
7184
from StringIO import StringIO
7285
string_types = (basestring,) # noqa
7386
PY3 = False
87+
PY2 = True
88+
PY2_WRAPPER_DESCRIPTOR_TYPE = type(object.__init__)
89+
PY2_METHOD_WRAPPER_TYPE = type(object.__eq__)
90+
PY2_CLASS_DICT_BLACKLIST = (PY2_METHOD_WRAPPER_TYPE,
91+
PY2_WRAPPER_DESCRIPTOR_TYPE)
7492
else:
7593
types.ClassType = type
7694
from pickle import _Pickler as Pickler
7795
from io import BytesIO as StringIO
7896
string_types = (str,)
7997
PY3 = True
98+
PY2 = False
99+
100+
101+
def _ensure_tracking(class_def):
    """Return the tracker id associated with ``class_def``.

    If ``class_def`` has no id yet, a new random hexadecimal id is generated
    and recorded in both tracker mappings (class -> id and id -> class).
    The lookup and the registration are performed while holding
    ``_DYNAMIC_CLASS_TRACKER_LOCK``.
    """
    with _DYNAMIC_CLASS_TRACKER_LOCK:
        known_id = _DYNAMIC_CLASS_TRACKER_BY_CLASS.get(class_def)
        if known_id is not None:
            # Already tracked: reuse the id handed out previously.
            return known_id

        fresh_id = uuid.uuid4().hex
        _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = fresh_id
        _DYNAMIC_CLASS_TRACKER_BY_ID[fresh_id] = class_def
        return fresh_id
109+
110+
111+
def _lookup_class_or_track(class_tracker_id, class_def):
    """Return the class registered under ``class_tracker_id``.

    When ``class_tracker_id`` is None, no tracking takes place and
    ``class_def`` is returned unchanged. Otherwise, if a class is already
    registered under that id it is returned instead of ``class_def``; if not,
    ``class_def`` is registered under that id and returned. In both cases the
    class -> id mapping is updated for the returned class.
    """
    if class_tracker_id is None:
        return class_def

    with _DYNAMIC_CLASS_TRACKER_LOCK:
        # setdefault keeps a previously registered definition if there is one.
        tracked_class = _DYNAMIC_CLASS_TRACKER_BY_ID.setdefault(
            class_tracker_id, class_def)
        _DYNAMIC_CLASS_TRACKER_BY_CLASS[tracked_class] = class_tracker_id
    return tracked_class
80118

81119

82120
def _make_cell_set_template_code():
@@ -112,7 +150,7 @@ def inner(value):
112150
# NOTE: we are marking the cell variable as a free variable intentionally
113151
# so that we simulate an inner function instead of the outer function. This
114152
# is what gives us the ``nonlocal`` behavior in a Python 2 compatible way.
115-
if not PY3: # pragma: no branch
153+
if PY2: # pragma: no branch
116154
return types.CodeType(
117155
co.co_argcount,
118156
co.co_nlocals,
@@ -220,7 +258,7 @@ def _walk_global_ops(code):
220258
global-referencing instructions in *code*.
221259
"""
222260
code = getattr(code, 'co_code', b'')
223-
if not PY3: # pragma: no branch
261+
if PY2: # pragma: no branch
224262
code = map(ord, code)
225263

226264
n = len(code)
@@ -250,6 +288,39 @@ def _walk_global_ops(code):
250288
yield op, instr.arg
251289

252290

291+
def _extract_class_dict(cls):
    """Return a copy of ``cls.__dict__`` stripped of inherited entries.

    An entry is considered inherited when the same name is present in the
    ``__dict__`` of a direct base class of ``cls`` and refers to the same
    object (with additional Python 2 specific checks).
    """
    own_attrs = dict(cls.__dict__)  # copy dict proxy to a dict

    bases = cls.__bases__
    if len(bases) == 1:
        inherited = bases[0].__dict__
    else:
        # Merge in reversed order so that entries of the first bases win.
        inherited = {}
        for base in reversed(bases):
            inherited.update(base.__dict__)

    def _is_inherited(value, base_value):
        if value is base_value:
            return True
        if not PY2:
            return False
        # backward compat for Python 2
        if hasattr(value, "im_func"):
            return value.im_func is getattr(base_value, "im_func", None)
        # On Python 2 we have no way to pickle those specific methods types
        # nor to check that they are actually inherited. So we assume that
        # they are always inherited from builtin types.
        return isinstance(value, PY2_CLASS_DICT_BLACKLIST)

    inherited_names = []
    for attr_name, attr_value in own_attrs.items():
        try:
            if _is_inherited(attr_value, inherited[attr_name]):
                inherited_names.append(attr_name)
        except KeyError:
            # The name is not defined by any direct base class: keep it.
            continue

    # Deletion is deferred so that the dict is not mutated while iterated.
    for attr_name in inherited_names:
        del own_attrs[attr_name]
    return own_attrs
322+
323+
253324
class CloudPickler(Pickler):
254325

255326
dispatch = Pickler.dispatch.copy()
@@ -277,7 +348,7 @@ def save_memoryview(self, obj):
277348

278349
dispatch[memoryview] = save_memoryview
279350

280-
if not PY3: # pragma: no branch
351+
if PY2: # pragma: no branch
281352
def save_buffer(self, obj):
282353
self.save(str(obj))
283354

@@ -460,15 +531,40 @@ def func():
460531
# then discards the reference to it
461532
self.write(pickle.POP)
462533

463-
def save_dynamic_class(self, obj):
534+
def _save_dynamic_enum(self, obj, clsdict):
    """Special handling for dynamic Enum subclasses

    Use a dedicated Enum constructor (inspired by EnumMeta.__call__) as the
    EnumMeta metaclass has complex initialization that makes the Enum
    subclasses hold references to their own instances.

    ``obj`` is the Enum subclass to save. ``clsdict`` is the class dict that
    the caller will later pass to _rehydrate_skeleton_class; it is mutated
    in place to drop the entries that the enum constructor recreates.
    """
    members = dict((e.name, e.value) for e in obj)

    # Python 2.7 with enum34 can have no qualname:
    qualname = getattr(obj, "__qualname__", None)

    self.save_reduce(_make_skeleton_enum,
                     (obj.__bases__, obj.__name__, qualname, members,
                      obj.__module__, _ensure_tracking(obj), None),
                     obj=obj)

    # Cleanup the clsdict that will be passed to _rehydrate_skeleton_class:
    # Those attributes are already handled by the metaclass.
    for attrname in ["_generate_next_value_", "_member_names_",
                     "_member_map_", "_member_type_",
                     "_value2member_map_"]:
        clsdict.pop(attrname, None)
    for member in members:
        # A member is not guaranteed to be stored in the class __dict__: on
        # some Python versions EnumMeta does not set members whose name
        # shadows an attribute of a base class (e.g. ``name`` or ``value``)
        # as class attributes. Tolerate missing keys instead of failing
        # with a KeyError.
        clsdict.pop(member, None)
559+
560+
def save_dynamic_class(self, obj):
561+
"""Save a class that can't be stored as module global.
466562
467563
This method is used to serialize classes that are defined inside
468564
functions, or that otherwise can't be serialized as attribute lookups
469565
from global modules.
470566
"""
471-
clsdict = dict(obj.__dict__) # copy dict proxy to a dict
567+
clsdict = _extract_class_dict(obj)
472568
clsdict.pop('__weakref__', None)
473569

474570
# For ABCMeta in python3.7+, remove _abc_impl as it is not picklable.
@@ -496,8 +592,8 @@ def save_dynamic_class(self, obj):
496592
for k in obj.__slots__:
497593
clsdict.pop(k, None)
498594

499-
# If type overrides __dict__ as a property, include it in the type kwargs.
500-
# In Python 2, we can't set this attribute after construction.
595+
# If type overrides __dict__ as a property, include it in the type
596+
# kwargs. In Python 2, we can't set this attribute after construction.
501597
__dict__ = clsdict.pop('__dict__', None)
502598
if isinstance(__dict__, property):
503599
type_kwargs['__dict__'] = __dict__
@@ -524,8 +620,16 @@ def save_dynamic_class(self, obj):
524620
write(pickle.MARK)
525621

526622
# Create and memoize a skeleton class with obj's name and bases.
527-
tp = type(obj)
528-
self.save_reduce(tp, (obj.__name__, obj.__bases__, type_kwargs), obj=obj)
623+
if Enum is not None and issubclass(obj, Enum):
624+
# Special handling of Enum subclasses
625+
self._save_dynamic_enum(obj, clsdict)
626+
else:
627+
# "Regular" class definition:
628+
tp = type(obj)
629+
self.save_reduce(_make_skeleton_class,
630+
(tp, obj.__name__, obj.__bases__, type_kwargs,
631+
_ensure_tracking(obj), None),
632+
obj=obj)
529633

530634
# Now save the rest of obj's __dict__. Any references to obj
531635
# encountered while saving will point to the skeleton class.
@@ -778,7 +882,7 @@ def save_inst(self, obj):
778882
save(stuff)
779883
write(pickle.BUILD)
780884

781-
if not PY3: # pragma: no branch
885+
if PY2: # pragma: no branch
782886
dispatch[types.InstanceType] = save_inst
783887

784888
def save_property(self, obj):
@@ -1119,6 +1223,22 @@ def _make_skel_func(code, cell_count, base_globals=None):
11191223
return types.FunctionType(code, base_globals, None, None, closure)
11201224

11211225

1226+
def _make_skeleton_class(type_constructor, name, bases, type_kwargs,
                         class_tracker_id, extra):
    """Build a dynamic class whose __dict__ is to be filled once memoized

    The class is created by calling ``type_constructor(name, bases,
    type_kwargs)``.

    If class_tracker_id is not None, an existing class definition matching
    that id is looked up and returned when found. Otherwise the newly
    reconstructed class definition is tracked under that id so that other
    instances stemming from the same class id will also reuse this class
    definition.

    The "extra" variable is meant to be a dict (or None) that can be used for
    forward compatibility should the need arise. It is currently unused.
    """
    return _lookup_class_or_track(
        class_tracker_id, type_constructor(name, bases, type_kwargs))
1240+
1241+
11221242
def _rehydrate_skeleton_class(skeleton_class, class_dict):
11231243
"""Put attributes from `class_dict` back on `skeleton_class`.
11241244
@@ -1137,6 +1257,39 @@ def _rehydrate_skeleton_class(skeleton_class, class_dict):
11371257
return skeleton_class
11381258

11391259

1260+
def _make_skeleton_enum(bases, name, qualname, members, module,
                        class_tracker_id, extra):
    """Build a dynamic enum whose __dict__ is to be filled once memoized

    The creation of the enum class is inspired by the code of
    EnumMeta._create_.

    ``members`` maps member names to member values. ``module`` is assigned
    to the ``__module__`` of the new enum, and ``qualname`` (when not None)
    to its ``__qualname__``.

    If class_tracker_id is not None, an existing enum definition matching
    that id is looked up and returned when found. Otherwise the newly
    reconstructed enum definition is tracked under that id so that other
    instances stemming from the same class id will also reuse this enum
    definition.

    The "extra" variable is meant to be a dict (or None) that can be used for
    forward compatibility should the need arise. It is currently unused.
    """
    # enums always inherit from their base Enum class at the last position in
    # the list of base classes; its metaclass drives the class creation:
    metacls = bases[-1].__class__
    namespace = metacls.__prepare__(name, bases)

    # Feed the members one by one through item assignment on the prepared
    # namespace.
    for key, value in members.items():
        namespace[key] = value

    new_enum = metacls.__new__(metacls, name, bases, namespace)
    new_enum.__module__ = module

    # Python 2.7 compat
    if qualname is not None:
        new_enum.__qualname__ = qualname

    return _lookup_class_or_track(class_tracker_id, new_enum)
1291+
1292+
11401293
def _is_dynamic(module):
11411294
"""
11421295
Return True if the module is special module that cannot be imported by its

0 commit comments

Comments
 (0)