diff --git a/CHANGES.md b/CHANGES.md index ee0101c67..906688259 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,9 @@ +1.2.0 +===== + +- Support pickling of classmethod and staticmethod objects in python2. + ([issue #262](https://github.com/cloudpipe/cloudpickle/pull/262)) + 1.1.0 ===== diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index fcdac7510..1716003ad 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -116,6 +116,16 @@ def _lookup_class_or_track(class_tracker_id, class_def): _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id return class_def +if PY3: + from pickle import _getattribute +else: + # pickle._getattribute is a python3 addition and enhancement of getattr, + # that can handle dotted attribute names. In cloudpickle for python2, + # handling dotted names is not needed, so we simply define _getattribute as + # a wrapper around getattr. + def _getattribute(obj, name): + return getattr(obj, name, None), None + def _make_cell_set_template_code(): """Get the Python compiler to emit LOAD_FAST(arg); STORE_DEREF @@ -244,32 +254,6 @@ def _builtin_type(name): return getattr(types, name) -def _make__new__factory(type_): - def _factory(): - return type_.__new__ - return _factory - - -# NOTE: These need to be module globals so that they're pickleable as globals. -_get_dict_new = _make__new__factory(dict) -_get_frozenset_new = _make__new__factory(frozenset) -_get_list_new = _make__new__factory(list) -_get_set_new = _make__new__factory(set) -_get_tuple_new = _make__new__factory(tuple) -_get_object_new = _make__new__factory(object) - -# Pre-defined set of builtin_function_or_method instances that can be -# serialized.
-_BUILTIN_TYPE_CONSTRUCTORS = { - dict.__new__: _get_dict_new, - frozenset.__new__: _get_frozenset_new, - set.__new__: _get_set_new, - list.__new__: _get_list_new, - tuple.__new__: _get_tuple_new, - object.__new__: _get_object_new, -} - - if sys.version_info < (3, 4): # pragma: no branch def _walk_global_ops(code): """ @@ -423,28 +407,12 @@ def save_function(self, obj, name=None): Determines what kind of function obj is (e.g. lambda, defined at interactive prompt, etc) and handles the pickling appropriately. """ - try: - should_special_case = obj in _BUILTIN_TYPE_CONSTRUCTORS - except TypeError: - # Methods of builtin types aren't hashable in python 2. - should_special_case = False - - if should_special_case: - # We keep a special-cased cache of built-in type constructors at - # global scope, because these functions are structured very - # differently in different python versions and implementations (for - # example, they're instances of types.BuiltinFunctionType in - # CPython, but they're ordinary types.FunctionType instances in - # PyPy). - # - # If the function we've received is in that cache, we just - # serialize it as a lookup into the cache. 
- return self.save_reduce(_BUILTIN_TYPE_CONSTRUCTORS[obj], (), obj=obj) - write = self.write if name is None: - name = obj.__name__ + name = getattr(obj, '__qualname__', None) + if name is None: + name = getattr(obj, '__name__', None) try: # whichmodule() could fail, see # https://bitbucket.org/gutworth/six/issues/63/importing-six-breaks-pickling @@ -462,7 +430,7 @@ def save_function(self, obj, name=None): themodule = None try: - lookedup_by_name = getattr(themodule, name, None) + lookedup_by_name, _ = _getattribute(themodule, name) except Exception: lookedup_by_name = None @@ -470,23 +438,6 @@ def save_function(self, obj, name=None): if lookedup_by_name is obj: return self.save_global(obj, name) - # a builtin_function_or_method which comes in as an attribute of some - # object (e.g., itertools.chain.from_iterable) will end - # up with modname "__main__" and so end up here. But these functions - # have no __code__ attribute in CPython, so the handling for - # user-defined functions below will fail. - # So we pickle them here using save_reduce; have to do it differently - # for different python versions. - if not hasattr(obj, '__code__'): - if PY3: # pragma: no branch - rv = obj.__reduce_ex__(self.proto) - else: - if hasattr(obj, '__self__'): - rv = (getattr, (obj.__self__, name)) - else: - raise pickle.PicklingError("Can't pickle %r" % obj) - return self.save_reduce(obj=obj, *rv) - # if func is lambda, def'ed at prompt, is in main, or is nested, then # we'll pickle the actual function object rather than simply saving a # reference (as is done in default pickler), via save_function_tuple. 
@@ -813,12 +764,44 @@ def extract_func_data(self, func): return (code, f_globals, defaults, closure, dct, base_globals) - def save_builtin_function(self, obj): - if obj.__module__ == "__builtin__": - return self.save_global(obj) - return self.save_function(obj) - - dispatch[types.BuiltinFunctionType] = save_builtin_function + if not PY3: # pragma: no branch + # Python3 comes with native reducers that allow builtin functions and + # methods pickling as module/class attributes. The following method + # extends this for python2. + # Please note that currently, neither pickle nor cloudpickle support + # dynamically created builtin functions/method pickling. + def save_builtin_function_or_method(self, obj): + is_bound = getattr(obj, '__self__', None) is not None + if is_bound: + # obj is a bound builtin method. + rv = (getattr, (obj.__self__, obj.__name__)) + return self.save_reduce(obj=obj, *rv) + + is_unbound = hasattr(obj, '__objclass__') + if is_unbound: + # obj is an unbound builtin method (accessed from its class) + rv = (getattr, (obj.__objclass__, obj.__name__)) + return self.save_reduce(obj=obj, *rv) + + # Otherwise, obj is not a method, but a function. Fallback to + # default pickling by attribute. 
+ return Pickler.save_global(self, obj) + + dispatch[types.BuiltinFunctionType] = save_builtin_function_or_method + + # A comprehensive summary of the various kinds of builtin methods can + # be found in PEP 579: https://www.python.org/dev/peps/pep-0579/ + classmethod_descriptor_type = type(float.__dict__['fromhex']) + wrapper_descriptor_type = type(float.__repr__) + method_wrapper_type = type(1.5.__repr__) + + dispatch[classmethod_descriptor_type] = save_builtin_function_or_method + dispatch[wrapper_descriptor_type] = save_builtin_function_or_method + dispatch[method_wrapper_type] = save_builtin_function_or_method + + if sys.version_info[:2] < (3, 4): + method_descriptor = type(str.upper) + dispatch[method_descriptor] = save_builtin_function_or_method def save_global(self, obj, name=None, pack=struct.pack): """ @@ -1345,18 +1328,3 @@ def _is_dynamic(module): except ImportError: return True return False - - -""" Use copy_reg to extend global pickle definitions """ - -if sys.version_info < (3, 4): # pragma: no branch - method_descriptor = type(str.upper) - - def _reduce_method_descriptor(obj): - return (getattr, (obj.__objclass__, obj.__name__)) - - try: - import copy_reg as copyreg - except ImportError: - import copyreg - copyreg.pickle(method_descriptor, _reduce_method_descriptor) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 4fb1e2e4e..7f7d7dfd8 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -641,14 +641,151 @@ def test_NotImplementedType(self): res = pickle_depickle(type(NotImplemented), protocol=self.protocol) self.assertEqual(type(NotImplemented), res) - def test_builtin_function_without_module(self): - on = object.__new__ - on_depickled = pickle_depickle(on, protocol=self.protocol) - self.assertEqual(type(on_depickled(object)), type(object())) - - fi = itertools.chain.from_iterable - fi_depickled = pickle_depickle(fi, protocol=self.protocol) - self.assertEqual(list(fi_depickled([[1, 2], [3, 4]])), [1, 2, 
3, 4]) + def test_builtin_function(self): + # Note that builtin_function_or_method are special-cased by cloudpickle + # only in python2. + + # builtin function from the __builtin__ module + assert pickle_depickle(zip, protocol=self.protocol) is zip + + from sys import getcheckinterval + # builtin function from a "regular" module + assert pickle_depickle( + getcheckinterval, protocol=self.protocol) is getcheckinterval + + @pytest.mark.skipif(platform.python_implementation() == 'PyPy' and + sys.version_info[:2] == (3, 5), + reason="bug of pypy3.5 in builtin-type constructors") + def test_builtin_type_constructor(self): + # Due to a bug in pypy3.5, cloudpickling builtin-type constructors + # fails. This test makes sure that cloudpickling builtin-type + # constructors work for all other python versions/implementations. + + # pickle_depickle some builtin methods of the __builtin__ module + for t in list, tuple, set, frozenset, dict, object: + cloned_new = pickle_depickle(t.__new__, protocol=self.protocol) + assert isinstance(cloned_new(t), t) + + # The next 4 tests cover all cases in which builtin python methods can + # appear. + # There are 4 kinds of method: 'classic' methods, classmethods, + # staticmethods and slotmethods. They will appear under different types + # depending on whether they are called from the __dict__ of their + # class, their class itself, or an instance of their class. This makes + # 12 total combinations. + # This discussion and the following tests are relevant for the CPython + # implementation only. In PyPy, there is no builtin method or builtin + # function types/flavours. The only way in which a builtin method can be + # identified is with its builtin-code __code__ attribute.
+ + def test_builtin_classicmethod(self): + obj = 1.5 # float object + + bound_classicmethod = obj.hex # builtin_function_or_method + unbound_classicmethod = type(obj).hex # method_descriptor + clsdict_classicmethod = type(obj).__dict__['hex'] # method_descriptor + + assert unbound_classicmethod is clsdict_classicmethod + + depickled_bound_meth = pickle_depickle( + bound_classicmethod, protocol=self.protocol) + depickled_unbound_meth = pickle_depickle( + unbound_classicmethod, protocol=self.protocol) + depickled_clsdict_meth = pickle_depickle( + clsdict_classicmethod, protocol=self.protocol) + + # No identity test on the bound methods, as they are bound to + # different float instances + assert depickled_bound_meth() == bound_classicmethod() + assert depickled_unbound_meth is unbound_classicmethod + assert depickled_clsdict_meth is clsdict_classicmethod + + + def test_builtin_classmethod(self): + obj = 1.5 # float object + + bound_clsmethod = obj.fromhex # builtin_function_or_method + unbound_clsmethod = type(obj).fromhex # builtin_function_or_method + clsdict_clsmethod = type( + obj).__dict__['fromhex'] # classmethod_descriptor + + depickled_bound_meth = pickle_depickle( + bound_clsmethod, protocol=self.protocol) + depickled_unbound_meth = pickle_depickle( + unbound_clsmethod, protocol=self.protocol) + depickled_clsdict_meth = pickle_depickle( + clsdict_clsmethod, protocol=self.protocol) + + # float.fromhex takes a string as input. + arg = "0x1" + + # Identity on both the bound and the unbound methods cannot be + # tested: the bound methods are bound to different objects, and the + # unbound methods are actually recreated at each call. + assert depickled_bound_meth(arg) == bound_clsmethod(arg) + assert depickled_unbound_meth(arg) == unbound_clsmethod(arg) + + if platform.python_implementation() == 'CPython': + # Roundtripping a classmethod_descriptor results in a + # builtin_function_or_method (CPython upstream issue).
+ assert depickled_clsdict_meth(arg) == clsdict_clsmethod(float, arg) + if platform.python_implementation() == 'PyPy': + # builtin-classmethods are simple classmethods in PyPy (not + # callable). We test equality of types and the functionality of the + # __func__ attribute instead. We do not test the identity of + # the functions as __func__ attributes of classmethods are not + # pickleable and must be reconstructed at depickling time. + assert type(depickled_clsdict_meth) == type(clsdict_clsmethod) + assert depickled_clsdict_meth.__func__( + float, arg) == clsdict_clsmethod.__func__(float, arg) + + def test_builtin_slotmethod(self): + obj = 1.5 # float object + + bound_slotmethod = obj.__repr__ # method-wrapper + unbound_slotmethod = type(obj).__repr__ # wrapper_descriptor + clsdict_slotmethod = type(obj).__dict__['__repr__'] # ditto + + depickled_bound_meth = pickle_depickle( + bound_slotmethod, protocol=self.protocol) + depickled_unbound_meth = pickle_depickle( + unbound_slotmethod, protocol=self.protocol) + depickled_clsdict_meth = pickle_depickle( + clsdict_slotmethod, protocol=self.protocol) + + # No identity tests on the bound slotmethod as they are bound to + # different float instances + assert depickled_bound_meth() == bound_slotmethod() + assert depickled_unbound_meth is unbound_slotmethod + assert depickled_clsdict_meth is clsdict_slotmethod + + @pytest.mark.skipif( + platform.python_implementation() == "PyPy" or + sys.version_info[:1] < (3,), + reason="No known staticmethod example in the python 2 / pypy stdlib") + def test_builtin_staticmethod(self): + obj = "foo" # str object + + bound_staticmethod = obj.maketrans # builtin_function_or_method + unbound_staticmethod = type(obj).maketrans # ditto + clsdict_staticmethod = type(obj).__dict__['maketrans'] # staticmethod + + assert bound_staticmethod is unbound_staticmethod + + depickled_bound_meth = pickle_depickle( + bound_staticmethod, protocol=self.protocol) + depickled_unbound_meth =
pickle_depickle( + unbound_staticmethod, protocol=self.protocol) + depickled_clsdict_meth = pickle_depickle( + clsdict_staticmethod, protocol=self.protocol) + + assert depickled_bound_meth is bound_staticmethod + assert depickled_unbound_meth is unbound_staticmethod + + # staticmethod objects are recreated at depickling time, but the + # underlying __func__ object is pickled by attribute. + assert depickled_clsdict_meth.__func__ is clsdict_staticmethod.__func__ + type(depickled_clsdict_meth) is type(clsdict_staticmethod) @pytest.mark.skipif(tornado is None, reason="test needs Tornado installed") @@ -964,14 +1101,6 @@ def test_namedtuple(self): assert isinstance(depickled_t2, MyTuple) assert depickled_t2 == t2 - def test_builtin_type__new__(self): - # Functions occasionally take the __new__ of these types as default - # parameters for factories. For example, on Python 3.3, - # `tuple.__new__` is a default value for some methods of namedtuple. - for t in list, tuple, set, frozenset, dict, object: - cloned = pickle_depickle(t.__new__, protocol=self.protocol) - self.assertTrue(cloned is t.__new__) - def test_interactively_defined_function(self): # Check that callables defined in the __main__ module of a Python # script (or jupyter kernel) can be pickled / unpickled / executed.