From 6d9e7e7a9384f80b2809048083f47aa08bf84d88 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 18 Apr 2019 14:54:30 +0200 Subject: [PATCH 01/21] MNT refactor handling of builtin_function_or_method --- cloudpickle/cloudpickle.py | 100 ++++++++++++------------------------- 1 file changed, 32 insertions(+), 68 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index d84cce76d..5145989d3 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -116,6 +116,14 @@ def _lookup_class_or_track(class_tracker_id, class_def): _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id return class_def +if PY3: + from pickle import _getattribute +else: + # compatibility function for python2 with the same signature as + # _getattribute + def _getattribute(obj, name): + return getattr(obj, name, None), None + def _make_cell_set_template_code(): """Get the Python compiler to emit LOAD_FAST(arg); STORE_DEREF @@ -225,32 +233,6 @@ def _builtin_type(name): return getattr(types, name) -def _make__new__factory(type_): - def _factory(): - return type_.__new__ - return _factory - - -# NOTE: These need to be module globals so that they're pickleable as globals. -_get_dict_new = _make__new__factory(dict) -_get_frozenset_new = _make__new__factory(frozenset) -_get_list_new = _make__new__factory(list) -_get_set_new = _make__new__factory(set) -_get_tuple_new = _make__new__factory(tuple) -_get_object_new = _make__new__factory(object) - -# Pre-defined set of builtin_function_or_method instances that can be -# serialized. -_BUILTIN_TYPE_CONSTRUCTORS = { - dict.__new__: _get_dict_new, - frozenset.__new__: _get_frozenset_new, - set.__new__: _get_set_new, - list.__new__: _get_list_new, - tuple.__new__: _get_tuple_new, - object.__new__: _get_object_new, -} - - if sys.version_info < (3, 4): # pragma: no branch def _walk_global_ops(code): """ @@ -393,28 +375,12 @@ def save_function(self, obj, name=None): Determines what kind of function obj is (e.g. lambda, defined at interactive prompt, etc) and handles the pickling appropriately. """ - try: - should_special_case = obj in _BUILTIN_TYPE_CONSTRUCTORS - except TypeError: - # Methods of builtin types aren't hashable in python 2. - should_special_case = False - - if should_special_case: - # We keep a special-cased cache of built-in type constructors at - # global scope, because these functions are structured very - # differently in different python versions and implementations (for - # example, they're instances of types.BuiltinFunctionType in - # CPython, but they're ordinary types.FunctionType instances in - # PyPy). - # - # If the function we've received is in that cache, we just - # serialize it as a lookup into the cache. - return self.save_reduce(_BUILTIN_TYPE_CONSTRUCTORS[obj], (), obj=obj) - write = self.write if name is None: - name = obj.__name__ + name = getattr(obj, '__qualname__', None) + if name is None: + name = getattr(obj, '__name__', None) try: # whichmodule() could fail, see # https://bitbucket.org/gutworth/six/issues/63/importing-six-breaks-pickling @@ -432,7 +398,7 @@ def save_function(self, obj, name=None): themodule = None try: - lookedup_by_name = getattr(themodule, name, None) + lookedup_by_name, _ = _getattribute(themodule, name) except Exception: lookedup_by_name = None @@ -440,23 +406,6 @@ def save_function(self, obj, name=None): if lookedup_by_name is obj: return self.save_global(obj, name) - # a builtin_function_or_method which comes in as an attribute of some - # object (e.g., itertools.chain.from_iterable) will end - # up with modname "__main__" and so end up here. But these functions - # have no __code__ attribute in CPython, so the handling for - # user-defined functions below will fail. - # So we pickle them here using save_reduce; have to do it differently - # for different python versions. - if not hasattr(obj, '__code__'): - if PY3: # pragma: no branch - rv = obj.__reduce_ex__(self.proto) - else: - if hasattr(obj, '__self__'): - rv = (getattr, (obj.__self__, name)) - else: - raise pickle.PicklingError("Can't pickle %r" % obj) - return self.save_reduce(obj=obj, *rv) - # if func is lambda, def'ed at prompt, is in main, or is nested, then # we'll pickle the actual function object rather than simply saving a # reference (as is done in default pickler), via save_function_tuple. @@ -783,12 +732,27 @@ def extract_func_data(self, func): return (code, f_globals, defaults, closure, dct, base_globals) - def save_builtin_function(self, obj): - if obj.__module__ == "__builtin__": - return self.save_global(obj) - return self.save_function(obj) + if not PY3: # pragma: no branch + # In python 3, builtin_function_or_method objects have a __reduce__ + # method, which make them correctly serializable by the standard pickle + + def save_builtin_function(self, obj): + # builtin functions are actually pickled correctly by the standard + # pickle. In python2, only methods are not pickleable. + + is_function = getattr(obj, '__self__', None) is None + if is_function: + # obj is a function, such as zip (from the __builtin__ module), + # or sys.getcheckinterval (from the sys module) + return Pickler.save_global(self, obj) + + # obj is a method, such as dict.__new__ (from the builtin + # module) or itertools.chain.from_iterable (ffrom + # the itertools module). + rv = (getattr, (obj.__self__, obj.__name__)) + return self.save_reduce(obj=obj, *rv) - dispatch[types.BuiltinFunctionType] = save_builtin_function + dispatch[types.BuiltinFunctionType] = save_builtin_function def save_global(self, obj, name=None, pack=struct.pack): """ From d741d1e994255b47ffcccc4692b81ffe3e6b703b Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 18 Apr 2019 14:55:12 +0200 Subject: [PATCH 02/21] TST refactor builtin_function pickling tests --- tests/cloudpickle_test.py | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 8f358ac64..76cc98eb5 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -641,13 +641,33 @@ def test_NotImplementedType(self): res = pickle_depickle(type(NotImplemented), protocol=self.protocol) self.assertEqual(type(NotImplemented), res) - def test_builtin_function_without_module(self): - on = object.__new__ - on_depickled = pickle_depickle(on, protocol=self.protocol) - self.assertEqual(type(on_depickled(object)), type(object())) + @pytest.mark.skipif( + sys.version_info[0] >= 3, + reason="builtin_function_or_method are special-cased only in python2") + def test_builtin_function(self): + # builtin function from the __builtin__ module + assert pickle_depickle(zip, protocol=self.protocol) is zip + + from sys import getcheckinterval + # builtin function from a "regular" module + assert pickle_depickle( + getcheckinterval, protocol=self.protocol) is getcheckinterval + + @pytest.mark.skipif( + sys.version_info[0] >= 3, + reason="builtin_function_or_method are special-cased only in python2") + def test_builtin_method(self): + # pickle_depickle some builtin methods of the __builtin__ module + for t in list, tuple, set, frozenset, dict, object: + cloned = pickle_depickle(t.__new__, protocol=self.protocol) + self.assertTrue(cloned is t.__new__) + # pickle_depickle a method of a "regular" module fi = itertools.chain.from_iterable fi_depickled = pickle_depickle(fi, protocol=self.protocol) + + # fi is fi_depickled will return false, but so does + # itertools.chain.from_iterable is itertools.chain.from_iterable self.assertEqual(list(fi_depickled([[1, 2], [3, 4]])), [1, 2, 3, 4]) @pytest.mark.skipif(tornado is None, @@ -964,14 +984,6 @@ def test_namedtuple(self): assert isinstance(depickled_t2, MyTuple) assert depickled_t2 == t2 - def test_builtin_type__new__(self): - # Functions occasionally take the __new__ of these types as default - # parameters for factories. For example, on Python 3.3, - # `tuple.__new__` is a default value for some methods of namedtuple. - for t in list, tuple, set, frozenset, dict, object: - cloned = pickle_depickle(t.__new__, protocol=self.protocol) - self.assertTrue(cloned is t.__new__) - def test_interactively_defined_function(self): # Check that callables defined in the __main__ module of a Python # script (or jupyter kernel) can be pickled / unpickled / executed. From 5a0081d6d1bf6f2b688a31ddd964fdd6fead4126 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 18 Apr 2019 17:06:21 +0200 Subject: [PATCH 03/21] CLN dont skip test on py3, better comments, typo --- cloudpickle/cloudpickle.py | 2 +- tests/cloudpickle_test.py | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 5145989d3..def024c15 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -747,7 +747,7 @@ def save_builtin_function(self, obj): return Pickler.save_global(self, obj) # obj is a method, such as dict.__new__ (from the builtin - # module) or itertools.chain.from_iterable (ffrom + # module) or itertools.chain.from_iterable (from # the itertools module). rv = (getattr, (obj.__self__, obj.__name__)) return self.save_reduce(obj=obj, *rv) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 76cc98eb5..310cb3f66 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -641,10 +641,10 @@ def test_NotImplementedType(self): res = pickle_depickle(type(NotImplemented), protocol=self.protocol) self.assertEqual(type(NotImplemented), res) - @pytest.mark.skipif( - sys.version_info[0] >= 3, - reason="builtin_function_or_method are special-cased only in python2") def test_builtin_function(self): + # Note that builtin_function_or_method are special-cased by cloudpickle + # only in python2. + # builtin function from the __builtin__ module assert pickle_depickle(zip, protocol=self.protocol) is zip @@ -653,10 +653,10 @@ def test_builtin_function(self): assert pickle_depickle( getcheckinterval, protocol=self.protocol) is getcheckinterval - @pytest.mark.skipif( - sys.version_info[0] >= 3, - reason="builtin_function_or_method are special-cased only in python2") def test_builtin_method(self): + # Note that builtin_function_or_method are special-cased by cloudpickle + # only in python2. + # pickle_depickle some builtin methods of the __builtin__ module for t in list, tuple, set, frozenset, dict, object: cloned = pickle_depickle(t.__new__, protocol=self.protocol) @@ -666,8 +666,10 @@ def test_builtin_method(self): fi = itertools.chain.from_iterable fi_depickled = pickle_depickle(fi, protocol=self.protocol) - # fi is fi_depickled will return false, but so does - # itertools.chain.from_iterable is itertools.chain.from_iterable + # `fi is fi_depickled` would return False, but so will + # `itertools.chain.from_iterable is itertools.chain.from_iterable`. + # Instead of testing physical equality, check that `fi_depickled` + # behaves as expected. self.assertEqual(list(fi_depickled([[1, 2], [3, 4]])), [1, 2, 3, 4]) @pytest.mark.skipif(tornado is None, From 5a9c93cf9d6b819210e4e361298ac22f528f9284 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Fri, 19 Apr 2019 09:39:46 +0200 Subject: [PATCH 04/21] Change assertion on __new__ to pass with PyPy --- tests/cloudpickle_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 310cb3f66..bf911923c 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -659,8 +659,8 @@ def test_builtin_method(self): # pickle_depickle some builtin methods of the __builtin__ module for t in list, tuple, set, frozenset, dict, object: - cloned = pickle_depickle(t.__new__, protocol=self.protocol) - self.assertTrue(cloned is t.__new__) + cloned_new = pickle_depickle(t.__new__, protocol=self.protocol) + assert isinstance(cloned_new(t), t) # pickle_depickle a method of a "regular" module fi = itertools.chain.from_iterable From 9d9c25a8ff312dc76b1c99decb4d43f97e9976bb Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Tue, 30 Apr 2019 17:02:24 +0200 Subject: [PATCH 05/21] skip __new__ constructor test on pypy3.5 --- tests/cloudpickle_test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index bf911923c..484ca4660 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -653,6 +653,9 @@ def test_builtin_function(self): assert pickle_depickle( getcheckinterval, protocol=self.protocol) is getcheckinterval + @pytest.mark.skipif(platform.python_implementation() == 'PyPy' and + sys.version_info[:2] == (3, 5), + reason="bug of pypy3.5 in builtin constructors") def test_builtin_method(self): # Note that builtin_function_or_method are special-cased by cloudpickle # only in python2. From e57835600dc5f48f69946784f030860ace3a9712 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 6 May 2019 10:28:25 +0200 Subject: [PATCH 06/21] CLN refactor instancemthod pickling (not builtin) --- cloudpickle/cloudpickle.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index def024c15..f5d8a3a61 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -788,18 +788,16 @@ def save_global(self, obj, name=None, pack=struct.pack): dispatch[type] = save_global dispatch[types.ClassType] = save_global - def save_instancemethod(self, obj): - # Memoization rarely is ever useful due to python bounding - if obj.__self__ is None: - self.save_reduce(getattr, (obj.im_class, obj.__name__)) - else: - if PY3: # pragma: no branch - self.save_reduce(types.MethodType, (obj.__func__, obj.__self__), obj=obj) + if not PY3: + def save_instancemethod(self, obj): + # Memoization rarely is ever useful due to python bounding + if obj.__self__ is None: + self.save_reduce(getattr, (obj.im_class, obj.__name__)) else: - self.save_reduce(types.MethodType, (obj.__func__, obj.__self__, obj.__self__.__class__), + self.save_reduce(getattr, (obj.__self__, obj.__name__), obj=obj) - dispatch[types.MethodType] = save_instancemethod + dispatch[types.MethodType] = save_instancemethod def save_inst(self, obj): """Inner logic to save instance. Based off pickle.save_inst""" From c5457b0d980dc5b2c5bca5943cdd8394e20b5fbe Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 6 May 2019 18:54:12 +0200 Subject: [PATCH 07/21] TST test all possible builtin method flavours --- tests/cloudpickle_test.py | 99 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 484ca4660..3fc5cad8e 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -675,6 +675,105 @@ def test_builtin_method(self): # behaves as expected. self.assertEqual(list(fi_depickled([[1, 2], [3, 4]])), [1, 2, 3, 4]) + # The next 4 tests pickle-depickle all forms into which builtin python + # methods can appear. + # There are 4 kinds of method: 'classic' methods, classmethods, + # staticmethods and slotmethods. They will appear under different types + # depending on whether they are called from the __dict__ of their + # class, their class itself, or an instance of their class. This makes + # 12 total combinations. + + def test_builtin_classicmethod(self): + obj = 1.5 # float object + + bound_classicmethod = obj.hex # builtin_function_or_method + unbound_classicmethod = type(obj).hex # method_descriptor + clsdict_classicmethod = type(obj).__dict__['hex'] # method_descriptor + + depickled_bound_meth = pickle_depickle( + bound_classicmethod, protocol=self.protocol) + depickled_unbound_meth = pickle_depickle( + unbound_classicmethod, protocol=self.protocol) + depickled_clsdict_meth = pickle_depickle( + clsdict_classicmethod, protocol=self.protocol) + + assert depickled_bound_meth() == bound_classicmethod() + assert depickled_unbound_meth(obj) == unbound_classicmethod(obj) + assert depickled_clsdict_meth(obj) == clsdict_classicmethod(obj) + + def test_builtin_classmethod(self): + obj = 1.5 # float object + + bound_clsmethod = obj.fromhex # builtin_function_or_method + unbound_clsmethod = type(obj).fromhex # builtin_function_or_method + clsdict_clsmethod = type( + obj).__dict__['fromhex'] # classmethod_descriptor + + depickled_bound_meth = pickle_depickle( + bound_clsmethod, protocol=self.protocol) + depickled_unbound_meth = pickle_depickle( + unbound_clsmethod, protocol=self.protocol) + depickled_clsdict_meth = pickle_depickle( + clsdict_clsmethod, protocol=self.protocol) + + # classmethods may require objects of another type than the one they + # are bound to. + target = "0x1" + assert depickled_bound_meth(target) == bound_clsmethod(target) + assert depickled_unbound_meth(target) == unbound_clsmethod(target) + + # builtin classmethod_descriptor objects are not callable, neither do + # they have an accessible __func__ object. Moreover, roundtripping them + # results in a builtin_function_or_method (python upstream issue). + # XXX: shall we test anything in this case? + assert depickled_clsdict_meth == unbound_clsmethod + + def test_builtin_slotmethod(self): + obj = 1.5 # float object + + bound_slotmethod = obj.__repr__ # method-wrapper + unbound_slotmethod = type(obj).__repr__ # wrapper_descriptor + clsdict_slotmethod = type(obj).__dict__['__repr__'] # ditto + + depickled_bound_meth = pickle_depickle( + bound_slotmethod, protocol=self.protocol) + depickled_unbound_meth = pickle_depickle( + unbound_slotmethod, protocol=self.protocol) + depickled_clsdict_meth = pickle_depickle( + clsdict_slotmethod, protocol=self.protocol) + + assert depickled_bound_meth() == bound_slotmethod() + assert depickled_unbound_meth(obj) == unbound_slotmethod(obj) + assert depickled_clsdict_meth(obj) == clsdict_slotmethod(obj) + + @pytest.mark.skipif( + sys.version_info[:1] < (3,), + reason="No staticmethod example in the python 2 stdlib") + def test_builtin_staticmethod(self): + obj = "foo" # str object + + bound_staticmethod = obj.maketrans # builtin_function_or_method + unbound_staticmethod = type(obj).maketrans # ditto + clsdict_staticmethod = type(obj).__dict__['maketrans'] # staticmethod + + depickled_bound_meth = pickle_depickle( + bound_staticmethod, protocol=self.protocol) + depickled_unbound_meth = pickle_depickle( + unbound_staticmethod, protocol=self.protocol) + depickled_clsdict_meth = pickle_depickle( + clsdict_staticmethod, protocol=self.protocol) + + # staticmethod may require objects of another type than the one they + # are bound to. + target = {"a": "b"} + assert depickled_bound_meth(target) == bound_staticmethod(target) + assert depickled_unbound_meth(target) == unbound_staticmethod(target) + + # staticmethod objects are not callable. Instead, we test for the + # depickled's object class, and wrapped object attribute. + assert type(depickled_clsdict_meth) is type(clsdict_staticmethod) + assert depickled_clsdict_meth.__func__ is clsdict_staticmethod.__func__ + @pytest.mark.skipif(tornado is None, reason="test needs Tornado installed") def test_tornado_coroutine(self): From 9d5f0f4d19978ae15ddc34530f88a6cf1243561f Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 6 May 2019 19:03:39 +0200 Subject: [PATCH 08/21] TST correct classmethod_descriptor test note --- tests/cloudpickle_test.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 3fc5cad8e..69fa1b2c8 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -722,11 +722,10 @@ def test_builtin_classmethod(self): assert depickled_bound_meth(target) == bound_clsmethod(target) assert depickled_unbound_meth(target) == unbound_clsmethod(target) - # builtin classmethod_descriptor objects are not callable, neither do - # they have an accessible __func__ object. Moreover, roundtripping them - # results in a builtin_function_or_method (python upstream issue). - # XXX: shall we test anything in this case? - assert depickled_clsdict_meth == unbound_clsmethod + # Roundtripping a classmethod_descriptor results in a + # builtin_function_or_method (python upstream issue). + assert depickled_clsdict_meth(target) == clsdict_clsmethod(float, + target) def test_builtin_slotmethod(self): obj = 1.5 # float object From ac5ae5d79186c3acdf711e4ca9c1ce2463fc5e40 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 6 May 2019 19:17:08 +0200 Subject: [PATCH 09/21] CLN refactor method_descriptor pickling --- cloudpickle/cloudpickle.py | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index f5d8a3a61..fc377c333 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -754,6 +754,14 @@ def save_builtin_function(self, obj): dispatch[types.BuiltinFunctionType] = save_builtin_function + if sys.version_info[:2] < (3, 4): + method_descriptor = type(str.upper) + + def save_method_descriptor(self, obj): + self.save_reduce( + getattr, (obj.__objclass__, obj.__name__), obj=obj) + dispatch[method_descriptor] = save_method_descriptor + def save_global(self, obj, name=None, pack=struct.pack): """ Save a "global". @@ -1277,18 +1285,3 @@ def _is_dynamic(module): except ImportError: return True return False - - -""" Use copy_reg to extend global pickle definitions """ - -if sys.version_info < (3, 4): # pragma: no branch - method_descriptor = type(str.upper) - - def _reduce_method_descriptor(obj): - return (getattr, (obj.__objclass__, obj.__name__)) - - try: - import copy_reg as copyreg - except ImportError: - import copyreg - copyreg.pickle(method_descriptor, _reduce_method_descriptor) From ce5bf7576a1ae704db1dee80f6fba3f10e4a2d57 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 6 May 2019 19:33:43 +0200 Subject: [PATCH 10/21] TST fix python2 edge cases --- cloudpickle/cloudpickle.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index fc377c333..8a9252b7f 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -754,6 +754,21 @@ def save_builtin_function(self, obj): dispatch[types.BuiltinFunctionType] = save_builtin_function + def save_classmethod_descriptor(self, obj): + return self.save_reduce(getattr, (obj.__objclass__, obj.__name__)) + classmethod_descriptor_type = type(float.__dict__['fromhex']) + dispatch[classmethod_descriptor_type] = save_classmethod_descriptor + + def save_wrapper_descriptor(self, obj): + return self.save_reduce(getattr, (obj.__objclass__, obj.__name__)) + wrapper_descriptor_type = type(float.__repr__) + dispatch[wrapper_descriptor_type] = save_wrapper_descriptor + + def save_method_wrapper(self, obj): + return self.save_reduce(getattr, (obj.__self__, obj.__name__)) + method_wrapper_type = type(1.5.__repr__) + dispatch[method_wrapper_type] = save_method_wrapper + if sys.version_info[:2] < (3, 4): method_descriptor = type(str.upper) From d212c10be080b5a884e8154c7194a97fca057c15 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 6 May 2019 22:52:20 +0200 Subject: [PATCH 11/21] CLN merge all builtin method saving funcs into one --- cloudpickle/cloudpickle.py | 68 +++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 37 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 8a9252b7f..0b4b54764 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -733,49 +733,43 @@ def extract_func_data(self, func): return (code, f_globals, defaults, closure, dct, base_globals) if not PY3: # pragma: no branch - # In python 3, builtin_function_or_method objects have a __reduce__ - # method, which make them correctly serializable by the standard pickle - - def save_builtin_function(self, obj): - # builtin functions are actually pickled correctly by the standard - # pickle. In python2, only methods are not pickleable. - - is_function = getattr(obj, '__self__', None) is None - if is_function: - # obj is a function, such as zip (from the __builtin__ module), - # or sys.getcheckinterval (from the sys module) - return Pickler.save_global(self, obj) - - # obj is a method, such as dict.__new__ (from the builtin - # module) or itertools.chain.from_iterable (from - # the itertools module). - rv = (getattr, (obj.__self__, obj.__name__)) - return self.save_reduce(obj=obj, *rv) - - dispatch[types.BuiltinFunctionType] = save_builtin_function - - def save_classmethod_descriptor(self, obj): - return self.save_reduce(getattr, (obj.__objclass__, obj.__name__)) + # Python3 comes with native reducers that allow builtin functions and + # methods pickling as module/class attributes. The following method + # extends this for python2. + # Plase note that currently, neither pickle nor cloudpickle support + # dynamically created builtin functions/method pickling. + def save_builtin_function_or_method(self, obj): + is_bound = getattr(obj, '__self__', None) is not None + if is_bound: + # obj is a bound builtin method. + rv = (getattr, (obj.__self__, obj.__name__)) + return self.save_reduce(obj=obj, *rv) + + is_unbound = hasattr(obj, '__objclass__') + if is_unbound: + # obj is a unbound builtin method (accessed from its class) + rv = (getattr, (obj.__objclass__, obj.__name__)) + return self.save_reduce(obj=obj, *rv) + + # Otherwise, obj is not a method, but a function. Fallback to + # default pickling by attribute. + return Pickler.save_global(self, obj) + + dispatch[types.BuiltinFunctionType] = save_builtin_function_or_method + + # A comprehensive summary of the various kinds of builtin methods can + # be found in PEP 579: https://www.python.org/dev/peps/pep-0579/ classmethod_descriptor_type = type(float.__dict__['fromhex']) - dispatch[classmethod_descriptor_type] = save_classmethod_descriptor - - def save_wrapper_descriptor(self, obj): - return self.save_reduce(getattr, (obj.__objclass__, obj.__name__)) wrapper_descriptor_type = type(float.__repr__) - dispatch[wrapper_descriptor_type] = save_wrapper_descriptor - - def save_method_wrapper(self, obj): - return self.save_reduce(getattr, (obj.__self__, obj.__name__)) method_wrapper_type = type(1.5.__repr__) - dispatch[method_wrapper_type] = save_method_wrapper + + dispatch[classmethod_descriptor_type] = save_builtin_function_or_method + dispatch[wrapper_descriptor_type] = save_builtin_function_or_method + dispatch[method_wrapper_type] = save_builtin_function_or_method if sys.version_info[:2] < (3, 4): method_descriptor = type(str.upper) - - def save_method_descriptor(self, obj): - self.save_reduce( - getattr, (obj.__objclass__, obj.__name__), obj=obj) - dispatch[method_descriptor] = save_method_descriptor + dispatch[method_descriptor] = save_builtin_function_or_method def save_global(self, obj, name=None, pack=struct.pack): """ From d4447d43c941abb4b0fc21ebc45d0cd0e45ec043 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Tue, 7 May 2019 14:13:12 +0200 Subject: [PATCH 12/21] TST add some PyPy specific tests/comments --- tests/cloudpickle_test.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 69fa1b2c8..c62f2e7ce 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -682,6 +682,10 @@ def test_builtin_method(self): # depending on whether they are called from the __dict__ of their # class, their class itself, or an instance of their class. This makes # 12 total combinations. + # This discussion and the following tests are relevant for the CPython + # implementation only. In PyPy, there is no builtin method or builtin + # function types/flavours. The only way into which a builtin method can be + # identified is with it's builtin-code __code__ attribute. def test_builtin_classicmethod(self): obj = 1.5 # float object @@ -722,10 +726,20 @@ def test_builtin_classmethod(self): assert depickled_bound_meth(target) == bound_clsmethod(target) assert depickled_unbound_meth(target) == unbound_clsmethod(target) - # Roundtripping a classmethod_descriptor results in a - # builtin_function_or_method (python upstream issue). - assert depickled_clsdict_meth(target) == clsdict_clsmethod(float, - target) + if platform.python_implementation() == 'CPython': + # Roundtripping a classmethod_descriptor results in a + # builtin_function_or_method (CPython upstream issue). + assert depickled_clsdict_meth(target) == clsdict_clsmethod(float, + target) + if platform.python_implementation() == 'PyPy': + # builtin-classmethods are simple classmethod in PyPy (not + # callable). We test equality of types and the functionality of the + # __func__ attribute instead. We do not test the the identity of + # the functions as __func__ attributes of classmethods are not + # pickleable and must be reconstructed at depickling time. + assert type(depickled_clsdict_meth) == type(clsdict_clsmethod) + assert depickled_clsdict_meth.__func__( + float, target) == clsdict_clsmethod.__func__(float, target) def test_builtin_slotmethod(self): obj = 1.5 # float object @@ -746,8 +760,9 @@ def test_builtin_slotmethod(self): assert depickled_clsdict_meth(obj) == clsdict_slotmethod(obj) @pytest.mark.skipif( + platform.python_implementation() == "PyPy" or sys.version_info[:1] < (3,), - reason="No staticmethod example in the python 2 stdlib") + reason="No known staticmethod example in the python 2 / pypy stdlib") def test_builtin_staticmethod(self): obj = "foo" # str object From e1b59c804c2395f37e7a258951167b40e5c9ec1c Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Tue, 7 May 2019 14:38:03 +0200 Subject: [PATCH 13/21] TST stronger identity assertions --- tests/cloudpickle_test.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index c62f2e7ce..b5e910843 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -694,6 +694,8 @@ def test_builtin_classicmethod(self): unbound_classicmethod = type(obj).hex # method_descriptor clsdict_classicmethod = type(obj).__dict__['hex'] # method_descriptor + assert unbound_classicmethod is clsdict_classicmethod + depickled_bound_meth = pickle_depickle( bound_classicmethod, protocol=self.protocol) depickled_unbound_meth = pickle_depickle( @@ -701,6 +703,12 @@ def test_builtin_classicmethod(self): depickled_clsdict_meth = pickle_depickle( clsdict_classicmethod, protocol=self.protocol) + # No identity on the bound methods they are bound to different float + # instances + # assert depickled_bound_meth is bound_classicmethod + assert depickled_unbound_meth is unbound_classicmethod + assert depickled_clsdict_meth is clsdict_classicmethod + assert depickled_bound_meth() == bound_classicmethod() assert depickled_unbound_meth(obj) == unbound_classicmethod(obj) assert depickled_clsdict_meth(obj) == clsdict_classicmethod(obj) @@ -720,6 +728,10 @@ def test_builtin_classmethod(self): depickled_clsdict_meth = pickle_depickle( clsdict_clsmethod, protocol=self.protocol) + # Identity on both the bound and the unbound methods cannot be + # tested: the bound methods are bound to different objects, and the + # unbound methods are actually recreated at each call. + # classmethods may require objects of another type than the one they # are bound to. target = "0x1" @@ -755,6 +767,11 @@ def test_builtin_slotmethod(self): depickled_clsdict_meth = pickle_depickle( clsdict_slotmethod, protocol=self.protocol) + # No identity tests on the bound slotmethod are they are bound to + # different float instances + assert depickled_unbound_meth is unbound_slotmethod + assert depickled_clsdict_meth is clsdict_slotmethod + assert depickled_bound_meth() == bound_slotmethod() assert depickled_unbound_meth(obj) == unbound_slotmethod(obj) assert depickled_clsdict_meth(obj) == clsdict_slotmethod(obj) @@ -770,6 +787,8 @@ def test_builtin_staticmethod(self): unbound_staticmethod = type(obj).maketrans # ditto clsdict_staticmethod = type(obj).__dict__['maketrans'] # staticmethod + assert bound_staticmethod is unbound_staticmethod + depickled_bound_meth = pickle_depickle( bound_staticmethod, protocol=self.protocol) depickled_unbound_meth = pickle_depickle( @@ -780,6 +799,9 @@ def test_builtin_staticmethod(self): # staticmethod may require objects of another type than the one they # are bound to. target = {"a": "b"} + assert depickled_bound_meth is bound_staticmethod + assert depickled_unbound_meth is unbound_staticmethod + assert depickled_bound_meth(target) == bound_staticmethod(target) assert depickled_unbound_meth(target) == unbound_staticmethod(target) From 83659910bf84814031545614659ee1586c75766a Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Tue, 7 May 2019 14:38:18 +0200 Subject: [PATCH 14/21] DOC clearer comments --- tests/cloudpickle_test.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index b5e910843..b2f81c67b 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -732,8 +732,7 @@ def test_builtin_classmethod(self): # tested: the bound methods are bound to different objects, and the # unbound methods are actually recreated at each call. - # classmethods may require objects of another type than the one they - # are bound to. + # float.fromhex takes a string as input. target = "0x1" assert depickled_bound_meth(target) == bound_clsmethod(target) assert depickled_unbound_meth(target) == unbound_clsmethod(target) @@ -796,8 +795,7 @@ def test_builtin_staticmethod(self): depickled_clsdict_meth = pickle_depickle( clsdict_staticmethod, protocol=self.protocol) - # staticmethod may require objects of another type than the one they - # are bound to. + # str.maketrans takes a dict as input. target = {"a": "b"} assert depickled_bound_meth is bound_staticmethod assert depickled_unbound_meth is unbound_staticmethod From 0a36b988eb0bafac6415dcff53834ffd3a276f4b Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Tue, 7 May 2019 14:45:22 +0200 Subject: [PATCH 15/21] TST neatier assertions --- tests/cloudpickle_test.py | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index b2f81c67b..9cd725deb 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -705,13 +705,10 @@ def test_builtin_classicmethod(self): # No identity on the bound methods they are bound to different float # instances - # assert depickled_bound_meth is bound_classicmethod + assert depickled_bound_meth() == bound_classicmethod() assert depickled_unbound_meth is unbound_classicmethod assert depickled_clsdict_meth is clsdict_classicmethod - assert depickled_bound_meth() == bound_classicmethod() - assert depickled_unbound_meth(obj) == unbound_classicmethod(obj) - assert depickled_clsdict_meth(obj) == clsdict_classicmethod(obj) def test_builtin_classmethod(self): obj = 1.5 # float object @@ -728,12 +725,12 @@ def test_builtin_classmethod(self): depickled_clsdict_meth = pickle_depickle( clsdict_clsmethod, protocol=self.protocol) + # float.fromhex takes a string as input. + target = "0x1" + # Identity on both the bound and the unbound methods cannot be # tested: the bound methods are bound to different objects, and the # unbound methods are actually recreated at each call. - - # float.fromhex takes a string as input. - target = "0x1" assert depickled_bound_meth(target) == bound_clsmethod(target) assert depickled_unbound_meth(target) == unbound_clsmethod(target) @@ -768,13 +765,10 @@ def test_builtin_slotmethod(self): # No identity tests on the bound slotmethod are they are bound to # different float instances + assert depickled_bound_meth() == bound_slotmethod() assert depickled_unbound_meth is unbound_slotmethod assert depickled_clsdict_meth is clsdict_slotmethod - assert depickled_bound_meth() == bound_slotmethod() - assert depickled_unbound_meth(obj) == unbound_slotmethod(obj) - assert depickled_clsdict_meth(obj) == clsdict_slotmethod(obj) - @pytest.mark.skipif( platform.python_implementation() == "PyPy" or sys.version_info[:1] < (3,), @@ -797,16 +791,14 @@ def test_builtin_staticmethod(self): # str.maketrans takes a dict as input. target = {"a": "b"} + assert depickled_bound_meth is bound_staticmethod assert depickled_unbound_meth is unbound_staticmethod - assert depickled_bound_meth(target) == bound_staticmethod(target) - assert depickled_unbound_meth(target) == unbound_staticmethod(target) - - # staticmethod objects are not callable. Instead, we test for the - # depickled's object class, and wrapped object attribute. - assert type(depickled_clsdict_meth) is type(clsdict_staticmethod) + # staticmethod objects are recreated at depickling time, but the + # underlying __func__ object is pickled by attribute. assert depickled_clsdict_meth.__func__ is clsdict_staticmethod.__func__ + type(depickled_clsdict_meth) is type(clsdict_staticmethod) @pytest.mark.skipif(tornado is None, reason="test needs Tornado installed") From 504d0c9d5d351793ea6555f0d675cfb41d2d74a1 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Tue, 7 May 2019 15:01:41 +0200 Subject: [PATCH 16/21] CLN rollback unrelated code change --- cloudpickle/cloudpickle.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 0b4b54764..858ae60cc 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -805,16 +805,18 @@ def save_global(self, obj, name=None, pack=struct.pack): dispatch[type] = save_global dispatch[types.ClassType] = save_global - if not PY3: - def save_instancemethod(self, obj): - # Memoization rarely is ever useful due to python bounding - if obj.__self__ is None: - self.save_reduce(getattr, (obj.im_class, obj.__name__)) + def save_instancemethod(self, obj): + # Memoization rarely is ever useful due to python bounding + if obj.__self__ is None: + self.save_reduce(getattr, (obj.im_class, obj.__name__)) + else: + if PY3: # pragma: no branch + self.save_reduce(types.MethodType, (obj.__func__, obj.__self__), obj=obj) else: - self.save_reduce(getattr, (obj.__self__, obj.__name__), + self.save_reduce(types.MethodType, (obj.__func__, obj.__self__, obj.__self__.__class__), obj=obj) - dispatch[types.MethodType] = save_instancemethod + dispatch[types.MethodType] = save_instancemethod def save_inst(self, obj): """Inner logic to save instance. Based off pickle.save_inst""" From 16e79b55e0f3948be0087cfc46d37ad66a679b2c Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Fri, 17 May 2019 15:33:15 +0200 Subject: [PATCH 17/21] CLN comments --- cloudpickle/cloudpickle.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 858ae60cc..fb3a1eac9 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -119,8 +119,10 @@ def _lookup_class_or_track(class_tracker_id, class_def): if PY3: from pickle import _getattribute else: - # compatibility function for python2 with the same signature as - # _getattribute + # pickle._getattribute is a python3 addition and enchancement of getattr, + # that can handle dotted attribute names. In cloudpickle for python2, + # handling dotted names is not needed, so we simply define _getattribute as + # a wrapper around getattr. def _getattribute(obj, name): return getattr(obj, name, None), None From 8b2f39039e6c83bc107c755b7689cf99f2ca5ea8 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Fri, 17 May 2019 15:38:23 +0200 Subject: [PATCH 18/21] TST renaming in tests --- tests/cloudpickle_test.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 9cd725deb..4cb685a9a 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -726,19 +726,18 @@ def test_builtin_classmethod(self): clsdict_clsmethod, protocol=self.protocol) # float.fromhex takes a string as input. - target = "0x1" + arg = "0x1" # Identity on both the bound and the unbound methods cannot be # tested: the bound methods are bound to different objects, and the # unbound methods are actually recreated at each call. - assert depickled_bound_meth(target) == bound_clsmethod(target) - assert depickled_unbound_meth(target) == unbound_clsmethod(target) + assert depickled_bound_meth(arg) == bound_clsmethod(arg) + assert depickled_unbound_meth(arg) == unbound_clsmethod(arg) if platform.python_implementation() == 'CPython': # Roundtripping a classmethod_descriptor results in a # builtin_function_or_method (CPython upstream issue). - assert depickled_clsdict_meth(target) == clsdict_clsmethod(float, - target) + assert depickled_clsdict_meth(arg) == clsdict_clsmethod(float, arg) if platform.python_implementation() == 'PyPy': # builtin-classmethods are simple classmethod in PyPy (not # callable). We test equality of types and the functionality of the @@ -747,7 +746,7 @@ def test_builtin_classmethod(self): # pickleable and must be reconstructed at depickling time. assert type(depickled_clsdict_meth) == type(clsdict_clsmethod) assert depickled_clsdict_meth.__func__( - float, target) == clsdict_clsmethod.__func__(float, target) + float, arg) == clsdict_clsmethod.__func__(float, arg) def test_builtin_slotmethod(self): obj = 1.5 # float object @@ -789,9 +788,6 @@ def test_builtin_staticmethod(self): depickled_clsdict_meth = pickle_depickle( clsdict_staticmethod, protocol=self.protocol) - # str.maketrans takes a dict as input. - target = {"a": "b"} - assert depickled_bound_meth is bound_staticmethod assert depickled_unbound_meth is unbound_staticmethod From dc794894e11fb853e10f84f1d7f8c960008f34a4 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Fri, 17 May 2019 15:40:26 +0200 Subject: [PATCH 19/21] CLN typos and phrasing --- cloudpickle/cloudpickle.py | 4 ++-- tests/cloudpickle_test.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index fb3a1eac9..556c55382 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -738,7 +738,7 @@ def extract_func_data(self, func): # Python3 comes with native reducers that allow builtin functions and # methods pickling as module/class attributes. The following method # extends this for python2. - # Plase note that currently, neither pickle nor cloudpickle support + # Please note that currently, neither pickle nor cloudpickle support # dynamically created builtin functions/method pickling. def save_builtin_function_or_method(self, obj): is_bound = getattr(obj, '__self__', None) is not None @@ -749,7 +749,7 @@ def save_builtin_function_or_method(self, obj): is_unbound = hasattr(obj, '__objclass__') if is_unbound: - # obj is a unbound builtin method (accessed from its class) + # obj is an unbound builtin method (accessed from its class) rv = (getattr, (obj.__objclass__, obj.__name__)) return self.save_reduce(obj=obj, *rv) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 4cb685a9a..ba8a99e05 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -675,8 +675,8 @@ def test_builtin_method(self): # behaves as expected. self.assertEqual(list(fi_depickled([[1, 2], [3, 4]])), [1, 2, 3, 4]) - # The next 4 tests pickle-depickle all forms into which builtin python - # methods can appear. + # The next 4 tests cover all cases into which builtin python methods can + # appear. # There are 4 kinds of method: 'classic' methods, classmethods, # staticmethods and slotmethods. They will appear under different types # depending on whether they are called from the __dict__ of their From 106700a45672452978cd30ffcce0c3bb1422647a Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Fri, 17 May 2019 16:11:47 +0200 Subject: [PATCH 20/21] TST clearly separate test-cases --- tests/cloudpickle_test.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index ba8a99e05..b03d4a410 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -655,26 +655,17 @@ def test_builtin_function(self): @pytest.mark.skipif(platform.python_implementation() == 'PyPy' and sys.version_info[:2] == (3, 5), - reason="bug of pypy3.5 in builtin constructors") - def test_builtin_method(self): - # Note that builtin_function_or_method are special-cased by cloudpickle - # only in python2. + reason="bug of pypy3.5 in builtin-type constructors") + def test_builtin_type_constructor(self): + # Due to a bug in pypy3.5, cloudpickling builtin-type constructors + # fails. This test makes sure that cloudpickling builtin-type + # constructors works for all other python versions/implementation. # pickle_depickle some builtin methods of the __builtin__ module for t in list, tuple, set, frozenset, dict, object: cloned_new = pickle_depickle(t.__new__, protocol=self.protocol) assert isinstance(cloned_new(t), t) - # pickle_depickle a method of a "regular" module - fi = itertools.chain.from_iterable - fi_depickled = pickle_depickle(fi, protocol=self.protocol) - - # `fi is fi_depickled` would return False, but so will - # `itertools.chain.from_iterable is itertools.chain.from_iterable`. - # Instead of testing physical equality, check that `fi_depickled` - # behaves as expected. - self.assertEqual(list(fi_depickled([[1, 2], [3, 4]])), [1, 2, 3, 4]) - # The next 4 tests cover all cases into which builtin python methods can # appear. # There are 4 kinds of method: 'classic' methods, classmethods, From 43e358b99f0d57014e40781f38394e86237baf94 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Fri, 17 May 2019 16:28:23 +0200 Subject: [PATCH 21/21] MNT changelog --- CHANGES.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index ee0101c67..906688259 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,9 @@ +1.2.0 +===== + +- Support pickling of classmethod and staticmethod objects in python2. + arguments. ([issue #262](https://github.com/cloudpipe/cloudpickle/pull/262)) + 1.1.0 =====