Skip to content

Refactor builtin method pickling #262

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
6d9e7e7
MNT refactor handling of builtin_function_or_method
pierreglaser Apr 18, 2019
d741d1e
TST refactor builtin_function pickling tests
pierreglaser Apr 18, 2019
5a0081d
CLN dont skip test on py3, better comments, typo
pierreglaser Apr 18, 2019
5a9c93c
Change assertion on __new__ to pass with PyPy
ogrisel Apr 19, 2019
9d9c25a
skip __new__ constructor test on pypy3.5
pierreglaser Apr 30, 2019
e578356
CLN refactor instancemthod pickling (not builtin)
pierreglaser May 6, 2019
c5457b0
TST test all possible builtin method flavours
pierreglaser May 6, 2019
9d5f0f4
TST correct classmethod_descriptor test note
pierreglaser May 6, 2019
ac5ae5d
CLN refactor method_descriptor pickling
pierreglaser May 6, 2019
ce5bf75
TST fix python2 edge cases
pierreglaser May 6, 2019
d212c10
CLN merge all builtin method saving funcs into one
pierreglaser May 6, 2019
d4447d4
TST add some PyPy specific tests/comments
pierreglaser May 7, 2019
e1b59c8
TST stronger identity assertions
pierreglaser May 7, 2019
8365991
DOC clearer comments
pierreglaser May 7, 2019
0a36b98
TST neatier assertions
pierreglaser May 7, 2019
504d0c9
CLN rollback unrelated code change
pierreglaser May 7, 2019
16e79b5
CLN comments
pierreglaser May 17, 2019
8b2f390
TST renaming in tests
pierreglaser May 17, 2019
dc79489
CLN typos and phrasing
pierreglaser May 17, 2019
106700a
TST clearly separate test-cases
pierreglaser May 17, 2019
6c60eaf
Merge remote-tracking branch 'upstream/master' into refactor-builtin_…
pierreglaser May 17, 2019
43e358b
MNT changelog
pierreglaser May 17, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
1.2.0
=====

- Support pickling of classmethod and staticmethod objects in python2.
([issue #262](https://github.com/cloudpipe/cloudpickle/pull/262))

1.1.0
=====

136 changes: 52 additions & 84 deletions cloudpickle/cloudpickle.py
Original file line number Diff line number Diff line change
@@ -116,6 +116,16 @@ def _lookup_class_or_track(class_tracker_id, class_def):
_DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id
return class_def

if not PY3:
    def _getattribute(obj, name):
        """Python 2 stand-in for ``pickle._getattribute``.

        ``pickle._getattribute`` is a python3 addition and enhancement of
        ``getattr`` that can handle dotted attribute names. In cloudpickle
        for python2, handling dotted names is not needed, so this is a thin
        wrapper around ``getattr``.

        Returns a 2-tuple whose first item is the attribute (``None`` when
        ``obj`` has no attribute called ``name``) and whose second item is
        always ``None``.
        """
        attribute = getattr(obj, name, None)
        return attribute, None
else:
    from pickle import _getattribute


def _make_cell_set_template_code():
"""Get the Python compiler to emit LOAD_FAST(arg); STORE_DEREF
@@ -244,32 +254,6 @@ def _builtin_type(name):
return getattr(types, name)


def _make__new__factory(type_):
def _factory():
return type_.__new__
return _factory


# NOTE: every factory is bound to a module-level name so that it is
# pickleable as a global.
_get_dict_new = _make__new__factory(dict)
_get_frozenset_new = _make__new__factory(frozenset)
_get_list_new = _make__new__factory(list)
_get_set_new = _make__new__factory(set)
_get_tuple_new = _make__new__factory(tuple)
_get_object_new = _make__new__factory(object)

# Pre-defined set of builtin_function_or_method instances that can be
# serialized: each builtin ``__new__`` is mapped to the module-level factory
# that returns it.
_BUILTIN_TYPE_CONSTRUCTORS = {
    factory(): factory
    for factory in (
        _get_dict_new,
        _get_frozenset_new,
        _get_set_new,
        _get_list_new,
        _get_tuple_new,
        _get_object_new,
    )
}


if sys.version_info < (3, 4): # pragma: no branch
def _walk_global_ops(code):
"""
@@ -423,28 +407,12 @@ def save_function(self, obj, name=None):
Determines what kind of function obj is (e.g. lambda, defined at
interactive prompt, etc) and handles the pickling appropriately.
"""
try:
should_special_case = obj in _BUILTIN_TYPE_CONSTRUCTORS
except TypeError:
# Methods of builtin types aren't hashable in python 2.
should_special_case = False

if should_special_case:
# We keep a special-cased cache of built-in type constructors at
# global scope, because these functions are structured very
# differently in different python versions and implementations (for
# example, they're instances of types.BuiltinFunctionType in
# CPython, but they're ordinary types.FunctionType instances in
# PyPy).
#
# If the function we've received is in that cache, we just
# serialize it as a lookup into the cache.
return self.save_reduce(_BUILTIN_TYPE_CONSTRUCTORS[obj], (), obj=obj)

write = self.write

if name is None:
name = obj.__name__
name = getattr(obj, '__qualname__', None)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change solves a related bug, but it deserves a different PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you be more explicit? Maybe you could include a non-regression test directly in this PR and document it the changelog?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

precisely, python3 does not make a difference between unbound methods and functions.

Consider pickle._Pickler.dump: this is a non-builtin unbound method.
The bug I was mentioning is that cloudpickle master currently fails at detecting this as global, and instead pickles this dynamically:

import cloudpickle
import pickle

cloudpickle.dumps(pickle._Pickler.dump)

gives:

Out[4]: b'\x80\x04\x95c\x03\x00\x00\x00\x00\x00\x00\x8c\x17cloudpickle.cloudpickle\x94\x8c\x0e_fill_function\x94\x93\x94(h\x00\x8c\x0f_                                                                                                                                        
make_skel_func\x94\x93\x94h\x00\x8c\r_builtin_type\x94\x93\x94\x8c\x08CodeType\x94\x85\x94R\x94(K\x02K\x00K\x02K\x06KCCrt\x00|\x00d\x01                                                                                                                                        
\x83\x02s\x1ct\x01d\x02|\x00j\x02j\x03f\x01\x16\x00\x83\x01\x82\x01|\x00j\x04d\x03k\x05r<|\x00\xa0\x05t\x06t\x07d\x04|\x00j\x04\x83\x02                                                                                                                                        
\x17\x00\xa1\x01\x01\x00|\x00j\x04d\x05k\x05rP|\x00j\x08\xa0\t\xa1\x00\x01\x00|\x00\xa0\n|\x01\xa1\x01\x01\x00|\x00\xa0\x05t\x0b\xa1\x0                                                                                                                                        
1\x01\x00|\x00j\x08\xa0\x0c\xa1\x00\x01\x00d\x06S\x00\x94(\x8c7Write a pickled representation of obj to the open file.\x94\x8c\x0b_file                                                                                                                                        
_write\x94\x8c2Pickler.__init__() was not called by %s.__init__()\x94K\x02\x8c\x02<B\x94K\x04Nt\x94(\x8c\x07hasattr\x94\x8c\rPicklingEr                                                                                                                                        
ror\x94\x8c\t__class__\x94\x8c\x08__name__\x94\x8c\x05proto\x94\x8c\x05write\x94\x8c\x05PROTO\x94\x8c\x04pack\x94\x8c\x06framer\x94\x8c                                                                                                                                        
\rstart_framing\x94\x8c\x04save\x94\x8c\x04STOP\x94\x8c\x0bend_framing\x94t\x94\x8c\x04self\x94\x8c\x03obj\x94\x86\x94\x8c\x1c/usr/lib/                                                                                                                                        
python3.7/pickle.py\x94\x8c\x04dump\x94M\xaa\x01C\x14\x00\x04\n\x01\x04\x01\x0e\x01\n\x01\x16\x01\n\x01\n\x01\n\x01\n\x01\x94))t\x94R\x                                                                                                                                        
94J\xff\xff\xff\xff}\x94(\x8c\x0b__package__\x94\x8c\x00\x94h\x13\x8c\x06pickle\x94\x8c\x08__file__\x94\x8c\x1c/usr/lib/python3.7/pickl                                                                                                                                        
e.py\x94u\x87\x94R\x94}\x94(\x8c\x07globals\x94}\x94(h\x17\x8c\x07_struct\x94\x8c\x04pack\x94\x93\x94h\x1bC\x01.\x94h\x16C\x01\x80\x94h                                                                                                                                        
\x11\x8c\x07_pickle\x94\x8c\rPicklingError\x94\x93\x94u\x8c\x08defaults\x94N\x8c\x04dict\x94}\x94\x8c\x0eclosure_values\x94N\x8c\x06mod                                                                                                                                        
ule\x94h)\x8c\x04name\x94h"\x8c\x03doc\x94h\x0b\x8c\x0bannotations\x94}\x94\x8c\x08qualname\x94\x8c\r_Pickler.dump\x94utR.'

The reason being that pickle._Pickler.dump.__name__ is simply dump, whereas to retrieve this function, we need to get the qualified name of this function, namely _Pickler.dump

If we use __qualname__ instead of __name__, we cannot use getattr though, because getattr does not accept dotted path. Instead, we need to use pickle._getattribute.

Copy link
Contributor

@ogrisel ogrisel Apr 18, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we use qualname instead of name, we cannot use getattr though, because getattr does not accept dotted path. Instead, we need to use pickle._getattribute.

But this (using getattr) is what you are doing here, no?

Sorry, I misread what you said. That sounds good.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So to summarize, this can only be (easily) fixed in Python 3?

I think we still need a specific test for this fix.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in python2, unbound methods are methods, and get serialized using save_instancemethod:

In [57]: isinstance(pickle.Pickler.dump, types.MethodType)
Out[57]: True

if name is None:
name = getattr(obj, '__name__', None)
try:
# whichmodule() could fail, see
# https://bitbucket.org/gutworth/six/issues/63/importing-six-breaks-pickling
@@ -462,31 +430,14 @@ def save_function(self, obj, name=None):
themodule = None

try:
lookedup_by_name = getattr(themodule, name, None)
lookedup_by_name, _ = _getattribute(themodule, name)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

except Exception:
lookedup_by_name = None

if themodule:
if lookedup_by_name is obj:
return self.save_global(obj, name)

# a builtin_function_or_method which comes in as an attribute of some
# object (e.g., itertools.chain.from_iterable) will end
# up with modname "__main__" and so end up here. But these functions
# have no __code__ attribute in CPython, so the handling for
# user-defined functions below will fail.
# So we pickle them here using save_reduce; have to do it differently
# for different python versions.
if not hasattr(obj, '__code__'):
if PY3: # pragma: no branch
rv = obj.__reduce_ex__(self.proto)
else:
if hasattr(obj, '__self__'):
rv = (getattr, (obj.__self__, name))
else:
raise pickle.PicklingError("Can't pickle %r" % obj)
return self.save_reduce(obj=obj, *rv)

# if func is lambda, def'ed at prompt, is in main, or is nested, then
# we'll pickle the actual function object rather than simply saving a
# reference (as is done in default pickler), via save_function_tuple.
@@ -813,12 +764,44 @@ def extract_func_data(self, func):

return (code, f_globals, defaults, closure, dct, base_globals)

def save_builtin_function(self, obj):
    """Pickle a builtin function.

    Objects whose ``__module__`` is ``"__builtin__"`` are saved through
    ``save_global``; every other object is handed to ``save_function``.
    """
    if obj.__module__ == "__builtin__":
        saver = self.save_global
    else:
        saver = self.save_function
    return saver(obj)

dispatch[types.BuiltinFunctionType] = save_builtin_function
if not PY3: # pragma: no branch
# Python3 comes with native reducers that allow builtin functions and
# methods pickling as module/class attributes. The following method
# extends this for python2.
# Please note that currently, neither pickle nor cloudpickle support
# dynamically created builtin functions/method pickling.
def save_builtin_function_or_method(self, obj):
    """Pickle a builtin function or builtin method.

    Bound builtin methods are reduced to ``getattr(obj.__self__, name)``
    and unbound ones (accessed from their class) to
    ``getattr(obj.__objclass__, name)``. Anything else is treated as a
    plain function and pickled by attribute through
    ``Pickler.save_global``.
    """
    if getattr(obj, '__self__', None) is not None:
        # obj is a bound builtin method.
        owner = obj.__self__
    elif hasattr(obj, '__objclass__'):
        # obj is an unbound builtin method (accessed from its class).
        owner = obj.__objclass__
    else:
        # obj is not a method, but a function. Fallback to default
        # pickling by attribute.
        return Pickler.save_global(self, obj)

    return self.save_reduce(getattr, (owner, obj.__name__), obj=obj)

dispatch[types.BuiltinFunctionType] = save_builtin_function_or_method

# A comprehensive summary of the various kinds of builtin methods can
# be found in PEP 579: https://www.python.org/dev/peps/pep-0579/
classmethod_descriptor_type = type(float.__dict__['fromhex'])
wrapper_descriptor_type = type(float.__repr__)
method_wrapper_type = type(1.5.__repr__)

dispatch[classmethod_descriptor_type] = save_builtin_function_or_method
dispatch[wrapper_descriptor_type] = save_builtin_function_or_method
dispatch[method_wrapper_type] = save_builtin_function_or_method

if sys.version_info[:2] < (3, 4):
method_descriptor = type(str.upper)
dispatch[method_descriptor] = save_builtin_function_or_method

def save_global(self, obj, name=None, pack=struct.pack):
"""
@@ -1345,18 +1328,3 @@ def _is_dynamic(module):
except ImportError:
return True
return False


""" Use copy_reg to extend global pickle definitions """

if sys.version_info < (3, 4):  # pragma: no branch
    # The module is called copy_reg on Python 2 and copyreg on Python 3.
    try:
        import copy_reg as copyreg
    except ImportError:
        import copyreg

    method_descriptor = type(str.upper)

    def _reduce_method_descriptor(obj):
        """Reduce a method descriptor to a getattr on its defining class."""
        owner = obj.__objclass__
        return (getattr, (owner, obj.__name__))

    # Register the reducer in the global pickle dispatch table.
    copyreg.pickle(method_descriptor, _reduce_method_descriptor)
161 changes: 145 additions & 16 deletions tests/cloudpickle_test.py
Original file line number Diff line number Diff line change
@@ -641,14 +641,151 @@ def test_NotImplementedType(self):
res = pickle_depickle(type(NotImplemented), protocol=self.protocol)
self.assertEqual(type(NotImplemented), res)

def test_builtin_function_without_module(self):
    """Round-trip builtin callables that are reached as attributes."""
    new_roundtripped = pickle_depickle(
        object.__new__, protocol=self.protocol)
    self.assertEqual(type(new_roundtripped(object)), type(object()))

    chain_roundtripped = pickle_depickle(
        itertools.chain.from_iterable, protocol=self.protocol)
    flattened = list(chain_roundtripped([[1, 2], [3, 4]]))
    self.assertEqual(flattened, [1, 2, 3, 4])
def test_builtin_function(self):
    """Builtin functions must be pickled by reference (identity kept)."""
    # Note that builtin_function_or_method are special-cased by cloudpickle
    # only in python2.

    # builtin function from the __builtin__ module
    assert pickle_depickle(zip, protocol=self.protocol) is zip

    # builtin function from a "regular" module. sys.getrecursionlimit is
    # used rather than sys.getcheckinterval: the latter was removed in
    # Python 3.9, which made the import itself fail on recent interpreters.
    from sys import getrecursionlimit
    assert pickle_depickle(
        getrecursionlimit, protocol=self.protocol) is getrecursionlimit

@pytest.mark.skipif(platform.python_implementation() == 'PyPy' and
                    sys.version_info[:2] == (3, 5),
                    reason="bug of pypy3.5 in builtin-type constructors")
def test_builtin_type_constructor(self):
    """Round-trip the ``__new__`` constructor of several builtin types."""
    # Cloudpickling builtin-type constructors fails on pypy3.5 because of
    # a bug in that interpreter (hence the skip). This test makes sure it
    # works for all other python versions/implementations.
    builtin_types = (list, tuple, set, frozenset, dict, object)

    # pickle_depickle some builtin methods of the __builtin__ module
    for builtin_type in builtin_types:
        roundtripped_new = pickle_depickle(
            builtin_type.__new__, protocol=self.protocol)
        instance = roundtripped_new(builtin_type)
        assert isinstance(instance, builtin_type)

# The next 4 tests cover all the forms in which builtin python methods can
# appear.
# There are 4 kinds of method: 'classic' methods, classmethods,
# staticmethods and slotmethods. They will appear under different types
# depending on whether they are accessed from the __dict__ of their
# class, their class itself, or an instance of their class. This makes
# 12 total combinations.
# This discussion and the following tests are relevant for the CPython
# implementation only. In PyPy, there are no builtin method or builtin
# function types/flavours. The only way in which a builtin method can be
# identified is with its builtin-code __code__ attribute.

def test_builtin_classicmethod(self):
    """Round-trip a 'classic' builtin method in its three access forms."""
    value = 1.5  # float object
    cls = type(value)

    from_instance = value.hex  # builtin_function_or_method
    from_class = cls.hex  # method_descriptor
    from_clsdict = cls.__dict__['hex']  # method_descriptor

    assert from_class is from_clsdict

    roundtripped_from_instance = pickle_depickle(
        from_instance, protocol=self.protocol)
    roundtripped_from_class = pickle_depickle(
        from_class, protocol=self.protocol)
    roundtripped_from_clsdict = pickle_depickle(
        from_clsdict, protocol=self.protocol)

    # Identity is not checked for the bound methods, as they are bound to
    # different float instances; their results are compared instead.
    assert roundtripped_from_instance() == from_instance()
    assert roundtripped_from_class is from_class
    assert roundtripped_from_clsdict is from_clsdict


def test_builtin_classmethod(self):
    """Round-trip a builtin classmethod in its three access forms."""
    value = 1.5  # float object
    cls = type(value)

    from_instance = value.fromhex  # builtin_function_or_method
    from_class = cls.fromhex  # builtin_function_or_method
    from_clsdict = cls.__dict__['fromhex']  # classmethod_descriptor

    roundtripped_from_instance = pickle_depickle(
        from_instance, protocol=self.protocol)
    roundtripped_from_class = pickle_depickle(
        from_class, protocol=self.protocol)
    roundtripped_from_clsdict = pickle_depickle(
        from_clsdict, protocol=self.protocol)

    # float.fromhex takes a string as input.
    arg = "0x1"

    # Identity cannot be tested on either the bound or the unbound
    # methods: the bound methods are bound to different objects, and the
    # unbound methods are actually recreated at each call.
    assert roundtripped_from_instance(arg) == from_instance(arg)
    assert roundtripped_from_class(arg) == from_class(arg)

    implementation = platform.python_implementation()
    if implementation == 'CPython':
        # Roundtripping a classmethod_descriptor results in a
        # builtin_function_or_method (CPython upstream issue).
        assert roundtripped_from_clsdict(arg) == from_clsdict(float, arg)
    elif implementation == 'PyPy':
        # builtin-classmethods are simple classmethods in PyPy (not
        # callable). We test equality of types and the functionality of
        # the __func__ attribute instead. We do not test the identity of
        # the functions, as __func__ attributes of classmethods are not
        # pickleable and must be reconstructed at depickling time.
        assert type(roundtripped_from_clsdict) == type(from_clsdict)
        expected = from_clsdict.__func__(float, arg)
        assert roundtripped_from_clsdict.__func__(float, arg) == expected

def test_builtin_slotmethod(self):
    """Round-trip a builtin slot method in its three access forms."""
    value = 1.5  # float object
    cls = type(value)

    from_instance = value.__repr__  # method-wrapper
    from_class = cls.__repr__  # wrapper_descriptor
    from_clsdict = cls.__dict__['__repr__']  # wrapper_descriptor

    roundtripped_from_instance = pickle_depickle(
        from_instance, protocol=self.protocol)
    roundtripped_from_class = pickle_depickle(
        from_class, protocol=self.protocol)
    roundtripped_from_clsdict = pickle_depickle(
        from_clsdict, protocol=self.protocol)

    # No identity test on the bound slotmethods, as they are bound to
    # different float instances; their results are compared instead.
    assert roundtripped_from_instance() == from_instance()
    assert roundtripped_from_class is from_class
    assert roundtripped_from_clsdict is from_clsdict

@pytest.mark.skipif(
    platform.python_implementation() == "PyPy" or
    sys.version_info[:1] < (3,),
    reason="No known staticmethod example in the python 2 / pypy stdlib")
def test_builtin_staticmethod(self):
    """Round-trip a builtin staticmethod in its three access forms."""
    obj = "foo"  # str object

    bound_staticmethod = obj.maketrans  # builtin_function_or_method
    unbound_staticmethod = type(obj).maketrans  # ditto
    clsdict_staticmethod = type(obj).__dict__['maketrans']  # staticmethod

    assert bound_staticmethod is unbound_staticmethod

    depickled_bound_meth = pickle_depickle(
        bound_staticmethod, protocol=self.protocol)
    depickled_unbound_meth = pickle_depickle(
        unbound_staticmethod, protocol=self.protocol)
    depickled_clsdict_meth = pickle_depickle(
        clsdict_staticmethod, protocol=self.protocol)

    assert depickled_bound_meth is bound_staticmethod
    assert depickled_unbound_meth is unbound_staticmethod

    # staticmethod objects are recreated at depickling time, but the
    # underlying __func__ object is pickled by attribute.
    assert depickled_clsdict_meth.__func__ is clsdict_staticmethod.__func__
    # This comparison used to be a bare expression statement, so the type
    # of the recreated object was never actually checked.
    assert type(depickled_clsdict_meth) is type(clsdict_staticmethod)

@pytest.mark.skipif(tornado is None,
reason="test needs Tornado installed")
@@ -964,14 +1101,6 @@ def test_namedtuple(self):
assert isinstance(depickled_t2, MyTuple)
assert depickled_t2 == t2

def test_builtin_type__new__(self):
    """The ``__new__`` of builtin types must round-trip by identity."""
    # Functions occasionally take the __new__ of these types as default
    # parameters for factories. For example, on Python 3.3,
    # `tuple.__new__` is a default value for some methods of namedtuple.
    builtin_types = (list, tuple, set, frozenset, dict, object)
    for builtin_type in builtin_types:
        roundtripped = pickle_depickle(
            builtin_type.__new__, protocol=self.protocol)
        self.assertTrue(roundtripped is builtin_type.__new__)

def test_interactively_defined_function(self):
# Check that callables defined in the __main__ module of a Python
# script (or jupyter kernel) can be pickled / unpickled / executed.