Skip to content

Commit a20b804

Browse files
ogriseleffigies
andauthored
Make function piclkling deterministic by default (#428)
Co-authored-by: Christopher J. Markiewicz <[email protected]>
1 parent 2a79b69 commit a20b804

File tree

5 files changed

+78
-11
lines changed

5 files changed

+78
-11
lines changed

Diff for: CHANGES.md

+4
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ dev
1515
_is_parametrized_type_hint to limit false positives.
1616
([PR #409](https://github.com/cloudpipe/cloudpickle/pull/409))
1717

18+
- Suppressed a source of non-determinism when pickling dynamically defined
19+
functions and handles the deprecation of co_lnotab in Python 3.10+.
20+
([PR #428](https://github.com/cloudpipe/cloudpickle/pull/428))
21+
1822
1.6.0
1923
=====
2024

Diff for: cloudpickle/cloudpickle.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,10 @@ def _extract_code_globals(co):
236236
out_names = _extract_code_globals_cache.get(co)
237237
if out_names is None:
238238
names = co.co_names
239-
out_names = {names[oparg] for _, oparg in _walk_global_ops(co)}
239+
# We use a dict with None values instead of a set to get a
240+
# deterministic order (assuming Python 3.6+) and avoid introducing
241+
# non-deterministic pickle bytes as a results.
242+
out_names = {names[oparg]: None for _, oparg in _walk_global_ops(co)}
240243

241244
# Declaring a function inside another one using the "def ..."
242245
# syntax generates a constant code object corresponding to the one
@@ -247,7 +250,7 @@ def _extract_code_globals(co):
247250
if co.co_consts:
248251
for const in co.co_consts:
249252
if isinstance(const, types.CodeType):
250-
out_names |= _extract_code_globals(const)
253+
out_names.update(_extract_code_globals(const))
251254

252255
_extract_code_globals_cache[co] = out_names
253256

Diff for: cloudpickle/cloudpickle_fast.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,19 @@ def _enum_getstate(obj):
244244

245245
def _code_reduce(obj):
246246
"""codeobject reducer"""
247-
if hasattr(obj, "co_posonlyargcount"): # pragma: no branch
247+
if hasattr(obj, "co_linetable"): # pragma: no branch
248+
# Python 3.10 and later: obj.co_lnotab is deprecated and constructor
249+
# expects obj.co_linetable instead.
250+
args = (
251+
obj.co_argcount, obj.co_posonlyargcount,
252+
obj.co_kwonlyargcount, obj.co_nlocals, obj.co_stacksize,
253+
obj.co_flags, obj.co_code, obj.co_consts, obj.co_names,
254+
obj.co_varnames, obj.co_filename, obj.co_name,
255+
obj.co_firstlineno, obj.co_linetable, obj.co_freevars,
256+
obj.co_cellvars
257+
)
258+
elif hasattr(obj, "co_posonlyargcount"):
259+
# Backward compat for 3.9 and older
248260
args = (
249261
obj.co_argcount, obj.co_posonlyargcount,
250262
obj.co_kwonlyargcount, obj.co_nlocals, obj.co_stacksize,
@@ -254,6 +266,7 @@ def _code_reduce(obj):
254266
obj.co_cellvars
255267
)
256268
else:
269+
# Backward compat for even older versions of Python
257270
args = (
258271
obj.co_argcount, obj.co_kwonlyargcount, obj.co_nlocals,
259272
obj.co_stacksize, obj.co_flags, obj.co_code, obj.co_consts,

Diff for: tests/cloudpickle_test.py

+31-2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import logging
1111
import math
1212
from operator import itemgetter, attrgetter
13+
import pickletools
1314
import platform
1415
import random
1516
import shutil
@@ -50,13 +51,15 @@
5051
from cloudpickle.cloudpickle import _lookup_module_and_qualname
5152

5253
from .testutils import subprocess_pickle_echo
54+
from .testutils import subprocess_pickle_string
5355
from .testutils import assert_run_python_script
5456
from .testutils import subprocess_worker
5557

5658
from _cloudpickle_testpkg import relative_imports_factory
5759

5860

5961
_TEST_GLOBAL_VARIABLE = "default_value"
62+
_TEST_GLOBAL_VARIABLE2 = "another_value"
6063

6164

6265
class RaiserOnPickle(object):
@@ -2095,8 +2098,8 @@ def inner_function():
20952098
return _TEST_GLOBAL_VARIABLE
20962099
return inner_function
20972100

2098-
globals_ = cloudpickle.cloudpickle._extract_code_globals(
2099-
function_factory.__code__)
2101+
globals_ = set(cloudpickle.cloudpickle._extract_code_globals(
2102+
function_factory.__code__).keys())
21002103
assert globals_ == {'_TEST_GLOBAL_VARIABLE'}
21012104

21022105
depickled_factory = pickle_depickle(function_factory,
@@ -2330,6 +2333,32 @@ def __type__(self):
23302333
o = MyClass()
23312334
pickle_depickle(o, protocol=self.protocol)
23322335

2336+
@pytest.mark.skipif(
2337+
sys.version_info < (3, 7),
2338+
reason="Determinism can only be guaranteed for Python 3.7+"
2339+
)
2340+
def test_deterministic_pickle_bytes_for_function(self):
2341+
# Ensure that functions with references to several global names are
2342+
# pickled to fixed bytes that do not depend on the PYTHONHASHSEED of
2343+
# the Python process.
2344+
vals = set()
2345+
2346+
def func_with_globals():
2347+
return _TEST_GLOBAL_VARIABLE + _TEST_GLOBAL_VARIABLE2
2348+
2349+
for i in range(5):
2350+
vals.add(
2351+
subprocess_pickle_string(func_with_globals,
2352+
protocol=self.protocol,
2353+
add_env={"PYTHONHASHSEED": str(i)}))
2354+
if len(vals) > 1:
2355+
# Print additional debug info on stdout with dis:
2356+
for val in vals:
2357+
pickletools.dis(val)
2358+
pytest.fail(
2359+
"Expected a single deterministic payload, got %d/5" % len(vals)
2360+
)
2361+
23332362

23342363
class Protocol2CloudPickleTest(CloudPickleTest):
23352364

Diff for: tests/testutils.py

+24-6
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import os
33
import os.path as op
44
import tempfile
5-
import base64
65
from subprocess import Popen, check_output, PIPE, STDOUT, CalledProcessError
76
from cloudpickle.compat import pickle
87
from contextlib import contextmanager
@@ -38,15 +37,16 @@ def _make_cwd_env():
3837
return cloudpickle_repo_folder, env
3938

4039

41-
def subprocess_pickle_echo(input_data, protocol=None, timeout=TIMEOUT):
42-
"""Echo function with a child Python process
40+
def subprocess_pickle_string(input_data, protocol=None, timeout=TIMEOUT,
41+
add_env=None):
42+
"""Retrieve pickle string of an object generated by a child Python process
4343
4444
Pickle the input data into a buffer, send it to a subprocess via
4545
stdin, expect the subprocess to unpickle, re-pickle that data back
4646
and send it back to the parent process via stdout for final unpickling.
4747
48-
>>> subprocess_pickle_echo([1, 'a', None])
49-
[1, 'a', None]
48+
>>> testutils.subprocess_pickle_string([1, 'a', None], protocol=2)
49+
b'\x80\x02]q\x00(K\x01X\x01\x00\x00\x00aq\x01Ne.'
5050
5151
"""
5252
# run then pickle_echo(protocol=protocol) in __main__:
@@ -56,6 +56,8 @@ def subprocess_pickle_echo(input_data, protocol=None, timeout=TIMEOUT):
5656
# which is deprecated in python 3.8
5757
cmd = [sys.executable, '-W ignore', __file__, "--protocol", str(protocol)]
5858
cwd, env = _make_cwd_env()
59+
if add_env:
60+
env.update(add_env)
5961
proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=cwd, env=env,
6062
bufsize=4096)
6163
pickle_string = dumps(input_data, protocol=protocol)
@@ -67,14 +69,30 @@ def subprocess_pickle_echo(input_data, protocol=None, timeout=TIMEOUT):
6769
message = "Subprocess returned %d: " % proc.returncode
6870
message += err.decode('utf-8')
6971
raise RuntimeError(message)
70-
return loads(out)
72+
return out
7173
except TimeoutExpired as e:
7274
proc.kill()
7375
out, err = proc.communicate()
7476
message = u"\n".join([out.decode('utf-8'), err.decode('utf-8')])
7577
raise RuntimeError(message) from e
7678

7779

80+
def subprocess_pickle_echo(input_data, protocol=None, timeout=TIMEOUT,
81+
add_env=None):
82+
"""Echo function with a child Python process
83+
Pickle the input data into a buffer, send it to a subprocess via
84+
stdin, expect the subprocess to unpickle, re-pickle that data back
85+
and send it back to the parent process via stdout for final unpickling.
86+
>>> subprocess_pickle_echo([1, 'a', None])
87+
[1, 'a', None]
88+
"""
89+
out = subprocess_pickle_string(input_data,
90+
protocol=protocol,
91+
timeout=timeout,
92+
add_env=add_env)
93+
return loads(out)
94+
95+
7896
def _read_all_bytes(stream_in, chunk_size=4096):
7997
all_data = b""
8098
while True:

0 commit comments

Comments
 (0)