Skip to content

Commit 1d73b39

Browse files
authored
Global variables handling in dynamically defined functions. (#205)
1 parent 5c781be commit 1d73b39

File tree

3 files changed

+164
-2
lines changed

3 files changed

+164
-2
lines changed

Diff for: CHANGES.md

+5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
master
22
======
33

4+
- Ensure that unpickling a function defined in a dynamic module several times
5+
sequentially does not reset the values of global variables.
6+
([issue #187](https://github.com/cloudpipe/cloudpickle/issues/187))
7+
8+
49
0.5.6
510
=====
611

Diff for: cloudpickle/cloudpickle.py

+23-2
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,22 @@
7878
PY3 = True
7979

8080

81+
# Container for the global namespace to ensure consistent unpickling of
82+
# functions defined in dynamic modules (modules not registered in sys.modules).
83+
_dynamic_modules_globals = weakref.WeakValueDictionary()
84+
85+
86+
class _DynamicModuleFuncGlobals(dict):
87+
"""Global variables referenced by a function defined in a dynamic module
88+
89+
To avoid leaking references we store such context in a WeakValueDictionary
90+
instance. However instances of python builtin types such as dict cannot
91+
be used directly as values in such a construct, hence the need for a
92+
derived class.
93+
"""
94+
pass
95+
96+
8197
def _make_cell_set_template_code():
8298
"""Get the Python compiler to emit LOAD_FAST(arg); STORE_DEREF
8399
@@ -1090,12 +1106,17 @@ def _make_skel_func(code, cell_count, base_globals=None):
10901106
if base_globals is None:
10911107
base_globals = {}
10921108
elif isinstance(base_globals, str):
1109+
base_globals_name = base_globals
10931110
if sys.modules.get(base_globals, None) is not None:
1094-
# this checks if we can import the previous environment the object
1111+
# This checks if we can import the previous environment the object
10951112
# lived in
10961113
base_globals = vars(sys.modules[base_globals])
10971114
else:
1098-
base_globals = {}
1115+
base_globals = _dynamic_modules_globals.get(
1116+
base_globals_name, None)
1117+
if base_globals is None:
1118+
base_globals = _DynamicModuleFuncGlobals()
1119+
_dynamic_modules_globals[base_globals_name] = base_globals
10991120

11001121
base_globals['__builtins__'] = __builtins__
11011122

Diff for: tests/cloudpickle_test.py

+136
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import collections
55
import base64
66
import functools
7+
import gc
78
import imp
89
from io import BytesIO
910
import itertools
@@ -44,6 +45,7 @@
4445

4546
import cloudpickle
4647
from cloudpickle.cloudpickle import _find_module, _make_empty_cell, cell_set
48+
from cloudpickle.cloudpickle import _dynamic_modules_globals
4749

4850
from .testutils import subprocess_pickle_echo
4951
from .testutils import assert_run_python_script
@@ -441,6 +443,59 @@ def method(self, x):
441443
mod1, mod2 = pickle_depickle([mod, mod])
442444
self.assertEqual(id(mod1), id(mod2))
443445

446+
def test_dynamic_modules_globals(self):
447+
# _dynamic_modules_globals is a WeakValueDictionary, so if a value
448+
# in this dict (containing a set of global variables from a dynamic
449+
# module created in the parent process) has no other reference than in
450+
# this dict in the child process, it will be garbage collected.
451+
452+
# We first create a module
453+
mod = imp.new_module('mod')
454+
code = '''
455+
x = 1
456+
def func():
457+
return
458+
'''
459+
exec(textwrap.dedent(code), mod.__dict__)
460+
461+
pickled_module_path = 'mod_f.pkl'
462+
463+
child_process_script = '''
464+
import pickle
465+
from cloudpickle.cloudpickle import _dynamic_modules_globals
466+
import gc
467+
with open("{pickled_module_path}", 'rb') as f:
468+
func = pickle.load(f)
469+
470+
# A dictionary storing the globals of the newly unpickled function
471+
# should have been created
472+
assert list(_dynamic_modules_globals.keys()) == ['mod']
473+
474+
# func.__globals__ is the only non-weak reference to
475+
# _dynamic_modules_globals['mod']. By deleting func, we also delete
476+
# _dynamic_modules_globals['mod']
477+
del func
478+
gc.collect()
479+
480+
# There is no reference to the globals of func since func has been
481+
# deleted and _dynamic_modules_globals is a WeakValueDictionary,
482+
# so _dynamic_modules_globals should now be empty
483+
assert list(_dynamic_modules_globals.keys()) == []
484+
'''
485+
486+
child_process_script = child_process_script.format(
487+
pickled_module_path=pickled_module_path)
488+
489+
try:
490+
with open(pickled_module_path, 'wb') as f:
491+
cloudpickle.dump(mod.func, f)
492+
493+
assert_run_python_script(textwrap.dedent(child_process_script))
494+
495+
finally:
496+
os.unlink(pickled_module_path)
497+
498+
444499
def test_load_dynamic_module_in_grandchild_process(self):
445500
# Make sure that when loaded, a dynamic module preserves its dynamic
446501
# property. Otherwise, this will lead to an ImportError if pickled in
@@ -1018,6 +1073,87 @@ def f1():
10181073
finally:
10191074
_TEST_GLOBAL_VARIABLE = orig_value
10201075

1076+
def test_function_from_dynamic_module_with_globals_modifications(self):
1077+
# This test verifies that the global variable state of a function
1078+
# defined in a dynamic module in a child process is not reset by
1079+
# subsequent unpickling.
1080+
1081+
# first, we create a dynamic module in the parent process
1082+
mod = imp.new_module('mod')
1083+
code = '''
1084+
GLOBAL_STATE = "initial value"
1085+
1086+
def func_defined_in_dynamic_module(v=None):
1087+
global GLOBAL_STATE
1088+
if v is not None:
1089+
GLOBAL_STATE = v
1090+
return GLOBAL_STATE
1091+
'''
1092+
exec(textwrap.dedent(code), mod.__dict__)
1093+
1094+
try:
1095+
# Simple sanity check on the function's output
1096+
assert mod.func_defined_in_dynamic_module() == "initial value"
1097+
1098+
# The function of mod is pickled two times, with two different
1099+
# values for the global variable GLOBAL_STATE.
1100+
# Then we launch a child process that sequentially unpickles the
1101+
# two functions. Those unpickled functions should share the same
1102+
# global variables in the child process:
1103+
# Once the first function gets unpickled, mod is created and
1104+
# tracked in the child environment. This state is preserved
1105+
# when unpickling the second function whatever the global variable
1106+
# GLOBAL_STATE's value at the time of pickling.
1107+
1108+
with open('function_with_initial_globals.pkl', 'wb') as f:
1109+
cloudpickle.dump(mod.func_defined_in_dynamic_module, f)
1110+
1111+
# Change the mod's global variable
1112+
mod.GLOBAL_STATE = 'changed value'
1113+
1114+
# At this point, mod.func_defined_in_dynamic_module()
1115+
# returns the updated value. Let's pickle it again.
1116+
assert mod.func_defined_in_dynamic_module() == 'changed value'
1117+
with open('function_with_modified_globals.pkl', 'wb') as f:
1118+
cloudpickle.dump(mod.func_defined_in_dynamic_module, f)
1119+
1120+
child_process_code = """
1121+
import pickle
1122+
1123+
with open('function_with_initial_globals.pkl','rb') as f:
1124+
func_with_initial_globals = pickle.load(f)
1125+
1126+
# At this point, a module called 'mod' should exist in
1127+
# _dynamic_modules_globals. Further function loading
1128+
# will use the globals living in mod.
1129+
1130+
assert func_with_initial_globals() == 'initial value'
1131+
1132+
# Load a function with initial global variable that was
1133+
# pickled after a change in the global variable
1134+
with open('function_with_modified_globals.pkl','rb') as f:
1135+
func_with_modified_globals = pickle.load(f)
1136+
1137+
# assert that this unpickling did not modify the value of
1138+
# the global variable
1139+
assert func_with_modified_globals() == 'initial value'
1140+
1141+
# Update the value from the child process and check that
1142+
# unpickling again does not reset our change.
1143+
assert func_with_initial_globals('new value') == 'new value'
1144+
assert func_with_modified_globals() == 'new value'
1145+
1146+
with open('function_with_initial_globals.pkl','rb') as f:
1147+
func_with_initial_globals = pickle.load(f)
1148+
assert func_with_initial_globals() == 'new value'
1149+
assert func_with_modified_globals() == 'new value'
1150+
"""
1151+
assert_run_python_script(textwrap.dedent(child_process_code))
1152+
1153+
finally:
1154+
os.unlink('function_with_initial_globals.pkl')
1155+
os.unlink('function_with_modified_globals.pkl')
1156+
10211157
@pytest.mark.skipif(sys.version_info >= (3, 0),
10221158
reason="hardcoded pickle bytes for 2.7")
10231159
def test_function_pickle_compat_0_4_0(self):

0 commit comments

Comments
 (0)