@@ -88,6 +88,9 @@ def g():
88
88
# communication speed over compatibility:
89
89
DEFAULT_PROTOCOL = pickle .HIGHEST_PROTOCOL
90
90
91
# Names of modules whose resources should be treated as dynamic, i.e. whose
# functions and classes are pickled by value rather than by reference.
# Entries are `module.__name__` strings: they are added by
# register_pickle_by_value, removed by unregister_pickle_by_value and looked
# up (for the module itself and its dotted-name parents) by
# _is_registered_pickle_by_value.
_PICKLE_BY_VALUE_MODULES = set()
93
+
91
94
# Track the provenance of reconstructed dynamic classes to make it possible to
92
95
# reconstruct instances from the matching singleton class definition when
93
96
# appropriate and preserve the usual "isinstance" semantics of Python objects.
@@ -124,6 +127,77 @@ def _lookup_class_or_track(class_tracker_id, class_def):
124
127
return class_def
125
128
126
129
130
def register_pickle_by_value(module):
    """Register a module so that its functions and classes pickle by value.

    Functions and classes that are attributes of an importable module are
    pickled by reference by default: loading them relies on re-importing
    the attribute from the module.

    Once `register_pickle_by_value(module)` has been called, the functions
    and classes of that module are pickled by value instead, so they can be
    loaded in Python processes where the module is not importable.

    This is mostly useful while developing a module in a distributed
    execution environment: restarting the client Python process with the
    new source code is enough, with no need to re-install the module on
    the worker nodes or to restart the workers.

    Note: this feature is considered experimental. See the cloudpickle
    README.md file for more details and limitations.

    Raises:
        ValueError: if `module` is not a module object, or if its name is
            not present in `sys.modules`.
    """
    is_module_object = isinstance(module, types.ModuleType)
    if not is_module_object:
        raise ValueError(
            f"Input should be a module object, got {str(module)} instead"
        )

    module_name = module.__name__
    # Presence in sys.modules is enforced even though nothing here needs it
    # yet: cloudpickle may later have to reach registered modules to
    # introspect relative imports inside functions pickled by value (see
    # https://github.com/cloudpipe/cloudpickle/pull/417#issuecomment-873684633).
    # Checking at registration time, and assuming the module is still there
    # at pickling time, keeps that option open without a breaking change.
    # Storing a weakref to the module would be another alternative.
    if module_name not in sys.modules:
        not_imported_message = (
            f"{module} was not imported correctly, have you used an "
            "`import` statement to access it?"
        )
        raise ValueError(not_imported_message)

    _PICKLE_BY_VALUE_MODULES.add(module_name)
169
+
170
+
171
def unregister_pickle_by_value(module):
    """Stop pickling the functions and classes of `module` by value.

    Raises:
        ValueError: if `module` is not a module object, or if its name is
            not currently registered for pickling by value.
    """
    if not isinstance(module, types.ModuleType):
        raise ValueError(
            f"Input should be a module object, got {str(module)} instead"
        )

    module_name = module.__name__
    if module_name in _PICKLE_BY_VALUE_MODULES:
        _PICKLE_BY_VALUE_MODULES.remove(module_name)
        return

    raise ValueError(f"{module} is not registered for pickle by value")
181
+
182
+
183
def list_registry_pickle_by_value():
    """Return a copy of the set of module names registered for pickle by value.

    A new set is returned, so mutating it does not affect the registry.
    """
    registered_names = set(_PICKLE_BY_VALUE_MODULES)
    return registered_names
185
+
186
+
187
def _is_registered_pickle_by_value(module):
    """Tell whether `module` or one of its parents is pickled by value.

    The lookup uses `module.__name__`: the full dotted name is checked
    first, then each shorter dotted prefix in turn (`a.b.c`, `a.b`, `a`).
    Returns True as soon as one of them is found in the registry, and
    False otherwise.
    """
    candidate = module.__name__
    if candidate in _PICKLE_BY_VALUE_MODULES:
        return True

    # Strip the last dotted component until no separator is left.
    while "." in candidate:
        candidate = candidate.rpartition(".")[0]
        if candidate in _PICKLE_BY_VALUE_MODULES:
            return True

    return False
199
+
200
+
127
201
def _whichmodule (obj , name ):
128
202
"""Find the module an object belongs to.
129
203
@@ -170,18 +244,35 @@ def _whichmodule(obj, name):
170
244
return None
171
245
172
246
173
- def _is_importable (obj , name = None ):
174
- """Dispatcher utility to test the importability of various constructs."""
175
- if isinstance (obj , types .FunctionType ):
176
- return _lookup_module_and_qualname (obj , name = name ) is not None
177
- elif issubclass (type (obj ), type ):
178
- return _lookup_module_and_qualname (obj , name = name ) is not None
247
+ def _should_pickle_by_reference (obj , name = None ):
248
+ """Test whether a function or a class should be pickled by reference
249
+
250
+ Pickling by reference means that the object (typically a function or a
251
+ class) is an attribute of a module that is assumed to be importable in the
252
+ target Python environment. Loading will therefore rely on importing the
253
+ module and then calling `getattr` on it to access the function or class.
254
+
255
+ Pickling by reference is the only option to pickle functions and classes
256
+ in the standard library. In cloudpickle the alternative option is to
257
+ pickle by value (for instance for interactively or locally defined
258
+ functions and classes or for attributes of modules that have been
259
+ explicitly registered to be pickled by value).
260
+ """
261
+ if isinstance (obj , types .FunctionType ) or issubclass (type (obj ), type ):
262
+ module_and_name = _lookup_module_and_qualname (obj , name = name )
263
+ if module_and_name is None :
264
+ return False
265
+ module , name = module_and_name
266
+ return not _is_registered_pickle_by_value (module )
267
+
179
268
elif isinstance (obj , types .ModuleType ):
180
269
# We assume that sys.modules is primarily used as a cache mechanism for
181
270
# the Python import machinery. Checking if a module has been added in
182
- # is sys.modules therefore a cheap and simple heuristic to tell us whether
183
- # we can assume that a given module could be imported by name in
184
- # another Python process.
271
+ # sys.modules is therefore a cheap and simple heuristic to tell us
272
+ # whether we can assume that a given module could be imported by name
273
+ # in another Python process.
274
+ if _is_registered_pickle_by_value (obj ):
275
+ return False
185
276
return obj .__name__ in sys .modules
186
277
else :
187
278
raise TypeError (
@@ -839,10 +930,15 @@ def _decompose_typevar(obj):
839
930
840
931
841
932
def _typevar_reduce(obj):
    """Build the reduce tuple used to pickle the TypeVar `obj`.

    When `obj` can be located as an attribute of a module that is not
    registered for pickling by value, it is pickled by reference as
    `(getattr, (module, name))`. Otherwise it is pickled by value as
    `(_make_typevar, _decompose_typevar(obj))`.
    """
    # The module itself is needed to consult the pickle-by-value registry,
    # so the lookup is done here (with the name passed explicitly) instead
    # of going through _should_pickle_by_reference.
    located = _lookup_module_and_qualname(obj, name=obj.__name__)

    pickle_by_reference = (
        located is not None
        and not _is_registered_pickle_by_value(located[0])
    )
    if pickle_by_reference:
        return (getattr, located)

    return (_make_typevar, _decompose_typevar(obj))
847
943
848
944
0 commit comments