Skip to content

Commit 48c2068

Browse files
committed
SDK - Controlling which modules are captured with Lightweight components
All func_to_* functions now accept the modules_to_capture parameter: a list of names of modules that will be captured (instead of just being referenced) during the dependency scan. By default, only func.__module__ is captured.
1 parent 516ec1d commit 48c2068

File tree

4 files changed

+60
-14
lines changed

4 files changed

+60
-14
lines changed

sdk/python/kfp/components/_python_op.py

+25-14
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from ._structures import *
2323

2424
from pathlib import Path
25-
from typing import TypeVar, Generic
25+
from typing import TypeVar, Generic, List
2626

2727
T = TypeVar('T')
2828

@@ -45,16 +45,23 @@ def _python_function_name_to_component_name(name):
4545
return re.sub(' +', ' ', name.replace('_', ' ')).strip(' ').capitalize()
4646

4747

48-
def _capture_function_code_using_cloudpickle(func) -> str:
48+
def _capture_function_code_using_cloudpickle(func, modules_to_capture: List[str] = None) -> str:
4949
import sys
5050
import cloudpickle
5151
import pickle
52+
53+
if modules_to_capture is None:
54+
modules_to_capture = [func.__module__]
55+
5256
# Hack to force cloudpickle to capture the whole function instead of just referencing the code file. See https://github.com/cloudpipe/cloudpickle/blob/74d69d759185edaeeac7bdcb7015cfc0c652f204/cloudpickle/cloudpickle.py#L490
57+
old_modules = {}
5358
try: # Try is needed to restore the state if something goes wrong
54-
old_module = sys.modules.pop(func.__module__)
59+
for module_name in modules_to_capture:
60+
if module_name in sys.modules:
61+
old_modules[module_name] = sys.modules.pop(module_name)
5562
func_pickle = cloudpickle.dumps(func, pickle.DEFAULT_PROTOCOL)
5663
finally:
57-
sys.modules[func.__module__] = old_module
64+
sys.modules.update(old_modules)
5865
func_code = '{func_name} = pickle.loads({func_pickle})'.format(func_name=func.__name__, func_pickle=repr(func_pickle))
5966

6067
code_lines = [
@@ -73,14 +80,15 @@ def _capture_function_code_using_cloudpickle(func) -> str:
7380
return '\n'.join(code_lines)
7481

7582

76-
def _func_to_component_spec(func, extra_code='', base_image=_default_base_image) -> ComponentSpec:
83+
def _func_to_component_spec(func, extra_code='', base_image=_default_base_image, modules_to_capture: List[str] = None) -> ComponentSpec:
7784
'''Takes a self-contained python function and converts it to component
7885
7986
Args:
8087
func: Required. The function to be converted
8188
base_image: Optional. Docker image to be used as a base image for the python component. Must have python 3.5+ installed. Default is tensorflow/tensorflow:1.11.0-py3
8289
Note: The image can also be specified by decorating the function with the @python_component decorator. If different base images are explicitly specified in both places, an error is raised.
8390
extra_code: Optional. Python source code that gets placed before the function code. Can be used as workaround to define types used in function signature.
91+
modules_to_capture: Optional. List of module names that will be captured (instead of just referencing) during the dependency scan. By default the func.__module__ is captured.
8492
'''
8593
decorator_base_image = getattr(func, '_component_base_image', None)
8694
if decorator_base_image is not None:
@@ -155,7 +163,7 @@ def annotation_to_type_struct(annotation):
155163

156164
func_name=func.__name__
157165

158-
func_code = _capture_function_code_using_cloudpickle(func)
166+
func_code = _capture_function_code_using_cloudpickle(func, modules_to_capture)
159167

160168
extra_output_external_names = [name + '_file' for name in extra_output_names]
161169

@@ -232,11 +240,11 @@ def annotation_to_type_struct(annotation):
232240
return component_spec
233241

234242

235-
def _func_to_component_dict(func, extra_code='', base_image=_default_base_image):
236-
return _func_to_component_spec(func, extra_code, base_image).to_dict()
243+
def _func_to_component_dict(func, extra_code='', base_image=_default_base_image, modules_to_capture: List[str] = None):
244+
return _func_to_component_spec(func, extra_code, base_image, modules_to_capture).to_dict()
237245

238246

239-
def func_to_component_text(func, extra_code='', base_image=_default_base_image):
247+
def func_to_component_text(func, extra_code='', base_image=_default_base_image, modules_to_capture: List[str] = None):
240248
'''
241249
Converts a Python function to a component definition and returns its textual representation
242250
@@ -254,15 +262,16 @@ def add_multiply_two_numbers(a: float, b: float) -> NamedTuple('DummyName', [('s
254262
base_image: Optional. Specify a custom Docker container image to use in the component. For lightweight components, the image needs to have python 3.5+. Default is tensorflow/tensorflow:1.11.0-py3
255263
Note: The image can also be specified by decorating the function with the @python_component decorator. If different base images are explicitly specified in both places, an error is raised.
256264
extra_code: Optional. Extra code to add before the function code. Can be used as workaround to define types used in function signature.
265+
modules_to_capture: Optional. List of module names that will be captured (instead of just referencing) during the dependency scan. By default the func.__module__ is captured.
257266
258267
Returns:
259268
Textual representation of a component definition
260269
'''
261-
component_dict = _func_to_component_dict(func, extra_code, base_image)
270+
component_dict = _func_to_component_dict(func, extra_code, base_image, modules_to_capture)
262271
return dump_yaml(component_dict)
263272

264273

265-
def func_to_component_file(func, output_component_file, base_image=_default_base_image, extra_code='') -> None:
274+
def func_to_component_file(func, output_component_file, base_image=_default_base_image, extra_code='', modules_to_capture: List[str] = None) -> None:
266275
'''
267276
Converts a Python function to a component definition and writes it to a file
268277
@@ -281,14 +290,15 @@ def add_multiply_two_numbers(a: float, b: float) -> NamedTuple('DummyName', [('s
281290
base_image: Optional. Specify a custom Docker container image to use in the component. For lightweight components, the image needs to have python 3.5+. Default is tensorflow/tensorflow:1.11.0-py3
282291
Note: The image can also be specified by decorating the function with the @python_component decorator. If different base images are explicitly specified in both places, an error is raised.
283292
extra_code: Optional. Extra code to add before the function code. Can be used as workaround to define types used in function signature.
293+
modules_to_capture: Optional. List of module names that will be captured (instead of just referencing) during the dependency scan. By default the func.__module__ is captured.
284294
'''
285295

286-
component_yaml = func_to_component_text(func, extra_code, base_image)
296+
component_yaml = func_to_component_text(func, extra_code, base_image, modules_to_capture)
287297

288298
Path(output_component_file).write_text(component_yaml)
289299

290300

291-
def func_to_container_op(func, output_component_file=None, base_image=_default_base_image, extra_code=''):
301+
def func_to_container_op(func, output_component_file=None, base_image=_default_base_image, extra_code='', modules_to_capture: List[str] = None):
292302
'''
293303
Converts a Python function to a component and returns a task (ContainerOp) factory
294304
@@ -307,13 +317,14 @@ def add_multiply_two_numbers(a: float, b: float) -> NamedTuple('DummyName', [('s
307317
Note: The image can also be specified by decorating the function with the @python_component decorator. If different base images are explicitly specified in both places, an error is raised.
308318
output_component_file: Optional. Write a component definition to a local file. Can be used for sharing.
309319
extra_code: Optional. Extra code to add before the function code. Can be used as workaround to define types used in function signature.
320+
modules_to_capture: Optional. List of module names that will be captured (instead of just referencing) during the dependency scan. By default the func.__module__ is captured.
310321
311322
Returns:
312323
A factory function with a strongly-typed signature taken from the python function.
313324
Once called with the required arguments, the factory constructs a pipeline task instance (ContainerOp) that can run the original function in a container.
314325
'''
315326

316-
component_spec = _func_to_component_spec(func, extra_code, base_image)
327+
component_spec = _func_to_component_spec(func, extra_code, base_image, modules_to_capture)
317328

318329
output_component_file = output_component_file or getattr(func, '_component_target_component_file', None)
319330
if output_component_file:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
module_level_variable = 10
2+
3+
4+
class ModuleLevelClass:
5+
def class_method(self, x):
6+
return x * module_level_variable
7+
8+
9+
def module_func(a: float) -> float:
10+
return a * 5
11+
12+
13+
def module_func_with_deps(a: float, b: float) -> float:
14+
return ModuleLevelClass().class_method(a) + module_func(b)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
from .module1 import module_func_with_deps
2+
3+
def module2_func_with_deps(a: float, b: float) -> float:
4+
return module_func_with_deps(a, b) + 10

sdk/python/tests/components/test_python_op.py

+17
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,23 @@ def test_func_to_container_op_call_other_func_global(self):
136136

137137
self.helper_test_2_in_1_out_component_using_local_call(func, op)
138138

139+
def test_func_to_container_op_with_imported_func(self):
140+
from .test_data.module1 import module_func_with_deps as module1_func_with_deps
141+
func = module1_func_with_deps
142+
op = comp.func_to_container_op(func)
143+
144+
self.helper_test_2_in_1_out_component_using_local_call(func, op)
145+
146+
def test_func_to_container_op_with_imported_func2(self):
147+
from .test_data.module2_which_depends_on_module1 import module2_func_with_deps as module2_func_with_deps
148+
func = module2_func_with_deps
149+
op = comp.func_to_container_op(func, modules_to_capture=[
150+
'tests.components.test_data.module1',
151+
'tests.components.test_data.module2_which_depends_on_module1'
152+
])
153+
154+
self.helper_test_2_in_1_out_component_using_local_call(func, op)
155+
139156
def test_func_to_container_op_multiple_named_typed_outputs(self):
140157
from typing import NamedTuple
141158
def add_multiply_two_numbers(a: float, b: float) -> NamedTuple('DummyName', [('sum', float), ('product', float)]):

0 commit comments

Comments
 (0)