3
3
import importlib
4
4
import json
5
5
import logging
6
+ import os
6
7
import pkgutil
7
8
from typing import Optional
8
9
@@ -14,18 +15,13 @@ class Pipeline:
14
15
"""Base class for pipelines."""
15
16
16
17
_step_registry = {}
18
+ logger = logging .getLogger ("Pipeline" )
17
19
18
20
def __init__(self, initial_data: Optional[DataContainer] = None):
    """Create a pipeline with an empty step list.

    Parameters
    ----------
    initial_data : Optional[DataContainer]
        Optional container stored on the instance as ``initial_data``.

    Raises
    ------
    TypeError
        If any entry of ``self.steps`` is not a ``PipelineStep``.
    """
    self.initial_data = initial_data
    self.steps = []
    # NOTE(review): ``self.steps`` was created empty on the line above, so
    # this validation has nothing to inspect at construction time - confirm
    # whether steps were meant to be accepted as a constructor argument.
    for candidate in self.steps:
        if not isinstance(candidate, PipelineStep):
            raise TypeError("All steps must be instances of PipelineStep")
23
- self .init_logger ()
24
-
25
- def init_logger (self ) -> None :
26
- """Initialize the logger."""
27
- self .logger = logging .getLogger (self .__class__ .__name__ )
28
- self .logger .debug (f"{ self .__class__ .__name__ } initialized" )
29
25
30
26
@classmethod
31
27
def register_step (cls , step_class ):
@@ -65,13 +61,56 @@ def auto_register_steps_from_package(cls, package_name):
65
61
):
66
62
cls .register_step (attribute )
67
63
64
@staticmethod
def load_and_register_custom_steps(custom_steps_path: str) -> None:
    """Load the Python files of a directory and register their step classes.

    Every ``*.py`` file in ``custom_steps_path`` whose name does not start
    with ``__`` is imported as a standalone module. Each class found in the
    module namespace that subclasses ``PipelineStep`` (excluding
    ``PipelineStep`` itself) is passed to ``Pipeline.register_step``.

    Loading is best-effort per file: a file that cannot be imported, or
    whose registration fails, is logged with its traceback and skipped so
    the remaining files are still processed.

    Parameters
    ----------
    custom_steps_path : str
        The path to the directory containing custom step implementation files.

    Returns
    -------
    None

    Raises
    ------
    OSError
        If ``custom_steps_path`` cannot be listed (propagated from
        ``os.listdir``).
    """
    # A bare ``import importlib`` does not guarantee that the ``util``
    # submodule is bound, so import it explicitly where it is used.
    import importlib.util

    Pipeline.logger.debug(f"Loading custom steps from: {custom_steps_path} ")

    # os.listdir returns entries in arbitrary order; sorting keeps the
    # registration order reproducible across platforms and runs.
    for filename in sorted(os.listdir(custom_steps_path)):
        if not filename.endswith(".py") or filename.startswith("__"):
            continue

        filepath = os.path.join(custom_steps_path, filename)
        module_name = os.path.splitext(filename)[0]

        try:
            spec = importlib.util.spec_from_file_location(module_name, filepath)
            if spec is None or spec.loader is None:
                raise ImportError(f"No import spec could be built for {filepath}")

            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)
            Pipeline.logger.debug(f"Successfully loaded module: {module_name} ")

            for attribute_name in dir(module):
                attribute = getattr(module, attribute_name)
                if (
                    isinstance(attribute, type)
                    and issubclass(attribute, PipelineStep)
                    and attribute is not PipelineStep
                ):
                    Pipeline.register_step(attribute)
                    Pipeline.logger.debug(f"Registered step class: {attribute_name}")
        except Exception as e:
            # Keep going with the other files, but keep the traceback so the
            # failing custom step can actually be debugged.
            Pipeline.logger.error(
                f"Failed to load module: {module_name} . Error: {e} ",
                exc_info=True,
            )
106
+
68
107
def run(self) -> DataContainer:
    """Execute the registered steps in order and return the final data.

    A new ``DataContainer`` is created, then handed to each step's
    ``execute`` method in turn; the value returned by one step becomes the
    input of the next.

    Returns
    -------
    DataContainer
        The value returned by the last step, or the freshly created
        container when there are no steps.
    """
    # NOTE(review): ``self.initial_data`` is not read here; the run always
    # starts from an empty container - confirm this is intended.
    current = DataContainer()

    for position, stage in enumerate(self.steps, start=1):
        stage_name = stage.__class__.__name__
        Pipeline.logger.info(f"Running {stage_name} - {position} / {len(self.steps)} ")
        current = stage.execute(current)

    return current
77
116
@@ -85,14 +124,18 @@ def from_json(cls, path: str) -> Pipeline:
85
124
with open (path , "r" ) as config_file :
86
125
config = json .load (config_file )
87
126
88
- pipeline = Pipeline () # Assuming you have a default or base Pipeline class
127
+ custom_steps_path = config .get ("custom_steps_path" )
128
+ if custom_steps_path :
129
+ Pipeline .load_and_register_custom_steps (custom_steps_path )
130
+
131
+ pipeline = Pipeline ()
89
132
steps = []
90
133
91
134
for step_config in config ["pipeline" ]["steps" ]:
92
135
step_type = step_config ["step_type" ]
93
136
parameters = step_config .get ("parameters" , {})
94
137
95
- pipeline .logger .info (
138
+ Pipeline .logger .info (
96
139
f"Creating step { step_type } with parameters: \n { json .dumps (parameters , indent = 4 )} "
97
140
)
98
141
0 commit comments