55import os
66import queue
77import time
8- from multiprocessing import Event , Process
8+ from multiprocessing import Event
99from multiprocessing .queues import Queue
1010from typing import TYPE_CHECKING , Any , Literal , Optional , Tuple , Union , cast
1111
1414from parsl .log_utils import set_file_logger
1515from parsl .monitoring .errors import MonitoringHubStartError
1616from parsl .monitoring .message_type import MessageType
17- from parsl .monitoring .radios import MultiprocessingQueueRadioSender
17+ from parsl .monitoring .radios . base import MultiprocessingQueueRadioSender , RadioConfig
1818from parsl .monitoring .router import router_starter
1919from parsl .monitoring .types import AddressedMonitoringMessage
2020from parsl .multiprocessing import ForkProcess , SizedQueue
@@ -129,7 +129,7 @@ def start(self, dfk_run_dir: str, config_run_dir: Union[str, os.PathLike]) -> No
129129 # in the future, Queue will allow runtime subscripts.
130130
131131 if TYPE_CHECKING :
132- comm_q : Queue [Union [Tuple [ int , int ] , str ]]
132+ comm_q : Queue [Union [int , str ]]
133133 else :
134134 comm_q : Queue
135135
@@ -150,7 +150,6 @@ def start(self, dfk_run_dir: str, config_run_dir: Union[str, os.PathLike]) -> No
150150 "resource_msgs" : self .resource_msgs ,
151151 "exit_event" : self .router_exit_event ,
152152 "hub_address" : self .hub_address ,
153- "udp_port" : self .hub_port ,
154153 "zmq_port_range" : self .hub_port_range ,
155154 "logdir" : self .logdir ,
156155 "logging_level" : logging .DEBUG if self .monitoring_debug else logging .INFO ,
@@ -172,13 +171,13 @@ def start(self, dfk_run_dir: str, config_run_dir: Union[str, os.PathLike]) -> No
172171 self .dbm_proc .start ()
173172 logger .info ("Started the router process {} and DBM process {}" .format (self .router_proc .pid , self .dbm_proc .pid ))
174173
175- self .filesystem_proc = Process (target = filesystem_receiver ,
176- args = (self .logdir , self .resource_msgs , dfk_run_dir ),
177- name = "Monitoring-Filesystem-Process" ,
178- daemon = True
179- )
180- self .filesystem_proc .start ()
181- logger .info (f"Started filesystem radio receiver process { self .filesystem_proc .pid } " )
174+ # self.filesystem_proc = Process(target=filesystem_receiver,
175+ # args=(self.logdir, self.resource_msgs, dfk_run_dir),
176+ # name="Monitoring-Filesystem-Process",
177+ # daemon=True
178+ # )
179+ # self.filesystem_proc.start()
180+ # logger.info(f"Started filesystem radio receiver process {self.filesystem_proc.pid}")
182181
183182 self .radio = MultiprocessingQueueRadioSender (self .resource_msgs )
184183
@@ -194,9 +193,23 @@ def start(self, dfk_run_dir: str, config_run_dir: Union[str, os.PathLike]) -> No
194193 logger .error (f"MonitoringRouter sent an error message: { comm_q_result } " )
195194 raise RuntimeError (f"MonitoringRouter failed to start: { comm_q_result } " )
196195
197- udp_port , zmq_port = comm_q_result
196+ zmq_port = comm_q_result
197+
198+ self .zmq_port = zmq_port
198199
199- self .monitoring_hub_url = "udp://{}:{}" .format (self .hub_address , udp_port )
200+ # TODO: radio configs still need to be initialized, perhaps the first time a radio
201+ # config is used in each executor? (That can't be done at startup because the executor
202+ # list is dynamic: not all executors are known until later.)
203+ # self.radio_config.monitoring_hub_url = "udp://{}:{}".format(self.hub_address, udp_port)
204+ # How can this config be populated properly?
205+ # NOTE: as of this change only the ZMQ port is read back from the monitoring router
206+ # above, so the udp_port used in the commented-out line is no longer available here.
207+ # What does that look like for other radios? htexradio has no specific config at all;
208+ # the filesystem radio has a path (that should have been created?) for config, and a loop
209+ # that needs to be running, started in this start method.
210+ # So, something like: radio_config.receive() generates the appropriate receiver object,
211+ # which has a shutdown method on it for later, and also updates radio_config itself so
212+ # it has the right info to send across the wire? Or some state-driven design like that?
200213
201214 logger .info ("Monitoring Hub initialized" )
202215
@@ -228,7 +241,7 @@ def close(self) -> None:
228241 )
229242 self .router_proc .terminate ()
230243 self .dbm_proc .terminate ()
231- self .filesystem_proc .terminate ()
244+ # self.filesystem_proc.terminate()
232245 logger .info ("Setting router termination event" )
233246 self .router_exit_event .set ()
234247 logger .info ("Waiting for router to terminate" )
@@ -248,9 +261,9 @@ def close(self) -> None:
248261 # should this be message based? it probably doesn't need to be if
249262 # we believe we've received all messages
250263 logger .info ("Terminating filesystem radio receiver process" )
251- self .filesystem_proc .terminate ()
252- self .filesystem_proc .join ()
253- self .filesystem_proc .close ()
264+ # self.filesystem_proc.terminate()
265+ # self.filesystem_proc.join()
266+ # self.filesystem_proc.close()
254267
255268 logger .info ("Closing monitoring multiprocessing queues" )
256269 self .exception_q .close ()
@@ -259,6 +272,17 @@ def close(self) -> None:
259272 self .resource_msgs .join_thread ()
260273 logger .info ("Closed monitoring multiprocessing queues" )
261274
def start_receiver(self, radio_config: RadioConfig, ip: str) -> Any:
    """Create a radio receiver from ``radio_config`` and return it.

    The receiver is obtained by calling ``radio_config.create_receiver``
    with the given ``ip`` and this hub's ``resource_msgs`` queue.

    Open design question carried over from the original notes: the
    returned object should be something the caller can shut down later
    (a "shutdownable"), without expecting any more structure on it, and
    ``radio_config`` may need updating so it can be sent over the wire.

    :param radio_config: radio configuration used to build the receiver.
    :param ip: address passed through to ``create_receiver``.
    :return: whatever ``create_receiver`` returns (not checked for None).
    """
    # TODO: return a shutdownable, so the receiver can be stopped at shutdown.
    receiver = radio_config.create_receiver(
        ip=ip,
        resource_msgs=self.resource_msgs,
    )
    logger.info(f"BENC: created receiver {receiver}")
    return receiver
285+
262286
263287@wrap_with_logs
264288def filesystem_receiver (logdir : str , q : "queue.Queue[AddressedMonitoringMessage]" , run_dir : str ) -> None :
0 commit comments