|
18 | 18 |
|
19 | 19 | from . import logger, utils |
20 | 20 |
|
| 21 | +log = logger.get_logger() |
| 22 | + |
21 | 23 | # R as in "Result", so yes it's covariant. |
22 | 24 | # pylint: disable=typevar-name-incorrect-variance |
23 | 25 | R = tp.TypeVar("R", covariant=True) |
@@ -128,12 +130,10 @@ def update(self) -> None: |
128 | 130 | return |
129 | 131 | self._num_calls += 1 |
130 | 132 | try: |
131 | | - logger.get_logger().debug(f"Call #{self.num_calls} - Command {' '.join(command)}") |
| 133 | + log.debug(f"Call #{self.num_calls} - Command {' '.join(command)}") |
132 | 134 | self._output = subprocess.check_output(command, shell=False) |
133 | 135 | except Exception as e: |
134 | | - logger.get_logger().warning( |
135 | | - f"Call #{self.num_calls} - Bypassing sacct error {e}, status may be inaccurate." |
136 | | - ) |
| 136 | + log.warning(f"Call #{self.num_calls} - Bypassing sacct error {e}, status may be inaccurate.") |
137 | 137 | else: |
138 | 138 | self._info_dict.update(self.read_info(self._output)) |
139 | 139 | self._last_status_check = _time.time() |
@@ -323,18 +323,27 @@ def exception(self) -> tp.Optional[tp.Union[utils.UncompletedJobError, utils.Fai |
323 | 323 | return exceptions[0] |
324 | 324 |
|
325 | 325 | try: |
326 | | - outcome, trace = self._get_outcome_and_result() |
| 326 | + outcome, original_err = self._get_outcome_and_result() |
| 327 | + if outcome == "success": |
| 328 | + return None |
327 | 329 | except utils.UncompletedJobError as e: |
328 | 330 | return e |
329 | | - if outcome == "error": |
| 331 | + if isinstance(original_err, str): |
| 332 | + # Normally original_err is an exception, unless we failed to pickle it. |
| 333 | + trace = original_err |
330 | 334 | return utils.FailedJobError( |
331 | 335 | f"Job (task={self.task_id}) failed during processing with trace:\n" |
332 | 336 | f"----------------------\n{trace}\n" |
333 | 337 | "----------------------\n" |
334 | 338 | f"You can check full logs with 'job.stderr({self.task_id})' and 'job.stdout({self.task_id})'" |
335 | 339 | f"or at paths:\n - {self.paths.stderr}\n - {self.paths.stdout}" |
336 | 340 | ) |
337 | | - return None |
| 341 | + log.error( |
| 342 | +            f"Job (task={self.task_id}) failed.\n"
| 343 | +            f"You can check full logs with 'job.stderr({self.task_id})' and 'job.stdout({self.task_id})'\n"
| 344 | + f"or at paths:\n - {self.paths.stderr}\n - {self.paths.stdout}" |
| 345 | + ) |
| 346 | + return original_err |
338 | 347 |
|
339 | 348 | def _get_outcome_and_result(self) -> tp.Tuple[str, tp.Any]: |
340 | 349 | """Getter for the output of the submitted function. |
@@ -371,17 +380,17 @@ def _get_outcome_and_result(self) -> tp.Tuple[str, tp.Any]: |
371 | 380 | f"Job {self.job_id} (task: {self.task_id}) with path {self.paths.result_pickle}", |
372 | 381 | f"has not produced any output (state: {self.state})", |
373 | 382 | ] |
374 | | - log = self.stderr() |
375 | | - if log: |
376 | | - message.extend(["Error stream produced:", "-" * 40, log]) |
| 383 | + stderr = self.stderr() |
| 384 | + if stderr: |
| 385 | + message.extend(["Error stream produced:", "-" * 40, stderr]) |
377 | 386 | elif self.paths.stdout.exists(): |
378 | | - log = subprocess.check_output(["tail", "-40", str(self.paths.stdout)], encoding="utf-8") |
| 387 | + stderr = subprocess.check_output(["tail", "-40", str(self.paths.stdout)], encoding="utf-8") |
379 | 388 | message.extend( |
380 | | - [f"No error stream produced. Look at stdout: {self.paths.stdout}", "-" * 40, log] |
| 389 | + [f"No error stream produced. Look at stdout: {self.paths.stdout}", "-" * 40, stderr] |
381 | 390 | ) |
382 | 391 | else: |
383 | 392 | message.append(f"No output/error stream produced ! Check: {self.paths.stdout}") |
384 | | - raise utils.UncompletedJobError("\n".join(message)) |
| 393 | + raise utils.JobResultsNotFoundError("\n".join(message)) |
385 | 394 | try: |
386 | 395 | output: tp.Tuple[str, tp.Any] = utils.pickle_load(self.paths.result_pickle) |
387 | 396 | except EOFError: |
@@ -504,7 +513,7 @@ def __repr__(self) -> str: |
504 | 513 | try: |
505 | 514 | state = self.state |
506 | 515 | except Exception as e: |
507 | | - logger.get_logger().warning(f"Bypassing state error:\n{e}") |
| 516 | + log.warning(f"Bypassing state error:\n{e}") |
508 | 517 | return f'{self.__class__.__name__}<job_id={self.job_id}, task_id={self.task_id}, state="{state}">' |
509 | 518 |
|
510 | 519 | def __del__(self) -> None: |
@@ -702,9 +711,7 @@ def batch(self, allow_implicit_submissions: bool = False) -> tp.Iterator[None]: |
702 | 711 | try: |
703 | 712 | yield None |
704 | 713 | except Exception as e: |
705 | | - logger.get_logger().error( |
706 | | - 'Caught error within "with executor.batch()" context, submissions are dropped.\n ' |
707 | | - ) |
| 714 | + log.error('Caught error within "with executor.batch()" context, submissions are dropped.\n ') |
708 | 715 | raise e |
709 | 716 | else: |
710 | 717 | self._submit_delayed_batch() |
|
0 commit comments