File tree Expand file tree Collapse file tree 1 file changed +12
-2
lines changed Expand file tree Collapse file tree 1 file changed +12
-2
lines changed Original file line number Diff line number Diff line change @@ -808,7 +808,13 @@ def stop_slurm(fatal=True, quiet=False):
808
808
lambda : pids_from_exe (f"{ properties ['slurm-sbin-dir' ]} /slurmctld" ),
809
809
lambda pids : len (pids ) == 0 ,
810
810
):
811
- failures .append ("Slurmctld is still running" )
811
+ pids = pids_from_exe (f"{ properties ['slurm-sbin-dir' ]} /slurmctld" )
812
+ failures .append (f"Slurmctld is still running ({ pids } )" )
813
+ logging .warning ("Getting the bt of the still running slurmctld" )
814
+ for pid in pids :
815
+ run_command (
816
+ f'sudo gdb -p { pid } -ex "set debuginfod enabled on" -ex "set pagination off" -ex "set confirm off" -ex "thread apply all bt" -ex "quit"'
817
+ )
812
818
813
819
# Build list of slurmds
814
820
slurmd_list = []
@@ -835,8 +841,12 @@ def stop_slurm(fatal=True, quiet=False):
835
841
lambda pids : len (pids ) == 0 ,
836
842
):
837
843
pids = pids_from_exe (f"{ properties ['slurm-sbin-dir' ]} /slurmd" )
838
- run_command (f"pgrep -f { properties ['slurm-sbin-dir' ]} /slurmd -a" , quiet = quiet )
839
844
failures .append (f"Some slurmds are still running ({ pids } )" )
845
+ for pid in pids :
846
+ run_command (
847
+ f'sudo gdb -p { pid } -ex "set debuginfod enabled on" -ex "set pagination off" -ex "set confirm off" -ex "thread apply all bt" -ex "quit"'
848
+ )
849
+ run_command (f"pgrep -f { properties ['slurm-sbin-dir' ]} /slurmd -a" , quiet = quiet )
840
850
841
851
# Stop slurmrestd if it was started
842
852
if properties ["slurmrestd-started" ]:
You can’t perform that action at this time.
0 commit comments