From 6d05b63bce67709a71493126120f758ceb4359d3 Mon Sep 17 00:00:00 2001 From: Peter Edwards Date: Wed, 24 May 2023 13:28:42 +0100 Subject: [PATCH] Block signal delivery to OpenBLAS-spawned threads Otherwise non-threaded applications may use signals in a way that is currently affected by OpenBLAS launching threads. For example, it is not uncommon for an application main loop to block signals when busy, then unblock those signals while waiting for IO (see the sigmask argument to `ppoll(2)`). Signals that arrive during `ppoll(2)` will interrupt the system call, and allow the application to handle any consequences of that signal arriving. Normally (in a single-threaded process), on delivery of an externally generated signal such as SIGALRM, SIGCHLD, or SIGIO, the main loop will be awoken. If the thread is otherwise busy, then the signal will be maintained as pending, and will be delivered when the application next enters its idle state (e.g. `ppoll`), unblocking signals again. OpenBLAS creates threads during initialization. Such threads inherit their signal masks from the thread that creates them, and, if that loading happens very early in the lifetime of a process, all signals are nominally unblocked in these threads. Later, if the "main" thread is running with signals blocked when a signal is sent to the process, the kernel will deliver it to another thread in the process if it is not currently blocking that signal; in our case, to one of the OpenBLAS threads. This means that by creating threads with open signal masks, OpenBLAS is potentially interfering with the normal operation of programs that are otherwise non-threaded. Instead, we should block all signals before starting new threads from `blas_thread_init`, and then restore the signal mask as it was, so the launched threads do not participate in signal delivery and processing. 
Signed-off-by: Peter Edwards --- driver/others/blas_server.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/driver/others/blas_server.c b/driver/others/blas_server.c index 051513f272..746639d2f3 100644 --- a/driver/others/blas_server.c +++ b/driver/others/blas_server.c @@ -70,6 +70,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /*********************************************************************/ #include "common.h" +#include #if defined(OS_LINUX) || defined(OS_NETBSD) || defined(OS_DARWIN) || defined(OS_ANDROID) || defined(OS_SUNOS) || defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_HAIKU) #include #include @@ -669,6 +670,25 @@ int blas_thread_init(void){ if (thread_timeout_env > 30) thread_timeout_env = 30; thread_timeout = (1 << thread_timeout_env); } + /* + * Our threads should not deal with signals from the rest of the + * application - mask everything temporarily in this thread, so threads we + * launch will inherit that mask, and block all signal delivery. This + * should not fail, but make it best-effort. 
+ */ + sigset_t sigset_block_all, sigset_restore; + ret = sigfillset(&sigset_block_all); + bool needSigmaskRestore = false; + if (ret != 0) { + fprintf(STDERR, "OpenBLAS blas_thread_init: sigfillset failed to block signals: %s", strerror(ret)); + } else { + ret = pthread_sigmask(SIG_BLOCK, &sigset_block_all, &sigset_restore); + if (ret != 0) { + fprintf(STDERR, "OpenBLAS blas_thread_init: failed to block signals: pthread_sigmask: %s", strerror(ret)); + } else { + needSigmaskRestore = true; + } + } for(i = 0; i < blas_num_threads - 1; i++){ @@ -686,7 +706,7 @@ int blas_thread_init(void){ &blas_thread_server, (void *)i); #endif if(ret!=0){ - struct rlimit rlim; + struct rlimit rlim; const char *msg = strerror(ret); fprintf(STDERR, "OpenBLAS blas_thread_init: pthread_create failed for thread %ld of %d: %s\n", i+1,blas_num_threads,msg); #ifdef RLIMIT_NPROC @@ -695,6 +715,11 @@ int blas_thread_init(void){ "%ld current, %ld max\n", (long)(rlim.rlim_cur), (long)(rlim.rlim_max)); } #endif + if (needSigmaskRestore) { + // Attempt to restore sigmask if required, before raising SIGINT. + pthread_sigmask(SIG_SETMASK, &sigset_restore, NULL); + needSigmaskRestore = false; + } if(0 != raise(SIGINT)) { fprintf(STDERR, "OpenBLAS blas_thread_init: calling exit(3)\n"); exit(EXIT_FAILURE); @@ -708,6 +733,14 @@ int blas_thread_init(void){ #endif blas_server_avail = 1; + + if (needSigmaskRestore) { + // Attempt to restore sigmask if required, before raising SIGINT. + ret = pthread_sigmask(SIG_SETMASK, &sigset_restore, NULL); + if (ret != 0) { + fprintf(STDERR, "OpenBLAS blas_thread_init: failed to restore signal mask: pthread_signask: %s", strerror(ret)); + } + } } UNLOCK_COMMAND(&server_lock);