@@ -100,6 +100,7 @@ class MultiProcPlugin(DistributedPluginBase):
 
     - non_daemon: boolean flag to execute as non-daemon processes
     - n_procs: maximum number of threads to be executed in parallel
+    - n_gpu_procs: maximum number of GPU threads to be executed in parallel
     - memory_gb: maximum memory (in GB) that can be used at once.
     - raise_insufficient: raise error if the requested resources for
       a node over the maximum `n_procs` and/or `memory_gb`
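
For reference, the new `n_gpu_procs` option is read from the same `plugin_args` dictionary as the options listed above. A minimal usage sketch (the workflow object `wf` and the chosen values are assumptions for illustration, not part of this change):

    # Cap MultiProc at 8 CPU processes, 16 GB of RAM, and 2 concurrently
    # running GPU-bound nodes.
    wf.run(
        plugin="MultiProc",
        plugin_args={"n_procs": 8, "memory_gb": 16, "n_gpu_procs": 2},
    )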
@@ -130,10 +131,22 @@ def __init__(self, plugin_args=None):
         )
         self.raise_insufficient = self.plugin_args.get("raise_insufficient", True)
 
+        # GPUs found on the system
+        self.n_gpus_visible = MultiProcPlugin.gpu_count()
+        # Number of GPU processes requested by the user
+        self.n_gpu_procs = self.plugin_args.get("n_gpu_procs", self.n_gpus_visible)
+
+        # Total number of processes allowed across all GPUs
+        if self.n_gpu_procs > self.n_gpus_visible:
+            logger.info(
+                "Total number of GPU procs requested (%d) exceeds the available number of GPUs (%d) on the system. Using requested GPU slots at your own risk!",
+                self.n_gpu_procs, self.n_gpus_visible)
+
         # Instantiate different thread pools for non-daemon processes
         logger.debug(
-            "[MultiProc] Starting (n_procs=%d, mem_gb=%0.2f, cwd=%s)",
+            "[MultiProc] Starting (n_procs=%d, n_gpu_procs=%d, mem_gb=%0.2f, cwd=%s)",
             self.processors,
+            self.n_gpu_procs,
             self.memory_gb,
             self._cwd,
         )
@@ -184,9 +197,12 @@ def _prerun_check(self, graph):
         """Check if any node exceeds the available resources"""
         tasks_mem_gb = []
         tasks_num_th = []
+        tasks_gpu_th = []
         for node in graph.nodes():
             tasks_mem_gb.append(node.mem_gb)
             tasks_num_th.append(node.n_procs)
+            if node.is_gpu_node():
+                tasks_gpu_th.append(node.n_procs)
 
         if np.any(np.array(tasks_mem_gb) > self.memory_gb):
             logger.warning(
@@ -203,6 +219,12 @@ def _prerun_check(self, graph):
             )
             if self.raise_insufficient:
                 raise RuntimeError("Insufficient resources available for job")
+        if np.any(np.array(tasks_gpu_th) > self.n_gpu_procs):
+            logger.warning(
+                "Nodes demand more GPU procs than allowed (%d).",
+                self.n_gpu_procs)
+            if self.raise_insufficient:
+                raise RuntimeError("Insufficient GPU resources available for job")
 
     def _postrun_check(self):
         self.pool.shutdown()
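
Note that `_prerun_check` and `_check_resources` assume each node exposes an `is_gpu_node()` predicate, which is not defined in this diff. A minimal sketch of how such a method could look on the Node class (the `use_cuda`/`use_gpu` input names are assumptions for illustration):

    def is_gpu_node(self):
        # Treat a node as GPU-bound if its interface exposes a truthy
        # use_cuda or use_gpu input.
        return bool(getattr(self.inputs, "use_cuda", False)) or bool(
            getattr(self.inputs, "use_gpu", False)
        )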
@@ -213,11 +235,14 @@ def _check_resources(self, running_tasks):
         """
         free_memory_gb = self.memory_gb
         free_processors = self.processors
+        free_gpu_slots = self.n_gpu_procs
         for _, jobid in running_tasks:
             free_memory_gb -= min(self.procs[jobid].mem_gb, free_memory_gb)
             free_processors -= min(self.procs[jobid].n_procs, free_processors)
+            if self.procs[jobid].is_gpu_node():
+                free_gpu_slots -= min(self.procs[jobid].n_procs, free_gpu_slots)
 
-        return free_memory_gb, free_processors
+        return free_memory_gb, free_processors, free_gpu_slots
 
     def _send_procs_to_workers(self, updatehash=False, graph=None):
         """
@@ -232,7 +257,7 @@ def _send_procs_to_workers(self, updatehash=False, graph=None):
         )
 
         # Check available resources by summing all threads and memory used
-        free_memory_gb, free_processors = self._check_resources(self.pending_tasks)
+        free_memory_gb, free_processors, free_gpu_slots = self._check_resources(self.pending_tasks)
 
         stats = (
             len(self.pending_tasks),
@@ -241,6 +266,8 @@ def _send_procs_to_workers(self, updatehash=False, graph=None):
             self.memory_gb,
             free_processors,
             self.processors,
+            free_gpu_slots,
+            self.n_gpu_procs,
         )
         if self._stats != stats:
             tasks_list_msg = ""
@@ -256,13 +283,15 @@ def _send_procs_to_workers(self, updatehash=False, graph=None):
                     tasks_list_msg = indent(tasks_list_msg, " " * 21)
             logger.info(
                 "[MultiProc] Running %d tasks, and %d jobs ready. Free "
-                "memory (GB): %0.2f/%0.2f, Free processors: %d/%d.%s",
+                "memory (GB): %0.2f/%0.2f, Free processors: %d/%d, Free GPU slots: %d/%d.%s",
                 len(self.pending_tasks),
                 len(jobids),
                 free_memory_gb,
                 self.memory_gb,
                 free_processors,
                 self.processors,
+                free_gpu_slots,
+                self.n_gpu_procs,
                 tasks_list_msg,
             )
             self._stats = stats
@@ -304,28 +333,36 @@ def _send_procs_to_workers(self, updatehash=False, graph=None):
             # Check requirements of this job
             next_job_gb = min(self.procs[jobid].mem_gb, self.memory_gb)
             next_job_th = min(self.procs[jobid].n_procs, self.processors)
+            next_job_gpu_th = min(self.procs[jobid].n_procs, self.n_gpu_procs)
+
+            is_gpu_node = self.procs[jobid].is_gpu_node()
 
             # If node does not fit, skip at this moment
-            if next_job_th > free_processors or next_job_gb > free_memory_gb:
+            if (next_job_th > free_processors or next_job_gb > free_memory_gb
+                    or (is_gpu_node and next_job_gpu_th > free_gpu_slots)):
                 logger.debug(
-                    "Cannot allocate job %d (%0.2fGB, %d threads).",
+                    "Cannot allocate job %d (%0.2fGB, %d threads, %d GPU slots).",
                     jobid,
                     next_job_gb,
                     next_job_th,
+                    next_job_gpu_th,
                 )
                 continue
 
             free_memory_gb -= next_job_gb
             free_processors -= next_job_th
+            if is_gpu_node:
+                free_gpu_slots -= next_job_gpu_th
             logger.debug(
                 "Allocating %s ID=%d (%0.2fGB, %d threads). Free: "
-                "%0.2fGB, %d threads.",
+                "%0.2fGB, %d threads, %d GPU slots.",
                 self.procs[jobid].fullname,
                 jobid,
                 next_job_gb,
                 next_job_th,
                 free_memory_gb,
                 free_processors,
+                free_gpu_slots,
             )
 
             # change job status in appropriate queues
@@ -352,6 +389,8 @@ def _send_procs_to_workers(self, updatehash=False, graph=None):
                 self._remove_node_dirs()
                 free_memory_gb += next_job_gb
                 free_processors += next_job_th
+                if is_gpu_node:
+                    free_gpu_slots += next_job_gpu_th
                 # Display stats next loop
                 self._stats = None
 
@@ -379,3 +418,12 @@ def _sort_jobs(self, jobids, scheduler="tsort"):
                 key=lambda item: (self.procs[item].mem_gb, self.procs[item].n_procs),
             )
         return jobids
+
+    @staticmethod
+    def gpu_count():
+        n_gpus = 1
+        try:
+            import GPUtil
+            return len(GPUtil.getGPUs())
+        except ImportError:
+            return n_gpus
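
GPUtil is used here as an optional dependency: when it cannot be imported, `gpu_count()` falls back to assuming a single GPU. A quick way to check what the plugin will detect (import path as in nipype, shown for illustration):

    from nipype.pipeline.plugins.multiproc import MultiProcPlugin

    # Number of GPUs reported by GPUtil, or 1 if GPUtil is not installed.
    print(MultiProcPlugin.gpu_count())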