Merge pull request #82 from CDCgov/update-resource-management

slsevilla · web-flow · commit 6e58b0c281a3 · 2025-02-14T11:53:02.000-05:00
Update resource management: replace check_max() and the max_* params with resourceLimits maps (completes #80)
diff --git a/conf/base.config b/conf/base.config
@@ -10,11 +10,10 @@
 
 process {
 
-    // TODO nf-core: Check the defaults for all processes
-    cpus   = { check_max( 1    * task.attempt, 'cpus'   ) }
-    memory = { check_max( 6.GB * task.attempt, 'memory' ) }
-    time   = { check_max( 4.h  * task.attempt, 'time'   ) }
-
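+    // Default cpus/memory/time for every process (withLabel selectors below override these); each scales with task.attempt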
+    cpus   = { 1      * task.attempt }
+    memory = { 6.GB   * task.attempt }
+    time   = { 4.h    * task.attempt }
     errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
     maxRetries    = 1
     maxErrors     = '-1'
@@ -27,30 +26,30 @@ process {
     // TODO nf-core: Customise requirements for specific processes.
     // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
     withLabel:process_single {
-        cpus   = { check_max( 1                  , 'cpus'    ) }
-        memory = { check_max( 6.GB * task.attempt, 'memory'  ) }
-        time   = { check_max( 4.h  * task.attempt, 'time'    ) }
+        cpus   = { 1                     }
+        memory = { 6.GB * task.attempt   }
+        time   = { 4.h  * task.attempt   }
     }
     withLabel:process_low {
-        cpus   = { check_max( 2     * task.attempt, 'cpus'    ) }
-        memory = { check_max( 12.GB * task.attempt, 'memory'  ) }
-        time   = { check_max( 4.h   * task.attempt, 'time'    ) }
+        cpus   = { 2     * task.attempt  }
+        memory = { 12.GB * task.attempt  }
+        time   = { 4.h   * task.attempt  }
     }
     withLabel:process_medium {
-        cpus   = { check_max( 6     * task.attempt, 'cpus'    ) }
-        memory = { check_max( 36.GB * task.attempt, 'memory'  ) }
-        time   = { check_max( 8.h   * task.attempt, 'time'    ) }
+        cpus   = { 6     * task.attempt  }
+        memory = { 36.GB * task.attempt  }
+        time   = { 8.h   * task.attempt  }
     }
     withLabel:process_high {
-        cpus   = { check_max( 12    * task.attempt, 'cpus'    ) }
-        memory = { check_max( 72.GB * task.attempt, 'memory'  ) }
-        time   = { check_max( 16.h  * task.attempt, 'time'    ) }
+        cpus   = { 12    * task.attempt  }
+        memory = { 72.GB * task.attempt  }
+        time   = { 16.h  * task.attempt  }
     }
     withLabel:process_long {
-        time   = { check_max( 20.h  * task.attempt, 'time'    ) }
+        time   = { 20.h  * task.attempt  }
     }
     withLabel:process_high_memory {
-        memory = { check_max( 200.GB * task.attempt, 'memory' ) }
+        memory = { 200.GB * task.attempt }
     }
     withLabel:error_ignore {
         errorStrategy = 'ignore'
diff --git a/conf/scicomp.config b/conf/scicomp.config
@@ -24,9 +24,11 @@ params {
   custom_config_version = 'master'
 
   // Default resource parameters. Expecting to be overwritten.
-  max_memory                 = '128.GB'
-  max_cpus                   = 16
-  max_time                   = '240.h'
+  resourceLimits = [
+    cpus: 16,
+    memory: '128.GB',
+    time: '240.h'
+  ]
 }
 
 /*
@@ -121,7 +123,7 @@ profiles {
       time = '72.h'
 
       // Set h_vmem option for qsub submissions. +20 memory to h_vmem prevents memory allocation errors.
-      clusterOptions = { "-l h_vmem=${(check_max((task.memory.toGiga())+20), 'memory').toString().replaceAll(/[\sB]/,'')}G" }
+      clusterOptions = { "-l h_vmem=${(task.memory.toGiga() + 20).toString()}G" }
     }
   }
   training {
@@ -146,7 +148,7 @@ profiles {
       time = '8.h'
 
       // Set h_vmem option for qsub submissions. +20 memory to h_vmem prevents memory allocation errors.
-      clusterOptions = { "-l h_vmem=${(check_max((task.memory.toGiga())+20), 'memory').toString().replaceAll(/[\sB]/,'')}G" }
+      clusterOptions = { "-l h_vmem=${((task.memory.toGiga())+20).toString().replaceAll(/[\sB]/,'')}G" }
     }
   }
   debug {
@@ -175,39 +177,4 @@ profiles {
   }
 }
 
-/*
-==========================================================================================
-    Function to check max resources
-==========================================================================================
-*/
 
-def check_max(obj, type) {
-    if (type == 'memory') {
-        try {
-            if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)
-                return params.max_memory as nextflow.util.MemoryUnit
-            else
-                return obj
-        } catch (all) {
-            println "Updating max_memory: '${params.max_memory}' to $obj based on available resources"
-            return obj
-        }
-    } else if (type == 'time') {
-        try {
-            if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1)
-                return params.max_time as nextflow.util.Duration
-            else
-                return obj
-        } catch (all) {
-            println "Updating max_time: '${params.max_time}' to $obj based on available resources"
-            return obj
-        }
-    } else if (type == 'cpus') {
-        try {
-            return Math.min( obj, params.max_cpus as int )
-        } catch (all) {
-            println "Updating max_cpus: '${params.max_cpus}' to $obj based on available resources"
-            return obj
-        }
-    }
-}
diff --git a/conf/test.config b/conf/test.config
@@ -15,13 +15,13 @@ params {
     config_profile_description = 'Minimal test dataset to check pipeline function'
 
     // Limit resources so that this can run on GitHub Actions
-    max_cpus   = 2
-    max_memory = '6.GB'
-    max_time   = '6.h'
+    resourceLimits = [
+        cpus: 2,
+        memory: '6.GB',
+        time: '6.h'
+    ]
 
     // Input data
-    // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
-    // TODO nf-core: Give any required params for the test so that command line flags are not needed
     input  = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv'
 
     // Genome references
diff --git a/conf/test_bam.config b/conf/test_bam.config
@@ -15,9 +15,11 @@ params {
     config_profile_description = 'Minimal test dataset to check pipeline function'
 
     // Limit resources so that this can run on GitHub Actions
-    max_cpus   = 2
-    max_memory = 6.GB
-    max_time   = 6.h
+    resourceLimits = [
+        cpus: 2,
+        memory: '6.GB',
+        time: '6.h'
+    ]
 
     // Input data
     input  = "${projectDir}/assets/samplesheet_test_bam.csv"
diff --git a/conf/test_illumina.config b/conf/test_illumina.config
@@ -15,9 +15,11 @@ params {
     config_profile_description = 'Minimal test dataset to check pipeline function'
 
     // Limit resources so that this can run on GitHub Actions
-    max_cpus   = 2
-    max_memory = 6.GB
-    max_time   = 6.h
+    resourceLimits = [
+        cpus: 2,
+        memory: '6.GB',
+        time: '6.h'
+    ]
 
     // Input data
     input  = "${projectDir}/assets/samplesheet_test_illumina.csv"
diff --git a/conf/test_iontorrent.config b/conf/test_iontorrent.config
@@ -15,10 +15,12 @@ params {
     config_profile_description = 'Minimal test dataset to check pipeline function'
 
     // Limit resources so that this can run on GitHub Actions
-    max_cpus   = 2
-    max_memory = 6.GB
-    max_time   = 6.h
-
+    resourceLimits = [
+        cpus: 2,
+        memory: 6.GB,
+        time: 6.h
+    ]
+    
     // Input data
     input  = "${projectDir}/assets/samplesheet_test_iontorrent.csv"
 
diff --git a/conf/test_ont.config b/conf/test_ont.config
@@ -15,9 +15,11 @@ params {
     config_profile_description = 'Minimal test dataset to check pipeline function'
 
     // Limit resources so that this can run on GitHub Actions
-    max_cpus   = 2
-    max_memory = 6.GB
-    max_time   = 6.h
+    resourceLimits = [
+        cpus: 2,
+        memory: '6.GB',
+        time: '6.h'
+    ]
 
     // Input data
     input  = "${projectDir}/assets/samplesheet_test_ont.csv"
diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf
@@ -26,7 +26,7 @@ process FASTQC {
     def rename_to = old_new_pairs*.join(' ').join(' ')
     def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ')
 
-    def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB')
+    def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') / task.cpus
     // FastQC memory value allowed range (100 - 10000)
     def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 100 : memory_in_mb)
 
diff --git a/nextflow.config b/nextflow.config
@@ -8,15 +8,14 @@
 
 // Global default params, used in configs
 params {
-
     // Input options
     input                      = null
 
     // References
     fasta                      = "${projectDir}/assets/references/SARS-CoV-2.reference.fasta"
     gff			               = "${projectDir}/assets/references/SARS-CoV-2.reference.gff"
     gff3                       = "${projectDir}/assets/references/SARS-CoV-2.reference.gff3"
-    bed                        = 
+    bed                        = ""
 
     // Freyja barcodes and metadata
     //freyja_repeats             = 1000 //default, change it to at minimum 250 to run it faster
@@ -49,6 +48,7 @@ params {
     hook_url                   = null
     help                       = false
     version                    = null
+
     // Schema validation default options
     validationFailUnrecognisedParams = false
     validationLenientMode            = true
@@ -67,22 +67,6 @@ params {
     config_profile_url         = null
     config_profile_name        = null
     skip_multiqc               = false
-
-    // Max resource options
-    // Defaults only, expecting to be overwritten
-    max_memory                 = '128.GB'
-    max_cpus                   = 16
-    max_time                   = '240.h'
-
-}
-
-// Default publishing logic for pipeline
-process {
-    publishDir = [
-        path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
-        mode: params.publish_dir_mode,
-        saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
-    ]
 }
 
 // Load base.config by default for all pipelines
@@ -196,35 +180,4 @@ manifest {
     doi             = ' '
 }
 
-// Function to ensure that resource requirements don't go beyond
-// a maximum limit
-def check_max(obj, type) {
-    if (type == 'memory') {
-        try {
-            if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)
-                return params.max_memory as nextflow.util.MemoryUnit
-            else
-                return obj
-        } catch (all) {
-            println "   ### ERROR ###   Max memory '${params.max_memory}' is not valid! Using default value: $obj"
-            return obj
-        }
-    } else if (type == 'time') {
-        try {
-            if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1)
-                return params.max_time as nextflow.util.Duration
-            else
-                return obj
-        } catch (all) {
-            println "   ### ERROR ###   Max time '${params.max_time}' is not valid! Using default value: $obj"
-            return obj
-        }
-    } else if (type == 'cpus') {
-        try {
-            return Math.min( obj, params.max_cpus as int )
-        } catch (all) {
-            println "   ### ERROR ###   Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
-            return obj
-        }
-    }
-}
+