nf-core · samuell · Apr 3, 2024 · Jun 27, 2024 · Aug 5, 2024 · Nov 15, 2024
diff --git a/conf/base.config b/conf/base.config
@@ -37,12 +37,12 @@ process {
     }
     withLabel:process_medium {
         cpus   = { 6     * task.attempt }
-        memory = { 36.GB * task.attempt }
+        memory = { 30.GB * task.attempt } // NOTE(review): lowered from 36.GB for every process_medium task - confirm this is intended and not a local machine limit
         time   = { 12.h  * task.attempt }
     }
     withLabel:process_high {
-        cpus   = { 20    * task.attempt }
-        memory = { 120.GB* task.attempt }
+        cpus   = { 10    * task.attempt } // NOTE(review): halved from 20 for every process_high task - confirm this is intended
+        memory = { 30.GB * task.attempt } // NOTE(review): cut from 120.GB and now equal to process_medium; looks like a local machine limit - confirm
         time   = { 36.h  * task.attempt }
     }
     withLabel:process_long {

diff --git a/conf/modules.config b/conf/modules.config
@@ -279,6 +279,44 @@ process {
         ]
     }
 
+    withName: EMU_ABUNDANCE {
+        publishDir = [
+            [   // tabular reports are always published
+                path: { "${params.outdir}/results" },
+                mode: params.publish_dir_mode,
+                pattern: '*.{tsv,txt}'
+            ],
+            [   // alignment files are published only when params.keep_files is set
+                path: { "${params.outdir}/results" },
+                mode: params.publish_dir_mode,
+                pattern: '*.sam',
+                enabled: params.keep_files
+            ],
+            [   // fasta output is published only when params.output_unclassified is set
+                path: { "${params.outdir}/results" },
+                mode: params.publish_dir_mode,
+                pattern: '*.fa',
+                enabled: params.output_unclassified
+            ]
+        ]
+
+        // Options appended to the `emu abundance` command line, built from pipeline params.
+        // Boolean params contribute their flag when true and an empty string otherwise.
+        ext.args = [
+            "--type ${params.seqtype}",
+            "--db ${params.db}",
+            "--output-dir ./",
+            "--min-abundance ${params.min_abundance}",
+            "--N ${params.minimap_max_alignments}",
+            "--K ${params.minibatch_size}",
+            "--keep-counts",
+            params.keep_read_assignments ? "--keep-read-assignments" : "",
+            params.keep_files ? "--keep-files" : "",
+            params.output_unclassified ? "--output-unclassified" : "",
+        ].join(' ')                                                           // join turns the list into a single string
+        ext.prefix = { "${meta.id}" }                                         // closure, so meta is resolved per task
+    }
+
     withName: SIDLE_DBFILT {
         ext.args = { params.sidle_ref_taxonomy.startsWith("greengenes") ? '--p-num-degenerates 3' : '--p-num-degenerates 5' } // 3 for greengenes, 5 for SILVA 128
         ext.args2 = { params.sidle_ref_taxonomy.startsWith("greengenes") ? '--p-exclude "p__;,k__;,mitochondria,chloroplast" --p-mode contains' : '--p-exclude "mitochondria,chloroplast" --p-mode contains' } // "p__;,k__;" for greengenes

diff --git a/modules/local/emu_abundance.nf b/modules/local/emu_abundance.nf
@@ -0,0 +1,40 @@
+process EMU_ABUNDANCE {
+    // Runs `emu abundance` on one sample's reads; extra CLI options come from task.ext.args.
+    tag "$meta.id"
+    label 'process_high'
+
+    conda "bioconda::emu=3.4.4"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/emu:3.4.4--hdfd78af_1':
+        'quay.io/biocontainers/emu:3.4.4--hdfd78af_1' }"
+
+    input:
+    tuple val(meta), path(reads)
+
+    output:
+    tuple val(meta), path("*abundance.tsv"), emit: report
+    tuple val(meta), path("*read-assignment-distributions.tsv"), emit: assignment_report, optional:true
+    path "versions.yml"           , emit: versions
+    tuple val(meta), path("*.sam"), emit: samfile, optional:true
+    tuple val(meta), path("*.fa"), emit: unclassified_fa , optional:true
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    emu \\
+        abundance \\
+        --output-basename ${prefix} \\
+        $args \\
+        --threads $task.cpus \\
+        $reads
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        emu: \$(echo \$(emu --version 2>&1) | sed 's/^.*emu //; s/Using.*\$//' )
+    END_VERSIONS
+    """
+}
diff --git a/nextflow.config b/nextflow.config
@@ -17,6 +17,7 @@ params {
     extension                  = "/*_R{1,2}_001.fastq.gz"
     pacbio                     = false
     iontorrent                 = false
+    nanopore                   = false
     FW_primer                  = null
     RV_primer                  = null
     classifier                 = null
@@ -127,6 +128,19 @@ params {
     sidle_ref_tax_custom     = null
     sidle_ref_tree_custom    = null
 
+
+    // Emu parameters, consumed by the EMU_ABUNDANCE ext.args / publishDir settings in conf/modules.config
+    db                         = "${projectDir}/assets/emu_database" // passed to `--db`; double-quoted so projectDir is interpolated
+
+    reads                      = null                                // NOTE(review): not referenced in the visible changes - confirm it is needed
+    seqtype                    = "map-ont"                           // passed to `--type`
+    min_abundance              = 0.0001                              // passed to `--min-abundance`
+    minimap_max_alignments     = 50                                  // passed to `--N`
+    minibatch_size             = 500000000                           // passed to `--K`
+    keep_read_assignments      = true                                // adds `--keep-read-assignments`
+    keep_files                 = false                               // adds `--keep-files` and enables publishing of *.sam
+    output_unclassified        = true                                // adds `--output-unclassified` and enables publishing of *.fa
+
     // MultiQC options
     multiqc_config             = null
     multiqc_title              = null

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -82,7 +82,9 @@
                     "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$"
                 }
             },
-            "required": ["outdir"],
+            "required": [
+                "outdir"
+            ],
             "fa_icon": "fas fa-terminal"
         },
         "sequencing_input": {
@@ -259,7 +261,11 @@
                     "default": "independent",
                     "help_text": "If samples are treated independent (lowest sensitivity and lowest resources), pooled (highest sensitivity and resources) or pseudo-pooled (balance between required resources and sensitivity).",
                     "description": "Mode of sample inference: \"independent\", \"pooled\" or \"pseudo\"",
-                    "enum": ["independent", "pooled", "pseudo"]
+                    "enum": [
+                        "independent",
+                        "pooled",
+                        "pseudo"
+                    ]
                 },
                 "concatenate_reads": {
                     "type": "boolean",
@@ -438,7 +444,10 @@
                     "type": "string",
                     "description": "Method used for alignment, \"hmmer\" or \"mafft\"",
                     "default": "hmmer",
-                    "enum": ["hmmer", "mafft"]
+                    "enum": [
+                        "hmmer",
+                        "mafft"
+                    ]
                 },
                 "pplace_taxonomy": {
                     "type": "string",
@@ -454,7 +463,13 @@
                     "type": "string",
                     "help_text": "Choose any of the supported databases, and optionally also specify the version. Database and version are separated by an equal sign (`=`, e.g. `silva=138`) . This will download the desired database and initiate taxonomic classification with QIIME2 and the chosen database.\n\nIf both, `--dada_ref_taxonomy` and `--qiime_ref_taxonomy` are used, DADA2 classification will be used for downstream analysis.\n\nThe following databases are supported:\n- SILVA ribosomal RNA gene database project - 16S rRNA\n- UNITE - eukaryotic nuclear ribosomal ITS region - ITS\n- Greengenes (only testing!)\n\nGenerally, using `silva`, `unite-fungi`, or `unite-alleuk` will select the most recent supported version. For testing purposes, the tiny database `greengenes85` (dereplicated at 85% sequence similarity) is available. For details on what values are valid, please either use an invalid value such as `x` (causing the pipeline to send an error message with all valid values) or see `conf/ref_databases.config`.",
                     "description": "Name of supported database, and optionally also version number",
-                    "enum": ["silva=138", "silva", "greengenes85", "greengenes2", "greengenes2=2022.10"]
+                    "enum": [
+                        "silva=138",
+                        "silva",
+                        "greengenes85",
+                        "greengenes2",
+                        "greengenes2=2022.10"
+                    ]
                 },
                 "qiime_ref_tax_custom": {
                     "type": "string",
@@ -529,7 +544,12 @@
                     "help_text": "If data is long read ITS sequences, that need to be cut to ITS region (full ITS, only ITS1, or only ITS2) for taxonomy assignment.",
                     "description": "Part of ITS region to use for taxonomy assignment: \"full\", \"its1\", or \"its2\"",
                     "default": "none",
-                    "enum": ["none", "full", "its1", "its2"]
+                    "enum": [
+                        "none",
+                        "full",
+                        "its1",
+                        "its2"
+                    ]
                 },
                 "its_partial": {
                     "type": "integer",
@@ -549,7 +569,13 @@
                     "type": "string",
                     "help_text": "",
                     "description": "Name of supported database, and optionally also version number",
-                    "enum": ["silva", "silva=128", "greengenes", "greengenes=13_8", "greengenes88"]
+                    "enum": [
+                        "silva",
+                        "silva=128",
+                        "greengenes",
+                        "greengenes=13_8",
+                        "greengenes88"
+                    ]
                 },
                 "sidle_ref_tax_custom": {
                     "type": "string",
@@ -681,7 +707,7 @@
                 },
                 "ancombc_effect_size": {
                     "type": "number",
-                    "default": 1,
+                    "default": 1.0,
                     "minimum": 0,
                     "description": "Effect size threshold for differential abundance barplot for `--ancombc` and `--ancombc_formula`",
                     "fa_icon": "fas fa-greater-than-equal"
@@ -822,7 +848,14 @@
                     "description": "Method used to save pipeline results to output directory.",
                     "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
                     "fa_icon": "fas fa-copy",
-                    "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
+                    "enum": [
+                        "symlink",
+                        "rellink",
+                        "link",
+                        "copy",
+                        "copyNoFollow",
+                        "move"
+                    ],
                     "hidden": true
                 },
                 "email_on_fail": {
@@ -995,5 +1028,44 @@
         {
             "$ref": "#/$defs/institutional_config_options"
         }
-    ]
-}
+    ],
+    "properties": {
+        "nanopore": {
+            "type": "boolean"
+        },
+        "db": {
+            "type": "string",
+            "default": "${projectDir}/assets/emu_database"
+        },
+        "seqtype": {
+            "type": "string",
+            "default": "map-ont"
+        },
+        "min_abundance": {
+            "type": "number",
+            "default": 0.0001
+        },
+        "minimap_max_alignments": {
+            "type": "integer",
+            "default": 50
+        },
+        "minibatch_size": {
+            "type": "integer",
+            "default": 500000000
+        },
+        "keep_read_assignments": {
+            "type": "boolean",
+            "default": true
+        },
+        "keep_files": {
+            "type": "boolean"
+        },
+        "output_unclassified": {
+            "type": "boolean",
+            "default": true
+        },
+        "reads": {
+            "type": "string"
+        }
+    }
+}
diff --git a/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf
@@ -151,7 +151,7 @@ def validateInputParameters() {
         error("Incompatible parameters: `--FW_primer` and `--RV_primer` are required for primer trimming. If primer trimming is not needed, use `--skip_cutadapt`.")
     }
 
-    if ( params.pacbio || params.iontorrent || params.single_end ) {
+    if ( params.pacbio || params.iontorrent || params.nanopore || params.single_end ) {
         if (params.trunclenr) { log.warn "Unused parameter: `--trunclenr` is ignored because the data is single end." }
     } else if (params.trunclenf && !params.trunclenr) {
         error("Invalid command: `--trunclenf` is set, but `--trunclenr` is not. Either both parameters `--trunclenf` and `--trunclenr` must be set or none.")
@@ -234,8 +234,8 @@ def validateInputParameters() {
         "rdp","rdp=18",
         "sbdi-gtdb","sbdi-gtdb=R09-RS220-1","sbdi-gtdb=R08-RS214-1","sbdi-gtdb=R07-RS207-1",
         "silva","silva=138","silva=132",
-        "unite-fungi","unite-fungi=10.0","unite-fungi=9.0","unite-fungi=8.3","unite-fungi=8.2",
-        "unite-alleuk","unite-alleuk=10.0","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2"
+        "unite-fungi","unite-fungi=9.0","unite-fungi=8.3","unite-fungi=8.2",
+        "unite-alleuk","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2"
     ]
     if (params.sbdiexport){
         if (params.sintax_ref_taxonomy ) {