From 1fa79ca8af4a24542e393ec2055264a17bce5b37 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Tue, 23 Jul 2024 14:05:15 +0200 Subject: [PATCH 01/12] Add sbdi-gtdb R09-RS220 --- subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf index 52da55d4..fe810f7d 100644 --- a/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf @@ -234,8 +234,8 @@ def validateInputParameters() { "rdp","rdp=18", "sbdi-gtdb","sbdi-gtdb=R09-RS220-1","sbdi-gtdb=R08-RS214-1","sbdi-gtdb=R07-RS207-1", "silva","silva=138","silva=132", - "unite-fungi","unite-fungi=10.0","unite-fungi=9.0","unite-fungi=8.3","unite-fungi=8.2", - "unite-alleuk","unite-alleuk=10.0","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2" + "unite-fungi","unite-fungi=9.0","unite-fungi=8.3","unite-fungi=8.2", + "unite-alleuk","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2" ] if (params.sbdiexport){ if (params.sintax_ref_taxonomy ) { From 068c55b96a640070fb8f7d4175312457e9e6d34d Mon Sep 17 00:00:00 2001 From: Samuel Lampa Date: Tue, 28 Jan 2025 16:36:06 +0100 Subject: [PATCH 02/12] Adapt base config for local laptop --- conf/base.config | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/base.config b/conf/base.config index c052a5bd..3b26435e 100644 --- a/conf/base.config +++ b/conf/base.config @@ -37,12 +37,12 @@ process { } withLabel:process_medium { cpus = { 6 * task.attempt } - memory = { 36.GB * task.attempt } + memory = { 30.GB * task.attempt } time = { 12.h * task.attempt } } withLabel:process_high { - cpus = { 20 * task.attempt } - memory = { 120.GB* task.attempt } + cpus = { 10 * task.attempt } + memory = { 30.GB * task.attempt } time = { 36.h * task.attempt } } withLabel:process_long { From c12d52263bc137584f65a9ad92dbae97f5135613 Mon Sep 17 00:00:00 2001 From: Samuel Lampa Date: Tue, 28 Jan 2025 17:02:10 +0100 Subject: [PATCH 03/12] Add Emu to main AmpliSeq workflow --- workflows/ampliseq.nf | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 7b9168b4..0bdc81d0 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -104,7 +104,7 @@ ch_report_abstract = params.report_abstract ? Channel.fromPath(params.report_abs // Set non-params Variables single_end = params.single_end -if (params.pacbio || params.iontorrent) { +if (params.pacbio || params.iontorrent || params.nanopore) { single_end = true } @@ -220,6 +220,7 @@ include { SUMMARY_REPORT } from '../modules/local/summary_report' include { PHYLOSEQ_INTAX as PHYLOSEQ_INTAX_PPLACE } from '../modules/local/phyloseq_intax' include { PHYLOSEQ_INTAX as PHYLOSEQ_INTAX_QIIME2 } from '../modules/local/phyloseq_intax' include { FILTER_CLUSTERS } from '../modules/local/filter_clusters' +include { EMU_ABUNDANCE } from '../modules/local/emu_abundance' // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules @@ -367,6 +368,10 @@ workflow AMPLISEQ { ch_trimmed_reads = RENAME_RAW_DATA_FILES.out.fastq } + if (params.nanopore){ + EMU_ABUNDANCE(ch_trimmed_reads) + } + // // SUBWORKFLOW: Read preprocessing & QC plotting with DADA2 // From ffe4f8bca9cd7a36f269ea94238ef7d2b10acf7e Mon Sep 17 00:00:00 2001 From: Samuel Lampa Date: Tue, 28 Jan 2025 17:03:42 +0100 Subject: [PATCH 04/12] Add changes for Emu to nextflow.config --- nextflow.config | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/nextflow.config b/nextflow.config index 97d006e7..09600ba2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -17,6 +17,7 @@ params { extension = "/*_R{1,2}_001.fastq.gz" pacbio = false iontorrent = false + nanopore = false FW_primer = null RV_primer = null classifier = null @@ -127,6 +128,19 @@ params { sidle_ref_tax_custom = null sidle_ref_tree_custom = null + + // emu parameters + db = '/home/proj/development/microbial/16s/databases/emu_database' + + reads = null + seqtype = "map-ont" + min_abundance = 0.0001 + minimap_max_alignments = 50 + minibatch_size = 500000000 + keep_read_assignments = true + keep_files = false + output_unclassified = true + // MultiQC options multiqc_config = null multiqc_title = null From 731caef1c58e6a5deaec080a7b4086a8786308aa Mon Sep 17 00:00:00 2001 From: Samuel Lampa Date: Tue, 28 Jan 2025 17:04:10 +0100 Subject: [PATCH 05/12] Add changes for Emu to modules.config --- conf/modules.config | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index 5f940e57..7e26c3b4 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -279,6 +279,44 @@ process { ] } + withName: EMU_ABUNDANCE { + publishDir = [ + [ + path: { "${params.outdir}/results" }, + mode: params.publish_dir_mode, + pattern: '*{.tsv,txt}' + ], + [ + path: { "${params.outdir}/results" }, + mode: params.publish_dir_mode, + pattern: '*{.sam}', + enabled: params.keep_files, + ], + [ + path: { "${params.outdir}/results" }, + mode: params.publish_dir_mode, + pattern: '*{.fa}', + enabled: params.output_unclassified + ], + + ] + + ext.args = [ + "--type ${params.seqtype}", + "--db ${params.db}", + "--output-dir ./", + "--min-abundance ${params.min_abundance}", + "--N ${params.minimap_max_alignments}", + "--K ${params.minibatch_size}", + "--keep-counts", + params.keep_read_assignments ? "--keep-read-assignments" : "", + params.keep_files ? "--keep-files" : "", + params.output_unclassified ? "--output-unclassified" : "", + ].join(' ') // Join converts the list here to a string. + ext.prefix = { "${meta.id}" } // A closure can be used to access variables defined in the script + + } + withName: SIDLE_DBFILT { ext.args = { params.sidle_ref_taxonomy.startsWith("greengenes") ? '--p-num-degenerates 3' : '--p-num-degenerates 5' } // 3 for greengenes, 5 for SILVA 128 ext.args2 = { params.sidle_ref_taxonomy.startsWith("greengenes") ? '--p-exclude "p__;,k__;,mitochondria,chloroplast" --p-mode contains' : '--p-exclude "mitochondria,chloroplast" --p-mode contains' } // "p__;,k__;" for greengenes From 7f8fe12aabccc2ad344c6c57665d0da4fb364e59 Mon Sep 17 00:00:00 2001 From: Samuel Lampa Date: Tue, 28 Jan 2025 17:05:38 +0100 Subject: [PATCH 06/12] Add changes for Emu to nextflow_schema.json --- nextflow_schema.json | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index e0b263b2..fa5f9056 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -704,7 +704,7 @@ "properties": { "report_template": { "type": "string", - "default": "${projectDir}/assets/report_template.Rmd", + "default": "/home/proj/development/microbial/16s/ampliseq/assets/report_template.Rmd", "description": "Path to Markdown file (Rmd)" }, "report_css": { @@ -995,5 +995,41 @@ { "$ref": "#/$defs/institutional_config_options" } - ] -} + ], + "properties": { + "nanopore": { + "type": "boolean" + }, + "db": { + "type": "string", + "default": "/home/proj/development/microbial/16s/databases/emu_database" + }, + "seqtype": { + "type": "string", + "default": "map-ont" + }, + "min_abundance": { + "type": "number", + "default": 0.0001 + }, + "minimap_max_alignments": { + "type": "integer", + "default": 50 + }, + "minibatch_size": { + "type": "integer", + "default": 500000000 + }, + "keep_read_assignments": { + "type": "boolean", + "default": true + }, + "keep_files": { + "type": "boolean" + }, + "output_unclassified": { + "type": "boolean", + "default": true + } + } +} \ No newline at end of file From d1124228b7019319e27786f6e5131ae10716e3d4 Mon Sep 17 00:00:00 2001 From: Samuel Lampa Date: Tue, 28 Jan 2025 17:06:22 +0100 Subject: [PATCH 07/12] Add changes for Emu to utils_nfcore_ampliseq_pipeline/main.nf --- subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf index fe810f7d..cd8c9c6f 100644 --- a/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf @@ -151,7 +151,7 @@ def validateInputParameters() { error("Incompatible parameters: `--FW_primer` and `--RV_primer` are required for primer trimming. If primer trimming is not needed, use `--skip_cutadapt`.") } - if ( params.pacbio || params.iontorrent || params.single_end ) { + if ( params.pacbio || params.iontorrent || params.nanopore || params.single_end ) { if (params.trunclenr) { log.warn "Unused parameter: `--trunclenr` is ignored because the data is single end." } } else if (params.trunclenf && !params.trunclenr) { error("Invalid command: `--trunclenf` is set, but `--trunclenr` is not. Either both parameters `--trunclenf` and `--trunclenr` must be set or none.") From 10846287763e4fe63e5bf23071584595cd395f06 Mon Sep 17 00:00:00 2001 From: Samuel Lampa Date: Wed, 29 Jan 2025 10:38:17 +0100 Subject: [PATCH 08/12] Add the Emu module createdy by Frans Wallin (@fwa93) --- modules/local/emu_abundance.nf | 40 ++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 modules/local/emu_abundance.nf diff --git a/modules/local/emu_abundance.nf b/modules/local/emu_abundance.nf new file mode 100644 index 00000000..de86bd3d --- /dev/null +++ b/modules/local/emu_abundance.nf @@ -0,0 +1,40 @@ +process EMU_ABUNDANCE { + debug true + tag "$meta.id" + label 'process_high' + + conda "bioconda::emu=3.4.4" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/emu:3.4.4--hdfd78af_1': + 'quay.io/biocontainers/emu:3.4.4--hdfd78af_1' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*abundance.tsv"), emit: report + tuple val(meta), path("*read-assignment-distributions.tsv"), emit: assignment_report, optional:true + path "versions.yml" , emit: versions + tuple val(meta), path("*.sam"), emit: samfile, optional:true + tuple val(meta), path("*.fa"), emit: unclassified_fa , optional:true + + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + emu \\ + abundance \\ + $args \\ + --threads $task.cpus \\ + $reads + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + emu: \$(echo \$(emu --version 2>&1) | sed 's/^.*emu //; s/Using.*\$//' ) + END_VERSIONS + """ +} From bdb8782584491e68bd63d06945adde55b635e0e8 Mon Sep 17 00:00:00 2001 From: Samuel Lampa Date: Wed, 29 Jan 2025 10:43:01 +0100 Subject: [PATCH 09/12] Turn off stuff not relevant when analyzing Nanopore data --- workflows/ampliseq.nf | 953 +++++++++++++++++++++--------------------- 1 file changed, 479 insertions(+), 474 deletions(-) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 0bdc81d0..ebe99be8 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -375,520 +375,524 @@ workflow AMPLISEQ { // // SUBWORKFLOW: Read preprocessing & QC plotting with DADA2 // - DADA2_PREPROCESSING ( - ch_trimmed_reads, - single_end, - find_truncation_values, - trunclenf, - trunclenr - ).reads.set { ch_filt_reads } - ch_versions = ch_versions.mix(DADA2_PREPROCESSING.out.versions) - // - // MODULES: ASV generation with DADA2 - // + // START of Non-Nanopore section + if (!params.nanopore) { + DADA2_PREPROCESSING ( + ch_trimmed_reads, + single_end, + find_truncation_values, + trunclenf, + trunclenr + ).reads.set { ch_filt_reads } + ch_versions = ch_versions.mix(DADA2_PREPROCESSING.out.versions) + + // + // MODULES: ASV generation with DADA2 + // + //run error model + if ( !params.illumina_novaseq ) { + DADA2_ERR ( ch_filt_reads ) + ch_errormodel = DADA2_ERR.out.errormodel + ch_versions = ch_versions.mix(DADA2_ERR.out.versions) + } else { + DADA2_ERR ( ch_filt_reads ) + NOVASEQ_ERR ( DADA2_ERR.out.errormodel ) + ch_errormodel = NOVASEQ_ERR.out.errormodel + ch_versions = ch_versions.mix(DADA2_ERR.out.versions) + } - //run error model - if ( !params.illumina_novaseq ) { - DADA2_ERR ( ch_filt_reads ) - ch_errormodel = DADA2_ERR.out.errormodel - ch_versions = ch_versions.mix(DADA2_ERR.out.versions) - } else { - DADA2_ERR ( ch_filt_reads ) - NOVASEQ_ERR ( DADA2_ERR.out.errormodel ) - ch_errormodel = NOVASEQ_ERR.out.errormodel - ch_versions = ch_versions.mix(DADA2_ERR.out.versions) - } + //group by meta + ch_filt_reads + .join( ch_errormodel ) + .set { ch_derep_errormodel } + DADA2_DENOISING ( ch_derep_errormodel.dump(tag: 'into_denoising') ) + ch_versions = ch_versions.mix(DADA2_DENOISING.out.versions) + + DADA2_RMCHIMERA ( DADA2_DENOISING.out.seqtab ) + ch_versions = ch_versions.mix(DADA2_RMCHIMERA.out.versions) + + //group by sequencing run & group by meta + DADA2_PREPROCESSING.out.logs + .join( DADA2_DENOISING.out.denoised ) + .join( DADA2_DENOISING.out.mergers ) + .join( DADA2_RMCHIMERA.out.rds ) + .set { ch_track_numbers } + DADA2_STATS ( ch_track_numbers ) + ch_versions = ch_versions.mix(DADA2_STATS.out.versions) + + //merge if several runs, otherwise just publish + DADA2_MERGE ( + DADA2_STATS.out.stats.map { meta, stats -> stats }.collect(), + DADA2_RMCHIMERA.out.rds.map { meta, rds -> rds }.collect() ) + ch_versions = ch_versions.mix(DADA2_MERGE.out.versions) + + //merge cutadapt_summary and dada_stats files + if (!params.skip_cutadapt) { + MERGE_STATS_STD (CUTADAPT_WORKFLOW.out.summary, DADA2_MERGE.out.dada2stats) + ch_stats = MERGE_STATS_STD.out.tsv + ch_versions = ch_versions.mix(MERGE_STATS_STD.out.versions) + } else { + ch_stats = DADA2_MERGE.out.dada2stats + } - //group by meta - ch_filt_reads - .join( ch_errormodel ) - .set { ch_derep_errormodel } - DADA2_DENOISING ( ch_derep_errormodel.dump(tag: 'into_denoising') ) - ch_versions = ch_versions.mix(DADA2_DENOISING.out.versions) - - DADA2_RMCHIMERA ( DADA2_DENOISING.out.seqtab ) - ch_versions = ch_versions.mix(DADA2_RMCHIMERA.out.versions) - - //group by sequencing run & group by meta - DADA2_PREPROCESSING.out.logs - .join( DADA2_DENOISING.out.denoised ) - .join( DADA2_DENOISING.out.mergers ) - .join( DADA2_RMCHIMERA.out.rds ) - .set { ch_track_numbers } - DADA2_STATS ( ch_track_numbers ) - ch_versions = ch_versions.mix(DADA2_STATS.out.versions) - - //merge if several runs, otherwise just publish - DADA2_MERGE ( - DADA2_STATS.out.stats.map { meta, stats -> stats }.collect(), - DADA2_RMCHIMERA.out.rds.map { meta, rds -> rds }.collect() ) - ch_versions = ch_versions.mix(DADA2_MERGE.out.versions) - - //merge cutadapt_summary and dada_stats files - if (!params.skip_cutadapt) { - MERGE_STATS_STD (CUTADAPT_WORKFLOW.out.summary, DADA2_MERGE.out.dada2stats) - ch_stats = MERGE_STATS_STD.out.tsv - ch_versions = ch_versions.mix(MERGE_STATS_STD.out.versions) - } else { - ch_stats = DADA2_MERGE.out.dada2stats - } + // + // SUBWORKFLOW / MODULES : Taxonomic classification with DADA2, SINTAX and/or QIIME2 + // + if ( params.multiregion ) { + // separate sequences and abundances when several regions + DADA2_SPLITREGIONS ( + //DADA2_DENOISING per run & region -> per run + ch_reads + .map { + info, reads -> + def meta = info.subMap( info.keySet() - 'id' - 'sample' - 'run' ) // All of 'id', 'sample', 'run' must be removed to merge by region + def inf2 = info.subMap( 'id', 'sample' )// May not contain false,true,null; only 'id', 'sample' required + [ meta, inf2 ] } + .groupTuple(by: 0 ).dump(tag:'DADA2_SPLITREGIONS:meta'), + DADA2_MERGE.out.dada2asv ) + ch_versions = ch_versions.mix(DADA2_SPLITREGIONS.out.versions) + + // run q2-sidle + SIDLE_WF ( + DADA2_SPLITREGIONS.out.for_sidle, + ch_sidle_ref_taxonomy.collect(), + val_sidle_ref_taxonomy, + ch_sidle_ref_taxonomy_tree + ) + ch_versions = ch_versions.mix(SIDLE_WF.out.versions) + + // forward results to downstream analysis if multi region + ch_dada2_asv = SIDLE_WF.out.table_tsv + ch_dada2_fasta = Channel.empty() + // Any ASV post-clustering param is not allowed: + // - solved by '!params.multiregion' for vsearch_cluster, filter_ssu, min_len_asv, max_len_asv, filter_codons + // - solved in 'lib/WorkflowAmpliseq.groovy': cut_its + // Must have params: + // - solved by '!params.multiregion' for skip_report + // - solved in 'lib/WorkflowAmpliseq.groovy': skip_dada_taxonomy + } else { + // forward results to downstream analysis if single region + ch_dada2_fasta = DADA2_MERGE.out.fasta + ch_dada2_asv = DADA2_MERGE.out.asv + } - // - // SUBWORKFLOW / MODULES : Taxonomic classification with DADA2, SINTAX and/or QIIME2 - // - if ( params.multiregion ) { - // separate sequences and abundances when several regions - DADA2_SPLITREGIONS ( - //DADA2_DENOISING per run & region -> per run - ch_reads + // + // MODULE : ASV post-clustering with VSEARCH + // + if (params.vsearch_cluster && !params.multiregion) { + ch_fasta_for_clustering = ch_dada2_fasta .map { - info, reads -> - def meta = info.subMap( info.keySet() - 'id' - 'sample' - 'run' ) // All of 'id', 'sample', 'run' must be removed to merge by region - def inf2 = info.subMap( 'id', 'sample' )// May not contain false,true,null; only 'id', 'sample' required - [ meta, inf2 ] } - .groupTuple(by: 0 ).dump(tag:'DADA2_SPLITREGIONS:meta'), - DADA2_MERGE.out.dada2asv ) - ch_versions = ch_versions.mix(DADA2_SPLITREGIONS.out.versions) - - // run q2-sidle - SIDLE_WF ( - DADA2_SPLITREGIONS.out.for_sidle, - ch_sidle_ref_taxonomy.collect(), - val_sidle_ref_taxonomy, - ch_sidle_ref_taxonomy_tree - ) - ch_versions = ch_versions.mix(SIDLE_WF.out.versions) - - // forward results to downstream analysis if multi region - ch_dada2_asv = SIDLE_WF.out.table_tsv - ch_dada2_fasta = Channel.empty() - // Any ASV post-clustering param is not allowed: - // - solved by '!params.multiregion' for vsearch_cluster, filter_ssu, min_len_asv, max_len_asv, filter_codons - // - solved in 'lib/WorkflowAmpliseq.groovy': cut_its - // Must have params: - // - solved by '!params.multiregion' for skip_report - // - solved in 'lib/WorkflowAmpliseq.groovy': skip_dada_taxonomy - } else { - // forward results to downstream analysis if single region - ch_dada2_fasta = DADA2_MERGE.out.fasta - ch_dada2_asv = DADA2_MERGE.out.asv - } - - // - // MODULE : ASV post-clustering with VSEARCH - // - if (params.vsearch_cluster && !params.multiregion) { - ch_fasta_for_clustering = ch_dada2_fasta - .map { - fasta -> - def meta = [:] - meta.id = "ASV_post_clustering" - [ meta, fasta ] } - VSEARCH_CLUSTER ( ch_fasta_for_clustering ) - ch_versions = ch_versions.mix(VSEARCH_CLUSTER.out.versions) - FILTER_CLUSTERS ( VSEARCH_CLUSTER.out.clusters, ch_dada2_asv ) - ch_versions = ch_versions.mix(FILTER_CLUSTERS.out.versions) - ch_dada2_fasta = FILTER_CLUSTERS.out.fasta - ch_dada2_asv = FILTER_CLUSTERS.out.asv - } + fasta -> + def meta = [:] + meta.id = "ASV_post_clustering" + [ meta, fasta ] } + VSEARCH_CLUSTER ( ch_fasta_for_clustering ) + ch_versions = ch_versions.mix(VSEARCH_CLUSTER.out.versions) + FILTER_CLUSTERS ( VSEARCH_CLUSTER.out.clusters, ch_dada2_asv ) + ch_versions = ch_versions.mix(FILTER_CLUSTERS.out.versions) + ch_dada2_fasta = FILTER_CLUSTERS.out.fasta + ch_dada2_asv = FILTER_CLUSTERS.out.asv + } - // - // Entry for ASV fasta files via "--input_fasta" - // - if ( params.input_fasta ) { - FORMAT_FASTAINPUT( ch_input_fasta ) - ch_unfiltered_fasta = FORMAT_FASTAINPUT.out.fasta - ch_versions = ch_versions.mix(FORMAT_FASTAINPUT.out.versions) - } else { - ch_unfiltered_fasta = ch_dada2_fasta - } + // + // Entry for ASV fasta files via "--input_fasta" + // + if ( params.input_fasta ) { + FORMAT_FASTAINPUT( ch_input_fasta ) + ch_unfiltered_fasta = FORMAT_FASTAINPUT.out.fasta + ch_versions = ch_versions.mix(FORMAT_FASTAINPUT.out.versions) + } else { + ch_unfiltered_fasta = ch_dada2_fasta + } - // - // Modules : Filter rRNA - // - if ( !params.skip_barrnap && params.filter_ssu && !params.multiregion ) { - BARRNAP ( ch_unfiltered_fasta ) - ch_versions = ch_versions.mix(BARRNAP.out.versions) - BARRNAPSUMMARY ( BARRNAP.out.gff.collect() ) - ch_versions = ch_versions.mix(BARRNAPSUMMARY.out.versions) - BARRNAPSUMMARY.out.warning.subscribe { - if ( it.baseName.toString().startsWith("WARNING") ) { - error("Barrnap could not identify any rRNA in the ASV sequences! This will result in all sequences being removed with SSU filtering.") + // + // Modules : Filter rRNA + // + if ( !params.skip_barrnap && params.filter_ssu && !params.multiregion ) { + BARRNAP ( ch_unfiltered_fasta ) + ch_versions = ch_versions.mix(BARRNAP.out.versions) + BARRNAPSUMMARY ( BARRNAP.out.gff.collect() ) + ch_versions = ch_versions.mix(BARRNAPSUMMARY.out.versions) + BARRNAPSUMMARY.out.warning.subscribe { + if ( it.baseName.toString().startsWith("WARNING") ) { + error("Barrnap could not identify any rRNA in the ASV sequences! This will result in all sequences being removed with SSU filtering.") + } } + ch_barrnapsummary = BARRNAPSUMMARY.out.summary + FILTER_SSU ( ch_unfiltered_fasta, ch_dada2_asv.ifEmpty( [] ), BARRNAPSUMMARY.out.summary ) + ch_versions = ch_versions.mix(FILTER_SSU.out.versions) + MERGE_STATS_FILTERSSU ( ch_stats, FILTER_SSU.out.stats ) + ch_versions = ch_versions.mix(MERGE_STATS_FILTERSSU.out.versions) + ch_stats = MERGE_STATS_FILTERSSU.out.tsv + ch_dada2_fasta = FILTER_SSU.out.fasta + ch_dada2_asv = FILTER_SSU.out.asv + } else if ( !params.skip_barrnap && !params.filter_ssu && !params.multiregion ) { + BARRNAP ( ch_unfiltered_fasta ) + BARRNAPSUMMARY ( BARRNAP.out.gff.collect() ) + BARRNAPSUMMARY.out.warning.subscribe { if ( it.baseName.toString().startsWith("WARNING") ) log.warn "Barrnap could not identify any rRNA in the ASV sequences. We recommended to use the --skip_barrnap option for these sequences." } + ch_barrnapsummary = BARRNAPSUMMARY.out.summary + ch_versions = ch_versions.mix(BARRNAP.out.versions) + ch_versions = ch_versions.mix(BARRNAPSUMMARY.out.versions) + ch_dada2_fasta = ch_unfiltered_fasta + } else { + ch_barrnapsummary = Channel.empty() + ch_dada2_fasta = ch_unfiltered_fasta } - ch_barrnapsummary = BARRNAPSUMMARY.out.summary - FILTER_SSU ( ch_unfiltered_fasta, ch_dada2_asv.ifEmpty( [] ), BARRNAPSUMMARY.out.summary ) - ch_versions = ch_versions.mix(FILTER_SSU.out.versions) - MERGE_STATS_FILTERSSU ( ch_stats, FILTER_SSU.out.stats ) - ch_versions = ch_versions.mix(MERGE_STATS_FILTERSSU.out.versions) - ch_stats = MERGE_STATS_FILTERSSU.out.tsv - ch_dada2_fasta = FILTER_SSU.out.fasta - ch_dada2_asv = FILTER_SSU.out.asv - } else if ( !params.skip_barrnap && !params.filter_ssu && !params.multiregion ) { - BARRNAP ( ch_unfiltered_fasta ) - BARRNAPSUMMARY ( BARRNAP.out.gff.collect() ) - BARRNAPSUMMARY.out.warning.subscribe { if ( it.baseName.toString().startsWith("WARNING") ) log.warn "Barrnap could not identify any rRNA in the ASV sequences. We recommended to use the --skip_barrnap option for these sequences." } - ch_barrnapsummary = BARRNAPSUMMARY.out.summary - ch_versions = ch_versions.mix(BARRNAP.out.versions) - ch_versions = ch_versions.mix(BARRNAPSUMMARY.out.versions) - ch_dada2_fasta = ch_unfiltered_fasta - } else { - ch_barrnapsummary = Channel.empty() - ch_dada2_fasta = ch_unfiltered_fasta - } - - // - // Modules : amplicon length filtering - // - if ( (params.min_len_asv || params.max_len_asv) && !params.multiregion ) { - FILTER_LEN_ASV ( ch_dada2_fasta, ch_dada2_asv.ifEmpty( [] ) ) - ch_versions = ch_versions.mix(FILTER_LEN_ASV.out.versions) - MERGE_STATS_FILTERLENASV ( ch_stats, FILTER_LEN_ASV.out.stats ) - ch_stats = MERGE_STATS_FILTERLENASV.out.tsv - ch_dada2_fasta = FILTER_LEN_ASV.out.fasta - ch_dada2_asv = FILTER_LEN_ASV.out.asv - // Make sure that not all sequences were removed - ch_dada2_fasta.subscribe { if (it.countLines() == 0) error("ASV length filtering activated by '--min_len_asv' or '--max_len_asv' removed all ASVs, please adjust settings.") } - } - - // - // Modules : Filtering based on codons in an open reading frame - // - if ( params.filter_codons && !params.multiregion ) { - FILTER_CODONS ( ch_dada2_fasta, ch_dada2_asv.ifEmpty( [] ) ) - ch_versions = ch_versions.mix(FILTER_CODONS.out.versions) - MERGE_STATS_CODONS( ch_stats, FILTER_CODONS.out.stats ) - ch_versions = ch_versions.mix(MERGE_STATS_CODONS.out.versions) - ch_stats = MERGE_STATS_CODONS.out.tsv - ch_dada2_fasta = FILTER_CODONS.out.fasta - ch_dada2_asv = FILTER_CODONS.out.asv - // Make sure that not all sequences were removed - ch_dada2_fasta.subscribe { if (it.countLines() == 0) error("ASV codon filtering activated by '--filter_codons' removed all ASVs, please adjust settings.") } - } - // - // Modules : ITSx - cut out ITS region if long ITS reads - // - ch_full_fasta = ch_dada2_fasta - if (params.cut_its == "none") { - ch_fasta = ch_dada2_fasta - } else { - if (params.cut_its == "full") { - outfile = params.its_partial ? "ASV_ITS_seqs.full_and_partial.fasta" : "ASV_ITS_seqs.full.fasta" - } - else if (params.cut_its == "its1") { - outfile = params.its_partial ? "ASV_ITS_seqs.ITS1.full_and_partial.fasta" : "ASV_ITS_seqs.ITS1.fasta" + // + // Modules : amplicon length filtering + // + if ( (params.min_len_asv || params.max_len_asv) && !params.multiregion ) { + FILTER_LEN_ASV ( ch_dada2_fasta, ch_dada2_asv.ifEmpty( [] ) ) + ch_versions = ch_versions.mix(FILTER_LEN_ASV.out.versions) + MERGE_STATS_FILTERLENASV ( ch_stats, FILTER_LEN_ASV.out.stats ) + ch_stats = MERGE_STATS_FILTERLENASV.out.tsv + ch_dada2_fasta = FILTER_LEN_ASV.out.fasta + ch_dada2_asv = FILTER_LEN_ASV.out.asv + // Make sure that not all sequences were removed + ch_dada2_fasta.subscribe { if (it.countLines() == 0) error("ASV length filtering activated by '--min_len_asv' or '--max_len_asv' removed all ASVs, please adjust settings.") } } - else if (params.cut_its == "its2") { - outfile = params.its_partial ? "ASV_ITS_seqs.ITS2.full_and_partial.fasta" : "ASV_ITS_seqs.ITS2.fasta" - } - ITSX_CUTASV ( ch_full_fasta, outfile ) - ch_versions = ch_versions.mix(ITSX_CUTASV.out.versions) - FILTER_LEN_ITSX ( ITSX_CUTASV.out.fasta, [] ) - ch_fasta = FILTER_LEN_ITSX.out.fasta - } - // - // SUBWORKFLOW / MODULES : Taxonomic classification with DADA2, SINTAX and/or QIIME2 - // - - //DADA2 - if (!params.skip_taxonomy && !params.skip_dada_taxonomy) { - if (!params.dada_ref_tax_custom) { - //standard ref taxonomy input from conf/ref_databases.config - FORMAT_TAXONOMY ( ch_dada_ref_taxonomy.collect(), val_dada_ref_taxonomy ) - ch_versions = ch_versions.mix(FORMAT_TAXONOMY.out.versions) - ch_assigntax = FORMAT_TAXONOMY.out.assigntax - ch_addspecies = FORMAT_TAXONOMY.out.addspecies + // + // Modules : Filtering based on codons in an open reading frame + // + if ( params.filter_codons && !params.multiregion ) { + FILTER_CODONS ( ch_dada2_fasta, ch_dada2_asv.ifEmpty( [] ) ) + ch_versions = ch_versions.mix(FILTER_CODONS.out.versions) + MERGE_STATS_CODONS( ch_stats, FILTER_CODONS.out.stats ) + ch_versions = ch_versions.mix(MERGE_STATS_CODONS.out.versions) + ch_stats = MERGE_STATS_CODONS.out.tsv + ch_dada2_fasta = FILTER_CODONS.out.fasta + ch_dada2_asv = FILTER_CODONS.out.asv + // Make sure that not all sequences were removed + ch_dada2_fasta.subscribe { if (it.countLines() == 0) error("ASV codon filtering activated by '--filter_codons' removed all ASVs, please adjust settings.") } } - DADA2_TAXONOMY_WF ( - ch_assigntax, - ch_addspecies, - val_dada_ref_taxonomy, - ch_fasta, - ch_full_fasta, - taxlevels - ).tax.set { ch_dada2_tax } - ch_versions = ch_versions.mix(DADA2_TAXONOMY_WF.out.versions) - ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( ch_dada2_tax.map { it = [ "dada2", file(it) ] } ) - } else { - ch_dada2_tax = Channel.empty() - } - //Kraken2 - if (!params.skip_taxonomy && (params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom) ) { - KRAKEN2_TAXONOMY_WF ( - ch_kraken2_ref_taxonomy, - val_kraken2_ref_taxonomy, - ch_fasta, - kraken2_taxlevels - ).qiime2_tsv.set { ch_kraken2_tax } - ch_versions = ch_versions.mix(KRAKEN2_TAXONOMY_WF.out.versions) - ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( ch_kraken2_tax.map { it = [ "kraken2", file(it) ] } ) - } else { - ch_kraken2_tax = Channel.empty() - } + // + // Modules : ITSx - cut out ITS region if long ITS reads + // + ch_full_fasta = ch_dada2_fasta + if (params.cut_its == "none") { + ch_fasta = ch_dada2_fasta + } else { + if (params.cut_its == "full") { + outfile = params.its_partial ? "ASV_ITS_seqs.full_and_partial.fasta" : "ASV_ITS_seqs.full.fasta" + } + else if (params.cut_its == "its1") { + outfile = params.its_partial ? "ASV_ITS_seqs.ITS1.full_and_partial.fasta" : "ASV_ITS_seqs.ITS1.fasta" + } + else if (params.cut_its == "its2") { + outfile = params.its_partial ? "ASV_ITS_seqs.ITS2.full_and_partial.fasta" : "ASV_ITS_seqs.ITS2.fasta" + } + ITSX_CUTASV ( ch_full_fasta, outfile ) + ch_versions = ch_versions.mix(ITSX_CUTASV.out.versions) + FILTER_LEN_ITSX ( ITSX_CUTASV.out.fasta, [] ) + ch_fasta = FILTER_LEN_ITSX.out.fasta + } - // SINTAX - if (!params.skip_taxonomy && params.sintax_ref_taxonomy) { - SINTAX_TAXONOMY_WF ( - ch_sintax_ref_taxonomy.collect(), - val_sintax_ref_taxonomy, - ch_fasta, - ch_full_fasta, - sintax_taxlevels - ).tax.set { ch_sintax_tax } - ch_versions = ch_versions.mix(SINTAX_TAXONOMY_WF.out.versions) - ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( ch_sintax_tax.map { it = [ "sintax", file(it) ] } ) - } else { - ch_sintax_tax = Channel.empty() - } + // + // SUBWORKFLOW / MODULES : Taxonomic classification with DADA2, SINTAX and/or QIIME2 + // + + //DADA2 + if (!params.skip_taxonomy && !params.skip_dada_taxonomy) { + if (!params.dada_ref_tax_custom) { + //standard ref taxonomy input from conf/ref_databases.config + FORMAT_TAXONOMY ( ch_dada_ref_taxonomy.collect(), val_dada_ref_taxonomy ) + ch_versions = ch_versions.mix(FORMAT_TAXONOMY.out.versions) + ch_assigntax = FORMAT_TAXONOMY.out.assigntax + ch_addspecies = FORMAT_TAXONOMY.out.addspecies + } + DADA2_TAXONOMY_WF ( + ch_assigntax, + ch_addspecies, + val_dada_ref_taxonomy, + ch_fasta, + ch_full_fasta, + taxlevels + ).tax.set { ch_dada2_tax } + ch_versions = ch_versions.mix(DADA2_TAXONOMY_WF.out.versions) + ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( ch_dada2_tax.map { it = [ "dada2", file(it) ] } ) + } else { + ch_dada2_tax = Channel.empty() + } - // Phylo placement - if ( params.pplace_tree ) { - ch_pp_data = ch_fasta.map { it -> - [ meta: [ id: params.pplace_name ?: 'user_tree' ], - data: [ - alignmethod: params.pplace_alnmethod ?: 'hmmer', - queryseqfile: it, - refseqfile: file( params.pplace_aln, checkIfExists: true ), - hmmfile: [], - refphylogeny: file( params.pplace_tree, checkIfExists: true ), - model: params.pplace_model, - taxonomy: params.pplace_taxonomy ? file( params.pplace_taxonomy, checkIfExists: true ) : [] - ] ] + //Kraken2 + if (!params.skip_taxonomy && (params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom) ) { + KRAKEN2_TAXONOMY_WF ( + ch_kraken2_ref_taxonomy, + val_kraken2_ref_taxonomy, + ch_fasta, + kraken2_taxlevels + ).qiime2_tsv.set { ch_kraken2_tax } + ch_versions = ch_versions.mix(KRAKEN2_TAXONOMY_WF.out.versions) + ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( ch_kraken2_tax.map { it = [ "kraken2", file(it) ] } ) + } else { + ch_kraken2_tax = Channel.empty() } - FASTA_NEWICK_EPANG_GAPPA ( ch_pp_data ) - ch_versions = ch_versions.mix( FASTA_NEWICK_EPANG_GAPPA.out.versions ) - ch_pplace_tax = FORMAT_PPLACETAX ( FASTA_NEWICK_EPANG_GAPPA.out.taxonomy_per_query ).tsv - ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( PHYLOSEQ_INTAX_PPLACE ( ch_pplace_tax ).tsv.map { it = [ "pplace", file(it) ] } ) - } else { - ch_pplace_tax = Channel.empty() - } - //QIIME2 - if ( run_qiime2_taxonomy ) { - if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) { - QIIME2_PREPTAX ( - ch_qiime_ref_taxonomy.collect(), - val_qiime_ref_taxonomy, - params.FW_primer, - params.RV_primer - ) - ch_qiime_classifier = QIIME2_PREPTAX.out.classifier + // SINTAX + if (!params.skip_taxonomy && params.sintax_ref_taxonomy) { + SINTAX_TAXONOMY_WF ( + ch_sintax_ref_taxonomy.collect(), + val_sintax_ref_taxonomy, + ch_fasta, + ch_full_fasta, + sintax_taxlevels + ).tax.set { ch_sintax_tax } + ch_versions = ch_versions.mix(SINTAX_TAXONOMY_WF.out.versions) + ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( ch_sintax_tax.map { it = [ "sintax", file(it) ] } ) + } else { + ch_sintax_tax = Channel.empty() } - QIIME2_TAXONOMY ( - ch_fasta, - ch_qiime_classifier - ) - ch_versions = ch_versions.mix( QIIME2_TAXONOMY.out.versions ) - ch_qiime2_tax = QIIME2_TAXONOMY.out.tsv - ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( PHYLOSEQ_INTAX_QIIME2 ( ch_qiime2_tax ).tsv.map { it = [ "qiime2", file(it) ] } ) - } else { - ch_qiime2_tax = Channel.empty() - } - // - // SUBWORKFLOW / MODULES : Downstream analysis with QIIME2 - // - if ( run_qiime2 ) { - // Import ASV abundance table and sequences into QIIME2 - QIIME2_INASV ( ch_dada2_asv ) - ch_versions = ch_versions.mix( QIIME2_INASV.out.versions ) - QIIME2_INSEQ ( ch_fasta ) - ch_versions = ch_versions.mix( QIIME2_INSEQ.out.versions ) - - // Import phylogenetic tree into QIIME2 + // Phylo placement if ( params.pplace_tree ) { - ch_tree = QIIME2_INTREE ( FASTA_NEWICK_EPANG_GAPPA.out.grafted_phylogeny ).qza - ch_versions = ch_versions.mix( QIIME2_INTREE.out.versions ) - } else if (params.multiregion) { - ch_tree = SIDLE_WF.out.tree_qza - } else { ch_tree = [] } - - // Import taxonomic classification into QIIME2, if available - if ( params.skip_taxonomy ) { - log.info "Skip taxonomy classification" - val_used_taxonomy = "skipped" - ch_tax = Channel.empty() - tax_agglom_min = 1 - tax_agglom_max = 2 - } else if ( params.multiregion ) { - log.info "Use multi-region SIDLE taxonomy classification" - val_used_taxonomy = "SIDLE" - ch_tax = SIDLE_WF.out.tax_qza - } else if ( params.pplace_tree && params.pplace_taxonomy) { - log.info "Use EPA-NG / GAPPA taxonomy classification" - val_used_taxonomy = "phylogenetic placement" - ch_tax = QIIME2_INTAX ( ch_pplace_tax, "parse_dada2_taxonomy.r" ).qza - } else if ( params.dada_ref_taxonomy && !params.skip_dada_taxonomy ) { - log.info "Use DADA2 taxonomy classification" - val_used_taxonomy = "DADA2" - ch_tax = QIIME2_INTAX ( ch_dada2_tax, "parse_dada2_taxonomy.r" ).qza - } else if ( params.sintax_ref_taxonomy ) { - log.info "Use SINTAX taxonomy classification" - val_used_taxonomy = "SINTAX" - ch_tax = QIIME2_INTAX ( ch_sintax_tax, "parse_dada2_taxonomy.r" ).qza - } else if ( params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom ) { - log.info "Use Kraken2 taxonomy classification" - val_used_taxonomy = "Kraken2" - ch_tax = QIIME2_INTAX ( ch_kraken2_tax, "" ).qza - } else if ( params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier ) { - log.info "Use QIIME2 taxonomy classification" - val_used_taxonomy = "QIIME2" - ch_tax = QIIME2_TAXONOMY.out.qza + ch_pp_data = ch_fasta.map { it -> + [ meta: [ id: params.pplace_name ?: 'user_tree' ], + data: [ + alignmethod: params.pplace_alnmethod ?: 'hmmer', + queryseqfile: it, + refseqfile: file( params.pplace_aln, checkIfExists: true ), + hmmfile: [], + refphylogeny: file( params.pplace_tree, checkIfExists: true ), + model: params.pplace_model, + taxonomy: params.pplace_taxonomy ? file( params.pplace_taxonomy, checkIfExists: true ) : [] + ] ] + } + FASTA_NEWICK_EPANG_GAPPA ( ch_pp_data ) + ch_versions = ch_versions.mix( FASTA_NEWICK_EPANG_GAPPA.out.versions ) + ch_pplace_tax = FORMAT_PPLACETAX ( FASTA_NEWICK_EPANG_GAPPA.out.taxonomy_per_query ).tsv + ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( PHYLOSEQ_INTAX_PPLACE ( ch_pplace_tax ).tsv.map { it = [ "pplace", file(it) ] } ) } else { - log.info "Use no taxonomy classification" - val_used_taxonomy = "none" - ch_tax = Channel.empty() - tax_agglom_min = 1 - tax_agglom_max = 2 + ch_pplace_tax = Channel.empty() } - // Filtering ASVs by taxonomy & prevalence & counts - if (params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1) { - QIIME2_TABLEFILTERTAXA ( - QIIME2_INASV.out.qza, - ch_tax, - params.min_frequency, - params.min_samples, - params.exclude_taxa + //QIIME2 + if ( run_qiime2_taxonomy ) { + if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) { + QIIME2_PREPTAX ( + ch_qiime_ref_taxonomy.collect(), + val_qiime_ref_taxonomy, + params.FW_primer, + params.RV_primer + ) + ch_qiime_classifier = QIIME2_PREPTAX.out.classifier + } + QIIME2_TAXONOMY ( + ch_fasta, + ch_qiime_classifier ) - ch_versions = ch_versions.mix( QIIME2_TABLEFILTERTAXA.out.versions ) - QIIME2_SEQFILTERTABLE ( QIIME2_TABLEFILTERTAXA.out.qza, QIIME2_INSEQ.out.qza ) - ch_versions = ch_versions.mix( QIIME2_SEQFILTERTABLE.out.versions ) - FILTER_STATS ( ch_dada2_asv, QIIME2_TABLEFILTERTAXA.out.tsv ) - ch_versions = ch_versions.mix( FILTER_STATS.out.versions.ifEmpty(null) ) - MERGE_STATS_FILTERTAXA (ch_stats, FILTER_STATS.out.tsv) - ch_versions = ch_versions.mix( MERGE_STATS_FILTERTAXA.out.versions ) - ch_asv = QIIME2_TABLEFILTERTAXA.out.qza - ch_seq = QIIME2_SEQFILTERTABLE.out.qza - ch_tsv = QIIME2_TABLEFILTERTAXA.out.tsv + ch_versions = ch_versions.mix( QIIME2_TAXONOMY.out.versions ) + ch_qiime2_tax = QIIME2_TAXONOMY.out.tsv + ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( PHYLOSEQ_INTAX_QIIME2 ( ch_qiime2_tax ).tsv.map { it = [ "qiime2", file(it) ] } ) } else { - ch_asv = QIIME2_INASV.out.qza - ch_seq = QIIME2_INSEQ.out.qza - ch_tsv = ch_dada2_asv - } - //Export various ASV tables - if (!params.skip_abundance_tables) { - QIIME2_EXPORT ( ch_asv, ch_seq, ch_tax, ch_qiime2_tax, ch_dada2_tax, ch_pplace_tax, ch_sintax_tax, tax_agglom_min, tax_agglom_max ) - ch_versions = ch_versions.mix( QIIME2_EXPORT.out.versions ) + ch_qiime2_tax = Channel.empty() } - if (!params.skip_barplot) { - QIIME2_BARPLOT ( ch_metadata.ifEmpty([]), ch_asv, ch_tax, '' ) - ch_versions = ch_versions.mix( QIIME2_BARPLOT.out.versions ) - } + // + // SUBWORKFLOW / MODULES : Downstream analysis with QIIME2 + // + if ( run_qiime2 ) { + // Import ASV abundance table and sequences into QIIME2 + QIIME2_INASV ( ch_dada2_asv ) + ch_versions = ch_versions.mix( QIIME2_INASV.out.versions ) + QIIME2_INSEQ ( ch_fasta ) + ch_versions = ch_versions.mix( QIIME2_INSEQ.out.versions ) + + // Import phylogenetic tree into QIIME2 + if ( params.pplace_tree ) { + ch_tree = QIIME2_INTREE ( FASTA_NEWICK_EPANG_GAPPA.out.grafted_phylogeny ).qza + ch_versions = ch_versions.mix( QIIME2_INTREE.out.versions ) + } else if (params.multiregion) { + ch_tree = SIDLE_WF.out.tree_qza + } else { ch_tree = [] } + + // Import taxonomic classification into QIIME2, if available + if ( params.skip_taxonomy ) { + log.info "Skip taxonomy classification" + val_used_taxonomy = "skipped" + ch_tax = Channel.empty() + tax_agglom_min = 1 + tax_agglom_max = 2 + } else if ( params.multiregion ) { + log.info "Use multi-region SIDLE taxonomy classification" + val_used_taxonomy = "SIDLE" + ch_tax = SIDLE_WF.out.tax_qza + } else if ( params.pplace_tree && params.pplace_taxonomy) { + log.info "Use EPA-NG / GAPPA taxonomy classification" + val_used_taxonomy = "phylogenetic placement" + ch_tax = QIIME2_INTAX ( ch_pplace_tax, "parse_dada2_taxonomy.r" ).qza + } else if ( params.dada_ref_taxonomy && !params.skip_dada_taxonomy ) { + log.info "Use DADA2 taxonomy classification" + val_used_taxonomy = "DADA2" + ch_tax = QIIME2_INTAX ( ch_dada2_tax, "parse_dada2_taxonomy.r" ).qza + } else if ( params.sintax_ref_taxonomy ) { + log.info "Use SINTAX taxonomy classification" + val_used_taxonomy = "SINTAX" + ch_tax = QIIME2_INTAX ( ch_sintax_tax, "parse_dada2_taxonomy.r" ).qza + } else if ( params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom ) { + log.info "Use Kraken2 taxonomy classification" + val_used_taxonomy = "Kraken2" + ch_tax = QIIME2_INTAX ( ch_kraken2_tax, "" ).qza + } else if ( params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier ) { + log.info "Use QIIME2 taxonomy classification" + val_used_taxonomy = "QIIME2" + ch_tax = QIIME2_TAXONOMY.out.qza + } else { + log.info "Use no taxonomy classification" + val_used_taxonomy = "none" + ch_tax = Channel.empty() + tax_agglom_min = 1 + tax_agglom_max = 2 + } - if (params.metadata_category_barplot) { - QIIME2_BARPLOTAVG ( ch_metadata, QIIME2_EXPORT.out.rel_tsv, ch_tax, params.metadata_category_barplot ) - ch_versions = ch_versions.mix( QIIME2_BARPLOTAVG.out.versions ) - } + // Filtering ASVs by taxonomy & prevalence & counts + if (params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1) { + QIIME2_TABLEFILTERTAXA ( + QIIME2_INASV.out.qza, + ch_tax, + params.min_frequency, + params.min_samples, + params.exclude_taxa + ) + ch_versions = ch_versions.mix( QIIME2_TABLEFILTERTAXA.out.versions ) + QIIME2_SEQFILTERTABLE ( QIIME2_TABLEFILTERTAXA.out.qza, QIIME2_INSEQ.out.qza ) + ch_versions = ch_versions.mix( QIIME2_SEQFILTERTABLE.out.versions ) + FILTER_STATS ( ch_dada2_asv, QIIME2_TABLEFILTERTAXA.out.tsv ) + ch_versions = ch_versions.mix( FILTER_STATS.out.versions.ifEmpty(null) ) + MERGE_STATS_FILTERTAXA (ch_stats, FILTER_STATS.out.tsv) + ch_versions = ch_versions.mix( MERGE_STATS_FILTERTAXA.out.versions ) + ch_asv = QIIME2_TABLEFILTERTAXA.out.qza + ch_seq = QIIME2_SEQFILTERTABLE.out.qza + ch_tsv = QIIME2_TABLEFILTERTAXA.out.tsv + } else { + ch_asv = QIIME2_INASV.out.qza + ch_seq = QIIME2_INSEQ.out.qza + ch_tsv = ch_dada2_asv + } + //Export various ASV tables + if (!params.skip_abundance_tables) { + QIIME2_EXPORT ( ch_asv, ch_seq, ch_tax, ch_qiime2_tax, ch_dada2_tax, ch_pplace_tax, ch_sintax_tax, tax_agglom_min, tax_agglom_max ) + ch_versions = ch_versions.mix( QIIME2_EXPORT.out.versions ) + } - //Select metadata categories for diversity analysis & ancom - if (params.metadata_category) { - ch_metacolumn_all = Channel.fromList(params.metadata_category.tokenize(',')) - METADATA_PAIRWISE ( ch_metadata ).category.set { ch_metacolumn_pairwise } - ch_versions = ch_versions.mix( METADATA_PAIRWISE.out.versions ) - ch_metacolumn_pairwise = ch_metacolumn_pairwise.splitCsv().flatten() - ch_metacolumn_pairwise = ch_metacolumn_all.join(ch_metacolumn_pairwise) - } else if (params.ancom || params.ancombc || !params.skip_diversity_indices) { - METADATA_ALL ( ch_metadata ).category.set { ch_metacolumn_all } - ch_versions = ch_versions.mix( METADATA_ALL.out.versions ) - //return empty channel if no appropriate column was found - ch_metacolumn_all.branch { passed: it != "" }.set { result } - ch_metacolumn_all = result.passed - ch_metacolumn_all = ch_metacolumn_all.splitCsv().flatten() - METADATA_PAIRWISE ( ch_metadata ).category.set { ch_metacolumn_pairwise } - ch_versions = ch_versions.mix( METADATA_PAIRWISE.out.versions ) - ch_metacolumn_pairwise = ch_metacolumn_pairwise.splitCsv().flatten() - } else { - ch_metacolumn_all = Channel.empty() - ch_metacolumn_pairwise = Channel.empty() - } + if (!params.skip_barplot) { + QIIME2_BARPLOT ( ch_metadata.ifEmpty([]), ch_asv, ch_tax, '' ) + ch_versions = ch_versions.mix( QIIME2_BARPLOT.out.versions ) + } - //Diversity indices - if ( params.metadata && (!params.skip_alpha_rarefaction || !params.skip_diversity_indices) ) { - QIIME2_DIVERSITY ( - ch_metadata, - ch_asv, - ch_seq, - ch_tree, - ch_tsv, - ch_metacolumn_pairwise, - ch_metacolumn_all, - params.skip_alpha_rarefaction, - params.skip_diversity_indices, - params.diversity_rarefaction_depth - ) - ch_versions = ch_versions.mix( QIIME2_DIVERSITY.out.versions ) - } + if (params.metadata_category_barplot) { + QIIME2_BARPLOTAVG ( ch_metadata, QIIME2_EXPORT.out.rel_tsv, ch_tax, params.metadata_category_barplot ) + ch_versions = ch_versions.mix( QIIME2_BARPLOTAVG.out.versions ) + } - //Perform ANCOM and ANCOMBC tests - if ( ( params.ancom || params.ancombc || params.ancombc_formula ) && params.metadata ) { - QIIME2_ANCOM ( - ch_metadata, - ch_asv, - ch_metacolumn_all, - ch_tax, - tax_agglom_min, - tax_agglom_max, - params.ancombc_formula - ) - ch_versions = ch_versions.mix( QIIME2_ANCOM.out.versions ) - } - } else { - ch_tsv = ch_dada2_asv - } + //Select metadata categories for diversity analysis & ancom + if (params.metadata_category) { + ch_metacolumn_all = Channel.fromList(params.metadata_category.tokenize(',')) + METADATA_PAIRWISE ( ch_metadata ).category.set { ch_metacolumn_pairwise } + ch_versions = ch_versions.mix( METADATA_PAIRWISE.out.versions ) + ch_metacolumn_pairwise = ch_metacolumn_pairwise.splitCsv().flatten() + ch_metacolumn_pairwise = ch_metacolumn_all.join(ch_metacolumn_pairwise) + } else if (params.ancom || params.ancombc || !params.skip_diversity_indices) { + METADATA_ALL ( ch_metadata ).category.set { ch_metacolumn_all } + ch_versions = ch_versions.mix( METADATA_ALL.out.versions ) + //return empty channel if no appropriate column was found + ch_metacolumn_all.branch { passed: it != "" }.set { result } + ch_metacolumn_all = result.passed + ch_metacolumn_all = ch_metacolumn_all.splitCsv().flatten() + METADATA_PAIRWISE ( ch_metadata ).category.set { ch_metacolumn_pairwise } + ch_versions = ch_versions.mix( METADATA_PAIRWISE.out.versions ) + ch_metacolumn_pairwise = ch_metacolumn_pairwise.splitCsv().flatten() + } else { + ch_metacolumn_all = Channel.empty() + ch_metacolumn_pairwise = Channel.empty() + } - // - // MODULE: Predict functional potential of a bacterial community from marker genes with Picrust2 - // - if ( params.picrust ) { - if ( run_qiime2 && !params.skip_abundance_tables && ( params.dada_ref_taxonomy || params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier || params.sintax_ref_taxonomy || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom ) && !params.skip_taxonomy ) { - PICRUST ( QIIME2_EXPORT.out.abs_fasta, QIIME2_EXPORT.out.abs_tsv, "QIIME2", "This Picrust2 analysis is based on filtered reads from QIIME2" ) + //Diversity indices + if ( params.metadata && (!params.skip_alpha_rarefaction || !params.skip_diversity_indices) ) { + QIIME2_DIVERSITY ( + ch_metadata, + ch_asv, + ch_seq, + ch_tree, + ch_tsv, + ch_metacolumn_pairwise, + ch_metacolumn_all, + params.skip_alpha_rarefaction, + params.skip_diversity_indices, + params.diversity_rarefaction_depth + ) + ch_versions = ch_versions.mix( QIIME2_DIVERSITY.out.versions ) + } + + //Perform ANCOM and ANCOMBC tests + if ( ( params.ancom || params.ancombc || params.ancombc_formula ) && params.metadata ) { + QIIME2_ANCOM ( + ch_metadata, + ch_asv, + ch_metacolumn_all, + ch_tax, + tax_agglom_min, + tax_agglom_max, + params.ancombc_formula + ) + ch_versions = ch_versions.mix( QIIME2_ANCOM.out.versions ) + } } else { - PICRUST ( ch_fasta, ch_dada2_asv, "DADA2", "This Picrust2 analysis is based on unfiltered reads from DADA2" ) + ch_tsv = ch_dada2_asv } - ch_versions = ch_versions.mix(PICRUST.out.versions.ifEmpty(null)) - } - // - // MODULE: Export data in SBDI's (Swedish biodiversity infrastructure) format - // - if ( params.sbdiexport ) { - if ( params.sintax_ref_taxonomy ) { - SBDIEXPORT ( ch_dada2_asv, ch_sintax_tax, ch_metadata ) - db_version = params.sintax_ref_databases[params.sintax_ref_taxonomy]["dbversion"] - SBDIEXPORTREANNOTATE ( ch_sintax_tax, "sintax", db_version, params.cut_its, ch_barrnapsummary.ifEmpty([]) ) - } else { - SBDIEXPORT ( ch_dada2_asv, ch_dada2_tax, ch_metadata ) - db_version = params.dada_ref_databases[params.dada_ref_taxonomy]["dbversion"] - SBDIEXPORTREANNOTATE ( ch_dada2_tax, "dada2", db_version, params.cut_its, ch_barrnapsummary.ifEmpty([]) ) + // + // MODULE: Predict functional potential of a bacterial community from marker genes with Picrust2 + // + if ( params.picrust ) { + if ( run_qiime2 && !params.skip_abundance_tables && ( params.dada_ref_taxonomy || params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier || params.sintax_ref_taxonomy || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom ) && !params.skip_taxonomy ) { + PICRUST ( QIIME2_EXPORT.out.abs_fasta, QIIME2_EXPORT.out.abs_tsv, "QIIME2", "This Picrust2 analysis is based on filtered reads from QIIME2" ) + } else { + PICRUST ( ch_fasta, ch_dada2_asv, "DADA2", "This Picrust2 analysis is based on unfiltered reads from DADA2" ) + } + ch_versions = ch_versions.mix(PICRUST.out.versions.ifEmpty(null)) } - ch_versions = ch_versions.mix(SBDIEXPORT.out.versions.first()) - } - // - // SUBWORKFLOW: Create phyloseq objects - // - if ( !params.skip_taxonomy ) { - if ( params.pplace_tree ) { - ch_tree_for_phyloseq = FASTA_NEWICK_EPANG_GAPPA.out.grafted_phylogeny - } else { - ch_tree_for_phyloseq = [] + // + // MODULE: Export data in SBDI's (Swedish biodiversity infrastructure) format + // + if ( params.sbdiexport ) { + if ( params.sintax_ref_taxonomy ) { + SBDIEXPORT ( ch_dada2_asv, ch_sintax_tax, ch_metadata ) + db_version = params.sintax_ref_databases[params.sintax_ref_taxonomy]["dbversion"] + SBDIEXPORTREANNOTATE ( ch_sintax_tax, "sintax", db_version, params.cut_its, ch_barrnapsummary.ifEmpty([]) ) + } else { + SBDIEXPORT ( ch_dada2_asv, ch_dada2_tax, ch_metadata ) + db_version = params.dada_ref_databases[params.dada_ref_taxonomy]["dbversion"] + SBDIEXPORTREANNOTATE ( ch_dada2_tax, "dada2", db_version, params.cut_its, ch_barrnapsummary.ifEmpty([]) ) + } + ch_versions = ch_versions.mix(SBDIEXPORT.out.versions.first()) } - PHYLOSEQ_WORKFLOW ( - ch_tax_for_phyloseq, - ch_tsv, - ch_metadata.ifEmpty([]), - ch_tree_for_phyloseq, - run_qiime2 - ) - ch_versions = ch_versions.mix(PHYLOSEQ_WORKFLOW.out.versions.first()) + // + // SUBWORKFLOW: Create phyloseq objects + // + if ( !params.skip_taxonomy ) { + if ( params.pplace_tree ) { + ch_tree_for_phyloseq = FASTA_NEWICK_EPANG_GAPPA.out.grafted_phylogeny + } else { + ch_tree_for_phyloseq = [] + } + + PHYLOSEQ_WORKFLOW ( + ch_tax_for_phyloseq, + ch_tsv, + ch_metadata.ifEmpty([]), + ch_tree_for_phyloseq, + run_qiime2 + ) + ch_versions = ch_versions.mix(PHYLOSEQ_WORKFLOW.out.versions.first()) + } } + // END of Non-Nanopore section // // Collate and save software versions @@ -952,7 +956,8 @@ workflow AMPLISEQ { // // MODULE: Summary Report // - if (!params.skip_report && !params.multiregion) { + // TODO: Temporarily turn off the summary report for Nanopore data + if (!params.skip_report && !params.multiregion && !params.nanopore) { SUMMARY_REPORT ( ch_report_template, ch_report_css, From fcade0ad1b1859fa5aa94eab40789dbe3e6eed76 Mon Sep 17 00:00:00 2001 From: Samuel Lampa Date: Wed, 29 Jan 2025 13:07:12 +0100 Subject: [PATCH 10/12] Remove hard-coded paths --- nextflow.config | 2 +- nextflow_schema.json | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/nextflow.config b/nextflow.config index 09600ba2..0a827bd8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -130,7 +130,7 @@ params { // emu parameters - db = '/home/proj/development/microbial/16s/databases/emu_database' + db = '${projectDir}/assets/emu_database' reads = null seqtype = "map-ont" diff --git a/nextflow_schema.json b/nextflow_schema.json index fa5f9056..218c2621 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -704,7 +704,7 @@ "properties": { "report_template": { "type": "string", - "default": "/home/proj/development/microbial/16s/ampliseq/assets/report_template.Rmd", + "default": "${projectDir}/assets/report_template.Rmd", "description": "Path to Markdown file (Rmd)" }, "report_css": { @@ -1002,7 +1002,7 @@ }, "db": { "type": "string", - "default": "/home/proj/development/microbial/16s/databases/emu_database" + "default": "${projectDir}/assets/emu_database" }, "seqtype": { "type": "string", From bef1dbb86a91ab46994875cca0a2289b4f869b1f Mon Sep 17 00:00:00 2001 From: Samuel Lampa Date: Wed, 29 Jan 2025 13:33:06 +0100 Subject: [PATCH 11/12] Add missing reads param in schema --- nextflow_schema.json | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nextflow_schema.json b/nextflow_schema.json index 218c2621..0ed86f81 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1030,6 +1030,9 @@ "output_unclassified": { "type": "boolean", "default": true + }, + "reads": { + "type": "string" } } } \ No newline at end of file From aa66a9456c5cfe1eb414fb4f7b5a6c446ab59de5 Mon Sep 17 00:00:00 2001 From: Samuel Lampa Date: Wed, 29 Jan 2025 13:34:22 +0100 Subject: [PATCH 12/12] Run nf-core pipelines schema build to format schema file --- nextflow_schema.json | 49 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 0ed86f81..007ce671 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -82,7 +82,9 @@ "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" } }, - "required": ["outdir"], + "required": [ + "outdir" + ], "fa_icon": "fas fa-terminal" }, "sequencing_input": { @@ -259,7 +261,11 @@ "default": "independent", "help_text": "If samples are treated independent (lowest sensitivity and lowest resources), pooled (highest sensitivity and resources) or pseudo-pooled (balance between required resources and sensitivity).", "description": "Mode of sample inference: \"independent\", \"pooled\" or \"pseudo\"", - "enum": ["independent", "pooled", "pseudo"] + "enum": [ + "independent", + "pooled", + "pseudo" + ] }, "concatenate_reads": { "type": "boolean", @@ -438,7 +444,10 @@ "type": "string", "description": "Method used for alignment, \"hmmer\" or \"mafft\"", "default": "hmmer", - "enum": ["hmmer", "mafft"] + "enum": [ + "hmmer", + "mafft" + ] }, "pplace_taxonomy": { "type": "string", @@ -454,7 +463,13 @@ "type": "string", "help_text": "Choose any of the supported databases, and optionally also specify the version. Database and version are separated by an equal sign (`=`, e.g. `silva=138`) . This will download the desired database and initiate taxonomic classification with QIIME2 and the chosen database.\n\nIf both, `--dada_ref_taxonomy` and `--qiime_ref_taxonomy` are used, DADA2 classification will be used for downstream analysis.\n\nThe following databases are supported:\n- SILVA ribosomal RNA gene database project - 16S rRNA\n- UNITE - eukaryotic nuclear ribosomal ITS region - ITS\n- Greengenes (only testing!)\n\nGenerally, using `silva`, `unite-fungi`, or `unite-alleuk` will select the most recent supported version. For testing purposes, the tiny database `greengenes85` (dereplicated at 85% sequence similarity) is available. For details on what values are valid, please either use an invalid value such as `x` (causing the pipeline to send an error message with all valid values) or see `conf/ref_databases.config`.", "description": "Name of supported database, and optionally also version number", - "enum": ["silva=138", "silva", "greengenes85", "greengenes2", "greengenes2=2022.10"] + "enum": [ + "silva=138", + "silva", + "greengenes85", + "greengenes2", + "greengenes2=2022.10" + ] }, "qiime_ref_tax_custom": { "type": "string", @@ -529,7 +544,12 @@ "help_text": "If data is long read ITS sequences, that need to be cut to ITS region (full ITS, only ITS1, or only ITS2) for taxonomy assignment.", "description": "Part of ITS region to use for taxonomy assignment: \"full\", \"its1\", or \"its2\"", "default": "none", - "enum": ["none", "full", "its1", "its2"] + "enum": [ + "none", + "full", + "its1", + "its2" + ] }, "its_partial": { "type": "integer", @@ -549,7 +569,13 @@ "type": "string", "help_text": "", "description": "Name of supported database, and optionally also version number", - "enum": ["silva", "silva=128", "greengenes", "greengenes=13_8", "greengenes88"] + "enum": [ + "silva", + "silva=128", + "greengenes", + "greengenes=13_8", + "greengenes88" + ] }, "sidle_ref_tax_custom": { "type": "string", @@ -681,7 +707,7 @@ }, "ancombc_effect_size": { "type": "number", - "default": 1, + "default": 1.0, "minimum": 0, "description": "Effect size threshold for differential abundance barplot for `--ancombc` and `--ancombc_formula`", "fa_icon": "fas fa-greater-than-equal" @@ -822,7 +848,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": {