Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

741 add nanopore support #830

Closed
wants to merge 16 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,12 @@ process {
}
withLabel:process_medium {
cpus = { 6 * task.attempt }
memory = { 36.GB * task.attempt }
memory = { 30.GB * task.attempt }
time = { 12.h * task.attempt }
}
withLabel:process_high {
cpus = { 20 * task.attempt }
memory = { 120.GB* task.attempt }
cpus = { 10 * task.attempt }
memory = { 30.GB * task.attempt }
time = { 36.h * task.attempt }
}
withLabel:process_long {
Expand Down
38 changes: 38 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,44 @@ process {
]
}

withName: EMU_ABUNDANCE {
    // Every output is published to the same results folder; the rules differ
    // only in which file type they select and whether they are switched on.
    publishDir = [
        [
            // Abundance tables and read-assignment reports.
            path: { "${params.outdir}/results" },
            mode: params.publish_dir_mode,
            pattern: '*.{tsv,txt}'
        ],
        [
            // Alignment files only exist when emu is run with --keep-files.
            path: { "${params.outdir}/results" },
            mode: params.publish_dir_mode,
            pattern: '*.sam',
            enabled: params.keep_files
        ],
        [
            // Unclassified sequences only exist when emu is run with --output-unclassified.
            path: { "${params.outdir}/results" },
            mode: params.publish_dir_mode,
            pattern: '*.fa',
            enabled: params.output_unclassified
        ]
    ]

    // A closure defers evaluation until the task runs, so the final parameter
    // values are used. Unset optional flags evaluate to null and are removed by
    // findAll before joining, so the command line contains no empty arguments.
    ext.args = {
        [
            "--type ${params.seqtype}",
            "--db ${params.db}",
            "--output-dir ./",
            "--min-abundance ${params.min_abundance}",
            "--N ${params.minimap_max_alignments}",
            "--K ${params.minibatch_size}",
            "--keep-counts",
            params.keep_read_assignments ? "--keep-read-assignments" : null,
            params.keep_files ? "--keep-files" : null,
            params.output_unclassified ? "--output-unclassified" : null
        ].findAll { it }.join(' ') // Join converts the list here to a string.
    }
    ext.prefix = { "${meta.id}" } // A closure can be used to access variables defined in the script
}

withName: SIDLE_DBFILT {
ext.args = { params.sidle_ref_taxonomy.startsWith("greengenes") ? '--p-num-degenerates 3' : '--p-num-degenerates 5' } // 3 for greengenes, 5 for SILVA 128
ext.args2 = { params.sidle_ref_taxonomy.startsWith("greengenes") ? '--p-exclude "p__;,k__;,mitochondria,chloroplast" --p-mode contains' : '--p-exclude "mitochondria,chloroplast" --p-mode contains' } // "p__;,k__;" for greengenes
Expand Down
40 changes: 40 additions & 0 deletions modules/local/emu_abundance.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Runs `emu abundance` on the reads of one sample.
//
// Input:
//   meta  - sample metadata map; meta.id is used as the task tag
//   reads - read file(s) passed to emu as positional arguments
//
// Output:
//   report            - files matching *abundance.tsv
//   assignment_report - files matching *read-assignment-distributions.tsv (optional)
//   samfile           - *.sam files (optional)
//   unclassified_fa   - *.fa files (optional)
//   versions          - versions.yml recording the emu version
//
// All tool options other than --threads and the input reads are supplied
// through task.ext.args.
process EMU_ABUNDANCE {
    tag "$meta.id"
    label 'process_high'

    conda "bioconda::emu=3.4.4"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/emu:3.4.4--hdfd78af_1':
        'quay.io/biocontainers/emu:3.4.4--hdfd78af_1' }"

    input:
    tuple val(meta), path(reads)

    output:
    tuple val(meta), path("*abundance.tsv")                    , emit: report
    tuple val(meta), path("*read-assignment-distributions.tsv"), emit: assignment_report, optional:true
    path "versions.yml"                                        , emit: versions
    tuple val(meta), path("*.sam")                             , emit: samfile          , optional:true
    tuple val(meta), path("*.fa")                              , emit: unclassified_fa  , optional:true

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    """
    emu \\
        abundance \\
        $args \\
        --threads $task.cpus \\
        $reads

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        emu: \$(echo \$(emu --version 2>&1) | sed 's/^.*emu //; s/Using.*\$//' )
    END_VERSIONS
    """
}
14 changes: 14 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ params {
extension = "/*_R{1,2}_001.fastq.gz"
pacbio = false
iontorrent = false
nanopore = false
FW_primer = null
RV_primer = null
classifier = null
Expand Down Expand Up @@ -127,6 +128,19 @@ params {
sidle_ref_tax_custom = null
sidle_ref_tree_custom = null


// emu parameters
// The path must be double-quoted: Groovy only interpolates ${projectDir} in
// double-quoted strings. A single-quoted value would hand the literal text
// '${projectDir}' to the emu command line.
db                     = "${projectDir}/assets/emu_database"

reads                  = null
seqtype                = "map-ont"   // passed to `emu abundance --type`
min_abundance          = 0.0001      // passed to `--min-abundance`
minimap_max_alignments = 50          // passed to `--N`
minibatch_size         = 500000000   // passed to `--K`
keep_read_assignments  = true        // adds `--keep-read-assignments`
keep_files             = false       // adds `--keep-files` and publishes *.sam
output_unclassified    = true        // adds `--output-unclassified` and publishes *.fa

// MultiQC options
multiqc_config = null
multiqc_title = null
Expand Down
92 changes: 82 additions & 10 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,9 @@
"pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$"
}
},
"required": ["outdir"],
"required": [
"outdir"
],
"fa_icon": "fas fa-terminal"
},
"sequencing_input": {
Expand Down Expand Up @@ -259,7 +261,11 @@
"default": "independent",
"help_text": "If samples are treated independent (lowest sensitivity and lowest resources), pooled (highest sensitivity and resources) or pseudo-pooled (balance between required resources and sensitivity).",
"description": "Mode of sample inference: \"independent\", \"pooled\" or \"pseudo\"",
"enum": ["independent", "pooled", "pseudo"]
"enum": [
"independent",
"pooled",
"pseudo"
]
},
"concatenate_reads": {
"type": "boolean",
Expand Down Expand Up @@ -438,7 +444,10 @@
"type": "string",
"description": "Method used for alignment, \"hmmer\" or \"mafft\"",
"default": "hmmer",
"enum": ["hmmer", "mafft"]
"enum": [
"hmmer",
"mafft"
]
},
"pplace_taxonomy": {
"type": "string",
Expand All @@ -454,7 +463,13 @@
"type": "string",
"help_text": "Choose any of the supported databases, and optionally also specify the version. Database and version are separated by an equal sign (`=`, e.g. `silva=138`) . This will download the desired database and initiate taxonomic classification with QIIME2 and the chosen database.\n\nIf both, `--dada_ref_taxonomy` and `--qiime_ref_taxonomy` are used, DADA2 classification will be used for downstream analysis.\n\nThe following databases are supported:\n- SILVA ribosomal RNA gene database project - 16S rRNA\n- UNITE - eukaryotic nuclear ribosomal ITS region - ITS\n- Greengenes (only testing!)\n\nGenerally, using `silva`, `unite-fungi`, or `unite-alleuk` will select the most recent supported version. For testing purposes, the tiny database `greengenes85` (dereplicated at 85% sequence similarity) is available. For details on what values are valid, please either use an invalid value such as `x` (causing the pipeline to send an error message with all valid values) or see `conf/ref_databases.config`.",
"description": "Name of supported database, and optionally also version number",
"enum": ["silva=138", "silva", "greengenes85", "greengenes2", "greengenes2=2022.10"]
"enum": [
"silva=138",
"silva",
"greengenes85",
"greengenes2",
"greengenes2=2022.10"
]
},
"qiime_ref_tax_custom": {
"type": "string",
Expand Down Expand Up @@ -529,7 +544,12 @@
"help_text": "If data is long read ITS sequences, that need to be cut to ITS region (full ITS, only ITS1, or only ITS2) for taxonomy assignment.",
"description": "Part of ITS region to use for taxonomy assignment: \"full\", \"its1\", or \"its2\"",
"default": "none",
"enum": ["none", "full", "its1", "its2"]
"enum": [
"none",
"full",
"its1",
"its2"
]
},
"its_partial": {
"type": "integer",
Expand All @@ -549,7 +569,13 @@
"type": "string",
"help_text": "",
"description": "Name of supported database, and optionally also version number",
"enum": ["silva", "silva=128", "greengenes", "greengenes=13_8", "greengenes88"]
"enum": [
"silva",
"silva=128",
"greengenes",
"greengenes=13_8",
"greengenes88"
]
},
"sidle_ref_tax_custom": {
"type": "string",
Expand Down Expand Up @@ -681,7 +707,7 @@
},
"ancombc_effect_size": {
"type": "number",
"default": 1,
"default": 1.0,
"minimum": 0,
"description": "Effect size threshold for differential abundance barplot for `--ancombc` and `--ancombc_formula`",
"fa_icon": "fas fa-greater-than-equal"
Expand Down Expand Up @@ -822,7 +848,14 @@
"description": "Method used to save pipeline results to output directory.",
"help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
"fa_icon": "fas fa-copy",
"enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
"enum": [
"symlink",
"rellink",
"link",
"copy",
"copyNoFollow",
"move"
],
"hidden": true
},
"email_on_fail": {
Expand Down Expand Up @@ -995,5 +1028,44 @@
{
"$ref": "#/$defs/institutional_config_options"
}
]
}
],
"properties": {
    "nanopore": {
        "type": "boolean",
        "description": "If data is Nanopore reads; the data is then treated as single end."
    },
    "db": {
        "type": "string",
        "default": "${projectDir}/assets/emu_database",
        "description": "Path to the database passed to `emu abundance --db`."
    },
    "seqtype": {
        "type": "string",
        "default": "map-ont",
        "description": "Sequencing type passed to `emu abundance --type`."
    },
    "min_abundance": {
        "type": "number",
        "default": 0.0001,
        "description": "Value passed to `emu abundance --min-abundance`."
    },
    "minimap_max_alignments": {
        "type": "integer",
        "default": 50,
        "description": "Value passed to `emu abundance --N`."
    },
    "minibatch_size": {
        "type": "integer",
        "default": 500000000,
        "description": "Value passed to `emu abundance --K`."
    },
    "keep_read_assignments": {
        "type": "boolean",
        "default": true,
        "description": "Run `emu abundance` with `--keep-read-assignments`."
    },
    "keep_files": {
        "type": "boolean",
        "description": "Run `emu abundance` with `--keep-files` and publish the resulting `.sam` files."
    },
    "output_unclassified": {
        "type": "boolean",
        "default": true,
        "description": "Run `emu abundance` with `--output-unclassified` and publish the resulting `.fa` files."
    },
    "reads": {
        "type": "string"
    }
}
}
6 changes: 3 additions & 3 deletions subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def validateInputParameters() {
error("Incompatible parameters: `--FW_primer` and `--RV_primer` are required for primer trimming. If primer trimming is not needed, use `--skip_cutadapt`.")
}

if ( params.pacbio || params.iontorrent || params.single_end ) {
if ( params.pacbio || params.iontorrent || params.nanopore || params.single_end ) {
if (params.trunclenr) { log.warn "Unused parameter: `--trunclenr` is ignored because the data is single end." }
} else if (params.trunclenf && !params.trunclenr) {
error("Invalid command: `--trunclenf` is set, but `--trunclenr` is not. Either both parameters `--trunclenf` and `--trunclenr` must be set or none.")
Expand Down Expand Up @@ -234,8 +234,8 @@ def validateInputParameters() {
"rdp","rdp=18",
"sbdi-gtdb","sbdi-gtdb=R09-RS220-1","sbdi-gtdb=R08-RS214-1","sbdi-gtdb=R07-RS207-1",
"silva","silva=138","silva=132",
"unite-fungi","unite-fungi=10.0","unite-fungi=9.0","unite-fungi=8.3","unite-fungi=8.2",
"unite-alleuk","unite-alleuk=10.0","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2"
"unite-fungi","unite-fungi=9.0","unite-fungi=8.3","unite-fungi=8.2",
"unite-alleuk","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2"
]
if (params.sbdiexport){
if (params.sintax_ref_taxonomy ) {
Expand Down
Loading