Skip to content

Commit

Permalink
Merge pull request #158 from CenterForMedicalGeneticsGhent/filter-improvements
Browse files Browse the repository at this point in the history

Some additional filter improvements
  • Loading branch information
nvnieuwk authored Dec 19, 2023
2 parents 236955f + 5eadff3 commit 146aef0
Show file tree
Hide file tree
Showing 11 changed files with 37 additions and 47 deletions.
5 changes: 4 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### New Features

1. Added the `--output_suffix` option to add a custom suffix to the basename of the output files.
1. Added the `--output_suffix` parameter to add a custom suffix to the basename of the output files.
2. Implemented files for the alphamissense plugin of VEP.
3. Added the `--only_pass` parameter to only output variants that have the `PASS` flag in the FILTER column. (This is only applied when `--filter` is also given)
4. Added the `--keep_alt_contigs` parameter. Alternate contigs are now filtered out by default; setting this parameter tells the pipeline to keep them.

### Improvements

1. Updated the seqplorer profile so that the output filenames are correct for easy import
2. Changed the separator in `--vcfanno_resources` to `;`
instead of `,` to allow commas in glob patterns.
3. Removed the reheader step from the vardict subworkflow and added a simple sed substitution to the vardictjava module

## v1.4.0 - Kingly Kortrijk - [December 6 2023]

Expand Down
Binary file removed assets/vardict.header.vcf.gz
Binary file not shown.
1 change: 1 addition & 0 deletions conf/hypercap.config
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
params {
callers = "vardict"
scatter_count = 5
only_pass = true

output_suffix = "-vardict-decomposed-annotated"
}
15 changes: 6 additions & 9 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,8 @@ process {

withName: "^.*:CRAM_PREPARE_SAMTOOLS_BEDTOOLS:FILTER_BEDS\$" {
ext.prefix = { "${meta.id}.filter"}
ext.args = "-d 150"
ext.args = "-vE \"NO_COVERAGE${params.keep_alt_contigs ? "" : "|alt|random|decoy|Un"}\""
ext.args2 = "-d 150"
publishDir = [
overwrite: true,
enabled: true,
Expand Down Expand Up @@ -267,12 +268,12 @@ process {
if(params.filter){
withName: "^.*:CRAM_CALL_GENOTYPE_GATK4:VCF_FILTER_BCFTOOLS:FILTER_1\$" {
ext.prefix = { "${meta.id}_filtered_snps" }
ext.args = {'--output-type z --soft-filter \'GATKCutoffSNP\' -e \'TYPE="snp" && (MQRankSum < -12.5 || ReadPosRankSum < -8.0 || QD < 2.0 || FS > 60.0 || (QD < 10.0 && AD[0:1] / (AD[0:1] + AD[0:0]) < 0.25 && ReadPosRankSum < 0.0) || MQ < 30.0)\' -m \'+\''}
ext.args = {"--output-type z --soft-filter 'GATKCutoffSNP' -e 'TYPE=\"snp\" && (MQRankSum < -12.5 || ReadPosRankSum < -8.0 || QD < 2.0 || FS > 60.0 || MQ < 30.0)' -m '+'"}
}

withName: "^.*:CRAM_CALL_GENOTYPE_GATK4:VCF_FILTER_BCFTOOLS:FILTER_2\$" {
ext.prefix = final_prefix
ext.args = {'--output-type z --soft-filter \'GATKCutoffIndel\' -e \'TYPE="indel" && (ReadPosRankSum < -20.0 || QD < 2.0 || FS > 200.0 || SOR > 10.0 || (QD < 10.0 && AD[0:1] / (AD[0:1] + AD[0:0]) < 0.25 && ReadPosRankSum < 0.0))\' -m \'+\''}
ext.args = {'--output-type z --soft-filter \'GATKCutoffIndel\' -e \'TYPE="indel" && (ReadPosRankSum < -20.0 || QD < 2.0 || FS > 200.0 || SOR > 10.0 )\' -m \'+\''}
publishDir = [
enabled: enableOutput("filter"),
overwrite: true,
Expand Down Expand Up @@ -312,23 +313,19 @@ process {

withName: "^.*:CRAM_CALL_VARDICTJAVA:VCF_CONCAT_BCFTOOLS:BCFTOOLS_CONCAT\$" {
ext.args = '--allow-overlaps --output-type z'
}

withName: "^.*:CRAM_CALL_VARDICTJAVA:BCFTOOLS_REHEADER\$" {
ext.prefix = { "${meta.id}.${meta.caller}" }
publishDir = [
overwrite: true,
enabled: enableOutput("original"),
mode: params.publish_dir_mode,
path: final_output,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
] // SAVE
ext.prefix = { "${meta.id}.${meta.caller}" }
ext.args2 = '--output-type z'
}

if(params.filter) {
withName: "^.*:CRAM_CALL_VARDICTJAVA:VCF_FILTER_BCFTOOLS:FILTER_1\$" {
ext.args = '-i \'QUAL >= 0\' --output-type z'
ext.args = "-i 'QUAL >= 0${params.only_pass ? " && FILTER=\"PASS\"" : ""}' --output-type z"
}

withName: "^.*:CRAM_CALL_VARDICTJAVA:VCF_FILTER_BCFTOOLS:FILTER_2\$" {
Expand Down
3 changes: 2 additions & 1 deletion modules/local/filter_beds/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,12 @@ process FILTER_BEDS {
script:
// Remove regions with no coverage from the callable regions BED file
def args = task.ext.args ?: ''
def args2 = task.ext.args2 ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"

def unzip = bed.extension == "gz" ? "zcat" : "cat"
"""
${unzip} ${bed} | grep -v NO_COVERAGE | bedtools merge ${args} > ${prefix}.bed
${unzip} ${bed} | grep ${args} | bedtools merge ${args2} > ${prefix}.bed
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
1 change: 1 addition & 0 deletions modules/nf-core/vardictjava/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 7 additions & 16 deletions modules/nf-core/vardictjava/vardictjava.diff

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ params {
vardict_min_af = 0.1 // Minimum allele frequency for VarDict
normalize = false
output_suffix = ""
only_pass = false
keep_alt_contigs = false

// Module specific parameters
dragstr = false
Expand Down
13 changes: 10 additions & 3 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -238,14 +238,21 @@
},
"normalize": {
"type": "boolean",
"description": "Normalize the VCF after joint genotyping (will run on the decomposed VCF when --decompose is also used)"
"description": "Normalize the VCFs"
},
"output_suffix": {
"type": "string",
"description": "A custom suffix to add to the basename of the output files"
},
"only_pass": {
"type": "boolean",
"description": "Filter out all variants that don't have the PASS filter for vardict. This only works when --filter is also given"
},
"keep_alt_contigs": {
"type": "boolean",
"description": "Keep all additional contigs for calling instead of filtering them out beforehand"
}
},
"required": ["scatter_count"]
}
},
"institutional_config_options": {
"title": "Institutional config options",
Expand Down
17 changes: 2 additions & 15 deletions subworkflows/local/cram_call_vardictjava/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -73,28 +73,15 @@ workflow CRAM_CALL_VARDICTJAVA {
)
ch_versions = ch_versions.mix(VCF_CONCAT_BCFTOOLS.out.versions)

VCF_CONCAT_BCFTOOLS.out.vcfs
.combine(["${projectDir}/assets/vardict.header.vcf.gz"])
.map { meta, vcf, header ->
[ meta, vcf, header, [] ]
}
.set { ch_reheader_input}

BCFTOOLS_REHEADER(
ch_reheader_input,
ch_fai
)
ch_versions = ch_versions.mix(BCFTOOLS_REHEADER.out.versions.first())

if(params.filter) {
VCF_FILTER_BCFTOOLS(
BCFTOOLS_REHEADER.out.vcf,
VCF_CONCAT_BCFTOOLS.out.vcfs,
false
)
ch_versions = ch_versions.mix(VCF_FILTER_BCFTOOLS.out.versions)
ch_filter_output = VCF_FILTER_BCFTOOLS.out.vcfs
} else {
ch_filter_output = BCFTOOLS_REHEADER.out.vcf
ch_filter_output = VCF_CONCAT_BCFTOOLS.out.vcfs
}

TABIX_TABIX(
Expand Down
4 changes: 2 additions & 2 deletions tests/fails.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@ nextflow_pipeline {

when {
params {
scatter_count = null
fasta = null
outdir = "${outputDir}"
}
}

then {
assert workflow.failed
assert workflow.stderr.join("\n").contains("* Missing required parameter: --scatter_count")
assert workflow.stderr.join("\n").contains("* Missing required parameter: --fasta")
}

}
Expand Down

0 comments on commit 146aef0

Please sign in to comment.