From e06200a52c68076aee3d8c8fbd584bbbebac6a73 Mon Sep 17 00:00:00 2001 From: pruzanov Date: Tue, 21 Jan 2025 13:13:51 -0500 Subject: [PATCH 1/6] Added vidarr_labels, minor changes --- README.md | 8 ++++---- dragenAlign.wdl | 22 ++++++++++++++++++++-- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 6587745..493b96f 100644 --- a/README.md +++ b/README.md @@ -50,10 +50,10 @@ Parameter|Value|Default|Description Output | Type | Description | Labels ---|---|---|--- -`bam`|File|Output bam aligned to genome| -`bamIndex`|File|Index for the aligned bam| -`zippedOut`|File|Zip file containing the supporting .csv and .tab outputs from Dragen| -`outputChimeric`|File?|Output chimeric junctions file, if available| +`bam`|File|BAM file with alignments|vidarr_label: bam +`bamIndex`|File|index of BAM file with alignments|vidarr_label: bamIndex +`zippedOut`|File|Zipped .csv and .tab files (additional outputs)|vidarr_label: zippedOut +`outputChimeric`|File?|Optional output file with chimeric junctions|vidarr_label: outputChimeric ## Commands diff --git a/dragenAlign.wdl b/dragenAlign.wdl index ebdbcb6..dadc2e2 100755 --- a/dragenAlign.wdl +++ b/dragenAlign.wdl @@ -31,7 +31,7 @@ workflow dragenAlign { } Map[String,String] dragenRef_by_genome = { - "hg38": "/staging/data/references/hg38-p12.v9" + "hg38": "/.mounts/labs/gsiprojects/gsi/Dragen/reference/hg38fa.p12/" # /staging/data/references/hg38-p12.v9 } String dragenRef = dragenRef_by_genome[reference] @@ -79,6 +79,24 @@ workflow dragenAlign { url: "https://developer.illumina.com/dragen" } ] + output_meta: { + bam: { + description: "BAM file with alignments", + vidarr_label: "bam" + }, + bamIndex: { + description: "index of BAM file with alignments", + vidarr_label: "bamIndex" + }, + zippedOut: { + description: "Zipped .csv and .tab files (additional outputs)", + vidarr_label: "zippedOut" + } + outputChimeric: { + description: "Optional output file with chimeric junctions", + vidarr_label: 
"outputChimeric" + } + } } output { @@ -293,4 +311,4 @@ task runDragen { outputChimeric: "Output chimeric junctions file, if available" } } -} \ No newline at end of file +} From bc90d52ce4993963f3dc3e9453e38dbbff66c590 Mon Sep 17 00:00:00 2001 From: pruzanov Date: Tue, 21 Jan 2025 14:25:09 -0500 Subject: [PATCH 2/6] Changes to references, adjusted paths --- dragenAlign.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dragenAlign.wdl b/dragenAlign.wdl index dadc2e2..adbaf5f 100755 --- a/dragenAlign.wdl +++ b/dragenAlign.wdl @@ -91,7 +91,7 @@ workflow dragenAlign { zippedOut: { description: "Zipped .csv and .tab files (additional outputs)", vidarr_label: "zippedOut" - } + }, outputChimeric: { description: "Optional output file with chimeric junctions", vidarr_label: "outputChimeric" @@ -246,8 +246,8 @@ task runDragen { String prefix Boolean isRNA Boolean adapterTrim - String adapter1File = "/staging/data/resources/ADAPTER1" - String adapter2File = "/staging/data/resources/ADAPTER2" + String adapter1File = "/.mounts/labs/gsiprojects/gsi/Dragen/resources/ADAPTER1" + String adapter2File = "/.mounts/labs/gsiprojects/gsi/Dragen/resources/ADAPTER2" Int jobMemory = 500 Int timeout = 96 } From d81b21a7a6f1ab9a371ff839c7142f0f43090a1c Mon Sep 17 00:00:00 2001 From: pruzanov Date: Mon, 10 Feb 2025 16:58:54 -0500 Subject: [PATCH 3/6] Switching to modularized scripts --- CHANGELOG.md | 40 ++++-- README.md | 73 ++-------- commands.txt | 62 +------- dragenAlign.wdl | 259 ++++++++++++++++------------------ vidarrtest-regression.json.in | 149 +++++++++++-------- 5 files changed, 262 insertions(+), 321 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 71fe162..a2eda89 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,16 +1,40 @@ -## 1.3.0 - 2024-06-25 +# Changelog +All notable changes to this project will be documented in this file. 
+ +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] - 2025-02-12 +### Changed +- Regression testing adjusted, validates in Jenkins +- default parameters adjusted according to [GBS-5046](https://jira.oicr.on.ca/browse/GBS-5046) +- changed code that generates input list + +## [1.3.0] - 2024-06-25 +### Added [GRD-797](https://jira.oicr.on.ca/browse/GRD-797) - add vidarr labels to outputs (changes to medata only) -## 1.2.2 - 2024-04-09 + +## [1.2.2] - 2024-04-09 +### Changed - Updated to a reference built using hg38/p12 -## 1.2.1 - 2024-04-04 + +## [1.2.1] - 2024-04-04 +### Changed - Changed names in vidarrbuild.json -## 1.2.0 - 2024-03-26 -- Workflow requires an array of fastq files with read-groups as input. A single fastq-pair (or a single fastq file) must also be inputted as an array. -- runDragen task outputs a merged bam file + +## [1.2.0] - 2024-03-26 +### Added - Added new task makeCSV and headerFormat, and removed readGroupFormat task +### Changed +- Workflow requires an array of fastq files with read-groups as input. A single fastq-pair (or a single fastq file) must also be inputted as an array. +- runDragen task outputs a merged bam file - Replaced mode parameter with isRNA parameter (false by default) -## 1.1.0 - 2024-03-05 + +## [1.1.0] - 2024-03-05 +### Changed - Changes the zippedOut output into a zipped directory. This ensures that extraction creates a new directory instead of tarbombing the working directory. 
-## 1.0.0 - 2024-03-04 + +## [1.0.0] - 2024-03-04 +### Added - Completes lane level alignments using Dragen - Supports whole transcriptome alignment diff --git a/README.md b/README.md index 493b96f..47c2d93 100644 --- a/README.md +++ b/README.md @@ -36,10 +36,14 @@ Parameter|Value|Default|Description #### Optional task parameters: Parameter|Value|Default|Description ---|---|---|--- -`headerFormat.jobMemory`|Int|1|Memory allocated for this job -`headerFormat.timeout`|Int|5|Hours before task timeout -`makeCSV.jobMemory`|Int|1|Memory allocated for this job -`makeCSV.timeout`|Int|5|Hours before task timeout +`extractInfoLine.parsingScript`|String|"$DRAGEN_SCRIPTS_ROOT/bin/composeList.py"|Script for parsing inputs into a line +`extractInfoLine.timeout`|Int|4|Timeout for the job +`extractInfoLine.jobMemory`|Int|4|Job allocated RAM +`extractInfoLine.modules`|String|"dragen-scripts/0.1"|dependency modules +`composeList.listWritingScript`|String|"$DRAGEN_SCRIPTS_ROOT/bin/writeFile.py"|Script for writing out list of inputs +`composeList.jobMemory`|Int|4|Job allocated RAM +`composeList.timeout`|Int|4|Timeout for the job +`composeList.modules`|String|"dragen-scripts/0.1"|dependency modules `runDragen.adapter1File`|String|"/staging/data/resources/ADAPTER1"|Adapters to be trimmed from read 1 `runDragen.adapter2File`|String|"/staging/data/resources/ADAPTER2"|Adapters to be trimmed from read 2 `runDragen.jobMemory`|Int|500|Memory allocated for this job @@ -63,64 +67,15 @@ This section lists command(s) run by dragenAlign workflow ### Ensures the read-group information is valid, and outputs a header for the input CSV. 
-``` - set -euo pipefail - - headerString="Read1File,Read2File" - - # Split the string into an array of key-value pairs - IFS=, read -ra rgArray <<< ~{readGroupString} - - # Adds valid keys (for Dragen) to headerString - for field in "${rgArray[@]}"; do - tag=${field:0:5} - if [ "$tag" == "RGID=" ] || [ "$tag" == "RGLB=" ] || [ "$tag" == "RGPL=" ] || \ - [ "$tag" == "RGPU=" ] || [ "$tag" == "RGSM=" ] || [ "$tag" == "RGCN=" ] || \ - [ "$tag" == "RGDS=" ] || [ "$tag" == "RGDT=" ] || [ "$tag" == "RGPI=" ] - then - headerString+=",${field:0:4}" - else - # Redirect error message to stderr - echo "Invalid tag: '$tag'" >&2 - exit 1 - fi - done - - # Ensures the required header information is present - if [ "$(echo "$headerString" | grep -c "RGID")" != 1 ] || \ - [ "$(echo "$headerString" | grep -c "RGSM")" != 1 ] || \ - [ "$(echo "$headerString" | grep -c "RGLB")" != 1 ] || \ - [ "$(echo "$headerString" | grep -c "RGPU")" != 1 ]; then - echo "Missing required read-group information from header" >&2 - exit 1 - fi - - echo "$headerString" ``` - -### Format input CSV file for Dragen. - -``` - set -euo pipefail - - echo ~{csvHeader} > ~{csvResult} - - # Load arrays into bash variables - arrRead1s=(~{sep=" " read1s}) - if ~{isPaired}; then arrRead2s=(~{sep=" " read2s}); fi - arrReadGroups=(~{sep=" " readGroups}) - - # Iterate over the arrays concurrently - for (( i = 0; i < ~{arrayLength}; i++ )) - do - read1="${arrRead1s[i]}" - if ~{isPaired}; then read2="${arrRead2s[i]}"; else read2=""; fi - readGroup=$(echo "${arrReadGroups[i]}" | sed 's/RG..=//g') - echo "$read1,$read2,$readGroup" >> ~{csvResult} - done + python3 ~{parsingScript} -i ~{write_json(fastqInput)} ``` -### Align to reference using Dragen. 
+### Compose a list of inputs for dragen + +``` + python3 ~{listWritingScript} -o ~{outputFileName} -l "~{sep=';' inputLines}" +``` ``` set -euo pipefail diff --git a/commands.txt b/commands.txt index c402710..b53586e 100644 --- a/commands.txt +++ b/commands.txt @@ -3,66 +3,14 @@ This section lists command(s) run by dragenAlign workflow * Running dragenAlign -=== Ensures the read-group information is valid, and outputs a header for the input CSV ===. -``` - set -euo pipefail - - headerString="Read1File,Read2File" - - # Split the string into an array of key-value pairs - IFS=, read -ra rgArray <<< ~{readGroupString} - - # Adds valid keys (for Dragen) to headerString - for field in "${rgArray[@]}"; do - tag=${field:0:5} - if [ "$tag" == "RGID=" ] || [ "$tag" == "RGLB=" ] || [ "$tag" == "RGPL=" ] || \ - [ "$tag" == "RGPU=" ] || [ "$tag" == "RGSM=" ] || [ "$tag" == "RGCN=" ] || \ - [ "$tag" == "RGDS=" ] || [ "$tag" == "RGDT=" ] || [ "$tag" == "RGPI=" ] - then - headerString+=",${field:0:4}" - else - # Redirect error message to stderr - echo "Invalid tag: '$tag'" >&2 - exit 1 - fi - done - - # Ensures the required header information is present - if [ "$(echo "$headerString" | grep -c "RGID")" != 1 ] || \ - [ "$(echo "$headerString" | grep -c "RGSM")" != 1 ] || \ - [ "$(echo "$headerString" | grep -c "RGLB")" != 1 ] || \ - [ "$(echo "$headerString" | grep -c "RGPU")" != 1 ]; then - echo "Missing required read-group information from header" >&2 - exit 1 - fi - - echo "$headerString" ``` - -=== Format input CSV file for Dragen ===. 
- -``` - set -euo pipefail - - echo ~{csvHeader} > ~{csvResult} - - # Load arrays into bash variables - arrRead1s=(~{sep=" " read1s}) - if ~{isPaired}; then arrRead2s=(~{sep=" " read2s}); fi - arrReadGroups=(~{sep=" " readGroups}) - - # Iterate over the arrays concurrently - for (( i = 0; i < ~{arrayLength}; i++ )) - do - read1="${arrRead1s[i]}" - if ~{isPaired}; then read2="${arrRead2s[i]}"; else read2=""; fi - readGroup=$(echo "${arrReadGroups[i]}" | sed 's/RG..=//g') - echo "$read1,$read2,$readGroup" >> ~{csvResult} - done + python3 ~{parsingScript} -i ~{write_json(fastqInput)} ``` -=== Align to reference using Dragen ===. +``` + python3 ~{listWritingScript} -o ~{outputFileName} -l "~{sep=';' inputLines}" +``` ``` set -euo pipefail @@ -87,4 +35,4 @@ This section lists command(s) run by dragenAlign workflow mkdir ~{zipFileName} cp -t ~{zipFileName} $(ls | grep '~{prefix}.*.csv\|~{prefix}.*.tab' | tr '\n' ' ') zip -r ~{zipFileName}.zip ~{zipFileName} -``` \ No newline at end of file +``` diff --git a/dragenAlign.wdl b/dragenAlign.wdl index adbaf5f..4501627 100755 --- a/dragenAlign.wdl +++ b/dragenAlign.wdl @@ -6,6 +6,11 @@ struct InputGroup { String readGroup } +struct GenomeResources { + String referenceDirectory + String dragenVersion +} + workflow dragenAlign { input { @@ -30,11 +35,15 @@ workflow dragenAlign { } } - Map[String,String] dragenRef_by_genome = { - "hg38": "/.mounts/labs/gsiprojects/gsi/Dragen/reference/hg38fa.p12/" # /staging/data/references/hg38-p12.v9 + Map[String,GenomeResources] dragenRef_by_genome = { + "hg38": { + "referenceDirectory": "/.mounts/labs/gsiprojects/gsi/Dragen/reference/hg38fa.p12/", # /staging/data/references/hg38-p12.v9 + "dragenVersion": "4.2.4" + } } - String dragenRef = dragenRef_by_genome[reference] + String dragenRef = dragenRef_by_genome[reference].referenceDirectory + String dragen_version = dragenRef_by_genome[ reference ].dragenVersion parameter_meta { inputGroups: "Array of fastq files to align using Dragen. 
Read-group information is required for fastq files, with the following fields being non-optional: RGID, RGSM, RGLB, RGPU. Each FASTQ file can only be referenced once." @@ -44,26 +53,24 @@ workflow dragenAlign { isRNA: "Specifies whether to complete transcriptomic analysis, [false, genomic]" } - call headerFormat { - input: - readGroupString = readGroups[0], - prefix = outputFileNamePrefix + scatter(t in inputGroups) { + call extractInfoLine { + input: + fastqInput = object{fastqR1: t.fastqR1, fastqR2: t.fastqR2, readGroup: t.readGroup} + } } - call makeCSV { - input: - read1s = read1s, - read2s = read2s, - readGroups = readGroups, - isPaired = isPaired, - csvHeader = headerFormat.csvHeader, - prefix = outputFileNamePrefix + call composeList { + input: + inputLines = extractInfoLine.outputLine, + outputFileName = "dragen_inputs.csv" } - call runDragen { + call runDragen { input: - csv = makeCSV.outCSV, + csv = composeList.inputList, dragenRef = dragenRef, + dragenVersion = dragen_version, adapterTrim = adapterTrim, prefix = outputFileNamePrefix, isRNA = isRNA @@ -108,146 +115,116 @@ workflow dragenAlign { } -task headerFormat { - input { - String readGroupString - String prefix - Int jobMemory = 1 - Int timeout = 5 - } +# ===================================================================== +# A scripted extraction of info from RG line to dragen-compliant string +# ===================================================================== +task extractInfoLine { + input { + InputGroup fastqInput + String parsingScript = "$DRAGEN_SCRIPTS_ROOT/bin/composeList.py" + Int timeout = 4 + Int jobMemory = 4 + String modules = "dragen-scripts/0.1" + } - parameter_meta { - readGroupString: "Read-group information of one of the fastq files" - prefix: "Prefix for output files" - jobMemory: "Memory allocated for this job" - timeout: "Hours before task timeout" - } - - command <<< - set -euo pipefail + parameter_meta { + fastqInput: "InputGroup struct entry with fastq files" + 
parsingScript: "Script for parsing inputs into a line" + timeout: "Timeout for the job" + jobMemory: "Job allocated RAM" + modules: "dependency modules" + } - headerString="Read1File,Read2File" - - # Split the string into an array of key-value pairs - IFS=, read -ra rgArray <<< ~{readGroupString} - - # Adds valid keys (for Dragen) to headerString - for field in "${rgArray[@]}"; do - tag=${field:0:5} - if [ "$tag" == "RGID=" ] || [ "$tag" == "RGLB=" ] || [ "$tag" == "RGPL=" ] || \ - [ "$tag" == "RGPU=" ] || [ "$tag" == "RGSM=" ] || [ "$tag" == "RGCN=" ] || \ - [ "$tag" == "RGDS=" ] || [ "$tag" == "RGDT=" ] || [ "$tag" == "RGPI=" ] - then - headerString+=",${field:0:4}" - else - # Redirect error message to stderr - echo "Invalid tag: '$tag'" >&2 - exit 1 - fi - done - - # Ensures the required header information is present - if [ "$(echo "$headerString" | grep -c "RGID")" != 1 ] || \ - [ "$(echo "$headerString" | grep -c "RGSM")" != 1 ] || \ - [ "$(echo "$headerString" | grep -c "RGLB")" != 1 ] || \ - [ "$(echo "$headerString" | grep -c "RGPU")" != 1 ]; then - echo "Missing required read-group information from header" >&2 - exit 1 - fi - - echo "$headerString" - >>> - - runtime { - memory: "~{jobMemory} GB" - timeout: "~{timeout}" - } - - output { - String csvHeader = read_string(stdout()) - } + command <<< + python3 ~{parsingScript} -i ~{write_json(fastqInput)} + >>> - meta { - output_meta: { - csvHeader: { - description: "Formatted header for the csv input of Dragen", - vidarr_label: "csvHeader" - } + runtime { + timeout: "~{timeout}" + modules: "~{modules}" + memory: "~{jobMemory} GB" + } + + output { + String outputLine = read_string(stdout()) + } + + meta { + output_meta: { + outputLine: "Output line to use in a list of fastq files in dragen-compliant format" + } + } } - } -} - -task makeCSV { - input { - Array[File] read1s - Array[File]? 
read2s - Array[String] readGroups - Boolean isPaired - String csvHeader - String prefix - Int jobMemory = 1 - Int timeout = 5 - } - parameter_meta { - read1s: "Array of read 1 fastq files" - read2s: "Array of read 2 fastq files. May be empty." - readGroups: "Array of read-group information to be added into the bam file header" - isPaired: "Identifies if paired-end sequencing, [true, paired]" - csvHeader: "Formatted header for the csv input of Dragen" - prefix: "Prefix for output files" - jobMemory: "Memory allocated for this job" - timeout: "Hours before task timeout" - } - - String csvResult = "~{prefix}_dragenInput.csv" - Int arrayLength = length(read1s) +# ===================================================================== +# Compose a dragen-compliant list of inputs to pass to the runDragen alignment task +# ===================================================================== +task composeList { + input { + Array[String] inputLines + String listWritingScript = "$DRAGEN_SCRIPTS_ROOT/bin/writeFile.py" + String outputFileName + Int jobMemory = 4 + Int timeout = 4 + String modules = "dragen-scripts/0.1" + } - command <<< - set -euo pipefail - - echo ~{csvHeader} > ~{csvResult} + parameter_meta { + inputLines: "Array of input lines to print" + listWritingScript: "Script for writing out list of inputs" + outputFileName: "Name of an output file, list of inputs" + jobMemory: "Job allocated RAM" + timeout: "Timeout for the job" + modules: "dependency modules" + } - # Load arrays into bash variables - arrRead1s=(~{sep=" " read1s}) - if ~{isPaired}; then arrRead2s=(~{sep=" " read2s}); fi - arrReadGroups=(~{sep=" " readGroups}) - - # Iterate over the arrays concurrently - for (( i = 0; i < ~{arrayLength}; i++ )) - do - read1="${arrRead1s[i]}" - if ~{isPaired}; then read2="${arrRead2s[i]}"; else read2=""; fi - readGroup=$(echo "${arrReadGroups[i]}" | sed 's/RG..=//g') - echo "$read1,$read2,$readGroup" >> ~{csvResult} - done - >>> - - runtime { - memory: "~{jobMemory} GB" - timeout: 
"~{timeout}" - } - - output { - File outCSV = "~{csvResult}" - } + command<<< + python3 ~{listWritingScript} -o ~{outputFileName} -l "~{sep=';' inputLines}" + >>> + + + runtime { + timeout: "~{timeout}" + modules: "~{modules}" + memory: "~{jobMemory} GB" + } - meta { - output_meta: { - outCSV: "Formatted csv input for Dragen, containing fastq files and read-group information" - } - } + output { + File inputList = "~{outputFileName}" + } + + meta { + output_meta: { + inputList: "Output file to use with dragen SNV caller" + } + } } + +# ================================================================ +# Main task for generating SNV calls in somatic mode (DRAGEN mode) +# +# we need CSV files with a header and data lines organized as: +# +# RGID Read Group +# RGSM Sample ID +# RGLB Library +# Lane Flow cell lane +# Read1File - Full path to a valid FASTQ input file +# Read2File - Full path to a valid FASTQ input file. Required for paired-end input. If not using paired-end input, leave empty. +# Each FASTQ file can only be referenced once in the CSV list. +# All values in the Read2File column must be reference valid files or must all be empty. 
+# ================================================================ task runDragen { input { File csv String dragenRef + String dragenVersion String prefix Boolean isRNA Boolean adapterTrim - String adapter1File = "/.mounts/labs/gsiprojects/gsi/Dragen/resources/ADAPTER1" - String adapter2File = "/.mounts/labs/gsiprojects/gsi/Dragen/resources/ADAPTER2" + String adapter1File = "/staging/data/resources/ADAPTER1" + String adapter2File = "/staging/data/resources/ADAPTER2" Int jobMemory = 500 Int timeout = 96 } @@ -255,6 +232,7 @@ task runDragen { parameter_meta { csv: "Formatted csv input for Dragen, containing fastq files and read-group information" dragenRef: "The reference genome to align the sample with by Dragen" + dragenVersion: "Expected version of dragen software on the DRAGEN node" prefix: "Prefix for output files" isRNA: "True/False, whether to complete transcriptomic analysis" adapterTrim: "True/False for adapter trimming" @@ -293,6 +271,7 @@ task runDragen { runtime { timeout: "~{timeout}" + dragen_version: "~{dragenVersion}" backend: "DRAGEN" } diff --git a/vidarrtest-regression.json.in b/vidarrtest-regression.json.in index 5f23d32..bf8d0b3 100644 --- a/vidarrtest-regression.json.in +++ b/vidarrtest-regression.json.in @@ -27,17 +27,21 @@ }, "type": "EXTERNAL" }, - "readGroup": "RGID=121005_h804_0096_AD0V4NACXX-NoIndex_6,RGLB=PCSI0022C,RGPL=ILLUMINA,RGPU=121005_h804_0096_AD0V4NACXX-NoIndex_6,RGSM=PCSI0022C" + "readGroup": "ID:121005_h804_0096_AD0V4NACXX-NoIndex_6 LB:PCSI0022C PL:ILLUMINA PU:121005_h804_0096_AD0V4NACXX-NoIndex_6 SM:PCSI0022C CM:OICR" } ], "dragenAlign.outputFileNamePrefix": "121005_h804_0096_AD0V4NACXX_PCSI0022C_NoIndex_L006_001", "dragenAlign.reference": "hg38", "dragenAlign.adapterTrim": false, "dragenAlign.isRNA": false, - "dragenAlign.headerFormat.jobMemory": null, - "dragenAlign.headerFormat.timeout": null, - "dragenAlign.makeCSV.jobMemory": null, - "dragenAlign.makeCSV.timeout": null, + "dragenAlign.extractInfoLine.jobMemory": null, + 
"dragenAlign.extractInfoLine.modules": null, + "dragenAlign.extractInfoLine.parsingScript": null, + "dragenAlign.extractInfoLine.timeout": null, + "dragenAlign.composeList.jobMemory": null, + "dragenAlign.composeList.listWritingScript": null, + "dragenAlign.composeList.modules": null, + "dragenAlign.composeList.timeout": null, "dragenAlign.runDragen.adapter1File": null, "dragenAlign.runDragen.adapter2File": null, "dragenAlign.runDragen.jobMemory": null, @@ -87,7 +91,7 @@ { "metrics_calculate": "@CHECKOUT@/tests/calculate.sh", "metrics_compare": "@CHECKOUT@/tests/compare.sh", - "output_metrics": "/.mounts/labs/gsi/testdata/dragenAlign/1.2.2/output_metrics/PCSI0022C_notrim_bam.metrics", + "output_metrics": "/.mounts/labs/gsi/testdata/dragenAlign/1.4.0/output_metrics/PCSI0022C_notrim_bam.metrics", "type": "script" } ] @@ -120,17 +124,21 @@ }, "type": "EXTERNAL" }, - "readGroup": "RGID=121005_h804_0096_AD0V4NACXX-NoIndex_6,RGLB=PCSI0022C,RGPL=ILLUMINA,RGPU=121005_h804_0096_AD0V4NACXX-NoIndex_6,RGSM=PCSI0022C" + "readGroup": "ID:121005_h804_0096_AD0V4NACXX-NoIndex_6 LB:PCSI0022C PL:ILLUMINA PU:121005_h804_0096_AD0V4NACXX-NoIndex_6 SM:PCSI0022C CM:OICR" } ], "dragenAlign.outputFileNamePrefix": "121005_h804_0096_AD0V4NACXX_PCSI0022C_NoIndex_L006_001", "dragenAlign.reference": "hg38", "dragenAlign.adapterTrim": null, "dragenAlign.isRNA": false, - "dragenAlign.headerFormat.jobMemory": null, - "dragenAlign.headerFormat.timeout": null, - "dragenAlign.makeCSV.jobMemory": null, - "dragenAlign.makeCSV.timeout": null, + "dragenAlign.extractInfoLine.jobMemory": null, + "dragenAlign.extractInfoLine.modules": null, + "dragenAlign.extractInfoLine.parsingScript": null, + "dragenAlign.extractInfoLine.timeout": null, + "dragenAlign.composeList.jobMemory": null, + "dragenAlign.composeList.listWritingScript": null, + "dragenAlign.composeList.modules": null, + "dragenAlign.composeList.timeout": null, "dragenAlign.runDragen.adapter1File": null, "dragenAlign.runDragen.adapter2File": null, 
"dragenAlign.runDragen.jobMemory": null, @@ -202,17 +210,21 @@ "type": "EXTERNAL" }, "fastqR2": null, - "readGroup": "RGID=121005_h804_0096_AD0V4NACXX-NoIndex_6,RGLB=PCSI0022C,RGPL=ILLUMINA,RGPU=121005_h804_0096_AD0V4NACXX-NoIndex_6,RGSM=PCSI0022C" + "readGroup": "ID:121005_h804_0096_AD0V4NACXX-NoIndex_6 LB:PCSI0022C PL:ILLUMINA PU:121005_h804_0096_AD0V4NACXX-NoIndex_6 SM:PCSI0022C CM:OICR" } ], "dragenAlign.outputFileNamePrefix": "121005_h804_0096_AD0V4NACXX_PCSI0022C_NoIndex_L006_001", "dragenAlign.reference": "hg38", "dragenAlign.adapterTrim": null, "dragenAlign.isRNA": false, - "dragenAlign.headerFormat.jobMemory": null, - "dragenAlign.headerFormat.timeout": null, - "dragenAlign.makeCSV.jobMemory": null, - "dragenAlign.makeCSV.timeout": null, + "dragenAlign.extractInfoLine.jobMemory": null, + "dragenAlign.extractInfoLine.modules": null, + "dragenAlign.extractInfoLine.parsingScript": null, + "dragenAlign.extractInfoLine.timeout": null, + "dragenAlign.composeList.jobMemory": null, + "dragenAlign.composeList.listWritingScript": null, + "dragenAlign.composeList.modules": null, + "dragenAlign.composeList.timeout": null, "dragenAlign.runDragen.adapter1File": null, "dragenAlign.runDragen.adapter2File": null, "dragenAlign.runDragen.jobMemory": null, @@ -262,7 +274,7 @@ { "metrics_calculate": "@CHECKOUT@/tests/calculate.sh", "metrics_compare": "@CHECKOUT@/tests/compare.sh", - "output_metrics": "/.mounts/labs/gsi/testdata/dragenAlign/1.2.2/output_metrics/PCSI0022C_trim_bam_se.metrics", + "output_metrics": "/.mounts/labs/gsi/testdata/dragenAlign/1.4.0/output_metrics/PCSI0022C_trim_bam_se.metrics", "type": "script" } ] @@ -284,18 +296,21 @@ "type": "EXTERNAL" }, "fastqR2": null, - "readGroup": "RGID=121005_h804_0096_AD0V4NACXX-NoIndex_6,RGLB=PCSI0022C,RGPL=ILLUMINA,RGPU=121005_h804_0096_AD0V4NACXX-NoIndex_6,RGSM=PCSI0022C" + "readGroup": "ID:121005_h804_0096_AD0V4NACXX-NoIndex_6 LB:PCSI0022C PL:ILLUMINA PU:121005_h804_0096_AD0V4NACXX-NoIndex_6 SM:PCSI0022C CM:OICR" } ], 
"dragenAlign.outputFileNamePrefix": "121005_h804_0096_AD0V4NACXX_PCSI0022C_NoIndex_L006_001", "dragenAlign.reference": "hg38", "dragenAlign.adapterTrim": false, - "dragenAlign.rgInfo": "ID=121005_h804_0096_AD0V4NACXX-NoIndex_6,LB=PCSI0022C,PL=ILLUMINA,PU=121005_h804_0096_AD0V4NACXX-NoIndex_6,SM=PCSI0022C", "dragenAlign.isRNA": false, - "dragenAlign.headerFormat.jobMemory": null, - "dragenAlign.headerFormat.timeout": null, - "dragenAlign.makeCSV.jobMemory": null, - "dragenAlign.makeCSV.timeout": null, + "dragenAlign.extractInfoLine.jobMemory": null, + "dragenAlign.extractInfoLine.modules": null, + "dragenAlign.extractInfoLine.parsingScript": null, + "dragenAlign.extractInfoLine.timeout": null, + "dragenAlign.composeList.jobMemory": null, + "dragenAlign.composeList.listWritingScript": null, + "dragenAlign.composeList.modules": null, + "dragenAlign.composeList.timeout": null, "dragenAlign.runDragen.adapter1File": null, "dragenAlign.runDragen.adapter2File": null, "dragenAlign.runDragen.jobMemory": null, @@ -345,7 +360,7 @@ { "metrics_calculate": "@CHECKOUT@/tests/calculate.sh", "metrics_compare": "@CHECKOUT@/tests/compare.sh", - "output_metrics": "/.mounts/labs/gsi/testdata/dragenAlign/1.2.2/output_metrics/PCSI0022C_notrim_bam_se.metrics", + "output_metrics": "/.mounts/labs/gsi/testdata/dragenAlign/1.4.0/output_metrics/PCSI0022C_notrim_bam_se.metrics", "type": "script" } ] @@ -378,17 +393,21 @@ }, "type": "EXTERNAL" }, - "readGroup": "RGID=121005_h804_0096_AD0V4NACXX_4_NoIndex,RGPL=Illumina,RGPU=121005_h804_0096_AD0V4NACXX_4_NoIndex,RGLB=K562_1,RGSM=K562_1test" + "readGroup": "ID:121005_h804_0096_AD0V4NACXX_4_NoIndex PL:Illumina PU:121005_h804_0096_AD0V4NACXX_4_NoIndex LB:K562_1 SM:K562_1test CM:OICR" } ], "dragenAlign.outputFileNamePrefix": "K562", "dragenAlign.reference": "hg38", "dragenAlign.adapterTrim": false, "dragenAlign.isRNA": true, - "dragenAlign.headerFormat.jobMemory": null, - "dragenAlign.headerFormat.timeout": null, - "dragenAlign.makeCSV.jobMemory": 
null, - "dragenAlign.makeCSV.timeout": null, + "dragenAlign.extractInfoLine.jobMemory": null, + "dragenAlign.extractInfoLine.modules": null, + "dragenAlign.extractInfoLine.parsingScript": null, + "dragenAlign.extractInfoLine.timeout": null, + "dragenAlign.composeList.jobMemory": null, + "dragenAlign.composeList.listWritingScript": null, + "dragenAlign.composeList.modules": null, + "dragenAlign.composeList.timeout": null, "dragenAlign.runDragen.adapter1File": null, "dragenAlign.runDragen.adapter2File": null, "dragenAlign.runDragen.jobMemory": null, @@ -438,7 +457,7 @@ { "metrics_calculate": "@CHECKOUT@/tests/calculate.sh", "metrics_compare": "@CHECKOUT@/tests/compare.sh", - "output_metrics": "/.mounts/labs/gsi/testdata/dragenAlign/1.2.2/output_metrics/K562_notrim_wt_bam.metrics", + "output_metrics": "/.mounts/labs/gsi/testdata/dragenAlign/1.4.0/output_metrics/K562_notrim_wt_bam.metrics", "type": "script" } ] @@ -471,17 +490,21 @@ }, "type": "EXTERNAL" }, - "readGroup": "RGID=121005_h804_0096_AD0V4NACXX_4_NoIndex,RGPL=Illumina,RGPU=121005_h804_0096_AD0V4NACXX_4_NoIndex,RGLB=K562_1,RGSM=K562_1test" + "readGroup": "ID:121005_h804_0096_AD0V4NACXX_4_NoIndex PL:Illumina PU:121005_h804_0096_AD0V4NACXX_4_NoIndex LB:K562_1 SM:K562_1test CM:OICR" } ], "dragenAlign.outputFileNamePrefix": "K562", "dragenAlign.reference": "hg38", "dragenAlign.adapterTrim": null, "dragenAlign.isRNA": true, - "dragenAlign.headerFormat.jobMemory": null, - "dragenAlign.headerFormat.timeout": null, - "dragenAlign.makeCSV.jobMemory": null, - "dragenAlign.makeCSV.timeout": null, + "dragenAlign.extractInfoLine.jobMemory": null, + "dragenAlign.extractInfoLine.modules": null, + "dragenAlign.extractInfoLine.parsingScript": null, + "dragenAlign.extractInfoLine.timeout": null, + "dragenAlign.composeList.jobMemory": null, + "dragenAlign.composeList.listWritingScript": null, + "dragenAlign.composeList.modules": null, + "dragenAlign.composeList.timeout": null, "dragenAlign.runDragen.adapter1File": null, 
"dragenAlign.runDragen.adapter2File": null, "dragenAlign.runDragen.jobMemory": null, @@ -531,7 +554,7 @@ { "metrics_calculate": "@CHECKOUT@/tests/calculate.sh", "metrics_compare": "@CHECKOUT@/tests/compare.sh", - "output_metrics": "/.mounts/labs/gsi/testdata/dragenAlign/1.2.2/output_metrics/K562_trim_wt_bam.metrics", + "output_metrics": "/.mounts/labs/gsi/testdata/dragenAlign/1.4.0/output_metrics/K562_trim_wt_bam.metrics", "type": "script" } ] @@ -553,17 +576,21 @@ "type": "EXTERNAL" }, "fastqR2": null, - "readGroup": "RGID=121005_h804_0096_AD0V4NACXX_4_NoIndex,RGPL=Illumina,RGPU=121005_h804_0096_AD0V4NACXX_4_NoIndex,RGLB=K562_1,RGSM=K562_1test" + "readGroup": "ID:121005_h804_0096_AD0V4NACXX_4_NoIndex PL:Illumina PU:121005_h804_0096_AD0V4NACXX_4_NoIndex LB:K562_1 SM:K562_1test CM:OICR" } ], "dragenAlign.outputFileNamePrefix": "K562", "dragenAlign.reference": "hg38", "dragenAlign.adapterTrim": false, "dragenAlign.isRNA": true, - "dragenAlign.headerFormat.jobMemory": null, - "dragenAlign.headerFormat.timeout": null, - "dragenAlign.makeCSV.jobMemory": null, - "dragenAlign.makeCSV.timeout": null, + "dragenAlign.extractInfoLine.jobMemory": null, + "dragenAlign.extractInfoLine.modules": null, + "dragenAlign.extractInfoLine.parsingScript": null, + "dragenAlign.extractInfoLine.timeout": null, + "dragenAlign.composeList.jobMemory": null, + "dragenAlign.composeList.listWritingScript": null, + "dragenAlign.composeList.modules": null, + "dragenAlign.composeList.timeout": null, "dragenAlign.runDragen.adapter1File": null, "dragenAlign.runDragen.adapter2File": null, "dragenAlign.runDragen.jobMemory": null, @@ -613,7 +640,7 @@ { "metrics_calculate": "@CHECKOUT@/tests/calculate.sh", "metrics_compare": "@CHECKOUT@/tests/compare.sh", - "output_metrics": "/.mounts/labs/gsi/testdata/dragenAlign/1.2.2/output_metrics/K562_notrim_wt_bam_se.metrics", + "output_metrics": "/.mounts/labs/gsi/testdata/dragenAlign/1.4.0/output_metrics/K562_notrim_wt_bam_se.metrics", "type": "script" } ] @@ 
-635,7 +662,7 @@ "type": "EXTERNAL" }, "fastqR2": null, - "readGroup": "RGID=VENHPV_0260_02_LB02-01_240304_A00469_0642_BHWVM7DSX7_3_TCCGCGAA-GCCTCTAT,RGLB=VENHPV_A00469_0642_WT,RGPL=ILLUMINA,RGPU=VENHPV_0260_02_LB02-01_240304_A00469_0642_BHWVM7DSX7_3_TCCGCGAA-GCCTCTAT,RGSM=VENHPV_TEST" + "readGroup": "ID:VENHPV_0260_02_LB02-01_240304_A00469_0642_BHWVM7DSX7_3_TCCGCGAA-GCCTCTAT LB:VENHPV_A00469_0642_WT PL:ILLUMINA PU:VENHPV_0260_02_LB02-01_240304_A00469_0642_BHWVM7DSX7_3_TCCGCGAA-GCCTCTAT SM:VENHPV_TEST CM:OICR" }, { "fastqR1": { @@ -651,17 +678,21 @@ "type": "EXTERNAL" }, "fastqR2": null, - "readGroup": "RGID=VENHPV_0260_02_LB02-01_240223_A00469_0636_BHWTVJDSX7_1_TCCGCGAA-GCCTCTAT,RGLB=VENHPV_A00469_0636_WT,RGPL=ILLUMINA,RGPU=VENHPV_0260_02_LB02-01_240223_A00469_0636_BHWTVJDSX7_1_TCCGCGAA-GCCTCTAT,RGSM=VENHPV_TEST" + "readGroup": "ID:VENHPV_0260_02_LB02-01_240223_A00469_0636_BHWTVJDSX7_1_TCCGCGAA-GCCTCTAT LB:VENHPV_A00469_0636_WT PL:ILLUMINA PU:VENHPV_0260_02_LB02-01_240223_A00469_0636_BHWTVJDSX7_1_TCCGCGAA-GCCTCTAT SM:VENHPV_TEST CM:OICR" } ], "dragenAlign.outputFileNamePrefix": "VENHPV_0260_02_LB02-01", "dragenAlign.reference": "hg38", "dragenAlign.adapterTrim": false, "dragenAlign.isRNA": true, - "dragenAlign.headerFormat.jobMemory": null, - "dragenAlign.headerFormat.timeout": null, - "dragenAlign.makeCSV.jobMemory": null, - "dragenAlign.makeCSV.timeout": null, + "dragenAlign.extractInfoLine.jobMemory": null, + "dragenAlign.extractInfoLine.modules": null, + "dragenAlign.extractInfoLine.parsingScript": null, + "dragenAlign.extractInfoLine.timeout": null, + "dragenAlign.composeList.jobMemory": null, + "dragenAlign.composeList.listWritingScript": null, + "dragenAlign.composeList.modules": null, + "dragenAlign.composeList.timeout": null, "dragenAlign.runDragen.adapter1File": null, "dragenAlign.runDragen.adapter2File": null, "dragenAlign.runDragen.jobMemory": null, @@ -711,7 +742,7 @@ { "metrics_calculate": "@CHECKOUT@/tests/calculate.sh", "metrics_compare": 
"@CHECKOUT@/tests/compare.sh", - "output_metrics": "/.mounts/labs/gsi/testdata/dragenAlign/1.2.2/output_metrics/VENHPV_notrim_wt_bam_se.metrics", + "output_metrics": "/.mounts/labs/gsi/testdata/dragenAlign/1.4.0/output_metrics/VENHPV_notrim_wt_bam_se.metrics", "type": "script" } ] @@ -744,7 +775,7 @@ }, "type": "EXTERNAL" }, - "readGroup": "RGID=VENHPV_0260_02_LB02-01_240304_A00469_0642_BHWVM7DSX7_3_TCCGCGAA-GCCTCTAT,RGLB=VENHPV_A00469_0642_WT,RGPL=ILLUMINA,RGPU=VENHPV_0260_02_LB02-01_240304_A00469_0642_BHWVM7DSX7_3_TCCGCGAA-GCCTCTAT,RGSM=VENHPV_TEST" + "readGroup": "ID:VENHPV_0260_02_LB02-01_240304_A00469_0642_BHWVM7DSX7_3_TCCGCGAA-GCCTCTAT LB:VENHPV_A00469_0642_WT PL:ILLUMINA PU:VENHPV_0260_02_LB02-01_240304_A00469_0642_BHWVM7DSX7_3_TCCGCGAA-GCCTCTAT SM:VENHPV_TEST CM:OICR" }, { "fastqR1": { @@ -771,17 +802,21 @@ }, "type": "EXTERNAL" }, - "readGroup": "RGID=VENHPV_0260_02_LB02-01_240223_A00469_0636_BHWTVJDSX7_1_TCCGCGAA-GCCTCTAT,RGLB=VENHPV_A00469_0636_WT,RGPL=ILLUMINA,RGPU=VENHPV_0260_02_LB02-01_240223_A00469_0636_BHWTVJDSX7_1_TCCGCGAA-GCCTCTAT,RGSM=VENHPV_TEST" + "readGroup": "ID:VENHPV_0260_02_LB02-01_240223_A00469_0636_BHWTVJDSX7_1_TCCGCGAA-GCCTCTAT LB:VENHPV_A00469_0636_WT PL:ILLUMINA PU:VENHPV_0260_02_LB02-01_240223_A00469_0636_BHWTVJDSX7_1_TCCGCGAA-GCCTCTAT SM:VENHPV_TEST CM:OICR" } ], "dragenAlign.outputFileNamePrefix": "VENHPV_0260_02_LB02-01", "dragenAlign.reference": "hg38", "dragenAlign.adapterTrim": true, "dragenAlign.isRNA": true, - "dragenAlign.headerFormat.jobMemory": null, - "dragenAlign.headerFormat.timeout": null, - "dragenAlign.makeCSV.jobMemory": null, - "dragenAlign.makeCSV.timeout": null, + "dragenAlign.extractInfoLine.jobMemory": null, + "dragenAlign.extractInfoLine.modules": null, + "dragenAlign.extractInfoLine.parsingScript": null, + "dragenAlign.extractInfoLine.timeout": null, + "dragenAlign.composeList.jobMemory": null, + "dragenAlign.composeList.listWritingScript": null, + "dragenAlign.composeList.modules": null, + 
"dragenAlign.composeList.timeout": null, "dragenAlign.runDragen.adapter1File": null, "dragenAlign.runDragen.adapter2File": null, "dragenAlign.runDragen.jobMemory": null, @@ -831,9 +866,9 @@ { "metrics_calculate": "@CHECKOUT@/tests/calculate.sh", "metrics_compare": "@CHECKOUT@/tests/compare.sh", - "output_metrics": "/.mounts/labs/gsi/testdata/dragenAlign/1.2.2/output_metrics/VENHPV_trim_wt_bam.metrics", + "output_metrics": "/.mounts/labs/gsi/testdata/dragenAlign/1.4.0/output_metrics/VENHPV_trim_wt_bam.metrics", "type": "script" } ] } -] \ No newline at end of file +] From fb531ccd001a41818c4e51e34330e4a91442773c Mon Sep 17 00:00:00 2001 From: pruzanov Date: Tue, 11 Feb 2025 12:40:40 -0500 Subject: [PATCH 4/6] Switching to modified dragen scripts --- dragenAlign.wdl | 4 ++-- vidarrtest-regression.json.in | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dragenAlign.wdl b/dragenAlign.wdl index 4501627..f1bea4d 100755 --- a/dragenAlign.wdl +++ b/dragenAlign.wdl @@ -124,7 +124,7 @@ task extractInfoLine { String parsingScript = "$DRAGEN_SCRIPTS_ROOT/bin/composeList.py" Int timeout = 4 Int jobMemory = 4 - String modules = "dragen-scripts/0.1" + String modules = "dragen-scripts/0.2" } parameter_meta { @@ -166,7 +166,7 @@ task composeList { String outputFileName Int jobMemory = 4 Int timeout = 4 - String modules = "dragen-scripts/0.1" + String modules = "dragen-scripts/0.2" } parameter_meta { diff --git a/vidarrtest-regression.json.in b/vidarrtest-regression.json.in index bf8d0b3..f22c620 100644 --- a/vidarrtest-regression.json.in +++ b/vidarrtest-regression.json.in @@ -188,7 +188,7 @@ { "metrics_calculate": "@CHECKOUT@/tests/calculate.sh", "metrics_compare": "@CHECKOUT@/tests/compare.sh", - "output_metrics": "/.mounts/labs/gsi/testdata/dragenAlign/1.2.2/output_metrics/PCSI0022C_trim_bam.metrics", + "output_metrics": "/.mounts/labs/gsi/testdata/dragenAlign/1.4.0/output_metrics/PCSI0022C_trim_bam.metrics", "type": "script" } ] From 
cc179a5346cdaa0e00247e3411dea9eacaa238f2 Mon Sep 17 00:00:00 2001 From: pruzanov Date: Tue, 11 Feb 2025 20:22:31 -0500 Subject: [PATCH 5/6] Test with new version of scripts --- CHANGELOG.md | 2 +- tests/calculate.sh | 4 --- vidarrtest-regression.json.in | 58 +++++++++++++++++------------------ 3 files changed, 30 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a2eda89..15b109a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] - 2025-02-12 +## [1.4.0] - 2025-02-12 ### Changed - Regression testing adjusted, validates in Jenkins - default parameters adjusted according to [GBS-5046](https://jira.oicr.on.ca/browse/GBS-5046) diff --git a/tests/calculate.sh b/tests/calculate.sh index cf67225..5be37b7 100755 --- a/tests/calculate.sh +++ b/tests/calculate.sh @@ -7,12 +7,8 @@ cd $1 module load samtools/1.9 2>/dev/null ls | sort - find -name *.zip -xtype f -exec unzip -l {} \; | awk '{print $4}' | sed -e '/^\(Name\|----\)$/d' -e '/^[[:space:]]*$/d' | sort - find -name *.bam -xtype f -exec samtools view -H {} \; | grep '^@RG' | sort - find -name *.bam -xtype f -exec samtools flagstat {} \; | sort - find -name *.bam -xtype f -exec /bin/bash -c "samtools view {} | md5sum" \; | sort diff --git a/vidarrtest-regression.json.in b/vidarrtest-regression.json.in index f22c620..1f84780 100644 --- a/vidarrtest-regression.json.in +++ b/vidarrtest-regression.json.in @@ -27,7 +27,7 @@ }, "type": "EXTERNAL" }, - "readGroup": "ID:121005_h804_0096_AD0V4NACXX-NoIndex_6 LB:PCSI0022C PL:ILLUMINA PU:121005_h804_0096_AD0V4NACXX-NoIndex_6 SM:PCSI0022C CM:OICR" + "readGroup": "ID:121005_h804_0096_AD0V4NACXX_6_NoIndex LB:PCSI0022C PL:ILLUMINA PU:121005_h804_0096_AD0V4NACXX_6_NoIndex SM:PCSI0022C CN:OICR" } ], 
"dragenAlign.outputFileNamePrefix": "121005_h804_0096_AD0V4NACXX_PCSI0022C_NoIndex_L006_001", @@ -36,10 +36,10 @@ "dragenAlign.isRNA": false, "dragenAlign.extractInfoLine.jobMemory": null, "dragenAlign.extractInfoLine.modules": null, - "dragenAlign.extractInfoLine.parsingScript": null, + "dragenAlign.extractInfoLine.parsingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/composeList.py", "dragenAlign.extractInfoLine.timeout": null, "dragenAlign.composeList.jobMemory": null, - "dragenAlign.composeList.listWritingScript": null, + "dragenAlign.composeList.listWritingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/writeFile.py", "dragenAlign.composeList.modules": null, "dragenAlign.composeList.timeout": null, "dragenAlign.runDragen.adapter1File": null, @@ -124,7 +124,7 @@ }, "type": "EXTERNAL" }, - "readGroup": "ID:121005_h804_0096_AD0V4NACXX-NoIndex_6 LB:PCSI0022C PL:ILLUMINA PU:121005_h804_0096_AD0V4NACXX-NoIndex_6 SM:PCSI0022C CM:OICR" + "readGroup": "ID:121005_h804_0096_AD0V4NACXX_6_NoIndex LB:PCSI0022C PL:ILLUMINA PU:121005_h804_0096_AD0V4NACXX_6_NoIndex SM:PCSI0022C CN:OICR" } ], "dragenAlign.outputFileNamePrefix": "121005_h804_0096_AD0V4NACXX_PCSI0022C_NoIndex_L006_001", @@ -133,10 +133,10 @@ "dragenAlign.isRNA": false, "dragenAlign.extractInfoLine.jobMemory": null, "dragenAlign.extractInfoLine.modules": null, - "dragenAlign.extractInfoLine.parsingScript": null, + "dragenAlign.extractInfoLine.parsingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/composeList.py", "dragenAlign.extractInfoLine.timeout": null, "dragenAlign.composeList.jobMemory": null, - "dragenAlign.composeList.listWritingScript": null, + "dragenAlign.composeList.listWritingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/writeFile.py", "dragenAlign.composeList.modules": null, "dragenAlign.composeList.timeout": null, "dragenAlign.runDragen.adapter1File": null, @@ -210,7 +210,7 @@ "type": "EXTERNAL" }, "fastqR2": null, - "readGroup": 
"ID:121005_h804_0096_AD0V4NACXX-NoIndex_6 LB:PCSI0022C PL:ILLUMINA PU:121005_h804_0096_AD0V4NACXX-NoIndex_6 SM:PCSI0022C CM:OICR" + "readGroup": "ID:121005_h804_0096_AD0V4NACXX_6_NoIndex LB:PCSI0022C PL:ILLUMINA PU:121005_h804_0096_AD0V4NACXX_6_NoIndex SM:PCSI0022C CN:OICR" } ], "dragenAlign.outputFileNamePrefix": "121005_h804_0096_AD0V4NACXX_PCSI0022C_NoIndex_L006_001", @@ -219,10 +219,10 @@ "dragenAlign.isRNA": false, "dragenAlign.extractInfoLine.jobMemory": null, "dragenAlign.extractInfoLine.modules": null, - "dragenAlign.extractInfoLine.parsingScript": null, + "dragenAlign.extractInfoLine.parsingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/composeList.py", "dragenAlign.extractInfoLine.timeout": null, "dragenAlign.composeList.jobMemory": null, - "dragenAlign.composeList.listWritingScript": null, + "dragenAlign.composeList.listWritingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/writeFile.py", "dragenAlign.composeList.modules": null, "dragenAlign.composeList.timeout": null, "dragenAlign.runDragen.adapter1File": null, @@ -296,7 +296,7 @@ "type": "EXTERNAL" }, "fastqR2": null, - "readGroup": "ID:121005_h804_0096_AD0V4NACXX-NoIndex_6 LB:PCSI0022C PL:ILLUMINA PU:121005_h804_0096_AD0V4NACXX-NoIndex_6 SM:PCSI0022C CM:OICR" + "readGroup": "ID:121005_h804_0096_AD0V4NACXX_6_NoIndex LB:PCSI0022C PL:ILLUMINA PU:121005_h804_0096_AD0V4NACXX_6_NoIndex SM:PCSI0022C CN:OICR" } ], "dragenAlign.outputFileNamePrefix": "121005_h804_0096_AD0V4NACXX_PCSI0022C_NoIndex_L006_001", @@ -305,10 +305,10 @@ "dragenAlign.isRNA": false, "dragenAlign.extractInfoLine.jobMemory": null, "dragenAlign.extractInfoLine.modules": null, - "dragenAlign.extractInfoLine.parsingScript": null, + "dragenAlign.extractInfoLine.parsingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/composeList.py", "dragenAlign.extractInfoLine.timeout": null, "dragenAlign.composeList.jobMemory": null, - "dragenAlign.composeList.listWritingScript": null, + 
"dragenAlign.composeList.listWritingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/writeFile.py", "dragenAlign.composeList.modules": null, "dragenAlign.composeList.timeout": null, "dragenAlign.runDragen.adapter1File": null, @@ -393,7 +393,7 @@ }, "type": "EXTERNAL" }, - "readGroup": "ID:121005_h804_0096_AD0V4NACXX_4_NoIndex PL:Illumina PU:121005_h804_0096_AD0V4NACXX_4_NoIndex LB:K562_1 SM:K562_1test CM:OICR" + "readGroup": "ID:121005_h804_0096_AD0V4NACXX_4_NoIndex PL:Illumina PU:121005_h804_0096_AD0V4NACXX_4_NoIndex LB:K562_1 SM:K562_1test CN:OICR" } ], "dragenAlign.outputFileNamePrefix": "K562", @@ -402,10 +402,10 @@ "dragenAlign.isRNA": true, "dragenAlign.extractInfoLine.jobMemory": null, "dragenAlign.extractInfoLine.modules": null, - "dragenAlign.extractInfoLine.parsingScript": null, + "dragenAlign.extractInfoLine.parsingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/composeList.py", "dragenAlign.extractInfoLine.timeout": null, "dragenAlign.composeList.jobMemory": null, - "dragenAlign.composeList.listWritingScript": null, + "dragenAlign.composeList.listWritingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/writeFile.py", "dragenAlign.composeList.modules": null, "dragenAlign.composeList.timeout": null, "dragenAlign.runDragen.adapter1File": null, @@ -490,7 +490,7 @@ }, "type": "EXTERNAL" }, - "readGroup": "ID:121005_h804_0096_AD0V4NACXX_4_NoIndex PL:Illumina PU:121005_h804_0096_AD0V4NACXX_4_NoIndex LB:K562_1 SM:K562_1test CM:OICR" + "readGroup": "ID:121005_h804_0096_AD0V4NACXX_4_NoIndex PL:Illumina PU:121005_h804_0096_AD0V4NACXX_4_NoIndex LB:K562_1 SM:K562_1test CN:OICR" } ], "dragenAlign.outputFileNamePrefix": "K562", @@ -499,10 +499,10 @@ "dragenAlign.isRNA": true, "dragenAlign.extractInfoLine.jobMemory": null, "dragenAlign.extractInfoLine.modules": null, - "dragenAlign.extractInfoLine.parsingScript": null, + "dragenAlign.extractInfoLine.parsingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/composeList.py", 
"dragenAlign.extractInfoLine.timeout": null, "dragenAlign.composeList.jobMemory": null, - "dragenAlign.composeList.listWritingScript": null, + "dragenAlign.composeList.listWritingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/writeFile.py", "dragenAlign.composeList.modules": null, "dragenAlign.composeList.timeout": null, "dragenAlign.runDragen.adapter1File": null, @@ -576,7 +576,7 @@ "type": "EXTERNAL" }, "fastqR2": null, - "readGroup": "ID:121005_h804_0096_AD0V4NACXX_4_NoIndex PL:Illumina PU:121005_h804_0096_AD0V4NACXX_4_NoIndex LB:K562_1 SM:K562_1test CM:OICR" + "readGroup": "ID:121005_h804_0096_AD0V4NACXX_4_NoIndex PL:Illumina PU:121005_h804_0096_AD0V4NACXX_4_NoIndex LB:K562_1 SM:K562_1test CN:OICR" } ], "dragenAlign.outputFileNamePrefix": "K562", @@ -585,10 +585,10 @@ "dragenAlign.isRNA": true, "dragenAlign.extractInfoLine.jobMemory": null, "dragenAlign.extractInfoLine.modules": null, - "dragenAlign.extractInfoLine.parsingScript": null, + "dragenAlign.extractInfoLine.parsingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/composeList.py", "dragenAlign.extractInfoLine.timeout": null, "dragenAlign.composeList.jobMemory": null, - "dragenAlign.composeList.listWritingScript": null, + "dragenAlign.composeList.listWritingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/writeFile.py", "dragenAlign.composeList.modules": null, "dragenAlign.composeList.timeout": null, "dragenAlign.runDragen.adapter1File": null, @@ -662,7 +662,7 @@ "type": "EXTERNAL" }, "fastqR2": null, - "readGroup": "ID:VENHPV_0260_02_LB02-01_240304_A00469_0642_BHWVM7DSX7_3_TCCGCGAA-GCCTCTAT LB:VENHPV_A00469_0642_WT PL:ILLUMINA PU:VENHPV_0260_02_LB02-01_240304_A00469_0642_BHWVM7DSX7_3_TCCGCGAA-GCCTCTAT SM:VENHPV_TEST CM:OICR" + "readGroup": "ID:VENHPV_0260_02_LB02-01_240304_A00469_0642_BHWVM7DSX7_3_TCCGCGAA-GCCTCTAT LB:VENHPV_A00469_0642_WT PL:ILLUMINA PU:VENHPV_0260_02_LB02-01_240304_A00469_0642_BHWVM7DSX7_3_TCCGCGAA-GCCTCTAT SM:VENHPV_TEST CN:OICR" }, { "fastqR1": { @@ 
-678,7 +678,7 @@ "type": "EXTERNAL" }, "fastqR2": null, - "readGroup": "ID:VENHPV_0260_02_LB02-01_240223_A00469_0636_BHWTVJDSX7_1_TCCGCGAA-GCCTCTAT LB:VENHPV_A00469_0636_WT PL:ILLUMINA PU:VENHPV_0260_02_LB02-01_240223_A00469_0636_BHWTVJDSX7_1_TCCGCGAA-GCCTCTAT SM:VENHPV_TEST CM:OICR" + "readGroup": "ID:VENHPV_0260_02_LB02-01_240223_A00469_0636_BHWTVJDSX7_1_TCCGCGAA-GCCTCTAT LB:VENHPV_A00469_0636_WT PL:ILLUMINA PU:VENHPV_0260_02_LB02-01_240223_A00469_0636_BHWTVJDSX7_1_TCCGCGAA-GCCTCTAT SM:VENHPV_TEST CN:OICR" } ], "dragenAlign.outputFileNamePrefix": "VENHPV_0260_02_LB02-01", @@ -687,10 +687,10 @@ "dragenAlign.isRNA": true, "dragenAlign.extractInfoLine.jobMemory": null, "dragenAlign.extractInfoLine.modules": null, - "dragenAlign.extractInfoLine.parsingScript": null, + "dragenAlign.extractInfoLine.parsingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/composeList.py", "dragenAlign.extractInfoLine.timeout": null, "dragenAlign.composeList.jobMemory": null, - "dragenAlign.composeList.listWritingScript": null, + "dragenAlign.composeList.listWritingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/writeFile.py", "dragenAlign.composeList.modules": null, "dragenAlign.composeList.timeout": null, "dragenAlign.runDragen.adapter1File": null, @@ -775,7 +775,7 @@ }, "type": "EXTERNAL" }, - "readGroup": "ID:VENHPV_0260_02_LB02-01_240304_A00469_0642_BHWVM7DSX7_3_TCCGCGAA-GCCTCTAT LB:VENHPV_A00469_0642_WT PL:ILLUMINA PU:VENHPV_0260_02_LB02-01_240304_A00469_0642_BHWVM7DSX7_3_TCCGCGAA-GCCTCTAT SM:VENHPV_TEST CM:OICR" + "readGroup": "ID:VENHPV_0260_02_LB02-01_240304_A00469_0642_BHWVM7DSX7_3_TCCGCGAA-GCCTCTAT LB:VENHPV_A00469_0642_WT PL:ILLUMINA PU:VENHPV_0260_02_LB02-01_240304_A00469_0642_BHWVM7DSX7_3_TCCGCGAA-GCCTCTAT SM:VENHPV_TEST CN:OICR" }, { "fastqR1": { @@ -802,7 +802,7 @@ }, "type": "EXTERNAL" }, - "readGroup": "ID:VENHPV_0260_02_LB02-01_240223_A00469_0636_BHWTVJDSX7_1_TCCGCGAA-GCCTCTAT LB:VENHPV_A00469_0636_WT PL:ILLUMINA 
PU:VENHPV_0260_02_LB02-01_240223_A00469_0636_BHWTVJDSX7_1_TCCGCGAA-GCCTCTAT SM:VENHPV_TEST CM:OICR" + "readGroup": "ID:VENHPV_0260_02_LB02-01_240223_A00469_0636_BHWTVJDSX7_1_TCCGCGAA-GCCTCTAT LB:VENHPV_A00469_0636_WT PL:ILLUMINA PU:VENHPV_0260_02_LB02-01_240223_A00469_0636_BHWTVJDSX7_1_TCCGCGAA-GCCTCTAT SM:VENHPV_TEST CN:OICR" } ], "dragenAlign.outputFileNamePrefix": "VENHPV_0260_02_LB02-01", @@ -811,10 +811,10 @@ "dragenAlign.isRNA": true, "dragenAlign.extractInfoLine.jobMemory": null, "dragenAlign.extractInfoLine.modules": null, - "dragenAlign.extractInfoLine.parsingScript": null, + "dragenAlign.extractInfoLine.parsingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/composeList.py", "dragenAlign.extractInfoLine.timeout": null, "dragenAlign.composeList.jobMemory": null, - "dragenAlign.composeList.listWritingScript": null, + "dragenAlign.composeList.listWritingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/writeFile.py", "dragenAlign.composeList.modules": null, "dragenAlign.composeList.timeout": null, "dragenAlign.runDragen.adapter1File": null, From b7fdad3280eb83fa2583bf4a8852ebdd0079b46f Mon Sep 17 00:00:00 2001 From: pruzanov Date: Thu, 13 Feb 2025 11:30:27 -0500 Subject: [PATCH 6/6] Switch to the latest version of dragen scripts --- README.md | 2 +- dragenAlign.wdl | 8 ++++++-- vidarrtest-regression.json.in | 36 +++++++++++++++++------------------ 3 files changed, 25 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 47c2d93..d2e589a 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ This workflow will align sequence data (WG or WT) provided as fastq files to the ## Dependencies * [dragen](https://developer.illumina.com/dragen) - +* [gsi modules : dragen-scripts 0.3](https://gitlab.oicr.on.ca/ResearchIT/modulator) ## Usage diff --git a/dragenAlign.wdl b/dragenAlign.wdl index f1bea4d..6e40dd1 100755 --- a/dragenAlign.wdl +++ b/dragenAlign.wdl @@ -84,6 +84,10 @@ workflow dragenAlign { { name: "dragen", url: 
"https://developer.illumina.com/dragen" + }, + { + name: "gsi modules : dragen-scripts/0.3", + url: "https://gitlab.oicr.on.ca/ResearchIT/modulator" } ] output_meta: { @@ -124,7 +128,7 @@ task extractInfoLine { String parsingScript = "$DRAGEN_SCRIPTS_ROOT/bin/composeList.py" Int timeout = 4 Int jobMemory = 4 - String modules = "dragen-scripts/0.2" + String modules = "dragen-scripts/0.3" } parameter_meta { @@ -166,7 +170,7 @@ task composeList { String outputFileName Int jobMemory = 4 Int timeout = 4 - String modules = "dragen-scripts/0.2" + String modules = "dragen-scripts/0.3" } parameter_meta { diff --git a/vidarrtest-regression.json.in b/vidarrtest-regression.json.in index 1f84780..cf5eb3f 100644 --- a/vidarrtest-regression.json.in +++ b/vidarrtest-regression.json.in @@ -36,10 +36,10 @@ "dragenAlign.isRNA": false, "dragenAlign.extractInfoLine.jobMemory": null, "dragenAlign.extractInfoLine.modules": null, - "dragenAlign.extractInfoLine.parsingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/composeList.py", + "dragenAlign.extractInfoLine.parsingScript": null, "dragenAlign.extractInfoLine.timeout": null, "dragenAlign.composeList.jobMemory": null, - "dragenAlign.composeList.listWritingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/writeFile.py", + "dragenAlign.composeList.listWritingScript": null, "dragenAlign.composeList.modules": null, "dragenAlign.composeList.timeout": null, "dragenAlign.runDragen.adapter1File": null, @@ -133,10 +133,10 @@ "dragenAlign.isRNA": false, "dragenAlign.extractInfoLine.jobMemory": null, "dragenAlign.extractInfoLine.modules": null, - "dragenAlign.extractInfoLine.parsingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/composeList.py", + "dragenAlign.extractInfoLine.parsingScript": null, "dragenAlign.extractInfoLine.timeout": null, "dragenAlign.composeList.jobMemory": null, - "dragenAlign.composeList.listWritingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/writeFile.py", + 
"dragenAlign.composeList.listWritingScript": null, "dragenAlign.composeList.modules": null, "dragenAlign.composeList.timeout": null, "dragenAlign.runDragen.adapter1File": null, @@ -219,10 +219,10 @@ "dragenAlign.isRNA": false, "dragenAlign.extractInfoLine.jobMemory": null, "dragenAlign.extractInfoLine.modules": null, - "dragenAlign.extractInfoLine.parsingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/composeList.py", + "dragenAlign.extractInfoLine.parsingScript": null, "dragenAlign.extractInfoLine.timeout": null, "dragenAlign.composeList.jobMemory": null, - "dragenAlign.composeList.listWritingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/writeFile.py", + "dragenAlign.composeList.listWritingScript": null, "dragenAlign.composeList.modules": null, "dragenAlign.composeList.timeout": null, "dragenAlign.runDragen.adapter1File": null, @@ -305,10 +305,10 @@ "dragenAlign.isRNA": false, "dragenAlign.extractInfoLine.jobMemory": null, "dragenAlign.extractInfoLine.modules": null, - "dragenAlign.extractInfoLine.parsingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/composeList.py", + "dragenAlign.extractInfoLine.parsingScript": null, "dragenAlign.extractInfoLine.timeout": null, "dragenAlign.composeList.jobMemory": null, - "dragenAlign.composeList.listWritingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/writeFile.py", + "dragenAlign.composeList.listWritingScript": null, "dragenAlign.composeList.modules": null, "dragenAlign.composeList.timeout": null, "dragenAlign.runDragen.adapter1File": null, @@ -402,10 +402,10 @@ "dragenAlign.isRNA": true, "dragenAlign.extractInfoLine.jobMemory": null, "dragenAlign.extractInfoLine.modules": null, - "dragenAlign.extractInfoLine.parsingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/composeList.py", + "dragenAlign.extractInfoLine.parsingScript": null, "dragenAlign.extractInfoLine.timeout": null, "dragenAlign.composeList.jobMemory": null, - "dragenAlign.composeList.listWritingScript": 
"/.mounts/labs/gsi/testdata/dragenSomatic/scripts/writeFile.py", + "dragenAlign.composeList.listWritingScript": null, "dragenAlign.composeList.modules": null, "dragenAlign.composeList.timeout": null, "dragenAlign.runDragen.adapter1File": null, @@ -499,10 +499,10 @@ "dragenAlign.isRNA": true, "dragenAlign.extractInfoLine.jobMemory": null, "dragenAlign.extractInfoLine.modules": null, - "dragenAlign.extractInfoLine.parsingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/composeList.py", + "dragenAlign.extractInfoLine.parsingScript": null, "dragenAlign.extractInfoLine.timeout": null, "dragenAlign.composeList.jobMemory": null, - "dragenAlign.composeList.listWritingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/writeFile.py", + "dragenAlign.composeList.listWritingScript": null, "dragenAlign.composeList.modules": null, "dragenAlign.composeList.timeout": null, "dragenAlign.runDragen.adapter1File": null, @@ -585,10 +585,10 @@ "dragenAlign.isRNA": true, "dragenAlign.extractInfoLine.jobMemory": null, "dragenAlign.extractInfoLine.modules": null, - "dragenAlign.extractInfoLine.parsingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/composeList.py", + "dragenAlign.extractInfoLine.parsingScript": null, "dragenAlign.extractInfoLine.timeout": null, "dragenAlign.composeList.jobMemory": null, - "dragenAlign.composeList.listWritingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/writeFile.py", + "dragenAlign.composeList.listWritingScript": null, "dragenAlign.composeList.modules": null, "dragenAlign.composeList.timeout": null, "dragenAlign.runDragen.adapter1File": null, @@ -687,10 +687,10 @@ "dragenAlign.isRNA": true, "dragenAlign.extractInfoLine.jobMemory": null, "dragenAlign.extractInfoLine.modules": null, - "dragenAlign.extractInfoLine.parsingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/composeList.py", + "dragenAlign.extractInfoLine.parsingScript": null, "dragenAlign.extractInfoLine.timeout": null, 
"dragenAlign.composeList.jobMemory": null, - "dragenAlign.composeList.listWritingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/writeFile.py", + "dragenAlign.composeList.listWritingScript": null, "dragenAlign.composeList.modules": null, "dragenAlign.composeList.timeout": null, "dragenAlign.runDragen.adapter1File": null, @@ -811,10 +811,10 @@ "dragenAlign.isRNA": true, "dragenAlign.extractInfoLine.jobMemory": null, "dragenAlign.extractInfoLine.modules": null, - "dragenAlign.extractInfoLine.parsingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/composeList.py", + "dragenAlign.extractInfoLine.parsingScript": null, "dragenAlign.extractInfoLine.timeout": null, "dragenAlign.composeList.jobMemory": null, - "dragenAlign.composeList.listWritingScript": "/.mounts/labs/gsi/testdata/dragenSomatic/scripts/writeFile.py", + "dragenAlign.composeList.listWritingScript": null, "dragenAlign.composeList.modules": null, "dragenAlign.composeList.timeout": null, "dragenAlign.runDragen.adapter1File": null,