-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #26 from UMCUGenetics/release/v2.1.0
Release v2.1.0
- Loading branch information
Showing
21 changed files
with
461 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
process CompareGender { | ||
// Custom process to check gender of sample with known status | ||
tag {"CompareGender ${sample_id}"} | ||
label 'CompareGender' | ||
label 'CompareGender_Pysam' | ||
container = 'ghcr.io/umcugenetics/custommodules_gendercheck:1.0.0' | ||
shell = ['/bin/bash', '-eo', 'pipefail'] | ||
|
||
input: | ||
tuple(val(sample_id), val(analysis_id), path(bam_file), path(bai_file), val(true_gender)) | ||
|
||
output: | ||
tuple(path("*gendercheck.txt"), emit: gendercheck_qc) | ||
|
||
script: | ||
""" | ||
python ${projectDir}/CustomModules/GenderCheck/calculate_gender.py \ | ||
${sample_id} \ | ||
${analysis_id} \ | ||
${bam_file} \ | ||
./ \ | ||
${true_gender} \ | ||
$params.gendercheck_ratio_y \ | ||
$params.gendercheck_mapping_qual \ | ||
$params.gendercheck_locus_y | ||
""" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
################## BASE IMAGE ###################### | ||
FROM --platform=linux/amd64 python:3.11 | ||
|
||
################## METADATA ###################### | ||
LABEL base_image="python:3.11" | ||
LABEL version="1.0.0" | ||
LABEL extra.binaries="pysam,pytest" | ||
|
||
################## INSTALLATIONS ###################### | ||
COPY requirements.txt requirements.txt | ||
RUN pip install -r requirements.txt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
#! /usr/bin/env python3 | ||
|
||
import argparse | ||
import pysam | ||
|
||
|
||
def is_valid_read(read, mapping_qual): | ||
"""Check if a read is properly mapped.""" | ||
if (read.mapping_quality >= mapping_qual and read.reference_end and read.reference_start): | ||
return True | ||
return False | ||
|
||
|
||
def get_gender_from_bam(bam, mapping_qual, locus_y, ratio_y): | ||
with pysam.AlignmentFile(bam, "rb") as bam_file: | ||
y_reads = float( | ||
sum([is_valid_read(read, mapping_qual) for read in bam_file.fetch(region=locus_y)]) | ||
) | ||
total_reads = float(bam_file.mapped) | ||
y_ratio_perc = (y_reads / total_reads) * 100 | ||
if y_ratio_perc <= ratio_y: | ||
return "female" | ||
else: | ||
return "male" | ||
|
||
|
||
def compare_gender(sample_id, analysis_id, test_gender, true_gender): | ||
if test_gender == true_gender or true_gender == "unknown": # if gender if unknown/onbekend in database, pass | ||
qc = "PASS" | ||
else: # not_detected in database considered failed | ||
qc = "FAIL" | ||
return f"{sample_id}\t{analysis_id}\t{test_gender}\t{true_gender}\t{qc}\n" | ||
|
||
|
||
def write_qc_file(sample_id, analysis_id, comparison, outputfolder): | ||
with open(f"{outputfolder}/{sample_id}_{analysis_id}_gendercheck.txt", 'w') as write_file: | ||
write_file.write("sample_id\tanalysis_id\ttest_gender\ttrue_gender\tstatus\n") | ||
write_file.write(comparison) | ||
|
||
|
||
if __name__ == "__main__": | ||
parser = argparse.ArgumentParser() | ||
parser.add_argument('sample_id', help='sample_id') | ||
parser.add_argument('analysis_id', help='analysis_id') | ||
parser.add_argument('bam', help='path to bam file') | ||
parser.add_argument('outputfolder', help='path to output folder') | ||
parser.add_argument('true_gender', help='gender regarded as the truth') | ||
parser.add_argument( | ||
"ratio_y", | ||
type=float, | ||
help="maximunum chromosome Y ratio for females [float]" | ||
) | ||
parser.add_argument('mapping_qual', type=int, help='minimum mapping quality of reads to be considered [int]') | ||
parser.add_argument('locus_y', help='Coordinates for includes region on chromosome Y (chr:start-stop)') | ||
args = parser.parse_args() | ||
|
||
translation = {"Man": "male", "Vrouw": "female", "Onbekend": "unknown", "unknown": "not_detected"} | ||
true_gender = args.true_gender | ||
if true_gender in translation: | ||
true_gender = translation[true_gender] | ||
|
||
test_gender = get_gender_from_bam(args.bam, args.mapping_qual, args.locus_y, args.ratio_y) | ||
comparison = compare_gender(args.sample_id, args.analysis_id, test_gender, true_gender) | ||
write_qc_file(args.sample_id, args.analysis_id, comparison, args.outputfolder) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
iniconfig==2.0.0 | ||
packaging==23.2 | ||
pluggy==1.4.0 | ||
pysam==0.22.0 | ||
pytest==8.0.2 |
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
import calculate_gender | ||
|
||
import pytest | ||
|
||
|
||
class TestIsValidRead(): | ||
|
||
class MyObject: | ||
def __init__(self, qual, start, end): | ||
self.mapping_quality = qual | ||
self.reference_start = start | ||
self.reference_end = end | ||
|
||
@pytest.mark.parametrize("read,mapping_qual,expected", [ | ||
(MyObject(19, True, True), 20, False), # mapping quality is below the threshold | ||
(MyObject(20, True, True), 20, True), # mapping quality is equal to the threshold | ||
(MyObject(20, True, True), 19, True), # mapping quality is higher than the threshold | ||
(MyObject(20, False, True), 20, False), # reference_end is false | ||
(MyObject(20, True, False), 20, False), # reference_start is false | ||
]) | ||
def test_is_valid_read(self, read, mapping_qual, expected): | ||
assert expected == calculate_gender.is_valid_read(read, mapping_qual) | ||
|
||
|
||
class TestGetGenderFromBam(): | ||
@pytest.mark.parametrize("bam,mapping_qual,locus_y,ratio_y,expected", [ | ||
("./test_bam.bam", 20, "Y:2649520-59034050", 0.02, "male"), # output male below | ||
("./test_bam.bam", 20, "Y:2649520-59034050", 0.22, "female"), # output female | ||
]) | ||
def test_get_gender_from_bam(self, bam, mapping_qual, locus_y, ratio_y, expected): | ||
assert expected == calculate_gender.get_gender_from_bam(bam, mapping_qual, locus_y, ratio_y) | ||
|
||
|
||
class TestCompareGender(): | ||
@pytest.mark.parametrize("sample_id,analysis_id,test_gender,true_gender,expected", [ | ||
# test_gender and true_gender identical, should be PASS | ||
("test_sample", "test_analyse", "male", "male", "test_sample\ttest_analyse\tmale\tmale\tPASS\n"), | ||
# test_gender and true_gender not identical , should be FAIL | ||
("test_sample", "test_analyse", "male", "female", "test_sample\ttest_analyse\tmale\tfemale\tFAIL\n"), | ||
# true_gender unknown, should be PASS | ||
("test_sample", "test_analyse", "male", "unknown", "test_sample\ttest_analyse\tmale\tunknown\tPASS\n"), | ||
# true_gender not_detected, should be FAIL | ||
("test_sample", "test_analyse", "male", "not_detected", "test_sample\ttest_analyse\tmale\tnot_detected\tFAIL\n"), | ||
]) | ||
def test_compare_gender(self, sample_id, analysis_id, test_gender, true_gender, expected): | ||
assert expected == calculate_gender.compare_gender(sample_id, analysis_id, test_gender, true_gender) | ||
|
||
|
||
def test_write_qc_file(tmp_path): | ||
path = tmp_path / "qc_folder" | ||
path.mkdir() | ||
qc_file = path / "test_sample_test_analyse_gendercheck.txt" | ||
calculate_gender.write_qc_file("test_sample", "test_analyse", "test_sample\ttest_analyse\tmale\tmale\tPASS\n", path) | ||
message = "sample_id\tanalysis_id\ttest_gender\ttrue_gender\tstatus\ntest_sample\ttest_analyse\tmale\tmale\tPASS\n" | ||
assert message in qc_file.read_text() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
FROM --platform=linux/amd64 ubuntu:latest | ||
|
||
RUN apt-get update && \ | ||
apt-get -y install openjdk-8-jdk-headless && \ | ||
apt-get clean && \ | ||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* | ||
|
||
RUN apt-get update | ||
RUN apt-get install -y git | ||
RUN apt-get install -y git-lfs | ||
RUN git lfs install | ||
RUN apt-get install libcurl4 | ||
|
||
RUN apt-get install -y rsync | ||
RUN rsync -aP hgdownload.soe.ucsc.edu::genome/admin/exe/linux.x86_64/blat/ ./../usr/bin | ||
|
||
RUN git clone https://github.com/zzhang526/MosaicHunter |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
#!/usr/bin/env nextflow | ||
|
||
process MosaicHunterGetGender { | ||
tag {"MosaicHunterGetGender ${sample_id}"} | ||
label 'MosaicHunterGetGender' | ||
container = 'ghcr.io/umcugenetics/custommodules_gendercheck:1.0.0' | ||
shell = ['/bin/bash', '-eo', 'pipefail'] | ||
|
||
/* | ||
Define inputs. | ||
- Tuple consisting of a sample_id, a path to the .bam file, a path to the .bai file | ||
*/ | ||
input: | ||
tuple(val(sample_id), path(bam_files), path(bai_files)) | ||
|
||
/* | ||
Define outputs. | ||
- A tuple containing respectively the number for the alpha and beta found in the sample. | ||
*/ | ||
output: | ||
tuple(val(sample_id), path("gender_data_${sample_id}.tsv")) | ||
|
||
// The command to execute MosaicHunter Get Gender | ||
script: | ||
""" | ||
python ${workflow.projectDir}/CustomModules/MosaicHunter/1.0.0/get_gender_from_bam_chrx.py \ | ||
${sample_id} \ | ||
${bam_files} \ | ||
./ \ | ||
$params.mh_gender_ratio_x_threshold_male \ | ||
$params.mh_gender_ratio_x_threshold_female \ | ||
$params.mh_gender_mapping_qual \ | ||
$params.mh_gender_locus_x | ||
""" | ||
} | ||
|
||
process MosaicHunterQualityCorrection { | ||
// Step 1: Process input files | ||
tag {"MosaicHunterQualityCorrection ${sample_id}"} | ||
label 'MosaicHunterQualityCorrection' | ||
container = 'docker://umcugenbioinf/mosaic_hunter:1.0.0' | ||
shell = ['/bin/bash', '-euo', 'pipefail'] | ||
|
||
/* | ||
Define inputs. | ||
- Tuple consisting of a sample_id, a path to the .bam file, a path to the .bai file | ||
- Path to the reference file | ||
- Path to the MosaicHunter common site filter bed file | ||
- Path to the MosaicHunter config file for the Quality Correction step | ||
*/ | ||
input: | ||
tuple(val(sample_id), path(bam_files), path(bai_files), path(gender_data)) | ||
path(mh_reference_file) | ||
path(mh_common_site_filter_bed_file) | ||
path(mh_config_file_one) | ||
|
||
/* | ||
Define outputs. | ||
- A tuple containing respectively the number for the alpha and beta found in the sample. | ||
*/ | ||
output: | ||
tuple(val(sample_id), env(MHALPHA), env(MHBETA)) | ||
|
||
// The command to execute MosaicHunter | ||
shell: | ||
''' | ||
SEX_STRING=$(awk 'NR>1 {print $2}' gender_data_!{sample_id}.tsv) | ||
java -Xmx!{task.memory.toGiga()-4}G -jar /MosaicHunter/build/mosaichunter.jar \ | ||
-C !{mh_config_file_one} \ | ||
-P input_file=!{bam_files} \ | ||
-P mosaic_filter.sex=$SEX_STRING \ | ||
-P reference_file=!{mh_reference_file} \ | ||
-P common_site_filter.bed_file=!{mh_common_site_filter_bed_file} \ | ||
-P output_dir=./ | ||
export MHALPHA="\$(grep -Po "(?<=alpha:\\s)\\w+" ./stdout*)" | ||
export MHBETA="\$(grep -Po "(?<=beta:\\s)\\w+" ./stdout*)" | ||
''' | ||
} | ||
|
||
process MosaicHunterMosaicVariantCalculation { | ||
// Caclulate the Mosaic Variants | ||
tag {"MosaicHunterMosaicVariantCalculation ${sample_id}"} | ||
label 'MosaicHunterMosaicVariantCalculation' | ||
container = 'docker://umcugenbioinf/mosaic_hunter:1.0.0' | ||
shell = ['/bin/bash', '-euo', 'pipefail'] | ||
|
||
publishDir "QC/MosaicHunter", saveAs: { filename -> "${sample_id}_$filename" }, mode: 'copy' | ||
|
||
/* | ||
Define inputs. | ||
- Tuple consisting of a sample_id, a path to the .bam file, a path to the .bai file | ||
- Path to the reference file | ||
- Path to the MosaicHunter common site filter bed file | ||
- Path to the MosaicHunter config file for the Mosaic Variant Calculation step | ||
- The output of the MosaicHunterQualityCorrection step. This makes the environment variables available in this process | ||
- A tuple containing respectively the number for the alpha and beta found in the | ||
sample, which are stored in an environment variable. | ||
*/ | ||
input: | ||
tuple(val(sample_id), path(bam_files), path(bai_files), path(gender_data), env(MHALPHA), env(MHBETA)) | ||
path(mh_reference_file) | ||
path(mh_common_site_filter_bed_file) | ||
path(mh_config_file_two) | ||
|
||
output: | ||
path('final.passed.tsv') | ||
|
||
// The command to execute step two of MosaicHunter | ||
// First get the SEX_STRING from the sample | ||
shell: | ||
''' | ||
SEX_STRING=$(awk 'NR>1 {print $2}' gender_data_!{sample_id}.tsv) | ||
java -Xmx!{task.memory.toGiga()-8}G -jar /MosaicHunter/build/mosaichunter.jar \ | ||
-C !{mh_config_file_two} \ | ||
-P mosaic_filter.alpha_param=$MHALPHA -P mosaic_filter.beta_param=$MHBETA \ | ||
-P input_file=!{bam_files} \ | ||
-P mosaic_filter.sex=$SEX_STRING \ | ||
-P reference_file=!{mh_reference_file} \ | ||
-P common_site_filter.bed_file=!{mh_common_site_filter_bed_file} \ | ||
-P output_dir=./ | ||
''' | ||
} |
Oops, something went wrong.