Skip to content

Commit 5ae3239

Browse files
authored
Merge pull request #26 from UMCUGenetics/release/v2.1.0
Release v2.1.0
2 parents d6673e2 + f9d9c0f commit 5ae3239

21 files changed

+461
-13
lines changed

CheckFingerprintVCF/CheckFingerprintVCF.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,6 @@ process CheckFingerprintVCF {
1515

1616
script:
1717
"""
18-
python ${baseDir}/CustomModules/CheckFingerprintVCF/check_fingerprint_vcf.py -o logbook.txt ${vcf_files}
18+
python ${projectDir}/CustomModules/CheckFingerprintVCF/check_fingerprint_vcf.py -o logbook.txt ${vcf_files}
1919
"""
2020
}

CheckQC/CheckQC.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,6 @@ process CheckQC {
1313

1414
script:
1515
"""
16-
python ${baseDir}/CustomModules/CheckQC/check_qc.py ${params.qc_settings_path} '.' ${identifier} ${input_files}
16+
python ${projectDir}/CustomModules/CheckQC/check_qc.py ${params.qc_settings_path} '.' ${identifier} ${input_files}
1717
"""
1818
}
Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
process SampleIndications {
2-
// Custom process to run clarity_epp export sample_indications
3-
tag {"ClarityEpp SampleIndications ${sample_id}"}
1+
process SampleUDFDx {
2+
// Custom process to run clarity_epp export sample_udf_dx
3+
tag {"ClarityEpp SampleUDFDx ${sample_id}"}
44
label 'ClarityEpp'
5-
label 'ClarityEpp_SampleIndications'
5+
label 'ClarityEpp_SampleUDFDx'
66
shell = ['/bin/bash', '-eo', 'pipefail']
77
cache = false //Disable cache to force a clarity export restarting the workflow.
88

@@ -15,7 +15,7 @@ process SampleIndications {
1515
script:
1616
"""
1717
source ${params.clarity_epp_path}/venv/bin/activate
18-
python ${params.clarity_epp_path}/clarity_epp.py export sample_indications \
19-
-a ${sample_id} | cut -f 2 | grep -v 'Indication' | tr -d '\n'
18+
python ${params.clarity_epp_path}/clarity_epp.py export sample_udf_dx \
19+
-a ${sample_id} -u '$params.udf' -c '$params.column_name' | cut -f 2 | grep -v $params.column_name | tr -d '\n'
2020
"""
21-
}
21+
}

GenderCheck/CompareGender.nf

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
process CompareGender {
    // Runs calculate_gender.py on a sample's BAM file and compares the result with the
    // known gender supplied on the input channel.
    tag {"CompareGender ${sample_id}"}
    label 'CompareGender'
    label 'CompareGender_Pysam'
    container = 'ghcr.io/umcugenetics/custommodules_gendercheck:1.0.0'
    shell = ['/bin/bash', '-eo', 'pipefail']

    /*
    Inputs:
    - Tuple of sample_id, analysis_id, path to the .bam file, path to the .bai file
      and the gender regarded as the truth.
    */
    input:
        tuple(val(sample_id), val(analysis_id), path(bam_file), path(bai_file), val(true_gender))

    /*
    Outputs:
    - Every file in the work directory matching *gendercheck.txt, emitted as gendercheck_qc.
    */
    output:
        tuple(path("*gendercheck.txt"), emit: gendercheck_qc)

    // Positional arguments follow the order expected by calculate_gender.py:
    // sample_id, analysis_id, bam, outputfolder, true_gender, ratio_y, mapping_qual, locus_y.
    script:
        """
        python ${projectDir}/CustomModules/GenderCheck/calculate_gender.py \
            ${sample_id} \
            ${analysis_id} \
            ${bam_file} \
            ./ \
            ${true_gender} \
            ${params.gendercheck_ratio_y} \
            ${params.gendercheck_mapping_qual} \
            ${params.gendercheck_locus_y}
        """
}

GenderCheck/Dockerfile

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
################## BASE IMAGE ######################
FROM --platform=linux/amd64 python:3.11

################## METADATA ######################
# A single LABEL instruction carries all metadata key/value pairs.
LABEL base_image="python:3.11" \
      version="1.0.0" \
      extra.binaries="pysam,pytest"

################## INSTALLATIONS ######################
# requirements.txt is taken from the build context and pins every Python dependency.
COPY requirements.txt requirements.txt
# --no-cache-dir keeps pip's download cache out of the image layer (smaller image,
# same installed packages).
RUN pip install --no-cache-dir -r requirements.txt

GenderCheck/calculate_gender.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#! /usr/bin/env python3
2+
3+
import argparse
4+
import pysam
5+
6+
7+
def is_valid_read(read, mapping_qual):
    """Tell whether a read counts as properly mapped.

    Args:
        read: object exposing ``mapping_quality``, ``reference_end`` and
            ``reference_start`` attributes.
        mapping_qual: minimum mapping quality (inclusive) a read must have.

    Returns:
        True when the mapping quality reaches the threshold and both
        ``reference_end`` and ``reference_start`` are truthy, otherwise False.

    NOTE(review): because the positions are tested for truthiness, a read whose
    ``reference_start`` equals 0 is rejected as well -- confirm this is intended.
    """
    # Guard clause: reads below the quality threshold are never valid.
    if not read.mapping_quality >= mapping_qual:
        return False
    has_positions = read.reference_end and read.reference_start
    return bool(has_positions)
12+
13+
14+
def get_gender_from_bam(bam, mapping_qual, locus_y, ratio_y):
    """Estimate the gender of a sample from the share of reads in a chromosome Y region.

    Args:
        bam: path to the BAM file (opened with pysam in "rb" mode).
        mapping_qual: minimum mapping quality passed on to ``is_valid_read``.
        locus_y: region string handed to ``AlignmentFile.fetch(region=...)``.
        ratio_y: maximum percentage of valid Y-region reads (relative to all
            mapped reads) for which the sample is still called female.

    Returns:
        "female" when the Y-read percentage is at or below ``ratio_y``,
        otherwise "male".

    Raises:
        ValueError: when the BAM file reports zero mapped reads, because no
            ratio can be computed in that case.
    """
    with pysam.AlignmentFile(bam, "rb") as bam_file:
        # Count valid reads lazily instead of building an intermediate list.
        y_reads = sum(
            1 for read in bam_file.fetch(region=locus_y) if is_valid_read(read, mapping_qual)
        )
        total_reads = bam_file.mapped

    # Without this guard an empty BAM ends in a bare ZeroDivisionError that does
    # not tell the user which file caused it.
    if total_reads == 0:
        raise ValueError(f"No mapped reads found in '{bam}'; cannot determine gender.")

    y_ratio_perc = (y_reads / total_reads) * 100
    return "female" if y_ratio_perc <= ratio_y else "male"
25+
26+
27+
def compare_gender(sample_id, analysis_id, test_gender, true_gender):
    """Build the tab-separated QC line comparing measured and known gender.

    The status is PASS when both genders match or when the known gender is
    "unknown"; every other combination (including "not_detected") is FAIL.

    Returns:
        One line ``sample_id<TAB>analysis_id<TAB>test_gender<TAB>true_gender<TAB>status``
        terminated by a newline.
    """
    # An unknown gender in the database cannot contradict the measurement.
    accepted = test_gender == true_gender or true_gender == "unknown"
    qc = "PASS" if accepted else "FAIL"
    return f"{sample_id}\t{analysis_id}\t{test_gender}\t{true_gender}\t{qc}\n"
33+
34+
35+
def write_qc_file(sample_id, analysis_id, comparison, outputfolder):
    """Write the gender-check QC file for one sample.

    The file is named ``<sample_id>_<analysis_id>_gendercheck.txt`` inside
    ``outputfolder`` and holds a fixed header line followed by ``comparison``.
    """
    header = "sample_id\tanalysis_id\ttest_gender\ttrue_gender\tstatus\n"
    target = f"{outputfolder}/{sample_id}_{analysis_id}_gendercheck.txt"
    with open(target, 'w') as handle:
        handle.writelines([header, comparison])
39+
40+
41+
if __name__ == "__main__":
    # Command-line entry point: parse the positional arguments, normalise the
    # known gender, derive the gender from the BAM file and write the QC file.
    cli = argparse.ArgumentParser()
    # Positional arguments, registered in the order they are expected on the command line.
    positionals = (
        ('sample_id', {'help': 'sample_id'}),
        ('analysis_id', {'help': 'analysis_id'}),
        ('bam', {'help': 'path to bam file'}),
        ('outputfolder', {'help': 'path to output folder'}),
        ('true_gender', {'help': 'gender regarded as the truth'}),
        ('ratio_y', {'type': float, 'help': "maximunum chromosome Y ratio for females [float]"}),
        ('mapping_qual', {'type': int, 'help': 'minimum mapping quality of reads to be considered [int]'}),
        ('locus_y', {'help': 'Coordinates for includes region on chromosome Y (chr:start-stop)'}),
    )
    for arg_name, arg_options in positionals:
        cli.add_argument(arg_name, **arg_options)
    args = cli.parse_args()

    # Map the database vocabulary onto the values used by compare_gender;
    # values without a translation are passed through unchanged.
    translation = {"Man": "male", "Vrouw": "female", "Onbekend": "unknown", "unknown": "not_detected"}
    true_gender = translation.get(args.true_gender, args.true_gender)

    test_gender = get_gender_from_bam(args.bam, args.mapping_qual, args.locus_y, args.ratio_y)
    comparison = compare_gender(args.sample_id, args.analysis_id, test_gender, true_gender)
    write_qc_file(args.sample_id, args.analysis_id, comparison, args.outputfolder)

GenderCheck/requirements.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
iniconfig==2.0.0
2+
packaging==23.2
3+
pluggy==1.4.0
4+
pysam==0.22.0
5+
pytest==8.0.2

GenderCheck/test_bam.bam

864 KB
Binary file not shown.

GenderCheck/test_bam.bam.bai

1.54 MB
Binary file not shown.

GenderCheck/test_calculate_gender.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import calculate_gender
2+
3+
import pytest
4+
5+
6+
class TestIsValidRead():
    """Checks calculate_gender.is_valid_read against a minimal stand-in for a read."""

    class FakeRead:
        """Carries only the three attributes that is_valid_read inspects."""

        def __init__(self, qual, start, end):
            self.mapping_quality = qual
            self.reference_start = start
            self.reference_end = end

    # Each case: (read, mapping quality threshold, expected outcome).
    CASES = [
        (FakeRead(19, True, True), 20, False),  # mapping quality is below the threshold
        (FakeRead(20, True, True), 20, True),  # mapping quality is equal to the threshold
        (FakeRead(20, True, True), 19, True),  # mapping quality is higher than the threshold
        (FakeRead(20, False, True), 20, False),  # reference_start is false
        (FakeRead(20, True, False), 20, False),  # reference_end is false
    ]

    @pytest.mark.parametrize("read,mapping_qual,expected", CASES)
    def test_is_valid_read(self, read, mapping_qual, expected):
        outcome = calculate_gender.is_valid_read(read, mapping_qual)
        assert expected == outcome
23+
24+
25+
class TestGetGenderFromBam():
    """Checks calculate_gender.get_gender_from_bam on the bundled test BAM file."""

    BAM = "./test_bam.bam"
    LOCUS_Y = "Y:2649520-59034050"

    # Each case: (bam, mapping quality, Y locus, ratio threshold, expected gender).
    CASES = [
        (BAM, 20, LOCUS_Y, 0.02, "male"),  # threshold 0.02: Y ratio exceeds it, so male
        (BAM, 20, LOCUS_Y, 0.22, "female"),  # threshold 0.22: Y ratio is at or below it, so female
    ]

    @pytest.mark.parametrize("bam,mapping_qual,locus_y,ratio_y,expected", CASES)
    def test_get_gender_from_bam(self, bam, mapping_qual, locus_y, ratio_y, expected):
        called_gender = calculate_gender.get_gender_from_bam(bam, mapping_qual, locus_y, ratio_y)
        assert expected == called_gender
32+
33+
34+
class TestCompareGender():
    """Checks the QC line produced by calculate_gender.compare_gender."""

    # Each case: (sample_id, analysis_id, test_gender, true_gender, expected line).
    CASES = [
        # identical genders give PASS
        ("test_sample", "test_analyse", "male", "male", "test_sample\ttest_analyse\tmale\tmale\tPASS\n"),
        # differing genders give FAIL
        ("test_sample", "test_analyse", "male", "female", "test_sample\ttest_analyse\tmale\tfemale\tFAIL\n"),
        # an unknown true gender gives PASS
        ("test_sample", "test_analyse", "male", "unknown", "test_sample\ttest_analyse\tmale\tunknown\tPASS\n"),
        # a not_detected true gender gives FAIL
        ("test_sample", "test_analyse", "male", "not_detected", "test_sample\ttest_analyse\tmale\tnot_detected\tFAIL\n"),
    ]

    @pytest.mark.parametrize("sample_id,analysis_id,test_gender,true_gender,expected", CASES)
    def test_compare_gender(self, sample_id, analysis_id, test_gender, true_gender, expected):
        line = calculate_gender.compare_gender(sample_id, analysis_id, test_gender, true_gender)
        assert expected == line
47+
48+
49+
def test_write_qc_file(tmp_path):
    """write_qc_file must create the QC file holding the header followed by the record."""
    out_dir = tmp_path / "qc_folder"
    out_dir.mkdir()
    record = "test_sample\ttest_analyse\tmale\tmale\tPASS\n"

    calculate_gender.write_qc_file("test_sample", "test_analyse", record, out_dir)

    expected = "sample_id\tanalysis_id\ttest_gender\ttrue_gender\tstatus\ntest_sample\ttest_analyse\tmale\tmale\tPASS\n"
    written = (out_dir / "test_sample_test_analyse_gendercheck.txt").read_text()
    assert expected in written

Kinship/Kinship.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,6 @@ process Kinship {
1717
plink --file out --make-bed --noweb
1818
king -b plink.bed --kinship
1919
cp king.kin0 ${analysis_id}.kinship
20-
python ${baseDir}/CustomModules/Kinship/check_kinship.py ${analysis_id}.kinship ${ped_file} > ${analysis_id}.kinship_check.out
20+
python ${projectDir}/CustomModules/Kinship/check_kinship.py ${analysis_id}.kinship ${ped_file} > ${analysis_id}.kinship_check.out
2121
"""
2222
}

MosaicHunter/1.0.0/Dockerfile

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Image providing Java 8, git (with LFS), blat and a checkout of MosaicHunter.
# NOTE(review): ubuntu:latest is a moving tag; consider pinning a release so that
# rebuilds are reproducible -- confirm which release offers openjdk-8.
FROM --platform=linux/amd64 ubuntu:latest

# All apt work happens in ONE layer: `apt-get update` and `install` must run together
# (a separate, cached `update` layer goes stale), every install is non-interactive (-y),
# and the package lists are removed in the same layer so they never reach the image.
RUN apt-get update && \
    apt-get -y install \
        openjdk-8-jdk-headless \
        git \
        git-lfs \
        libcurl4 \
        rsync && \
    git lfs install && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# Download the blat binaries from UCSC into /usr/bin (the original relative target
# ./../usr/bin resolves to this directory from the default working directory /).
RUN rsync -aP hgdownload.soe.ucsc.edu::genome/admin/exe/linux.x86_64/blat/ /usr/bin

# Clone MosaicHunter into /MosaicHunter.
RUN git clone https://github.com/zzhang526/MosaicHunter

MosaicHunter/1.0.0/MosaicHunter.nf

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
#!/usr/bin/env nextflow
2+
3+
process MosaicHunterGetGender {
    // Runs get_gender_from_bam_chrx.py on the sample's BAM file to obtain its gender data.
    tag {"MosaicHunterGetGender ${sample_id}"}
    label 'MosaicHunterGetGender'
    container = 'ghcr.io/umcugenetics/custommodules_gendercheck:1.0.0'
    shell = ['/bin/bash', '-eo', 'pipefail']

    /*
    Inputs:
    - Tuple consisting of a sample_id, a path to the .bam file and a path to the .bai file
    */
    input:
        tuple(val(sample_id), path(bam_files), path(bai_files))

    /*
    Outputs:
    - Tuple consisting of the sample_id and the file gender_data_<sample_id>.tsv,
      which is expected in the work directory after the script has run.
    */
    output:
        tuple(val(sample_id), path("gender_data_${sample_id}.tsv"))

    // The command to execute MosaicHunter Get Gender; results go to the work directory (./)
    script:
        """
        python ${workflow.projectDir}/CustomModules/MosaicHunter/1.0.0/get_gender_from_bam_chrx.py \
            ${sample_id} \
            ${bam_files} \
            ./ \
            ${params.mh_gender_ratio_x_threshold_male} \
            ${params.mh_gender_ratio_x_threshold_female} \
            ${params.mh_gender_mapping_qual} \
            ${params.mh_gender_locus_x}
        """
}
36+
37+
process MosaicHunterQualityCorrection {
    // Step 1: run MosaicHunter with the first config file and capture the alpha and beta
    // values it reports.
    tag {"MosaicHunterQualityCorrection ${sample_id}"}
    label 'MosaicHunterQualityCorrection'
    container = 'docker://umcugenbioinf/mosaic_hunter:1.0.0'
    shell = ['/bin/bash', '-euo', 'pipefail']

    /*
    Inputs:
    - Tuple consisting of a sample_id, a path to the .bam file, a path to the .bai file
      and a path to the gender data file of the sample
    - Path to the reference file
    - Path to the MosaicHunter common site filter bed file
    - Path to the MosaicHunter config file for the Quality Correction step
    */
    input:
        tuple(val(sample_id), path(bam_files), path(bai_files), path(gender_data))
        path(mh_reference_file)
        path(mh_common_site_filter_bed_file)
        path(mh_config_file_one)

    /*
    Outputs:
    - Tuple consisting of the sample_id and the environment variables MHALPHA and MHBETA,
      which hold the alpha and beta values found for the sample.
    */
    output:
        tuple(val(sample_id), env(MHALPHA), env(MHBETA))

    // The command to execute MosaicHunter.
    // The sex is read from the second column of the gender data file (header line skipped);
    // alpha and beta are extracted with grep from the stdout* file(s) in the work directory.
    shell:
        '''
        SEX_STRING=$(awk 'NR>1 {print $2}' gender_data_!{sample_id}.tsv)

        java -Xmx!{task.memory.toGiga()-4}G -jar /MosaicHunter/build/mosaichunter.jar \
            -C !{mh_config_file_one} \
            -P input_file=!{bam_files} \
            -P mosaic_filter.sex=$SEX_STRING \
            -P reference_file=!{mh_reference_file} \
            -P common_site_filter.bed_file=!{mh_common_site_filter_bed_file} \
            -P output_dir=./
        export MHALPHA="\$(grep -Po "(?<=alpha:\\s)\\w+" ./stdout*)"
        export MHBETA="\$(grep -Po "(?<=beta:\\s)\\w+" ./stdout*)"
        '''
}
80+
81+
process MosaicHunterMosaicVariantCalculation {
    // Step 2: calculate the mosaic variants, using the alpha and beta values from step 1.
    tag {"MosaicHunterMosaicVariantCalculation ${sample_id}"}
    label 'MosaicHunterMosaicVariantCalculation'
    container = 'docker://umcugenbioinf/mosaic_hunter:1.0.0'
    shell = ['/bin/bash', '-euo', 'pipefail']

    // Copy the result to QC/MosaicHunter, prefixing the file name with the sample_id.
    publishDir "QC/MosaicHunter", saveAs: { filename -> "${sample_id}_$filename" }, mode: 'copy'

    /*
    Inputs:
    - Tuple consisting of a sample_id, a path to the .bam file, a path to the .bai file,
      a path to the gender data file of the sample, and the alpha and beta values,
      which are made available to the command as the environment variables MHALPHA and MHBETA
    - Path to the reference file
    - Path to the MosaicHunter common site filter bed file
    - Path to the MosaicHunter config file for the Mosaic Variant Calculation step
    */
    input:
        tuple(val(sample_id), path(bam_files), path(bai_files), path(gender_data), env(MHALPHA), env(MHBETA))
        path(mh_reference_file)
        path(mh_common_site_filter_bed_file)
        path(mh_config_file_two)

    /*
    Outputs:
    - The file final.passed.tsv from the work directory
    */
    output:
        path('final.passed.tsv')

    // The command to execute step two of MosaicHunter.
    // The sex is first read from the second column of the gender data file (header line skipped).
    shell:
        '''
        SEX_STRING=$(awk 'NR>1 {print $2}' gender_data_!{sample_id}.tsv)

        java -Xmx!{task.memory.toGiga()-8}G -jar /MosaicHunter/build/mosaichunter.jar \
            -C !{mh_config_file_two} \
            -P mosaic_filter.alpha_param=$MHALPHA -P mosaic_filter.beta_param=$MHBETA \
            -P input_file=!{bam_files} \
            -P mosaic_filter.sex=$SEX_STRING \
            -P reference_file=!{mh_reference_file} \
            -P common_site_filter.bed_file=!{mh_common_site_filter_bed_file} \
            -P output_dir=./
        '''
}

0 commit comments

Comments
 (0)