UMCUGenetics
diff --git a/‎CheckFingerprintVCF/CheckFingerprintVCF.nf
Lines changed: 1 addition & 1 deletion b/‎CheckFingerprintVCF/CheckFingerprintVCF.nf
Lines changed: 1 addition & 1 deletion
diff --git a/‎CheckQC/CheckQC.nf
Lines changed: 1 addition & 1 deletion b/‎CheckQC/CheckQC.nf
Lines changed: 1 addition & 1 deletion
diff --git a/‎ClarityEpp/SampleIndications.nf renamed to ‎ClarityEpp/SampleUDFDx.nf
Lines changed: 7 additions & 7 deletions b/‎ClarityEpp/SampleIndications.nf renamed to ‎ClarityEpp/SampleUDFDx.nf
Lines changed: 7 additions & 7 deletions
diff --git a/‎GenderCheck/CompareGender.nf
Lines changed: 27 additions & 0 deletions b/‎GenderCheck/CompareGender.nf
Lines changed: 27 additions & 0 deletions
diff --git a/‎GenderCheck/Dockerfile
Lines changed: 11 additions & 0 deletions b/‎GenderCheck/Dockerfile
Lines changed: 11 additions & 0 deletions
diff --git a/‎GenderCheck/calculate_gender.py
Lines changed: 64 additions & 0 deletions b/‎GenderCheck/calculate_gender.py
Lines changed: 64 additions & 0 deletions
diff --git a/‎GenderCheck/requirements.txt
Lines changed: 5 additions & 0 deletions b/‎GenderCheck/requirements.txt
Lines changed: 5 additions & 0 deletions
diff --git a/‎GenderCheck/test_bam.bam
864 KB b/‎GenderCheck/test_bam.bam
864 KB
diff --git a/‎GenderCheck/test_bam.bam.bai
1.54 MB b/‎GenderCheck/test_bam.bam.bai
1.54 MB
diff --git a/‎GenderCheck/test_calculate_gender.py
Lines changed: 55 additions & 0 deletions b/‎GenderCheck/test_calculate_gender.py
Lines changed: 55 additions & 0 deletions
diff --git a/‎Kinship/Kinship.nf
Lines changed: 1 addition & 1 deletion b/‎Kinship/Kinship.nf
Lines changed: 1 addition & 1 deletion
diff --git a/‎MosaicHunter/1.0.0/Dockerfile
Lines changed: 17 additions & 0 deletions b/‎MosaicHunter/1.0.0/Dockerfile
Lines changed: 17 additions & 0 deletions
diff --git a/‎MosaicHunter/1.0.0/MosaicHunter.nf
Lines changed: 124 additions & 0 deletions b/‎MosaicHunter/1.0.0/MosaicHunter.nf
Lines changed: 124 additions & 0 deletions
@@ -15,6 +15,6 @@ process CheckFingerprintVCF {
 
     script:
         """
-        python ${baseDir}/CustomModules/CheckFingerprintVCF/check_fingerprint_vcf.py -o logbook.txt ${vcf_files}
+        python ${projectDir}/CustomModules/CheckFingerprintVCF/check_fingerprint_vcf.py -o logbook.txt ${vcf_files}
         """
 }
@@ -13,6 +13,6 @@ process CheckQC {
 
     script:
         """
-        python ${baseDir}/CustomModules/CheckQC/check_qc.py ${params.qc_settings_path} '.' ${identifier} ${input_files} 
+        python ${projectDir}/CustomModules/CheckQC/check_qc.py ${params.qc_settings_path} '.' ${identifier} ${input_files}
         """
 }
@@ -1,8 +1,8 @@
-process SampleIndications {
-    // Custom process to run clarity_epp export sample_indications
-    tag {"ClarityEpp SampleIndications ${sample_id}"}
+process SampleUDFDx {
+    // Custom process to run clarity_epp export sample_udf_dx
+    tag {"ClarityEpp SampleUDFDx ${sample_id}"}
     label 'ClarityEpp'
-    label 'ClarityEpp_SampleIndications'
+    label 'ClarityEpp_SampleUDFDx'
     shell = ['/bin/bash', '-eo', 'pipefail']
     cache = false  //Disable cache to force a clarity export restarting the workflow.
 
@@ -15,7 +15,7 @@ process SampleIndications {
     script:
         """
         source ${params.clarity_epp_path}/venv/bin/activate
-        python ${params.clarity_epp_path}/clarity_epp.py export sample_indications \
-        -a ${sample_id} | cut -f 2 | grep -v 'Indication' | tr -d '\n'
+        python ${params.clarity_epp_path}/clarity_epp.py export sample_udf_dx \
+        -a ${sample_id} -u '$params.udf' -c '$params.column_name' | cut -f 2 | grep -v $params.column_name | tr -d '\n'
         """
-}
+}
@@ -0,0 +1,27 @@
+process CompareGender {
+    // Custom process to check gender of sample with known status
+    tag {"CompareGender ${sample_id}"}
+    label 'CompareGender'
+    label 'CompareGender_Pysam'
+    container = 'ghcr.io/umcugenetics/custommodules_gendercheck:1.0.0'
+    shell = ['/bin/bash', '-eo', 'pipefail']
+
+    input:
+        tuple(val(sample_id), val(analysis_id), path(bam_file), path(bai_file), val(true_gender))
+
+    output:
+        tuple(path("*gendercheck.txt"), emit: gendercheck_qc)
+
+    script:
+        """
+        python ${projectDir}/CustomModules/GenderCheck/calculate_gender.py \
+            ${sample_id} \
+            ${analysis_id} \
+            ${bam_file} \
+            ./ \
+            ${true_gender} \
+            $params.gendercheck_ratio_y \
+            $params.gendercheck_mapping_qual \
+            $params.gendercheck_locus_y
+        """
+}
@@ -0,0 +1,11 @@
+################## BASE IMAGE ######################
+FROM --platform=linux/amd64 python:3.11
+
+################## METADATA ######################
+LABEL base_image="python:3.11"
+LABEL version="1.0.0"
+LABEL extra.binaries="pysam,pytest"
+
+################## INSTALLATIONS ######################
+COPY requirements.txt requirements.txt
+RUN pip install -r requirements.txt
@@ -0,0 +1,64 @@
+#! /usr/bin/env python3
+
+import argparse
+import pysam
+
+
+def is_valid_read(read, mapping_qual):
+    """Check if a read is properly mapped."""
+    if (read.mapping_quality >= mapping_qual and read.reference_end and read.reference_start):
+        return True
+    return False
+
+
+def get_gender_from_bam(bam, mapping_qual, locus_y, ratio_y):
+    with pysam.AlignmentFile(bam, "rb") as bam_file:
+        y_reads = float(
+                      sum([is_valid_read(read, mapping_qual) for read in bam_file.fetch(region=locus_y)])
+                  )
+        total_reads = float(bam_file.mapped)
+        y_ratio_perc = (y_reads / total_reads) * 100
+    if y_ratio_perc <= ratio_y:
+        return "female"
+    else:
+        return "male"
+
+
+def compare_gender(sample_id, analysis_id, test_gender, true_gender):
+    if test_gender == true_gender or true_gender == "unknown":  # if gender if unknown/onbekend in database, pass
+        qc = "PASS"
+    else:  # not_detected in database considered failed
+        qc = "FAIL"
+    return f"{sample_id}\t{analysis_id}\t{test_gender}\t{true_gender}\t{qc}\n"
+
+
+def write_qc_file(sample_id, analysis_id, comparison, outputfolder):
+    with open(f"{outputfolder}/{sample_id}_{analysis_id}_gendercheck.txt", 'w') as write_file:
+        write_file.write("sample_id\tanalysis_id\ttest_gender\ttrue_gender\tstatus\n")
+        write_file.write(comparison)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument('sample_id', help='sample_id')
+    parser.add_argument('analysis_id', help='analysis_id')
+    parser.add_argument('bam', help='path to bam file')
+    parser.add_argument('outputfolder', help='path to output folder')
+    parser.add_argument('true_gender', help='gender regarded as the truth')
+    parser.add_argument(
+        "ratio_y",
+        type=float,
+        help="maximunum chromosome Y ratio for females [float]"
+    )
+    parser.add_argument('mapping_qual', type=int, help='minimum mapping quality of reads to be considered [int]')
+    parser.add_argument('locus_y', help='Coordinates for includes region on chromosome Y (chr:start-stop)')
+    args = parser.parse_args()
+
+    translation = {"Man": "male", "Vrouw": "female", "Onbekend": "unknown", "unknown": "not_detected"}
+    true_gender = args.true_gender
+    if true_gender in translation:
+        true_gender = translation[true_gender]
+
+    test_gender = get_gender_from_bam(args.bam, args.mapping_qual, args.locus_y, args.ratio_y)
+    comparison = compare_gender(args.sample_id, args.analysis_id, test_gender, true_gender)
+    write_qc_file(args.sample_id, args.analysis_id, comparison, args.outputfolder)
@@ -0,0 +1,5 @@
+iniconfig==2.0.0
+packaging==23.2
+pluggy==1.4.0
+pysam==0.22.0
+pytest==8.0.2
@@ -0,0 +1,55 @@
+import calculate_gender
+
+import pytest
+
+
+class TestIsValidRead():
+
+    class MyObject:
+        def __init__(self, qual, start, end):
+            self.mapping_quality = qual
+            self.reference_start = start
+            self.reference_end = end
+
+    @pytest.mark.parametrize("read,mapping_qual,expected", [
+        (MyObject(19, True, True), 20, False),  # mapping quality is below the threshold
+        (MyObject(20, True, True), 20, True),  # mapping quality is equal to the threshold
+        (MyObject(20, True, True), 19, True),  # mapping quality is higher than the threshold
+        (MyObject(20, False, True), 20, False),  # reference_end is false
+        (MyObject(20, True, False), 20, False),  # reference_start is false
+    ])
+    def test_is_valid_read(self, read, mapping_qual, expected):
+        assert expected == calculate_gender.is_valid_read(read, mapping_qual)
+
+
+class TestGetGenderFromBam():
+    @pytest.mark.parametrize("bam,mapping_qual,locus_y,ratio_y,expected", [
+        ("./test_bam.bam", 20, "Y:2649520-59034050", 0.02, "male"),  # output male below
+        ("./test_bam.bam", 20, "Y:2649520-59034050", 0.22, "female"),  # output female
+    ])
+    def test_get_gender_from_bam(self, bam, mapping_qual, locus_y, ratio_y, expected):
+        assert expected == calculate_gender.get_gender_from_bam(bam, mapping_qual, locus_y, ratio_y)
+
+
+class TestCompareGender():
+    @pytest.mark.parametrize("sample_id,analysis_id,test_gender,true_gender,expected", [
+        # test_gender and true_gender identical, should be PASS
+        ("test_sample", "test_analyse", "male", "male", "test_sample\ttest_analyse\tmale\tmale\tPASS\n"),
+        # test_gender and true_gender not identical , should be FAIL
+        ("test_sample", "test_analyse", "male", "female", "test_sample\ttest_analyse\tmale\tfemale\tFAIL\n"),
+        # true_gender unknown, should be PASS
+        ("test_sample", "test_analyse", "male", "unknown", "test_sample\ttest_analyse\tmale\tunknown\tPASS\n"),
+        # true_gender not_detected, should be FAIL
+        ("test_sample", "test_analyse", "male", "not_detected", "test_sample\ttest_analyse\tmale\tnot_detected\tFAIL\n"),
+    ])
+    def test_compare_gender(self, sample_id, analysis_id, test_gender, true_gender, expected):
+        assert expected == calculate_gender.compare_gender(sample_id, analysis_id, test_gender, true_gender)
+
+
+def test_write_qc_file(tmp_path):
+    path = tmp_path / "qc_folder"
+    path.mkdir()
+    qc_file = path / "test_sample_test_analyse_gendercheck.txt"
+    calculate_gender.write_qc_file("test_sample", "test_analyse", "test_sample\ttest_analyse\tmale\tmale\tPASS\n", path)
+    message = "sample_id\tanalysis_id\ttest_gender\ttrue_gender\tstatus\ntest_sample\ttest_analyse\tmale\tmale\tPASS\n"
+    assert message in qc_file.read_text()
@@ -17,6 +17,6 @@ process Kinship {
         plink --file out --make-bed --noweb
         king -b plink.bed --kinship
         cp king.kin0 ${analysis_id}.kinship
-        python ${baseDir}/CustomModules/Kinship/check_kinship.py ${analysis_id}.kinship ${ped_file} > ${analysis_id}.kinship_check.out
+        python ${projectDir}/CustomModules/Kinship/check_kinship.py ${analysis_id}.kinship ${ped_file} > ${analysis_id}.kinship_check.out
         """
 }
@@ -0,0 +1,17 @@
+FROM --platform=linux/amd64 ubuntu:latest
+
+RUN apt-get update && \
+    apt-get -y install openjdk-8-jdk-headless && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 
+
+RUN apt-get update
+RUN apt-get install -y git
+RUN apt-get install -y git-lfs
+RUN git lfs install
+RUN apt-get install libcurl4
+
+RUN apt-get install -y rsync
+RUN rsync -aP hgdownload.soe.ucsc.edu::genome/admin/exe/linux.x86_64/blat/ ./../usr/bin
+
+RUN git clone https://github.com/zzhang526/MosaicHunter
@@ -0,0 +1,124 @@
+#!/usr/bin/env nextflow
+
+process MosaicHunterGetGender {
+    tag {"MosaicHunterGetGender ${sample_id}"}
+    label 'MosaicHunterGetGender'
+    container = 'ghcr.io/umcugenetics/custommodules_gendercheck:1.0.0'
+    shell = ['/bin/bash', '-eo', 'pipefail']
+
+    /*
+    Define inputs.
+    - Tuple consisting of a sample_id, a path to the .bam file, a path to the .bai file
+    */
+    input:
+        tuple(val(sample_id), path(bam_files), path(bai_files))
+
+    /*
+    Define outputs.
+    - A tuple containing respectively the number for the alpha and beta found in the sample.
+    */
+    output:
+        tuple(val(sample_id), path("gender_data_${sample_id}.tsv"))
+
+    // The command to execute MosaicHunter Get Gender
+    script:
+        """
+        python ${workflow.projectDir}/CustomModules/MosaicHunter/1.0.0/get_gender_from_bam_chrx.py \
+            ${sample_id} \
+            ${bam_files} \
+            ./ \
+            $params.mh_gender_ratio_x_threshold_male \
+            $params.mh_gender_ratio_x_threshold_female \
+            $params.mh_gender_mapping_qual \
+            $params.mh_gender_locus_x
+        """
+}
+
+process MosaicHunterQualityCorrection {
+    // Step 1: Process input files
+    tag {"MosaicHunterQualityCorrection ${sample_id}"}
+    label 'MosaicHunterQualityCorrection'
+    container = 'docker://umcugenbioinf/mosaic_hunter:1.0.0'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    /*
+    Define inputs.
+    - Tuple consisting of a sample_id, a path to the .bam file, a path to the .bai file
+    - Path to the reference file
+    - Path to the MosaicHunter common site filter bed file
+    - Path to the MosaicHunter config file for the Quality Correction step
+    */
+    input:
+        tuple(val(sample_id), path(bam_files), path(bai_files), path(gender_data))
+        path(mh_reference_file)
+        path(mh_common_site_filter_bed_file)
+        path(mh_config_file_one)
+
+    /*
+    Define outputs.
+    - A tuple containing respectively the number for the alpha and beta found in the sample.
+    */
+    output:
+        tuple(val(sample_id), env(MHALPHA), env(MHBETA))
+
+    // The command to execute MosaicHunter
+    shell:
+    '''
+    SEX_STRING=$(awk 'NR>1 {print $2}' gender_data_!{sample_id}.tsv)
+
+    java -Xmx!{task.memory.toGiga()-4}G -jar /MosaicHunter/build/mosaichunter.jar \
+    -C !{mh_config_file_one} \
+    -P input_file=!{bam_files} \
+    -P mosaic_filter.sex=$SEX_STRING \
+    -P reference_file=!{mh_reference_file} \
+    -P common_site_filter.bed_file=!{mh_common_site_filter_bed_file} \
+    -P output_dir=./
+    export MHALPHA="\$(grep -Po "(?<=alpha:\\s)\\w+" ./stdout*)"
+    export MHBETA="\$(grep -Po "(?<=beta:\\s)\\w+" ./stdout*)"
+    '''
+}
+
+process MosaicHunterMosaicVariantCalculation {
+    // Caclulate the Mosaic Variants
+    tag {"MosaicHunterMosaicVariantCalculation ${sample_id}"}
+    label 'MosaicHunterMosaicVariantCalculation'
+    container = 'docker://umcugenbioinf/mosaic_hunter:1.0.0'
+    shell = ['/bin/bash', '-euo', 'pipefail']
+
+    publishDir "QC/MosaicHunter", saveAs: { filename -> "${sample_id}_$filename" }, mode: 'copy'
+
+    /*
+    Define inputs.
+    - Tuple consisting of a sample_id, a path to the .bam file, a path to the .bai file
+    - Path to the reference file
+    - Path to the MosaicHunter common site filter bed file
+    - Path to the MosaicHunter config file for the Mosaic Variant Calculation step
+    - The output of the MosaicHunterQualityCorrection step. This makes the environment variables available in this process
+    - A tuple containing respectively the number for the alpha and beta found in the
+      sample, which are stored in an environment variable.
+    */
+    input:
+        tuple(val(sample_id), path(bam_files), path(bai_files), path(gender_data), env(MHALPHA), env(MHBETA))
+        path(mh_reference_file)
+        path(mh_common_site_filter_bed_file)
+        path(mh_config_file_two)
+
+    output:
+        path('final.passed.tsv')
+
+    // The command to execute step two of MosaicHunter
+    // First get the SEX_STRING from the sample
+    shell:
+    '''
+    SEX_STRING=$(awk 'NR>1 {print $2}' gender_data_!{sample_id}.tsv)
+
+    java -Xmx!{task.memory.toGiga()-8}G -jar /MosaicHunter/build/mosaichunter.jar \
+    -C !{mh_config_file_two} \
+    -P mosaic_filter.alpha_param=$MHALPHA -P mosaic_filter.beta_param=$MHBETA \
+    -P input_file=!{bam_files} \
+    -P mosaic_filter.sex=$SEX_STRING \
+    -P reference_file=!{mh_reference_file} \
+    -P common_site_filter.bed_file=!{mh_common_site_filter_bed_file} \
+    -P output_dir=./
+    '''
+}
Original file line number	Diff line number	Diff line change
`@@ -15,6 +15,6 @@ process CheckFingerprintVCF {`
`15`	`15`
`16`	`16`	`script:`
`17`	`17`	`"""`
`18`		`- python ${baseDir}/CustomModules/CheckFingerprintVCF/check_fingerprint_vcf.py -o logbook.txt ${vcf_files}`
	`18`	`+ python ${projectDir}/CustomModules/CheckFingerprintVCF/check_fingerprint_vcf.py -o logbook.txt ${vcf_files}`
`19`	`19`	`"""`
`20`	`20`	`}`
Original file line number	Diff line number	Diff line change
`@@ -13,6 +13,6 @@ process CheckQC {`
`13`	`13`
`14`	`14`	`script:`
`15`	`15`	`"""`
`16`		`- python ${baseDir}/CustomModules/CheckQC/check_qc.py ${params.qc_settings_path} '.' ${identifier} ${input_files}`
	`16`	`+ python ${projectDir}/CustomModules/CheckQC/check_qc.py ${params.qc_settings_path} '.' ${identifier} ${input_files}`
`17`	`17`	`"""`
`18`	`18`	`}`
Original file line number	Diff line number	Diff line change
`@@ -17,6 +17,6 @@ process Kinship {`
`17`	`17`	`plink --file out --make-bed --noweb`
`18`	`18`	`king -b plink.bed --kinship`
`19`	`19`	`cp king.kin0 ${analysis_id}.kinship`
`20`		`- python ${baseDir}/CustomModules/Kinship/check_kinship.py ${analysis_id}.kinship ${ped_file} > ${analysis_id}.kinship_check.out`
	`20`	`+ python ${projectDir}/CustomModules/Kinship/check_kinship.py ${analysis_id}.kinship ${ped_file} > ${analysis_id}.kinship_check.out`
`21`	`21`	`"""`
`22`	`22`	`}`