-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcfdna_caller.sh
executable file
·108 lines (93 loc) · 3.58 KB
/
cfdna_caller.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/bin/bash
# cfDNA variant caller
set -e -o pipefail
# use conda environment
source /mnt/share/opt/miniconda3/etc/profile.d/conda.sh
conda activate cfdna
# script directory
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
# arguments
ARGPARSE_DESCRIPTION="cfDNA variant caller"
ARGPARSE=$DIR/argparse.bash
source $ARGPARSE || exit 1
argparse "$@" <<EOF || exit 1
parser.add_argument('-bam','--bam', required=True, help='Tumor bam file')
parser.add_argument('-nbam','--nbam', default='', required=False, help='Normal bam file')
parser.add_argument('-b','--bed', default='', help='Bed file of the targeted regions. O-based')
parser.add_argument('-r','--ref', required=True, help='Reference genome - fasta')
parser.add_argument('-o', '--out_file', default='$PWD/Sample.vcf', help='Out vcf file')
parser.add_argument('-p', '--param', default='', help='Beta-binomial parameters table')
parser.add_argument('-mq','--mq', default=30, help='Minimum mapping quality')
parser.add_argument('-bq','--bq', default=20, help='Minimum base quality')
parser.add_argument('-d','--dist', default=5, help='Minimum distance allowed between variants')
parser.add_argument('-ac','--ac', default=3, help='Minimum number of reads supporting a variant')
parser.add_argument('-ns','--num_sites', default=-1, help='Number of sites to be analysed')
parser.add_argument('-str','--strand', default=0, choices=['0','1'], help='Strand filter activation. 0 for desactivating the filter. Default [1]')
parser.add_argument('-t', '--temp_dir', default='$PWD', help='Temporary directory')
EOF
echo Parameters:
echo Tumor bam: "$BAM"
echo Normal bam: "$NBAM"
echo Out vcf file: "$OUT_FILE"
echo Bed file: "$BED"
echo Reference file: "$REF"
echo Beta-binomial parameters table: "$PARAM"
echo Minimum mapping quality: "$MQ"
echo Minimum base quality: "$BQ"
echo Minimum reads supporting variant: "$AC"
echo Minimum distance allowed between variants: "$DIST"
echo Strand filter: "$STRAND"
echo Number of sites analysed: "$NUM_SITES"
tumor_id=$(basename $BAM .bam)
out_dir=$(dirname $OUT_FILE)
mkdir -p $out_dir
temp=$(mktemp -p $TEMP_DIR -d cfdna_tmp.XXXX)
python3 $DIR/split_bam.py --infile $BAM --outprefix "${temp}/dedup" --freq "${temp}/dp_freq.tsv"
for f in ${temp}/dedup*.bam; do
samtools index $f
f_tsv="${f%.bam}.tsv"
# TODO handle -l BED as optional
samtools mpileup \
-d 0 -f $REF -l $BED -Q 1 -x $f 2> /dev/null | \
python3 $DIR/pileup2tsv.py --pileup - --variantf $f_tsv --minBQ $BQ --mindepth 10
rm $f ${f}.bai
done
if [[ -z "$PARAM" ]]; then
# TODO check for sufficient bases in target file
f_params="${temp}/parameters.txt"
Rscript $DIR/Parameters_step1.R \
-t1 "${temp}/dedup_DP1.tsv" \
-t2 "${temp}/dedup_DP2.tsv" \
-t3 "${temp}/dedup_DP3.tsv" \
-t4 "${temp}/dedup_DP4.tsv" \
-o $f_params
else
f_params=$PARAM
fi
Rscript $DIR/Combine_functions_step1.R \
-t1 "${temp}/dedup_DP1.tsv" \
-t2 "${temp}/dedup_DP2.tsv" \
-t3 "${temp}/dedup_DP3.tsv" \
-t4 "${temp}/dedup_DP4.tsv" \
-params $f_params \
-num $NUM_SITES \
-o "${temp}/stats.tsv"
python3 $DIR/TSV2VCF.py \
-i "${temp}/stats.tsv" \
-tID $tumor_id \
-ref $REF \
-o $OUT_FILE \
-cov 10 \
-ac $AC \
--strand $STRAND \
-variant_dist $DIST \
-tmpdir $temp
Rscript $DIR/Barcode_group_distribution.R \
-bd "${temp}/dp_freq.tsv" \
-out $temp
Rscript $DIR/Plot_error_rate_22_11_2018.R \
-bc1 "${temp}/dedup_DP1.tsv" \
-bc2 "${temp}/dedup_DP2.tsv" \
-bc3 "${temp}/dedup_DP3.tsv" \
-bc4 "${temp}/dedup_DP4.tsv" \
-out $temp