-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathpipeline.sh
executable file
·86 lines (74 loc) · 4.26 KB
/
pipeline.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/env bash
# Configuration header of the ClinCNV driver script. Every tunable is set
# here; the steps further down only read these variables.
set -e

# ---- Input locations -------------------------------------------------------
# Coverage folders for normal and tumor samples. Both variables are assigned
# a second time at the end of this header; the later assignment is the one
# that is in effect when the steps run.
folderWithNormals='/mnt/share/data/coverage/ssSC_v2'
folderWithTumors='/mnt/share/data/coverage/ssSC_v2-tumor'
# Keep the trailing slash: the bed file name is appended directly to it.
folderWithEnrichmentKits='/mnt/share/data/enrichment/'
bedFile='ssSC_v2_2015_01_26.bed'
columnWhereCoverageStarts=4

# ---- Output and bookkeeping ------------------------------------------------
outputFolder='/mnt/users/ahdemig1/clinCNV_development/ClinCNV_results/'
fileWithPairs='pairs.txt'
# The script works from the directory it was started in.
folderWithScript="${PWD}"
reanalyseCohort='F'

# ---- Parameters introduced on 9th of August, 2018 --------------------------
# Sample lists; when a list is empty the corresponding coverage folder is
# used as merge input instead.
sampleNamesListGermline='/mnt/users/ahdemig1/clinCNV_development/ClinCNV/somatic/samplesListNormal.txt'
sampleNamesListSomatic='/mnt/users/ahdemig1/clinCNV_development/ClinCNV/somatic/samplesListTumor.txt'
nameOfTheAnalysis='exomes.'     # prefix of the generated coverage files
typeOfAnalysis='somatic'        # either "germline" or "somatic"
scoreGermline=40                # score threshold for calling germline CNVs
lengthGermline=1                # minimum number of regions forming a germline CNV
scoreSomatic=60                 # score threshold for calling somatic CNAs
lengthSomatic=5                 # minimum number of regions forming a somatic CNA
# Maximum number of CNVs >= 3 kbp expected in a 40x WGS sample of the
# European population (per the original author's note).
maximumNumberOfGermlineCNVs=100
# If a sample exceeds the maximum, thresholds are raised and the sample is
# re-analysed up to this many times.
maximumNumberOfIterations=3
maximumNumberOfSomaticCNAs=100  # has to be tuned

# ---- Parameters introduced on 1st of October, 2018 -------------------------
# Off-target coverage folders. These replace the values assigned to the same
# two variables at the top of this header.
folderWithNormals='/mnt/share/data/coverage/offtarget_ssSC_v2'
folderWithTumors='/mnt/share/data/coverage/offtarget_ssSC_v2-tumor'
# Work from the script folder: the R scripts used later are referenced by
# relative name. Quoted because the folder comes from $PWD and may contain
# spaces or glob characters.
cd "$folderWithScript"
# prepare bed file
# BedAnnotateGC reads the kit's bed file from the enrichment folder and writes
# a local copy named $bedFile; BedAnnotateGenes then writes "annotated.$bedFile",
# which is the file handed to the calling step via --bed.
BedAnnotateGC -in "${folderWithEnrichmentKits}${bedFile}" -out "$bedFile"
BedAnnotateGenes -in "$bedFile" -out "annotated.${bedFile}"
# merge normal files
# Create <nameOfTheAnalysis>normal.txt unless it is already present. The merge
# input (-i) is the germline sample list when one is configured and the
# normals coverage folder when the list is empty.
normalCoverageFile="${nameOfTheAnalysis}normal.txt"
if [[ -f "$normalCoverageFile" ]]; then
  # The message names the normal file (it previously reported tumor.txt here).
  echo "File ${normalCoverageFile} exists. We do not recalculate it. WARNING: if .bed file do not match coverage file, the pipeline may crash."
else
  /mnt/share/opt/R-3.4.0/bin/Rscript --vanilla mergeFilesFromFolder.R \
    -i "${sampleNamesListGermline:-$folderWithNormals}" \
    -o "$normalCoverageFile" \
    -n "$columnWhereCoverageStarts"
fi
# Somatic mode only: create <nameOfTheAnalysis>tumor.txt unless it is already
# present. The merge input (-i) is the somatic sample list when one is
# configured and the tumor coverage folder when the list is empty. All
# expansions are quoted so that empty or space-containing values cannot shift
# the Rscript argument list.
if [[ "$typeOfAnalysis" == "somatic" ]]; then
  echo "Somatic framework is used. We create file with coverages for Tumors too."
  tumorCoverageFile="${nameOfTheAnalysis}tumor.txt"
  if [[ -f "$tumorCoverageFile" ]]; then
    echo "File ${tumorCoverageFile} exists. We do not recalculate it. WARNING: if .bed file do not match coverage file, the pipeline may crash."
  else
    /mnt/share/opt/R-3.4.0/bin/Rscript --vanilla mergeFilesFromFolder.R \
      -i "${sampleNamesListSomatic:-$folderWithTumors}" \
      -o "$tumorCoverageFile" \
      -n "$columnWhereCoverageStarts"
  fi
fi
# check if parameters are correctly specified
# Only "somatic" and "germline" are accepted. Any other value aborts the
# script with a non-zero status; without the exit, execution would continue
# into the non-somatic branch of the calling step.
case "$typeOfAnalysis" in
  germline)
    echo "Germline framework is used."
    ;;
  somatic)
    # Already announced by the tumor-merging step; nothing to do here.
    ;;
  *)
    echo "Framework is not specified as somatic or germline. Quit." >&2
    exit 1
    ;;
esac
echo "Calling"
# run calling
# In somatic mode firstStep.R receives the normal and tumor coverage tables,
# the pairing file and the somatic thresholds; otherwise it receives only the
# normal table and the germline thresholds. Every expansion is quoted so that
# values containing spaces or glob characters reach Rscript as one argument.
# NOTE(review): --reanalyseCohort is hard-coded to TRUE in both calls; the
# configured $reanalyseCohort value is not passed on. Confirm this is intended.
if [[ "$typeOfAnalysis" == "somatic" ]]; then
  /mnt/share/opt/R-3.4.0/bin/Rscript --vanilla firstStep.R \
    --normal "${nameOfTheAnalysis}normal.txt" --tumor "${nameOfTheAnalysis}tumor.txt" \
    --out "$outputFolder" --pair "$fileWithPairs" --bed "annotated.${bedFile}" \
    --colNum "$columnWhereCoverageStarts" --folderWithScript "$folderWithScript" \
    --reanalyseCohort TRUE \
    --scoreG "$scoreGermline" --lengthG "$lengthGermline" \
    --scoreS "$scoreSomatic" --lengthS "$lengthSomatic" \
    --maxNumGermCNVs "$maximumNumberOfGermlineCNVs" --maxNumIter "$maximumNumberOfIterations" \
    --maxNumSomCNAs "$maximumNumberOfSomaticCNAs"
else
  /mnt/share/opt/R-3.4.0/bin/Rscript --vanilla firstStep.R \
    --normal "${nameOfTheAnalysis}normal.txt" --out "$outputFolder" --bed "annotated.${bedFile}" \
    --colNum "$columnWhereCoverageStarts" --folderWithScript "$folderWithScript" \
    --reanalyseCohort TRUE \
    --scoreG "$scoreGermline" --lengthG "$lengthGermline" \
    --maxNumGermCNVs "$maximumNumberOfGermlineCNVs" --maxNumIter "$maximumNumberOfIterations"
fi