forked from ajaybabu27/covid_assembly_pipeline
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSnakefile
97 lines (90 loc) · 3.99 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
rule all:
    # Default target: request the marker file that rule push_data_pathogendb
    # touches once its upload command has finished.
    input:
        pdb_upload_check = os.environ.get("sample_folder") + "/pdb_upload_complete.txt"
# Produce the consensus FASTA for one sample by calling scripts/run_pipeline.py.
#
# Configuration is read from environment variables:
#   sample_folder - sample directory (rule input and root of the output path)
#   sample_name   - names the output FASTA and is passed to the script via -s
#   repo_dir      - passed to the script via -rd
#
# os.environ[...] is used instead of os.environ.get(...) so that a missing
# variable stops workflow parsing with a KeyError naming the variable, instead
# of the literal string "None" being rendered into the command line.
rule assemble:
    message: "Assembling SARS-CoV-2 genome"
    input:
        sample_folder=os.environ["sample_folder"]
    params:
        sample_name=os.environ["sample_name"],
        repo_dir=os.environ["repo_dir"]
    output:
        consensus_fasta_file=os.environ["sample_folder"]+'/pipeline/'+os.environ["sample_name"]+'.fasta'
    # A "bams" sub-directory selects the -b invocation; otherwise the script is
    # given the folder via -i together with the sample name and read suffixes.
    # The ":q" format spec shell-quotes each substituted value so paths with
    # spaces or shell metacharacters are passed as a single argument.
    shell:
        """
        if [ -d {input.sample_folder:q}/bams ]; then
            python scripts/run_pipeline.py -rd {params.repo_dir:q} -b {input.sample_folder:q}
        else
            python scripts/run_pipeline.py -rd {params.repo_dir:q} -i {input.sample_folder:q} -s {params.sample_name:q} -r1 _R1_001.fastq.gz -r2 _R2_001.fastq.gz
        fi
        """
# Run scripts/variant_analysis.py on the sample folder once the consensus
# FASTA exists; the declared output is <sample_folder>/variants/pileup.
#
# Configuration is read from environment variables (sample_folder,
# sample_name, repo_dir). os.environ[...] is used instead of
# os.environ.get(...) so that a missing variable stops workflow parsing with a
# KeyError naming the variable, instead of the literal string "None" being
# rendered into the command line.
rule variant_analysis:
    message: "Perform intra host variant analysis on SARS-CoV-2 library"
    input:
        sample_folder=os.environ["sample_folder"],
        # Listed only to order this rule after the step that produces the FASTA;
        # the shell command does not reference it.
        consensus_fasta_file=os.environ["sample_folder"]+'/pipeline/'+os.environ["sample_name"]+'.fasta'
    params:
        repo_dir=os.environ["repo_dir"]
    output:
        pileup_file=os.environ["sample_folder"]+'/variants/'+'pileup'
    # A "bams" sub-directory selects -b, otherwise -i is used.
    # The ":q" format spec shell-quotes each substituted value.
    shell:
        """
        if [ -d {input.sample_folder:q}/bams ]; then
            python scripts/variant_analysis.py -rd {params.repo_dir:q} -b {input.sample_folder:q}
        else
            python scripts/variant_analysis.py -rd {params.repo_dir:q} -i {input.sample_folder:q}
        fi
        """
# Run scripts/run_QC.py on the sample folder, then render a coverage plot with
# scripts/plot-coverage-report.R and append it to QC/quality_control.pdf.
#
# Configuration is read from environment variables (sample_folder,
# sample_name, repo_dir, run_ID). The variables that end up in the command
# line are read with os.environ[...] so that a missing one stops workflow
# parsing with a KeyError naming it, instead of the literal string "None"
# being rendered into the command.
rule QC_analysis:
    message: "Perform QC analysis of the SARS-CoV-2 library"
    input:
        sample_folder=os.environ["sample_folder"],
        # Listed only to order this rule after the pileup is produced; the
        # shell command does not reference it.
        pileup_file=os.environ["sample_folder"]+'/variants/'+'pileup'
    params:
        sample_name=os.environ["sample_name"],
        repo_dir=os.environ["repo_dir"],
        # Not referenced by the shell command below; left optional (.get) so
        # an unset run_ID does not newly break this rule.
        run_ID=os.environ.get("run_ID"),
        # Kraken database handed to run_QC.py via -kdb; defined once here
        # instead of being repeated in both branches of the command.
        kraken_db="/sc/arion/projects/PVI/db/minikraken_8GB_20200312"
    output:
        qc_file=os.environ["sample_folder"]+'/QC/'+'quality_control.pdf'
    # Steps:
    #   1. run_QC.py with -b when a "bams" sub-directory exists, else -i
    #   2. plot-coverage-report.R writes variants/<sample_name>_var(.pdf)
    #   3. pdfunite merges the QC report and the plot into a temporary PDF,
    #      which then replaces quality_control.pdf; the plot PDF is removed
    # The ":q" format spec shell-quotes each substituted value.
    shell:
        """
        if [ -d {input.sample_folder:q}/bams ]; then
            python scripts/run_QC.py -rd {params.repo_dir:q} -b {input.sample_folder:q} -kdb {params.kraken_db:q}
        else
            python scripts/run_QC.py -rd {params.repo_dir:q} -i {input.sample_folder:q} -kdb {params.kraken_db:q}
        fi
        module load R
        Rscript scripts/plot-coverage-report.R -i {input.sample_folder:q}/variants/variable_bases.tsv -o {input.sample_folder:q}/variants/{params.sample_name:q}_var
        pdfunite {input.sample_folder:q}/QC/quality_control.pdf {input.sample_folder:q}/variants/{params.sample_name:q}_var.pdf {input.sample_folder:q}/QC/quality_control2.pdf
        mv {input.sample_folder:q}/QC/quality_control2.pdf {input.sample_folder:q}/QC/quality_control.pdf
        rm {input.sample_folder:q}/variants/{params.sample_name:q}_var.pdf
        """
# Run scripts/vadr_run.py on the consensus FASTA; the declared output is
# pipeline/VADR/VADR.vadr.fail.tbl under the sample folder.
# NOTE(review): among the rules visible here, none lists this output as an
# input, so this rule appears to run only when its output is requested
# explicitly as a target -- confirm that is intended.
rule vadr_analysis:
    message: "Perform VADR analysis on the SARS-CoV-2 genome"
    params:
        sample_name = os.environ.get("sample_name"),
        sample_folder = os.environ.get("sample_folder")
    input:
        consensus_fasta_file = os.environ.get("sample_folder") + "/pipeline/" + os.environ.get("sample_name") + ".fasta"
    output:
        vadr_error_file = os.environ.get("sample_folder") + "/pipeline/VADR/VADR.vadr.fail.tbl"
    shell:
        "python scripts/vadr_run.py {input.consensus_fasta_file} {params.sample_folder}/pipeline/VADR {params.sample_folder}/pipeline/VADR/VADR.gff /sc/arion/projects/PVI/db/vadr-models-corona-1.1-1"
# Commented-out variant of the command above that spells the paths out via os.environ:
#python scripts/vadr_run.py os.environ.get("sample_folder")+"/pipeline/"+os.environ.get("sample_name")+".fasta" os.environ.get("sample_folder")+"/pipeline/VADR" os.environ.get("sample_folder")+"/pipeline/VADR/VADR.gff" /sc/arion/projects/PVI/db/vadr-models-corona-1.1-1
# Upload the sample via scripts/push_pathogendb.py and then touch a marker
# file (pdb_upload_complete.txt in the sample folder) as this rule's output.
#
# Configuration is read from environment variables (sample_folder,
# sample_name, run_ID). os.environ[...] is used instead of os.environ.get(...)
# so that a missing variable stops workflow parsing with a KeyError naming the
# variable; previously an unset run_ID surfaced as an AttributeError on
# NoneType, and an unset sample_name would have been rendered as "None".
rule push_data_pathogendb:
    message: "Push genome assembly data to pathogenDB"
    input:
        # Orders the upload after the QC report has been produced.
        qc_file=os.environ["sample_folder"]+'/QC/'+'quality_control.pdf'
    params:
        # Not referenced by the shell command below; retained unchanged.
        sample_folder=os.environ["sample_folder"],
        sample_name=os.environ["sample_name"],
        # Only the part of run_ID before the first underscore is passed on.
        run_ID=os.environ["run_ID"].split("_")[0]
    output:
        pdb_upload_check=os.environ["sample_folder"]+'/pdb_upload_complete.txt'
    # The marker is created through {output.pdb_upload_check} so its path is
    # defined in exactly one place. The ":q" format spec shell-quotes each
    # substituted value.
    shell:
        """
        module purge
        module load python/2.7.16
        python scripts/push_pathogendb.py {params.sample_name:q} {params.run_ID:q}
        touch {output.pdb_upload_check:q}
        """