Skip to content

Commit

Permalink
Merge pull request #119 from cov-lineages/p1_lineages
Browse files Browse the repository at this point in the history
P lineages
  • Loading branch information
aineniamh authored Jan 12, 2021
2 parents d88abf3 + f9f60d1 commit 2b5b727
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 2 deletions.
2 changes: 1 addition & 1 deletion pangolin/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# Package metadata: the program name and the package version string.
# NOTE(review): this view is a diff hunk without +/- markers; its header
# reports 1 addition and 1 deletion, so the two __version__ lines below look
# like the removed ("2.1.6") and added ("2.1.7") sides of that change — confirm
# against the real file before relying on either.
_program = "pangolin"
__version__ = "2.1.6"
__version__ = "2.1.7"
6 changes: 6 additions & 0 deletions pangolin/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,12 @@ def main(sysargs = sys.argv[1:]):

variants_file = pkg_resources.resource_filename('pangolin', 'data/config_b.1.351.csv')
config["b1351_variants"] = variants_file

variants_file = pkg_resources.resource_filename('pangolin', 'data/config_p.1.csv')
config["p1_variants"] = variants_file

variants_file = pkg_resources.resource_filename('pangolin', 'data/config_p.2.csv')
config["p2_variants"] = variants_file

if args.write_tree:
config["write_tree"]="True"
Expand Down
16 changes: 16 additions & 0 deletions pangolin/data/config_p.1.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
aa:orf1ab:S1188L
aa:orf1ab:K1795Q
del:11288:9
aa:S:L18F
aa:S:T20N
aa:S:P26S
aa:S:D138Y
aa:S:R190S
aa:S:K417T
aa:S:E484K
aa:S:N501Y
aa:S:H655Y
aa:S:T1027I
aa:orf3a:G174C
aa:orf8:E92K
aa:N:P80R
5 changes: 5 additions & 0 deletions pangolin/data/config_p.2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
aa:orf1ab:L3468V
aa:orf1ab:L3930F
aa:S:E484K
aa:S:V1176F
aa:N:A119S
72 changes: 71 additions & 1 deletion pangolin/scripts/pangolearn.smk
Original file line number Diff line number Diff line change
Expand Up @@ -185,12 +185,48 @@ rule type_variants_b1351:
--append-genotypes
"""

# Rule type_variants_p2: runs type_variants.py over the fasta produced by the
# datafunk_trim_and_pad rule, using the variant definitions file registered
# under config["p2_variants"], and writes variants_p2.csv into the temp dir.
# The `overwrite` rule reads the "query" and "alt_count" columns of that CSV
# to decide which sequences are reassigned to lineage P.2.
# NOTE(review): indentation appears to have been stripped in this view; the
# code lines are kept exactly as shown.
rule type_variants_p2:
input:
# fasta output of the upstream datafunk_trim_and_pad rule
fasta = rules.datafunk_trim_and_pad.output.fasta,
# path to the P.2 variant definitions, taken from the workflow config
variants = config["p2_variants"],
# reference sequence path, taken from the workflow config
reference = config["reference_fasta"]
output:
# intermediate CSV written under config["tempdir"]
variants = os.path.join(config["tempdir"],"variants_p2.csv")
# Each {...:q} placeholder uses Snakemake's quoting format spec, so paths are
# shell-quoted before substitution.
# --append-genotypes is passed straight through to type_variants.py; its exact
# effect on the output columns is not visible here — TODO confirm.
shell:
"""
type_variants.py \
--fasta-in {input.fasta:q} \
--variants-config {input.variants:q} \
--reference {input.reference:q} \
--variants-out {output.variants:q} \
--append-genotypes
"""


# Rule type_variants_p1: runs type_variants.py over the fasta produced by the
# datafunk_trim_and_pad rule, using the variant definitions file registered
# under config["p1_variants"], and writes variants_p1.csv into the temp dir.
# The `overwrite` rule reads the "query" and "alt_count" columns of that CSV
# to decide which sequences are reassigned to lineage P.1.
# NOTE(review): indentation appears to have been stripped in this view; the
# code lines are kept exactly as shown.
rule type_variants_p1:
input:
# fasta output of the upstream datafunk_trim_and_pad rule
fasta = rules.datafunk_trim_and_pad.output.fasta,
# path to the P.1 variant definitions, taken from the workflow config
variants = config["p1_variants"],
# reference sequence path, taken from the workflow config
reference = config["reference_fasta"]
output:
# intermediate CSV written under config["tempdir"]
variants = os.path.join(config["tempdir"],"variants_p1.csv")
# Each {...:q} placeholder uses Snakemake's quoting format spec, so paths are
# shell-quoted before substitution.
# --append-genotypes is passed straight through to type_variants.py; its exact
# effect on the output columns is not visible here — TODO confirm.
shell:
"""
type_variants.py \
--fasta-in {input.fasta:q} \
--variants-config {input.variants:q} \
--reference {input.reference:q} \
--variants-out {output.variants:q} \
--append-genotypes
"""

rule overwrite:
input:
csv = os.path.join(config["tempdir"],"pangolearn_assignments.csv"),
b117_variants = rules.type_variants_b117.output.variants,
b1351_variants = rules.type_variants_b1351.output.variants
b1351_variants = rules.type_variants_b1351.output.variants,
p2_variants = rules.type_variants_p2.output.variants,
p1_variants = rules.type_variants_p1.output.variants
output:
csv = config["outfile"]
run:
Expand All @@ -206,6 +242,18 @@ rule overwrite:
for row in reader:
if int(row["alt_count"]) > 4:
b1351[row["query"]] = row["alt_count"]
p1 = {}
with open(input.p1_variants, "r") as f:
reader = csv.DictReader(f)
for row in reader:
if int(row["alt_count"]) > 10:
p1[row["query"]] = row["alt_count"]
p2 = {}
with open(input.p2_variants, "r") as f:
reader = csv.DictReader(f)
for row in reader:
if int(row["alt_count"]) > 4:
p2[row["query"]] = row["alt_count"]

with open(output.csv, "w") as fw:
# "taxon,lineage,probability,pangoLEARN_version,status,note"
Expand Down Expand Up @@ -253,6 +301,28 @@ rule overwrite:
new_row["probability"] = "1.0"
new_row["lineage"] = "B.1.351"

writer.writerow(new_row)
elif row["taxon"] in p2:
new_row = row

snps = p2[row["taxon"]]
note = f"{snps}/5 P.2 (B.1.1.28.2) SNPs"

new_row["note"] = note
new_row["probability"] = "1.0"
new_row["lineage"] = "P.2"

writer.writerow(new_row)
elif row["taxon"] in p1:
new_row = row

snps = p1[row["taxon"]]
note = f"{snps}/17 P.1 (B.1.1.28.1) SNPs"

new_row["note"] = note
new_row["probability"] = "1.0"
new_row["lineage"] = "P.1"

writer.writerow(new_row)
else:
writer.writerow(row)
Expand Down

0 comments on commit 2b5b727

Please sign in to comment.