Merge pull request #119 from cov-lineages/p1_lineages

P lineages
cov-lineages · Jan 12, 2021 · 2b5b727
2 parents d88abf3 + f9f60d1
commit 2b5b727
Showing 5 changed files with 99 additions and 2 deletions.
diff --git a/pangolin/__init__.py b/pangolin/__init__.py
@@ -1,2 +1,2 @@
 _program = "pangolin"
-__version__ = "2.1.6"
+__version__ = "2.1.7"
diff --git a/pangolin/command.py b/pangolin/command.py
@@ -258,6 +258,12 @@ def main(sysargs = sys.argv[1:]):
 
     variants_file = pkg_resources.resource_filename('pangolin', 'data/config_b.1.351.csv')
     config["b1351_variants"] = variants_file
+
+    variants_file = pkg_resources.resource_filename('pangolin', 'data/config_p.1.csv')
+    config["p1_variants"] = variants_file
+
+    variants_file = pkg_resources.resource_filename('pangolin', 'data/config_p.2.csv')
+    config["p2_variants"] = variants_file
 
     if args.write_tree:
         config["write_tree"]="True"

diff --git a/pangolin/data/config_p.1.csv b/pangolin/data/config_p.1.csv
@@ -0,0 +1,16 @@
+aa:orf1ab:S1188L
+aa:orf1ab:K1795Q
+del:11288:9
+aa:S:L18F
+aa:S:T20N
+aa:S:P26S
+aa:S:D138Y
+aa:S:R190S
+aa:S:K417T
+aa:S:E484K
+aa:S:N501Y
+aa:S:H655Y
+aa:S:T1027I
+aa:orf3a:G174C
+aa:orf8:E92K
+aa:N:P80R
diff --git a/pangolin/data/config_p.2.csv b/pangolin/data/config_p.2.csv
@@ -0,0 +1,5 @@
+aa:orf1ab:L3468V
+aa:orf1ab:L3930F
+aa:S:E484K
+aa:S:V1176F
+aa:N:A119S
diff --git a/pangolin/scripts/pangolearn.smk b/pangolin/scripts/pangolearn.smk
@@ -185,12 +185,48 @@ rule type_variants_b1351:
         --append-genotypes
         """
 
+rule type_variants_p2:
+    input:
+        fasta = rules.datafunk_trim_and_pad.output.fasta,
+        variants = config["p2_variants"],
+        reference = config["reference_fasta"]
+    output:
+        variants = os.path.join(config["tempdir"],"variants_p2.csv")
+    shell:
+        """
+        type_variants.py \
+        --fasta-in {input.fasta:q} \
+        --variants-config {input.variants:q} \
+        --reference {input.reference:q} \
+        --variants-out {output.variants:q} \
+        --append-genotypes
+        """
+
+
+rule type_variants_p1:
+    input:
+        fasta = rules.datafunk_trim_and_pad.output.fasta,
+        variants = config["p1_variants"],
+        reference = config["reference_fasta"]
+    output:
+        variants = os.path.join(config["tempdir"],"variants_p1.csv")
+    shell:
+        """
+        type_variants.py \
+        --fasta-in {input.fasta:q} \
+        --variants-config {input.variants:q} \
+        --reference {input.reference:q} \
+        --variants-out {output.variants:q} \
+        --append-genotypes
+        """
 
 rule overwrite:
     input:
         csv = os.path.join(config["tempdir"],"pangolearn_assignments.csv"),
         b117_variants = rules.type_variants_b117.output.variants,
-        b1351_variants = rules.type_variants_b1351.output.variants
+        b1351_variants = rules.type_variants_b1351.output.variants,
+        p2_variants = rules.type_variants_p2.output.variants,
+        p1_variants = rules.type_variants_p1.output.variants
     output:
         csv = config["outfile"]
     run:
@@ -206,6 +242,18 @@ rule overwrite:
             for row in reader:
                 if int(row["alt_count"]) > 4:
                     b1351[row["query"]] = row["alt_count"]
+        p1 = {}
+        with open(input.p1_variants, "r") as f:
+            reader = csv.DictReader(f)
+            for row in reader:
+                if int(row["alt_count"]) > 10:
+                    p1[row["query"]] = row["alt_count"]
+        p2 = {}
+        with open(input.p2_variants, "r") as f:
+            reader = csv.DictReader(f)
+            for row in reader:
+                if int(row["alt_count"]) > 4:
+                    p2[row["query"]] = row["alt_count"]
 
         with open(output.csv, "w") as fw:
             # "taxon,lineage,probability,pangoLEARN_version,status,note" 
@@ -253,6 +301,28 @@ rule overwrite:
                         new_row["probability"] = "1.0"
                         new_row["lineage"] = "B.1.351"
 
+                        writer.writerow(new_row)
+                    elif row["taxon"] in p2:
+                        new_row = row
+
+                        snps = p2[row["taxon"]]
+                        note = f"{snps}/5 P.2 (B.1.1.28.2) SNPs"
+
+                        new_row["note"] = note
+                        new_row["probability"] = "1.0"
+                        new_row["lineage"] = "P.2"
+
+                        writer.writerow(new_row)
+                    elif row["taxon"] in p1:
+                        new_row = row
+
+                        snps = p1[row["taxon"]]
+                        note = f"{snps}/17 P.1 (B.1.1.28.1) SNPs"
+
+                        new_row["note"] = note
+                        new_row["probability"] = "1.0"
+                        new_row["lineage"] = "P.1"
+
                         writer.writerow(new_row)
                     else:
                         writer.writerow(row)