|
14 | 14 | from .models import ProfileResult, LinkedSample
|
15 | 15 | from typing import List, Tuple
|
16 | 16 |
|
17 |
| -# def extract_variant_set_old(vcf_file, exclude_bed, min_cov=10, min_freq=0.8): |
18 |
| -# ref_diffs = set() |
19 |
| -# missing = set() |
20 |
| -# for l in cmd_out(f"bcftools view -V indels -T ^{exclude_bed} {vcf_file} | bcftools query -f '%POS[\t%GT:%AD]\n'"): |
21 |
| -# if l[0]=="#": continue |
22 |
| -# row = l.strip().split() |
23 |
| -# pos = int(row[0]) |
24 |
| -# gt,ad = row[1].split(":") |
25 |
| -# if ad==".": # delly |
26 |
| -# continue |
27 |
| -# if gt==".": |
28 |
| -# missing.add(pos) |
29 |
| -# continue |
30 |
| -# ad = [int(x) for x in ad.split(",")] |
31 |
| -# if sum(ad)<=min_cov: |
32 |
| -# missing.add(pos) |
33 |
| -# continue |
34 |
| -# adf = sorted([float(x/sum(ad)) for x in ad]) |
35 |
| -# if adf[-1]<min_freq: |
36 |
| -# missing.add(pos) |
37 |
| -# continue |
38 |
| -# if gt=="1/1": |
39 |
| -# ref_diffs.add(int(pos)) |
40 |
| - |
41 |
| -# return ref_diffs,missing |
42 |
| - |
43 | 17 | def extract_variant_set(vcf_file: str) -> Tuple[set,set]:
|
44 | 18 | ref_diffs = set()
|
45 | 19 | missing = set()
|
@@ -99,9 +73,12 @@ def store(self,result: ProfileResult, vcf_file: str) -> None:
|
99 | 73 | self.conn.commit()
|
100 | 74 | self.diffs = diffs
|
101 | 75 | self.missing = missing
|
102 |
| - def search(self,result: ProfileResult, vcf_file: str, cutoff: int = 20) -> List[LinkedSample]: |
| 76 | + def search(self,result: ProfileResult, vcf_file: str, cutoff: int = 20, snp_dist_search_all: bool = False) -> List[LinkedSample]: |
103 | 77 | logging.info("Searching for close samples in %s" % self.filename)
|
104 |
| - self.c.execute("SELECT sample, diffs, missing FROM variants WHERE lineage=?",(result.sub_lineage,)) |
| 78 | + if snp_dist_search_all: |
| 79 | + self.c.execute("SELECT sample, diffs, missing FROM variants") |
| 80 | + else: |
| 81 | + self.c.execute("SELECT sample, diffs, missing FROM variants WHERE lineage=?",(result.sub_lineage,)) |
105 | 82 | self.diffs,self.missing = extract_variant_set(vcf_file)
|
106 | 83 | sample_dists = []
|
107 | 84 | for s,d,m in tqdm(self.c.fetchall(),desc="Searching for close samples"):
|
@@ -141,7 +118,7 @@ def run_snp_dists(args: argparse.Namespace,result: ProfileResult) -> None:
|
141 | 118 | lock = f"{dbname}.lock"
|
142 | 119 | with filelock.SoftFileLock(lock):
|
143 | 120 | db = DB(dbname)
|
144 |
| - linked_samples = db.search(result,input_vcf,args.snp_dist) |
| 121 | + linked_samples = db.search(result,input_vcf,args.snp_dist,args.snp_dist_search_all) |
145 | 122 | if not args.snp_diff_no_store:
|
146 | 123 | db.store(result,input_vcf)
|
147 | 124 | result.linked_samples = [d for d in linked_samples if d.sample!=result.id]
|
|
0 commit comments