Skip to content

Commit 5cf1f3d

Browse files
committed
snp_dist_search_all
1 parent efc3190 commit 5cf1f3d

File tree

2 files changed

+7
-29
lines changed

2 files changed

+7
-29
lines changed

tb-profiler

+1
Original file line number | Diff line number | Diff line change
@@ -381,6 +381,7 @@ algorithm.add_argument('--call_whole_genome',action="store_true",help="Call vari
381381
algorithm.add_argument('--snp_dist',type=int,help="Store variant set and get all samples with snp distance less than this cutoff (experimental feature)")
382382
algorithm.add_argument('--snp_diff_db',type=str,help=argparse.SUPPRESS)
383383
algorithm.add_argument('--snp_diff_no_store',action='store_true',help=argparse.SUPPRESS)
384+
algorithm.add_argument('--snp_dist_search_all',action='store_true',help=argparse.SUPPRESS)
384385
algorithm.add_argument('--no_trim',action="store_true",help="Don't trim files using trimmomatic")
385386
algorithm.add_argument('--no_coverage_qc',action="store_true",help="Don't collect flagstats")
386387
algorithm.add_argument('--no_samclip',action="store_false",help="Don't remove clipped reads from variant calling")

tbprofiler/snp_dists.py

+6 -29
Original file line number | Diff line number | Diff line change
@@ -14,32 +14,6 @@
1414
from .models import ProfileResult, LinkedSample
1515
from typing import List, Tuple
1616

17-
# def extract_variant_set_old(vcf_file, exclude_bed, min_cov=10, min_freq=0.8):
18-
# ref_diffs = set()
19-
# missing = set()
20-
# for l in cmd_out(f"bcftools view -V indels -T ^{exclude_bed} {vcf_file} | bcftools query -f '%POS[\t%GT:%AD]\n'"):
21-
# if l[0]=="#": continue
22-
# row = l.strip().split()
23-
# pos = int(row[0])
24-
# gt,ad = row[1].split(":")
25-
# if ad==".": # delly
26-
# continue
27-
# if gt==".":
28-
# missing.add(pos)
29-
# continue
30-
# ad = [int(x) for x in ad.split(",")]
31-
# if sum(ad)<=min_cov:
32-
# missing.add(pos)
33-
# continue
34-
# adf = sorted([float(x/sum(ad)) for x in ad])
35-
# if adf[-1]<min_freq:
36-
# missing.add(pos)
37-
# continue
38-
# if gt=="1/1":
39-
# ref_diffs.add(int(pos))
40-
41-
# return ref_diffs,missing
42-
4317
def extract_variant_set(vcf_file: str) -> Tuple[set,set]:
4418
ref_diffs = set()
4519
missing = set()
@@ -99,9 +73,12 @@ def store(self,result: ProfileResult, vcf_file: str) -> None:
9973
self.conn.commit()
10074
self.diffs = diffs
10175
self.missing = missing
102-
def search(self,result: ProfileResult, vcf_file: str, cutoff: int = 20) -> List[LinkedSample]:
76+
def search(self,result: ProfileResult, vcf_file: str, cutoff: int = 20, snp_dist_search_all: bool = False) -> List[LinkedSample]:
10377
logging.info("Searching for close samples in %s" % self.filename)
104-
self.c.execute("SELECT sample, diffs, missing FROM variants WHERE lineage=?",(result.sub_lineage,))
78+
if snp_dist_search_all:
79+
self.c.execute("SELECT sample, diffs, missing FROM variants")
80+
else:
81+
self.c.execute("SELECT sample, diffs, missing FROM variants WHERE lineage=?",(result.sub_lineage,))
10582
self.diffs,self.missing = extract_variant_set(vcf_file)
10683
sample_dists = []
10784
for s,d,m in tqdm(self.c.fetchall(),desc="Searching for close samples"):
@@ -141,7 +118,7 @@ def run_snp_dists(args: argparse.Namespace,result: ProfileResult) -> None:
141118
lock = f"{dbname}.lock"
142119
with filelock.SoftFileLock(lock):
143120
db = DB(dbname)
144-
linked_samples = db.search(result,input_vcf,args.snp_dist)
121+
linked_samples = db.search(result,input_vcf,args.snp_dist,args.snp_dist_search_all)
145122
if not args.snp_diff_no_store:
146123
db.store(result,input_vcf)
147124
result.linked_samples = [d for d in linked_samples if d.sample!=result.id]

0 commit comments

Comments
 (0)