Skip to content

Commit 92371f4

Browse files
committed
allow for new yaml rules
1 parent fc634da commit 92371f4

File tree

4 files changed

+127
-51
lines changed

4 files changed

+127
-51
lines changed

db/tbdb.rules.yml

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
mmpR5_rule:
2+
type: epistasisRule
3+
source:
4+
gene_name: mmpL5
5+
type: LoF
6+
target:
7+
drug: bedaquiline
8+
gene_name: mmpR5
9+
source_inactivation_freq_cutoff: 100
10+
target_escape_freq_cutoff: 50
11+
12+
eis_rule:
13+
type: epistasisRule
14+
source:
15+
gene_name: eis
16+
type: LoF
17+
target:
18+
drug:
19+
- kanamycin
20+
- amikacin
21+
gene_name: eis
22+
source_inactivation_freq_cutoff: 100
23+
target_escape_freq_cutoff: 50
24+
25+

db/tbdb.variables.json

+1
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
"spoligotype_spacers": "tbdb.spoligotype_spacers.txt",
4444
"spoligotype_annotations": "tbdb.spoligotype_list.csv",
4545
"bedmask": "tbdb.mask.bed",
46+
"rules": "tbdb.rules.yml",
4647
"barcode": "tbdb.barcode.bed"
4748
}
4849
}

tb-profiler

+8-2
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ from tqdm import tqdm
1818
import logging
1919
from rich.logging import RichHandler
2020
from copy import deepcopy
21-
21+
import yaml
2222
import importlib
2323
import pkgutil
2424

@@ -143,7 +143,11 @@ def main_profile(args):
143143
for plugin in tbp.ProfilePlugin.__subclasses__():
144144
plugin().process_variants(args,variants_profile)
145145

146-
146+
147+
rules_file = args.db_dir+"/"+args.conf['files']['rules']
148+
for rule in yaml.load(open(rules_file),Loader=yaml.SafeLoader).values():
149+
tbp.apply_epistasis_rule(args,variants_profile,rule)
150+
147151
tbp.clean_up_duplicate_annotations(variants_profile)
148152

149153
# Convert variant objects to DrVariant if they cause resistance
@@ -273,6 +277,7 @@ def main_create_db(args):
273277
"spoligotype_spacers": args.spoligotypes,
274278
"spoligotype_annotations": args.spoligotype_annotations,
275279
"bedmask": {"name":args.bedmask,"convert":1},
280+
"rules": args.rules,
276281
}
277282
if args.barcode:
278283
extra_files["barcode"] = args.barcode
@@ -584,6 +589,7 @@ parser_sub.add_argument('--spoligotypes',default="spoligotype_spacers.txt",type=
584589
parser_sub.add_argument('--spoligotype_annotations','--spoligotype-annotations',default="spoligotype_list.csv")
585590
parser_sub.add_argument('--barcode',default="barcode.bed",type=str,help='A bed file containing lineage barcode SNPs')
586591
parser_sub.add_argument('--bedmask',default="mask.bed",type=str,help='A bed file containing a list of low-complexity regions')
592+
parser_sub.add_argument('--rules',default="rules.yml",type=str,help='A yaml file containing rules for the resistance library')
587593
parser_sub.add_argument('--amplicon_primers','--amplicon-primers',type=str,help='A file containing a list of amplicon primers')
588594
parser_sub.add_argument('--match_ref','--match-ref',type=str,help='Match the chromosome name to the given fasta file')
589595
parser_sub.add_argument('--custom',action="store_true",help='Tells the script this is a custom database, this is used to alter the generation of the version definition')

tbprofiler/rules.py

+93-49
Original file line numberDiff line numberDiff line change
@@ -40,80 +40,124 @@ def search_variant(variants: List[Variant], **kwargs) -> List[Variant]:
4040

4141
return list(found_variants)
4242

43-
def inactivate_drug_resistance(variant: Variant):
43+
def inactivate_drug_resistance(variants: List[Variant]):
4444
"""
4545
Inactivate a drug resistance variant
4646
"""
47-
for ann in variant.consequences[0].annotation:
48-
if ann['type']=='drug_resistance':
49-
ann['type'] = 'inactivated_drug_resistance'
50-
for csq in variant.consequences:
51-
for ann in csq.annotation:
52-
if ann['type']=='drug_resistance':
53-
ann['type'] = 'inactivated_drug_resistance'
47+
for var in variants:
48+
for csq in var.consequences:
49+
for ann in csq.annotation:
50+
if ann['type']=='drug_resistance':
51+
ann['type'] = 'inactivated_drug_resistance'
5452

53+
class Rule(ProfilePlugin):
54+
pass
5555

56-
57-
class MmpR5WHORule(ProfilePlugin):
56+
class epistasisRule(Rule):
5857
"""
59-
Epistasis rule for mmpL5/mmpR5
58+
Epistasis rule
6059
"""
6160

62-
def process_variants(self,args,variants: List[Variant]):
61+
def process_variants(
62+
self,
63+
source:dict,
64+
target:dict,
65+
args: argparse.Namespace,
66+
variants: List[Variant],
67+
source_inactivation_freq_cutoff:int=100,
68+
target_escape_freq_cutoff:int=10,
69+
**kwargs
70+
):
6371
"""Generic variant processing method"""
6472

65-
v1 = search_variant(variants,drug='bedaquiline',gene_name='mmpR5')
66-
v2 = search_variant(variants,gene_name='mmpL5',type='LoF')
73+
source_vars = search_variant(variants,**source)
74+
target_vars = search_variant(variants,**target)
6775

68-
v1_total_freq = math.ceil(sum([x.freq*100 for x in v1]))
69-
v1_total_freq = min(v1_total_freq,100)
76+
source_vars_total_freq = math.ceil(sum([x.freq*100 for x in source_vars]))
77+
source_vars_total_freq = min(source_vars_total_freq,100)
7078

71-
v2_total_freq = math.ceil(sum([x.freq*100 for x in v2]))
72-
v2_total_freq = min(v2_total_freq,100)
79+
target_vars_total_freq = math.ceil(sum([x.freq*100 for x in target_vars]))
80+
target_vars_total_freq = min(target_vars_total_freq,100)
7381

74-
if v1 and v2:
75-
v1_changes = ", ".join([x.change for x in v1])
76-
v2_changes = ", ".join([x.change for x in v2])
77-
note = f"Loss of function mutation(s) detected in mmpL5 ({v2_changes}) which may abrogate the effect of the genetically linked mmpR5 mutation(s) ({v1_changes})."
82+
print(source_vars_total_freq,target_vars_total_freq)
83+
if source_vars and target_vars:
84+
source_vars_changes = ", ".join([x.change for x in source_vars])
85+
target_vars_changes = ", ".join([x.change for x in target_vars])
86+
note = f"Mutation(s) detected in {source_vars[0].gene_name} ({source_vars_changes}) which may abrogate the effect of the genetically linked {target_vars[0].gene_name} mutation(s) ({target_vars_changes})."
7887

79-
if v2_total_freq==100:
80-
inactivate_drug_resistance(v1)
81-
freq_diff = v1_total_freq - v2_total_freq
82-
if freq_diff > 10:
83-
note += f" However, the combined frequency of the mmpR5 mutation(s) is {freq_diff}% higher than the mmpL5 mutation(s), indicating a potential resistant subpopulation. Please consult the raw data for more information."
88+
if source_vars_total_freq>=source_inactivation_freq_cutoff:
89+
inactivate_drug_resistance(target_vars)
90+
freq_diff = target_vars_total_freq - source_vars_total_freq
91+
if freq_diff > target_escape_freq_cutoff:
92+
note += f" However, the combined frequency of the {target_vars[0].gene_name} mutation(s) is {freq_diff}% higher than the {source_vars[0].gene_name} mutation(s), indicating a potential resistant subpopulation."
8493

94+
note += " Please consult the raw data for more information."
8595
args.notes.append(note)
8696

97+
def apply_epistasis_rule(args: argparse.Namespace, variants: List[Variant], parameters: dict):
98+
logging.debug(f"Applying epistasis rule with parameters: {parameters}")
99+
epistasisRule().process_variants(args=args,variants=variants,**parameters)
87100

88-
class eisWHORule(ProfilePlugin):
89-
"""
90-
Epistasis rule for mmpL5/mmpR5
91-
"""
101+
# class MmpR5WHORule(ProfilePlugin):
102+
# """
103+
# Epistasis rule for mmpL5/mmpR5
104+
# """
92105

93-
def process_variants(self,args,variants: List[Variant]):
94-
"""Generic variant processing method"""
106+
# def process_variants(self,args,variants: List[Variant]):
107+
# """Generic variant processing method"""
108+
109+
# v1 = search_variant(variants,drug='bedaquiline',gene_name='mmpR5')
110+
# v2 = search_variant(variants,gene_name='mmpL5',type='LoF')
111+
112+
# v1_total_freq = math.ceil(sum([x.freq*100 for x in v1]))
113+
# v1_total_freq = min(v1_total_freq,100)
95114

96-
v1 = search_variant(variants,drug=['kanamycin','amikacin'],gene_name='eis')
97-
v2 = search_variant(variants,gene_name='eis',type='LoF')
115+
# v2_total_freq = math.ceil(sum([x.freq*100 for x in v2]))
116+
# v2_total_freq = min(v2_total_freq,100)
98117

99-
v1_total_freq = math.ceil(sum([x.freq*100 for x in v1]))
100-
v1_total_freq = min(v1_total_freq,100)
118+
# if v1 and v2:
119+
# v1_changes = ", ".join([x.change for x in v1])
120+
# v2_changes = ", ".join([x.change for x in v2])
121+
# note = f"Loss of function mutation(s) detected in mmpL5 ({v2_changes}) which may abrogate the effect of the genetically linked mmpR5 mutation(s) ({v1_changes})."
122+
123+
# if v2_total_freq==100:
124+
# inactivate_drug_resistance(v1)
125+
# freq_diff = v1_total_freq - v2_total_freq
126+
# if freq_diff > 10:
127+
# note += f" However, the combined frequency of the mmpR5 mutation(s) is {freq_diff}% higher than the mmpL5 mutation(s), indicating a potential resistant subpopulation. Please consult the raw data for more information."
128+
129+
# args.notes.append(note)
130+
131+
132+
# class eisWHORule(ProfilePlugin):
133+
# """
134+
# Epistasis rule for mmpL5/mmpR5
135+
# """
101136

102-
v2_total_freq = math.ceil(sum([x.freq*100 for x in v2]))
103-
v2_total_freq = min(v2_total_freq,100)
137+
# def process_variants(self,args,variants: List[Variant]):
138+
# """Generic variant processing method"""
104139

105-
if v1 and v2:
106-
v1_changes = ", ".join([x.change for x in v1])
107-
v2_changes = ", ".join([x.change for x in v2])
108-
note = f"Loss of function mutation(s) detected in eis ({v2_changes}) which may abrogate the effect of the genetically linked eis promoter mutation(s) ({v1_changes})."
140+
# v1 = search_variant(variants,drug=['kanamycin','amikacin'],gene_name='eis')
141+
# v2 = search_variant(variants,gene_name='eis',type='LoF')
109142

110-
if v2_total_freq==100:
111-
inactivate_drug_resistance(v1)
112-
freq_diff = v1_total_freq - v2_total_freq
113-
if freq_diff > 10:
114-
note += f" However, the combined frequency of the eis promoter mutation(s) is {freq_diff}% higher than the eis coding mutation(s), indicating a potential resistant subpopulation. Please consult the raw data for more information."
143+
# v1_total_freq = math.ceil(sum([x.freq*100 for x in v1]))
144+
# v1_total_freq = min(v1_total_freq,100)
145+
146+
# v2_total_freq = math.ceil(sum([x.freq*100 for x in v2]))
147+
# v2_total_freq = min(v2_total_freq,100)
148+
149+
# if v1 and v2:
150+
# v1_changes = ", ".join([x.change for x in v1])
151+
# v2_changes = ", ".join([x.change for x in v2])
152+
# note = f"Loss of function mutation(s) detected in eis ({v2_changes}) which may abrogate the effect of the genetically linked eis promoter mutation(s) ({v1_changes})."
153+
154+
# if v2_total_freq==100:
155+
# inactivate_drug_resistance(v1)
156+
# freq_diff = v1_total_freq - v2_total_freq
157+
# if freq_diff > 10:
158+
# note += f" However, the combined frequency of the eis promoter mutation(s) is {freq_diff}% higher than the eis coding mutation(s), indicating a potential resistant subpopulation. Please consult the raw data for more information."
115159

116-
args.notes.append(note)
160+
# args.notes.append(note)
117161

118162

119163
class SetConfidence(ProfilePlugin):

0 commit comments

Comments
 (0)