Skip to content

Commit 1a7e39a

Browse files
authored
Merge pull request #421 from jodyphelan/dev
Dev
2 parents b187d1c + fc634da commit 1a7e39a

11 files changed

+58
-77
lines changed

db/tbdb.bed

+23-23
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,9 @@ Chromosome 1 1524 Rv0001 dnaA isoniazid
22
Chromosome 4933 7267 Rv0005 gyrB levofloxacin,moxifloxacin
33
Chromosome 7068 9818 Rv0006 gyrA levofloxacin,moxifloxacin
44
Chromosome 13133 13911 Rv0010c Rv0010c isoniazid
5-
Chromosome 490545 491793 Rv0407 fgd1 pretomanid,delamanid,clofazimine
6-
Chromosome 574479 576790 Rv0486 mshA isoniazid,ethionamide
7-
Chromosome 619500 620865 Rv0529 ccsA kanamycin,capreomycin,amikacin
5+
Chromosome 490545 491793 Rv0407 fgd1 clofazimine,delamanid,pretomanid
6+
Chromosome 574479 576790 Rv0486 mshA ethionamide,isoniazid
7+
Chromosome 619500 620865 Rv0529 ccsA amikacin,capreomycin,kanamycin
88
Chromosome 656010 657739 Rv0565c Rv0565c ethionamide
99
Chromosome 731680 732406 Rv0635 hadA isoniazid
1010
Chromosome 733853 734970 Rv0639 nusG rifampicin
@@ -16,58 +16,58 @@ Chromosome 778477 779624 Rv0677c mmpS5 bedaquiline,clofazimine
1616
Chromosome 778790 779487 Rv0678 mmpR5 bedaquiline,clofazimine
1717
Chromosome 781126 781934 Rv0682 rpsL streptomycin
1818
Chromosome 800106 801462 Rv0701 rplC linezolid
19-
Chromosome 1253074 1254783 Rv1129c Rv1129c levofloxacin,moxifloxacin,isoniazid,rifampicin
20-
Chromosome 1302606 1305501 Rv1173 fbiC pretomanid,delamanid,clofazimine
19+
Chromosome 1253074 1254783 Rv1129c Rv1129c isoniazid,levofloxacin,moxifloxacin,rifampicin
20+
Chromosome 1302606 1305501 Rv1173 fbiC clofazimine,delamanid,pretomanid
2121
Chromosome 1364162 1365186 Rv1221 sigE pyrazinamide
22-
Chromosome 1406081 1407604 Rv1258c Rv1258c streptomycin,pyrazinamide,isoniazid
22+
Chromosome 1406081 1407604 Rv1258c Rv1258c isoniazid,pyrazinamide,streptomycin
2323
Chromosome 1416181 1418048 Rv1267c embR ethambutol
2424
Chromosome 1460802 1461290 Rv1305 atpE bedaquiline
25-
Chromosome 1471498 1473382 EBG00000313325 rrs kanamycin,capreomycin,streptomycin,amikacin
25+
Chromosome 1471498 1473382 EBG00000313325 rrs amikacin,capreomycin,kanamycin,streptomycin
2626
Chromosome 1473408 1476795 EBG00000313339 rrl capreomycin,linezolid
27-
Chromosome 1673148 1675011 Rv1484 inhA isoniazid,ethionamide
27+
Chromosome 1673148 1675011 Rv1484 inhA ethionamide,isoniazid
2828
Chromosome 1833247 1834987 Rv1630 rpsA pyrazinamide
2929
Chromosome 1853358 1854388 Rv1644 tsnR linezolid
3030
Chromosome 1917506 1918746 Rv1694 tlyA capreomycin
31-
Chromosome 2062809 2065010 Rv1819c bacA kanamycin,capreomycin,streptomycin,amikacin
32-
Chromosome 2101651 2103337 Rv1854c ndh isoniazid,delamanid,ethionamide
31+
Chromosome 2062809 2065010 Rv1819c bacA amikacin,capreomycin,kanamycin,streptomycin
32+
Chromosome 2101651 2103337 Rv1854c ndh delamanid,ethionamide,isoniazid
3333
Chromosome 2153889 2156842 Rv1908c katG isoniazid
3434
Chromosome 2167649 2170934 Rv1918c PPE35 pyrazinamide
3535
Chromosome 2221719 2223825 Rv1979c Rv1979c bedaquiline,clofazimine
3636
Chromosome 2288681 2290323 Rv2043c pncA pyrazinamide
3737
Chromosome 2517915 2519365 Rv2245 kasA isoniazid
38-
Chromosome 2714124 2715832 Rv2416c eis kanamycin,amikacin
38+
Chromosome 2714124 2715832 Rv2416c eis amikacin,kanamycin
3939
Chromosome 2725899 2726780 Rv2428 ahpC isoniazid
4040
Chromosome 2746135 2747798 Rv2447c folC para-aminosalicylic_acid
41-
Chromosome 2782366 2786169 Rv2477c Rv2477c kanamycin,levofloxacin,moxifloxacin,streptomycin,rifampicin,ethambutol,amikacin
41+
Chromosome 2782366 2786169 Rv2477c Rv2477c amikacin,ethambutol,kanamycin,levofloxacin,moxifloxacin,rifampicin,streptomycin
4242
Chromosome 2859300 2860640 Rv2535c pepQ bedaquiline,clofazimine
4343
Chromosome 2986639 2987615 Rv2671 ribD para-aminosalicylic_acid
4444
Chromosome 2995772 2996737 Rv2680 Rv2680 capreomycin
4545
Chromosome 2996539 2998055 Rv2681 Rv2681 capreomycin
46-
Chromosome 3064515 3067372 Rv2752c Rv2752c levofloxacin,moxifloxacin,isoniazid,rifampicin,ethambutol
46+
Chromosome 3064515 3067372 Rv2752c Rv2752c ethambutol,isoniazid,levofloxacin,moxifloxacin,rifampicin
4747
Chromosome 3067193 3068161 Rv2754c thyX para-aminosalicylic_acid
4848
Chromosome 3073680 3074671 Rv2764c thyA para-aminosalicylic_acid
4949
Chromosome 3086620 3087935 Rv2780 ald cycloserine
50-
Chromosome 3338868 3339762 Rv2983 fbiD pretomanid,delamanid,clofazimine
50+
Chromosome 3338868 3339762 Rv2983 fbiD clofazimine,delamanid,pretomanid
5151
Chromosome 3448253 3449991 Rv3083 Rv3083 ethionamide
52-
Chromosome 3568401 3569280 Rv3197A whiB7 kanamycin,amikacin,streptomycin
52+
Chromosome 3568401 3569280 Rv3197A whiB7 amikacin,kanamycin,streptomycin
5353
Chromosome 3611959 3613847 Rv3236c Rv3236c pyrazinamide
54-
Chromosome 3623159 3625110 Rv3244c lpqB rifampicin,bedaquiline
55-
Chromosome 3624910 3626860 Rv3245c mtrB rifampicin,bedaquiline
56-
Chromosome 3626663 3627924 Rv3246c mtrA rifampicin,bedaquiline
57-
Chromosome 3640207 3641538 Rv3261 fbiA pretomanid,delamanid,clofazimine
58-
Chromosome 3641335 3642881 Rv3262 fbiB pretomanid,delamanid,clofazimine
54+
Chromosome 3623159 3625110 Rv3244c lpqB bedaquiline,rifampicin
55+
Chromosome 3624910 3626860 Rv3245c mtrB bedaquiline,rifampicin
56+
Chromosome 3626663 3627924 Rv3246c mtrA bedaquiline,rifampicin
57+
Chromosome 3640207 3641538 Rv3261 fbiA clofazimine,delamanid,pretomanid
58+
Chromosome 3641335 3642881 Rv3262 fbiB clofazimine,delamanid,pretomanid
5959
Chromosome 3840194 3841620 Rv3423c alr cycloserine
6060
Chromosome 3877464 3879240 Rv3457c rpoA rifampicin
61-
Chromosome 3986612 3987299 Rv3547 ddn pretomanid,delamanid
61+
Chromosome 3986612 3987299 Rv3547 ddn delamanid,pretomanid
6262
Chromosome 4038158 4041013 Rv3596c clpC1 pyrazinamide
6363
Chromosome 4043862 4046428 Rv3601c panD pyrazinamide
64-
Chromosome 4138202 4140002 Rv3696c glpK levofloxacin,moxifloxacin,isoniazid,streptomycin,rifampicin,ethambutol
64+
Chromosome 4138202 4140002 Rv3696c glpK ethambutol,isoniazid,levofloxacin,moxifloxacin,rifampicin,streptomycin
6565
Chromosome 4237683 4243147 Rv3793 embC ethambutol
6666
Chromosome 4242947 4246517 Rv3794 embA ethambutol
6767
Chromosome 4246314 4249810 Rv3795 embB ethambutol
6868
Chromosome 4266953 4269124 Rv3805c aftB ethambutol
6969
Chromosome 4268925 4270084 Rv3806c ubiA ethambutol
7070
Chromosome 4326004 4330174 Rv3854c ethA ethionamide
7171
Chromosome 4327328 4328199 Rv3855 ethR ethionamide
72-
Chromosome 4338171 4338961 Rv3862c whiB6 kanamycin,capreomycin,amikacin
72+
Chromosome 4338171 4338961 Rv3862c whiB6 amikacin,capreomycin,kanamycin
7373
Chromosome 4407528 4408481 Rv3919c gid streptomycin

db/tbdb.dict

-2
This file was deleted.

db/tbdb.dr.json

+1-1
Large diffs are not rendered by default.

db/tbdb.fasta.fai

-1
This file was deleted.

db/tbdb.mask.bed

+1-3
Original file line number | Diff line number | Diff line change
@@ -51,9 +51,7 @@ Chromosome 99162 99174
5151
Chromosome 102100 102138
5252
Chromosome 102140 102150
5353
Chromosome 103743 103756
54-
Chromosome 103788 104164
55-
Chromosome 104317 104985
56-
Chromosome 104986 104987
54+
Chromosome 103788 104987
5755
Chromosome 106207 106343
5856
Chromosome 125830 125834
5957
Chromosome 126259 126260

db/tbdb.rules.txt

-2
This file was deleted.

db/tbdb.variables.json

+10-7
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
{
2-
"db-schema-version": "1.0.0",
2+
"db-schema-version": "1.1.0",
33
"snpEff_db": "Mycobacterium_tuberculosis_h37rv",
44
"drugs": [
55
"rifampicin",
@@ -24,10 +24,14 @@
2424
"tb-profiler-version": ">=6.0.0,<7.0.0",
2525
"version": {
2626
"name": "tbdb",
27-
"commit": "72ef6fa",
28-
"Author": "Jody Phelan <[email protected]>",
29-
"Date": "Tue Jul 16 16:56:19 2024 +0100",
30-
"db-schema-version": "1.0.0"
27+
"repo": "git@github.com:jodyphelan/tbdb.git",
28+
"branch": "tbdb",
29+
"commit": "7066eb43",
30+
"status": "clean",
31+
"author": "Jody Phelan",
32+
"date": "Fri Feb 14 09:41:10 2025 +0100",
33+
"db-schema-version": "1.1.0",
34+
"tb-profiler-version": ">=6.0.0,<7.0.0"
3135
},
3236
"amplicon": false,
3337
"files": {
@@ -39,7 +43,6 @@
3943
"spoligotype_spacers": "tbdb.spoligotype_spacers.txt",
4044
"spoligotype_annotations": "tbdb.spoligotype_list.csv",
4145
"bedmask": "tbdb.mask.bed",
42-
"barcode": "tbdb.barcode.bed",
43-
"rules": "tbdb.rules.txt"
46+
"barcode": "tbdb.barcode.bed"
4447
}
4548
}

tb-profiler

+8-10
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@ discovered_plugins = {
3535

3636
__softwarename__ = 'tbprofiler'
3737
__default_db_dir__ = f'{sys.base_prefix}/share/{__softwarename__}'
38+
__compatible_db_schema_version__ = '1.0.0'
3839

3940
@atexit.register
4041
def cleanup():
@@ -246,8 +247,6 @@ def main_update_tbdb(args):
246247

247248

248249
extra_args = []
249-
if os.path.isfile('rules.txt'):
250-
extra_args.append("--rules rules.txt")
251250
if args.match_ref:
252251
extra_args.append("--match_ref %s" % os.path.abspath(args.match_ref))
253252

@@ -260,8 +259,8 @@ def main_update_tbdb(args):
260259

261260
def main_create_db(args):
262261

263-
version_string = json.load(open('variables.json'))['tb-profiler-version']
264-
tbp.check_db_version(version_string,tbp.__version__)
262+
version_string = json.load(open('variables.json'))['db-schema-version']
263+
tbp.check_db_version(version_string,__compatible_db_schema_version__)
265264

266265
if args.no_overwrite:
267266
dbs = pp.list_db(args.software_name)
@@ -277,8 +276,6 @@ def main_create_db(args):
277276
}
278277
if args.barcode:
279278
extra_files["barcode"] = args.barcode
280-
if args.rules:
281-
extra_files["rules"] = args.rules
282279

283280
with TempFilePrefix() as tmpfile:
284281
args.csv = tbp.reformat_variant_csv_file(args.csv,f'{tmpfile}.variants.csv')
@@ -371,11 +368,11 @@ def main_batch(args):
371368

372369

373370
def main_list_db(args):
374-
dbs = pp.list_db(args.software_name)
371+
dbs = pp.list_db(args.db_dir)
375372
for db in dbs:
376373
if 'version' in db:
377-
d = dict(**db['version'], location=f"{sys.base_prefix}/share/{args.software_name}/{db['version']['name']}")
378-
sys.stdout.write("%(name)s\t%(commit)s\t%(Author)s\t%(Date)s\t%(location)s\n" % d)
374+
d = dict(**db['version'], location=f"{args.db_dir}/{db['version']['name']}")
375+
sys.stdout.write("%(name)s\t%(commit)s\t%(author)s\t%(date)s\t%(location)s\n" % d)
379376

380377

381378

@@ -587,7 +584,6 @@ parser_sub.add_argument('--spoligotypes',default="spoligotype_spacers.txt",type=
587584
parser_sub.add_argument('--spoligotype_annotations','--spoligotype-annotations',default="spoligotype_list.csv")
588585
parser_sub.add_argument('--barcode',default="barcode.bed",type=str,help='A bed file containing lineage barcode SNPs')
589586
parser_sub.add_argument('--bedmask',default="mask.bed",type=str,help='A bed file containing a list of low-complexity regions')
590-
parser_sub.add_argument('--rules',type=str,default="rules.txt",help='A file containing python rules')
591587
parser_sub.add_argument('--amplicon_primers','--amplicon-primers',type=str,help='A file containing a list of amplicon primers')
592588
parser_sub.add_argument('--match_ref','--match-ref',type=str,help='Match the chromosome name to the given fasta file')
593589
parser_sub.add_argument('--custom',action="store_true",help='Tells the script this is a custom database, this is used to alter the generation of the version definition')
@@ -639,6 +635,7 @@ parser_sub.add_argument('--args',type=str, help='Arguments to use with tb-profil
639635
parser_sub.add_argument('--jobs','-j',default=1,help='Threads to use',type=int)
640636
parser_sub.add_argument('--threads_per_job','--threads-per-job','-t',default=1,help='Threads to use',type=int)
641637
parser_sub.add_argument('--dir','-d',default=".",help='Storage directory')
638+
parser_sub.add_argument('--db_dir',type=os.path.abspath,default=__default_db_dir__,help='Database directory')
642639
parser_sub.add_argument('--no_clean','--no-clean', action='store_true',help=argparse.SUPPRESS)
643640
parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".")
644641
parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__)
@@ -650,6 +647,7 @@ parser_sub = subparsers.add_parser('list_db', help='List loaded databases', form
650647
parser_sub.add_argument('--dir','-d',default=".",help='Storage directory')
651648
parser_sub.add_argument('--no_clean','--no-clean', action='store_true',help=argparse.SUPPRESS)
652649
parser_sub.add_argument('--temp',help="Temp firectory to process all files",type=str,default=".")
650+
parser_sub.add_argument('--db_dir',type=os.path.abspath,default=__default_db_dir__,help='Database directory')
653651
parser_sub.add_argument('--version', action='version', version="tb-profiler version %s" % tbp.__version__)
654652
parser_sub.add_argument('--logging',type=str.upper,default="INFO",choices=["DEBUG","INFO","WARNING","ERROR","CRITICAL"],help='Logging level')
655653
parser_sub.add_argument('--debug',action='store_true',help=argparse.SUPPRESS)

tbprofiler/reformat.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
from typing import List, Tuple , Union, Optional
44
from .utils import get_gene2drugs
55
import argparse
6-
from pathogenprofiler.utils import shared_dict
6+
from pathogenprofiler.utils import shared_dict, get_software_used
77

88
def get_main_lineage(lineages: List[Lineage],max_node_skip: int = 1) -> Tuple[str, str]:
99
"""
@@ -206,7 +206,7 @@ def create_lineage_result(
206206
pipeline = Pipeline(
207207
software_version=args.version,
208208
db_version=args.conf['version'],
209-
software=[{'process':k,'software':v} for k,v in shared_dict.items()]
209+
software=get_software_used()
210210
)
211211
data = {
212212
'id':args.prefix,
@@ -233,7 +233,7 @@ def create_resistance_result(
233233
pipeline = Pipeline(
234234
software_version=args.version,
235235
db_version=args.conf['version'],
236-
software=[{'process':k,'software':v} for k,v in shared_dict.items()]
236+
software=get_software_used()
237237
)
238238
if hasattr(qc, 'missing_positions'):
239239
qc.missing_positions = filter_missing_positions(qc.missing_positions)

tbprofiler/utils.py

+10-14
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,8 @@
44
import csv
55
import logging
66
import re
7+
from packaging.version import Version
8+
79

810
def process_tb_profiler_args(args: argparse.Namespace) -> None:
911
if args.snp_dist:
@@ -93,17 +95,11 @@ def reformat_variant_csv_file(files: list, outfile: str) -> str:
9395

9496
return outfile
9597

96-
def check_db_version(db_version: str, tbprofiler_version: str) -> None:
97-
for d in db_version.split(","):
98-
r = re.search('([<>=]+)(.*)',d)
99-
if r==None:
100-
logging.error(f"Invalid version string: {d}")
101-
quit(1)
102-
103-
d = f"{r.group(1)} '{r.group(2)}'"
104-
if eval(f"'{tbprofiler_version}' {d}")==False:
105-
if ">" in d:
106-
logging.error(f"Your version of tb-profiler ({tbprofiler_version}) is too old to use this version of the database. Please update tb-profiler to {db_version}")
107-
else:
108-
logging.error(f"Your version of tb-profiler ({tbprofiler_version}) is too new to use this version of the database. Please update the database to {db_version}")
109-
quit(1)
98+
def check_db_version(db_current_version_str: str, compatible_schema_version_str: str):
99+
db_current_version = Version(db_current_version_str)
100+
compatible_schema_version = Version(compatible_schema_version_str)
101+
logging.debug(f"Database version: {db_current_version}")
102+
logging.debug(f"Compatible schema version: {compatible_schema_version}")
103+
if db_current_version.major != compatible_schema_version.major:
104+
logging.error(f"Latest database schema version {db_current_version_str} is not compatible with this version of tb-profiler (requires {compatible_schema_version.major}.x.x). Please make sure you are using the latest software and database versions.")
105+
quit(1)

tests/run_test.py

+2-11
Original file line number | Diff line number | Diff line change
@@ -13,15 +13,6 @@
1313
if not os.path.isdir("tb-profiler-test-data"):
1414
run_cmd("git clone https://github.com/jodyphelan/tb-profiler-test-data.git")
1515

16-
# por5_dr_variants = [
17-
# ('rpoB', 'p.Ser450Leu'),
18-
# ('fabG1', 'c.-15C>T'),
19-
# ('inhA', 'p.Ile194Thr'),
20-
# ('pncA', 'p.Val125Gly'),
21-
# ('embB', 'p.Met306Val'),
22-
# ('embB', 'p.Met423Thr'),
23-
# ('gid', 'p.Ala80Pro')
24-
# ]
2516

2617
por5_dr_variants = [
2718
('rpoB', 'p.Ser450Leu'),
@@ -59,8 +50,8 @@ def test_vcf():
5950
check_assertations("results/por5_vcf.results.json")
6051

6152
def test_nanopore():
62-
run_cmd(f"tb-profiler profile --db {db} -1 tb-profiler-test-data/por5A.nanopore_reduced.fastq.gz --platform nanopore -p por5A_illumina_nanopore -t 4 --af '0.5,0.7' --depth '0,5' --txt --csv --docx")
63-
check_assertations("results/por5A_illumina_nanopore.results.json")
53+
run_cmd(f"tb-profiler profile --db {db} -1 tb-profiler-test-data/por5A.nanopore_reduced.fastq.gz --platform nanopore -p por5A_nanopore -t 4 --caller bcftools --af '0.5,0.7' --depth '0,5' --txt --csv --docx")
54+
check_assertations("results/por5A_nanopore.results.json")
6455

6556
def test_fasta():
6657
run_cmd(f"tb-profiler profile --db {db} -f tb-profiler-test-data/por5A1.fasta -p por5A_fasta --txt --csv --docx")

0 commit comments

Comments
 (0)