Skip to content

Commit 5dee360

Browse files
authored
1.6.4 PR (#165)
* Support dorado v5.2.0 clair3 models
* Account for model tarballs with mismatching top level dirs
1 parent 96f3057 commit 5dee360

File tree

3 files changed

+36
-36
lines changed

3 files changed

+36
-36
lines changed

artic/get_models.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
from pathlib import Path
44
import tarfile
55
import sys
6+
import shutil
67
from artic.utils import clair3_manifest
7-
from clint.textui import colored
88

99

1010
def download_file(url: str, local_path: Path):
@@ -15,7 +15,9 @@ def download_file(url: str, local_path: Path):
1515
f.write(chunk)
1616

1717

18-
def get_model(model_dir: Path, model_fname: str, model_url: str):
18+
def get_model(
19+
model_dir: Path, model_fname: str, model_url: str, model_name: str = None
20+
):
1921

2022
model_path = Path(model_dir, model_fname)
2123

@@ -24,8 +26,19 @@ def get_model(model_dir: Path, model_fname: str, model_url: str):
2426
download_file(model_url, model_path)
2527

2628
with tarfile.open(model_path, "r") as tar:
29+
paths = [Path(x) for x in tar.getnames()]
30+
root_paths = [str(x.parent) for x in paths if str(x.parent) != "."]
31+
if len(set(root_paths)) != 1:
32+
raise ValueError(
33+
f"The Clair3 model tarfile {model_fname} contains multiple root directories (there can only be one), please check the tar file."
34+
)
35+
2736
tar.extractall(model_dir)
2837

38+
if model_name:
39+
if root_paths[0] != model_name:
40+
shutil.move(Path(model_dir, root_paths[0]), Path(model_dir, model_name))
41+
2942
os.remove(model_path)
3043

3144
return model_path
@@ -45,22 +58,24 @@ def main():
4558

4659
if not os.getenv("CONDA_PREFIX"):
4760
print(
48-
f"CONDA_PREFIX is not set, this probably means you are not running this inside a conda environment, if you have not provided a model path argument '--model-dir' the models might be downloaded somewhere you don't want them to be.",
61+
"CONDA_PREFIX is not set, this probably means you are not running this inside a conda environment, if you have not provided a model path argument '--model-dir' the models might be downloaded somewhere you don't want them to be.",
4962
file=sys.stderr,
5063
)
5164

5265
model_manifest = clair3_manifest()
5366
models = model_manifest.models
5467

5568
for model in models:
56-
if not model["rerio"]:
57-
continue
5869

59-
if not os.path.exists(Path(args.model_dir, model["name"])):
70+
if (
71+
not os.path.exists(Path(args.model_dir, model["name"]))
72+
or len(os.listdir(Path(args.model_dir, model["name"]))) == 0
73+
):
6074
get_model(
6175
model_dir=args.model_dir,
6276
model_fname=model["model_fname"],
6377
model_url=model["model_url"],
78+
model_name=model["name"],
6479
)
6580
print(f"Downloaded model: {model['name']}", file=sys.stderr)
6681

artic/utils.py

Lines changed: 14 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -22,151 +22,136 @@ def __init__(self):
2222
"name": "r1041_e82_260bps_fast_g632",
2323
"model_fname": "r1041_e82_260bps_fast_g632.tar.gz",
2424
"model_url": "https://cdn.oxfordnanoportal.com/software/analysis/models/clair3/r1041_e82_260bps_fast_g632.tar.gz",
25-
"rerio": True,
2625
},
2726
{
2827
"name": "r1041_e82_400bps_fast_g632",
2928
"model_fname": "r1041_e82_400bps_fast_g632.tar.gz",
3029
"model_url": "https://cdn.oxfordnanoportal.com/software/analysis/models/clair3/r1041_e82_400bps_fast_g632.tar.gz",
31-
"rerio": True,
3230
},
3331
{
3432
"name": "r1041_e82_400bps_sup_g615",
3533
"model_fname": "r1041_e82_400bps_sup_g615.tar.gz",
3634
"model_url": "https://cdn.oxfordnanoportal.com/software/analysis/models/clair3/r1041_e82_400bps_sup_g615.tar.gz",
37-
"rerio": True,
3835
},
3936
{
4037
"name": "r1041_e82_260bps_hac_g632",
4138
"model_fname": "r1041_e82_260bps_hac_g632.tar.gz",
4239
"model_url": "https://cdn.oxfordnanoportal.com/software/analysis/models/clair3/r1041_e82_260bps_hac_g632.tar.gz",
43-
"rerio": True,
4440
},
4541
{
4642
"name": "r1041_e82_400bps_hac_g615",
4743
"model_fname": "r1041_e82_400bps_hac_g615.tar.gz",
4844
"model_url": "https://cdn.oxfordnanoportal.com/software/analysis/models/clair3/r1041_e82_400bps_hac_g615.tar.gz",
49-
"rerio": True,
5045
},
5146
{
5247
"name": "r1041_e82_400bps_sup_v400",
5348
"model_fname": "r1041_e82_400bps_sup_v400.tar.gz",
5449
"model_url": "https://cdn.oxfordnanoportal.com/software/analysis/models/clair3/r1041_e82_400bps_sup_v400.tar.gz",
55-
"rerio": True,
5650
},
5751
{
5852
"name": "r1041_e82_260bps_hac_v400",
5953
"model_fname": "r1041_e82_260bps_hac_v400.tar.gz",
6054
"model_url": "https://cdn.oxfordnanoportal.com/software/analysis/models/clair3/r1041_e82_260bps_hac_v400.tar.gz",
61-
"rerio": True,
6255
},
6356
{
6457
"name": "r1041_e82_400bps_hac_g632",
6558
"model_fname": "r1041_e82_400bps_hac_g632.tar.gz",
6659
"model_url": "https://cdn.oxfordnanoportal.com/software/analysis/models/clair3/r1041_e82_400bps_hac_g632.tar.gz",
67-
"rerio": True,
6860
},
6961
{
7062
"name": "r1041_e82_400bps_sup_v410",
7163
"model_fname": "r1041_e82_400bps_sup_v410.tar.gz",
7264
"model_url": "https://cdn.oxfordnanoportal.com/software/analysis/models/clair3/r1041_e82_400bps_sup_v410.tar.gz",
73-
"rerio": True,
7465
},
7566
{
7667
"name": "r1041_e82_260bps_hac_v410",
7768
"model_fname": "r1041_e82_260bps_hac_v410.tar.gz",
7869
"model_url": "https://cdn.oxfordnanoportal.com/software/analysis/models/clair3/r1041_e82_260bps_hac_v410.tar.gz",
79-
"rerio": True,
8070
},
8171
{
8272
"name": "r1041_e82_400bps_hac_v400",
8373
"model_fname": "r1041_e82_400bps_hac_v400.tar.gz",
8474
"model_url": "https://cdn.oxfordnanoportal.com/software/analysis/models/clair3/r1041_e82_400bps_hac_v400.tar.gz",
85-
"rerio": True,
8675
},
8776
{
8877
"name": "r1041_e82_400bps_sup_v420",
8978
"model_fname": "r1041_e82_400bps_sup_v420.tar.gz",
9079
"model_url": "https://cdn.oxfordnanoportal.com/software/analysis/models/clair3/r1041_e82_400bps_sup_v420.tar.gz",
91-
"rerio": True,
9280
},
9381
{
9482
"name": "r1041_e82_260bps_sup_g632",
9583
"model_fname": "r1041_e82_260bps_sup_g632.tar.gz",
9684
"model_url": "https://cdn.oxfordnanoportal.com/software/analysis/models/clair3/r1041_e82_260bps_sup_g632.tar.gz",
97-
"rerio": True,
9885
},
9986
{
10087
"name": "r1041_e82_400bps_hac_v410",
10188
"model_fname": "r1041_e82_400bps_hac_v410.tar.gz",
10289
"model_url": "https://cdn.oxfordnanoportal.com/software/analysis/models/clair3/r1041_e82_400bps_hac_v410.tar.gz",
103-
"rerio": True,
10490
},
10591
{
10692
"name": "r1041_e82_400bps_sup_v430",
10793
"model_fname": "r1041_e82_400bps_sup_v430.tar.gz",
10894
"model_url": "https://cdn.oxfordnanoportal.com/software/analysis/models/clair3/r1041_e82_400bps_sup_v430.tar.gz",
109-
"rerio": True,
11095
},
11196
{
11297
"name": "r1041_e82_260bps_sup_v400",
11398
"model_fname": "r1041_e82_260bps_sup_v400.tar.gz",
11499
"model_url": "https://cdn.oxfordnanoportal.com/software/analysis/models/clair3/r1041_e82_260bps_sup_v400.tar.gz",
115-
"rerio": True,
116100
},
117101
{
118102
"name": "r1041_e82_400bps_hac_v420",
119103
"model_fname": "r1041_e82_400bps_hac_v420.tar.gz",
120104
"model_url": "https://cdn.oxfordnanoportal.com/software/analysis/models/clair3/r1041_e82_400bps_hac_v420.tar.gz",
121-
"rerio": True,
122105
},
123106
{
124107
"name": "r1041_e82_400bps_sup_v500",
125108
"model_fname": "r1041_e82_400bps_sup_v500.tar.gz",
126109
"model_url": "https://cdn.oxfordnanoportal.com/software/analysis/models/clair3/r1041_e82_400bps_sup_v500.tar.gz",
127-
"rerio": True,
128110
},
129111
{
130112
"name": "r1041_e82_260bps_sup_v410",
131113
"model_fname": "r1041_e82_260bps_sup_v410.tar.gz",
132114
"model_url": "https://cdn.oxfordnanoportal.com/software/analysis/models/clair3/r1041_e82_260bps_sup_v410.tar.gz",
133-
"rerio": True,
134115
},
135116
{
136117
"name": "r1041_e82_400bps_hac_v430",
137118
"model_fname": "r1041_e82_400bps_hac_v430.tar.gz",
138119
"model_url": "https://cdn.oxfordnanoportal.com/software/analysis/models/clair3/r1041_e82_400bps_hac_v430.tar.gz",
139-
"rerio": True,
140120
},
141121
{
142122
"name": "r104_e81_hac_g5015",
143123
"model_fname": "r104_e81_hac_g5015.tar.gz",
144124
"model_url": "https://cdn.oxfordnanoportal.com/software/analysis/models/clair3/r104_e81_hac_g5015.tar.gz",
145-
"rerio": True,
146125
},
147126
{
148127
"name": "r1041_e82_400bps_hac_v500",
149128
"model_fname": "r1041_e82_400bps_hac_v500.tar.gz",
150129
"model_url": "https://cdn.oxfordnanoportal.com/software/analysis/models/clair3/r1041_e82_400bps_hac_v500.tar.gz",
151-
"rerio": True,
152130
},
153131
{
154132
"name": "r104_e81_sup_g5015",
155133
"model_fname": "r104_e81_sup_g5015.tar.gz",
156134
"model_url": "https://cdn.oxfordnanoportal.com/software/analysis/models/clair3/r104_e81_sup_g5015.tar.gz",
157-
"rerio": True,
135+
},
136+
{
137+
"name": "r1041_e82_400bps_hac_v520",
138+
"model_fname": "r1041_e82_400bps_hac_v520.tar.gz",
139+
"model_url": "https://cdn.oxfordnanoportal.com/software/analysis/models/clair3/r1041_e82_400bps_hac_v520.tar.gz",
140+
},
141+
{
142+
"name": "r1041_e82_400bps_sup_v520",
143+
"model_fname": "r1041_e82_400bps_sup_v520.tar.gz",
144+
"model_url": "https://cdn.oxfordnanoportal.com/software/analysis/models/clair3/r1041_e82_400bps_sup_v520.tar.gz",
158145
},
159146
{
160147
"name": "r941_prom_sup_g5014",
161-
"model_fname": "NA",
162-
"model_url": "NA",
163-
"rerio": False,
148+
"model_fname": "r941_prom_sup_g5014.tar.gz",
149+
"model_url": "https://www.bio8.cs.hku.hk/clair3/clair3_models/r941_prom_sup_g5014.tar.gz",
164150
},
165151
{
166152
"name": "r941_prom_hac_g360+g422",
167-
"model_fname": "NA",
168-
"model_url": "NA",
169-
"rerio": False,
153+
"model_fname": "r941_prom_hac_g360+g422.tar.gz",
154+
"model_url": "https://www.bio8.cs.hku.hk/clair3/clair3_models/r941_prom_hac_g360+g422.tar.gz",
170155
},
171156
]
172157

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[metadata]
22
name = artic
3-
version = 1.6.3
3+
version = 1.6.4
44
author = Nick Loman
55
author_email = [email protected]
66
maintainer = Sam Wilkinson

0 commit comments

Comments
 (0)