Skip to content
This repository was archived by the owner on Feb 26, 2025. It is now read-only.

Commit 02be6f5

Browse files
authored
Data source processing improvements (#3)
1 parent 254617e commit 02be6f5

33 files changed

+462
-237
lines changed
+44
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
name: check-pull-request
2+
3+
on:
4+
schedule:
5+
- cron: '48 22 * * 0'
6+
7+
jobs:
8+
9+
check:
10+
runs-on: ubuntu-latest
11+
timeout-minutes: 30
12+
env:
13+
PACKAGE_UPDATED: "False"
14+
15+
steps:
16+
- uses: actions/checkout@v2
17+
- name: Set up Python ${{ matrix.python-version }}
18+
uses: actions/setup-python@v2
19+
with:
20+
python-version: 3.9
21+
- name: Install dependencies
22+
run: |
23+
pip install -U pip
24+
- name: Download and check source data
25+
run: |
26+
pip install -e .[dev]
27+
mkdir sr/temp
28+
python sr/main.py
29+
- name: Blacken code, run tests and get diff
30+
if: ${{ env.PACKAGE_UPDATED == 'True' }}
31+
run: |
32+
black sr/*py
33+
black sr/tables/*.py
34+
pytest
35+
- name: Create PR
36+
if: ${{ env.PACKAGE_UPDATED == 'True' }}
37+
uses: peter-evans/create-pull-request@v3
38+
with:
39+
committer: scaramallion <[email protected]>
40+
author: scaramallion <[email protected]>
41+
delete-branch: true
42+
commit-message: "Update SR package"
43+
title: "Update SR package"
44+
base: master
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,32 @@
1-
name: check-pull-request
1+
name: pull-request-test
22

33
on:
4-
# schedule:
5-
# - cron: '48 22 * * 0'
64
pull_request:
75
branches: [ master ]
86

9-
107
jobs:
118

12-
check-pr:
9+
check:
1310
runs-on: ubuntu-latest
1411
timeout-minutes: 30
12+
env:
13+
PACKAGE_UPDATED: "False"
1514

1615
steps:
1716
- uses: actions/checkout@v2
1817
- name: Set up Python ${{ matrix.python-version }}
1918
uses: actions/setup-python@v2
2019
with:
2120
python-version: 3.9
21+
- name: Install dependencies
22+
run: |
23+
pip install -U pip
2224
- name: Download and check source data
2325
run: |
2426
pip install -e .[dev]
2527
mkdir sr/temp
26-
python sr/main.py --dev --force-regeneration
27-
black sr/*py
28-
black sr/tables/*.py
29-
git diff --output=gitdiff.txt
30-
- name: Upload artifacts
31-
if: ${{ success() }}
32-
uses: actions/upload-artifact@v2
33-
with:
34-
name: diff
35-
path: gitdiff.txt
28+
python sr/main.py --force-regeneration
29+
- name: Run tests
30+
if: ${{ env.PACKAGE_UPDATED == 'True' }}
31+
run: |
32+
pytest

.gitignore

+4
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,7 @@ build/*
2525
distribute*.egg
2626
distribute*.tar.gz
2727
__pycache__
28+
29+
# Other
30+
.mypy_cache
31+
.pytest_cache

mypy.ini

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[mypy]
22
python_version = 3.9
3-
exclude = sr/(tables|temp)
3+
exclude = sr/(temp|test)
44
files = sr/
55
show_error_codes = True
66
warn_redundant_casts = True

setup.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,10 @@ def get_version() -> str:
5454
"black==21.6b0",
5555
"mypy==0.902",
5656
"types-requests==0.1.11",
57+
"pytest==6.2.4",
5758
]
5859
},
5960
entry_points={
60-
"pydicom.data.external_sources": "pydicom-data-sr = srdata:DataStore",
61+
"pydicom.data.sr": "pydicom-data-sr = sr:foo",
6162
},
6263
)

sr/__init__.py

+7
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
from pathlib import Path
2+
from types import ModuleType
3+
from typing import Tuple
24

35
from sr._version import __version__
6+
from sr.tables import _cid, _concepts, _snomed
47

58

69
PACKAGE_DIR = Path(__file__).parent.resolve(strict=True)
@@ -11,3 +14,7 @@
1114
CID_FILE = SR_TABLES / "_cid.py"
1215
CONCEPTS_FILE = SR_TABLES / "_concepts.py"
1316
SNOMED_FILE = SR_TABLES / "_snomed.py"
17+
18+
19+
def foo() -> Tuple[ModuleType, ModuleType, ModuleType]:
20+
return _cid, _concepts, _snomed

sr/_version.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
__version__: str = "2021.06.16"
1+
__version__: str = "2021.06.19"
22
__dicom_version__: str = "2021b"

sr/hashes.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1238,4 +1238,4 @@
12381238
"CID_9570.json": "9271628dc86f9919b296607639be7d7c",
12391239
"part16_d1.html": "0555239bf8a9593c13b9af73ba9a2a2a",
12401240
"part16_o1.html": "a811317afc72f15c17699de6134b941f"
1241-
}
1241+
}

sr/main.py

+52-49
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
import argparse
22
from datetime import datetime
33
import logging
4+
import os
45
from pathlib import Path
6+
import subprocess
57
from tempfile import TemporaryDirectory
68
import time
79
from typing import Optional, List, Tuple, Dict, Any
@@ -26,7 +28,7 @@
2628
)
2729
from sr.utils import (
2830
download_cid_files,
29-
download_file,
31+
download_files,
3032
compare_checksums,
3133
calculate_checksums,
3234
)
@@ -41,8 +43,6 @@
4143
TABLE_O1 = PART_16 + "/chapter_O.html"
4244
TABLE_D1 = PART_16 + "/chapter_D.html"
4345

44-
WORKERS = 32
45-
4646

4747
def run(
4848
src: Optional[Path] = None,
@@ -86,9 +86,11 @@ def run(
8686
attempts = 0
8787
while attempts < 5:
8888
try:
89-
download_cid_files((CID_HOST, CID_PATH), src, WORKERS)
90-
download_file(TABLE_D1, src / "part16_d1.html")
91-
download_file(TABLE_O1, src / "part16_o1.html")
89+
download_cid_files((CID_HOST, CID_PATH), src)
90+
download_files(
91+
[TABLE_D1, TABLE_O1],
92+
[src / "part16_d1.html", src / "part16_o1.html"],
93+
)
9294
break
9395
except TimeoutError as exc:
9496
LOGGER.exception(exc)
@@ -102,12 +104,16 @@ def run(
102104
paths = sorted(cid_paths + table_paths)
103105

104106
# 1. Compare the data in `src` against the reference hashes
105-
if compare_checksums(paths, HASH_FILE) and not force_regeneration:
107+
checksums_match = compare_checksums(paths, HASH_FILE)
108+
if checksums_match and not force_regeneration:
106109
LOGGER.info("No change in source data found, exiting...")
107110
return False
108111

109112
# 2. Source data has changed or regeneration was forced: regenerate the tables and update the package
110-
LOGGER.info("Source data has changed - updating package")
113+
if not checksums_match:
114+
LOGGER.info("Source data has changed - updating package")
115+
else:
116+
LOGGER.debug("'--force-regeneration' used - updating package")
111117

112118
table_o1 = src / "part16_o1.html"
113119
table_d1 = src / "part16_d1.html"
@@ -153,7 +159,7 @@ def write_hash_file(paths: List[Path]) -> None:
153159
else:
154160
LOGGER.warning("No checksums available to write to 'hashes.json'")
155161

156-
f.write("}")
162+
f.write("}\n")
157163

158164
LOGGER.info(f"'hashes.json' written with {len(checksums)} entries")
159165

@@ -182,10 +188,13 @@ def write_snomed_file(codes: List[Tuple[str, str, str]]) -> None:
182188
"# 'SRT': {snomed_id1: concept_id1, snomed_id2: ...},\n",
183189
"# }\n",
184190
"\n",
191+
"from typing import Dict\n",
192+
"\n\n",
185193
]
186194
)
187195

188-
f.write("mapping = {}\n")
196+
f.write("mapping: Dict[str, Dict[str, str]] = {}\n")
197+
189198
# Write the SCT to SRT mappings
190199
f.write("\nmapping['SCT'] = {\n")
191200
for sct, srt, meaning in sorted(codes, key=lambda x: int(x[0])):
@@ -229,10 +238,12 @@ def write_cid_file(cid_lists: CIDListType, name_for_cid: NameForCIDType) -> None
229238
"# {scheme designator: <list of keywords for current cid>\n",
230239
"# scheme_designator: ...}\n",
231240
"\n",
241+
"from typing import Dict, List\n",
242+
"\n\n",
232243
]
233244
)
234-
f.write("name_for_cid = {}\n")
235-
f.write("cid_concepts = {}\n")
245+
f.write("name_for_cid: Dict[int, str] = {}\n")
246+
f.write("cid_concepts: Dict[int, Dict[str, List[str]]] = {}\n")
236247
for cid, value in cid_lists.items():
237248
# cid: int
238249
# value: Dict[str, List[str]]
@@ -273,12 +284,14 @@ def write_concept_files(concepts: ConceptType) -> None:
273284
f"# Auto-generated by pydicom-data-sr\n",
274285
"# -*- coding: utf-8 -*-\n",
275286
"\n",
287+
"from typing import Dict, List, Tuple\n",
288+
"\n\n",
276289
]
277290

278291
imports = []
279292
top_indent = " " * 4
280293
m_indent = " " * 8
281-
for scheme, top_value in concepts.items():
294+
for scheme, attr_codes in concepts.items():
282295
module = f"_concepts_{scheme}"
283296
variable = f"concepts_{scheme}"
284297
imports.append((scheme, module, variable))
@@ -292,21 +305,21 @@ def write_concept_files(concepts: ConceptType) -> None:
292305
# concepts_scheme: {
293306
# <attr_codes>,
294307
# }
295-
f.write(f"{variable} = {{\n")
296-
for name, middle_value in sorted(top_value.items(), key=lambda x: x[0]):
297-
# name: str
298-
# middle_value: Dict[str, Tuple[str, List[int]]]
308+
f.write(f"{variable}: Dict[str, Dict[str, Tuple[str, List[int]]]] = {{\n")
309+
for attr, codes in sorted(attr_codes.items(), key=lambda x: x[0]):
310+
# attr: str
311+
# codes: Dict[str, Tuple[str, List[int]]]
299312
# Write as:
300-
# name: {
313+
# attr: {
301314
# <codes>,
302315
# },
303-
f.write(f'{top_indent}"{name}": {{\n')
304-
for key, val in sorted(middle_value.items(), key=lambda x: x[0]):
305-
# key: str
306-
# val: Tuple[str, List[int]]
316+
f.write(f'{top_indent}"{attr}": {{\n')
317+
for code, (meaning, cids) in sorted(codes.items(), key=lambda x: x[0]):
318+
# code: str
319+
# (meaning, cids): Tuple[str, List[int]]
307320
# Write as:
308-
# key: (str, List[int]),
309-
f.write(f'{m_indent}"{key}": ("{val[0]}", {sorted(val[1])}),\n')
321+
# code: (str, List[int]),
322+
f.write(f'{m_indent}"{code}": ("{meaning}", {sorted(cids)}),\n')
310323

311324
f.write(f"{top_indent}}},\n")
312325

@@ -317,11 +330,13 @@ def write_concept_files(concepts: ConceptType) -> None:
317330
with open(CONCEPTS_FILE, "w", encoding="utf8") as f:
318331
f.writelines(header)
319332
for _, module, variable in imports:
320-
# from sr.tables._cid_concepts_<scheme> import _concepts_<scheme>
333+
# from sr.tables._concepts_<scheme> import concepts_<scheme>
321334
f.write(f"from sr.tables.{module} import {variable}\n")
322335

323336
f.write("\n\n")
324-
f.write("concepts = {\n")
337+
f.write(
338+
"concepts: Dict[str, Dict[str, Dict[str, Tuple[str, List[int]]]]] = {\n"
339+
)
325340
for scheme, _, variable in imports:
326341
f.write(f' "{scheme}": {variable},\n')
327342
f.write("}\n")
@@ -331,9 +346,6 @@ def write_version_file(dicom_version: str) -> None:
331346
"""Write a new _version.py file"""
332347

333348
new_version = datetime.now().strftime("%Y.%m.%d")
334-
if new_version == __version__:
335-
raise RuntimeError("Error updating the package: no change in version number")
336-
337349
with open(VERSION_FILE, "w") as f:
338350
f.write(f'__version__: str = "{new_version}"\n')
339351
f.write(f'__dicom_version__: str = "{dicom_version}"\n')
@@ -348,27 +360,15 @@ def _setup_argparser() -> Any:
348360

349361
# General Options
350362
gen_opts = parser.add_argument_group("General Options")
351-
gen_opts.add_argument(
352-
"-d",
353-
"--dev",
354-
help="enable dev mode",
355-
action="store_true",
356-
)
357363
gen_opts.add_argument(
358364
"--force-download",
359-
help="force downloading the data tables",
365+
help="force downloading the data tables to a local directory",
360366
action="store_true",
361367
default=False,
362368
)
363369
gen_opts.add_argument(
364370
"--force-regeneration",
365-
help="force regenerating the data tables",
366-
action="store_true",
367-
default=False,
368-
)
369-
gen_opts.add_argument(
370-
"--clean",
371-
help="remove all data files",
371+
help="force regenerating the data tables from local source data",
372372
action="store_true",
373373
default=False,
374374
)
@@ -381,11 +381,14 @@ def _setup_argparser() -> Any:
381381
logging.basicConfig(level=logging.DEBUG)
382382
args = _setup_argparser()
383383

384-
src = None
385-
if args.dev:
386-
src = PACKAGE_DIR / "temp"
384+
LOCAL_DIR = PACKAGE_DIR / "temp"
387385

388-
if args.clean:
389-
pass
386+
src = None
387+
if args.force_regeneration or args.force_download:
388+
src = LOCAL_DIR
390389

391-
run(src, args.force_download, args.force_regeneration)
390+
result = run(src, args.force_download, args.force_regeneration)
391+
if "GITHUB_ACTION" in os.environ:
392+
subprocess.run(
393+
f"echo 'PACKAGE_UPDATED={str(result)}' >> $GITHUB_ENV", shell=True
394+
)

0 commit comments

Comments
 (0)