Skip to content

Commit 050f085

Browse files
authored
Surgery cohort ECGs near event (#247)
ecg by time
1 parent 6e9178e commit 050f085

File tree

4 files changed

+99
-73
lines changed

4 files changed

+99
-73
lines changed

ml4cvd/TensorMap.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ def __init__(
102102
loss_weight: Optional[float] = 1.0,
103103
channel_map: Optional[Dict[str, int]] = None,
104104
storage_type: Optional[StorageType] = None,
105-
dependent_map: Optional[str] = None,
105+
dependent_map: Optional["TensorMap"] = None,
106106
augmentations: Optional[List[Callable[[np.ndarray], np.ndarray]]] = None,
107107
normalization: Optional[Normalizer] = None,
108108
annotation_units: Optional[int] = 32,

ml4cvd/arguments.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from ml4cvd.TensorMap import TensorMap
2525
from ml4cvd.tensor_maps_by_hand import TMAPS
2626
from ml4cvd.defines import IMPUTATION_RANDOM, IMPUTATION_MEAN
27-
from ml4cvd.tensor_maps_partners_ecg import build_partners_tensor_maps
27+
from ml4cvd.tensor_maps_partners_ecg import build_partners_tensor_maps, build_cardiac_surgery_tensor_maps
2828
from ml4cvd.tensor_map_maker import generate_continuous_tensor_map_from_file
2929

3030

@@ -221,6 +221,10 @@ def _get_tmap(name: str, needed_tensor_maps: List[str]) -> TensorMap:
221221
if name in TMAPS:
222222
return TMAPS[name]
223223

224+
TMAPS.update(build_cardiac_surgery_tensor_maps(needed_tensor_maps))
225+
if name in TMAPS:
226+
return TMAPS[name]
227+
224228
from ml4cvd.tensor_maps_partners_ecg import TMAPS as partners_tmaps
225229
TMAPS.update(partners_tmaps)
226230

ml4cvd/tensor_maps_partners_ecg.py

Lines changed: 91 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from typing import Dict, List, Callable, Union, Tuple
99

1010
from ml4cvd.tensor_maps_by_hand import TMAPS
11-
from ml4cvd.defines import ECG_REST_AMP_LEADS, PARTNERS_DATE_FORMAT, STOP_CHAR, PARTNERS_CHAR_2_IDX
11+
from ml4cvd.defines import ECG_REST_AMP_LEADS, PARTNERS_DATE_FORMAT, STOP_CHAR, PARTNERS_CHAR_2_IDX, PARTNERS_DATETIME_FORMAT
1212
from ml4cvd.TensorMap import TensorMap, str2date, Interpretation, make_range_validator, decompress_data, TimeSeriesOrder
1313

1414

@@ -88,7 +88,7 @@ def get_voltage_from_file(tm, hd5, dependents={}):
8888
slices = (i, ..., tm.channel_map[cm]) if dynamic else (..., tm.channel_map[cm])
8989
tensor[slices] = voltage
9090
except KeyError:
91-
pass
91+
logging.warning(f'KeyError for channel {cm} in {tm.name}')
9292
if population_normalize is not None:
9393
tensor /= population_normalize
9494
return tensor
@@ -1370,7 +1370,7 @@ def _diagnosis_channels(disease: str, incidence_only: bool = False):
13701370

13711371

13721372
def _outcome_channels(outcome: str):
1373-
return {f'no_{outcome}': 0, f'future_{outcome}': 1}
1373+
return {f'no_{outcome}': 0, f'{outcome}': 1}
13741374

13751375

13761376
def loyalty_time_to_event(
@@ -1603,13 +1603,23 @@ def _cardiac_surgery_str2date(input_date: str) -> datetime.datetime:
16031603
return datetime.datetime.strptime(input_date, "%d%b%Y")
16041604

16051605

1606+
def _date_in_window_from_dates(ecg_dates, surgery_date, day_window, date_format=None):
    """Return the newest ECG date string that is within ``day_window`` days of surgery.

    Candidates are visited in descending string order and the first one that
    satisfies the window check is returned. This assumes the date strings sort
    chronologically when compared as text (e.g. zero-padded, year-first) -- TODO confirm
    against the format constant.

    :param ecg_dates: Iterable of ECG date strings; it is not modified
    :param surgery_date: datetime.datetime of the surgery
    :param day_window: Largest allowed value, in days, of surgery date minus ECG date
    :param date_format: strptime format of the date strings; defaults to PARTNERS_DATETIME_FORMAT
    :return: The first element of ecg_dates (in descending order) that passes the window check
    :raises ValueError: If no ECG passes the check, or a visited string does not match date_format
    """
    if date_format is None:
        date_format = PARTNERS_DATETIME_FORMAT
    window = datetime.timedelta(days=day_window)
    # sorted() works on a copy, so the caller's sequence keeps its order
    # (list.sort() would reorder it in place) and any iterable is accepted.
    for ecg_date in sorted(ecg_dates, reverse=True):
        ecg_datetime = datetime.datetime.strptime(ecg_date, date_format)
        # NOTE(review): an ECG dated after the surgery yields a negative difference
        # and therefore also passes this check -- confirm that is intended.
        if surgery_date - ecg_datetime <= window:
            return ecg_date
    raise ValueError('No ECG in time window')
1613+
1614+
16061615
def build_cardiac_surgery_outcome_tensor_from_file(
16071616
file_name: str,
1617+
outcome2column: Dict[str, str],
16081618
patient_column: str = "medrecn",
1609-
outcome_column: str = "mtopd",
16101619
start_column: str = "surgdt",
16111620
delimiter: str = ",",
16121621
day_window: int = 30,
1622+
population_normalize: int = None,
16131623
) -> Callable:
16141624
"""Build a tensor_from_file function for outcomes given CSV of patients.
16151625
@@ -1618,7 +1628,7 @@ def build_cardiac_surgery_outcome_tensor_from_file(
16181628
16191629
:param file_name: CSV or TSV file with header of patient IDs (MRNs) dates of enrollment and dates of diagnosis
16201630
:param patient_column: The header name of the column of patient ids
1621-
:param diagnosis_date_column: The header name of the column of disease diagnosis dates
1631+
:param outcome2column: Dictionary mapping outcome names to the header name of the column with outcome status
16221632
:param start_column: The header name of the column of surgery dates
16231633
:param delimiter: The delimiter separating columns of the TSV or CSV
16241634
:return: The tensor_from_file function to provide to TensorMap constructors
@@ -1630,57 +1640,49 @@ def build_cardiac_surgery_outcome_tensor_from_file(
16301640
header = next(reader)
16311641
patient_index = header.index(patient_column)
16321642
date_index = header.index(start_column)
1633-
outcome_index = header.index(outcome_column)
1634-
date_surg_table = {}
1635-
patient_table = {}
1643+
surgery_date_table = {}
1644+
outcome_table = defaultdict(dict)
16361645
for row in reader:
16371646
try:
16381647
patient_key = int(row[patient_index])
1639-
patient_table[patient_key] = int(row[outcome_index])
1640-
date_surg_table[patient_key] = _cardiac_surgery_str2date(
1641-
row[date_index],
1642-
)
1643-
if len(patient_table) % 1000 == 0:
1644-
logging.debug(f"Processed: {len(patient_table)} patient rows.")
1648+
surgery_date_table[patient_key] = _cardiac_surgery_str2date(row[date_index])
1649+
for outcome in outcome2column:
1650+
outcome_table[outcome][patient_key] = int(row[header.index(outcome2column[outcome])])
1651+
if len(outcome_table) % 1000 == 0:
1652+
logging.debug(f"Processed: {len(outcome_table)} outcome rows.")
16451653
except ValueError as e:
1646-
logging.warning(f"val err {e}")
1654+
logging.debug(f'Value error {e}')
1655+
1656+
logging.info(f"Processed outcomes:{list(outcome_table.keys())}. Got {len(surgery_date_table)} patients.")
16471657

16481658
except FileNotFoundError as e:
16491659
error = e
16501660

1651-
logging.info(
1652-
f"Done processing {outcome_column}. Got {len(patient_table)} patient rows.",
1653-
)
1654-
16551661
def tensor_from_file(tm: TensorMap, hd5: h5py.File, dependents=None):
16561662
if error:
16571663
raise error
16581664

1659-
ecg_dates = _get_ecg_dates(tm, hd5)
1660-
dynamic, shape = _is_dynamic_shape(tm, len(ecg_dates))
1661-
categorical_data = np.zeros(shape, dtype=np.float32)
1662-
for i, ecg_date in enumerate(ecg_dates):
1663-
mrn_int = _hd5_filename_to_mrn_int(hd5.filename)
1664-
if mrn_int not in patient_table:
1665-
raise KeyError(f"MRN not in STS outcomes CSV")
1666-
1667-
path = _make_hd5_path(tm, ecg_date, 'acquisitiondate')
1668-
ecg_date = _partners_str2date(
1669-
decompress_data(
1670-
data_compressed=hd5[path][()],
1671-
dtype=hd5[path].attrs["dtype"],
1672-
),
1673-
)
1665+
mrn_int = _hd5_filename_to_mrn_int(hd5.filename)
1666+
if mrn_int not in surgery_date_table:
1667+
raise KeyError(f"MRN not in STS outcomes CSV")
16741668

1675-
# Convert ecg_date from datetime.date to datetime.datetime
1676-
ecg_date = datetime.datetime.combine(ecg_date, datetime.time.min)
1669+
ecg_dates = list(hd5[tm.path_prefix])
1670+
tensor = np.zeros(tm.shape, dtype=np.float32)
1671+
for dtm in tm.dependent_map:
1672+
dependents[tm.dependent_map[dtm]] = np.zeros(tm.dependent_map[dtm].shape, dtype=np.float32)
1673+
ecg_date = _date_in_window_from_dates(ecg_dates, surgery_date_table[mrn_int], day_window)
1674+
for cm in tm.channel_map:
1675+
path = _make_hd5_path(tm, ecg_date, cm)
1676+
voltage = decompress_data(data_compressed=hd5[path][()], dtype=hd5[path].attrs['dtype'])
1677+
voltage = _resample_voltage(voltage, tm.shape[0])
1678+
tensor[..., tm.channel_map[cm]] = voltage
1679+
if population_normalize is not None:
1680+
tensor /= population_normalize
16771681

1678-
# If the date of surgery - date of ECG is > time window, skip it
1679-
if date_surg_table[mrn_int] - ecg_date > datetime.timedelta(days=day_window):
1680-
raise ValueError(f"ECG out of time window")
1682+
for dtm in tm.dependent_map:
1683+
dependents[tm.dependent_map[dtm]][outcome_table[dtm][mrn_int]] = 1.0
16811684

1682-
categorical_data[(i, patient_table[mrn_int]) if dynamic else (patient_table[mrn_int],)] = 1.0
1683-
return categorical_data
1685+
return tensor
16841686
return tensor_from_file
16851687

16861688

@@ -1689,37 +1691,55 @@ def build_cardiac_surgery_tensor_maps(
16891691
) -> Dict[str, TensorMap]:
16901692
name2tensormap: Dict[str, TensorMap] = {}
16911693
outcome2column = {
1692-
"death": "mtopd",
1693-
"stroke": "cnstrokp",
1694-
"renal_failure": "crenfail",
1695-
"prolonged_ventilation": "crenfail",
1696-
"dsw_infection": "deepsterninf",
1697-
"reoperation": "reop",
1698-
"any_morbidity": "anymorbidity",
1699-
"long_stay": "llos",
1694+
"sts_death": "mtopd",
1695+
"sts_stroke": "cnstrokp",
1696+
"sts_renal_failure": "crenfail",
1697+
"sts_prolonged_ventilation": "crenfail",
1698+
"sts_dsw_infection": "deepsterninf",
1699+
"sts_reoperation": "reop",
1700+
"sts_any_morbidity": "anymorbidity",
1701+
"sts_long_stay": "llos",
17001702
}
17011703

1704+
dependent_maps = {}
17021705
for outcome in outcome2column:
1703-
name = f"outcome_{outcome}"
1704-
if name in needed_tensor_maps:
1705-
tensor_from_file_fxn = build_cardiac_surgery_outcome_tensor_from_file(
1706-
file_name=CARDIAC_SURGERY_OUTCOMES_CSV,
1707-
outcome_column=outcome2column[outcome],
1708-
day_window=30,
1709-
)
1710-
name2tensormap[name] = TensorMap(
1711-
name,
1712-
Interpretation.CATEGORICAL,
1713-
path_prefix=PARTNERS_PREFIX,
1714-
channel_map=_outcome_channels(outcome),
1715-
tensor_from_file=tensor_from_file_fxn,
1716-
time_series_limit=0,
1717-
)
1718-
name2tensormap[f'{name}_newest'] = TensorMap(
1719-
f'{name}_newest',
1720-
Interpretation.CATEGORICAL,
1721-
path_prefix=PARTNERS_PREFIX,
1722-
channel_map=_outcome_channels(outcome),
1723-
tensor_from_file=tensor_from_file_fxn,
1724-
)
1706+
channel_map = _outcome_channels(outcome)
1707+
dependent_maps[outcome] = TensorMap(outcome, Interpretation.CATEGORICAL, path_prefix=PARTNERS_PREFIX, channel_map=channel_map)
1708+
1709+
name = 'ecg_2500_sts'
1710+
if name in needed_tensor_maps:
1711+
tensor_from_file_fxn = build_cardiac_surgery_outcome_tensor_from_file(
1712+
file_name=CARDIAC_SURGERY_OUTCOMES_CSV,
1713+
outcome2column=outcome2column,
1714+
population_normalize=2000,
1715+
day_window=30,
1716+
)
1717+
name2tensormap[name] = TensorMap(
1718+
name,
1719+
shape=(2500, 12),
1720+
path_prefix=PARTNERS_PREFIX,
1721+
dependent_map=dependent_maps,
1722+
channel_map=ECG_REST_AMP_LEADS,
1723+
tensor_from_file=tensor_from_file_fxn,
1724+
)
1725+
name = 'ecg_5000_sts'
1726+
if name in needed_tensor_maps:
1727+
tensor_from_file_fxn = build_cardiac_surgery_outcome_tensor_from_file(
1728+
file_name=CARDIAC_SURGERY_OUTCOMES_CSV,
1729+
outcome2column=outcome2column,
1730+
population_normalize=2000,
1731+
day_window=30,
1732+
)
1733+
name2tensormap[name] = TensorMap(
1734+
name,
1735+
shape=(5000, 12),
1736+
path_prefix=PARTNERS_PREFIX,
1737+
dependent_map=dependent_maps,
1738+
channel_map=ECG_REST_AMP_LEADS,
1739+
tensor_from_file=tensor_from_file_fxn,
1740+
)
1741+
for outcome in outcome2column:
1742+
if outcome in needed_tensor_maps:
1743+
name2tensormap[outcome] = dependent_maps[outcome]
1744+
17251745
return name2tensormap

scripts/tf.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ Attempting to run Docker with
127127
--rm \
128128
--ipc=host \
129129
-v ${WORKDIR}/:${WORKDIR}/ \
130+
-v ${HOME}/:${HOME}/ \
130131
${MOUNTS} \
131132
${DOCKER_IMAGE} /bin/bash -c "pip install ${WORKDIR}; ${PYTHON_COMMAND} ${PYTHON_ARGS}"
132133
LAUNCH_MESSAGE
@@ -136,5 +137,6 @@ ${GPU_DEVICE} \
136137
--rm \
137138
--ipc=host \
138139
-v ${WORKDIR}/:${WORKDIR}/ \
140+
-v ${HOME}/:${HOME}/ \
139141
${MOUNTS} \
140142
${DOCKER_IMAGE} /bin/bash -c "pip install ${WORKDIR}; ${PYTHON_COMMAND} ${PYTHON_ARGS}"

0 commit comments

Comments
 (0)