-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathequipment.py
254 lines (207 loc) · 13.4 KB
/
equipment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
import warnings
from collections import defaultdict
from typing import Counter, Iterable, Optional, Set, Union
import numpy as np
import pandas as pd
from tlo import logging
logger_summary = logging.getLogger("tlo.methods.healthsystem.summary")
class Equipment:
"""This is the Equipment Class. It maintains a current record of the availability of equipment in the
HealthSystem. It is expected that this is instantiated by the `HealthSystem` module.
The basic paradigm is that an `HSI_Event` can declare equipment that is required for delivering the healthcare
service that the `HSI_Event` represents. The `HSI_Event` uses `self.add_equipment()` to make these declaration,
with reference to the items of equipment that are defined in `ResourceFile_EquipmentCatalogue.csv`. (These
declaration can be in the form of the descriptor or the equipment item code). These declarations can be used when
the `HSI_Event` is created but before it is run (in `__init__`), or during execution of the HSI_Event (in `apply`).
As the HSI_Event can declare equipment that is required before it is run, the HealthSystem _can_ use this to
prevent an HSI_Event running if the equipment declared is not available. Note that for equipment that is declared
whilst the HSI_Event is running, there are no checks on availability, and the HSI_Event is allowed to continue
running even if equipment is declared is not available. For this reason, the `HSI_Event` should declare equipment
that is _essential_ for the healthcare service in its `__init__` method. If the logic inside the `apply` method
of the `HSI_Event` depends on the availability of equipment, then it can find the probability with which
item(s) will be available using `self.probability_equipment_available()`.
The data on the availability of equipment data refers to the proportion of facilities in a district of a
particular level (i.e., the `Facility_ID`) that do have that piece of equipment. In the model, we do not know
which actual facility the person is attending (there are many actual facilities grouped together into one
`Facility_ID` in the model). Therefore, the determination of whether equipment is available is made
probabilistically for the `HSI_Event` (i.e., the probability that the actual facility being attended by the
person has the equipment is represented by the proportion of such facilities that do have that equipment). It is
assumed that the probabilities of each item being available are independent of one other (so that the
probability of all items being available is the product of the probabilities for each item). This probabilistic
determination of availability is only done _once_ for the `HSI_Event`: i.e., if the equipment is determined to
not be available for the instance of the `HSI_Event`, then it will remain not available if the same event is
re-scheduled / re-entered into the HealthSystem queue. This represents that if the facility that a particular
person attends for the `HSI_Event` does not have the equipment available, then it will still not be available on
another day.
Where data on availability is not provided for an item, the probability of availability is inferred from the
average availability of other items in that `Facility_ID`. Likewise, the probability of an item being available
at `Facility_ID` is inferred from the average availability of that item at other facilities. If an item_code is
referred that is not recognised (not included in `catalogue`), a `UserWarning` is issued. If a facility_id is
referred that is not recognised (not included in `master_facilities_list`), an `Error` is raised.
:param: 'catalogue': The database of all recognised item_codes.
:param: `data_availability`: Specifies the probability with which each equipment (identified by an `item_code`) is
available at a facility level. Note that information is not necessarily provided for every item in the `catalogue`
or every facility_id in the `master_facilities_list`.
:param: `rng`: The Random Number Generator object to use for random numbers.
:param: `availability`: Determines the mode availability of the equipment. If 'default' then use the availability
specified in the `data_availability`; if 'none', then let no equipment be ever be available; if 'all', then all
equipment is always available.
:param `: `master_facilities_list`: The pd.DataFrame with the line-list of all the facilities in the HealthSystem.
"""
def __init__(
self,
catalogue: pd.DataFrame,
data_availability: pd.DataFrame,
rng: np.random,
master_facilities_list: pd.DataFrame,
availability: Optional[str] = "default",
) -> None:
# Store arguments
self.catalogue = catalogue
self.rng = rng
self.data_availability = data_availability
self.master_facilities_list = master_facilities_list
# Create internal storage structures
# - Probabilities of items being available at each facility_id
self._probabilities_of_items_available = pd.DataFrame()
# - Internal store of which items have been used at each facility_id This is of the form
# {facility_id: {item_code: count}}.
self._record_of_equipment_used_by_facility_id = defaultdict(Counter) # <-- Will be the
# - Data structures for quick look-ups for items and descriptors
self._item_code_lookup = self.catalogue.set_index('Item_Description')['Item_Code'].to_dict()
self._all_item_descriptors = set(self._item_code_lookup.keys())
self._all_item_codes = set(self._item_code_lookup.values())
# Initialise the internal stores of the probability with which equipment items that are available.
self._set_equipment_items_available(availability=availability)
def on_simulation_end(self):
"""Things to do when the simulation ends:
* Log (to the summary logger) the equipment that has been used.
"""
self.write_to_log()
def update_availability(self, availability: str) -> None:
"""Update the availability of equipment. This is expected to be called midway through the simulation if
the assumption of the equipment availability is changed."""
self._set_equipment_items_available(availability=availability)
def _set_equipment_items_available(self, availability: str):
"""Update internal store of probabilities of items of equipment being available. This is called at the beginning
of the simulation and whenever an update in `availability` is done by `update_availability`."""
# All facility_id in the simulation
all_fac_ids = self.master_facilities_list['Facility_ID'].unique()
# All equipment items in the catalogue
all_eq_items = self.catalogue["Item_Code"].unique()
# Create "full" dataset, where we force that there is probability of availability for every item_code at every
# observed facility
df = pd.Series(
index=pd.MultiIndex.from_product(
[all_fac_ids, all_eq_items], names=["Facility_ID", "Item_Code"]
),
data=float("nan"),
).combine_first(
self.data_availability.set_index(["Facility_ID", "Item_Code"])[
"Pr_Available"
]
)
# Merge in original dataset and use the mean in that facility_id to impute availability of missing item_codes
df = df.groupby("Facility_ID").transform(lambda x: x.fillna(x.mean()))
# ... and also impute availability for any facility_ids for which no data, based on all other facilities
df = df.groupby("Item_Code").transform(lambda x: x.fillna(x.mean()))
# Check no missing values
assert not df.isnull().any()
# Over-write these data if `availability` argument specifies that `none` or `all` items should be available
if availability == "default":
pass
elif availability == "all":
df = (df + 1).clip(upper=1.0) # All probabilities -> 1.0
elif availability == "none":
df = df.mul(0.0) # All probabilities -> 0.0
else:
raise KeyError(f"Unknown equipment availability specified: {availability=}")
# Save
self._probabilities_of_items_available = df
def parse_items(self, items: Union[int, str, Iterable[int | str]]) -> Set[int]:
"""Parse equipment items specified as an item_code (integer), an item descriptor (string), or an iterable of
either, and return as a set of item_code (integers). For any item_code/descriptor not recognised, a
`UserWarning` is issued."""
def check_item_codes_recognised(item_codes: set[int]):
if not item_codes.issubset(self._all_item_codes):
warnings.warn(f'Item code(s) "{item_codes}" not recognised.')
def check_item_descriptors_recognised(item_descriptors: set[str]):
if not item_descriptors.issubset(self._all_item_descriptors):
warnings.warn(f'Item descriptor(s) "{item_descriptors}" not recognised.')
# Make into a set if it is not one already
if isinstance(items, (str, int)):
items = set([items])
else:
items = set(items)
items_are_ints = all(isinstance(element, int) for element in items)
if items_are_ints:
check_item_codes_recognised(items)
# In the return, any unrecognised item_codes are silently ignored.
return items.intersection(self._all_item_codes)
else:
check_item_descriptors_recognised(items) # Warn for any unrecognised descriptors
# In the return, any unrecognised descriptors are silently ignored.
return set(filter(lambda item: item is not None, map(self._item_code_lookup.get, items)))
def probability_all_equipment_available(
self, item_codes: Set[int], facility_id: int
) -> float:
"""Returns the probability that all the equipment item_codes are available. It does so by looking at the
probabilities of each equipment item being available and multiplying these together to find the probability
that _all_ are available."""
try:
return self._probabilities_of_items_available.loc[(facility_id, list(item_codes))].prod()
except KeyError:
raise ValueError(f'Not recognised {facility_id=}')
def is_all_items_available(
self, item_codes: Set[int], facility_id: int
) -> bool:
"""Determine if all equipment items are available at the given facility_id. Returns True only if all items are
available at the facility_id, otherwise returns False."""
return self.rng.random_sample() < self.probability_all_equipment_available(item_codes=item_codes,
facility_id=facility_id)
def record_use_of_equipment(
self, item_codes: Set[int], facility_id: int
) -> None:
"""Update internal record of the usage of items at equipment at the specified facility_id."""
self._record_of_equipment_used_by_facility_id[facility_id].update(item_codes)
def write_to_log(self) -> None:
"""Write to the log:
* Summary of the equipment that was _ever_ used at each district/facility level.
Note that the info-level health system logger (key: `hsi_event_counts`) contains logging of the equipment used
in each HSI event (if finer splits are needed). Alternatively, different aggregations could be created here for
the summary logger, using the same pattern as used here.
"""
mfl = self.master_facilities_list
def set_of_keys_or_empty_set(x: Union[set, dict]):
if isinstance(x, set):
return x
elif isinstance(x, dict):
return set(x.keys())
else:
return set()
set_of_equipment_ever_used_at_each_facility_id = pd.Series({
fac_id: set_of_keys_or_empty_set(self._record_of_equipment_used_by_facility_id.get(fac_id, set()))
for fac_id in mfl['Facility_ID']
}, name='EquipmentEverUsed').astype(str)
output = mfl.merge(
set_of_equipment_ever_used_at_each_facility_id,
left_on='Facility_ID',
right_index=True,
how='left',
).drop(columns=['Facility_ID', 'Facility_Name'])
# Log multi-row data-frame
for _, row in output.iterrows():
logger_summary.info(
key='EquipmentEverUsed_ByFacilityID',
description='For each facility_id (the set of facilities of the same level in a district), the set of'
'equipment items that are ever used.',
data=row.to_dict(),
)
def lookup_item_codes_from_pkg_name(self, pkg_name: str) -> Set[int]:
"""Convenience function to find the set of item_codes that are grouped under a package name in the catalogue.
It is expected that this is used by the disease module once and then the resulting equipment item_codes are
saved on the module."""
df = self.catalogue
if pkg_name not in df['Pkg_Name'].unique():
raise ValueError(f'That Pkg_Name is not in the catalogue: {pkg_name=}')
return set(df.loc[df['Pkg_Name'] == pkg_name, 'Item_Code'].values)