Skip to content

Commit 3c011e7

Browse files
improving performance in hiv module (#830)
* "profiling for 1 month" * "transfer scale and shape calculation out of the loop" * "calculate scale and shape with linear model for all ages" * "uses linear model to calculate mean months between aids and death depending on age" * "modifies get_time_from_infection_to_aids to do vectorised calculations" * "modifies if statement in get_time_from_infection_to_aids to avoid tests failing but adds redundancy" * "removes redundant comments" * Update hiv.py * Update hiv.py * "updates formatting" * Update hiv.py * Update src/scripts/profiling/scale_run.py Co-authored-by: Matt Graham <[email protected]> * Update src/scripts/profiling/scale_run.py Co-authored-by: Matt Graham <[email protected]> * "restructures get_time_from_infection_to_aids into two separate functions: 1) linear model parameter calculations, 2) sampling" * "changes transformation to list" * Avoid repeated dataframe access * Add comment explaining shape param special case * Resample only necessary subset + rename functions --------- Co-authored-by: Matt Graham <[email protected]>
1 parent a5912b3 commit 3c011e7

File tree

1 file changed

+83
-94
lines changed

1 file changed

+83
-94
lines changed

src/tlo/methods/hiv.py

Lines changed: 83 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -452,7 +452,9 @@ def pre_initialise_population(self):
452452
"age_years",
453453
conditions_are_mutually_exclusive=True,
454454
conditions_are_exhaustive=True)
455-
.when("<20", p["infection_to_death_weibull_scale_1519"])
455+
.when("==0", p["mean_survival_for_infants_infected_prior_to_birth"])
456+
.when(".between(1,4)", p["infection_to_death_infant_infection_after_birth_weibull_scale"])
457+
.when(".between(5, 19)", p["infection_to_death_weibull_scale_1519"])
456458
.when(".between(20, 24)", p["infection_to_death_weibull_scale_2024"])
457459
.when(".between(25, 29)", p["infection_to_death_weibull_scale_2529"])
458460
.when(".between(30, 34)", p["infection_to_death_weibull_scale_3034"])
@@ -467,7 +469,9 @@ def pre_initialise_population(self):
467469
"age_years",
468470
conditions_are_mutually_exclusive=True,
469471
conditions_are_exhaustive=True)
470-
.when("<20", p["infection_to_death_weibull_shape_1519"])
472+
.when("==0", 1) # Weibull with shape=1 equivalent to exponential distribution
473+
.when(".between(1,4)", p["infection_to_death_infant_infection_after_birth_weibull_shape"])
474+
.when(".between(5, 19)", p["infection_to_death_weibull_shape_1519"])
471475
.when(".between(20, 24)", p["infection_to_death_weibull_shape_2024"])
472476
.when(".between(25, 29)", p["infection_to_death_weibull_shape_2529"])
473477
.when(".between(30, 34)", p["infection_to_death_weibull_shape_3034"])
@@ -477,6 +481,16 @@ def pre_initialise_population(self):
477481
.when(">= 50", p["infection_to_death_weibull_shape_4549"])
478482
)
479483

484+
# -- Linear Model to give the mean months between aids and death depending on age
485+
self.lm["offset_parameter_for_months_from_aids_to_death"] = LinearModel.multiplicative(
486+
Predictor(
487+
"age_years",
488+
conditions_are_mutually_exclusive=True,
489+
conditions_are_exhaustive=True)
490+
.when("<5", p["mean_months_between_aids_and_death_infant"])
491+
.when(">=5", p["mean_months_between_aids_and_death"])
492+
)
493+
480494
# -- Linear Models for the Uptake of Services
481495
# Linear model that gives the increase in likelihood of seeking a 'Spontaneous' Test for HIV
482496
# condition must be not on ART for test
@@ -840,23 +854,27 @@ def initialise_simulation(self, sim):
840854
# AIDS Onset Event for those who are infected but not yet AIDS and have not ever started ART
841855
# NB. This means that those on ART at the start of the simulation may not have an AIDS event --
842856
# like it happened at some point in the past
843-
844-
for person_id in before_aids_idx:
845-
# get days until develops aids, repeating sampling until a positive number is obtained.
846-
days_until_aids = 0
847-
while days_until_aids <= 0:
848-
days_since_infection = (
849-
self.sim.date - df.at[person_id, "hv_date_inf"]
850-
).days
851-
days_infection_to_aids = np.round(
852-
(self.get_time_from_infection_to_aids(person_id)).months * 30.5
857+
scale, shape, offset = self.get_time_from_infection_to_aids_distribution_parameters(before_aids_idx)
858+
days_infection_to_aids = self.sample_time_from_infection_to_aids_given_parameters(scale, shape, offset)
859+
days_since_infection = (self.sim.date - df.loc[before_aids_idx, "hv_date_inf"])
860+
# If days_since_infection >= days_infection_to_aids for any person, their time until
861+
# AIDS onset would be non-positive, so resample those entries until all are positive
862+
days_until_aids_is_negative = days_since_infection >= days_infection_to_aids
863+
while np.any(days_until_aids_is_negative):
864+
days_infection_to_aids[days_until_aids_is_negative] = (
865+
self.sample_time_from_infection_to_aids_given_parameters(
866+
scale[days_until_aids_is_negative],
867+
shape[days_until_aids_is_negative],
868+
offset[days_until_aids_is_negative],
853869
)
854-
days_until_aids = days_infection_to_aids - days_since_infection
855-
856-
date_onset_aids = self.sim.date + pd.DateOffset(days=days_until_aids)
870+
)
871+
days_until_aids_is_negative = days_since_infection >= days_infection_to_aids
872+
days_until_aids = days_infection_to_aids - days_since_infection
873+
date_onset_aids = self.sim.date + pd.to_timedelta(days_until_aids, unit='D')
874+
for person_id, date in zip(before_aids_idx, date_onset_aids):
857875
sim.schedule_event(
858876
HivAidsOnsetEvent(person_id=person_id, module=self, cause='AIDS_non_TB'),
859-
date=date_onset_aids,
877+
date=date,
860878
)
861879

862880
# Schedule the AIDS death events for those who have got AIDS already
@@ -1153,93 +1171,64 @@ def do_new_infection(self, person_id):
11531171
df.at[person_id, "hv_date_inf"] = self.sim.date
11541172

11551173
# Schedule AIDS onset events for this person
1156-
date_onset_aids = self.sim.date + self.get_time_from_infection_to_aids(
1157-
person_id=person_id
1174+
parameters = self.get_time_from_infection_to_aids_distribution_parameters(
1175+
[person_id]
11581176
)
1177+
date_onset_aids = (
1178+
self.sim.date
1179+
+ self.sample_time_from_infection_to_aids_given_parameters(*parameters)
1180+
).iloc[0]
11591181
self.sim.schedule_event(
1160-
event=HivAidsOnsetEvent(self, person_id, cause='AIDS_non_TB'), date=date_onset_aids
1182+
event=HivAidsOnsetEvent(self, person_id, cause='AIDS_non_TB'),
1183+
date=date_onset_aids,
11611184
)
11621185

1163-
def get_time_from_infection_to_aids(self, person_id):
1164-
"""Gives time between onset of infection and AIDS, returning a pd.DateOffset.
1165-
For those infected prior to, or at, birth: (this is a draw from an exponential distribution)
1166-
For those infected after birth but before reaching age 5.0 (this is drawn from a weibull distribution)
1167-
For adults: (this is a drawn from a weibull distribution (with scale depending on age);
1168-
* NB. It is further assumed that the time from aids to death is 18 months.
1186+
def sample_time_from_infection_to_aids_given_parameters(self, scale, shape, offset):
1187+
"""Generate time(s) between onset of infection and AIDS as Pandas time deltas.
1188+
1189+
The times are generated from translated Weibull distributions discretised to
1190+
an integer number of months.
1191+
1192+
:param scale: Scale parameters of Weibull distributions (unit: years).
1193+
:param shape: Shape parameters of Weibull distributions.
1194+
:param offset: Offset to (negatively) shift Weibull variable by (unit: months).
1195+
1196+
:return: Generated time deltas.
11691197
"""
11701198

1171-
df = self.sim.population.props
1172-
age = df.at[person_id, "age_exact_years"]
1173-
p = self.parameters
1199+
months_to_death = self.rng.weibull(shape) * scale * 12
1200+
months_to_aids = np.round(months_to_death - offset).clip(0).astype(int)
11741201

1175-
if age == 0.0:
1176-
# The person is infected prior to, or at, birth:
1177-
months_to_death = int(self.rng.exponential(
1178-
scale=p["mean_survival_for_infants_infected_prior_to_birth"]
1179-
)
1180-
* 12,
1181-
)
1202+
return pd.to_timedelta(months_to_aids * 30.5, unit='D')
11821203

1183-
months_to_aids = int(
1184-
max(
1185-
0.0,
1186-
np.round(
1187-
months_to_death
1188-
- self.parameters["mean_months_between_aids_and_death_infant"]
1189-
),
1190-
)
1191-
)
1192-
elif age < 5.0:
1193-
# The person is infected after birth but before age 5.0:
1194-
months_to_death = int(
1195-
max(
1196-
0.0,
1197-
self.rng.weibull(
1198-
p[
1199-
"infection_to_death_infant_infection_after_birth_weibull_shape"
1200-
]
1201-
)
1202-
* p["infection_to_death_infant_infection_after_birth_weibull_scale"]
1203-
* 12,
1204-
)
1205-
)
1206-
months_to_aids = int(
1207-
max(
1208-
0.0,
1209-
np.round(
1210-
months_to_death
1211-
- self.parameters["mean_months_between_aids_and_death_infant"]
1212-
),
1213-
)
1214-
)
1215-
else:
1216-
# The person is infected after age 5.0
1217-
# - get the shape parameters (unit: years)
1218-
scale = (
1219-
self.lm["scale_parameter_for_infection_to_death"].predict(
1220-
self.sim.population.props.loc[[person_id]]
1221-
).values[0]
1222-
)
1223-
# - get the scale parameter (unit: years)
1224-
shape = (
1225-
self.lm["shape_parameter_for_infection_to_death"].predict(
1226-
self.sim.population.props.loc[[person_id]]
1227-
).values[0]
1228-
)
1229-
# - draw from Weibull and convert to months
1230-
months_to_death = self.rng.weibull(shape) * scale * 12
1231-
# - compute months to aids, which is somewhat shorter than the months to death
1232-
months_to_aids = int(
1233-
max(
1234-
0.0,
1235-
np.round(
1236-
months_to_death
1237-
- self.parameters["mean_months_between_aids_and_death"]
1238-
),
1239-
)
1240-
)
1204+
def get_time_from_infection_to_aids_distribution_parameters(self, person_ids):
1205+
"""Compute per-person parameters of distribution of time from infection to aids.
1206+
1207+
Evaluates three linear models which output age specific scale, shape and offset
1208+
parameters for the (translated) Weibull distribution used to generate the time
1209+
from infection to aids for an individual.
1210+
1211+
For those infected prior to, or at, birth, a Weibull distribution with shape
1212+
parameter 1 (equivalent to an exponential distribution) is used.
12411213
1242-
return pd.DateOffset(months=months_to_aids)
1214+
For those infected after birth a Weibull distribution with both shape and
1215+
scale depending on age is used.
1216+
1217+
:param person_ids: Iterable of ID indices of individuals to get parameters for.
1218+
1219+
:return: Per-person parameters as a 3-tuple ``(scale, shape, offset)`` of
1220+
``pandas.Series`` objects.
1221+
"""
1222+
subpopulation = self.sim.population.props.loc[person_ids]
1223+
# get the scale parameters (unit: years)
1224+
scale = self.lm["scale_parameter_for_infection_to_death"].predict(subpopulation)
1225+
# get the shape parameter
1226+
shape = self.lm["shape_parameter_for_infection_to_death"].predict(subpopulation)
1227+
# get the mean months between aids and death (unit: months)
1228+
offset = self.lm["offset_parameter_for_months_from_aids_to_death"].predict(
1229+
subpopulation
1230+
)
1231+
return scale, shape, offset
12431232

12441233
def get_time_from_aids_to_death(self):
12451234
"""Gives time between onset of AIDS and death, returning a pd.DateOffset.

0 commit comments

Comments
 (0)