Skip to content

Commit a384c90

Browse files
authored
Malaria: use more efficient calls for some Pandas operations (#1590)
* Replace some basic number counting with more efficient pandas access
* Summing a boolean array gives the count
1 parent ae59120 commit a384c90

File tree

1 file changed

+9
-14
lines changed

1 file changed

+9
-14
lines changed

src/tlo/methods/malaria.py

+9 -14
Original file line numberDiff line numberDiff line change
@@ -417,11 +417,8 @@ def malaria_poll2(self, population):
417417
def _draw_incidence_for(_col, _where):
418418
"""a helper function to perform random draw for selected individuals on column of probabilities"""
419419
# create an index from the individuals to lookup entries in the current incidence table
420-
district_age_lookup = (
421-
df[_where]
422-
.set_index(['district_num_of_residence', 'ma_age_edited'])
423-
.index
424-
)
420+
district_age_lookup = pd.MultiIndex.from_frame(df.loc[_where, ['district_num_of_residence', 'ma_age_edited']])
421+
425422
# get the monthly incidence probabilities for these individuals
426423
monthly_prob = curr_inc.loc[district_age_lookup, _col]
427424
# update the index so it's the same as the original population dataframe for these individuals
@@ -1579,10 +1576,8 @@ def apply(self, population):
15791576
# infected in the last time-step, clinical and severe cases only
15801577
# incidence rate per 1000 person-years
15811578
# include those cases that have died in the case load
1582-
tmp = len(
1583-
df.loc[(df.ma_date_symptoms > (now - DateOffset(months=self.repeat)))]
1584-
)
1585-
pop = len(df[df.is_alive])
1579+
tmp = sum(df.ma_date_symptoms > (now - DateOffset(months=self.repeat)))
1580+
pop = sum(df.is_alive)
15861581

15871582
inc_1000py = ((tmp / pop) * 1000) if pop else 0
15881583

@@ -1594,7 +1589,7 @@ def apply(self, population):
15941589
]
15951590
)
15961591

1597-
pop2_10 = len(df[df.is_alive & (df.age_years.between(2, 10))])
1592+
pop2_10 = sum(df.is_alive & (df.age_years.between(2, 10)))
15981593
inc_1000py_2_10 = ((tmp2 / pop2_10) * 1000) if pop2_10 else 0
15991594

16001595
inc_1000py_hiv = 0 # if running without hiv/tb
@@ -1643,7 +1638,7 @@ def apply(self, population):
16431638
)
16441639

16451640
# population size - children
1646-
child2_10_pop = len(df[df.is_alive & (df.age_years.between(2, 10))])
1641+
child2_10_pop = sum(df.is_alive & (df.age_years.between(2, 10)))
16471642

16481643
# prevalence in children aged 2-10
16491644
child_prev = child2_10_inf / child2_10_pop if child2_10_pop else 0
@@ -1655,7 +1650,7 @@ def apply(self, population):
16551650
& ((df.ma_inf_type == 'clinical') | (df.ma_inf_type == 'severe'))
16561651
]
16571652
)
1658-
pop2 = len(df[df.is_alive])
1653+
pop2 = sum(df.is_alive)
16591654
prev_clin = total_clin / pop2
16601655

16611656
prev = {
@@ -1754,9 +1749,9 @@ def apply(self, population):
17541749

17551750
# ------------------------------------ PREVALENCE OF INFECTION ------------------------------------
17561751
infected = (
1757-
df[df.is_alive & df.ma_is_infected].groupby('district_num_of_residence').size()
1752+
df.district_num_of_residence[df.is_alive & df.ma_is_infected].value_counts()
17581753
)
1759-
pop = df[df.is_alive].groupby('district_num_of_residence').size()
1754+
pop = df.district_num_of_residence[df.is_alive].value_counts()
17601755
prev = infected / pop
17611756
prev_ed = prev.fillna(0)
17621757
assert prev_ed.all() >= 0 # checks

0 commit comments

Comments
 (0)