Skip to content

Commit b844efe

Browse files
tdm32 and tbhallett authored
allocate population equally across districts at initialisation (#1331)
* fix failing test * fix unused import statement * edit optional dependency in demography.py * roll back simulation.py * put kwarg in demography.py * update test * roll back incidental change * factorize calc * add is_alive * roll back incidental changes * make static for clarity * roll back incidental changes --------- Co-authored-by: Tim Hallett <[email protected]>
1 parent 3bdba8c commit b844efe

File tree

2 files changed

+82
-1
lines changed

2 files changed

+82
-1
lines changed

src/tlo/methods/demography.py

+54-1
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,10 @@ class Demography(Module):
7373
The core demography module.
7474
"""
7575

76-
def __init__(self, name=None, resourcefilepath=None):
76+
def __init__(self, name=None, resourcefilepath=None, equal_allocation_by_district: bool = False):
7777
super().__init__(name)
7878
self.resourcefilepath = resourcefilepath
79+
self.equal_allocation_by_district = equal_allocation_by_district
7980
self.initial_model_to_data_popsize_ratio = None # will store scaling factor
8081
self.popsize_by_year = dict() # will store total population size each year
8182
self.causes_of_death = dict() # will store all the causes of death that are possible in the simulation
@@ -245,6 +246,8 @@ def initialise_population(self, population):
245246
init_pop,
246247
max_age=self.parameters['max_age_initial']
247248
)
249+
if self.equal_allocation_by_district:
250+
init_pop = self._edit_init_pop_so_that_equal_number_in_each_district(init_pop)
248251

249252
# randomly pick from the init_pop sheet, to allocate characteristic to each person in the df
250253
demog_char_to_assign = init_pop.iloc[self.rng.choice(init_pop.index.values,
@@ -381,6 +384,56 @@ def _edit_init_pop_to_prevent_persons_greater_than_max_age(self, df, max_age: in
381384
_df.prob = _df.prob / _df.prob.sum() # Rescale `prob` so that it sums to 1.0
382385
return _df.reset_index(drop=True)
383386

387+
@staticmethod
def _edit_init_pop_so_that_equal_number_in_each_district(df) -> pd.DataFrame:
    """Return an edited copy of the `pd.DataFrame` describing the probability of persons in the population being
    created with certain characteristics, re-weighted so that there is an equal (expected) number of persons in
    each district.

    The input is not modified. The columns read are `District_Num`, `Age`, `Sex`, `Count` and `prob`. In the
    returned frame `Count` is rescaled so that every district sums to the same total (the overall total divided by
    the number of districts) while the share of each row within its own district is unchanged, and `prob` is
    recomputed from the new `Count` values.

    :param df: dataframe with one row per combination of characteristics, including the columns listed above.
    :return: a new dataframe of the same shape with updated `Count` and `prob` columns.
    :raises AssertionError: if the internal consistency checks on the result fail (e.g. the `prob` column of the
        input does not sum to 1.0, or is inconsistent with `Count`).
    """

    district_nums = df['District_Num'].unique()

    # Target size of each district
    target_size_for_district = df['Count'].sum() / len(district_nums)

    # Total `Count` of the district that each row belongs to (aligned row-by-row with `df`)
    count_in_own_district = df.groupby('District_Num')['Count'].transform('sum')

    # Make new version (a copy) of the dataframe. Each row keeps its share of its own district, but every district
    # is scaled to the common target size. Assigning the whole column at once (rather than slice-by-slice) means
    # an integer `Count` column is cleanly replaced by a float one.
    df_new = df.copy()
    df_new['Count'] = target_size_for_district * (df['Count'] / count_in_own_district)

    # Recompute "prob" column (i.e. the probability of being in that category)
    df_new['prob'] = df_new['Count'] / df_new['Count'].sum()

    # Check that the resulting dataframe is of the same size/shape as the original; that Count and prob make
    # sense; and that we have preserved the age/sex breakdown within each district.
    # N.B. Comparisons use a tolerance: the sums are the result of floating-point rescaling, so are not guaranteed
    # to be exactly equal to one another (or to 1.0) even when everything is correct.
    def all_elements_identical(x):
        return np.allclose(x, x[0])

    assert df_new.shape == df.shape
    assert np.isclose(df['Count'].sum(), df_new['Count'].sum())
    assert np.isclose(df['prob'].sum(), 1.0) and np.isclose(df_new['prob'].sum(), 1.0)
    assert all_elements_identical(df_new.groupby('District_Num')['prob'].sum().values)

    def get_age_sex_breakdown_in_district(dat, district_num):
        # Select using the frame passed in (not the enclosing `df`), so the helper is correct for any frame
        in_district = dat['District_Num'] == district_num
        return (
            dat.loc[in_district].groupby(['Age', 'Sex'])['prob'].sum()
            / dat.loc[in_district, 'prob'].sum()
        )

    for _d in district_nums:
        pd.testing.assert_series_equal(
            get_age_sex_breakdown_in_district(df, _d),
            get_age_sex_breakdown_in_district(df_new, _d)
        )

    # Return the new dataframe
    return df_new
436+
384437
def process_causes_of_death(self):
385438
"""
386439
1) Register all causes of deaths defined by Module

tests/test_demography.py

+28
Original file line numberDiff line numberDiff line change
@@ -374,3 +374,31 @@ def test_ageing_of_old_people_up_to_max_age(simulation):
374374
# All persons should have died, with a cause of 'Other'
375375
assert not df.loc[ever_alive].is_alive.any()
376376
assert (df.loc[ever_alive, 'cause_of_death'] == 'Other').all()
377+
378+
379+
def test_equal_allocation_by_district(seed):
    """
    Check when key-word argument `equal_allocation_by_district=True` that each district has an identical population size
    """

    resourcefilepath = Path(os.path.dirname(__file__)) / '../resources'
    sim = Simulation(start_date=start_date, seed=seed)
    sim.register(
        demography.Demography(
            resourcefilepath=resourcefilepath,
            equal_allocation_by_district=True,
        )
    )
    population_per_district = 10_000
    number_of_districts = len(sim.modules['Demography'].districts)
    popsize = number_of_districts * population_per_district
    sim.make_initial_population(n=popsize)
    sim.simulate(end_date=sim.start_date)  # Simulate for zero days

    # check population size
    df = sim.population.props
    assert df.is_alive.sum() == popsize

    # check total within each district is (close to being) identical and matches the target population of each
    # district. N.B. The comparison must be against the target size: comparing the observed sizes with themselves
    # would always pass, whatever the allocation.
    pop_size_by_district = df.loc[df.is_alive].groupby('district_of_residence').size()
    assert np.allclose(pop_size_by_district.values, population_per_district, rtol=0.05)

0 commit comments

Comments
 (0)