@@ -73,9 +73,10 @@ class Demography(Module):
73
73
The core demography module.
74
74
"""
75
75
76
- def __init__ (self , name = None , resourcefilepath = None ):
76
+ def __init__ (self , name = None , resourcefilepath = None , equal_allocation_by_district : bool = False ):
77
77
super ().__init__ (name )
78
78
self .resourcefilepath = resourcefilepath
79
+ self .equal_allocation_by_district = equal_allocation_by_district
79
80
self .initial_model_to_data_popsize_ratio = None # will store scaling factor
80
81
self .popsize_by_year = dict () # will store total population size each year
81
82
self .causes_of_death = dict () # will store all the causes of death that are possible in the simulation
@@ -245,6 +246,8 @@ def initialise_population(self, population):
245
246
init_pop ,
246
247
max_age = self .parameters ['max_age_initial' ]
247
248
)
249
+ if self .equal_allocation_by_district :
250
+ init_pop = self ._edit_init_pop_so_that_equal_number_in_each_district (init_pop )
248
251
249
252
# randomly pick from the init_pop sheet, to allocate characteristic to each person in the df
250
253
demog_char_to_assign = init_pop .iloc [self .rng .choice (init_pop .index .values ,
@@ -381,6 +384,56 @@ def _edit_init_pop_to_prevent_persons_greater_than_max_age(self, df, max_age: in
381
384
_df .prob = _df .prob / _df .prob .sum () # Rescale `prob` so that it sums to 1.0
382
385
return _df .reset_index (drop = True )
383
386
387
@staticmethod
def _edit_init_pop_so_that_equal_number_in_each_district(df) -> pd.DataFrame:
    """Return an edited copy of the initial-population table in which every district has the same total size.

    `df` is the `pd.DataFrame` describing the probability of persons in the population being created with
    certain characteristics. This function reads its `District_Num`, `Age`, `Sex`, `Count` and `prob` columns.

    The `Count` values are rescaled so that each district's total equals the overall total divided by the
    number of distinct districts, while the relative age/sex breakdown within each district is left unchanged.
    The `prob` column is then recomputed from the new `Count` values.

    :param df: table of categories with `District_Num`, `Age`, `Sex`, `Count` and `prob` columns; its `prob`
        column is expected to sum to 1.0 (within floating-point tolerance).
    :return: a new dataframe (the input is not modified) with updated `Count` and `prob` columns.
    :raises AssertionError: if any of the internal consistency checks on the result fails.
    """

    district_nums = df['District_Num'].unique()

    # Target size of each district
    target_size_for_district = df['Count'].sum() / len(district_nums)

    # Total `Count` of the district that each row belongs to (aligned row-by-row with `df`)
    count_in_own_district = df.groupby('District_Num')['Count'].transform('sum')

    # Make new version (a copy) of the dataframe. The whole `Count` column is replaced in one assignment, so an
    # integer-typed `Count` column is cleanly replaced by floats (no element-wise write into an int column).
    df_new = df.copy()
    df_new['Count'] = target_size_for_district * (df['Count'] / count_in_own_district)

    # Recompute "prob" column (i.e. the probability of being in that category)
    df_new['prob'] = df_new['Count'] / df_new['Count'].sum()

    # Check that the resulting dataframe is of the same size/shape as the original; that Count and prob make
    # sense; and that we have preserved the age/sex breakdown within each district.
    # N.B. Sums of floats are compared with a tolerance: rescaling introduces rounding error, so exact equality
    # would fail spuriously.
    assert df.shape == df_new.shape
    assert np.isclose(df['Count'].sum(), df_new['Count'].sum())
    assert np.isclose(df['prob'].sum(), 1.0)
    assert np.isclose(df_new['prob'].sum(), 1.0)

    prob_by_district = df_new.groupby('District_Num')['prob'].sum().values
    assert np.allclose(prob_by_district, prob_by_district[0])

    def get_age_sex_breakdown_in_district(dat, district_num):
        """Return the share of `prob` in each (Age, Sex) group among the rows of `dat` in the given district."""
        in_district = dat['District_Num'] == district_num  # mask built from `dat` itself
        return (
            dat.loc[in_district].groupby(['Age', 'Sex'])['prob'].sum()
            / dat.loc[in_district, 'prob'].sum()
        )

    for _d in district_nums:
        pd.testing.assert_series_equal(
            get_age_sex_breakdown_in_district(df, _d),
            get_age_sex_breakdown_in_district(df_new, _d)
        )

    # Return the new dataframe
    return df_new
436
+
384
437
def process_causes_of_death (self ):
385
438
"""
386
439
1) Register all causes of deaths defined by Module
0 commit comments