Skip to content

Commit 7c20a2e

Browse files
tbhallettmnjowe
andauthored
Update calibrations stats for COPD (#1087)
* pseudo-code * drop deaths * calibrating rate and relative rate of deaths * uncommenting some blocks of code * addressing key not found errors when running both COPD analyses and calibrations. Logging COPD logs in copd calibration * year specific calibration * add write-up --------- Co-authored-by: mnjowe <[email protected]>
1 parent b678823 commit 7c20a2e

File tree

3 files changed

+134
-39
lines changed

3 files changed

+134
-39
lines changed

Diff for: docs/write-ups/Copd.docx

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
version https://git-lfs.github.com/spec/v1
2-
oid sha256:c0c065a2c806bfdfa5d0eaabba8f123f03a9011ce725f11e4825b30af1a13197
3-
size 1416900
2+
oid sha256:bf9ab3048642b62fc782f1d7da017fb34e515bbdf9f436c410613fcb93b90a49
3+
size 761798

Diff for: src/scripts/copd_analyses/analysis_copd_prevalence_and_deaths.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -267,10 +267,13 @@ def plot_lung_function_categories_by_age_group(self):
267267
def plot_copd_deaths_by_lungfunction(self):
268268
""" a function to plot COPD deaths by lung function/obstruction """
269269
# get COPD deaths by lung function from copd logs
270-
deaths_lung_func = self.__logs_dict['copd_deaths_lung_func']
270+
deaths_lung_func = \
271+
parse_log_file(self.__logfile_path)['tlo.methods.demography.detail']['properties_of_deceased_persons']
272+
_plot_deaths = deaths_lung_func.loc[(deaths_lung_func.cause_of_death == 'COPD_cat5') |
273+
(deaths_lung_func.cause_of_death == 'COPD_cat6')]
271274

272275
# group by date and lung function
273-
deaths_grouped = deaths_lung_func.groupby(['date', 'lung_function']).size()
276+
deaths_grouped = _plot_deaths.groupby(['date', 'ch_lungfunction']).size()
274277
unstack_df = deaths_grouped.unstack()
275278
plot_lung_func_deaths = unstack_df.groupby(unstack_df.index.year).sum()
276279
plot_lung_func_deaths = plot_lung_func_deaths.apply(lambda row: row / row.sum(), axis=1)
@@ -297,6 +300,7 @@ def plot_modal_gbd_deaths_by_gender(self):
297300
fig, axs = plt.subplots(nrows=1, ncols=2, sharey=True, sharex=True)
298301
for _col, sex in enumerate(('M', 'F')):
299302
plot_df = death_compare.loc[(['2010-2014', '2015-2019'], sex, slice(None), 'COPD')].groupby('period').sum()
303+
plot_df = plot_df.loc[['2010-2014', '2015-2019']]
300304
ax = plot_df['model'].plot.bar(color=self.__plot_colors[6], label='Model', ax=axs[_col], rot=0)
301305
ax.errorbar(x=plot_df['model'].index, y=plot_df.GBD_mean,
302306
yerr=[plot_df.GBD_lower, plot_df.GBD_upper],

Diff for: src/scripts/copd_analyses/copd_calibrations.py

+126-35
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@
55
import numpy as np
66
import pandas as pd
77
from matplotlib import pyplot as plt
8+
from matplotlib.font_manager import FontProperties
89

9-
from tlo import Date, Simulation
10+
from tlo import Date, Simulation, logging
1011
from tlo.analysis.utils import parse_log_file, unflatten_flattened_multi_index_in_logging
1112
from tlo.methods import (
1213
copd,
@@ -100,15 +101,17 @@ def copd_prevalence(self):
100101
)
101102
ax.legend(["modal", "observed data"])
102103
_col_counter += 1 # increment column counter
104+
plt.figtext(0.5, 0.01, "Prevalence of mild, moderate, severe, very severe COPD at mean age 49 (SD 17 years)*",
105+
ha="center", bbox={"facecolor": "grey", "alpha": 0.5, "pad": 5})
103106
plt.show()
104107

105108
def rate_of_death_by_lungfunction_category(self):
106109
"""Make the comparison to Alupo P et al. (2021) study. This study found that the rate of COPD death was as
107110
follows:
108-
Persons with Mild COPD Stage: 3.8 deaths per 100 person-years
109-
Persons with Moderate COPD Stage: 5.1 deaths per 100 person-years
110-
Persons with Severe COPD Stage: 15.3 deaths per 100 person-years
111-
Persons with Very Severe COPD Stage: 27.8 deaths per 100 person-years
111+
Persons with Mild COPD Stage: 3.8 all-cause deaths per 100 person-years
112+
Persons with Moderate COPD Stage: 5.1 all-cause deaths per 100 person-years
113+
Persons with Severe COPD Stage: 15.3 all-cause deaths per 100 person-years
114+
Persons with Very Severe COPD Stage: 27.8 all-cause deaths per 100 person-years
112115
We will compare this with fraction of people that die each year, averaged over many years, as an estimate of the
113116
risk of persons in each stage dying.
114117
We assume that the disease stages in the model correspond to the study as:
@@ -120,33 +123,32 @@ def rate_of_death_by_lungfunction_category(self):
120123

121124
# Get the number of deaths each year in each category of lung function
122125
output = parse_log_file(self.__logfile_path) # parse output file
123-
demog = output['tlo.methods.demography']['death'] # read deaths from demography module
124-
125-
model = demog.assign(
126-
year=lambda x: x['date'].dt.year).groupby(['year', 'sex', 'age', 'cause'])['person_id'].count()
127-
128-
# extract copd deaths:
129-
copd_deaths = pd.concat(
130-
{
131-
'mild': (
132-
model.loc[(slice(None), slice(None), slice(None), ['COPD_cat1'])].groupby('year').sum()
133-
+ model.loc[(slice(None), slice(None), slice(None), ['COPD_cat2'])].groupby('year').sum()
134-
),
135-
'moderate': (
136-
model.loc[(slice(None), slice(None), slice(None), ["COPD_cat3"])].groupby("year").sum()
137-
+ model.loc[(slice(None), slice(None), slice(None), ["COPD_cat4"])].groupby("year").sum()
138-
),
139-
'severe': model.loc[(slice(None), slice(None), slice(None), ['COPD_cat5'])].groupby('year').sum(),
140-
'very_severe': model.loc[(slice(None), slice(None), slice(None), ['COPD_cat6'])].groupby('year').sum(),
141-
},
142-
axis=1
143-
).fillna(0).astype(float)
126+
127+
# read deaths from demography detail logger
128+
demog = output['tlo.methods.demography.detail']['properties_of_deceased_persons']
129+
130+
# only consider deaths from individuals above 30
131+
demog = demog.loc[demog.age_years > 30]
132+
133+
# assign a function that groups deaths by year and lung function thereafter do count
134+
all_deaths = demog.assign(
135+
year=lambda x: x['date'].dt.year,
136+
cat=lambda x: x['ch_lungfunction']).groupby(['year', 'cat'])['cause_of_death'].count()
137+
138+
# re-construct the dataframe by transforming lung function categories into columns
139+
all_deaths_df = all_deaths.unstack().assign(
140+
none=lambda x: x[0],
141+
mild=lambda x: x[1] + x[2],
142+
moderate=lambda x: x[3] + x[4],
143+
severe=lambda x: x[5],
144+
very_severe=lambda x: x[6]).drop(columns=[0, 1, 2, 3, 4, 5, 6])
144145

145146
# Get the number of person each year in each category of lung function (irrespective of sex/age/smokingstatus)
146147
# average within the year
147148
prev = self.construct_dfs()['copd_prevalence']
148149
prev = (prev.groupby(axis=1, by=prev.columns.droplevel([0, 1, 2])).sum()
149150
.groupby(axis=0, by=prev.index.year).mean())
151+
prev['none'] = prev['0']
150152
prev['mild'] = prev['1'] + prev['2']
151153
prev['moderate'] = prev['3'] + prev['4']
152154
prev['severe'] = prev['5']
@@ -155,12 +157,97 @@ def rate_of_death_by_lungfunction_category(self):
155157

156158
# Compute fraction that die each year in each category of lung function, average over many years and compare to
157159
# data
158-
death_rate_per100 = (100 * copd_deaths / prev).mean()
159-
print(death_rate_per100)
160-
# mild 0.000000 (vs 3.8 in Alupo et al)
161-
# moderate 0.000000 (vs 5.1 in Alupo et al)
162-
# severe 1.674310 (vs 15.3 in Alupo et al)
163-
# very_severe 4.507594 (vs 27.8 in Alupo et al)
160+
death_rate_per100 = (100 * all_deaths_df.loc[[2021]] / prev.loc[[2021]]).mean()
161+
162+
model_and_observed_data_dict = {'model': [death_rate_per100.mild, death_rate_per100.moderate,
163+
death_rate_per100.severe, death_rate_per100.very_severe],
164+
'data': [3.8, 5.1, 15.3, 27.8]
165+
}
166+
167+
# plot rate of death (per 100 per year) by COPD stage (mild, moderate, severe, very severe)
168+
plot_rate_df = pd.DataFrame(index=['mild', 'moderate', 'severe', 'very_severe'],
169+
data=model_and_observed_data_dict)
170+
fig, axes = plt.subplots(ncols=4, sharey=True)
171+
_col_counter = 0
172+
for _label in plot_rate_df.index:
173+
# do plotting
174+
ax = plot_rate_df.iloc[_col_counter:1 + _col_counter].plot.line(ax=axes[_col_counter],
175+
title=f"{_label} COPD",
176+
y='model',
177+
marker='o',
178+
color='blue',
179+
ylabel="rate of COPD death per 100 "
180+
"person years"
181+
)
182+
183+
plot_rate_df.iloc[_col_counter:1 + _col_counter].plot.line(
184+
y='data',
185+
marker='^',
186+
color='red',
187+
ax=ax
188+
)
189+
_col_counter += 1 # increment column counter
190+
# remove all the subplot legends
191+
for ax in axes:
192+
ax.get_legend().remove()
193+
194+
fontP = FontProperties()
195+
fontP.set_size('small')
196+
197+
# set legend
198+
legend_keys = ['Model (Risk of death per 100 persons)', 'Data (Rate of death per 100 person-years)']
199+
plt.legend(legend_keys, bbox_to_anchor=(0.1, 0.74), loc='upper left', prop=fontP)
200+
plt.figtext(0.5, 0.01, "Rate of death (per 100 per year) by COPD stage (mild, moderate, severe, very severe)",
201+
ha="center", bbox={"facecolor": "grey", "alpha": 0.5, "pad": 5})
202+
203+
# show plot
204+
plt.show()
205+
206+
# COMPUTE THE RELATIVE RATES TO NONE
207+
rr_death_rate_per100 = (100 * all_deaths_df.loc[[2023]] / prev.loc[[2023]]).mean()
208+
rr_rates = rr_death_rate_per100 / rr_death_rate_per100.loc['none']
209+
210+
rr_model_and_observed_data_dict = {'model': [rr_rates.none, rr_rates.mild, rr_rates.moderate,
211+
rr_rates.severe + rr_rates.very_severe],
212+
'data': [1.0, 2.4, 3.5, 6.6]
213+
}
214+
215+
# plot relative rate of (all cause) death according to COPD stage (none, mild, moderate, severe + v severe)
216+
plot_rr_rate_df = pd.DataFrame(index=['none', 'mild', 'moderate', 'severe_and_very_severe'],
217+
data=rr_model_and_observed_data_dict)
218+
fig, axes = plt.subplots(ncols=4, sharey=True)
219+
_col_counter = 0
220+
for _label in plot_rr_rate_df.index:
221+
# do plotting
222+
ax = plot_rr_rate_df.iloc[_col_counter:1 + _col_counter].plot.line(ax=axes[_col_counter],
223+
title=f"{_label} COPD",
224+
y='model',
225+
marker='o',
226+
color='blue',
227+
ylabel="relative rate of COPD death per "
228+
"100 "
229+
"person years"
230+
)
231+
232+
plot_rr_rate_df.iloc[_col_counter:1 + _col_counter].plot.line(
233+
y='data',
234+
marker='^',
235+
color='red',
236+
ax=ax
237+
)
238+
_col_counter += 1 # increment column counter
239+
# remove all the subplot legends
240+
for ax in axes:
241+
ax.get_legend().remove()
242+
243+
fontP = FontProperties()
244+
fontP.set_size('small')
245+
# set legend
246+
plt.legend(['Model', 'Data'], bbox_to_anchor=(0.1, 0.74), loc='upper left', prop=fontP)
247+
plt.figtext(0.5, 0.01, "Relative rate of (all cause) death according to COPD stage (none, mild, moderate, "
248+
"severe + v severe)", ha="center", bbox={"facecolor": "grey", "alpha": 0.5, "pad": 5})
249+
# show plot
250+
plt.show()
164251

165252

166253
start_date = Date(2010, 1, 1)
@@ -181,6 +268,11 @@ def get_simulation(popsize):
181268
log_config={
182269
'filename': 'copd_analyses',
183270
'directory': outputpath,
271+
'custom_levels': {
272+
'*': logging.WARNING,
273+
'tlo.methods.demography.detail': logging.INFO,
274+
'tlo.methods.copd': logging.INFO,
275+
}
184276
},
185277
)
186278
sim.register(demography.Demography(resourcefilepath=resourcefilepath),
@@ -200,9 +292,8 @@ def get_simulation(popsize):
200292

201293

202294
# run simulation and store logfile path
203-
# sim = get_simulation(50000)
204-
# path_to_logfile = sim.log_filepath
205-
path_to_logfile = Path('/Users/tbh03/GitHub/TLOmodel/outputs/copd/copd_analyses__2023-08-24T170704.log')
295+
sim = get_simulation(50000)
296+
path_to_logfile = sim.log_filepath
206297

207298
# initialise Copd analyses class
208299
copd_analyses = CopdCalibrations(logfile_path=path_to_logfile)

0 commit comments

Comments
 (0)