Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Solve universe target issues #371

Merged
merged 19 commits into from
Nov 17, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 16 additions & 8 deletions tasks/au/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,9 +141,12 @@ def requires(self):
'source': SourceTags(),
'license': LicenseTags()
}
# all tables except B01/B02 require B01
if self.tablename != 'B01' and self.tablename != 'B02':
# all tables except B01 require B01
if self.tablename != 'B01':
requirements['B01'] = Columns(tablename='B01', year=self.year, profile=self.profile)
if self.tablename == 'B02':
requirements['B01'] = Columns(tablename='B01', year=self.year, profile=self.profile)
requirements['B17B'] = Columns(tablename='B17B', year=self.year, profile=self.profile)
if self.tablename == 'B04A':
requirements['B04B'] = Columns(tablename='B04B', year=self.year, profile=self.profile)
if self.tablename == 'B08A':
Expand Down Expand Up @@ -230,6 +233,7 @@ def columns(self):
# column req's from other tables
column_reqs = {}
column_reqs.update(input_.get('B01', {}))
column_reqs.update(input_.get('B02', {}))
column_reqs.update(input_.get('B04B', {}))
column_reqs.update(input_.get('B08B', {}))
column_reqs.update(input_.get('B10B', {}))
Expand Down Expand Up @@ -287,26 +291,30 @@ def columns(self):
denom_id = denom_id.strip()
if not denom_id:
continue

reltype = 'denominator'
if col_agg in ['median', 'average']:
reltype = 'universe'

if denom_id in column_reqs:
targets_dict[column_reqs[denom_id].get(session)] = 'denominator'
targets_dict[column_reqs[denom_id].get(session)] = reltype
else:
targets_dict[cols[denom_id]] = 'denominator'
targets_dict[cols[denom_id]] = reltype
targets_dict.pop(None, None)


cols[col_id] = OBSColumn(
id=col_id,
type='Numeric',
name=col_name,
description =tabledesc,
description=tabledesc,
# Ranking of importance, sometimes used to favor certain measures in auto-selection
# Weight of 0 will hide this column from the user. We generally use between 0 and 10
weight=5,
aggregate= col_agg or 'sum',
aggregate=col_agg or 'sum',
# Tags are our way of noting aspects of this measure like its unit, the country
# it's relevant to, and which section(s) of the catalog it should appear in
tags=[source, license, country, unittags[col_unit]],
targets= targets_dict
targets=targets_dict
)

# append the rest of the subsection tags
Expand Down
16 changes: 8 additions & 8 deletions tasks/au/meta/Metadata_2011_BCP_DataPack.csv
Original file line number Diff line number Diff line change
Expand Up @@ -107,14 +107,14 @@ B105,B01_Count_psns_occ_priv_dwgs_P,Count of Persons in occupied private dwellin
B106,B01_Count_Persons_other_dwgs_M,Count of Persons in other dwellings (Males),B01_Tot_P_M,B01,people,housing|age_gender,Males,,106,Selected Person Characteristics
B107,B01_Count_Persons_other_dwgs_F,Count of Persons in other dwellings (Females),B01_Tot_P_F,B01,people,housing|age_gender,Females,,107,Selected Person Characteristics
B108,B01_Count_Persons_other_dwgs_P,Count of Persons in other dwellings (Persons),B01_Tot_P_P,B01,people,housing|age_gender,Persons,,108,Selected Person Characteristics
B109,B02_Median_age_persons,Median age of (Persons),,B02,people,age_gender,Median age of persons,median,109,Selected Medians and Averages
B110,B02_Median_mortgage_repay_monthly,Median mortgage repayment monthly,,B02,money,housing,Median mortgage repayment ($/monthly),median,110,Selected Medians and Averages
B111,B02_Median_Tot_prsnl_inc_weekly,Median total personal income weekly,,B02,money,housing,Median total personal income ($/weekly),median,111,Selected Medians and Averages
B112,B02_Median_rent_weekly_,Median rent weekly,,B02,money,housing,Median rent ($/weekly),median,112,Selected Medians and Averages
B113,B02_Median_Tot_fam_inc_weekly,Median total family income weekly,,B02,money,housing,Median total family income ($/weekly),median,113,Selected Medians and Averages
B114,B02_Average_num_psns_per_bedroom,Average number of Persons per bedroom,,B02,people,housing,Average number of persons per bedroom,average,114,Selected Medians and Averages
B115,B02_Median_Tot_hhd_inc_weekly,Median total household income weekly,,B02,money,income,Median total household income ($/weekly),median,115,Selected Medians and Averages
B116,B02_Average_household_size,Average household size,,B02,people,housing,Average household size,average,116,Selected Medians and Averages
B109,B02_Median_age_persons,Median age of (Persons),B01_Tot_P_P,B02,people,age_gender,Median age of persons,median,109,Selected Medians and Averages
B110,B02_Median_mortgage_repay_monthly,Median mortgage repayment monthly,B17B_P_Tot_Tot,B02,money,housing,Median mortgage repayment ($/monthly),median,110,Selected Medians and Averages
B111,B02_Median_Tot_prsnl_inc_weekly,Median total personal income weekly,B17B_P_Tot_Tot,B02,money,housing,Median total personal income ($/weekly),median,111,Selected Medians and Averages
B112,B02_Median_rent_weekly_,Median rent weekly,B17B_P_Tot_Tot,B02,money,housing,Median rent ($/weekly),median,112,Selected Medians and Averages
B113,B02_Median_Tot_fam_inc_weekly,Median total family income weekly,B17B_P_Tot_Tot,B02,money,housing,Median total family income ($/weekly),median,113,Selected Medians and Averages
B114,B02_Average_num_psns_per_bedroom,Average number of Persons per bedroom,B01_Tot_P_P,B02,people,housing,Average number of persons per bedroom,average,114,Selected Medians and Averages
B115,B02_Median_Tot_hhd_inc_weekly,Median total household income weekly,B17B_P_Tot_Tot,B02,money,income,Median total household income ($/weekly),median,115,Selected Medians and Averages
B116,B02_Average_household_size,Average household size,B01_Tot_P_P,B02,people,housing,Average household size,average,116,Selected Medians and Averages
B256,B03_Total_Total,Visitors Total,B01_Tot_P_P,B03,people,age_gender,Total,,256,Place of Usual Residence on Census Night
B255,B03_Total_85ov,Visitors Age 85 years and over,B03_Total_Total,B03,people,age_gender,85 years and over,,255,Place of Usual Residence on Census Night
B254,B03_Total_75_84_yr,Visitors Age 75-84 years,B03_Total_Total,B03,people,age_gender,75-84 years,,254,Place of Usual Residence on Census Night
Expand Down
22 changes: 22 additions & 0 deletions tasks/base_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -832,6 +832,10 @@ def run(self):
LOGGER.info('checking for null columns on %s', self.output().table)
self.check_null_columns()

LOGGER.info('checking for medians/averages without universe target on %s',
self.output().table)
self.check_universe_in_aggregations()

LOGGER.info('create_indexes')
self.create_indexes(output)
current_session().flush()
Expand Down Expand Up @@ -887,6 +891,24 @@ def check_null_columns(self):
raise ValueError('The following columns of the table "{table}" contain only NULL values: {columns}'.format(
table=self.output().table, columns=', '.join([x[0] for x in result])))

def check_universe_in_aggregations(self):
    """Fail when a median/average column of the output table has no 'universe' target.

    Looks up, in the observatory metadata, every column attached to this
    task's output table whose ``aggregate`` is 'average' or 'median' and
    gathers the ``reltype`` of each ``obs_column_to_column`` row whose
    ``source_id`` is that column.  A column is reported when none of its
    reltypes equals 'universe' (compared case-insensitively); this
    includes columns that have no target rows at all.

    Raises:
        ValueError: naming the ids of every offending column.
    """
    session = current_session()
    # NOTE(review): the table name is interpolated into the SQL text, as the
    # original did; it comes from the task's own output target, not from
    # user input -- confirm before reusing this pattern elsewhere.
    query = (
        "SELECT c.id "
        "FROM observatory.obs_table t "
        "INNER JOIN observatory.obs_column_table ct ON t.id = ct.table_id "
        "INNER JOIN observatory.obs_column c ON ct.column_id = c.id "
        # LEFT JOIN keeps columns that have no targets; they surface with a
        # NULL reltype, which COALESCE turns into '' below.
        "LEFT JOIN observatory.obs_column_to_column cc ON c.id = cc.source_id "
        "WHERE t.tablename = '{table}' "
        "AND c.aggregate IN ('average', 'median') "
        "GROUP BY c.id "
        # '<> ALL' holds only when *every* collected reltype differs from
        # 'universe'.  '<> ANY' would also flag columns that combine a
        # universe target with other targets (e.g. denominators).
        "HAVING 'universe' <> ALL(ARRAY_AGG(COALESCE(LOWER(cc.reltype),'')))"
    ).format(table=self.output()._tablename)
    offending = session.execute(query).fetchall()

    if offending:
        raise ValueError("The following columns of the table \"{table}\" are aggregated as 'median' or 'average' "
                         "but lack of 'universe' target: {columns}".format(
                             table=self.output().table,
                             columns=', '.join([row[0] for row in offending])))

def output(self):
if not hasattr(self, '_columns'):
self._columns = self.columns()
Expand Down
14 changes: 7 additions & 7 deletions tasks/ca/statcan/cols_census.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from tasks.meta import OBSColumn, DENOMINATOR
from tasks.meta import OBSColumn, DENOMINATOR, UNIVERSE
from tasks.base_tasks import ColumnsTask
from tasks.tags import SectionTags, SubsectionTags, UnitTags, PublicTags
from tasks.ca.statcan.license import LicenseTags, SourceTags
Expand Down Expand Up @@ -693,7 +693,7 @@ def columns(self):
weight=3,
aggregate='median',
tags=[ca, unit_years, subsections['age_gender']],
targets={},)
targets={t001c001_t: UNIVERSE},)

t001c025_m = OBSColumn(
id='t001c025_m',
Expand All @@ -702,7 +702,7 @@ def columns(self):
weight=3,
aggregate='median',
tags=[ca, unit_years, subsections['age_gender']],
targets={},)
targets={t001c001_m: UNIVERSE},)

t001c025_f = OBSColumn(
id='t001c025_f',
Expand All @@ -711,7 +711,7 @@ def columns(self):
weight=3,
aggregate='median',
tags=[ca, unit_years, subsections['age_gender']],
targets={},)
targets={t001c001_f: UNIVERSE},)

t001c026_t = OBSColumn(
id='t001c026_t',
Expand Down Expand Up @@ -10005,7 +10005,7 @@ def columns(self):
weight=3,
aggregate='average',
tags=[ca, unit_people, subsections['families']],
targets={},)
targets={t005c029_t: UNIVERSE},)

t006c002_t = OBSColumn(
id='t006c002_t',
Expand Down Expand Up @@ -10335,7 +10335,7 @@ def columns(self):
weight=3,
aggregate='average',
tags=[ca, unit_people, subsections['housing']],
targets={},)
targets={t007c001_t: UNIVERSE},)

t007c008_t = OBSColumn(
id='t007c008_t',
Expand Down Expand Up @@ -10803,7 +10803,7 @@ def columns(self):
weight=3,
aggregate='average',
tags=[ca, unit_households, subsections['housing']],
targets={},)
targets={t007c048_t: UNIVERSE},)

t008c002_t = OBSColumn(
id='t008c002_t',
Expand Down
Loading