-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathmeasurement_of_interest_test.py
121 lines (104 loc) · 4.73 KB
/
measurement_of_interest_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
"""Tests for query measurement_of_interest.sql.
See https://github.com/verilylifesciences/analysis-py-utils for more details
about the testing framework.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from datetime import date
from datetime import datetime
from dateutil import tz
from ddt import ddt
import os
import unittest
from verily.bigquery_wrapper import bq_test_case
# File name of the SQL template under test. QueryTest.setUpClass joins it to
# the directory containing this test file and reads the query text from there.
SQL_TEMPLATE = "measurement_of_interest.sql"
@ddt
class QueryTest(bq_test_case.BQTestCase):
    """Run measurement_of_interest.sql against small hand-built tables.

    The class loads the SQL template that sits next to this file, creates
    `person`, `concept`, `measurement_ext` and `measurement` tables from
    literal rows, and checks the rows returned by the formatted query.
    """

    @classmethod
    def setUpClass(cls):
        """Run the base-class setup and load the SQL template text.

        The base class is asked not to use mocks (`use_mocks=False`).
        The template contents are stored on `cls.sql_to_test`.
        """
        super(QueryTest, cls).setUpClass(use_mocks=False)
        template_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), SQL_TEMPLATE)
        # Read through a context manager so the file handle is closed as soon
        # as the text is loaded rather than whenever it is garbage collected.
        with open(template_path, "r") as template_file:
            cls.sql_to_test = template_file.read()

    @classmethod
    def create_mock_tables(cls):
        """Create the input tables and record the dataset that holds them.

        Each table is built from a literal `UNNEST([STRUCT...])` query. The
        "<project>.<dataset>" prefix of the created tables is stored on
        `cls.src_dataset` for use when formatting the query under test.
        """
        # NOTE(review): presumably invoked by the base-class setUpClass;
        # confirm against the bq_test_case framework.
        cls.client.create_table_from_query("""
SELECT * FROM UNNEST([
STRUCT<person_id INT64,
birth_datetime TIMESTAMP,
sex_at_birth_concept_id INT64>
(1001, '1990-12-31 00:00:00 UTC', 501),
(1002, '1950-08-01 00:00:00 UTC', 500),
(1003, '1965-06-30 00:00:00 UTC', 500)
])
""", cls.client.path("person"))

        cls.client.create_table_from_query("""
SELECT * FROM UNNEST([
STRUCT<concept_id INT64,
concept_name STRING>
( 0, 'No matching concept'),
(123, 'Hemoglobin'),
(456, 'gram per deciliter'),
(500, 'FEMALE'),
(501, 'MALE')
])
""", cls.client.path("concept"))

        cls.client.create_table_from_query("""
SELECT * FROM UNNEST([
STRUCT<measurement_id INT64,
src_id STRING>
(1, 'EHR site1'),
(2, 'EHR site1'),
(3, 'PPI/PM'),
(4, 'EHR site2'),
(5, 'EHR site2'),
(6, 'EHR site2')
])
""", cls.client.path("measurement_ext"))

        cls.client.create_table_from_query("""
SELECT * FROM UNNEST([
STRUCT<measurement_id INT64,
person_id INT64,
measurement_concept_id INT64,
unit_concept_id INT64,
operator_concept_id INT64,
measurement_date DATE,
measurement_datetime TIMESTAMP,
measurement_type_concept_id INT64,
value_as_number FLOAT64,
value_as_concept_id INT64,
range_low FLOAT64,
range_high FLOAT64>
(1, 1001, 123, 456, NULL, '2005-12-31', '2005-12-31 10:30:00 UTC', NULL, 42.0, NULL, 0, 999),
(2, 1001, 123, 456, NULL, '2007-09-11', '2007-09-11 08:00:00 UTC', NULL, 13.5, NULL, 0, 999),
(3, 1001, 123, 456, NULL, '2007-09-11', '2007-09-11 20:59:00 UTC', NULL, NULL, 100, 0, 999),
(4, 1002, 123, 456, NULL, '2008-02-10', '2008-02-10 23:30:00 UTC', NULL, NULL, NULL, 0, 999),
(5, 1002, 123, 456, 789, '2008-02-10', '2008-02-10 23:30:00 UTC', NULL, 7.2, NULL, 0, 999),
# This measurement is for someone not in our cohort.
(6, 1003, 123, 456, 789, '2010-01-01', '2010-10-01 23:30:00 UTC', NULL, 500, NULL, 0, 999)
])
""", cls.client.path("measurement"))

        # Get the project id and dataset name where the temp tables are stored.
        (project_id, dataset_id, _) = cls.client.parse_table_path(
            cls.client.path("any_temp_table"))
        cls.src_dataset = ".".join([project_id, dataset_id])

    def test(self):
        """Format the template and compare the query result to fixed rows.

        The cohort is restricted to `person_id <= 1002`, so the measurement
        row for person 1003 is not among the expected results.
        """
        sql = self.sql_to_test.format(
            CDR=self.src_dataset,
            COHORT_QUERY="SELECT person_id FROM `{}.person` WHERE person_id <= 1002".format(self.src_dataset),
            MEASUREMENT_CONCEPT_ID=123,
            UNIT_CONCEPT_ID=456)

        # All expected timestamps are in UTC; look the zone up once.
        utc = tz.gettz("UTC")

        # Column order of each expected row:
        #   person_id, birth_datetime, sex_at_birth, src_id,
        #   measurement_concept_id, measurement_date, measurement_datetime,
        #   measurement_type_concept_id, operator_concept_id,
        #   value_as_number, value_as_concept_id, unit_concept_id,
        #   range_low, range_high
        expected = [
            (1001, datetime(1990, 12, 31, 0, 0, tzinfo=utc), "MALE",
             "EHR site1", 123, date(2005, 12, 31),
             datetime(2005, 12, 31, 10, 30, tzinfo=utc),
             None, None, 42.0, None, 456, 0, 999),
            (1001, datetime(1990, 12, 31, 0, 0, tzinfo=utc), "MALE",
             "EHR site1", 123, date(2007, 9, 11),
             datetime(2007, 9, 11, 8, 0, tzinfo=utc),
             None, None, 13.5, None, 456, 0, 999),
            (1001, datetime(1990, 12, 31, 0, 0, tzinfo=utc), "MALE",
             "PPI/PM", 123, date(2007, 9, 11),
             datetime(2007, 9, 11, 20, 59, tzinfo=utc),
             None, None, None, 100, 456, 0, 999),
            (1002, datetime(1950, 8, 1, 0, 0, tzinfo=utc), "FEMALE",
             "EHR site2", 123, date(2008, 2, 10),
             datetime(2008, 2, 10, 23, 30, tzinfo=utc),
             None, None, None, None, 456, 0, 999),
            (1002, datetime(1950, 8, 1, 0, 0, tzinfo=utc), "FEMALE",
             "EHR site2", 123, date(2008, 2, 10),
             datetime(2008, 2, 10, 23, 30, tzinfo=utc),
             None, 789, 7.2, None, 456, 0, 999),
        ]
        self.expect_query_result(query=sql, expected=expected)
# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()