6
6
from datacommons_client .endpoints .payloads import ObservationDate
7
7
from datacommons_client .endpoints .resolve import ResolveEndpoint
8
8
from datacommons_client .utils .decorators import requires_pandas
9
+ from datacommons_client .utils .error_handling import NoDataForPropertyError
9
10
10
11
try :
11
12
import pandas as pd
@@ -58,6 +59,58 @@ def __init__(
58
59
self .observation = ObservationEndpoint (api = self .api )
59
60
self .resolve = ResolveEndpoint (api = self .api )
60
61
62
+ def _find_filter_facet_ids (
63
+ self ,
64
+ fetch_by : Literal ["entity" , "entity_type" ],
65
+ date : ObservationDate | str ,
66
+ variable_dcids : str | list [str ],
67
+ entity_dcids : Literal ["all" ] | list [str ] = "all" ,
68
+ entity_type : Optional [str ] = None ,
69
+ parent_entity : Optional [str ] = None ,
70
+ property_filters : Optional [dict [str , str | list [str ]]] = None ,
71
+ ) -> list [str ] | None :
72
+ """Finds matching facet IDs for property filters.
73
+
74
+ Args:
75
+ fetch_by (Literal["entity", "entity_type"]): Determines whether to fetch by entity or entity type.
76
+ variable_dcids (str | list[str]): The variable DCIDs for which to retrieve facet IDs.
77
+ entity_dcids (Literal["all"] | list[str], optional): The entity DCIDs, or "all" if filtering by entity type.
78
+ entity_type (Optional[str]): The entity type, required if fetching by entity type.
79
+ parent_entity (Optional[str]): The parent entity, used when fetching by entity type.
80
+ property_filters (Optional[dict[str, str | list[str]]): A dictionary of properties to match facets against.
81
+
82
+ Returns:
83
+ list[str] | None: A list of matching facet IDs, or None if no filters are applied.
84
+ """
85
+
86
+ if not property_filters :
87
+ return None
88
+
89
+ if fetch_by == "entity" :
90
+ observations = self .observation .fetch_observations_by_entity (
91
+ date = date ,
92
+ entity_dcids = entity_dcids ,
93
+ variable_dcids = variable_dcids ,
94
+ select = ["variable" , "entity" , "facet" ],
95
+ )
96
+ else :
97
+ observations = self .observation .fetch_observations_by_entity_type (
98
+ date = date ,
99
+ entity_type = entity_type ,
100
+ parent_entity = parent_entity ,
101
+ variable_dcids = variable_dcids ,
102
+ select = ["variable" , "entity" , "facet" ],
103
+ )
104
+
105
+ facet_sets = [
106
+ observations .find_matching_facet_id (property_name = p , value = v )
107
+ for p , v in property_filters .items ()
108
+ ]
109
+
110
+ facet_ids = list ({facet for facets in facet_sets for facet in facets })
111
+
112
+ return facet_ids
113
+
61
114
@requires_pandas
62
115
def observations_dataframe (
63
116
self ,
@@ -66,6 +119,7 @@ def observations_dataframe(
66
119
entity_dcids : Literal ["all" ] | list [str ] = "all" ,
67
120
entity_type : Optional [str ] = None ,
68
121
parent_entity : Optional [str ] = None ,
122
+ property_filters : Optional [dict [str , str | list [str ]]] = None ,
69
123
):
70
124
"""
71
125
Fetches statistical observations and returns them as a Pandas DataFrame.
@@ -74,15 +128,17 @@ def observations_dataframe(
74
128
at a particular date (e.g., "population of USA in 2020", "GDP of California in 2010").
75
129
76
130
Args:
77
- variable_dcids (str | list[str]): One or more variable DCIDs for the observation.
78
- date (ObservationDate | str): The date for which observations are requested. It can be
131
+ variable_dcids (str | list[str]): One or more variable DCIDs for the observation.
132
+ date (ObservationDate | str): The date for which observations are requested. It can be
79
133
a specific date, "all" to retrieve all observations, or "latest" to get the most recent observations.
80
- entity_dcids (Literal["all"] | list[str], optional): The entity DCIDs to retrieve data for.
81
- Defaults to "all". DCIDs must include their type (e.g "country/GTM" for Guatemala).
82
- entity_type (Optional[str], optional): The type of entities to filter by when `entity_dcids="all"`.
83
- Required if `entity_dcids="all"`. Defaults to None.
84
- parent_entity (Optional[str], optional): The parent entity under which the target entities fall.
85
- Used only when `entity_dcids="all"`. Defaults to None.
134
+ entity_dcids (Literal["all"] | list[str], optional): The entity DCIDs to retrieve data for.
135
+ Defaults to "all". DCIDs must include their type (e.g., "country/GTM" for Guatemala).
136
+ entity_type (Optional[str]): The type of entities to filter by when `entity_dcids="all"`.
137
+ Required if `entity_dcids="all"`. Defaults to None.
138
+ parent_entity (Optional[str]): The parent entity under which the target entities fall.
139
+ Used only when `entity_dcids="all"`. Defaults to None.
140
+ property_filters (Optional[dict[str, str | list[str]]): An optional dictionary used to filter
141
+ the data by using observation properties like `measurementMethod`, `unit`, or `observationPeriod`.
86
142
87
143
Returns:
88
144
pd.DataFrame: A DataFrame containing the requested observations.
@@ -97,14 +153,34 @@ def observations_dataframe(
97
153
"Specify 'entity_type' and 'parent_entity' only when 'entity_dcids' is 'all'."
98
154
)
99
155
156
+ # If property filters are provided, fetch the required facet IDs. Otherwise, set to None.
157
+ facets = self ._find_filter_facet_ids (
158
+ fetch_by = "entity" if entity_dcids != "all" else "entity_type" ,
159
+ date = date ,
160
+ variable_dcids = variable_dcids ,
161
+ entity_dcids = entity_dcids ,
162
+ entity_type = entity_type ,
163
+ parent_entity = parent_entity ,
164
+ property_filters = property_filters ,
165
+ )
166
+
167
+ if not facets and property_filters :
168
+ raise NoDataForPropertyError
169
+
100
170
if entity_dcids == "all" :
101
171
observations = self .observation .fetch_observations_by_entity_type (
102
172
date = date ,
103
173
parent_entity = parent_entity ,
104
174
entity_type = entity_type ,
105
- variable_dcids = variable_dcids )
175
+ variable_dcids = variable_dcids ,
176
+ filter_facet_ids = facets ,
177
+ )
106
178
else :
107
179
observations = self .observation .fetch_observations_by_entity (
108
- date = date , entity_dcids = entity_dcids , variable_dcids = variable_dcids )
180
+ date = date ,
181
+ entity_dcids = entity_dcids ,
182
+ variable_dcids = variable_dcids ,
183
+ filter_facet_ids = facets ,
184
+ )
109
185
110
186
return pd .DataFrame (observations .get_observations_as_records ())
0 commit comments