Skip to content

Commit eb6adef

Browse files
authored
Add fetch_entity_names method (#230)
**Fetch entity names**: this is an implementation of a few DC website features: - The name endpoint of the internal API. - Fetching the English names from the `name` property. - Fetching the i18n name using the `nameWithLanguage` property (the website automatically resolves the user's locale; this method takes the language as an argument).
1 parent be402cc commit eb6adef

File tree

7 files changed

+368
-2
lines changed

7 files changed

+368
-2
lines changed

Diff for: datacommons_client/endpoints/node.py

+58
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,17 @@
66
from datacommons_client.endpoints.payloads import NodeRequestPayload
77
from datacommons_client.endpoints.payloads import normalize_properties_to_string
88
from datacommons_client.endpoints.response import NodeResponse
9+
from datacommons_client.models.node import Name
910
from datacommons_client.models.node import Node
1011
from datacommons_client.utils.graph import build_ancestry_map
1112
from datacommons_client.utils.graph import build_ancestry_tree
1213
from datacommons_client.utils.graph import fetch_parents_lru
1314
from datacommons_client.utils.graph import flatten_ancestry
15+
from datacommons_client.utils.names import DEFAULT_NAME_LANGUAGE
16+
from datacommons_client.utils.names import DEFAULT_NAME_PROPERTY
17+
from datacommons_client.utils.names import extract_name_from_english_name_property
18+
from datacommons_client.utils.names import extract_name_from_property_with_language
19+
from datacommons_client.utils.names import NAME_WITH_LANGUAGE_PROPERTY
1420

1521
ANCESTRY_MAX_WORKERS = 10
1622

@@ -194,6 +200,58 @@ def fetch_all_classes(
194200
next_token=next_token,
195201
)
196202

203+
def fetch_entity_names(
    self,
    entity_dcids: str | list[str],
    language: Optional[str] = DEFAULT_NAME_LANGUAGE,
    fallback_language: Optional[str] = None,
) -> dict[str, Name]:
    """Fetches entity names in the requested language, with an optional fallback.

    Args:
        entity_dcids: A single DCID or a list of DCIDs to fetch names for.
        language: Language code (e.g., "en", "es"). Defaults to
            DEFAULT_NAME_LANGUAGE ("en").
        fallback_language: Language code to try when no name is available in
            `language`. If None (the default), no fallback is attempted. It
            is only forwarded for non-default languages, because names in the
            default language are read from the plain name property.

    Returns:
        A dictionary mapping each DCID to a `Name` holding the name value,
        the language of the returned name, and the property the name was
        read from. DCIDs for which no name was found are omitted.
    """
    # Accept a bare DCID for convenience; everything below expects a list.
    if isinstance(entity_dcids, str):
        entity_dcids = [entity_dcids]

    # Decide once whether the default-language path applies, so the property
    # that is fetched and the extractor that parses it can never disagree.
    use_default_property = language == DEFAULT_NAME_LANGUAGE

    # For the default language, use the more efficient 'name' property.
    name_property = (DEFAULT_NAME_PROPERTY
                     if use_default_property else NAME_WITH_LANGUAGE_PROPERTY)

    # Fetch the name property for the given entity DCIDs.
    data = self.fetch_property_values(
        node_dcids=entity_dcids, properties=name_property).get_properties()

    names: dict[str, Name] = {}

    for dcid, properties in data.items():
        if use_default_property:
            name = extract_name_from_english_name_property(
                properties=properties)
            lang_used = DEFAULT_NAME_LANGUAGE
        else:
            name, lang_used = extract_name_from_property_with_language(
                properties=properties,
                language=language,
                fallback_language=fallback_language,
            )

        # Entities without a usable name are left out of the result.
        if name:
            names[dcid] = Name(
                value=name,
                language=lang_used,
                property=name_property,
            )

    return names
254+
197255
def fetch_entity_parents(
198256
self,
199257
entity_dcids: str | list[str],

Diff for: datacommons_client/endpoints/response.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from dataclasses import field
33
from typing import Any, Dict, List
44

5+
from datacommons_client.models.base import SerializableMixin
56
from datacommons_client.models.node import Arcs
67
from datacommons_client.models.node import NextToken
78
from datacommons_client.models.node import NodeDCID
@@ -13,7 +14,6 @@
1314
from datacommons_client.models.resolve import Entity
1415
from datacommons_client.utils.data_processing import flatten_properties
1516
from datacommons_client.utils.data_processing import observations_as_records
16-
from datacommons_client.utils.data_processing import SerializableMixin
1717

1818

1919
@dataclass

Diff for: datacommons_client/models/base.py

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
from dataclasses import asdict
2+
import json
3+
from typing import Any, Dict
4+
5+
6+
class SerializableMixin:
    """Provides serialization methods for the Response dataclasses."""

    def to_dict(self, exclude_none: bool = True) -> Dict[str, Any]:
        """Converts the dataclass instance to a dictionary.

        Args:
            exclude_none: If True, drop every dictionary entry whose value is
                None, including entries of dictionaries nested inside other
                dictionaries or lists. Other "empty" values (such as "", [],
                {} or 0) are kept, and so are None items stored directly in
                a list.

        Returns:
            Dict[str, Any]: The dictionary representation of the instance.
        """

        def _remove_none(data: Any) -> Any:
            """Recursively drops None-valued entries from (nested) dicts."""
            if isinstance(data, dict):
                return {
                    k: _remove_none(v)
                    for k, v in data.items()
                    if v is not None
                }
            if isinstance(data, list):
                # List items are kept even when None; only dicts are pruned.
                return [_remove_none(item) for item in data]
            return data

        result = asdict(self)
        return _remove_none(result) if exclude_none else result

    def to_json(self, exclude_none: bool = True) -> str:
        """Converts the instance to a JSON string indented by two spaces.

        Args:
            exclude_none: Forwarded to `to_dict`; if True, dictionary entries
                whose value is None are left out of the output.

        Returns:
            str: The JSON string representation of the instance.
        """
        return json.dumps(self.to_dict(exclude_none=exclude_none), indent=2)

Diff for: datacommons_client/models/node.py

+16-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from dataclasses import field
33
from typing import Any, Dict, List, Optional, TypeAlias
44

5-
from datacommons_client.utils.data_processing import SerializableMixin
5+
from datacommons_client.models.base import SerializableMixin
66

77
NextToken: TypeAlias = Optional[str]
88
NodeDCID: TypeAlias = str
@@ -40,6 +40,21 @@ def from_json(cls, json_data: Dict[str, Any]) -> "Node":
4040
)
4141

4242

43+
@dataclass
class Name(SerializableMixin):
    """A name of an Entity (node), with its language and source property.

    Attributes:
        value: The name text of the Entity.
        language: The language of the name (e.g. "en").
        property: The property the name was read from.
    """

    value: str
    language: str
    property: str
56+
57+
4358
@dataclass
4459
class NodeGroup:
4560
"""Represents a group of nodes in the Data Commons knowledge graph.

Diff for: datacommons_client/tests/endpoints/test_node_endpoint.py

+129
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44
from datacommons_client.endpoints.base import API
55
from datacommons_client.endpoints.node import NodeEndpoint
66
from datacommons_client.endpoints.response import NodeResponse
7+
from datacommons_client.models.node import Name
78
from datacommons_client.models.node import Node
9+
from datacommons_client.utils.names import DEFAULT_NAME_PROPERTY
10+
from datacommons_client.utils.names import NAME_WITH_LANGUAGE_PROPERTY
811

912

1013
def test_node_endpoint_initialization():
@@ -198,6 +201,132 @@ def test_node_endpoint_fetch_property_values_string_vs_list():
198201
)
199202

200203

204+
@patch(
    "datacommons_client.endpoints.node.extract_name_from_english_name_property")
def test_fetch_entity_names_english(mock_extract_name):
    """Names requested without a language use the default name property."""
    mock_extract_name.return_value = "Guatemala"
    endpoint = NodeEndpoint(api=MagicMock())

    # Replace fetch_property_values with a canned response.
    canned_response = NodeResponse(data={
        "dc/123": {
            "properties": {
                DEFAULT_NAME_PROPERTY: [{
                    "value": "Guatemala"
                }]
            }
        }
    })
    endpoint.fetch_property_values = MagicMock(return_value=canned_response)

    result = endpoint.fetch_entity_names("dc/123")

    endpoint.fetch_property_values.assert_called_once_with(
        node_dcids=["dc/123"], properties=DEFAULT_NAME_PROPERTY)
    expected_name = Name(
        value="Guatemala",
        language="en",
        property=DEFAULT_NAME_PROPERTY,
    )
    assert result == {"dc/123": expected_name}

    mock_extract_name.assert_called_once()
237+
238+
239+
@patch(
    "datacommons_client.endpoints.node.extract_name_from_property_with_language"
)
def test_fetch_entity_names_non_english(mock_extract_name):
    """A non-English request reads names from the language-tagged property."""
    mock_extract_name.return_value = ("Californie", "fr")
    endpoint = NodeEndpoint(api=MagicMock())

    # Replace fetch_property_values with a canned response.
    canned_response = NodeResponse(data={
        "dc/123": {
            "properties": {
                NAME_WITH_LANGUAGE_PROPERTY: [{
                    "value": "Californie",
                    "lang": "fr"
                }]
            }
        }
    })
    endpoint.fetch_property_values = MagicMock(return_value=canned_response)

    result = endpoint.fetch_entity_names("dc/123", language="fr")

    endpoint.fetch_property_values.assert_called_once_with(
        node_dcids=["dc/123"], properties=NAME_WITH_LANGUAGE_PROPERTY)
    expected_name = Name(
        value="Californie",
        language="fr",
        property=NAME_WITH_LANGUAGE_PROPERTY,
    )
    assert result == {"dc/123": expected_name}

    mock_extract_name.assert_called_once()
273+
274+
275+
@patch(
    "datacommons_client.endpoints.node.extract_name_from_property_with_language"
)
def test_fetch_entity_names_with_fallback(mock_extract_name_lang):
    """Test fallback to another language when target language is unavailable."""
    mock_extract_name_lang.return_value = ("Chiquimula", "en")
    api_mock = MagicMock()
    endpoint = NodeEndpoint(api=api_mock)

    endpoint.fetch_property_values = MagicMock(return_value=NodeResponse(
        data={
            "dc/123": {
                "properties": {
                    NAME_WITH_LANGUAGE_PROPERTY: [{
                        "value": "Chiquimula",
                        "lang": "en"
                    }]
                }
            }
        }))

    result = endpoint.fetch_entity_names("dc/123",
                                         language="fr",
                                         fallback_language="en")

    # The extractor is mocked, so the only way to prove the fallback is wired
    # through is to check the arguments it was called with.
    mock_extract_name_lang.assert_called_once()
    call_kwargs = mock_extract_name_lang.call_args.kwargs
    assert call_kwargs["language"] == "fr"
    assert call_kwargs["fallback_language"] == "en"

    # The reported language is the one actually used, not the one requested.
    assert result == {
        "dc/123":
            Name(
                value="Chiquimula",
                language="en",
                property=NAME_WITH_LANGUAGE_PROPERTY,
            )
    }
308+
309+
310+
@patch(
    "datacommons_client.endpoints.node.extract_name_from_property_with_language"
)
def test_fetch_entity_names_no_result(mock_extract_name_lang):
    """Entities for which no name is extracted are left out of the result."""
    mock_extract_name_lang.return_value = (None, None)
    endpoint = NodeEndpoint(api=MagicMock())

    # Canned response for an entity that has no properties at all.
    empty_response = NodeResponse(data={"dc/999": {"properties": {}}})
    endpoint.fetch_property_values = MagicMock(return_value=empty_response)

    result = endpoint.fetch_entity_names("dc/999",
                                         language="es",
                                         fallback_language="en")

    assert result == {}
328+
329+
201330
@patch("datacommons_client.endpoints.node.fetch_parents_lru")
202331
def test_fetch_parents_cached_delegates_to_lru(mock_fetch_lru):
203332
mock_fetch_lru.return_value = (Node("B", "B name", "Region"),)

Diff for: datacommons_client/tests/test_names.py

+66
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
from datacommons_client.models.node import Node
2+
from datacommons_client.utils.names import extract_name_from_english_name_property
3+
from datacommons_client.utils.names import extract_name_from_property_with_language
4+
5+
6+
def test_extract_name_from_english_name_property_with_list():
    """The name is taken from a list holding a single Node."""
    nodes = [Node(value="Test Name")]

    extracted = extract_name_from_english_name_property(nodes)

    assert extracted == "Test Name"
11+
12+
13+
def test_extract_name_from_english_not_list():
    """The name is taken from a bare Node that is not wrapped in a list."""
    single_node = Node(value="Single Node Name")

    extracted = extract_name_from_english_name_property(single_node)

    assert extracted == "Single Node Name"
18+
19+
20+
def test_extract_name_from_property_with_language_match():
    """The requested language wins when a name tagged with it is present."""
    spanish = Node(value="Nombre@es")
    english = Node(value="Name@en")

    name, lang_used = extract_name_from_property_with_language(
        [spanish, english],
        language="es",
        fallback_language="en",
    )

    assert name == "Nombre"
    assert lang_used == "es"
31+
32+
33+
def test_extract_name_from_property_with_language_fallback():
    """Test fallback to the fallback language when the desired one is missing.

    The fallback is deliberately not English ("it"), so the test also shows
    that the available English name is not picked implicitly.
    """
    properties = [
        Node(value="Name@en"),
        Node(value="Nom@fr"),
        Node(value="Nome@it"),
    ]
    result = extract_name_from_property_with_language(properties,
                                                      language="de",
                                                      fallback_language="it")
    assert result[0] == "Nome"
    assert result[1] == "it"
45+
46+
47+
def test_extract_name_from_property_with_language_no_fallback():
    """Nothing is returned when the language is missing and no fallback is given."""
    tagged_names = [Node(value="Name@en"), Node(value="Nom@fr")]

    name, lang_used = extract_name_from_property_with_language(tagged_names,
                                                               language="de")

    assert name is None
    assert lang_used is None
56+
57+
58+
def test_extract_name_from_property_without_language_tags():
    """Values lacking a language tag are ignored in favour of tagged ones."""
    untagged = Node(value="Plain str")
    tagged = Node(value="Name@en")

    name, lang_used = extract_name_from_property_with_language(
        [untagged, tagged], language="en")

    assert name == "Name"
    assert lang_used == "en"

0 commit comments

Comments
 (0)