Skip to content

Add fetch_entity_names method #230

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Apr 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions datacommons_client/endpoints/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,17 @@
from datacommons_client.endpoints.payloads import NodeRequestPayload
from datacommons_client.endpoints.payloads import normalize_properties_to_string
from datacommons_client.endpoints.response import NodeResponse
from datacommons_client.models.node import Name
from datacommons_client.models.node import Node
from datacommons_client.utils.graph import build_ancestry_map
from datacommons_client.utils.graph import build_ancestry_tree
from datacommons_client.utils.graph import fetch_parents_lru
from datacommons_client.utils.graph import flatten_ancestry
from datacommons_client.utils.names import DEFAULT_NAME_LANGUAGE
from datacommons_client.utils.names import DEFAULT_NAME_PROPERTY
from datacommons_client.utils.names import extract_name_from_english_name_property
from datacommons_client.utils.names import extract_name_from_property_with_language
from datacommons_client.utils.names import NAME_WITH_LANGUAGE_PROPERTY

ANCESTRY_MAX_WORKERS = 10

Expand Down Expand Up @@ -194,6 +200,58 @@ def fetch_all_classes(
next_token=next_token,
)

def fetch_entity_names(
    self,
    entity_dcids: str | list[str],
    language: Optional[str] = DEFAULT_NAME_LANGUAGE,
    fallback_language: Optional[str] = None,
) -> dict[str, Name]:
  """Fetches entity names in the requested language, with optional fallback.

  Args:
      entity_dcids: A single DCID or a list of DCIDs to fetch names for.
      language: Language code (e.g., "en", "es"). Defaults to
        DEFAULT_NAME_LANGUAGE.
      fallback_language: If provided, this language is used when no name is
        available in `language`. If not provided, no fallback is used.
        Only consulted when `language` differs from DEFAULT_NAME_LANGUAGE.

  Returns:
      A dictionary mapping each DCID to a `Name` carrying the name value,
      the language of the returned name, and the property it was read from.
      DCIDs for which no name was found are omitted.
  """

  # Accept a bare DCID for convenience; downstream code expects a list.
  if isinstance(entity_dcids, str):
    entity_dcids = [entity_dcids]

  # Decide once whether the default-language path applies, so the property
  # that is fetched and the extractor that parses it can never disagree.
  use_default_language = language == DEFAULT_NAME_LANGUAGE

  # For the default language, use the more efficient plain name property.
  name_property = (DEFAULT_NAME_PROPERTY
                   if use_default_language else NAME_WITH_LANGUAGE_PROPERTY)

  # Fetch the chosen name property for the given entity DCIDs.
  data = self.fetch_property_values(
      node_dcids=entity_dcids, properties=name_property).get_properties()

  names: dict[str, Name] = {}

  # Extract one name per entity; entities without a usable name are skipped.
  for dcid, properties in data.items():
    if use_default_language:
      name = extract_name_from_english_name_property(properties=properties)
      lang_used = DEFAULT_NAME_LANGUAGE
    else:
      name, lang_used = extract_name_from_property_with_language(
          properties=properties,
          language=language,
          fallback_language=fallback_language,
      )
    if name:
      names[dcid] = Name(
          value=name,
          language=lang_used,
          property=name_property,
      )

  return names

def fetch_entity_parents(
self,
entity_dcids: str | list[str],
Expand Down
2 changes: 1 addition & 1 deletion datacommons_client/endpoints/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from dataclasses import field
from typing import Any, Dict, List

from datacommons_client.models.base import SerializableMixin
from datacommons_client.models.node import Arcs
from datacommons_client.models.node import NextToken
from datacommons_client.models.node import NodeDCID
Expand All @@ -13,7 +14,6 @@
from datacommons_client.models.resolve import Entity
from datacommons_client.utils.data_processing import flatten_properties
from datacommons_client.utils.data_processing import observations_as_records
from datacommons_client.utils.data_processing import SerializableMixin


@dataclass
Expand Down
39 changes: 39 additions & 0 deletions datacommons_client/models/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from dataclasses import asdict
import json
from typing import Any, Dict


class SerializableMixin:
  """Mixin that adds dict and JSON serialization to dataclasses."""

  def to_dict(self, exclude_none: bool = True) -> Dict[str, Any]:
    """Returns the instance as a plain dictionary.

    Args:
        exclude_none: If True, dictionary entries whose value is None are
          dropped, recursively (including dictionaries nested in lists).
          None items that sit directly inside a list are kept.

    Returns:
        Dict[str, Any]: The dictionary representation of the instance.
    """

    def _prune(node: Any) -> Any:
      """Returns `node` with None-valued dictionary entries removed."""
      if isinstance(node, list):
        return [_prune(element) for element in node]
      if not isinstance(node, dict):
        # Scalars (and any other container type) pass through untouched.
        return node
      kept = {}
      for key, value in node.items():
        if value is None:
          continue
        kept[key] = _prune(value)
      return kept

    raw = asdict(self)
    if not exclude_none:
      return raw
    return _prune(raw)

  def to_json(self, exclude_none: bool = True) -> str:
    """Returns the instance as a JSON string indented by two spaces.

    Args:
        exclude_none: Forwarded to `to_dict`; if True, None-valued
          dictionary entries are left out of the output.

    Returns:
        str: The JSON string representation of the instance.
    """
    payload = self.to_dict(exclude_none=exclude_none)
    return json.dumps(payload, indent=2)
17 changes: 16 additions & 1 deletion datacommons_client/models/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from dataclasses import field
from typing import Any, Dict, List, Optional, TypeAlias

from datacommons_client.utils.data_processing import SerializableMixin
from datacommons_client.models.base import SerializableMixin

NextToken: TypeAlias = Optional[str]
NodeDCID: TypeAlias = str
Expand Down Expand Up @@ -40,6 +40,21 @@ def from_json(cls, json_data: Dict[str, Any]) -> "Node":
)


@dataclass
class Name(SerializableMixin):
  """A display name resolved for an entity (node).

  Attributes:
      value: The text of the entity's name.
      language: The language code of the name held in `value`.
      property: The property the name was read from.
  """

  # Field order defines the positional constructor signature; keep it stable.
  value: str
  language: str
  property: str


@dataclass
class NodeGroup:
"""Represents a group of nodes in the Data Commons knowledge graph.
Expand Down
129 changes: 129 additions & 0 deletions datacommons_client/tests/endpoints/test_node_endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@
from datacommons_client.endpoints.base import API
from datacommons_client.endpoints.node import NodeEndpoint
from datacommons_client.endpoints.response import NodeResponse
from datacommons_client.models.node import Name
from datacommons_client.models.node import Node
from datacommons_client.utils.names import DEFAULT_NAME_PROPERTY
from datacommons_client.utils.names import NAME_WITH_LANGUAGE_PROPERTY


def test_node_endpoint_initialization():
Expand Down Expand Up @@ -198,6 +201,132 @@ def test_node_endpoint_fetch_property_values_string_vs_list():
)


@patch(
    "datacommons_client.endpoints.node.extract_name_from_english_name_property")
def test_fetch_entity_names_english(mock_extract_name):
  """Default-language lookups use the plain name property and extractor."""
  mock_extract_name.return_value = "Guatemala"

  # Canned response standing in for a real fetch_property_values call.
  canned_response = NodeResponse(
      data={
          "dc/123": {
              "properties": {
                  DEFAULT_NAME_PROPERTY: [{
                      "value": "Guatemala"
                  }]
              }
          }
      })
  endpoint = NodeEndpoint(api=MagicMock())
  endpoint.fetch_property_values = MagicMock(return_value=canned_response)

  expected = {
      "dc/123":
          Name(
              value="Guatemala",
              language="en",
              property=DEFAULT_NAME_PROPERTY,
          )
  }

  result = endpoint.fetch_entity_names("dc/123")

  # The single DCID must be wrapped in a list before being fetched.
  endpoint.fetch_property_values.assert_called_once_with(
      node_dcids=["dc/123"], properties=DEFAULT_NAME_PROPERTY)
  assert result == expected
  mock_extract_name.assert_called_once()


@patch(
    "datacommons_client.endpoints.node.extract_name_from_property_with_language"
)
def test_fetch_entity_names_non_english(mock_extract_name):
  """Non-default languages go through the language-tagged name property."""
  mock_extract_name.return_value = ("Californie", "fr")

  # Canned response standing in for a real fetch_property_values call.
  canned_response = NodeResponse(
      data={
          "dc/123": {
              "properties": {
                  NAME_WITH_LANGUAGE_PROPERTY: [{
                      "value": "Californie",
                      "lang": "fr"
                  }]
              }
          }
      })
  endpoint = NodeEndpoint(api=MagicMock())
  endpoint.fetch_property_values = MagicMock(return_value=canned_response)

  expected = {
      "dc/123":
          Name(
              value="Californie",
              language="fr",
              property=NAME_WITH_LANGUAGE_PROPERTY,
          )
  }

  result = endpoint.fetch_entity_names("dc/123", language="fr")

  endpoint.fetch_property_values.assert_called_once_with(
      node_dcids=["dc/123"], properties=NAME_WITH_LANGUAGE_PROPERTY)
  assert result == expected
  mock_extract_name.assert_called_once()


@patch(
    "datacommons_client.endpoints.node.extract_name_from_property_with_language"
)
def test_fetch_entity_names_with_fallback(mock_extract_name_lang):
  """Test fallback to another language when target language is unavailable.

  Besides checking the returned `Name`, this verifies that the requested and
  fallback languages are actually forwarded to the extractor; without that,
  the test would still pass if `fallback_language` were silently dropped.
  """
  mock_extract_name_lang.return_value = ("Chiquimula", "en")
  api_mock = MagicMock()
  endpoint = NodeEndpoint(api=api_mock)

  # Mock the response from fetch_property_values: only an English name exists.
  endpoint.fetch_property_values = MagicMock(return_value=NodeResponse(
      data={
          "dc/123": {
              "properties": {
                  NAME_WITH_LANGUAGE_PROPERTY: [{
                      "value": "Chiquimula",
                      "lang": "en"
                  }]
              }
          }
      }))

  result = endpoint.fetch_entity_names("dc/123",
                                       language="fr",
                                       fallback_language="en")

  # A non-default language must be looked up via the language-tagged property.
  endpoint.fetch_property_values.assert_called_once_with(
      node_dcids=["dc/123"], properties=NAME_WITH_LANGUAGE_PROPERTY)

  # Both languages must reach the extractor unchanged.
  mock_extract_name_lang.assert_called_once()
  forwarded = mock_extract_name_lang.call_args.kwargs
  assert forwarded["language"] == "fr"
  assert forwarded["fallback_language"] == "en"

  # The reported language is the one actually used (the fallback).
  assert result == {
      "dc/123":
          Name(
              value="Chiquimula",
              language="en",
              property=NAME_WITH_LANGUAGE_PROPERTY,
          )
  }


@patch(
    "datacommons_client.endpoints.node.extract_name_from_property_with_language"
)
def test_fetch_entity_names_no_result(mock_extract_name_lang):
  """An entity with no extractable name is left out of the result."""
  mock_extract_name_lang.return_value = (None, None)

  # Canned response for an entity that has no properties at all.
  empty_response = NodeResponse(data={"dc/999": {"properties": {}}})
  endpoint = NodeEndpoint(api=MagicMock())
  endpoint.fetch_property_values = MagicMock(return_value=empty_response)

  result = endpoint.fetch_entity_names(
      "dc/999",
      language="es",
      fallback_language="en",
  )

  assert result == {}


@patch("datacommons_client.endpoints.node.fetch_parents_lru")
def test_fetch_parents_cached_delegates_to_lru(mock_fetch_lru):
mock_fetch_lru.return_value = (Node("B", "B name", "Region"),)
Expand Down
66 changes: 66 additions & 0 deletions datacommons_client/tests/test_names.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from datacommons_client.models.node import Node
from datacommons_client.utils.names import extract_name_from_english_name_property
from datacommons_client.utils.names import extract_name_from_property_with_language


def test_extract_name_from_english_name_property_with_list():
  """A one-element list of Nodes yields that Node's value."""
  expected = "Test Name"
  nodes = [Node(value=expected)]
  assert extract_name_from_english_name_property(nodes) == expected


def test_extract_name_from_english_not_list():
  """A bare Node (not wrapped in a list) yields its value."""
  expected = "Single Node Name"
  lone_node = Node(value=expected)
  assert extract_name_from_english_name_property(lone_node) == expected


def test_extract_name_from_property_with_language_match():
  """The requested language wins when it is present among the candidates."""
  candidates = [Node(value="Nombre@es"), Node(value="Name@en")]

  name, lang = extract_name_from_property_with_language(
      candidates,
      language="es",
      fallback_language="en",
  )

  assert name == "Nombre"
  assert lang == "es"


def test_extract_name_from_property_with_language_fallback():
  """The fallback language ("it") is used when the requested one is absent."""
  candidates = [
      Node(value="Name@en"),
      Node(value="Nom@fr"),
      Node(value="Nome@it"),
  ]

  name, lang = extract_name_from_property_with_language(
      candidates,
      language="de",
      fallback_language="it",
  )

  assert name == "Nome"
  assert lang == "it"


def test_extract_name_from_property_with_language_no_fallback():
  """Without a fallback, a missing language yields no name and no language."""
  candidates = [Node(value="Name@en"), Node(value="Nom@fr")]

  name, lang = extract_name_from_property_with_language(candidates,
                                                        language="de")

  assert name is None
  assert lang is None


def test_extract_name_from_property_without_language_tags():
  """Values lacking a language tag are passed over for a tagged match."""
  candidates = [Node(value="Plain str"), Node(value="Name@en")]

  name, lang = extract_name_from_property_with_language(candidates,
                                                        language="en")

  assert name == "Name"
  assert lang == "en"
Loading