Skip to content

Add fetch_entity_names method #230

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Apr 8, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 66 additions & 13 deletions datacommons_client/endpoints/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from datacommons_client.endpoints.payloads import NodeRequestPayload
from datacommons_client.endpoints.payloads import normalize_properties_to_string
from datacommons_client.endpoints.response import NodeResponse
from datacommons_client.utils.names import extract_name_from_english_name_property
from datacommons_client.utils.names import extract_name_from_property_with_language


class NodeEndpoint(Endpoint):
Expand Down Expand Up @@ -91,10 +93,12 @@ def fetch_property_labels(
expression = "->" if out else "<-"

# Make the request and return the response.
return self.fetch(node_dcids=node_dcids,
expression=expression,
all_pages=all_pages,
next_token=next_token)
return self.fetch(
node_dcids=node_dcids,
expression=expression,
all_pages=all_pages,
next_token=next_token,
)

def fetch_property_values(
self,
Expand Down Expand Up @@ -143,10 +147,12 @@ def fetch_property_values(
if constraints:
expression += f"{{{constraints}}}"

return self.fetch(node_dcids=node_dcids,
expression=expression,
all_pages=all_pages,
next_token=next_token)
return self.fetch(
node_dcids=node_dcids,
expression=expression,
all_pages=all_pages,
next_token=next_token,
)

def fetch_all_classes(
self,
Expand Down Expand Up @@ -174,8 +180,55 @@ def fetch_all_classes(
```
"""

return self.fetch_property_values(node_dcids="Class",
properties="typeOf",
out=False,
all_pages=all_pages,
next_token=next_token)
return self.fetch_property_values(
node_dcids="Class",
properties="typeOf",
out=False,
all_pages=all_pages,
next_token=next_token,
)

def fetch_entity_names(
    self,
    entity_dcids: str | list[str],
    language: Optional[str] = "en",
    fallback_to_en: bool = False,
) -> dict[str, str]:
    """
    Fetches entity names in the specified language, with optional fallback to English.

    English names are read from the `name` property; names in any other language
    are read from the `nameWithLanguage` property and filtered by language tag.

    Args:
        entity_dcids: A single DCID or a list of DCIDs to fetch names for.
        language: Language code (e.g., "en", "es"). Defaults to "en". Passing
            `None` is treated the same as "en".
        fallback_to_en: If True, falls back to English if the desired language is
            not found. Only relevant when `language` is not "en". Defaults to False.

    Returns:
        A dictionary mapping each DCID to its name (in the requested or fallback
        language). Entities for which no name could be extracted are omitted.
    """

    # The parameter is Optional; without this, None would select the
    # language-tagged property and then never match any tag.
    if language is None:
        language = "en"

    # Accept a single DCID by wrapping it in a list.
    if isinstance(entity_dcids, str):
        entity_dcids = [entity_dcids]

    # If the language is English, use the more efficient 'name' property.
    is_english = language == "en"
    name_property = "name" if is_english else "nameWithLanguage"

    # Fetch names for the given entity DCIDs.
    data = self.fetch_property_values(
        node_dcids=entity_dcids, properties=name_property).get_properties()

    names: dict[str, str] = {}

    # Iterate through the fetched data and populate the names dictionary.
    for dcid, properties in data.items():
        # An entity with no values for the property has no name to extract;
        # skipping here avoids indexing into an empty list on the English path.
        if not properties:
            continue

        if is_english:
            name = extract_name_from_english_name_property(properties=properties)
        else:
            name = extract_name_from_property_with_language(
                properties=properties,
                language=language,
                fallback_to_en=fallback_to_en,
            )

        # Only record entities for which a non-empty name was found.
        if name:
            names[dcid] = name

    return names
65 changes: 65 additions & 0 deletions datacommons_client/tests/test_names.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from datacommons_client.models.node import Node
from datacommons_client.utils.names import extract_name_from_english_name_property
from datacommons_client.utils.names import extract_name_from_property_with_language


def test_extract_name_from_english_name_property_with_list():
    """A one-element list of Nodes yields the value of that Node."""
    expected = "Test Name"
    nodes = [Node(value=expected)]
    assert extract_name_from_english_name_property(nodes) == expected


def test_extract_name_from_english_not_list():
    """A bare Node (not wrapped in a list) yields its own value."""
    expected = "Single Node Name"
    extracted = extract_name_from_english_name_property(Node(value=expected))
    assert extracted == expected


def test_extract_name_from_property_with_language_match():
    """The requested language is returned when it is present, even with fallback on."""
    tagged_names = [Node(value=text) for text in ("Nombre@es", "Name@en")]
    extracted = extract_name_from_property_with_language(
        tagged_names,
        language="es",
        fallback_to_en=True,
    )
    assert extracted == "Nombre"


def test_extract_name_from_property_with_language_fallback():
    """The English name is returned when the requested language is missing and fallback is on."""
    tagged_names = [Node(value=text) for text in ("Name@en", "Nom@fr")]
    extracted = extract_name_from_property_with_language(
        tagged_names,
        language="de",
        fallback_to_en=True,
    )
    assert extracted == "Name"


def test_extract_name_from_property_with_language_no_fallback():
    """Nothing is returned when the requested language is missing and fallback is off."""
    tagged_names = [Node(value=text) for text in ("Name@en", "Nom@fr")]
    extracted = extract_name_from_property_with_language(
        tagged_names,
        language="de",
        fallback_to_en=False,
    )
    assert extracted is None


def test_extract_name_from_property_without_language_tags():
    """Values lacking an "@<lang>" tag are ignored; the tagged value is used."""
    mixed_values = [Node(value=text) for text in ("Plain str", "Name@en")]
    extracted = extract_name_from_property_with_language(
        mixed_values,
        language="en",
        fallback_to_en=False,
    )
    assert extracted == "Name"
50 changes: 50 additions & 0 deletions datacommons_client/utils/names.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from typing import Optional

from datacommons_client.models.node import Node


def extract_name_from_english_name_property(properties: list | Node) -> str:
    """
    Extracts the name from the value(s) of an English name property.

    Args:
        properties (list | Node): A single Node, or a list of Nodes, holding
            English names. When several are given, only the first is used.
    Returns:
        str: The value of the first node, or an empty string if the list is empty.
    """
    # Accept a bare Node by wrapping it so both input shapes share one path.
    if isinstance(properties, Node):
        properties = [properties]

    # With no values there is nothing to index; return an empty name
    # instead of raising IndexError.
    if not properties:
        return ""

    return properties[0].value


def extract_name_from_property_with_language(
        properties: list, language: str, fallback_to_en: bool) -> Optional[str]:
    """
    Extracts the name from a list of properties with language tags.

    Each property value is expected to look like "<name>@<lang>". The value is
    split on its last "@", so names that themselves contain "@" are preserved.

    Args:
        properties (list): A list of objects whose `value` attribute holds a
            language-tagged name.
        language (str): The desired language code.
        fallback_to_en (bool): Whether to fall back to English if the desired
            language is not found.
    Returns:
        Optional[str]: The first name tagged with `language`. If there is none
            and `fallback_to_en` is True, the last name tagged "en" (or None if
            there is no English name). Otherwise None.
    """
    fallback_name = None

    # Iterate through the properties to find the name in the specified language.
    for candidate in properties:
        value = candidate.value

        # Skip candidates that carry no usable text (e.g. a missing value) or no
        # language tag; testing "@" membership on a non-string would raise.
        if not isinstance(value, str) or "@" not in value:
            continue

        # Split the candidate value into name and language.
        name, lang = value.rsplit("@", 1)

        # An exact language match wins immediately.
        if lang == language:
            return name
        # Remember the English name in case no exact match turns up.
        if lang == "en":
            fallback_name = name

    # No name was found in the specified language: use the English fallback only
    # if the caller asked for it.
    return fallback_name if fallback_to_en else None