Skip to content

Commit 68bf413

Browse files
committed
New internal functions for better handling of ambiguity
1 parent 6acc04e commit 68bf413

4 files changed

Lines changed: 157 additions & 0 deletions

File tree

R/handle_search_term_ambiguity.R

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#' @title Handle Search Term Ambiguity
#' @description Resolves ambiguity in search results by selecting a preferred
#' match or returning NULL. Only results whose label equals `search_term` and
#' whose match language equals `language` are treated as exact matches.
#' @param search_results A list of search results from the Wikibase API. Each
#' element is read as a list with a `label` element and a `match` element
#' that holds a `language` element.
#' @param search_term The original search term used in the query.
#' @param language The language code used in the search.
#' @param strategy Character; what to do when more than one exact match is
#' found. `"return_null"` (the default) returns NULL, `"return_first"` returns
#' the first exact match.
#' @param csrf Optional CSRF token. It is accepted but not used by this
#' function.
#' @return A single search result or NULL if ambiguity cannot be resolved.
#' @keywords internal
handle_search_term_ambiguity <- function(
    search_results,
    search_term,
    language,
    strategy = c("return_null", "return_first"),
    csrf = NULL) {
  strategy <- match.arg(strategy)

  # A result may lack a label or a match language. Comparing NULL with `==`
  # yields logical(0), which makes `&&` fail, so each comparison is wrapped
  # in isTRUE() and the fields are extracted with `[[` (no partial matching).
  is_exact_match <- function(item) {
    isTRUE(item[["label"]] == search_term) &&
      isTRUE(item[["match"]][["language"]] == language)
  }

  # Filter for exact label-language matches
  exact_matches <- Filter(is_exact_match, search_results)
  n_matches <- length(exact_matches)

  if (n_matches == 1L) {
    return(exact_matches[[1]])
  }

  # Several candidates: only the "return_first" strategy picks one.
  if (n_matches > 1L && strategy == "return_first") {
    return(exact_matches[[1]])
  }

  # No exact match, or an ambiguity that the strategy does not resolve.
  NULL
}

R/search_wikibase_entities.R

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#' @title Search Wikibase Entities
#' @description This internal function interfaces with the Wikibase API to
#' search for entities matching a given search term and language. It sends a
#' `wbsearchentities` request with `strictlanguage` set to `"true"`.
#' @param search_term A character string representing the term to search for.
#' @param language A character string specifying the language code (e.g., "en").
#' @param wikibase_api_url A character string providing the URL of the Wikibase
#' API endpoint.
#' @param csrf Optional CSRF token. When not NULL it is sent as the `token`
#' field of the request body.
#' @param type A character string indicating the type of entity to search for.
#' Defaults to "item".
#' @importFrom httr POST content
#' @return The `search` element of the parsed API response (a list of search
#' results). An error is raised when the response contains an `error` element.
#' @keywords internal
search_wikibase_entities <- function(
    search_term,
    language = "en",
    wikibase_api_url = "https://www.wikidata.org/w/api.php",
    csrf = NULL,
    type = "item") {
  # Construct the body of the POST request
  body <- list(
    action = "wbsearchentities",
    search = search_term,
    language = language,
    formatversion = 2,
    format = "json",
    type = type,
    strictlanguage = "true"
  )

  # Include CSRF token if provided
  if (!is.null(csrf)) {
    body$token <- csrf
  }

  # Make the POST request
  response <- httr::POST(
    url = wikibase_api_url,
    body = body,
    encode = "form"
  )

  # Parse the response
  content <- httr::content(response, as = "parsed", type = "application/json")

  # Surface API-level errors reported inside the JSON payload
  if (!is.null(content$error)) {
    stop(paste(content$error$code, ":", content$error$info), call. = FALSE)
  }

  # Return the search results
  content$search
}
51+
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
test_that("handle_search_term_ambiguity returns correct match", {
  # Same label in two languages: only the English entry is an exact match.
  english_hit <- list(label = "Orange", match = list(language = "en"), id = "Q1")
  french_hit <- list(label = "Orange", match = list(language = "fr"), id = "Q2")
  candidates <- list(english_hit, french_hit)

  picked <- handle_search_term_ambiguity(candidates, "Orange", "en")

  expect_equal(picked[["id"]], "Q1")
})
10+
11+
test_that("handle_search_term_ambiguity returns NULL when no match", {
  # The only candidate carries a different label than the search term.
  candidates <- list(
    list(label = "Apple", match = list(language = "en"), id = "Q3")
  )

  expect_null(handle_search_term_ambiguity(candidates, "Orange", "en"))
})
19+
20+
test_that("handle_search_term_ambiguity returns first match when strategy is return_first", {
  # Two exact matches for the same label and language.
  search_results <- list(
    list(label = "Orange", match = list(language = "en"), id = "Q1"),
    list(label = "Orange", match = list(language = "en"), id = "Q2")
  )

  # The function has no `prefer_first` argument; the behaviour is selected
  # through `strategy`.
  result <- handle_search_term_ambiguity(
    search_results, "Orange", "en",
    strategy = "return_first"
  )
  expect_equal(result$id, "Q1")
})
29+
30+
test_that("handle_search_term_ambiguity returns NULL when multiple matches and strategy is return_null", {
  # Two exact matches for the same label and language.
  search_results <- list(
    list(label = "Orange", match = list(language = "en"), id = "Q1"),
    list(label = "Orange", match = list(language = "en"), id = "Q2")
  )

  # The function has no `prefer_first` argument; the behaviour is selected
  # through `strategy`.
  result <- handle_search_term_ambiguity(
    search_results, "Orange", "en",
    strategy = "return_null"
  )
  expect_null(result)
})
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
test_that("search_wikibase_entities returns expected results for valid input", {
  # This test queries the live API, so it is skipped on CRAN and when the
  # host cannot be reached.
  skip_on_cran()
  skip_if_offline(host = "www.wikidata.org")

  results <- search_wikibase_entities(
    search_term = "Estonian National Museum",
    language = "en",
    wikibase_api_url = "https://www.wikidata.org/w/api.php"
  )

  expect_type(results, "list")
  expect_true(length(results) > 0)

  # vapply() keeps the result a logical vector; identical() also copes with
  # results that carry no label.
  has_expected_label <- vapply(
    results,
    function(x) identical(x$label, "Estonian National Museum"),
    logical(1)
  )
  expect_true(any(has_expected_label))
})
12+
13+
test_that("search_wikibase_entities handles no matches gracefully", {
  # This test queries the live API, so it is skipped on CRAN and when the
  # host cannot be reached.
  skip_on_cran()
  skip_if_offline(host = "www.wikidata.org")

  results <- search_wikibase_entities(
    search_term = "NonExistentEntity12345",
    language = "en",
    wikibase_api_url = "https://www.wikidata.org/w/api.php"
  )

  expect_type(results, "list")
  expect_length(results, 0)
})
23+
24+
test_that("search_wikibase_entities handles invalid API URL gracefully", {
  # The expected error comes from the HTTP request itself, so the outcome
  # depends on the network environment of the machine; do not run on CRAN.
  skip_on_cran()

  expect_error(
    search_wikibase_entities(
      search_term = "Estonian National Museum",
      language = "en",
      wikibase_api_url = "https://invalid.wikidata.org/w/api.php"
    ),
    regexp = "Could not resolve host|Failed to connect|Name or service not known",
    info = "Expected a network-related error due to invalid API URL"
  )
})

0 commit comments

Comments
 (0)