Skip to content

Commit 7c427b6

Browse files
committed
Replaced prefer_first parameter with ambiguity_handling in check_existing_item(), check_existing_property(), and handle_search_term_ambiguity() to standardize ambiguity resolution strategies.
Updated search_wikibase_entities() to accept a type parameter, allowing explicit specification of entity types ("item" or "property") during searches. Modified related test cases to reflect the new parameter names and behaviors, ensuring consistency and clarity. Enhanced documentation and comments to align with the updated parameter conventions and improve code readability.
1 parent 68bf413 commit 7c427b6

12 files changed

Lines changed: 333 additions & 251 deletions

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: wbdataset
22
Title: Making Datasets Truly Interoperable and Reusable in R with Wikibase
3-
Version: 0.1.1043
4-
Date: 2024-04-10
3+
Version: 0.1.1044
4+
Date: 2024-04-11
55
Authors@R:
66
c(person(given="Daniel", family="Antal",
77
email= "daniel.antal@dataobservatory.eu",

R/check_existing_item.R

Lines changed: 52 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,37 @@
11
#' @title Check for Existing Items in Wikibase
22
#'
33
#' @description This function searches for existing items in a specified
4-
#' Wikibase instance that match a given label in a specified language. It helps
5-
#' prevent duplicate item creation by identifying existing matches.
4+
#' Wikibase instance that match a given label in a specified language. It
5+
#' helps prevent duplicate item creation by identifying existing matches.
66
#'
77
#' @details The function interfaces with the Wikibase API's `wbsearchentities`
8-
#' action to perform a search based on the provided label (`search_term`) and
9-
#' language. It returns information about matching items, facilitating the
10-
#' management of data consistency within the Wikibase. See:
8+
#' action to perform a search based on the provided label (`search_term`) and
9+
#' language. It returns information about matching items, facilitating the
10+
#' management of data consistency within the Wikibase. See:
1111
#' \href{https://www.wikidata.org/w/api.php?action=help&modules=wbsearchentities}{MediaWiki
1212
#' action=wbsearchentities}.
1313
#'
1414
#' @param search_term A character string representing the label to search for in
1515
#' the Wikibase. For example, `"Estonian National Museum"`.
1616
#' @param language A character string specifying the language code of the label,
1717
#' adhering to BCP 47 standards (e.g., `"en"` for English). Defaults to
18-
#' `"en"`.
18+
#' `"en"`. For more details, see \href{https://tools.ietf.org/html/bcp47}{BCP
19+
#' 47}.
1920
#' @param action A character string indicating the action being performed.
2021
#' Defaults to `"create_item"`.
2122
#' @param log_file_name A character string specifying the name of the log file.
2223
#' Defaults to `NA_character_`.
2324
#' @param data_curator An object of class `person` representing the data
2425
#' curator. Defaults to `person("Unknown", "Person")`.
26+
#' @param ambiguity_handling A character string indicating how to handle
27+
#' ambiguous results: `"return_null"` or `"return_first"`. Defaults to
28+
#' `"return_null"`.
2529
#' @return A `dataset_df` object containing information about the matching
26-
#' item(s), including action performed, item ID, label, description, language,
27-
#' and other metadata. Returns `NULL` if no matching items are found.
30+
#' item(s), including action performed, item ID, label, description, language,
31+
#' and other metadata. Returns `NULL` if no matching items are found.
2832
#' @inheritParams create_item
33+
#' @importFrom glue glue
34+
#' @importFrom dataset dataset_df defined
2935
#' @return A data.frame or NULL.
3036
#' @examples
3137
#' check_existing_item(
@@ -46,103 +52,53 @@ check_existing_item <- function(search_term,
4652
log_file_name = NA_character_,
4753
data_curator = person("Unknown", "Person"),
4854
wikibase_api_url = "https://www.wikidata.org/w/api.php",
49-
csrf = NULL) {
50-
51-
action_timestamp <- action_timestamp_create()
52-
action_time <- Sys.time()
53-
55+
csrf = NULL,
56+
ambiguity_handling = "return_null") {
5457

58+
# Validate inputs
5559
if (!is.character(search_term) || length(search_term) != 1 || nchar(search_term) == 0) {
56-
stop("Invalid input in check_existing_item(): 'search_term' must be a non-empty character string.")
60+
stop("Invalid input: 'search_term' must be a non-empty character string.")
5761
}
58-
5962
if (!is.character(language) || length(language) != 1 || nchar(language) == 0) {
60-
stop("Invalid input in check_existing_item(): 'language' must be a non-empty character string.")
63+
stop("Invalid input: 'language' must be a non-empty character string.")
6164
}
62-
6365
if (!is.character(wikibase_api_url) || length(wikibase_api_url) != 1 || !grepl("^https?://", wikibase_api_url)) {
64-
stop("Invalid input in check_existing_item(): 'wikibase_api_url' must be a valid URL string.")
65-
}
66-
67-
get_search <- httr::POST(
68-
wikibase_api_url,
69-
body = list(
70-
action = "wbsearchentities",
71-
search = search_term,
72-
language = language,
73-
formatversion = 2,
74-
format = "json",
75-
type = "item",
76-
strictlanguage = "true"
77-
),
78-
encode = "form",
79-
handle = csrf
80-
)
81-
82-
search_response <- httr::content(get_search,
83-
as = "parsed",
84-
type = "application/json")
85-
86-
if (!is.null(search_response$error)) {
87-
stop(paste(search_response$error$code, ": ", search_response$error$info))
88-
}
89-
90-
91-
if (search_response$success == 1) {
92-
if (length(search_response$search) == 0) {
93-
# No match was found
94-
return(NULL)
95-
}
96-
}
97-
98-
is_label_language_match <- function(sr) {
99-
sr$match$language == language && sr$label == search_term
66+
stop("Invalid input: 'wikibase_api_url' must be a valid URL string.")
10067
}
10168

102-
matching_items <- vapply(
103-
1:length(search_response$search),
104-
function(x) search_response$search[[x]]$id,
105-
character(1)
69+
# Search for entities
70+
search_results <- search_wikibase_entities(
71+
search_term = search_term,
72+
language = language,
73+
type = "item",
74+
wikibase_api_url = wikibase_api_url,
75+
csrf = csrf
10676
)
10777

108-
exact_match <- vapply(
109-
1:length(search_response$search),
110-
function(x) is_label_language_match(search_response$search[[x]]),
111-
logical(1)
78+
# Handle ambiguity
79+
resolved_item <- handle_search_term_ambiguity(
80+
search_results = search_results,
81+
search_term = search_term,
82+
language = language,
83+
strategy = ambiguity_handling,
84+
csrf = csrf
11285
)
11386

114-
if (!is.logical(exact_match)) message (matching_props[exact_match])
115-
116-
if (sum(exact_match) > 1) {
117-
stop("Multiple items [", paste(matching_items, collapse = ", "),
118-
"] are matching '", search_term, "' in language='", language, "'.")
119-
}
120-
121-
if (!any(exact_match)) {
122-
return(NULL)
123-
}
124-
if (is.null(search_response$search[[1]])) {
87+
if (is.null(resolved_item)) {
12588
return(NULL)
12689
}
127-
if (!is.list(search_response$search[[1]])) {
128-
return(NULL)
129-
}
130-
if (is.null(search_response$search[[which(exact_match)]])) {
131-
return(NULL)
132-
}
133-
134-
matching_items[exact_match]
13590

136-
matching_item_data <- search_response$search[[which(exact_match)]]
137-
comment_text <- glue::glue("An item with the label ", search_term, " already exists in this Wikibase.")
91+
# Prepare metadata
92+
action_timestamp <- action_timestamp_create()
93+
action_time <- Sys.time()
94+
comment_text <- glue::glue("An item with the label '{search_term}' already exists in this Wikibase.")
13895

96+
# Construct return data frame
13997
return_dataframe <- data.frame(
14098
action = action,
141-
id_on_target = matching_item_data$id,
142-
label = matching_item_data$label,
143-
description = ifelse(is.null(matching_item_data$description),
144-
"",
145-
matching_item_data$description),
99+
id_on_target = resolved_item$id,
100+
label = resolved_item$label,
101+
description = ifelse(is.null(resolved_item$description), "", resolved_item$description),
146102
language = language,
147103
datatype = "wikibase-item",
148104
wikibase_api_url = wikibase_api_url,
@@ -157,11 +113,12 @@ check_existing_item <- function(search_term,
157113
)
158114

159115
description_text <- paste0(
160-
"Failed item creation on Wikibase to ",
161-
wikibase_api_url, " with wbdataset:", action, "() at ",
116+
"Failed item creation on Wikibase at ",
117+
wikibase_api_url, " with action: ", action, " at ",
162118
substr(as.character(action_time), 1, 19)
163119
)
164120

121+
# Create dataset_df object
165122
return_ds <- dataset_df(
166123
action = return_dataframe$action,
167124
id_on_target = defined(
@@ -217,11 +174,14 @@ check_existing_item <- function(search_term,
217174
)
218175
)
219176

220-
prefix <- ifelse(wikibase_api_url == "https://www.wikidata.org/w/api.php", "wbi:", "wd:")
221-
return_ds$rowid <- defined(paste0(prefix, as.character(return_ds$id_on_target)),
177+
prefix <- ifelse(wikibase_api_url == "https://www.wikidata.org/w/api.php",
178+
"wbi:",
179+
"wd:")
180+
181+
return_ds$rowid <- defined(paste0(prefix,
182+
as.character(return_ds$id_on_target)),
222183
namespace = wikibase_api_url
223184
)
224185

225186
return_ds
226187
}
227-

0 commit comments

Comments
 (0)