11# ' @title Check for Existing Items in Wikibase
22# '
33# ' @description This function searches for existing items in a specified
4- # ' Wikibase instance that match a given label in a specified language. It helps
5- # ' prevent duplicate item creation by identifying existing matches.
4+ # ' Wikibase instance that match a given label in a specified language. It
5+ # ' helps prevent duplicate item creation by identifying existing matches.
66# '
77# ' @details The function interfaces with the Wikibase API's `wbsearchentities`
8- # ' action to perform a search based on the provided label (`search_term`) and
9- # ' language. It returns information about matching items, facilitating the
10- # ' management of data consistency within the Wikibase. See:
8+ # ' action to perform a search based on the provided label (`search_term`) and
9+ # ' language. It returns information about matching items, facilitating the
10+ # ' management of data consistency within the Wikibase. See:
1111# ' \href{https://www.wikidata.org/w/api.php?action=help&modules=wbsearchentities}{MediaWiki
1212# ' action=wbsearchentities}.
1313# '
1414# ' @param search_term A character string representing the label to search for in
1515# ' the Wikibase. For example, `"Estonian National Museum"`.
1616# ' @param language A character string specifying the language code of the label,
1717# ' adhering to BCP 47 standards (e.g., `"en"` for English). Defaults to
18- # ' `"en"`.
18+ # ' `"en"`. For more details, see \href{https://tools.ietf.org/html/bcp47}{BCP
19+ # ' 47}.
1920# ' @param action A character string indicating the action being performed.
2021# ' Defaults to `"create_item"`.
2122# ' @param log_file_name A character string specifying the name of the log file.
2223# ' Defaults to `NA_character_`.
2324# ' @param data_curator An object of class `person` representing the data
2425# ' curator. Defaults to `person("Unknown", "Person")`.
26+ # ' @param ambiguity_handling A character string indicating how to handle
27+ # ' ambiguous results: `"return_null"` or `"return_first"`. Defaults to
28+ # ' `"return_null"`.
2529# ' @return A `dataset_df` object containing information about the matching
26- # ' item(s), including action performed, item ID, label, description, language,
27- # ' and other metadata. Returns `NULL` if no matching items are found.
30+ # ' item(s), including action performed, item ID, label, description, language,
31+ # ' and other metadata. Returns `NULL` if no matching items are found.
2832# ' @inheritParams create_item
33+ # ' @importFrom glue glue
34+ # ' @importFrom dataset dataset_df defined
2935# ' @return A data.frame or NULL.
3036# ' @examples
3137# ' check_existing_item(
@@ -46,103 +52,53 @@ check_existing_item <- function(search_term,
4652 log_file_name = NA_character_ ,
4753 data_curator = person(" Unknown" , " Person" ),
4854 wikibase_api_url = " https://www.wikidata.org/w/api.php" ,
49- csrf = NULL ) {
50-
51- action_timestamp <- action_timestamp_create()
52- action_time <- Sys.time()
53-
55+ csrf = NULL ,
56+ ambiguity_handling = " return_null" ) {
5457
58+ # Validate inputs
5559 if (! is.character(search_term ) || length(search_term ) != 1 || nchar(search_term ) == 0 ) {
56- stop(" Invalid input in check_existing_item() : 'search_term' must be a non-empty character string." )
60+ stop(" Invalid input: 'search_term' must be a non-empty character string." )
5761 }
58-
5962 if (! is.character(language ) || length(language ) != 1 || nchar(language ) == 0 ) {
60- stop(" Invalid input in check_existing_item() : 'language' must be a non-empty character string." )
63+ stop(" Invalid input: 'language' must be a non-empty character string." )
6164 }
62-
6365 if (! is.character(wikibase_api_url ) || length(wikibase_api_url ) != 1 || ! grepl(" ^https?://" , wikibase_api_url )) {
64- stop(" Invalid input in check_existing_item(): 'wikibase_api_url' must be a valid URL string." )
65- }
66-
67- get_search <- httr :: POST(
68- wikibase_api_url ,
69- body = list (
70- action = " wbsearchentities" ,
71- search = search_term ,
72- language = language ,
73- formatversion = 2 ,
74- format = " json" ,
75- type = " item" ,
76- strictlanguage = " true"
77- ),
78- encode = " form" ,
79- handle = csrf
80- )
81-
82- search_response <- httr :: content(get_search ,
83- as = " parsed" ,
84- type = " application/json" )
85-
86- if (! is.null(search_response $ error )) {
87- stop(paste(search_response $ error $ code , " : " , search_response $ error $ info ))
88- }
89-
90-
91- if (search_response $ success == 1 ) {
92- if (length(search_response $ search ) == 0 ) {
93- # No match was found
94- return (NULL )
95- }
96- }
97-
98- is_label_language_match <- function (sr ) {
99- sr $ match $ language == language && sr $ label == search_term
66+ stop(" Invalid input: 'wikibase_api_url' must be a valid URL string." )
10067 }
10168
102- matching_items <- vapply(
103- 1 : length(search_response $ search ),
104- function (x ) search_response $ search [[x ]]$ id ,
105- character (1 )
69+ # Search for entities
70+ search_results <- search_wikibase_entities(
71+ search_term = search_term ,
72+ language = language ,
73+ type = " item" ,
74+ wikibase_api_url = wikibase_api_url ,
75+ csrf = csrf
10676 )
10777
108- exact_match <- vapply(
109- 1 : length(search_response $ search ),
110- function (x ) is_label_language_match(search_response $ search [[x ]]),
111- logical (1 )
78+ # Handle ambiguity
79+ resolved_item <- handle_search_term_ambiguity(
80+ search_results = search_results ,
81+ search_term = search_term ,
82+ language = language ,
83+ strategy = ambiguity_handling ,
84+ csrf = csrf
11285 )
11386
114- if (! is.logical(exact_match )) message (matching_props [exact_match ])
115-
116- if (sum(exact_match ) > 1 ) {
117- stop(" Multiple items [" , paste(matching_items , collapse = " , " ),
118- " ] are matching '" , search_term , " ' in language='" , language , " '." )
119- }
120-
121- if (! any(exact_match )) {
122- return (NULL )
123- }
124- if (is.null(search_response $ search [[1 ]])) {
87+ if (is.null(resolved_item )) {
12588 return (NULL )
12689 }
127- if (! is.list(search_response $ search [[1 ]])) {
128- return (NULL )
129- }
130- if (is.null(search_response $ search [[which(exact_match )]])) {
131- return (NULL )
132- }
133-
134- matching_items [exact_match ]
13590
136- matching_item_data <- search_response $ search [[which(exact_match )]]
137- comment_text <- glue :: glue(" An item with the label " , search_term , " already exists in this Wikibase." )
91+ # Prepare metadata
92+ action_timestamp <- action_timestamp_create()
93+ action_time <- Sys.time()
94+ comment_text <- glue :: glue(" An item with the label '{search_term}' already exists in this Wikibase." )
13895
96+ # Construct return data frame
13997 return_dataframe <- data.frame (
14098 action = action ,
141- id_on_target = matching_item_data $ id ,
142- label = matching_item_data $ label ,
143- description = ifelse(is.null(matching_item_data $ description ),
144- " " ,
145- matching_item_data $ description ),
99+ id_on_target = resolved_item $ id ,
100+ label = resolved_item $ label ,
101+ description = ifelse(is.null(resolved_item $ description ), " " , resolved_item $ description ),
146102 language = language ,
147103 datatype = " wikibase-item" ,
148104 wikibase_api_url = wikibase_api_url ,
@@ -157,11 +113,12 @@ check_existing_item <- function(search_term,
157113 )
158114
159115 description_text <- paste0(
160- " Failed item creation on Wikibase to " ,
161- wikibase_api_url , " with wbdataset: " , action , " () at " ,
116+ " Failed item creation on Wikibase at " ,
117+ wikibase_api_url , " with action: " , action , " at " ,
162118 substr(as.character(action_time ), 1 , 19 )
163119 )
164120
121+ # Create dataset_df object
165122 return_ds <- dataset_df(
166123 action = return_dataframe $ action ,
167124 id_on_target = defined(
@@ -217,11 +174,14 @@ check_existing_item <- function(search_term,
217174 )
218175 )
219176
220- prefix <- ifelse(wikibase_api_url == " https://www.wikidata.org/w/api.php" , " wbi:" , " wd:" )
221- return_ds $ rowid <- defined(paste0(prefix , as.character(return_ds $ id_on_target )),
177+ prefix <- ifelse(wikibase_api_url == " https://www.wikidata.org/w/api.php" ,
178+ " wbi:" ,
179+ " wd:" )
180+
181+ return_ds $ rowid <- defined(paste0(prefix ,
182+ as.character(return_ds $ id_on_target )),
222183 namespace = wikibase_api_url
223184 )
224185
225186 return_ds
226187}
227-
0 commit comments