Skip to content

Commit 551ddbd

Browse files
committed
Better handling of existing properties and items
1 parent d47567a commit 551ddbd

16 files changed

Lines changed: 237 additions & 92 deletions

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: wbdataset
22
Title: Making Datasets Truly Interoperable and Reusable in R with Wikibase
3-
Version: 0.1.1034
4-
Date: 2024-03-30
3+
Version: 0.1.1035
4+
Date: 2024-04-03
55
Authors@R:
66
c(person(given="Daniel", family="Antal",
77
email= "daniel.antal@dataobservatory.eu",

NEWS.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
# wbdataset
22

3+
# 0.1.1035
4+
* `get_property_definition()` works with CSRF, `check_existing_property()` returns
5+
correct datatype. Improved documentation and unit tests.
6+
37
# 0.1.1034
48
* Remove the unnecessary `log_path` parameter from the create and copy function
59
families.
610
* Many corrections in the formatting and the contents of the function reference.
711

812
# wbdataset 0.1.1023
9-
* [check_existing_item()], [check_existing_property()]
13+
* `check_existing_item()`, `check_existing_property()`
1014

1115
# wbdataset 0.1.1014
1216
* copy_wikidata_item, copy_wikidata_property work with one or several items/properties

R/add_statement.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,13 @@ add_statement <- function(
2828
add_item_statement(
2929
qid = qid, pid = pid, o = o,
3030
wikibase_type == "string",
31-
wikibase_api_url = wikibase_api_url, csrf_token = csrf_token
31+
wikibase_api_url = wikibase_api_url, csrf = csrf
3232
)
3333
} else if (wikibase_type == "item") {
3434
add_item_statement(
3535
qid = qid, pid = pid, o = o,
3636
wikibase_type == "item",
37-
wikibase_api_url = wikibase_api_url, csrf_token = csrf_token
37+
wikibase_api_url = wikibase_api_url, csrf = csrf
3838
)
3939
} else if (wikibase_type == "numeric") {
4040

R/check_existing_item.R

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,45 @@
11
#' @title Check if a label already has an item.
2-
#' @description
3-
#' Avoid failed writing attempts by checking if a label already matches an item.
4-
#' @details
5-
#' A wrapper around
2+
#' @description Avoid failed writing attempts by checking if a label already
3+
#' matches an item.
4+
#' @details A wrapper around
65
#' \href{https://www.wikidata.org/w/api.php?action=help&modules=wbsearchentities}{MediaWiki
76
#' action=wbsearchentities}.
87
#' @param action Defaults to \code{"create_item"}.
98
#' @param search_term A label in the given language, for example, "Estonia".
9+
#' @param language A single language code that indicates the language of the
10+
#' label and description, using BCP 47-compliant language tags (e.g., "en" for
11+
#' English, "fr" for French). Defaults to \code{"en"} for English.
12+
#' @param wikibase_api_url The full URL of the Wikibase API, which is the
13+
#' address that the \code{wbdataset} R client sends requests to when
14+
#' interacting with the knowledge base. In this case it defaults to
15+
#' \code{'https://www.wikidata.org/w/api.php'}, Wikidata itself, where no
16+
#' CSRF is needed.
17+
#' @param csrf The CSRF token of your session, received with
18+
#' \code{\link{get_csrf}}, not needed if
19+
#' \code{wikibase_api_url="https://www.wikidata.org/w/api.php"}. Defaults
20+
#' to \code{NULL}.
1021
#' @inheritParams create_item
1122
#' @return A data.frame or NULL.
23+
#' @examples
24+
#' check_existing_item(
25+
#' search_term="Estonian National Museum",
26+
#' language = "en",
27+
#' wikibase_api_url="https://www.wikidata.org/w/api.php",
28+
#' csrf=NULL)
1229
#' @export
1330

14-
check_existing_item <- function(action = "create_item",
15-
search_term,
16-
language,
31+
check_existing_item <- function(search_term,
32+
language = "en",
1733
equivalence_property = NA_character_,
1834
equivalence_id = NA_character_,
1935
classification_property = NA_character_,
2036
classification_id = NA_character_,
37+
action = "create_item",
2138
log_file_name = NA_character_,
2239
data_curator = person("Unknown", "Person"),
23-
wikibase_api_url,
24-
csrf) {
40+
wikibase_api_url = "https://www.wikidata.org/w/api.php",
41+
csrf = NULL) {
42+
2543
action_timestamp <- action_timestamp_create()
2644
action_time <- Sys.time()
2745

@@ -154,7 +172,8 @@ check_existing_item <- function(action = "create_item",
154172
)
155173
)
156174

157-
return_ds$rowid <- defined(paste0("wbi:", as.character(return_ds$id_on_target)),
175+
prefix <- ifelse(wikibase_api_url=="https://www.wikidata.org/w/api.php", "wbi:", "wd:")
176+
return_ds$rowid <- defined(paste0(prefix, as.character(return_ds$id_on_target)),
158177
namespace = wikibase_api_url
159178
)
160179

R/check_existing_property.R

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,46 @@
11
#' @title Check if a label already has a property.
2-
#' @description
3-
#' Avoid failed writing attempts by checking if a label already matches an item.
4-
#' @details
5-
#' A wrapper around
2+
#' @description Avoid failed writing attempts by checking if a label already
3+
#' matches a property.
4+
#' @details A wrapper around
65
#' \href{https://www.wikidata.org/w/api.php?action=help&modules=wbsearchentities}{MediaWiki
76
#' action=wbsearchentities}.
87
#' @param action Defaults to \code{"create_property"}.
98
#' @param classification_property The instance of, or subclass of, or superclass
109
#' of property. Defaults to \code{NA_character_} when not used.
1110
#' @param classification_id The QID of the class. Defaults to
1211
#' \code{NA_character_} when not used.
12+
#' @param wikibase_api_url The full URL of the Wikibase API, which is the
13+
#' address that the \code{wbdataset} R client sends requests to when
14+
#' interacting with the knowledge base. In this case it defaults to
15+
#' \code{'https://www.wikidata.org/w/api.php'}, Wikidata itself, where no
16+
#' CSRF is needed.
17+
#' @param csrf The CSRF token of your session, received with
18+
#' \code{\link{get_csrf}}, not needed if
19+
#' \code{wikibase_api_url="https://www.wikidata.org/w/api.php"}. Defaults
20+
#' to \code{NULL}.
1321
#' @param search_term A label in the given language, for example, "Estonia".
1422
#' @inheritParams create_property
1523
#' @return A data.frame or NULL.
24+
#' @examples
25+
#' # No CSRF needed for Wikidata, but you will need it for Wikibase Suite
26+
#' check_existing_property(
27+
#' search_term="instance of",
28+
#' language = "en",
29+
#' wikibase_api_url="https://www.wikidata.org/w/api.php")
1630
#' @export
1731

1832
check_existing_property <- function(
19-
action = "create_property",
2033
search_term,
2134
language,
2235
equivalence_property = NA_character_,
2336
equivalence_id = NA_character_,
2437
classification_property = NA_character_,
2538
classification_id = NA_character_,
39+
action = "create_property",
2640
log_file_name = NA_character_,
2741
data_curator = person("Unknown", "Person"),
28-
wikibase_api_url,
29-
csrf) {
42+
wikibase_api_url="https://www.wikidata.org/w/api.php",
43+
csrf=NULL) {
3044

3145
action_timestamp <- action_timestamp_create()
3246
action_time <- Sys.time()
@@ -46,7 +60,9 @@ check_existing_property <- function(
4660
handle = csrf
4761
)
4862

49-
search_response <- httr::content(get_search, as = "parsed", type = "application/json")
63+
search_response <- httr::content(get_search,
64+
as = "parsed",
65+
type = "application/json")
5066

5167
if (!is.null(search_response$error)) {
5268
stop(paste(search_response$error$code, ": ", search_response$error$info))
@@ -79,8 +95,13 @@ check_existing_property <- function(
7995

8096
matching_property_data <- search_response$search[[which(exact_match)]]
8197

98+
99+
82100
if (action %in% c("create_property", "copy_property")) {
83-
datatype <- "property"
101+
datatype <- get_property_definition(matching_property_data$id, "en",
102+
wikibase_api_url = wikibase_api_url,
103+
return_type = "data.frame",
104+
csrf= csrf)$datatype
84105
comment_text <- glue::glue("A property with the label ", search_term, " already exists in this Wikibase.")
85106
}
86107

@@ -163,7 +184,10 @@ check_existing_property <- function(
163184
)
164185
)
165186

166-
return_ds$rowid <- defined(paste0("wbi:", as.character(return_ds$id_on_target)),
187+
prefix <- ifelse(wikibase_api_url=="https://www.wikidata.org/w/api.php",
188+
"wd:", "wbi:")
189+
190+
return_ds$rowid <- defined(paste0(prefix, as.character(return_ds$id_on_target)),
167191
namespace = wikibase_api_url
168192
)
169193

R/copy_wikidata_property.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ copy_wikidata_property <- function(
7474
pid_equivalence_property = "P2",
7575
language = c("en", "hu"),
7676
wikibase_api_url = "https://reprexbase.eu/jekyll/api.php",
77+
classification_property = NA_character_,
78+
classification_id = NA_character_,
7779
data_curator = NULL,
7880
log_file_name = NULL,
7981
csrf,

R/data-wikidata_countries_df.R

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
1-
#' Countries on Wikidata
2-
#'
1+
#' @title Countries on Wikidata
2+
#' @description
33
#' A small dataset of countries, including sovereign states and autonomous
44
#' parts of federal states taken from Wikidata.
5-
#'
65
#' @format ## `wikidata_countries_df`
76
#' A data frame with 4 rows and 4 columns:
87
#' \describe{

R/get_propety_definition.R

Lines changed: 48 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#' @title Get property definition
22
#' @description Receive the label and description of a property on the basis of
3-
#' its PID from a Wikibase instance. It will not add further statements about
4-
#' the property.
3+
#' its PID from a Wikibase instance. It will not add further statements about
4+
#' the property.
55
#' @details Currently the function has to choose a default language, \code{"en"}, for
66
#' cases where the user-chosen language returns empty labels and descriptions.
77
#' This feature may be elaborated or changed later. The function receives
@@ -10,13 +10,24 @@
1010
#' as aliases themselves can break the tidiness of the returned data.
1111
#' @param pid The PID of the property in the Wikibase instance (or Wikidata
1212
#' itself).
13-
#' @param wikibase_api_url Defaults to
14-
#' \code{"https://www.wikidata.org/w/api.php"}, may be replaced with a similar
15-
#' API address of a Wikibase instance. Private instances may require an
16-
#' authenticated session.
17-
#' @param return_type Defaults to \code{"data.frame"} that is suitable for receiving
18-
#' the information in stand-alone use. The \code{"JSON"} passes on a JSON
19-
#' string in the format that you may need it in further Wikibase API calls.
13+
#' @param wikibase_api_url The full URL of the Wikibase API, which is the
14+
#' address that the \code{wbdataset} R client sends requests to when
15+
#' interacting with the knowledge base. In this case it defaults to
16+
#' \code{'https://www.wikidata.org/w/api.php'}, Wikidata itself, where no CSRF
17+
#' is needed.
18+
#' @param csrf The CSRF token of your session, received with
19+
#' \code{\link{get_csrf}}, not needed if
20+
#' \code{wikibase_api_url="https://www.wikidata.org/w/api.php"}. Defaults to
21+
#' \code{NULL}.
22+
#' @param return_type Defaults to \code{"data.frame"} that is suitable for
23+
#' receiving the information in stand-alone use. The \code{"JSON"} passes on a
24+
#' JSON string in the format that you may need it in further Wikibase API
25+
#' calls.
26+
#' @param language Defaults to \code{c("en", "nl", "hu")}. A character vector of
27+
#' the languages in which the user wants to receive the labels and
28+
#' descriptions of the property. The vector of languages must use \href{https://en.wikipedia.org/wiki/IETF_language_tag}{BCP
29+
#' 47}-compliant language tags (e.g., "en" for English, and "hu"
30+
#' for Hungarian.)
2031
#' @return A data.frame of the \code{PID} with the labels and descriptions of
2132
#' the property in the selected languages. Alternatively, when
2233
#' \code{return_type="JSON"}, the same data prepared for use in a subsequent
@@ -30,14 +41,16 @@
3041
#' get_property_definition(pid = "P2047", return_type = "data.frame")
3142
#'
3243
#' # Receive JSON for copying with wbeditentity
33-
#' get_property_definition(pid = "P2047", languages = c("en", "hu"))
44+
#' get_property_definition(pid = "P2047", language = c("en", "hu"))
3445
#' @export
3546

3647
get_property_definition <- function(
3748
pid,
3849
language = c("en", "nl", "hu"),
3950
wikibase_api_url = "https://www.wikidata.org/w/api.php",
40-
return_type = "JSON") {
51+
return_type = "JSON",
52+
csrf = NULL) {
53+
4154
## Ensure that the pid is a character string starting with P followed by
4255
## numbers.
4356
pid <- as.character(pid)
@@ -74,9 +87,10 @@ get_property_definition <- function(
7487
safely_post <- purrr::safely(httr::POST, NULL)
7588

7689
recevied_claim <- safely_post(
77-
"https://www.wikidata.org/w/api.php",
90+
wikibase_api_url,
7891
body = claim_body,
79-
encode = "form"
92+
encode = "form",
93+
handle = csrf
8094
)
8195

8296
if (!is.null(recevied_claim$error)) {
@@ -97,12 +111,30 @@ get_property_definition <- function(
97111
)
98112
}
99113

100-
if (!is_response_success(response)) { # internal assertion for susccessful response
114+
115+
pid_is_missing <- ifelse(!is.null(response[[1]][[1]]$missing), TRUE, FALSE)
116+
117+
118+
if (!is_response_success(response)) {
119+
# internal assertion for successful response
101120
# Exception: retrieval of the property was not successful, even though we
102121
# did not get an explicit error before.
103122
message("Could not access ", pid)
104123
message(response$error$messages[[1]]) # print the error message for debugging
124+
if (return_type == "data.frame") {
125+
return(error_data_frame)
126+
}
127+
if (return_type != "data.frame") { # in any other case send JSON
128+
return(error_json)
129+
}
130+
}
131+
132+
if (pid_is_missing) {
133+
# No such property
134+
message("Property does not exist ", pid)
135+
message(response$error$messages[[1]]) # print the error message for debugging
105136
if (return_type == "data.frame") { # if the user needs a data.frame
137+
error_data_frame$success <- TRUE
106138
return(error_data_frame)
107139
}
108140
if (return_type != "data.frame") { # in any other case send JSON
@@ -178,6 +210,7 @@ get_property_definition <- function(
178210
data.frame(
179211
language = names(descriptions_vector),
180212
description = as.character(descriptions_vector),
213+
datatype = rep(response$entities[[1]]$datatype, length(descriptions_vector)),
181214
success = TRUE
182215
),
183216
by = "language"
@@ -193,3 +226,4 @@ get_property_definition <- function(
193226
)
194227
}
195228
}
229+

0 commit comments

Comments
 (0)