Skip to content

Commit c184459

Browse files
committed
internal functions, unit tests
1 parent 536d8b0 commit c184459

32 files changed

Lines changed: 928 additions & 269 deletions

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: wbdataset
22
Title: Making Datasets Truly Interoperable and Reusable in R with Wikibase
3-
Version: 0.1.1044
4-
Date: 2024-04-11
3+
Version: 0.1.1046
4+
Date: 2024-04-14
55
Authors@R:
66
c(person(given="Daniel", family="Antal",
77
email= "daniel.antal@dataobservatory.eu",

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ export(here)
2020
export(is_pid)
2121
export(is_qid)
2222
export(left_join_column)
23+
export(new_wikibase_session)
2324
importFrom(assertthat,assert_that)
2425
importFrom(dataset,dataset_df)
2526
importFrom(dataset,defined)

R/add_statement.R

Lines changed: 42 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,60 @@
#' @title Add statements to an item
#' @description Adds a claim (statement) to a Wikibase item via `wbcreateclaim`.
#' See the
#' \href{https://www.wikidata.org/w/api.php?action=help&modules=wbcreateclaim}{MediaWiki
#' API help}.
#' @param qid The QID of the item in the Wikibase instance.
#' @param pid The PID of the property to be added.
#' @param o The object of the statement (either a QID, string, or number).
#' @param wikibase_type One of `"item"`, `"string"` (or `"external-id"`), or
#'   `"numeric"`. Must be a single character string; when omitted, the first
#'   choice (`"item"`) is used. `"numeric"` is not implemented yet and raises
#'   an error.
#' @param wikibase_api_url The full URL of the Wikibase API (must end with `api.php`).
#' @param csrf The CSRF token of your session, received with
#' \code{\link{get_csrf}}.
#' @return A data.frame describing the created statement.
#' @export
add_statement <- function(
    qid, pid, o,
    wikibase_type = c("item", "string", "numeric", "external-id"),
    wikibase_api_url = "https://reprexbase.eu/demowiki/api.php",
    csrf) {
  supported_types <- c("item", "string", "numeric", "external-id")

  # The formal default enumerates every choice (length 4), so it would fail
  # the length-1 guard below. Resolve an omitted argument to the first choice,
  # which is what match.arg() does for an untouched default.
  if (missing(wikibase_type)) {
    wikibase_type <- supported_types[1]
  }

  if (!is.character(wikibase_type) || length(wikibase_type) != 1) {
    stop("add_statement(): 'wikibase_type' must be a single character string.")
  }

  # Errors on anything outside the supported choices.
  wikibase_type <- match.arg(wikibase_type, choices = supported_types)

  if (wikibase_type == "numeric") {
    stop(
      "The 'numeric' type is not yet implemented in add_statement(). ",
      "Please use a more specific method."
    )
  }

  if (wikibase_type == "item") {
    add_item_statement(
      qid = qid,
      pid = pid,
      o = o,
      wikibase_type = "item",
      wikibase_api_url = wikibase_api_url,
      csrf = csrf
    )
  } else {
    # "string" and "external-id" are handled by the same helper.
    add_id_statement(
      qid = qid,
      pid = pid,
      o = o,
      wikibase_type = "external-id", # this is what the API expects
      wikibase_api_url = wikibase_api_url,
      csrf = csrf
    )
  }
}

R/call_wbeditentity.R

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
#' @title Make a call to the wbeditentity API
#' @description Generalized interface for creating or editing a Wikibase entity (item or property).
#' Builds the form body for the `wbeditentity` action, POSTs it to
#' `wikibase_api_url`, and stops with the API's error code and info if the
#' parsed response contains an `error` element.
#' @param csrf_token The CSRF token to authorize the request.
#' @param wikibase_api_url The URL of the Wikibase API (must end with "api.php").
#' @param entity_data A JSON string representing the entity's data (labels, descriptions, etc.).
#' @param new_entity_type Optional. One of "item" or "property" if creating a new entity.
#' @param existing_id Optional. QID or PID if editing an existing entity.
#' @param summary Optional. Edit summary.
#' @param bot Logical. Whether this is a bot edit. Must be a single `TRUE` or
#'   `FALSE` (not `NA`).
#' @param csrf_handle Optional. httr handle for authenticated sessions.
#' @return Parsed response from the Wikibase API.
#' @keywords internal
call_wbeditentity <- function(csrf_token,
                              wikibase_api_url,
                              entity_data,
                              new_entity_type = NULL,
                              existing_id = NULL,
                              summary = NULL,
                              bot = FALSE,
                              csrf_handle = NULL) {
  # NA is logical but is neither TRUE nor FALSE; reject it here so that the
  # `if (bot)` below cannot fail with "missing value where TRUE/FALSE needed".
  if (!is.logical(bot) || length(bot) != 1 || is.na(bot)) {
    stop("The `bot` argument must be TRUE or FALSE.")
  }

  # The request body must exist before any optional field is attached to it.
  body <- list(
    action = "wbeditentity",
    data = entity_data,
    token = csrf_token,
    format = "json"
  )

  if (!is.null(existing_id)) {
    body$id <- existing_id
  }

  if (!is.null(new_entity_type)) {
    if (length(new_entity_type) != 1 ||
      !(new_entity_type %in% c("item", "property"))) {
      stop("Invalid value for `new_entity_type`: must be 'item' or 'property'.")
    }
    body$new <- new_entity_type
  }

  if (!is.null(summary)) {
    body$summary <- summary
  }

  if (bot) {
    body$bot <- "true"
  }

  response <- httr::POST(
    url = wikibase_api_url,
    body = body,
    encode = "form",
    handle = csrf_handle
  )

  parsed <- httr::content(response, as = "parsed", type = "application/json")

  if (!is.null(parsed$error)) {
    stop("wbeditentity error: ", parsed$error$code, ": ", parsed$error$info)
  }

  parsed
}

R/copy_wikidata_item.R

Lines changed: 28 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,14 @@
3838
#' will be created.
3939
#' @param csrf The CSRF token of your session, received with
4040
#' \code{\link{get_csrf}}.
41-
#' @param wikibase_session An optional list that contains any of the values of
42-
#' parameters \code{qid_equivalence_property}, \code{language},
43-
#' \code{wikibase_api_url}, \code{data_curator},\code{log_path} and
44-
#' \code{csrf} (for repeated use in a session.)
41+
#' @param wikibase_session An optional named list of default values to reuse
42+
#' across multiple function calls. If any of the main parameters (such as
43+
#' \code{language}, \code{data_curator}, \code{log_file_name},
44+
#' \code{equivalence_property}, \code{classification_property},
45+
#' \code{wikibase_api_url}, or \code{csrf}) are missing from the function
46+
#' call, their values will be taken from this list if available. This is
47+
#' useful in interactive workflows or scripts where the same context is
48+
#' reused.
4549
#' @importFrom assertthat assert_that
4650
#' @importFrom utils person
4751
#' @return Returns a \code{\link[dataset]{dataset_df}} object.
@@ -78,48 +82,22 @@ copy_wikidata_item <- function(
7882
log_file_name = NULL,
7983
csrf,
8084
wikibase_session = NULL) {
81-
if (!is.null(wikibase_session)) {
82-
# For repeated queries you can add your variables directly or in a list
83-
84-
if (!is.null(wikibase_session$qid_equivalence_property)) {
85-
qid_equivalence_property <- wikibase_session$qid_equivalence_property
86-
}
87-
88-
if (!is.null(wikibase_session$classification_property)) {
89-
classification_property <- wikibase_session$classification_property
90-
}
91-
92-
if (!is.null(wikibase_session$language)) {
93-
# overwrite session default if it does not exist
94-
if (is.null(language)) language <- wikibase_session$language
95-
}
96-
97-
if (!is.null(wikibase_session$data_curator)) {
98-
# overwrite session default if it does not exist
99-
if (is.null(data_curator)) data_curator <- wikibase_session$data_curator
100-
}
101-
102-
if (!is.null(wikibase_session$wikibase_api_url)) {
103-
wikibase_api_url <- wikibase_session$wikibase_api_url
104-
}
105-
106-
if (!is.null(wikibase_session$log_file_name)) {
107-
log_file_name <- wikibase_session$log_file_name
108-
}
109-
110-
if (!is.null(wikibase_session$csrf)) {
111-
csrf <- wikibase_session$csrf
112-
}
113-
}
114-
115-
# Assertions for correct inputs ------------------------------------------------
116-
117-
if (is.null(data_curator)) data_curator <- person("Jane", "Doe")
118-
if (is.null(log_file_name)) log_file_name <- ""
119-
120-
assertthat::assert_that(
121-
inherits(data_curator, "person"),
122-
msg = 'copy_wikidata_item(..., data_curator): data_curator must be a person, like person("Jane, "Doe").'
85+
language <- resolve_from_session("language", language, wikibase_session)
86+
data_curator <- resolve_from_session("data_curator", data_curator, wikibase_session)
87+
log_file_name <- resolve_from_session("log_file_name", log_file_name, wikibase_session)
88+
wikibase_api_url <- resolve_from_session("wikibase_api_url", wikibase_api_url, wikibase_session)
89+
equivalence_property <- resolve_from_session("wikibase_api_url", equivalence_property, wikibase_session)
90+
classification_property <- resolve_from_session("wikibase_api_url", classification_property, wikibase_session)
91+
csrf <- resolve_from_session("csrf", csrf, wikibase_session)
92+
93+
validate_create_entity_args(
94+
language = language,
95+
wikibase_api_url = wikibase_api_url,
96+
equivalence_property = equivalence_property,
97+
equivalence_id = equivalence_id,
98+
csrf = csrf,
99+
data_curator = data_curator,
100+
validated_action = "copy_wikidata_property()"
123101
)
124102

125103
if (is.null(qid_equivalence_property)) qid_equivalence_property <- NA_character_
@@ -313,10 +291,10 @@ copy_wikidata_item <- function(
313291
## See get_csrf, get_csrf_token.
314292
csrf_token <- get_csrf_token(csrf)
315293

316-
assertthat::assert_that(!is.null(csrf_token),
317-
msg = "You do not have a CSRF token; perhaps your session has expired.
318-
Try get_csrf() with your credentials."
319-
)
294+
295+
if (!is_valid_csrf(csrf_token)) {
296+
stop(validated_action, ": the csrf appears to be invalid.")
297+
}
320298

321299
# Posting the new item ---------------------------------------------------
322300
new_item <- httr::POST(

R/copy_wikidata_property.R

Lines changed: 26 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,14 @@
4040
#' will be created.
4141
#' @param csrf The CSRF token of your session, received with
4242
#' \code{\link{get_csrf}}.
43-
#' @param wikibase_session An optional list that contains any of the values of
44-
#' parameters \code{qid_equivalence_property}, \code{language},
45-
#' \code{wikibase_api_url}, \code{data_curator}, and \code{csrf} (for repeated
46-
#' use in a session.)
43+
#' @param wikibase_session An optional named list of default values to reuse
44+
#' across multiple function calls. If any of the main parameters (such as
45+
#' \code{language}, \code{data_curator}, \code{log_file_name},
46+
#' \code{equivalence_property}, \code{classification_property},
47+
#' \code{wikibase_api_url}, or \code{csrf}) are missing from the function
48+
#' call, their values will be taken from this list if available. This is
49+
#' useful in interactive workflows or scripts where the same context is
50+
#' reused.
4751
#' @importFrom assertthat assert_that
4852
#' @importFrom utils person
4953
#' @return Returns a \code{\link[dataset]{dataset_df}} object. The columns
@@ -80,40 +84,28 @@ copy_wikidata_property <- function(
8084
log_file_name = NULL,
8185
csrf,
8286
wikibase_session = NULL) {
83-
if (!is.null(wikibase_session)) {
84-
# For repeated queries you can add your variables directly or in a list
85-
86-
if (!is.null(wikibase_session$pid_equivalence_property)) {
87-
pid_equivalence_property <- wikibase_session$pid_equivalence_property
88-
}
89-
90-
if (!is.null(wikibase_session$language)) {
91-
# overwrite session default if it does not exist
92-
if (is.null(language)) language <- wikibase_session$language
93-
}
94-
95-
if (!is.null(wikibase_session$data_curator)) {
96-
# overwrite session default if it does not exist
97-
if (is.null(data_curator)) data_curator <- wikibase_session$data_curator
98-
}
99-
100-
if (!is.null(wikibase_session$wikibase_api_url)) {
101-
wikibase_api_url <- wikibase_session$wikibase_api_url
102-
}
87+
language <- resolve_from_session("language", language, wikibase_session)
88+
data_curator <- resolve_from_session("data_curator", data_curator, wikibase_session)
89+
log_file_name <- resolve_from_session("log_file_name", log_file_name, wikibase_session)
90+
wikibase_api_url <- resolve_from_session("wikibase_api_url", wikibase_api_url, wikibase_session)
91+
classification_property <- resolve_from_session("classification_property", classification_property, wikibase_session)
92+
csrf <- resolve_from_session("csrf", csrf, wikibase_session)
93+
94+
validate_create_entity_args(
95+
language = language,
96+
wikibase_api_url = wikibase_api_url,
97+
classification_property = classification_property,
98+
classification_id = classification_id,
99+
csrf = csrf,
100+
data_curator = data_curator,
101+
validated_action = "copy_wikidata_property()"
102+
)
103103

104-
if (!is.null(wikibase_session$log_file_name)) {
105-
log_file_name <- wikibase_session$log_file_name
106-
}
107104

108-
if (!is.null(wikibase_session$csrf)) {
109-
csrf <- wikibase_session$csrf
110-
}
105+
if (!is_valid_csrf(csrf)) {
106+
stop(validated_action, ": the csrf appears to be invalid.")
111107
}
112108

113-
# Assertions for correct inputs ------------------------------------------------
114-
if (is.null(data_curator)) data_curator <- person("Person", "Unknown")
115-
if (is.null(log_file_name)) log_file_name <- ""
116-
117109
assertthat::assert_that(
118110
inherits(data_curator, "person"),
119111
msg = 'copy_wikidata_item(..., data_curator): data_curator must be a person, like person("Jane, "Doe").'

0 commit comments

Comments
 (0)