Skip to content

Commit 04ccfc0

Browse files
committed
documentation improvements and new mocked tests
1 parent 134bcc3 commit 04ccfc0

17 files changed

Lines changed: 343 additions & 111 deletions

DESCRIPTION

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ DOI: 10.5281/zenodo.14335287
2222
URL: https://wbdataset.dataobservatory.eu/
2323
BugReports: https://github.com/dataobservatory-eu/wbdataset/issues
2424
Suggests:
25+
httptest2,
2526
keyring,
2627
knitr,
2728
rmarkdown,

R/copy_wikidata_item.R

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,16 +33,15 @@
3333
#' api.php. It is either given as a parameter or resolved from
3434
#' \code{wikibase_session}.
3535
#' @param data_curator The name of the data curator who runs the function and
36-
#' creates the log file, created with \link[utils]{person}.
37-
#' It is either given as a parameter or resolved from
38-
#' \code{wikibase_session}. If no curator is given, then filled with
39-
#' \code{person("Unknown", "Curator")}.
36+
#' creates the log file, created with \link[utils]{person}. It is either given
37+
#' as a parameter or resolved from \code{wikibase_session}. If no curator is
38+
#' given, it defaults to \code{person("Unknown", "Curator")}.
4039
#' @param log_file_name An explicitly stated full path to a possible CSV log
4140
#' file, defaults to \code{NULL}. If the value is \code{NULL}, no log file
4241
#' will be created.
4342
#' @param csrf The CSRF token of your session, received with
44-
#' \code{\link{get_csrf}}. It is either given as a parameter or resolved
45-
#' from \code{wikibase_session}.
43+
#' \code{\link{get_csrf}}. It is either given as a parameter or resolved from
44+
#' \code{wikibase_session}.
4645
#' @param wikibase_session An optional named list of default values to reuse
4746
#' across multiple function calls. If any of the main parameters (such as
4847
#' \code{language}, \code{data_curator}, \code{log_file_name},
@@ -53,8 +52,8 @@
5352
#' reused.
5453
#' @importFrom assertthat assert_that
5554
#' @importFrom utils person
56-
#' @return Returns a \code{\link[dataset]{dataset_df}} object.
57-
#' The columns are:\cr
55+
#' @return Returns a \code{\link[dataset]{dataset_df}} object. The columns
56+
#' are:\cr
5857
#' \describe{
5958
#' \item{\code{rowid}}{A row identifier. }
6059
#' \item{\code{action}}{\code{copy_wikidata_item}}
@@ -73,7 +72,7 @@
7372
#' \item{\code{time}}{The time when the action started.}
7473
#' \item{\code{logfile}}{The name of the CSV logfile.}
7574
#' }
76-
#' The number of rows corresponds to the length of the qid_on_wikidata vector.
75+
#' The number of rows equals the length of \code{qid_on_wikidata}.
7776
#' @export
7877

7978
copy_wikidata_item <- function(

R/get_csrf.R

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@
2929
#' among other data.
3030
#' @importFrom httr handle GET POST content
3131
#' @export
32+
#' @examples
33+
#' \dontrun{
34+
#' get_csrf("user", "pass", "https://example.org/api.php")
35+
#' }
3236

3337
get_csrf <- function(username, password, wikibase_api_url) {
3438
check_api_url(wikibase_api_url = wikibase_api_url) # check if ends with api.php

R/left_join_column.R

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,21 @@
11
#' @title Join Wikidata claims to a dataset by QID
22
#'
33
#' @description For each item in a data frame containing Wikidata QIDs, this
4-
#' function retrieves the value of a specified property using the Wikibase API,
5-
#' and joins it back to the input data.
4+
#' function retrieves the value of a specified property using the Wikibase
5+
#' API, and joins it back to the input data.
66
#'
77
#' @details This function queries the Wikibase API for claims (statements)
8-
#' related to a given property for each QID in the dataset. It returns only the
9-
#' preferred or first available value for each item (see \code{get_claim(first =
10-
#' TRUE)} for details).
8+
#' related to a given property for each QID in the dataset. It returns only
9+
#' the preferred or first available value for each item
10+
#' (see \code{get_claim(first = TRUE)} for details).
1111
#'
12-
#' Errors such as missing properties or API issues are gracefully handled, and
13-
#' NA values are returned where no claim is available. If the dataset already
14-
#' contains a column with the same name as the property, it will be replaced in
15-
#' the joined output.
12+
#' Errors such as missing properties or API issues are gracefully handled, and
13+
#' NA values are returned where no claim is available. If the dataset already
14+
#' contains a column with the same name as the property, it will be replaced
15+
#' in the joined output.
1616
#'
17-
#' This function is useful for enriching tabular data with values stored in
18-
#' Wikidata or another Wikibase-compatible knowledge base.
17+
#' This function is useful for enriching tabular data with values stored in
18+
#' Wikidata or another Wikibase-compatible knowledge base.
1919
#'
2020
#' @param ds A data frame that includes a column named \code{qid} with Wikidata
2121
#' QIDs.
@@ -32,10 +32,12 @@
3232
#'
3333
#' @export
3434

35-
left_join_column <- function(ds,
36-
property,
37-
wikibase_api_url = "https://www.wikidata.org/w/api.php",
38-
csrf = NULL) {
35+
left_join_column <- function(
36+
ds,
37+
property,
38+
wikibase_api_url = "https://www.wikidata.org/w/api.php",
39+
csrf = NULL) {
40+
3941
safely_get_claim <- purrr::safely(get_claim, NULL)
4042

4143
result_df <- vector("list", length = nrow(ds))

R/search_wikibase_entities.R

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,13 @@
1010
#' @importFrom httr POST content
1111
#' @return A list containing search results from the Wikibase API.
1212
#' @keywords internal
13-
search_wikibase_entities <- function(search_term,
14-
language = "en",
15-
wikibase_api_url = "https://www.wikidata.org/w/api.php",
16-
type,
17-
csrf = NULL) {
13+
14+
search_wikibase_entities <- function(
15+
search_term,
16+
language = "en",
17+
wikibase_api_url = "https://www.wikidata.org/w/api.php",
18+
type,
19+
csrf = NULL) {
1820
# Validate the 'type' parameter
1921
if (!type %in% c("item", "property")) {
2022
stop("Invalid 'type' parameter. Must be either 'item' or 'property'.")

R/validate_create_entity_args.R

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,29 @@
11
#' @keywords internal
2-
validate_create_entity_args <- function(label,
3-
description,
4-
language,
5-
wikibase_api_url,
6-
equivalence_property,
7-
equivalence_id,
8-
classification_property,
9-
classification_id,
10-
csrf,
11-
data_curator,
12-
validated_action) {
2+
validate_create_entity_args <- function(
3+
label,
4+
description,
5+
language,
6+
wikibase_api_url,
7+
equivalence_property,
8+
equivalence_id,
9+
classification_property,
10+
classification_id,
11+
csrf,
12+
data_curator,
13+
validated_action) {
1314

1415
if (!is.character(validated_action) || length(validated_action) !=1 || nchar(validated_action)==0 ) {
1516
stop("validate_copy_entity_args(..., validated_action) : 'validated_action' must be a non-empty character string.")
1617
}
1718

1819

19-
if (is.na(label) || !is.character(label) || length(label) !=1 || nchar(label) == 0) {
20+
if (is.na(label) || !is.character(label) ||
21+
length(label) !=1 || nchar(label) == 0) {
2022
stop(validated_action, ": 'label' must be a non-empty character string.")
2123
}
2224

23-
if (any(is.na(language)) || !any(is.character(language)) || length(language) < 1 || any(nchar(language))<0 ) {
25+
if (any(is.na(language)) || !any(is.character(language)) ||
26+
length(language) < 1 || any(nchar(language))<0 ) {
2427
stop(validated_action, ": 'language' must be a non-empty character vector.")
2528
}
2629

@@ -30,33 +33,40 @@ validate_create_entity_args <- function(label,
3033
stop(validated_action, ": 'wikibase_api_url' must be a valid URL.")
3134
}
3235

33-
if (!is.character(equivalence_property) || length(equivalence_property) != 1) {
36+
if (!is.character(equivalence_property) ||
37+
length(equivalence_property) != 1) {
3438
stop(validated_action, ": 'equivalence_property' must be a non-empty character string.")
3539
}
3640

37-
if (!is.character(classification_property) || length(classification_property) != 1) {
41+
if (!is.character(classification_property) ||
42+
length(classification_property) != 1) {
3843
stop(validated_action, ": 'classification_property' must be a non-empty character string.")
3944
}
4045

4146

42-
if (!is.null(description) && (!is.character(description) || length(description) != 1)) {
47+
if (!is.null(description) && (!is.character(description)
48+
|| length(description) != 1)) {
4349
stop(validated_action, ": 'description' must be a character string or NULL.")
4450
}
4551

46-
if (!is.null(data_curator) && !inherits(data_curator, "person")) {
52+
if (!is.null(data_curator) &&
53+
!inherits(data_curator, "person")) {
4754
stop(validated_action, ": 'data_curator' must be a person() object.")
4855
}
4956

50-
if (!is.na(equivalence_id) && is.na(equivalence_property)) {
57+
if (!is.na(equivalence_id) &&
58+
is.na(equivalence_property)) {
5159
stop(validated_action, ": Cannot provide an 'equivalence_id' without an 'equivalence_property'.")
5260
}
5361

5462
# Optionally: validate classification logic
55-
if (!is.na(classification_property) && is.na(classification_id)) {
63+
if (!is.na(classification_property) &&
64+
is.na(classification_id)) {
5665
stop(validated_action, ": classification_property provided without classification_id.")
5766
}
5867

59-
if (!is.na(classification_id) && is.na(classification_property)) {
68+
if (!is.na(classification_id) &&
69+
is.na(classification_property)) {
6070
stop(validated_action, ": classification_id provided without classification_property.")
6171
}
6272

data-raw/login_to_wikibase.R

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Helper script: log in to a Wikibase instance hosted on reprexbase.eu with a
# password that is stored in the system keyring.

# library() stops with an error when keyring is not installed; require() would
# only return FALSE and let the script fail later, inside key_get().
library(keyring)

#' Log in to a reprexbase.eu Wikibase instance
#'
#' Reads the password stored under the service name \code{instance} in the
#' \code{"reprexbase"} keyring and passes it, together with \code{username},
#' to \code{get_csrf()}.
#'
#' @param instance Name of the instance. It is used both as the keyring
#'   service name and as the path segment of the API URL
#'   (\code{https://reprexbase.eu/<instance>/api.php}).
#' @param username The user name passed on to \code{get_csrf()}.
#' @return The value returned by \code{get_csrf()}.
login_to_wikibase <- function(instance, username) {
  get_csrf(
    username,
    password = keyring::key_get(instance, keyring = "reprexbase"),
    wikibase_api_url = sprintf("https://reprexbase.eu/%s/api.php", instance)
  )
}
10+
11+

data-raw/wikidata_countries_df.R

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
## code to prepare the `wikidata_countries_df` dataset
22

33
wikidata_countries <- c(
4-
"http://www.wikidata.org/entity/Q756617", "http://www.wikidata.org/entity/Q347",
5-
"http://www.wikidata.org/entity/Q3908", "http://www.wikidata.org/entity/Q1246"
4+
"http://www.wikidata.org/entity/Q756617",
5+
"http://www.wikidata.org/entity/Q347",
6+
"http://www.wikidata.org/entity/Q3908",
7+
"http://www.wikidata.org/entity/Q1246"
68
)
9+
710
wikidata_countries_df <- get_item(
811
qid = wikidata_countries,
912
language = "en",

man/copy_wikidata_item.Rd

Lines changed: 7 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/get_csrf.Rd

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)