From 5473af38edca25278f33ce2da29c56c233ac8547 Mon Sep 17 00:00:00 2001 From: antaldaniel Date: Thu, 20 Feb 2025 09:18:22 +0100 Subject: [PATCH] 0.1.1016 --- DESCRIPTION | 4 +- R/copy_wikidata_item.R | 230 ++++++++++++++++-------- R/copy_wikidata_property.R | 169 ++++++++++++----- R/create_item.R | 34 +++- R/create_property.R | 32 +++- R/get_wikidata_item.R | 35 ++-- R/item_identity_datastring_create.R | 2 +- R/left_join_column.R | 32 ++-- README.Rmd | 2 +- README.md | 2 +- man/copy_wikidata_item.Rd | 18 +- man/copy_wikidata_property.Rd | 14 +- man/create_item.Rd | 8 +- man/create_property.Rd | 8 +- tests/testthat/test-get_wikidata_item.R | 6 +- tests/testthat/test-left_join_column.R | 10 +- vignettes/start.Rmd | 26 ++- 17 files changed, 457 insertions(+), 175 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ff6f79c..af54945 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: wbdataset Title: Making Datasets Truly Interoperable and Reusable in R with Wikibase -Version: 0.1.1015 -Date: 2024-02-19 +Version: 0.1.1016 +Date: 2025-02-20 Authors@R: c(person(given="Daniel", family="Antal", email= "daniel.antal@dataobservatory.eu", diff --git a/R/copy_wikidata_item.R b/R/copy_wikidata_item.R index c3ccb66..cdc7484 100644 --- a/R/copy_wikidata_item.R +++ b/R/copy_wikidata_item.R @@ -17,7 +17,7 @@ #' single valid QID or a vector of several QIDs. #' @param qid_equivalence_property The QID in Wikibase that records the #' equivalent Wikidata QID as an external ID. -#' @param language A vector of languages codes, for example, \code{c("en", +#' @param language A vector of language codes, for example, \code{c("en", #' "et")}. #' @param wikibase_api_url For example, #' \code{'https://reprexbase.eu/demowiki/api.php'}. @@ -27,6 +27,10 @@ #' \code{\link{tempdir()}}. #' @param csrf The CSRF token of your session, received with #' \code{\link{get_csrf}}.
+#' @param wikibase_session An optional list that contains any of the values of +#' parameters \code{qid_equivalence_property}, \code{language}, +#' \code{wikibase_api_url}, \code{data_curator}, \code{log_path} and +#' \code{csrf} (for repeated use in a session). #' @importFrom assertthat assert_that #' @importFrom utils person #' @return Returns a dataset_df object. The columns are: @@ -52,11 +56,38 @@ copy_wikidata_item <- function( qid_on_wikidata = "Q4", qid_equivalence_property = "P35", - languages = c("en", "nl", "hu" ), + language = c("en", "nl", "hu"), wikibase_api_url = "https://reprexbase.eu/jekyll/api.php", data_curator = NULL, log_path = tempdir(), - csrf) { + csrf, + wikibase_session = NULL) { + if (!is.null(wikibase_session)) { + # For repeated queries you can add your variables directly or in a list + + if (!is.null(wikibase_session$qid_equivalence_property)) { + qid_equivalence_property <- wikibase_session$qid_equivalence_property + } + + if (!is.null(wikibase_session$language)) { + language <- wikibase_session$language + } + if (!is.null(wikibase_session$data_curator)) { + data_curator <- wikibase_session$data_curator + } + + if (!is.null(wikibase_session$wikibase_api_url)) { + wikibase_api_url <- wikibase_session$wikibase_api_url + } + + if (!is.null(wikibase_session$log_path)) { + log_path <- wikibase_session$log_path + } + + if (!is.null(wikibase_session$csrf)) { + csrf <- wikibase_session$csrf + } + } # Assertions for correct inputs ------------------------------------------------ @@ -65,29 +95,30 @@ copy_wikidata_item <- function( assertthat::assert_that( inherits(data_curator, "person"), - msg='copy_wikidata_item(..., data_curator): data_curator must be a person, like person("Jane, "Doe").') + msg = 'copy_wikidata_item(..., data_curator): data_curator must be a person, like person("Jane", "Doe").'
+ ) if (is.null(qid_equivalence_property)) qid_equivalence_property <- NA_character_ - if ( length(qid_on_wikidata) > 1) { - + if (length(qid_on_wikidata) > 1) { return_log_file <- copy_wikidata_items( - qid_on_wikidata = qid_on_wikidata , + qid_on_wikidata = qid_on_wikidata, qid_equivalence_property = qid_equivalence_property, - languages = languages, + language = language, wikibase_api_url = wikibase_api_url, data_curator = data_curator, log_path = log_path, - csrf = csrf) + csrf = csrf + ) return_log_file$rowid <- defined( return_log_file$id_on_target, label = "Wikibase QID", - namespace = return_log_file$wikibase_api_url[1]) + namespace = return_log_file$wikibase_api_url[1] + ) return(return_log_file) - - } + } # Timestamping --------------------------------------------------------------------- action_time <- Sys.time() @@ -160,10 +191,10 @@ copy_wikidata_item <- function( ) write.csv(return_dataframe, - file = file.path(log_path, log_file_name), - row.names = FALSE, - na = "NA", - fileEncoding = "UTF-8" + file = file.path(log_path, log_file_name), + row.names = FALSE, + na = "NA", + fileEncoding = "UTF-8" ) return(return_dataframe) @@ -173,14 +204,14 @@ copy_wikidata_item <- function( # If the user wants to copy non-existing descriptions, we will replace them # with an empty string. 
- labels_present <- languages[which(languages %in% names(response$entities[[1]]$labels))] - labels_missing <- languages[which(!languages %in% names(response$entities[[1]]$labels))] + labels_present <- language[which(language %in% names(response$entities[[1]]$labels))] + labels_missing <- language[which(!language %in% names(response$entities[[1]]$labels))] - descriptions_present <- languages[which(languages %in% names(response$entities[[1]]$descriptions))] - descriptions_missing <- languages[which(!languages %in% names(response$entities[[1]]$descriptions))] + descriptions_present <- language[which(language %in% names(response$entities[[1]]$descriptions))] + descriptions_missing <- language[which(!language %in% names(response$entities[[1]]$descriptions))] - aliases_present <- languages[which(languages %in% names(response$entities[[1]]$aliases))] - aliases_missing <- languages[which(!languages %in% names(response$entities[[1]]$aliases))] + aliases_present <- language[which(language %in% names(response$entities[[1]]$aliases))] + aliases_missing <- language[which(!language %in% names(response$entities[[1]]$aliases))] labels_missing ## Set a default later, this is now hard coded to English but could be a parameter. @@ -240,7 +271,7 @@ copy_wikidata_item <- function( csrf_token <- get_csrf_token(csrf) assertthat::assert_that(!is.null(csrf_token), - msg = "You do not have a CSRF token; perhaps your session has expired. + msg = "You do not have a CSRF token; perhaps your session has expired. Try get_csrf() with your credentials." 
) @@ -260,19 +291,23 @@ copy_wikidata_item <- function( # See if the created POST via wbeditentity was successful created_item_response <- httr::content( - new_item, as = "parsed", type = "application/json") + new_item, + as = "parsed", type = "application/json" + ) # Creating the log file and the returned log data.frame ---------------------- successful_post <- is_response_success(created_item_response) - if ( successful_post ) { + if (successful_post) { # Successfully created the item, try to add the equivalence statement # before returning log data. - message("Successfully created item ", - created_item_response$entity$id, " (", - created_item_response$entity$labels$en$value, ")") + message( + "Successfully created item ", + created_item_response$entity$id, " (", + created_item_response$entity$labels$en$value, ")" + ) if (is_pid(qid_equivalence_property)) { # If there is a meaningful equivalence property @@ -309,20 +344,18 @@ copy_wikidata_item <- function( ) write.csv(return_dataframe, - file = file.path(log_path, log_file_name), - row.names = FALSE, - na = "NA", - fileEncoding = "UTF-8" + file = file.path(log_path, log_file_name), + row.names = FALSE, + na = "NA", + fileEncoding = "UTF-8" ) - } else if ( # Case when we have clear message about a label conflict any(c( - ("wikibase-validator-label-conflict" %in% unlist(created_item_response$error$messages) ), - ("wikibase-validator-label-with-description-conflict" %in% unlist(created_item_response$error$messages) ) + ("wikibase-validator-label-conflict" %in% unlist(created_item_response$error$messages)), + ("wikibase-validator-label-with-description-conflict" %in% unlist(created_item_response$error$messages)) )) - ) { - + ) { # Unwrap error message and send it to terminal message_strings <- unlist(created_item_response$error$messages) message(message_strings) @@ -340,7 +373,7 @@ copy_wikidata_item <- function( ) # Try to find the English error message - error_languages <- unlist( + error_language <- unlist( 
lapply(error_messages, function(x) ifelse(length(x) >= 2, x[2], NA_character_)) ) @@ -350,16 +383,16 @@ copy_wikidata_item <- function( language <- "" if ( # we have English-language error message - any(error_languages == "en") + any(error_language == "en") ) { # The error message contains the already existing (conflicting) label - existing_label <- error_messages[[which(error_languages == "en")]][1] + existing_label <- error_messages[[which(error_language == "en")]][1] language <- "en" - } else if (any(!is.na(error_languages))) { + } else if (any(!is.na(error_language))) { # The error message contains the already existing (conflicting) label # but not in English, select the first language that is available, # if there are any messages that can be read in a human language. - nr_language <- which(!is.na(error_languages))[1] + nr_language <- which(!is.na(error_language))[1] existing_label <- error_messages[[nr_language]][1] language <- error_messages[[nr_language]][2] } @@ -381,12 +414,11 @@ copy_wikidata_item <- function( ) write.csv(return_dataframe, - file = file.path(log_path, log_file_name), - row.names = FALSE, - na = "NA", - fileEncoding = "UTF-8" + file = file.path(log_path, log_file_name), + row.names = FALSE, + na = "NA", + fileEncoding = "UTF-8" ) - } else { # Return an emptier data.frame if there was some error @@ -419,10 +451,10 @@ copy_wikidata_item <- function( # Save the log file write.csv(return_dataframe, - file = file.path(log_path, log_file_name), - row.names = FALSE, - na = "NA", - fileEncoding = "UTF-8" + file = file.path(log_path, log_file_name), + row.names = FALSE, + na = "NA", + fileEncoding = "UTF-8" ) } @@ -438,33 +470,49 @@ copy_wikidata_item <- function( id_on_target = defined( return_dataframe$id_on_target, label = paste0("QID on ", wikibase_api_url), - namespace = wikibase_api_url), + namespace = wikibase_api_url + ), label = defined( - return_dataframe$label, label = "Label of item"), + return_dataframe$label, + label = "Label of 
item" + ), description = defined( - return_dataframe$description, label = "Description of item"), + return_dataframe$description, + label = "Description of item" + ), language = defined( - return_dataframe$language, label = "Language of label and description"), + return_dataframe$language, + label = "Language of label and description" + ), datatype = return_dataframe$datatype, wikibase_api_url = wikibase_api_url, equivalence_property = defined( - return_dataframe$equivalence_property , + return_dataframe$equivalence_property, label = paste0("Equivalence property on ", wikibase_api_url), - namespace = wikibase_api_url), + namespace = wikibase_api_url + ), equivalence_id = defined( - return_dataframe$equivalence_id , + return_dataframe$equivalence_id, label = "Equivalent QID on Wikidata", - namespace = "https://www.wikidata.org/wiki/"), + namespace = "https://www.wikidata.org/wiki/" + ), success = return_dataframe$success, comment = return_dataframe$comment, time = return_dataframe$time, logfile = return_dataframe$logfile, dataset_bibentry = dublincore( - title = paste0("Wikibase Copy Item Log (", - strftime(action_time,'%Y-%m-%d %H:%M:%OS0'), ")"), - description = description_text, + title = paste0( + "Wikibase Copy Item Log (", + strftime(action_time, "%Y-%m-%d %H:%M:%OS0"), ")" + ), + description = description_text, creator = data_curator, - dataset_date = Sys.Date()) + dataset_date = Sys.Date() + ) + ) + + return_ds$rowid <- defined(paste0("wbi:", as.character(return_ds$id_on_target)), + namespace = wikibase_api_url ) return_ds @@ -472,33 +520,64 @@ copy_wikidata_item <- function( #' @rdname copy_wikidata_item #' @keywords internal -copy_wikidata_items <- function( qid_on_wikidata, - qid_equivalence_property, - languages, - wikibase_api_url, - data_curator, - log_path, - csrf) { - +copy_wikidata_items <- function(qid_on_wikidata, + qid_equivalence_property, + language, + wikibase_api_url, + data_curator, + log_path, + csrf, + wikibase_session = NULL) { # Ensure that QIDs are used in the
loop ---------------------------- is_qid <- vapply(qid_on_wikidata, is_qid, logical(1)) - not_qid <- paste(names(which(!is_qid)), collapse="|") + not_qid <- paste(names(which(!is_qid)), collapse = "|") assertthat::assert_that( not_qid == "", - msg=paste0("Error copy_wikidata_items(): ", not_qid, - " does not appear to be a QID.")) + msg = paste0( + "Error copy_wikidata_items(): ", not_qid, + " does not appear to be a QID." + ) + ) + + if (!is.null(wikibase_session)) { + # For repeated queries you can add your variables directly or in a list + + if (!is.null(wikibase_session$qid_equivalence_property)) { + qid_equivalence_property <- wikibase_session$qid_equivalence_property + } + + if (!is.null(wikibase_session$language)) { + language <- wikibase_session$language + } + if (!is.null(wikibase_session$data_curator)) { + data_curator <- wikibase_session$data_curator + } + + if (!is.null(wikibase_session$wikibase_api_url)) { + wikibase_api_url <- wikibase_session$wikibase_api_url + } + + if (!is.null(wikibase_session$log_path)) { + log_path <- wikibase_session$log_path + } + + if (!is.null(wikibase_session$csrf)) { + csrf <- wikibase_session$csrf + } + } returned_list <- lapply( qid_on_wikidata, function(x) { copy_wikidata_item( - qid_on_wikidata =x, + qid_on_wikidata = x, qid_equivalence_property = qid_equivalence_property, - languages = languages, + language = language, wikibase_api_url = wikibase_api_url, data_curator = data_curator, log_path = log_path, - csrf = csrf) + csrf = csrf + ) } ) diff --git a/R/copy_wikidata_property.R b/R/copy_wikidata_property.R index e4ef13a..8f1b80e 100644 --- a/R/copy_wikidata_property.R +++ b/R/copy_wikidata_property.R @@ -19,7 +19,7 @@ #' non-authenticated sources, this should be changed.) #' @param pid_equivalence_property The PID in Wikibase that records the #' equivalent Wikidata PID as an external ID. 
-#' @param language A vector of languages codes, for example, \code{c("en", +#' @param language A vector of language codes, for example, \code{c("en", #' "et")}. #' @param wikibase_api_url For example, #' \code{'https://reprexbase.eu/demowiki/api.php'}. @@ -29,6 +29,10 @@ #' \code{\link{tempdir()}}. #' @param csrf The CSRF token of your session, received with #' \code{\link{get_csrf}}. +#' @param wikibase_session An optional list that contains any of the values of +#' parameters \code{pid_equivalence_property}, \code{language}, +#' \code{wikibase_api_url}, \code{data_curator}, \code{log_path} and +#' \code{csrf} (for repeated use in a session). #' @importFrom assertthat assert_that #' @importFrom utils person #' @return Returns a dataset_df object. The columns are: @@ -54,40 +58,69 @@ copy_wikidata_property <- function( pid_on_wikidata, pid_equivalence_property = "P2", - languages = c("en", "hu"), + language = c("en", "hu"), wikibase_api_url = "https://reprexbase.eu/jekyll/api.php", data_curator = NULL, log_path = tempdir(), - csrf) { + csrf, + wikibase_session = NULL) { + if (!is.null(wikibase_session)) { + # For repeated queries you can add your variables directly or in a list + + if (!is.null(wikibase_session$pid_equivalence_property)) { + pid_equivalence_property <- wikibase_session$pid_equivalence_property + } + + if (!is.null(wikibase_session$language)) { + language <- wikibase_session$language + } + if (!is.null(wikibase_session$data_curator)) { + data_curator <- wikibase_session$data_curator + } + + if (!is.null(wikibase_session$wikibase_api_url)) { + wikibase_api_url <- wikibase_session$wikibase_api_url + } + + if (!is.null(wikibase_session$log_path)) { + log_path <- wikibase_session$log_path + } + + if (!is.null(wikibase_session$csrf)) { + csrf <- wikibase_session$csrf + } + } # Assertions for correct inputs ------------------------------------------------ - if (is.null(data_curator)) data_curator <- person("Jane", "Doe") + if (is.null(data_curator))
data_curator <- person("Person", "Unknown") if (is.null(log_path)) log_path <- tempdir() assertthat::assert_that( inherits(data_curator, "person"), - msg='copy_wikidata_item(..., data_curator): data_curator must be a person, like person("Jane, "Doe").') + msg = 'copy_wikidata_item(..., data_curator): data_curator must be a person, like person("Jane, "Doe").' + ) - if ( length(pid_on_wikidata) > 1) { + if (length(pid_on_wikidata) > 1) { # Run this function in a loop if there are several PIDs to copy return_log_file <- copy_wikidata_properties( - pid_on_wikidata = pid_on_wikidata , + pid_on_wikidata = pid_on_wikidata, pid_equivalence_property = pid_equivalence_property, - languages = languages, + language = language, wikibase_api_url = wikibase_api_url, data_curator = data_curator, log_path = log_path, - csrf = csrf) + csrf = csrf + ) return_log_file$rowid <- defined( return_log_file$id_on_target, label = "Wikibase QID", - namespace = return_log_file$wikibase_api_url[1]) + namespace = return_log_file$wikibase_api_url[1] + ) return(return_log_file) - } # Timestamping --------------------------------------------------------------------- @@ -125,7 +158,9 @@ copy_wikidata_property <- function( } else { # there was no error response <- httr::content( - get_claim$result, as = "parsed", type = "application/json") + get_claim$result, + as = "parsed", type = "application/json" + ) } if (!is_response_success(response)) { @@ -172,14 +207,14 @@ copy_wikidata_property <- function( # If the user wants to copy non-existing descriptions, we will replace them # with an empty string. 
- labels_present <- languages[which(languages %in% names(response$entities[[1]]$labels))] - labels_missing <- languages[which(!languages %in% names(response$entities[[1]]$labels))] + labels_present <- language[which(language %in% names(response$entities[[1]]$labels))] + labels_missing <- language[which(!language %in% names(response$entities[[1]]$labels))] - descriptions_present <- languages[which(languages %in% names(response$entities[[1]]$descriptions))] - descriptions_missing <- languages[which(!languages %in% names(response$entities[[1]]$descriptions))] + descriptions_present <- language[which(language %in% names(response$entities[[1]]$descriptions))] + descriptions_missing <- language[which(!language %in% names(response$entities[[1]]$descriptions))] - aliases_present <- languages[which(languages %in% names(response$entities[[1]]$aliases))] - aliases_missing <- languages[which(!languages %in% names(response$entities[[1]]$aliases))] + aliases_present <- language[which(language %in% names(response$entities[[1]]$aliases))] + aliases_missing <- language[which(!language %in% names(response$entities[[1]]$aliases))] labels_missing ## Set a default later, this is now hard coded to English but could be a parameter. 
@@ -332,7 +367,7 @@ copy_wikidata_property <- function( ) # Try to find the English error message - error_languages <- unlist( + error_language <- unlist( lapply(error_messages, function(x) ifelse(length(x) >= 2, x[2], NA_character_)) ) @@ -342,16 +377,16 @@ copy_wikidata_property <- function( language <- "" if ( # we have English-language error message - any(error_languages == "en") + any(error_language == "en") ) { # The error message contains the already existing (conflicting) label - existing_label <- error_messages[[which(error_languages == "en")]][1] + existing_label <- error_messages[[which(error_language == "en")]][1] language <- "en" - } else if (any(!is.na(error_languages))) { + } else if (any(!is.na(error_language))) { # The error message contains the already existing (conflicting) label # but not in English, select the first language that is available, # if there are any messages that can be read in a human language. - nr_language <- which(!is.na(error_languages))[1] + nr_language <- which(!is.na(error_language))[1] existing_label <- error_messages[[nr_language]][1] language <- error_messages[[nr_language]][2] } @@ -381,7 +416,6 @@ copy_wikidata_property <- function( fileEncoding = "UTF-8" ) } else { - # Return an emptier data.frame if there was some error # Print out the error message verbatim to terminal @@ -432,34 +466,53 @@ copy_wikidata_property <- function( id_on_target = defined( return_dataframe$id_on_target, label = paste0("PID on ", wikibase_api_url), - namespace = wikibase_api_url), + namespace = wikibase_api_url + ), label = defined( - return_dataframe$label, label = "Label of item"), + return_dataframe$label, + label = "Label of item" + ), description = defined( - return_dataframe$description, label = "Description of item"), + return_dataframe$description, + label = "Description of item" + ), language = defined( - return_dataframe$language, label = "Language of label and description"), + return_dataframe$language, + label = "Language of 
label and description" + ), datatype = return_dataframe$datatype, wikibase_api_url = wikibase_api_url, equivalence_property = defined( - return_dataframe$equivalence_property , + return_dataframe$equivalence_property, label = paste0("Equivalence property on ", wikibase_api_url), - namespace = wikibase_api_url), + namespace = wikibase_api_url + ), equivalence_id = defined( - return_dataframe$equivalence_id , + return_dataframe$equivalence_id, label = "Equivalent PID on Wikidata", - namespace = "https://www.wikidata.org/wiki/"), + namespace = "https://www.wikidata.org/wiki/" + ), success = return_dataframe$success, comment = return_dataframe$comment, time = return_dataframe$time, logfile = return_dataframe$logfile, dataset_bibentry = dublincore( - title = paste0("Wikibase Copy Property Log (", - strftime(action_time,'%Y-%m-%d %H:%M:%OS0'), ")"), + title = paste0( + "Wikibase Copy Property Log (", + strftime(action_time, "%Y-%m-%d %H:%M:%OS0"), ")" + ), description = description_text, creator = data_curator, - dataset_date = Sys.Date()) + dataset_date = Sys.Date() + ), + identifier = c(wbi = wikibase_api_url) ) + + return_ds$rowid <- defined(paste0("wbi:", as.character(return_ds$id_on_target)), + namespace = wikibase_api_url + ) + + return_ds } @@ -468,32 +521,62 @@ copy_wikidata_property <- function( copy_wikidata_properties <- function( pid_on_wikidata, pid_equivalence_property, - languages, + language, wikibase_api_url, data_curator, log_path, - csrf) { - + csrf, + wikibase_session = NULL) { # Ensure that PIDs are used in the loop ---------------------------- is_pid <- vapply(pid_on_wikidata, is_pid, logical(1)) - not_pid <- paste(names(which(!is_pid)), collapse="|") + not_pid <- paste(names(which(!is_pid)), collapse = "|") assertthat::assert_that( not_pid == "", - msg=paste0("Error copy_wikidata_properties(): ", not_pid, - " does not appear to be a PID.")) + msg = paste0( + "Error copy_wikidata_properties(): ", not_pid, + " does not appear to be a PID." 
+ ) + ) + if (!is.null(wikibase_session)) { + # For repeated queries you can add your variables directly or in a list + + if (!is.null(wikibase_session$pid_equivalence_property)) { + pid_equivalence_property <- wikibase_session$pid_equivalence_property + } + + if (!is.null(wikibase_session$language)) { + language <- wikibase_session$language + } + if (!is.null(wikibase_session$data_curator)) { + data_curator <- wikibase_session$data_curator + } + + if (!is.null(wikibase_session$wikibase_api_url)) { + wikibase_api_url <- wikibase_session$wikibase_api_url + } + + if (!is.null(wikibase_session$log_path)) { + log_path <- wikibase_session$log_path + } + + if (!is.null(wikibase_session$csrf)) { + csrf <- wikibase_session$csrf + } + } returned_list <- lapply( pid_on_wikidata, function(x) { copy_wikidata_property( pid_on_wikidata = x, pid_equivalence_property = pid_equivalence_property, - languages = languages, + language = language, wikibase_api_url = wikibase_api_url, data_curator = data_curator, log_path = log_path, - csrf = csrf) + csrf = csrf + ) } ) diff --git a/R/create_item.R b/R/create_item.R index 09ff4be..4da1386 100644 --- a/R/create_item.R +++ b/R/create_item.R @@ -23,10 +23,14 @@ #' \code{'https://reprexbase.eu/demowiki/api.php'}. #' @param data_curator The name of the data curator who runs the function and #' creates the log file, created with \link[utils]{person}. -#' @param csrf The CSRF token of your session, received with -#' \code{\link{get_csrf}}. #' @param log_path A path to save the log file. Defaults to the return value of #' \code{\link{tempdir()}}. +#' @param csrf The CSRF token of your session, received with +#' \code{\link{get_csrf}}. +#' @param wikibase_session An optional list that contains any of the values of +#' \code{language}, +#' \code{wikibase_api_url}, \code{data_curator},\code{log_path} and +#' \code{csrf} (for repeated use in a session.) #' @export #' @return Currently returns a data.frame, this should be a dataset. 
The columns #' are: @@ -73,7 +77,31 @@ create_item <- function(label, wikibase_api_url, data_curator = NULL, log_path = tempdir(), - csrf) { + csrf, + wikibase_session = NULL) { + + if (!is.null(wikibase_session)) { + # For repeated queries you can add your variables directly or in a list + + if(!is.null(wikibase_session$languages)) { + languages <- wikibase_session$languages + } + if(!is.null(wikibase_session$data_curator)) { + data_curator <- wikibase_session$data_curator + } + + if(!is.null(wikibase_session$wikibase_api_url)) { + wikibase_api_url <- wikibase_session$wikibase_api_url + } + + if(!is.null(wikibase_session$log_path)) { + log_path <- wikibase_session$log_path + } + + if(!is.null(wikibase_session$csrf)) { + csrf <- wikibase_session$csrf + } + } # Credit the person who curates the data if (is.null(data_curator)) data_curator <- person("Jane", "Doe") diff --git a/R/create_property.R b/R/create_property.R index eced85f..01e25cc 100644 --- a/R/create_property.R +++ b/R/create_property.R @@ -29,6 +29,10 @@ #' \code{\link{get_csrf}}. #' @param log_path A path to save the log file. Defaults to the return value of #' \code{\link{tempdir()}}. +#' @param wikibase_session An optional list that contains any of the values of +#' \code{language}, +#' \code{wikibase_api_url}, \code{data_curator},\code{log_path} and +#' \code{csrf} (for repeated use in a session.) #' @export #' @return Currently returns a data.frame, this should be a dataset.
The columns #' are: @@ -74,10 +78,34 @@ create_property <- function(label, wikibase_api_url, data_curator = NULL, log_path = tempdir(), - csrf) { + csrf, + wikibase_session = NULL) { + + if (!is.null(wikibase_session)) { + # For repeated queries you can add your variables directly or in a list + + if(!is.null(wikibase_session$languages)) { + languages <- wikibase_session$languages + } + if(!is.null(wikibase_session$data_curator)) { + data_curator <- wikibase_session$data_curator + } + + if(!is.null(wikibase_session$wikibase_api_url)) { + wikibase_api_url <- wikibase_session$wikibase_api_url + } + + if(!is.null(wikibase_session$log_path)) { + log_path <- wikibase_session$log_path + } + + if(!is.null(wikibase_session$csrf)) { + csrf <- wikibase_session$csrf + } + } # Credit the person who curates the data - if (is.null(data_curator)) data_curator <- person("Jane", "Doe") + if (is.null(data_curator)) data_curator <- person("Person", "Unknown") assertthat::assert_that( inherits(data_curator, "person"), diff --git a/R/get_wikidata_item.R b/R/get_wikidata_item.R index cd14294..e0bbfb8 100644 --- a/R/get_wikidata_item.R +++ b/R/get_wikidata_item.R @@ -29,13 +29,13 @@ get_wikidata_item <- function( wikibase_api_url = "https://www.wikidata.org/w/api.php", data_curator = NULL, title = "Dataset title") { - # Credit the person who curates the data if (is.null(data_curator)) data_curator <- person("Jane", "Doe") assertthat::assert_that( inherits(data_curator, "person"), - msg='copy_wikidata_item(..., data_curator): data_curator must be a person, like person("Jane, "Doe").') + msg = 'copy_wikidata_item(..., data_curator): data_curator must be a person, like person("Jane, "Doe").' 
+ ) qid_on_wikidata <- gsub(prefix, "", as.character(qid_on_wikidata)) @@ -48,13 +48,17 @@ get_wikidata_item <- function( for (i in seq_along(qid_on_wikidata)) { if (i == 1) { # Initialise the return_df - return_df <- get_singe_item(qid_on_wikidata = qid_on_wikidata[i], - language = language, - wikibase_api_url = wikibase_api_url) + return_df <- get_singe_item( + qid_on_wikidata = qid_on_wikidata[i], + language = language, + wikibase_api_url = wikibase_api_url + ) } else { - tmp <- get_singe_item(qid_on_wikidata = qid_on_wikidata[i], - language = language, - wikibase_api_url = wikibase_api_url) + tmp <- get_singe_item( + qid_on_wikidata = qid_on_wikidata[i], + language = language, + wikibase_api_url = wikibase_api_url + ) return_df <- rbind(return_df, tmp) } } @@ -64,21 +68,24 @@ get_wikidata_item <- function( return_df <- get_singe_item( qid_on_wikidata = qid_on_wikidata, language = language, - wikibase_api_url = wikibase_api_url) + wikibase_api_url = wikibase_api_url + ) } return_ds <- dataset_df( qid_on_wikidata = defined( return_df$qid_on_wikidata, label = paste0("qid_on_wikidata on ", wikibase_api_url), - namespace = wikibase_api_url), + namespace = wikibase_api_url + ), label = defined(return_df$label, label = "Label of item"), description = defined(return_df$description, label = "Description of item"), language = defined(return_df$language, label = "Language of label and description"), - dataset_bibentry = dublincore(title = title, - creator = data_curator, - dataset_date = Sys.Date() - ) + dataset_bibentry = dublincore( + title = title, + creator = data_curator, + dataset_date = Sys.Date() + ) ) wikibase_type <- c(qid_on_wikidata = "qid_on_wikidata") diff --git a/R/item_identity_datastring_create.R b/R/item_identity_datastring_create.R index 832d3c1..a2d9eae 100644 --- a/R/item_identity_datastring_create.R +++ b/R/item_identity_datastring_create.R @@ -10,7 +10,7 @@ #' @keywords internal item_identity_datastring_create <- function(labels_list, - 
descriptions_list) { + descriptions_list) { # Assert that the inputs are of correct types assertthat::assert_that( inherits(labels_list, "list"), diff --git a/R/left_join_column.R b/R/left_join_column.R index ad9b810..4d6702c 100644 --- a/R/left_join_column.R +++ b/R/left_join_column.R @@ -29,7 +29,6 @@ left_join_column <- function( wikibase_api_url = "https://www.wikidata.org/w/api.php", silent = FALSE, csrf = NULL) { - # Initialise a data.frame to return the data. new_column <- data.frame( @@ -79,24 +78,29 @@ left_join_column <- function( if (!all( c(is.null(label), is.null(unit), is.null(definition), is.null(namespace)) - )) { + )) { return_df[, 2] <- defined(return_df[, 2], - label = label, - unit = unit, - definition = definition, - namespace = namespace) + label = label, + unit = unit, + definition = definition, + namespace = namespace + ) } - new_column_ds <- as_dataset_df(df = return_df, - reference = list(author = creator(ds), - title = dataset_title(ds)) - ) + new_column_ds <- as_dataset_df( + df = return_df, + reference = list( + author = creator(ds), + title = dataset_title(ds) + ) + ) newcol_prov <- attributes(new_column_ds) new_ds <- invisible( left_join(ds, new_column_ds, - by = intersect(names(ds), names(new_column_ds))) + by = intersect(names(ds), names(new_column_ds)) ) + ) attr(new_ds, "Provenance") <- list( started_at = original_prov$started_at, @@ -107,7 +111,9 @@ left_join_column <- function( new_property_definition <- c("property" = column_type) names(new_property_definition) <- property - attr(new_ds, "wikibase_type") <- c(attr(ds, "wikibase_type"), - new_property_definition) + attr(new_ds, "wikibase_type") <- c( + attr(ds, "wikibase_type"), + new_property_definition + ) new_ds } diff --git a/README.Rmd b/README.Rmd index dd57475..6bad58b 100644 --- a/README.Rmd +++ b/README.Rmd @@ -18,7 +18,7 @@ knitr::opts_chunk$set( [![Lifecycle: 
experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) [![CRAN status](https://www.r-pkg.org/badges/version/wbdataset)](https://CRAN.R-project.org/package=wbdataset) -[![devel-version](https://img.shields.io/badge/devel%20version-0.1.1014-blue.svg)](https://github.com/dataobservatory-eu/wbdataset) +[![devel-version](https://img.shields.io/badge/devel%20version-0.1.1016-blue.svg)](https://github.com/dataobservatory-eu/wbdataset) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.13972192.svg) [![Codecov test coverage](https://codecov.io/gh/dataobservatory-eu/wbdataset/graph/badge.svg)](https://app.codecov.io/gh/dataobservatory-eu/wbdataset) diff --git a/README.md b/README.md index c3472de..46c5129 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) [![CRAN status](https://www.r-pkg.org/badges/version/wbdataset)](https://CRAN.R-project.org/package=wbdataset) -[![devel-version](https://img.shields.io/badge/devel%20version-0.1.1014-blue.svg)](https://github.com/dataobservatory-eu/wbdataset) +[![devel-version](https://img.shields.io/badge/devel%20version-0.1.1016-blue.svg)](https://github.com/dataobservatory-eu/wbdataset) \[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.13972192.svg) [![Codecov test coverage](https://codecov.io/gh/dataobservatory-eu/wbdataset/graph/badge.svg)](https://app.codecov.io/gh/dataobservatory-eu/wbdataset) diff --git a/man/copy_wikidata_item.Rd b/man/copy_wikidata_item.Rd index f2b629f..102eccc 100644 --- a/man/copy_wikidata_item.Rd +++ b/man/copy_wikidata_item.Rd @@ -10,7 +10,7 @@ copy_wikidata_item( qid_on_wikidata = "Q4", qid_equivalence_property = "P35", - languages = c("en", "nl", "hu"), + language = c("en", "nl", "hu"), wikibase_api_url = "https://reprexbase.eu/jekyll/api.php", data_curator = NULL, 
log_path = tempdir(), @@ -20,7 +20,7 @@ copy_wikidata_item( copy_wikidata_items( qid_on_wikidata, qid_equivalence_property, - languages, + language, wikibase_api_url, data_curator, log_path, @@ -30,11 +30,12 @@ copy_wikidata_items( copy_wikidata_properties( pid_on_wikidata, pid_equivalence_property, - languages, + language, wikibase_api_url, data_curator, log_path, - csrf + csrf, + wikibase_session = NULL ) } \arguments{ @@ -44,6 +45,9 @@ single valid QID or a vector of several QIDs.} \item{qid_equivalence_property}{The QID in Wikibase that records the equivalent Wikidata QID as an external ID.} +\item{language}{A vector of language codes, for example, \code{c("en", +"et")}.} + \item{wikibase_api_url}{For example, \code{'https://reprexbase.eu/demowiki/api.php'}.} @@ -56,8 +60,10 @@ creates the log file, created with \link[utils]{person}.} \item{csrf}{The CSRF token of your session, received with \code{\link{get_csrf}}.} -\item{language}{A vector of languages codes, for example, \code{c("en", -"et")}.} +\item{wikibase_session}{An optional list that contains any of the values of +parameters \code{qid_equivalence_property}, \code{language}, +\code{wikibase_api_url}, \code{data_curator},\code{log_path} and +\code{csrf} (for repeated use in a session.)} } \value{ Returns a dataset_df object. 
The columns are: diff --git a/man/copy_wikidata_property.Rd b/man/copy_wikidata_property.Rd index 356158b..9cf8a2e 100644 --- a/man/copy_wikidata_property.Rd +++ b/man/copy_wikidata_property.Rd @@ -7,11 +7,12 @@ copy_wikidata_property( pid_on_wikidata, pid_equivalence_property = "P2", - languages = c("en", "hu"), + language = c("en", "hu"), wikibase_api_url = "https://reprexbase.eu/jekyll/api.php", data_curator = NULL, log_path = tempdir(), - csrf + csrf, + wikibase_session = NULL ) } \arguments{ @@ -22,6 +23,9 @@ non-authenticated sources, this should be changed.)} \item{pid_equivalence_property}{The PID in Wikibase that records the equivalent Wikidata PID as an external ID.} +\item{language}{A vector of language codes, for example, \code{c("en", +"et")}.} + \item{wikibase_api_url}{For example, \code{'https://reprexbase.eu/demowiki/api.php'}.} @@ -34,8 +38,10 @@ creates the log file, created with \link[utils]{person}.} \item{csrf}{The CSRF token of your session, received with \code{\link{get_csrf}}.} -\item{language}{A vector of languages codes, for example, \code{c("en", -"et")}.} +\item{wikibase_session}{An optional list that contains any of the values of +parameters \code{qid_equivalence_property}, \code{language}, +\code{wikibase_api_url}, \code{data_curator},\code{log_path} and +\code{csrf} (for repeated use in a session.)} } \value{ Returns a dataset_df object. 
The columns are: diff --git a/man/create_item.Rd b/man/create_item.Rd index 85ca447..c0d1850 100644 --- a/man/create_item.Rd +++ b/man/create_item.Rd @@ -13,7 +13,8 @@ create_item( wikibase_api_url, data_curator = NULL, log_path = tempdir(), - csrf + csrf, + wikibase_session ) } \arguments{ @@ -44,6 +45,11 @@ creates the log file, created with \link[utils]{person}.} \item{csrf}{The CSRF token of your session, received with \code{\link{get_csrf}}.} + +\item{wikibase_session}{An optional list that contains any of the values of +\code{language}, +\code{wikibase_api_url}, \code{data_curator},\code{log_path} and +\code{csrf} (for repeated use in a session.)} } \value{ Currently returns a data.frame, this should be a dataset. The columns diff --git a/man/create_property.Rd b/man/create_property.Rd index c1dbf4b..938087e 100644 --- a/man/create_property.Rd +++ b/man/create_property.Rd @@ -14,7 +14,8 @@ create_property( wikibase_api_url, data_curator = NULL, log_path = tempdir(), - csrf + csrf, + wikibase_session = NULL ) } \arguments{ @@ -48,6 +49,11 @@ creates the log file, created with \link[utils]{person}.} \item{csrf}{The CSRF token of your session, received with \code{\link{get_csrf}}.} + +\item{wikibase_session}{An optional list that contains any of the values of +\code{language}, +\code{wikibase_api_url}, \code{data_curator},\code{log_path} and +\code{csrf} (for repeated use in a session.)} } \value{ Currently returns a data.frame, this should be a dataset. 
The columns diff --git a/tests/testthat/test-get_wikidata_item.R b/tests/testthat/test-get_wikidata_item.R index c203259..656ec3a 100644 --- a/tests/testthat/test-get_wikidata_item.R +++ b/tests/testthat/test-get_wikidata_item.R @@ -7,8 +7,10 @@ test_that("get_wikidata_item() works with single qid_on_wikidata", { test_that("get_wikidata_item() works with multiple qid_on_wikidata", { - test_df2 <- get_wikidata_item(qid_on_wikidata = c("Q228", "Q347"), - language = c("en", "nl")) + test_df2 <- get_wikidata_item( + qid_on_wikidata = c("Q228", "Q347"), + language = c("en", "nl") + ) expect_equal(as.character(test_df2$language), c("en", "nl", "en", "nl")) expect_equal(as.character(test_df2$label), c("Andorra", "Andorra", "Liechtenstein", "Liechtenstein")) expect_equal(as.character(test_df2$qid_on_wikidata), c("Q228", "Q228", "Q347", "Q347")) diff --git a/tests/testthat/test-left_join_column.R b/tests/testthat/test-left_join_column.R index 7541da7..181a45f 100644 --- a/tests/testthat/test-left_join_column.R +++ b/tests/testthat/test-left_join_column.R @@ -2,9 +2,11 @@ data("wikidata_countries_df") ds <- wikidata_countries_df property <- "P297" -add_one_col <- left_join_column(ds = wikidata_countries_df, - property = "P297", - silent = TRUE) +add_one_col <- left_join_column( + ds = wikidata_countries_df, + property = "P297", + silent = TRUE +) test_that("left_join_column() works", { expect_equal(add_one_col$P297, c("DK", "LI", NA_character_, "XK")) @@ -18,6 +20,6 @@ test_that("left_join_column() works", { namespace = "https://www.geonames.org/", silent = TRUE ) - expect_equal(names(add_2nd_col), c("qid", "label", "description", "language","rowid", "P297", "P1566")) + expect_equal(names(add_2nd_col), c("qid", "label", "description", "language", "rowid", "P297", "P1566")) expect_equal(attr(add_2nd_col$P1566, "namespace"), "https://www.geonames.org/") }) diff --git a/vignettes/start.Rmd b/vignettes/start.Rmd index 2d5c5b1..ee97da9 100644 --- a/vignettes/start.Rmd +++ 
b/vignettes/start.Rmd @@ -145,7 +145,7 @@ capital_cities <- copy_wikidata_property( wikibase_api_url = "https://reprexbase.eu/jekyll/api.php", data_curator = person("Joe", "Doe"), log_path = tempdir(), - csrf=my_csrf + csrf = my_csrf ) capital_cities @@ -203,9 +203,11 @@ The `ric_equivalence_pid$id_on_target` saves the new property identifier in your ```{r, echo=FALSE} message("Successfully created item P44 (RiC equivalent property)") ``` + You will get a message on the screen, and a logfile that records the newly created item. The contents of the logfile are also returned, so you can save them. In this case, it is saved in `ric_equivalence_pid`. + ```{r create-propery2, eval=FALSE} # This code will not run as expected if the instance at # https://reprexbase.eu/jekyll/api.php already has a property labelled @@ -227,6 +229,28 @@ p107 <- create_property( message("Successfully created item P47 (has current or former member (is current or former member of))") ``` +For repeated queries, you can place the repeating parameters into a list, and +pass that list to the function with `wikibase_session = my_wikibase_session`. + +```{r wikibasesession} +my_wikibase_session <- list( + language = c("en", "nl"), + wikibase_api_url = "https://reprexbase.eu/jekyll/api.php", + log_path = tempdir(), + csrf = my_csrf +) +``` +```{r wikibasesessionp, eval=FALSE} +p107 <- create_property( + label = "has current or former member (is current or former member of)", + description = "This property relates an E39 Actor to the E74 Group of which that E39 Actor is a member.", + equivalence_property = "P37", # a property connecting your PIDs to CIDOC + equivalence_id = "P107", # the number of the property definition in CIDOC + datatype = "external-id", + wikibase_session = my_wikibase_session +) +``` + You can copy many properties at once, too, but only with the same language, curator, wikibase_api_url parameters: ```{r mass-property-copy, eval=FALSE}