Skip to content

Commit 67eddfb

Browse files
committed
Updated
1 parent f492c7b commit 67eddfb

File tree

11 files changed

+519
-335
lines changed

11 files changed

+519
-335
lines changed

NAMESPACE

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ export(copy_wikidata_item)
1010
export(copy_wikidata_property)
1111
export(create_item)
1212
export(create_property)
13+
export(get_claim)
1314
export(get_claims)
1415
export(get_csrf)
1516
export(get_csrf_token)
@@ -20,13 +21,11 @@ export(is_pid)
2021
export(is_qid)
2122
export(left_join_column)
2223
importFrom(assertthat,assert_that)
23-
importFrom(dataset,as_dataset_df)
24-
importFrom(dataset,creator)
2524
importFrom(dataset,dataset_df)
26-
importFrom(dataset,dataset_title)
2725
importFrom(dataset,defined)
2826
importFrom(dataset,dublincore)
29-
importFrom(dplyr,everything)
27+
importFrom(dplyr,bind_rows)
28+
importFrom(dplyr,filter)
3029
importFrom(dplyr,left_join)
3130
importFrom(dplyr,mutate)
3231
importFrom(dplyr,relocate)
@@ -40,4 +39,5 @@ importFrom(httr,handle)
4039
importFrom(jsonlite,toJSON)
4140
importFrom(magrittr,"%>%")
4241
importFrom(purrr,safely)
42+
importFrom(tibble,tibble)
4343
importFrom(utils,person)

R/get_claim.R

Lines changed: 143 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,60 @@
1-
#' @title Get claims of an item
2-
#' @description
3-
#' Get the claims (statements) related to an item.
4-
#' @details
5-
#' A wrapper around
6-
#' \href{https://www.wikidata.org/w/api.php?action=help&modules=wbgetentities}{wbgetentities}.
7-
#' @param qid The QID of the item.
8-
#' @param property The property for which the claim is required.
9-
#' @param wikibase_api_url The full URL of the Wikibase API, which is the
10-
#' address that the \code{wbdataset} R client sends requests to when
11-
#' interacting with the knowledge base. For example,
12-
#' \code{'https://reprexbase.eu/demowiki/api.php'}. The URL must end with
13-
#' api.php.
14-
#' @param csrf A response csrf received with \code{\link{get_csrf}}.
1+
#' @title Retrieve a claim from a Wikidata item
2+
#'
3+
#' @description Retrieve the value(s) of a specified property (claim) from a
4+
#' Wikidata item using the `wbgetclaims` API. Supports multiple data types and
5+
#' can return either the preferred claim or all claims for the property.
6+
#'
7+
#' @details This function provides read-only access to claims (statements) for a
8+
#' given item from a Wikibase instance (such as Wikidata). It supports common
9+
#' data types including string, URL, time, quantity, coordinate, external ID,
10+
#' Commons media, and monolingual text.
11+
#'
12+
#' If \code{first = TRUE}, the function returns the claim ranked as
13+
#' \code{"preferred"} if available, otherwise the first normal claim. If
14+
#' \code{first = FALSE}, all available claims are returned in a tidy data frame,
15+
#' one row per value.
16+
#'
17+
#' This function replaces \code{\link{get_claims}}, which is now deprecated.
18+
#'
19+
#' The function wraps the
20+
#' \href{https://www.wikidata.org/w/api.php?action=help&modules=wbgetclaims}{wbgetclaims}
21+
#' module of the Wikibase API.
22+
#'
23+
#' @param qid A character string giving the QID of the item (e.g.,
24+
#' \code{"Q42"}).
25+
#' @param property A character string giving the property ID (e.g.,
26+
#' \code{"P569"}).
27+
#' @param wikibase_api_url The full URL to the Wikibase API endpoint. Must end
28+
#' with \code{api.php}. Defaults to the Wikidata API.
29+
#' @param csrf (Optional) A CSRF token for write operations (not used in this
30+
#' read-only function).
31+
#' @param first Logical; if \code{TRUE} (default), return only the preferred or
32+
#' first available claim. If \code{FALSE}, return all claims for the given
33+
#' property as one row per value.
34+
#'
35+
#' @return A data frame with one row per returned claim and three columns:
36+
#' \code{qid} (the item QID), \code{type} (the detected data type), and a column named after \code{property} that holds the value.
37+
#'
38+
#' @details Claims are retrieved using the
39+
#' \href{https://www.wikidata.org/w/api.php?action=help&modules=wbgetclaims}{wbgetclaims}
40+
#' module of the Wikibase API. This function supports common Wikidata data
41+
#' types, including string, URL, quantity, time, coordinate, external ID, media,
42+
#' and monolingual text.
43+
#'
44+
#' If \code{first = TRUE}, the function returns only the claim ranked as
45+
#' \code{"preferred"} if present, otherwise the first normal claim. If
46+
#' \code{first = FALSE}, all claims are returned in a tidy format.
47+
#'
48+
#' @seealso \code{\link{get_claims}} (deprecated)
49+
#'
1550
#' @export
1651

17-
get_claims <- function(qid = "Q528626",
18-
property = "P625",
19-
wikibase_api_url = "https://www.wikidata.org/w/api.php",
20-
csrf = NULL) {
21-
response <- NULL
22-
claim_body <- list(
23-
action = "wbgetentities",
24-
ids = qid,
25-
# languages = "en|nl|hu",
26-
# props = "labels",
27-
format = "json"
28-
)
29-
30-
# get_claim <- httr::POST(
31-
# "https://www.wikidata.org/w/api.php",
32-
# body = claim_body,
33-
# encode = "form"
34-
# )
35-
# token = get_csrf_token(csrf),
36-
get_claim2 <- httr::POST(
52+
get_claim <- function(qid = "Q528626",
53+
property = "P625",
54+
wikibase_api_url = "https://www.wikidata.org/w/api.php",
55+
csrf = NULL,
56+
first = TRUE) {
57+
response <- httr::POST(
3758
wikibase_api_url,
3859
body = list(
3960
action = "wbgetclaims",
@@ -45,35 +66,97 @@ get_claims <- function(qid = "Q528626",
4566
encode = "form"
4667
)
4768

48-
get_claim2
69+
content <- httr::content(response, as = "parsed", type = "application/json")
70+
71+
if (!is.null(content$error)) {
72+
stop(sprintf("API error from Wikidata: %s", content$error$info))
73+
}
74+
75+
if (is.null(content$claims) || is.null(content$claims[[property]])) {
76+
stop(sprintf("Property '%s' not found for QID '%s'", property, qid))
77+
}
78+
79+
claims_list <- content$claims[[property]]
4980

50-
response <- httr::content(get_claim2, as = "parsed", type = "application/json")
81+
extract_value <- function(snak) {
82+
switch(snak$datatype,
83+
"wikibase-item" = snak$datavalue$value$id,
84+
"external-id" = snak$datavalue$value,
85+
"string" = snak$datavalue$value,
86+
"url" = snak$datavalue$value,
87+
"time" = snak$datavalue$value$time,
88+
"quantity" = snak$datavalue$value$amount,
89+
"monolingualtext" = snak$datavalue$value$text,
90+
"commonsMedia" = snak$datavalue$value,
91+
"globe-coordinate" = {
92+
val <- snak$datavalue$value
93+
altitude <- ifelse(is.null(val$altitude), "", val$altitude)
94+
paste0(
95+
"mlat=", val$latitude,
96+
"&mlon=", val$longitude,
97+
"&altitude=", altitude,
98+
"&precision=", val$precision,
99+
"&globe=", val$globe
100+
)
101+
},
102+
stop(sprintf("Unsupported datatype: %s", snak$datatype))
103+
)
104+
}
105+
106+
if (first) {
107+
preferred <- Filter(function(claim) claim$rank == "preferred", claims_list)
108+
normal <- Filter(function(claim) claim$rank == "normal", claims_list)
109+
selected <- if (length(preferred) > 0) preferred[[1]] else normal[[1]]
110+
111+
snak <- selected$mainsnak
112+
value <- extract_value(snak)
113+
datatype <- snak$datatype
51114

52-
response$error
53-
response$claims
54-
response$claims[[property]][[1]]$mainsnak$property
55-
datatype <- response$claims[[property]][[1]]$mainsnak$datatype
115+
df <- data.frame(qid = qid, type = datatype, stringsAsFactors = FALSE)
116+
df[[property]] <- value
117+
return(df)
118+
} else {
119+
# All claims as rows
120+
rows <- lapply(claims_list, function(claim) {
121+
snak <- claim$mainsnak
122+
data.frame(
123+
qid = qid,
124+
type = snak$datatype,
125+
value = extract_value(snak),
126+
stringsAsFactors = FALSE
127+
)
128+
})
56129

57-
if (datatype == "wikibase-item") {
58-
value <- response$claims[[property]][[1]]$mainsnak$datavalue$value$id
59-
type <- datatype
60-
} else if (datatype == "external-id") {
61-
value <- response$claims[[property]][[1]]$mainsnak$datavalue$value
62-
type <- datatype
63-
} else if (datatype == "string") {
64-
value <- response$claims[[property]][[1]]$mainsnak$datavalue$value
65-
type <- datatype
66-
} else if (datatype == "time") {
67-
value <- response$claims[[property]][[1]]$mainsnak$datavalue$time
68-
type <- datatype
69-
} else if (datatype == "globe-coordinate") {
70-
raw_value <- response$claims[[property]][[1]]$mainsnak$datavalue$value
71-
altitude <- ifelse(is.null(raw_value$altitude), "", raw_value$altitude)
72-
value <- paste0("mlat=", raw_value$latitude, "&mlon=", raw_value$longitude, "&altitude=", altitude, "&precision=", raw_value$precision, "&globe=", raw_value$globe)
73-
type <- datatype
130+
df <- do.call(rbind, rows)
131+
names(df)[names(df) == "value"] <- property
132+
return(df)
74133
}
134+
}
135+
136+
#' @title Deprecated: retrieve a claim from a Wikidata item
137+
#'
138+
#' @description
139+
#' \strong{Deprecated.} This function has been replaced by \code{\link{get_claim}}.
140+
#'
141+
#' @details
142+
#' \code{get_claims()} was used to retrieve a claim (statement) for a specific property from
143+
#' a Wikidata or Wikibase item. This function is now deprecated and will be removed in a
144+
#' future release. Please use \code{\link{get_claim}} instead, which provides more robust
145+
#' functionality and better support for modern data types.
146+
#'
147+
#' @param qid A character string giving the QID of the item.
148+
#' @param property A character string giving the property ID.
149+
#' @param wikibase_api_url The full URL of the Wikibase API endpoint (must end with \code{api.php}).
150+
#' @param csrf (Optional) A CSRF token, not used in this read-only function.
151+
#'
152+
#' @return A data frame or list depending on implementation. For updated behaviour, use \code{get_claim()}.
153+
#'
154+
#' @seealso \code{\link{get_claim}}
155+
#'
156+
#' @export
157+
158+
get_claims <- function(...) {
  # Deprecated entry point, kept so that existing callers keep working.
  #
  # Arguments: `...` is passed on unchanged to get_claim().
  # Returns: whatever get_claim() returns for those arguments.
  #
  # .Deprecated() signals a warning (class "deprecatedWarning") carrying the
  # same message text that was previously raised with warning().
  .Deprecated(
    new = "get_claim",
    msg = "get_claims() is deprecated and will be removed in a future release. Please use get_claim() instead."
  )

  # Delegate to the replacement instead of ending on a placeholder comment:
  # without this call the function only warned and never fetched a claim.
  get_claim(...)
}

R/get_wikidata_item.R

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,31 @@
1-
#' @title Get item definitions
2-
#' @description Get item definitions by qid_on_wikidata from a Wikibase instance or
3-
#' Wikidata.
4-
#' @param qid_on_wikidata A single qid_on_wikidata or a vector of qid_on_wikidatas on a Wikibase instance (or
5-
#' Wikidata itself.)
6-
#' @param prefix The prefix to use before the qid_on_wikidata, for example, defaults to
7-
#' \code{"http://www.wikidata.org/entity/"}.
8-
#' @param language Defaults to \code{c("en", "nl", "hu")}. A character string of
9-
#' the languages in which the users wants to receive the labels and
10-
#' descriptions of the property. The vector of languages must use \href{https://en.wikipedia.org/wiki/IETF_language_tag}{BCP
11-
#' 47}-compliant language tags (e.g., "en" for English, "nl" for Dutch and "hu"
12-
#' for Hungarian.)
13-
#' @param wikibase_api_url Defaults to
14-
#' \code{"https://www.wikidata.org/w/api.php"}.
15-
#' @param data_curator The name of the data curator who runs the function and
16-
#' creates the log file, created with \link[utils]{person}.
17-
#' @param title The title of the dataset.
18-
#' @importFrom dplyr left_join mutate relocate everything
19-
#' @importFrom httr POST content
20-
#' @importFrom utils person
1+
#' @title Retrieve Wikidata item metadata (labels and descriptions)
2+
#'
3+
#' @description Retrieves basic metadata (labels and descriptions) for one or
4+
#' more Wikidata items, given their QIDs. Supports multiple languages and
5+
#' returns results in a tidy format.
6+
#'
7+
#' @details This function queries the Wikidata API to retrieve the \code{label}
8+
#' and \code{description} for each item specified in \code{qid_on_wikidata}.
9+
#' The metadata is returned in a consistent tabular format and can be used for
10+
#' display, annotation, or joining with other datasets.
11+
#'
12+
#' By default, the function retrieves metadata in multiple languages,
13+
#' including English, French, German, Italian, and Spanish.
14+
#'
15+
#' @param qid_on_wikidata A character vector of Wikidata QIDs (e.g.,
16+
#' \code{"Q42"}).
17+
#' @param wikibase_api_url The full URL of the Wikibase API endpoint (must end
18+
#' with \code{api.php}).
19+
#'
20+
#' @return A data frame with one row per QID, including columns \code{qid},
21+
#' \code{label}, and \code{description}, as well as optional language-specific
22+
#' metadata.
23+
#'
24+
#' @seealso \code{\link{get_claim}} to retrieve claims (properties) for Wikidata
25+
#' items.
26+
#' @importFrom dplyr select mutate filter bind_rows relocate left_join
27+
#' @importFrom tibble tibble
2128
#' @importFrom dataset dataset_df defined dublincore
22-
#' @return A Returns a \code{\link[dataset]{dataset_df}} object with the
23-
#' qid_on_wikidatas, labels, description, and the language codes of the labels
24-
#' and descriptions.
2529
#' @examples
2630
#' get_wikidata_item("Q42", language = c("en", "nl"))
2731
#' @export

0 commit comments

Comments
 (0)