forked from ropensci/patentsview
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcast-pv-data.R
128 lines (110 loc) · 3.54 KB
/
cast-pv-data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#' @noRd
as_is <- function(x) x
#' @noRd
get_cast_fun <- function(data_type) {
# Some fields aren't documented, so we don't know what their data type is. Use
# string type for these.
# new version of the API: state of string vs fulltext is in flux. Latter currently unused
if (length(data_type) != 1) data_type <- "string"
switch(data_type,
"string" = as_is,
"date" = as.Date,
"number" = as_is,
"integer" = as_is,
"int" = as.integer,
"fulltext" = as_is,
"boolean" = as_is,
"bool" = as.logical
)
}
#' @noRd
lookup_cast_fun <- function(name, typesdf) {
data_type <- typesdf[typesdf$common_name == name, "data_type"]
get_cast_fun(data_type = data_type)
}
#' @noRd
cast_one.character <- function(one, name, typesdf) {
cast_fun <- lookup_cast_fun(name, typesdf)
cast_fun(one)
}
#' @noRd
cast_one.double <- function(one, name, typesdf) {
cast_fun <- lookup_cast_fun(name, typesdf)
cast_fun(one)
}
#' @noRd
cast_one.integer <- function(one, name, typesdf) {
cast_fun <- lookup_cast_fun(name, typesdf)
cast_fun(one)
}
#' @noRd
cast_one.default <- function(one, name, typesdf) NA
#' @noRd
cast_one.list <- function(one, name, typesdf) {
first_df <- one[[1]]
cols <- colnames(first_df)
fun_list <- sapply(
cols, function(x) lookup_cast_fun(x, typesdf = typesdf),
USE.NAMES = TRUE, simplify = FALSE
)
# Iterate over all dataframes in the list of dataframes
lapply(one, function(df) {
# Kinda funky way to go about this, but I'm iterating over the columns in
# the dataframe, looking up the appropriate cast function for that column
# and casting it the vector as such, then binding these columns all back
# together with the call to as.data.frame shown below.
casted_lst <- lapply(cols, function(one_col_name) {
fun_list[[one_col_name]](df[[one_col_name]])
})
as.data.frame(casted_lst, stringsAsFactors = FALSE, col.names = cols)
})
}
#' @noRd
cast_one <- function(one, name, typesdf) UseMethod("cast_one")
#' Cast PatentsView data
#'
#' This will cast the data fields returned by \code{\link{search_pv}} so that
#' they have their most appropriate data types (e.g., date, numeric, etc.).
#'
#' @inheritParams unnest_pv_data
#'
#' @return The same type of object that you passed into \code{cast_pv_data}.
#'
#' @examples
#' \dontrun{
#'
#' fields <- c("patent_date", "patent_title", "patent_year")
#' res <- search_pv(query = "{\"patent_id\":\"5116621\"}", fields = fields)
#' cast_pv_data(data = res$data)
#' }
#'
#' @export
cast_pv_data <- function(data) {
validate_pv_data(data)
entity_name <- names(data)
if (entity_name == "rel_app_texts") {
# blend the fields from both rel_app_texts entities
typesdf <- unique(fieldsdf[fieldsdf$group == entity_name, c("common_name", "data_type")])
} else {
# need to get the endpoint from entity_name
endpoint_df <- fieldsdf[fieldsdf$group == entity_name, ]
endpoint <- unique(endpoint_df$endpoint)
# watch out here- several endpoints return entities that are groups returned
# by the patent and publication endpoints (attorneys, inventors, assignees)
if(length(endpoint) > 1) {
endpoint <- endpoint[!endpoint %in% c("patent", "publication")]
}
typesdf <- fieldsdf[fieldsdf$endpoint == endpoint, c("common_name", "data_type")]
}
df <- data[[1]]
list_out <- lapply2(colnames(df), function(x) {
cast_one(df[, x], name = x, typesdf = typesdf)
})
df[] <- list_out
out_data <- list(x = df)
names(out_data) <- entity_name
structure(
out_data,
class = c("list", "pv_data_result")
)
}