-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtargets.R
279 lines (274 loc) · 9.88 KB
/
targets.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
#' Generate a set of targets for pre-processing of iQuizoo data
#'
#' This target factory prepares a set of target objects used to fetch data from
#' iQuizoo database, separated into static branches so that each is for a
#' specific project and task/game combination. Further pre-processing on the
#' fetched data can also be added if requested.
#'
#' @param params A [data.frame] or [list] containing the parameters to be bound
#'   to the query. Default templates require specifying `organization_name` and
#'   `project_name`, in that order. If the `contents` template is specified
#'   without any parameters, set it as an empty vector or `NULL`. If the
#'   `contents` argument is specified, this argument is ignored.
#' @param ... For future usage. Should be empty.
#' @param contents The contents structure used as the configuration of data
#'   fetching. It is typically automatically fetched from database based on the
#'   `contents` template in `templates`. If not `NULL`, it will be used directly
#'   and the `contents` template in `templates` is ignored. It must be a
#'   [data.frame] with at least one row and at least the columns `project_id`
#'   and `game_id`; otherwise an error of class `tarflow_bad_contents` is
#'   raised.
#' @param what What to fetch. There are basically two types of data, i.e., raw
#'   data and scores. The former is the logged raw data for each trial of the
#'   tasks/games, while the latter is the scores calculated by iQuizoo server.
#'   One or both of `"raw_data"` and `"scores"` can be given; by default both
#'   are fetched. Further actions on the fetched raw data can be specified by
#'   `action_raw_data`.
#' @param action_raw_data The action to be taken on the fetched raw data. There
#'   are two consecutive actions, i.e., wrangling and pre-processing. The former
#'   will parse the raw data into a tidy format, while the latter will calculate
#'   indices based on the parsed data. If set as "all", both wrangling and
#'   pre-processing will be done. If set as "parse", only wrangling will be
#'   done. If set as "none", neither will be done. If `what` does not include
#'   "raw_data", this argument is ignored.
#' @param combine Specify which targets to be combined. Note you should only
#'   specify names from `c("scores", "raw_data", "raw_data_parsed",
#'   "indices")`. Names whose targets are not generated by this call are
#'   silently skipped. If `NULL`, none will be combined.
#' @param templates The SQL template files used to fetch data. See
#'   [setup_templates()] for details.
#' @param check_progress Whether to check the progress hash. Set it as `FALSE`
#'   if the project is finalized.
#' @return A list of target objects.
#' @export
tar_prep_iquizoo <- function(params, ...,
                             contents = NULL,
                             what = c("raw_data", "scores"),
                             action_raw_data = c("all", "parse", "none"),
                             combine = NULL,
                             templates = setup_templates(),
                             check_progress = TRUE) {
  check_dots_empty()
  if (!inherits(templates, "tarflow.template")) {
    cli::cli_abort(
      "{.arg templates} must be created by {.fun setup_templates}.",
      class = "tarflow_bad_templates"
    )
  }
  what <- match.arg(what, several.ok = TRUE)
  action_raw_data <- match.arg(action_raw_data)
  if (!is.null(combine) && !all(combine %in% objects())) {
    cli::cli_abort(
      "{.arg combine} must be a subset of {vctrs::vec_c({objects()})}.",
      class = "tarflow_bad_combine"
    )
  }
  # `params` is only touched when `contents` has to be fetched, so callers that
  # supply `contents` directly may leave `params` missing.
  if (is.null(contents)) {
    contents <- fetch_iquizoo_mem()(
      read_file(templates$contents),
      params = unname(
        if (!is_empty(params)) as.list(params)
      )
    )
  }
  # Fail early with a classed error instead of letting `nrow()` / `$` produce
  # a cryptic failure (or silently generate no targets) further down.
  required_cols <- c("project_id", "game_id")
  if (!is.data.frame(contents) ||
    !all(required_cols %in% names(contents))) {
    cli::cli_abort(
      "{.arg contents} must be a data frame with columns {.field {required_cols}}.",
      class = "tarflow_bad_contents"
    )
  }
  if (nrow(contents) == 0) {
    cli::cli_abort(
      "No contents to fetch.",
      class = "tarflow_bad_contents"
    )
  }
  targets <- c(
    # Embed `contents` in serialized form so the command carries the exact
    # object that was used to build the pipeline.
    targets::tar_target_raw(
      "contents_origin",
      expr(unserialize(!!serialize(contents, NULL)))
    ),
    tar_prep_proj(contents, templates, check_progress),
    # `simplify = FALSE` keeps a list named after `what`, which is what
    # `combine` looks names up in below.
    sapply(
      what,
      \(type) tar_fetch_data(contents, templates, type, check_progress),
      simplify = FALSE
    ),
    if ("raw_data" %in% what && action_raw_data != "none") {
      tar_prep_raw(contents, action_raw_data)
    }
  )
  # Only groups that were actually generated above can be combined.
  combined <- lapply(
    intersect(combine, names(targets)),
    \(name) tarchetypes::tar_combine_raw(name, targets[[name]])
  )
  c(targets, combined)
}
#' Generate a set of targets for preparing project-level data
#'
#' Two kinds of project-level targets are produced: optional progress hash
#' targets (one per distinct project) and a single `users` target. The progress
#' hash targets always re-run, so anything depending on them can notice a
#' change in project progress; the `users` target fetches the de-duplicated
#' user records for all projects in `contents`.
#'
#' @param contents The contents structure used as the configuration of data
#'   fetching. Its `project_id` column is read here.
#' @param templates The SQL template files used to fetch data. See
#'   [setup_templates()] for details. The `progress_hash` and `users` entries
#'   are read here.
#' @param check_progress Whether to check the progress hash. When `TRUE`, one
#'   target named `progress_hash_{project_id}` is added per project. Set it as
#'   `FALSE` if the projects are finalized.
#' @return A list of target objects.
#' @export
tar_prep_proj <- function(contents,
                          templates = setup_templates(),
                          check_progress = TRUE) {
  project_ids <- unique(contents$project_id)

  # Progress targets are optional; `NULL` simply vanishes in the final `c()`.
  tar_progress <- if (check_progress) {
    sql_progress <- read_file(templates[["progress_hash"]])
    tarchetypes::tar_map(
      data.frame(project_id = as.character(project_ids)),
      targets::tar_target_raw(
        "progress_hash",
        # `project_id` is left as a bare symbol for `tar_map()` to fill in.
        bquote(
          fetch_iquizoo(.(sql_progress), params = list(project_id))
        ),
        packages = "tarflow.iquizoo",
        cue = targets::tar_cue("always")
      )
    )
  }

  sql_users <- read_file(templates[["users"]])
  tar_users <- targets::tar_target_raw(
    "users",
    bquote(
      unique(
        fetch_iquizoo(.(sql_users), params = list(.(project_ids)))
      )
    ),
    packages = "tarflow.iquizoo"
  )

  c(tar_progress, tar_users)
}
#' Generate a set of targets for fetching data
#'
#' Builds one target per distinct game in `contents`, named
#' `{what}_{game_id}`. Each target calls `fetch_data()` once for every project
#' that contains the game and row-binds the results.
#'
#' @param contents The contents structure used as the configuration of data
#'   fetching. Its `project_id` and `game_id` columns are read here.
#' @param templates The SQL template files used to fetch data. See
#'   [setup_templates()] for details. The entry named by `what` is used as the
#'   query.
#' @param what What to fetch, either `"raw_data"` or `"scores"`.
#' @param check_progress Whether to check the progress hash. If `TRUE`, each
#'   command first references the `progress_hash_{project_id}` objects of the
#'   projects involved, which are typically generated by [tar_prep_proj()]. If
#'   the projects are finalized, set this argument as `FALSE`.
#' @return A list of target objects, named by game id.
#' @export
tar_fetch_data <- function(contents,
                           templates = setup_templates(),
                           what = c("raw_data", "scores"),
                           check_progress = TRUE) {
  what <- match.arg(what)
  game_ids <- unique(as.character(contents$game_id))

  # Build the fetching target of a single game.
  build_target <- function(game_id) {
    project_ids <- as.character(
      contents$project_id[contents$game_id == game_id]
    )
    # Mentioning the progress hash symbols in the command makes them upstream
    # dependencies of this target; `NULL` is dropped by `c()` below.
    dependency_call <- if (check_progress) {
      bquote(
        list(..(syms(paste0("progress_hash_", project_ids)))),
        splice = TRUE
      )
    }
    fetch_call <- bquote(
      do.call(
        rbind,
        .mapply(
          fetch_data,
          list(.(project_ids), .(game_id)),
          MoreArgs = list(
            what = .(what),
            query = .(read_file(templates[[what]]))
          )
        )
      )
    )
    targets::tar_target_raw(
      paste0(what, "_", game_id),
      as.call(c(quote(`{`), dependency_call, fetch_call)),
      packages = "tarflow.iquizoo"
    )
  }

  targets <- lapply(game_ids, build_target)
  names(targets) <- game_ids
  targets
}
#' Generate a set of targets for wrangling and pre-processing raw data
#'
#' For every distinct game in `contents`, this factory can add a parsing target
#' (calling `wrangle_data()` on the raw data target) and an indices target
#' (calling `preproc_data()` on the parsed target). Target names are built as
#' `{name}_{game_id}` from the `name_*` arguments.
#'
#' @param contents The contents structure used as the configuration of data
#'   fetching. Only its `game_id` column is used.
#' @param action_raw_data The action to be taken on the fetched raw data:
#'   `"all"` for parsing and pre-processing, `"parse"` for parsing only,
#'   `"none"` for neither.
#' @param name_data The name (prefix) of the raw data target.
#' @param name_parsed The name (prefix) of the parsed data target.
#' @param name_indices The name (prefix) of the indices target.
#' @return A list with elements `raw_data_parsed` and `indices`, each holding
#'   the generated target objects, or `NULL` when that step is not requested.
#' @export
tar_prep_raw <- function(contents,
                         action_raw_data = c("all", "parse", "none"),
                         name_data = "raw_data",
                         name_parsed = "raw_data_parsed",
                         name_indices = "indices") {
  action_raw_data <- match.arg(action_raw_data)
  # Expand the shorthand into the individual steps to run.
  steps <- switch(action_raw_data,
    all = c("parse", "preproc"),
    action_raw_data
  )

  # One row per game, carrying the symbols that stand in for the target names
  # inside the `tar_eval()` templates below.
  games <- within(
    unique(contents["game_id"]),
    {
      tar_data <- syms(sprintf("%s_%s", name_data, game_id))
      tar_parsed <- syms(sprintf("%s_%s", name_parsed, game_id))
      tar_indices <- syms(sprintf("%s_%s", name_indices, game_id))
    }
  )

  tar_parsed_all <- if ("parse" %in% steps) {
    tarchetypes::tar_eval(
      targets::tar_target(
        tar_parsed,
        wrangle_data(tar_data),
        packages = "preproc.iquizoo"
      ),
      games
    )
  }

  tar_indices_all <- if ("preproc" %in% steps) {
    tarchetypes::tar_eval(
      targets::tar_target(
        tar_indices,
        preproc_data(tar_parsed, prep_fun, .input = input, .extra = extra),
        packages = "preproc.iquizoo"
      ),
      # NOTE(review): presumably `match_preproc()` attaches the `prep_fun`,
      # `input` and `extra` columns and keeps only matched games -- confirm
      # against data.iquizoo.
      data.iquizoo::match_preproc(games, type = "inner")
    )
  }

  list(raw_data_parsed = tar_parsed_all, indices = tar_indices_all)
}
# Names of the target groups that can be requested through `combine`.
# Note: within this file's scope this definition masks `base::objects()`.
objects <- function() {
  group_names <- c("scores", "raw_data", "raw_data_parsed", "indices")
  group_names
}
# Declare names that only appear inside quoted or templated expressions in this
# file (e.g. `project_id` in the `tar_map()` command, the `tar_*` placeholders
# and `prep_fun` / `input` / `extra` in the `tar_eval()` templates), so that
# static code checks do not report them as undefined global variables.
utils::globalVariables(
c(
"project_id", "game_id",
"tar_data", "tar_parsed", "tar_indices",
"wrangle_data", "preproc_data",
"prep_fun", "input", "extra"
)
)