Skip to content

Commit dfb9f87

Browse files
committed
simplify
1 parent 3d3cc18 commit dfb9f87

File tree

3 files changed

+153
-162
lines changed

3 files changed

+153
-162
lines changed

R/getIDVars.R

+149-149
Original file line numberDiff line numberDiff line change
@@ -1,150 +1,150 @@
1-
#' Extract identifying variables
2-
#'
3-
#' This function extracts the identifying variables from a table by applying a
4-
#' schema description to it.
5-
#' @param schema [\code{character(1)}]\cr the (validated) schema description of
6-
#' \code{input}.
7-
#' @param input [\code{character(1)}]\cr table to reorganise.
8-
#' @return a list per cluster with values of the identifying variables
9-
#' @examples
10-
#' input <- tabs2shift$clusters_nested
11-
#' schema <- setCluster(id = "sublevel",
12-
#' group = "territories", member = c(1, 1, 2),
13-
#' left = 1, top = c(3, 8, 15)) %>%
14-
#' setIDVar(name = "territories", columns = 1, rows = c(2, 14)) %>%
15-
#' setIDVar(name = "sublevel", columns = 1, rows = c(3, 8, 15)) %>%
16-
#' setIDVar(name = "year", columns = 7) %>%
17-
#' setIDVar(name = "commodities", columns = 2) %>%
18-
#' setObsVar(name = "harvested", columns = 5) %>%
19-
#' setObsVar(name = "production", columns = 6)
20-
#'
21-
#' validateSchema(schema = schema, input = input) %>%
22-
#' getIDVars(input = input)
23-
#' @importFrom checkmate assertTRUE
24-
#' @importFrom tibble tibble
25-
#' @importFrom purrr map set_names map_dfc
26-
#' @importFrom dplyr row_number filter select
27-
#' @importFrom tidyr extract unite fill
28-
#' @importFrom tidyselect all_of
29-
#' @export
30-
31-
getIDVars <- function(schema = NULL, input = NULL){
32-
33-
assertTRUE(x = schema@validated)
34-
35-
clusters <- schema@clusters
36-
nClusters <- max(lengths(clusters))
37-
38-
variables <- schema@variables
39-
filter <- schema@filter
40-
41-
idVars <- map(.x = seq_along(variables), .f = function(ix){
42-
# unselect those id variables that are also cluster or group id
43-
if(variables[[ix]]$vartype == "id" & !names(variables)[ix] %in% c(clusters$id, clusters$group)){
44-
variables[ix]
45-
}
46-
})
47-
idVars <- unlist(idVars, recursive = FALSE)
48-
49-
if(length(idVars) != 0){
50-
51-
out <- map(.x = 1:nClusters, .f = function(ix){
52-
vars <- NULL
53-
for(i in 1:length(idVars)){
54-
55-
tempVar <- idVars[[i]]
56-
varRow <- clusters$row[ix]:(clusters$row[ix]+clusters$height[ix] - 1)
57-
58-
if(!is.null(tempVar$value)){
59-
temp <- tibble(X = tempVar$value)
60-
} else {
61-
62-
if(!is.null(tempVar$row[ix])){
63-
if(!tempVar$dist){
64-
# in case a row value is set, this means we deal with a variable that is not tidy ...
65-
temp <- input[tempVar$row[ix], tempVar$col]
66-
rowFilter <- NULL
67-
if(!is.null(filter$col)){
68-
colFilter <- colnames(temp)[tempVar$col %in% filter$col]
69-
} else {
70-
colFilter <- NULL
71-
}
72-
} else {
73-
# ... or distinct from clusters
74-
temp <- input[unique(tempVar$row), unique(tempVar$col)]
75-
rowFilter <- NULL
76-
colFilter <- NULL
77-
}
78-
} else {
79-
80-
if(!is.null(tempVar$merge)){
81-
temp <- input[varRow, tempVar$col]
82-
rowFilter <- filter$row
83-
colFilter <- NULL
84-
} else {
85-
temp <- input[varRow, tempVar$col[ix]]
86-
rowFilter <- which(varRow %in% filter$row)
87-
colFilter <- NULL
88-
}
89-
90-
}
91-
92-
# apply a row filter ...
93-
if(!is.null(rowFilter)){
94-
temp <- temp %>%
95-
filter(row_number() %in% rowFilter)
96-
}
97-
98-
# ... and column filter
99-
if(!is.null(colFilter)){
100-
temp <- temp %>%
101-
select(all_of(colFilter))
102-
}
103-
104-
# copy missing values downwards
105-
if(anyNA(temp[1])){
106-
message("filling NA-values in variable '", names(idVars[i]),"'.")
107-
temp <- temp %>%
108-
fill(1, .direction = "down")
109-
}
110-
111-
# split ...
112-
if(!is.null(tempVar$split)){
113-
# need to distinguish between one and several columns
114-
if(dim(temp)[2] == 1){
115-
temp <- temp %>%
116-
extract(col = 1, into = names(temp), regex = tempVar$split)
117-
} else {
118-
temp <- map(.x = seq_along(temp), .f = function(iy){
119-
temp %>%
120-
select(all_of(iy)) %>%
121-
tidyr::extract(col = 1, into = names(temp)[iy], regex = tempVar$split)
122-
}) %>% bind_cols(.name_repair = "check_unique")
123-
}
124-
}
125-
126-
# ... or merge the variable
127-
if(!is.null(tempVar$merge)){
128-
newName <- paste0(names(temp), collapse = tempVar$merge)
129-
temp <- temp %>%
130-
unite(col = !!newName, sep = tempVar$merge)
131-
}
132-
133-
}
134-
135-
vars <- c(vars, set_names(x = list(temp), nm = names(idVars)[i]))
136-
137-
}
138-
return(vars)
139-
140-
})
141-
142-
143-
144-
} else {
145-
out <- NULL
146-
}
147-
148-
return(out)
149-
1+
#' Extract identifying variables
2+
#'
3+
#' This function extracts the identifying variables from a table by applying a
4+
#' schema description to it.
5+
#' @param schema [\code{character(1)}]\cr the (validated) schema description of
6+
#' \code{input}.
7+
#' @param input [\code{character(1)}]\cr table to reorganise.
8+
#' @return a list per cluster with values of the identifying variables
9+
#' @examples
10+
#' input <- tabs2shift$clusters_nested
11+
#' schema <- setCluster(id = "sublevel",
12+
#' group = "territories", member = c(1, 1, 2),
13+
#' left = 1, top = c(3, 8, 15)) %>%
14+
#' setIDVar(name = "territories", columns = 1, rows = c(2, 14)) %>%
15+
#' setIDVar(name = "sublevel", columns = 1, rows = c(3, 8, 15)) %>%
16+
#' setIDVar(name = "year", columns = 7) %>%
17+
#' setIDVar(name = "commodities", columns = 2) %>%
18+
#' setObsVar(name = "harvested", columns = 5) %>%
19+
#' setObsVar(name = "production", columns = 6)
20+
#'
21+
#' validateSchema(schema = schema, input = input) %>%
22+
#' getIDVars(input = input)
23+
#' @importFrom checkmate assertTRUE
24+
#' @importFrom tibble tibble
25+
#' @importFrom purrr map set_names map_dfc
26+
#' @importFrom dplyr row_number filter select
27+
#' @importFrom tidyr extract unite fill
28+
#' @importFrom tidyselect all_of
29+
#' @export
30+
31+
getIDVars <- function(schema = NULL, input = NULL){
32+
33+
assertTRUE(x = schema@validated)
34+
35+
clusters <- schema@clusters
36+
nClusters <- max(lengths(clusters))
37+
38+
variables <- schema@variables
39+
filter <- schema@filter
40+
41+
idVars <- map(.x = seq_along(variables), .f = function(ix){
42+
# unselect those id variables that are also cluster or group id
43+
if(variables[[ix]]$vartype == "id" & !names(variables)[ix] %in% c(clusters$id, clusters$group)){
44+
variables[ix]
45+
}
46+
})
47+
idVars <- unlist(idVars, recursive = FALSE)
48+
49+
if(length(idVars) != 0){
50+
51+
out <- map(.x = 1:nClusters, .f = function(ix){
52+
vars <- NULL
53+
for(i in 1:length(idVars)){
54+
55+
tempVar <- idVars[[i]]
56+
varRow <- clusters$row[ix]:(clusters$row[ix]+clusters$height[ix] - 1)
57+
58+
if(!is.null(tempVar$value)){
59+
temp <- tibble(X = tempVar$value)
60+
} else {
61+
62+
if(!is.null(tempVar$row[ix])){
63+
if(!tempVar$dist){
64+
# in case a row value is set, this means we deal with a variable that is not long ...
65+
temp <- input[tempVar$row[ix], unique(tempVar$col)]
66+
rowFilter <- NULL
67+
if(!is.null(filter$col)){
68+
colFilter <- colnames(temp)[tempVar$col %in% filter$col]
69+
} else {
70+
colFilter <- NULL
71+
}
72+
} else {
73+
# ... or distinct from clusters
74+
temp <- input[unique(tempVar$row), unique(tempVar$col)]
75+
rowFilter <- NULL
76+
colFilter <- NULL
77+
}
78+
} else {
79+
80+
if(!is.null(tempVar$merge)){
81+
temp <- input[varRow, tempVar$col]
82+
rowFilter <- filter$row
83+
colFilter <- NULL
84+
} else {
85+
temp <- input[varRow, tempVar$col[ix]]
86+
rowFilter <- which(varRow %in% filter$row)
87+
colFilter <- NULL
88+
}
89+
90+
}
91+
92+
# apply a row filter ...
93+
if(!is.null(rowFilter)){
94+
temp <- temp %>%
95+
filter(row_number() %in% rowFilter)
96+
}
97+
98+
# ... and column filter
99+
if(!is.null(colFilter)){
100+
temp <- temp %>%
101+
select(all_of(colFilter))
102+
}
103+
104+
# copy missing values downwards
105+
if(anyNA(temp[1])){
106+
message("filling NA-values in variable '", names(idVars[i]),"'.")
107+
temp <- temp %>%
108+
fill(1, .direction = "down")
109+
}
110+
111+
# split ...
112+
if(!is.null(tempVar$split)){
113+
# need to distinguish between one and several columns
114+
if(dim(temp)[2] == 1){
115+
temp <- temp %>%
116+
extract(col = 1, into = names(temp), regex = tempVar$split)
117+
} else {
118+
temp <- map(.x = seq_along(temp), .f = function(iy){
119+
temp %>%
120+
select(all_of(iy)) %>%
121+
tidyr::extract(col = 1, into = names(temp)[iy], regex = tempVar$split)
122+
}) %>% bind_cols(.name_repair = "check_unique")
123+
}
124+
}
125+
126+
# ... or merge the variable
127+
if(!is.null(tempVar$merge)){
128+
newName <- paste0(names(temp), collapse = tempVar$merge)
129+
temp <- temp %>%
130+
unite(col = !!newName, sep = tempVar$merge)
131+
}
132+
133+
}
134+
135+
vars <- c(vars, set_names(x = list(temp), nm = names(idVars)[i]))
136+
137+
}
138+
return(vars)
139+
140+
})
141+
142+
143+
144+
} else {
145+
out <- NULL
146+
}
147+
148+
return(out)
149+
150150
}

R/getObsVars.R

+3-12
Original file line numberDiff line numberDiff line change
@@ -106,25 +106,16 @@ getObsVars <- function(schema = NULL, input = NULL){
106106
varRows <- clusters$row[ix]:(clusters$row[ix]+clusters$height[ix] - 1)
107107

108108
if(!is.null(tempVar$key)){
109+
temp <- input[varRows, unique(tempVar$col)]
110+
rowFilter <- which(varRows %in% filter$row)
111+
colFilter <- NULL
109112
if(tempVar$key == "cluster"){
110113
if(tempVar$value != ix){
111114
next
112115
}
113-
if(length(unique(tempVar$col)) == 1){
114-
temp <- input[varRows, tempVar$col[ix]]
115-
} else {
116-
temp <- input[varRows, tempVar$col]
117-
}
118-
rowFilter <- which(varRows %in% filter$row)
119-
colFilter <- NULL
120116
} else if(is.numeric(tempVar$key)){
121-
temp <- input[varRows, tempVar$col]
122117
if(!tempVar$key == 0){
123118
rowFilter <- NULL
124-
colFilter <- NULL
125-
} else {
126-
rowFilter <- which(varRows %in% filter$row)
127-
colFilter <- NULL
128119
}
129120
}
130121
} else {

tabshiftr.Rproj

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
Version: 1.0
2-
ProjectId: 934225e6-a5a1-4111-86d0-eb38315d6cde
2+
ProjectId: d51a0db2-6864-468a-9885-c223d07ca857
33

44
RestoreWorkspace: Default
55
SaveWorkspace: Default

0 commit comments

Comments
 (0)