1
- # ' Extract identifying variables
2
- # '
3
- # ' This function extracts the identifying variables from a table by applying a
4
- # ' schema description to it.
5
- # ' @param schema [\code{character(1)}]\cr the (validated) schema description of
6
- # ' \code{input}.
7
- # ' @param input [\code{character(1)}]\cr table to reorganise.
8
- # ' @return a list per cluster with values of the identifying variables
9
- # ' @examples
10
- # ' input <- tabs2shift$clusters_nested
11
- # ' schema <- setCluster(id = "sublevel",
12
- # ' group = "territories", member = c(1, 1, 2),
13
- # ' left = 1, top = c(3, 8, 15)) %>%
14
- # ' setIDVar(name = "territories", columns = 1, rows = c(2, 14)) %>%
15
- # ' setIDVar(name = "sublevel", columns = 1, rows = c(3, 8, 15)) %>%
16
- # ' setIDVar(name = "year", columns = 7) %>%
17
- # ' setIDVar(name = "commodities", columns = 2) %>%
18
- # ' setObsVar(name = "harvested", columns = 5) %>%
19
- # ' setObsVar(name = "production", columns = 6)
20
- # '
21
- # ' validateSchema(schema = schema, input = input) %>%
22
- # ' getIDVars(input = input)
23
- # ' @importFrom checkmate assertTRUE
24
- # ' @importFrom tibble tibble
25
- # ' @importFrom purrr map set_names map_dfc
26
- # ' @importFrom dplyr row_number filter select
27
- # ' @importFrom tidyr extract unite fill
28
- # ' @importFrom tidyselect all_of
29
- # ' @export
30
-
31
- getIDVars <- function (schema = NULL , input = NULL ){
32
-
33
- assertTRUE(x = schema @ validated )
34
-
35
- clusters <- schema @ clusters
36
- nClusters <- max(lengths(clusters ))
37
-
38
- variables <- schema @ variables
39
- filter <- schema @ filter
40
-
41
- idVars <- map(.x = seq_along(variables ), .f = function (ix ){
42
- # unselect those id variables that are also cluster or group id
43
- if (variables [[ix ]]$ vartype == " id" & ! names(variables )[ix ] %in% c(clusters $ id , clusters $ group )){
44
- variables [ix ]
45
- }
46
- })
47
- idVars <- unlist(idVars , recursive = FALSE )
48
-
49
- if (length(idVars ) != 0 ){
50
-
51
- out <- map(.x = 1 : nClusters , .f = function (ix ){
52
- vars <- NULL
53
- for (i in 1 : length(idVars )){
54
-
55
- tempVar <- idVars [[i ]]
56
- varRow <- clusters $ row [ix ]: (clusters $ row [ix ]+ clusters $ height [ix ] - 1 )
57
-
58
- if (! is.null(tempVar $ value )){
59
- temp <- tibble(X = tempVar $ value )
60
- } else {
61
-
62
- if (! is.null(tempVar $ row [ix ])){
63
- if (! tempVar $ dist ){
64
- # in case a row value is set, this means we deal with a variable that is not tidy ...
65
- temp <- input [tempVar $ row [ix ], tempVar $ col ]
66
- rowFilter <- NULL
67
- if (! is.null(filter $ col )){
68
- colFilter <- colnames(temp )[tempVar $ col %in% filter $ col ]
69
- } else {
70
- colFilter <- NULL
71
- }
72
- } else {
73
- # ... or distinct from clusters
74
- temp <- input [unique(tempVar $ row ), unique(tempVar $ col )]
75
- rowFilter <- NULL
76
- colFilter <- NULL
77
- }
78
- } else {
79
-
80
- if (! is.null(tempVar $ merge )){
81
- temp <- input [varRow , tempVar $ col ]
82
- rowFilter <- filter $ row
83
- colFilter <- NULL
84
- } else {
85
- temp <- input [varRow , tempVar $ col [ix ]]
86
- rowFilter <- which(varRow %in% filter $ row )
87
- colFilter <- NULL
88
- }
89
-
90
- }
91
-
92
- # apply a row filter ...
93
- if (! is.null(rowFilter )){
94
- temp <- temp %> %
95
- filter(row_number() %in% rowFilter )
96
- }
97
-
98
- # ... and column filter
99
- if (! is.null(colFilter )){
100
- temp <- temp %> %
101
- select(all_of(colFilter ))
102
- }
103
-
104
- # copy missing values downwards
105
- if (anyNA(temp [1 ])){
106
- message(" filling NA-values in variable '" , names(idVars [i ])," '." )
107
- temp <- temp %> %
108
- fill(1 , .direction = " down" )
109
- }
110
-
111
- # split ...
112
- if (! is.null(tempVar $ split )){
113
- # need to distinguish between one and several columns
114
- if (dim(temp )[2 ] == 1 ){
115
- temp <- temp %> %
116
- extract(col = 1 , into = names(temp ), regex = tempVar $ split )
117
- } else {
118
- temp <- map(.x = seq_along(temp ), .f = function (iy ){
119
- temp %> %
120
- select(all_of(iy )) %> %
121
- tidyr :: extract(col = 1 , into = names(temp )[iy ], regex = tempVar $ split )
122
- }) %> % bind_cols(.name_repair = " check_unique" )
123
- }
124
- }
125
-
126
- # ... or merge the variable
127
- if (! is.null(tempVar $ merge )){
128
- newName <- paste0(names(temp ), collapse = tempVar $ merge )
129
- temp <- temp %> %
130
- unite(col = !! newName , sep = tempVar $ merge )
131
- }
132
-
133
- }
134
-
135
- vars <- c(vars , set_names(x = list (temp ), nm = names(idVars )[i ]))
136
-
137
- }
138
- return (vars )
139
-
140
- })
141
-
142
-
143
-
144
- } else {
145
- out <- NULL
146
- }
147
-
148
- return (out )
149
-
1
+ # ' Extract identifying variables
2
+ # '
3
+ # ' This function extracts the identifying variables from a table by applying a
4
+ # ' schema description to it.
5
+ # ' @param schema [\code{character(1)}]\cr the (validated) schema description of
6
+ # ' \code{input}.
7
+ # ' @param input [\code{character(1)}]\cr table to reorganise.
8
+ # ' @return a list per cluster with values of the identifying variables
9
+ # ' @examples
10
+ # ' input <- tabs2shift$clusters_nested
11
+ # ' schema <- setCluster(id = "sublevel",
12
+ # ' group = "territories", member = c(1, 1, 2),
13
+ # ' left = 1, top = c(3, 8, 15)) %>%
14
+ # ' setIDVar(name = "territories", columns = 1, rows = c(2, 14)) %>%
15
+ # ' setIDVar(name = "sublevel", columns = 1, rows = c(3, 8, 15)) %>%
16
+ # ' setIDVar(name = "year", columns = 7) %>%
17
+ # ' setIDVar(name = "commodities", columns = 2) %>%
18
+ # ' setObsVar(name = "harvested", columns = 5) %>%
19
+ # ' setObsVar(name = "production", columns = 6)
20
+ # '
21
+ # ' validateSchema(schema = schema, input = input) %>%
22
+ # ' getIDVars(input = input)
23
+ # ' @importFrom checkmate assertTRUE
24
+ # ' @importFrom tibble tibble
25
+ # ' @importFrom purrr map set_names map_dfc
26
+ # ' @importFrom dplyr row_number filter select
27
+ # ' @importFrom tidyr extract unite fill
28
+ # ' @importFrom tidyselect all_of
29
+ # ' @export
30
+
31
+ getIDVars <- function (schema = NULL , input = NULL ){
32
+
33
+ assertTRUE(x = schema @ validated )
34
+
35
+ clusters <- schema @ clusters
36
+ nClusters <- max(lengths(clusters ))
37
+
38
+ variables <- schema @ variables
39
+ filter <- schema @ filter
40
+
41
+ idVars <- map(.x = seq_along(variables ), .f = function (ix ){
42
+ # unselect those id variables that are also cluster or group id
43
+ if (variables [[ix ]]$ vartype == " id" & ! names(variables )[ix ] %in% c(clusters $ id , clusters $ group )){
44
+ variables [ix ]
45
+ }
46
+ })
47
+ idVars <- unlist(idVars , recursive = FALSE )
48
+
49
+ if (length(idVars ) != 0 ){
50
+
51
+ out <- map(.x = 1 : nClusters , .f = function (ix ){
52
+ vars <- NULL
53
+ for (i in 1 : length(idVars )){
54
+
55
+ tempVar <- idVars [[i ]]
56
+ varRow <- clusters $ row [ix ]: (clusters $ row [ix ]+ clusters $ height [ix ] - 1 )
57
+
58
+ if (! is.null(tempVar $ value )){
59
+ temp <- tibble(X = tempVar $ value )
60
+ } else {
61
+
62
+ if (! is.null(tempVar $ row [ix ])){
63
+ if (! tempVar $ dist ){
64
+ # in case a row value is set, this means we deal with a variable that is not long ...
65
+ temp <- input [tempVar $ row [ix ], unique( tempVar $ col ) ]
66
+ rowFilter <- NULL
67
+ if (! is.null(filter $ col )){
68
+ colFilter <- colnames(temp )[tempVar $ col %in% filter $ col ]
69
+ } else {
70
+ colFilter <- NULL
71
+ }
72
+ } else {
73
+ # ... or distinct from clusters
74
+ temp <- input [unique(tempVar $ row ), unique(tempVar $ col )]
75
+ rowFilter <- NULL
76
+ colFilter <- NULL
77
+ }
78
+ } else {
79
+
80
+ if (! is.null(tempVar $ merge )){
81
+ temp <- input [varRow , tempVar $ col ]
82
+ rowFilter <- filter $ row
83
+ colFilter <- NULL
84
+ } else {
85
+ temp <- input [varRow , tempVar $ col [ix ]]
86
+ rowFilter <- which(varRow %in% filter $ row )
87
+ colFilter <- NULL
88
+ }
89
+
90
+ }
91
+
92
+ # apply a row filter ...
93
+ if (! is.null(rowFilter )){
94
+ temp <- temp %> %
95
+ filter(row_number() %in% rowFilter )
96
+ }
97
+
98
+ # ... and column filter
99
+ if (! is.null(colFilter )){
100
+ temp <- temp %> %
101
+ select(all_of(colFilter ))
102
+ }
103
+
104
+ # copy missing values downwards
105
+ if (anyNA(temp [1 ])){
106
+ message(" filling NA-values in variable '" , names(idVars [i ])," '." )
107
+ temp <- temp %> %
108
+ fill(1 , .direction = " down" )
109
+ }
110
+
111
+ # split ...
112
+ if (! is.null(tempVar $ split )){
113
+ # need to distinguish between one and several columns
114
+ if (dim(temp )[2 ] == 1 ){
115
+ temp <- temp %> %
116
+ extract(col = 1 , into = names(temp ), regex = tempVar $ split )
117
+ } else {
118
+ temp <- map(.x = seq_along(temp ), .f = function (iy ){
119
+ temp %> %
120
+ select(all_of(iy )) %> %
121
+ tidyr :: extract(col = 1 , into = names(temp )[iy ], regex = tempVar $ split )
122
+ }) %> % bind_cols(.name_repair = " check_unique" )
123
+ }
124
+ }
125
+
126
+ # ... or merge the variable
127
+ if (! is.null(tempVar $ merge )){
128
+ newName <- paste0(names(temp ), collapse = tempVar $ merge )
129
+ temp <- temp %> %
130
+ unite(col = !! newName , sep = tempVar $ merge )
131
+ }
132
+
133
+ }
134
+
135
+ vars <- c(vars , set_names(x = list (temp ), nm = names(idVars )[i ]))
136
+
137
+ }
138
+ return (vars )
139
+
140
+ })
141
+
142
+
143
+
144
+ } else {
145
+ out <- NULL
146
+ }
147
+
148
+ return (out )
149
+
150
150
}
0 commit comments