1- # ' Extract identifying variables
2- # '
3- # ' This function extracts the identifying variables from a table by applying a
4- # ' schema description to it.
5- # ' @param schema [\code{character(1)}]\cr the (validated) schema description of
6- # ' \code{input}.
7- # ' @param input [\code{character(1)}]\cr table to reorganise.
8- # ' @return a list per cluster with values of the identifying variables
9- # ' @examples
10- # ' input <- tabs2shift$clusters_nested
11- # ' schema <- setCluster(id = "sublevel",
12- # ' group = "territories", member = c(1, 1, 2),
13- # ' left = 1, top = c(3, 8, 15)) %>%
14- # ' setIDVar(name = "territories", columns = 1, rows = c(2, 14)) %>%
15- # ' setIDVar(name = "sublevel", columns = 1, rows = c(3, 8, 15)) %>%
16- # ' setIDVar(name = "year", columns = 7) %>%
17- # ' setIDVar(name = "commodities", columns = 2) %>%
18- # ' setObsVar(name = "harvested", columns = 5) %>%
19- # ' setObsVar(name = "production", columns = 6)
20- # '
21- # ' validateSchema(schema = schema, input = input) %>%
22- # ' getIDVars(input = input)
23- # ' @importFrom checkmate assertTRUE
24- # ' @importFrom tibble tibble
25- # ' @importFrom purrr map set_names map_dfc
26- # ' @importFrom dplyr row_number filter select
27- # ' @importFrom tidyr extract unite fill
28- # ' @importFrom tidyselect all_of
29- # ' @export
30-
31- getIDVars <- function (schema = NULL , input = NULL ){
32-
33- assertTRUE(x = schema @ validated )
34-
35- clusters <- schema @ clusters
36- nClusters <- max(lengths(clusters ))
37-
38- variables <- schema @ variables
39- filter <- schema @ filter
40-
41- idVars <- map(.x = seq_along(variables ), .f = function (ix ){
42- # unselect those id variables that are also cluster or group id
43- if (variables [[ix ]]$ vartype == " id" & ! names(variables )[ix ] %in% c(clusters $ id , clusters $ group )){
44- variables [ix ]
45- }
46- })
47- idVars <- unlist(idVars , recursive = FALSE )
48-
49- if (length(idVars ) != 0 ){
50-
51- out <- map(.x = 1 : nClusters , .f = function (ix ){
52- vars <- NULL
53- for (i in 1 : length(idVars )){
54-
55- tempVar <- idVars [[i ]]
56- varRow <- clusters $ row [ix ]: (clusters $ row [ix ]+ clusters $ height [ix ] - 1 )
57-
58- if (! is.null(tempVar $ value )){
59- temp <- tibble(X = tempVar $ value )
60- } else {
61-
62- if (! is.null(tempVar $ row [ix ])){
63- if (! tempVar $ dist ){
64- # in case a row value is set, this means we deal with a variable that is not tidy ...
65- temp <- input [tempVar $ row [ix ], tempVar $ col ]
66- rowFilter <- NULL
67- if (! is.null(filter $ col )){
68- colFilter <- colnames(temp )[tempVar $ col %in% filter $ col ]
69- } else {
70- colFilter <- NULL
71- }
72- } else {
73- # ... or distinct from clusters
74- temp <- input [unique(tempVar $ row ), unique(tempVar $ col )]
75- rowFilter <- NULL
76- colFilter <- NULL
77- }
78- } else {
79-
80- if (! is.null(tempVar $ merge )){
81- temp <- input [varRow , tempVar $ col ]
82- rowFilter <- filter $ row
83- colFilter <- NULL
84- } else {
85- temp <- input [varRow , tempVar $ col [ix ]]
86- rowFilter <- which(varRow %in% filter $ row )
87- colFilter <- NULL
88- }
89-
90- }
91-
92- # apply a row filter ...
93- if (! is.null(rowFilter )){
94- temp <- temp %> %
95- filter(row_number() %in% rowFilter )
96- }
97-
98- # ... and column filter
99- if (! is.null(colFilter )){
100- temp <- temp %> %
101- select(all_of(colFilter ))
102- }
103-
104- # copy missing values downwards
105- if (anyNA(temp [1 ])){
106- message(" filling NA-values in variable '" , names(idVars [i ])," '." )
107- temp <- temp %> %
108- fill(1 , .direction = " down" )
109- }
110-
111- # split ...
112- if (! is.null(tempVar $ split )){
113- # need to distinguish between one and several columns
114- if (dim(temp )[2 ] == 1 ){
115- temp <- temp %> %
116- extract(col = 1 , into = names(temp ), regex = tempVar $ split )
117- } else {
118- temp <- map(.x = seq_along(temp ), .f = function (iy ){
119- temp %> %
120- select(all_of(iy )) %> %
121- tidyr :: extract(col = 1 , into = names(temp )[iy ], regex = tempVar $ split )
122- }) %> % bind_cols(.name_repair = " check_unique" )
123- }
124- }
125-
126- # ... or merge the variable
127- if (! is.null(tempVar $ merge )){
128- newName <- paste0(names(temp ), collapse = tempVar $ merge )
129- temp <- temp %> %
130- unite(col = !! newName , sep = tempVar $ merge )
131- }
132-
133- }
134-
135- vars <- c(vars , set_names(x = list (temp ), nm = names(idVars )[i ]))
136-
137- }
138- return (vars )
139-
140- })
141-
142-
143-
144- } else {
145- out <- NULL
146- }
147-
148- return (out )
149-
1+ # ' Extract identifying variables
2+ # '
3+ # ' This function extracts the identifying variables from a table by applying a
4+ # ' schema description to it.
5+ # ' @param schema [\code{character(1)}]\cr the (validated) schema description of
6+ # ' \code{input}.
7+ # ' @param input [\code{character(1)}]\cr table to reorganise.
8+ # ' @return a list per cluster with values of the identifying variables
9+ # ' @examples
10+ # ' input <- tabs2shift$clusters_nested
11+ # ' schema <- setCluster(id = "sublevel",
12+ # ' group = "territories", member = c(1, 1, 2),
13+ # ' left = 1, top = c(3, 8, 15)) %>%
14+ # ' setIDVar(name = "territories", columns = 1, rows = c(2, 14)) %>%
15+ # ' setIDVar(name = "sublevel", columns = 1, rows = c(3, 8, 15)) %>%
16+ # ' setIDVar(name = "year", columns = 7) %>%
17+ # ' setIDVar(name = "commodities", columns = 2) %>%
18+ # ' setObsVar(name = "harvested", columns = 5) %>%
19+ # ' setObsVar(name = "production", columns = 6)
20+ # '
21+ # ' validateSchema(schema = schema, input = input) %>%
22+ # ' getIDVars(input = input)
23+ # ' @importFrom checkmate assertTRUE
24+ # ' @importFrom tibble tibble
25+ # ' @importFrom purrr map set_names map_dfc
26+ # ' @importFrom dplyr row_number filter select
27+ # ' @importFrom tidyr extract unite fill
28+ # ' @importFrom tidyselect all_of
29+ # ' @export
30+
31+ getIDVars <- function (schema = NULL , input = NULL ){
32+
33+ assertTRUE(x = schema @ validated )
34+
35+ clusters <- schema @ clusters
36+ nClusters <- max(lengths(clusters ))
37+
38+ variables <- schema @ variables
39+ filter <- schema @ filter
40+
41+ idVars <- map(.x = seq_along(variables ), .f = function (ix ){
42+ # unselect those id variables that are also cluster or group id
43+ if (variables [[ix ]]$ vartype == " id" & ! names(variables )[ix ] %in% c(clusters $ id , clusters $ group )){
44+ variables [ix ]
45+ }
46+ })
47+ idVars <- unlist(idVars , recursive = FALSE )
48+
49+ if (length(idVars ) != 0 ){
50+
51+ out <- map(.x = 1 : nClusters , .f = function (ix ){
52+ vars <- NULL
53+ for (i in 1 : length(idVars )){
54+
55+ tempVar <- idVars [[i ]]
56+ varRow <- clusters $ row [ix ]: (clusters $ row [ix ]+ clusters $ height [ix ] - 1 )
57+
58+ if (! is.null(tempVar $ value )){
59+ temp <- tibble(X = tempVar $ value )
60+ } else {
61+
62+ if (! is.null(tempVar $ row [ix ])){
63+ if (! tempVar $ dist ){
64+ # in case a row value is set, this means we deal with a variable that is not long ...
65+ temp <- input [tempVar $ row [ix ], unique( tempVar $ col ) ]
66+ rowFilter <- NULL
67+ if (! is.null(filter $ col )){
68+ colFilter <- colnames(temp )[tempVar $ col %in% filter $ col ]
69+ } else {
70+ colFilter <- NULL
71+ }
72+ } else {
73+ # ... or distinct from clusters
74+ temp <- input [unique(tempVar $ row ), unique(tempVar $ col )]
75+ rowFilter <- NULL
76+ colFilter <- NULL
77+ }
78+ } else {
79+
80+ if (! is.null(tempVar $ merge )){
81+ temp <- input [varRow , tempVar $ col ]
82+ rowFilter <- filter $ row
83+ colFilter <- NULL
84+ } else {
85+ temp <- input [varRow , tempVar $ col [ix ]]
86+ rowFilter <- which(varRow %in% filter $ row )
87+ colFilter <- NULL
88+ }
89+
90+ }
91+
92+ # apply a row filter ...
93+ if (! is.null(rowFilter )){
94+ temp <- temp %> %
95+ filter(row_number() %in% rowFilter )
96+ }
97+
98+ # ... and column filter
99+ if (! is.null(colFilter )){
100+ temp <- temp %> %
101+ select(all_of(colFilter ))
102+ }
103+
104+ # copy missing values downwards
105+ if (anyNA(temp [1 ])){
106+ message(" filling NA-values in variable '" , names(idVars [i ])," '." )
107+ temp <- temp %> %
108+ fill(1 , .direction = " down" )
109+ }
110+
111+ # split ...
112+ if (! is.null(tempVar $ split )){
113+ # need to distinguish between one and several columns
114+ if (dim(temp )[2 ] == 1 ){
115+ temp <- temp %> %
116+ extract(col = 1 , into = names(temp ), regex = tempVar $ split )
117+ } else {
118+ temp <- map(.x = seq_along(temp ), .f = function (iy ){
119+ temp %> %
120+ select(all_of(iy )) %> %
121+ tidyr :: extract(col = 1 , into = names(temp )[iy ], regex = tempVar $ split )
122+ }) %> % bind_cols(.name_repair = " check_unique" )
123+ }
124+ }
125+
126+ # ... or merge the variable
127+ if (! is.null(tempVar $ merge )){
128+ newName <- paste0(names(temp ), collapse = tempVar $ merge )
129+ temp <- temp %> %
130+ unite(col = !! newName , sep = tempVar $ merge )
131+ }
132+
133+ }
134+
135+ vars <- c(vars , set_names(x = list (temp ), nm = names(idVars )[i ]))
136+
137+ }
138+ return (vars )
139+
140+ })
141+
142+
143+
144+ } else {
145+ out <- NULL
146+ }
147+
148+ return (out )
149+
150150}
0 commit comments