1
1
library(textrecipes )
2
2
library(recipes )
3
- data(grants , package = " modeldata" )
4
3
5
- test_data <- grants_test [1 : 20 , c(" contract_value_band" , " sponsor_code" )]
6
- test_data <- tibble :: as_tibble(test_data )
7
-
8
- rec <- recipe(~ . , data = test_data )
9
4
10
5
test_that(" hashing gives double outputs" , {
11
6
skip_if_not_installed(" text2vec" )
12
7
skip_if_not_installed(" data.table" )
8
+ skip_if_not_installed(" modeldata" )
13
9
data.table :: setDTthreads(2 ) # because data.table uses all cores by default
14
10
15
- rec <- rec %> %
11
+ data(" grants" , package = " modeldata" )
12
+
13
+ test_data <- grants_test [1 : 20 , c(" contract_value_band" , " sponsor_code" )]
14
+ test_data <- tibble :: as_tibble(test_data )
15
+
16
+ rec <- recipe(~ . , data = test_data ) %> %
16
17
step_dummy_hash(sponsor_code )
17
18
18
19
obj <- rec %> %
@@ -32,9 +33,16 @@ test_that("hashing gives double outputs", {
32
33
33
34
test_that(" hashing multiple factors" , {
34
35
skip_if_not_installed(" data.table" )
36
+ skip_if_not_installed(" modeldata" )
37
+ skip_if_not_installed(" text2vec" )
35
38
data.table :: setDTthreads(2 ) # because data.table uses all cores by default
36
39
37
- res <- rec %> %
40
+ data(" grants" , package = " modeldata" )
41
+
42
+ test_data <- grants_test [1 : 20 , c(" contract_value_band" , " sponsor_code" )]
43
+ test_data <- tibble :: as_tibble(test_data )
44
+
45
+ res <- recipe(~ . , data = test_data ) %> %
38
46
step_dummy_hash(all_nominal_predictors(), num_terms = 12 ) %> %
39
47
prep() %> %
40
48
bake(new_data = NULL )
@@ -46,9 +54,16 @@ test_that("hashing multiple factors", {
46
54
47
55
test_that(" hashing collapsed multiple factors" , {
48
56
skip_if_not_installed(" data.table" )
57
+ skip_if_not_installed(" modeldata" )
58
+ skip_if_not_installed(" text2vec" )
49
59
data.table :: setDTthreads(2 ) # because data.table uses all cores by default
50
60
51
- res <- rec %> %
61
+ data(" grants" , package = " modeldata" )
62
+
63
+ test_data <- grants_test [1 : 20 , c(" contract_value_band" , " sponsor_code" )]
64
+ test_data <- tibble :: as_tibble(test_data )
65
+
66
+ res <- recipe(~ . , data = test_data ) %> %
52
67
step_dummy_hash(all_nominal_predictors(), num_terms = 4 , collapse = TRUE ) %> %
53
68
prep() %> %
54
69
bake(new_data = NULL )
@@ -60,9 +75,15 @@ test_that("hashing collapsed multiple factors", {
60
75
test_that(" hashing output width changes accordingly with num_terms" , {
61
76
skip_if_not_installed(" text2vec" )
62
77
skip_if_not_installed(" data.table" )
78
+ skip_if_not_installed(" modeldata" )
63
79
data.table :: setDTthreads(2 ) # because data.table uses all cores by default
64
80
65
- rec <- rec %> %
81
+ data(" grants" , package = " modeldata" )
82
+
83
+ test_data <- grants_test [1 : 20 , c(" contract_value_band" , " sponsor_code" )]
84
+ test_data <- tibble :: as_tibble(test_data )
85
+
86
+ rec <- recipe(~ . , data = test_data ) %> %
66
87
step_dummy_hash(sponsor_code , num_terms = 256 ) %> %
67
88
prep()
68
89
@@ -77,7 +98,13 @@ test_that("hashing output width changes accordingly with num_terms", {
77
98
test_that(" hashing output width changes accordingly with num_terms" , {
78
99
skip_if_not_installed(" text2vec" )
79
100
skip_if_not_installed(" data.table" )
101
+ skip_if_not_installed(" modeldata" )
80
102
data.table :: setDTthreads(2 ) # because data.table uses all cores by default
103
+
104
+ data(" grants" , package = " modeldata" )
105
+
106
+ test_data <- grants_test [1 : 20 , c(" contract_value_band" , " sponsor_code" )]
107
+ test_data <- tibble :: as_tibble(test_data )
81
108
82
109
signed <- recipe(~ . , data = test_data ) %> %
83
110
step_dummy_hash(all_predictors(), num_terms = 2 ) %> %
@@ -98,8 +125,14 @@ test_that("hashing output width changes accordingly with num_terms", {
98
125
test_that(" check_name() is used" , {
99
126
skip_if_not_installed(" text2vec" )
100
127
skip_if_not_installed(" data.table" )
128
+ skip_if_not_installed(" modeldata" )
101
129
data.table :: setDTthreads(2 ) # because data.table uses all cores by default
102
130
131
+ data(" grants" , package = " modeldata" )
132
+
133
+ test_data <- grants_test [1 : 20 , c(" contract_value_band" , " sponsor_code" )]
134
+ test_data <- tibble :: as_tibble(test_data )
135
+
103
136
dat <- test_data
104
137
dat $ text <- dat $ sponsor_code
105
138
dat $ dummyhash_text_01 <- dat $ sponsor_code
@@ -131,6 +164,15 @@ test_that("tunable", {
131
164
# Infrastructure ---------------------------------------------------------------
132
165
133
166
test_that(" bake method errors when needed non-standard role columns are missing" , {
167
+ skip_if_not_installed(" modeldata" )
168
+ skip_if_not_installed(" text2vec" )
169
+ data.table :: setDTthreads(2 ) # because data.table uses all cores by default
170
+
171
+ data(" grants" , package = " modeldata" )
172
+
173
+ test_data <- grants_test [1 : 20 , c(" contract_value_band" , " sponsor_code" )]
174
+ test_data <- tibble :: as_tibble(test_data )
175
+
134
176
rec <- recipe(~ sponsor_code , data = test_data ) %> %
135
177
step_dummy_hash(sponsor_code ) %> %
136
178
update_role(sponsor_code , new_role = " potato" ) %> %
@@ -190,8 +232,14 @@ test_that("empty selection tidy method works", {
190
232
test_that(" keep_original_cols works" , {
191
233
skip_if_not_installed(" text2vec" )
192
234
skip_if_not_installed(" data.table" )
235
+ skip_if_not_installed(" modeldata" )
193
236
data.table :: setDTthreads(2 ) # because data.table uses all cores by default
194
237
238
+ data(" grants" , package = " modeldata" )
239
+
240
+ test_data <- grants_test [1 : 20 , c(" contract_value_band" , " sponsor_code" )]
241
+ test_data <- tibble :: as_tibble(test_data )
242
+
195
243
new_names <- paste0(" dummyhash_sponsor_code_" , 1 : 5 )
196
244
197
245
rec <- recipe(~ sponsor_code , data = test_data ) %> %
@@ -220,8 +268,14 @@ test_that("keep_original_cols works", {
220
268
test_that(" keep_original_cols - can prep recipes with it missing" , {
221
269
skip_if_not_installed(" text2vec" )
222
270
skip_if_not_installed(" data.table" )
271
+ skip_if_not_installed(" modeldata" )
223
272
data.table :: setDTthreads(2 ) # because data.table uses all cores by default
224
273
274
+ data(" grants" , package = " modeldata" )
275
+
276
+ test_data <- grants_test [1 : 20 , c(" contract_value_band" , " sponsor_code" )]
277
+ test_data <- tibble :: as_tibble(test_data )
278
+
225
279
rec <- recipe(~ sponsor_code , data = test_data ) %> %
226
280
step_dummy_hash(sponsor_code )
227
281
@@ -242,8 +296,8 @@ test_that("printing", {
242
296
skip_if_not_installed(" data.table" )
243
297
data.table :: setDTthreads(2 ) # because data.table uses all cores by default
244
298
245
- rec <- rec %> %
246
- step_dummy_hash(sponsor_code )
299
+ rec <- recipe( ~ . , data = iris ) %> %
300
+ step_dummy_hash(Species )
247
301
248
302
expect_snapshot(print(rec ))
249
303
expect_snapshot(prep(rec ))
0 commit comments