Skip to content

Commit fad67ec

Browse files
committed
add lots of skip if not installed
1 parent a25fb65 commit fad67ec

17 files changed

+226
-61
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Errors if vocabulary size is set too low.
2+
3+
Code
4+
recipe(~text1, data = test_data) %>% step_tokenize_bpe(text1, vocabulary_size = 10) %>%
5+
prep()
6+
Condition
7+
Warning in `read.dcf()`:
8+
cannot open compressed file '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/tokenizers.bpe/DESCRIPTION', probable reason 'No such file or directory'
9+
Message
10+
1 package (tokenizers.bpe) is needed for this step but is not installed.
11+
To install run: `install.packages("tokenizers.bpe")`
12+
Condition
13+
Error in `step_tokenize_bpe()`:
14+
Caused by error in `prep()`:
15+
! `vocabulary_size` of 10 is too small for column `text1` which has a unique character count of 23
16+

tests/testthat/_snaps/dummy_hash.md

+5-5
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,10 @@
7070
7171
-- Inputs
7272
Number of variables by role
73-
predictor: 2
73+
predictor: 5
7474
7575
-- Operations
76-
* Feature hashing with: sponsor_code
76+
* Feature hashing with: Species
7777

7878
---
7979

@@ -85,11 +85,11 @@
8585
8686
-- Inputs
8787
Number of variables by role
88-
predictor: 2
88+
predictor: 5
8989
9090
-- Training information
91-
Training data contained 20 data points and no incomplete rows.
91+
Training data contained 150 data points and no incomplete rows.
9292
9393
-- Operations
94-
* Feature hashing with: sponsor_code | Trained
94+
* Feature hashing with: Species | Trained
9595

tests/testthat/_snaps/lda.md

+10-7
Original file line numberDiff line numberDiff line change
@@ -70,28 +70,31 @@
7070
7171
-- Inputs
7272
Number of variables by role
73-
predictor: 2
73+
predictor: 5
7474
7575
-- Operations
76-
* Tokenization for: medium
77-
* Text feature extraction for: medium
76+
* Tokenization for: Species
77+
* Text feature extraction for: Species
7878

7979
---
8080

8181
Code
8282
prep(rec)
83+
Condition
84+
Warning in `get_dtm()`:
85+
dtm has 0 rows. Empty iterator?
8386
Message
8487
8588
-- Recipe ----------------------------------------------------------------------
8689
8790
-- Inputs
8891
Number of variables by role
89-
predictor: 2
92+
predictor: 5
9093
9194
-- Training information
92-
Training data contained 100 data points and no incomplete rows.
95+
Training data contained 150 data points and no incomplete rows.
9396
9497
-- Operations
95-
* Tokenization for: medium | Trained
96-
* Text feature extraction for: medium | Trained
98+
* Tokenization for: Species | Trained
99+
* Text feature extraction for: Species | Trained
97100

tests/testthat/test-clean_levels.R

+16-10
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,13 @@
1-
library(testthat)
2-
library(textrecipes)
3-
library(modeldata)
4-
data("Smithsonian")
5-
smith_tr <- Smithsonian[1:15, ]
6-
smith_te <- Smithsonian[16:20, ]
7-
8-
rec <- recipe(~., data = smith_tr)
9-
101
test_that("character input", {
112
skip_if_not_installed("janitor")
12-
cleaned <- rec %>% step_clean_levels(name, id = "")
3+
skip_if_not_installed("modeldata")
4+
5+
data("Smithsonian", package = "modeldata")
6+
smith_tr <- Smithsonian[1:15, ]
7+
smith_te <- Smithsonian[16:20, ]
8+
9+
cleaned <- recipe(~., data = smith_tr) %>%
10+
step_clean_levels(name, id = "")
1311

1412
tidy_exp_un <- tibble(
1513
terms = c("name"),
@@ -50,6 +48,9 @@ test_that("character input", {
5048

5149
test_that("factor input", {
5250
skip_if_not_installed("janitor")
51+
skip_if_not_installed("modeldata")
52+
53+
data("Smithsonian", package = "modeldata")
5354
smith_tr <- Smithsonian[1:15, ]
5455
smith_tr$name <- as.factor(smith_tr$name)
5556
smith_te <- Smithsonian[16:20, ]
@@ -71,6 +72,11 @@ test_that("factor input", {
7172

7273
test_that("bake method errors when needed non-standard role columns are missing", {
7374
skip_if_not_installed("janitor")
75+
skip_if_not_installed("modeldata")
76+
77+
data("Smithsonian", package = "modeldata")
78+
smith_tr <- Smithsonian[1:15, ]
79+
7480
rec <- recipe(~name, data = smith_tr) %>%
7581
step_clean_levels(name) %>%
7682
update_role(name, new_role = "potato") %>%

tests/testthat/test-clean_names.R

+10-9
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,14 @@
1-
library(testthat)
2-
library(textrecipes)
3-
data(airquality)
1+
test_that("can clean names", {
2+
skip_if_not_installed("janitor")
3+
skip_if_not_installed("modeldata")
44

5-
air_tr <- airquality[1:20, ]
6-
air_te <- airquality[101:110, ]
5+
data("airquality", package = "modeldata")
76

8-
rec <- recipe(~., data = air_tr)
7+
air_tr <- airquality[1:20, ]
8+
air_te <- airquality[101:110, ]
99

10-
test_that("can clean names", {
11-
skip_if_not_installed("janitor")
12-
cleaned <- rec %>% step_clean_names(all_predictors(), id = "")
10+
cleaned <- recipe(~., data = air_tr) %>%
11+
step_clean_names(all_predictors(), id = "")
1312

1413
tidy_exp_un <- tibble(
1514
terms = c("all_predictors()"),
@@ -35,6 +34,8 @@ test_that("can clean names", {
3534
# Infrastructure ---------------------------------------------------------------
3635

3736
test_that("bake method errors when needed non-standard role columns are missing", {
37+
skip_if_not_installed("janitor")
38+
3839
rec <- recipe(mtcars) %>%
3940
step_clean_names(disp) %>%
4041
update_role(disp, new_role = "potato") %>%

tests/testthat/test-dummy_hash.R

+65-11
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,19 @@
11
library(textrecipes)
22
library(recipes)
3-
data(grants, package = "modeldata")
43

5-
test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")]
6-
test_data <- tibble::as_tibble(test_data)
7-
8-
rec <- recipe(~., data = test_data)
94

105
test_that("hashing gives double outputs", {
116
skip_if_not_installed("text2vec")
127
skip_if_not_installed("data.table")
8+
skip_if_not_installed("modeldata")
139
data.table::setDTthreads(2) # because data.table uses all cores by default
1410

15-
rec <- rec %>%
11+
data("grants", package = "modeldata")
12+
13+
test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")]
14+
test_data <- tibble::as_tibble(test_data)
15+
16+
rec <- recipe(~., data = test_data) %>%
1617
step_dummy_hash(sponsor_code)
1718

1819
obj <- rec %>%
@@ -32,9 +33,16 @@ test_that("hashing gives double outputs", {
3233

3334
test_that("hashing multiple factors", {
3435
skip_if_not_installed("data.table")
36+
skip_if_not_installed("modeldata")
37+
skip_if_not_installed("text2vec")
3538
data.table::setDTthreads(2) # because data.table uses all cores by default
3639

37-
res <- rec %>%
40+
data("grants", package = "modeldata")
41+
42+
test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")]
43+
test_data <- tibble::as_tibble(test_data)
44+
45+
res <- recipe(~., data = test_data) %>%
3846
step_dummy_hash(all_nominal_predictors(), num_terms = 12) %>%
3947
prep() %>%
4048
bake(new_data = NULL)
@@ -46,9 +54,16 @@ test_that("hashing multiple factors", {
4654

4755
test_that("hashing collapsed multiple factors", {
4856
skip_if_not_installed("data.table")
57+
skip_if_not_installed("modeldata")
58+
skip_if_not_installed("text2vec")
4959
data.table::setDTthreads(2) # because data.table uses all cores by default
5060

51-
res <- rec %>%
61+
data("grants", package = "modeldata")
62+
63+
test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")]
64+
test_data <- tibble::as_tibble(test_data)
65+
66+
res <- recipe(~., data = test_data) %>%
5267
step_dummy_hash(all_nominal_predictors(), num_terms = 4, collapse = TRUE) %>%
5368
prep() %>%
5469
bake(new_data = NULL)
@@ -60,9 +75,15 @@ test_that("hashing collapsed multiple factors", {
6075
test_that("hashing output width changes accordingly with num_terms", {
6176
skip_if_not_installed("text2vec")
6277
skip_if_not_installed("data.table")
78+
skip_if_not_installed("modeldata")
6379
data.table::setDTthreads(2) # because data.table uses all cores by default
6480

65-
rec <- rec %>%
81+
data("grants", package = "modeldata")
82+
83+
test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")]
84+
test_data <- tibble::as_tibble(test_data)
85+
86+
rec <- recipe(~., data = test_data) %>%
6687
step_dummy_hash(sponsor_code, num_terms = 256) %>%
6788
prep()
6889

@@ -77,7 +98,13 @@ test_that("hashing output width changes accordingly with num_terms", {
7798
test_that("hashing output width changes accordingly with num_terms", {
7899
skip_if_not_installed("text2vec")
79100
skip_if_not_installed("data.table")
101+
skip_if_not_installed("modeldata")
80102
data.table::setDTthreads(2) # because data.table uses all cores by default
103+
104+
data("grants", package = "modeldata")
105+
106+
test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")]
107+
test_data <- tibble::as_tibble(test_data)
81108

82109
signed <- recipe(~., data = test_data) %>%
83110
step_dummy_hash(all_predictors(), num_terms = 2) %>%
@@ -98,8 +125,14 @@ test_that("hashing output width changes accordingly with num_terms", {
98125
test_that("check_name() is used", {
99126
skip_if_not_installed("text2vec")
100127
skip_if_not_installed("data.table")
128+
skip_if_not_installed("modeldata")
101129
data.table::setDTthreads(2) # because data.table uses all cores by default
102130

131+
data("grants", package = "modeldata")
132+
133+
test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")]
134+
test_data <- tibble::as_tibble(test_data)
135+
103136
dat <- test_data
104137
dat$text <- dat$sponsor_code
105138
dat$dummyhash_text_01 <- dat$sponsor_code
@@ -131,6 +164,15 @@ test_that("tunable", {
131164
# Infrastructure ---------------------------------------------------------------
132165

133166
test_that("bake method errors when needed non-standard role columns are missing", {
167+
skip_if_not_installed("modeldata")
168+
skip_if_not_installed("text2vec")
169+
data.table::setDTthreads(2) # because data.table uses all cores by default
170+
171+
data("grants", package = "modeldata")
172+
173+
test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")]
174+
test_data <- tibble::as_tibble(test_data)
175+
134176
rec <- recipe(~sponsor_code, data = test_data) %>%
135177
step_dummy_hash(sponsor_code) %>%
136178
update_role(sponsor_code, new_role = "potato") %>%
@@ -190,8 +232,14 @@ test_that("empty selection tidy method works", {
190232
test_that("keep_original_cols works", {
191233
skip_if_not_installed("text2vec")
192234
skip_if_not_installed("data.table")
235+
skip_if_not_installed("modeldata")
193236
data.table::setDTthreads(2) # because data.table uses all cores by default
194237

238+
data("grants", package = "modeldata")
239+
240+
test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")]
241+
test_data <- tibble::as_tibble(test_data)
242+
195243
new_names <- paste0("dummyhash_sponsor_code_", 1:5)
196244

197245
rec <- recipe(~ sponsor_code, data = test_data) %>%
@@ -220,8 +268,14 @@ test_that("keep_original_cols works", {
220268
test_that("keep_original_cols - can prep recipes with it missing", {
221269
skip_if_not_installed("text2vec")
222270
skip_if_not_installed("data.table")
271+
skip_if_not_installed("modeldata")
223272
data.table::setDTthreads(2) # because data.table uses all cores by default
224273

274+
data("grants", package = "modeldata")
275+
276+
test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")]
277+
test_data <- tibble::as_tibble(test_data)
278+
225279
rec <- recipe(~ sponsor_code, data = test_data) %>%
226280
step_dummy_hash(sponsor_code)
227281

@@ -242,8 +296,8 @@ test_that("printing", {
242296
skip_if_not_installed("data.table")
243297
data.table::setDTthreads(2) # because data.table uses all cores by default
244298

245-
rec <- rec %>%
246-
step_dummy_hash(sponsor_code)
299+
rec <- recipe(~., data = iris) %>%
300+
step_dummy_hash(Species)
247301

248302
expect_snapshot(print(rec))
249303
expect_snapshot(prep(rec))

0 commit comments

Comments
 (0)