Skip to content

Commit 8161b20

Browse files
committed
first round of type checkers
1 parent be4e250 commit 8161b20

38 files changed

+572
-27
lines changed

R/dummy_hash.R

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,10 @@ step_dummy_hash_new <-
157157
prep.step_dummy_hash <- function(x, training, info = NULL, ...) {
158158
col_names <- recipes_eval_select(x$terms, training, info)
159159

160+
check_bool(x$signed, arg = "signed")
161+
check_number_whole(x$num_terms, min = 0, arg = "num_terms")
162+
check_bool(x$collapse, arg = "collapse")
163+
160164
check_type(training[, col_names], types = c("string", "factor", "ordered"))
161165

162166
step_dummy_hash_new(

R/lda.R

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,9 @@ step_lda_new <-
134134
prep.step_lda <- function(x, training, info = NULL, ...) {
135135
col_names <- recipes_eval_select(x$terms, training, info)
136136

137+
check_number_whole(x$num_topics, min = 0, arg = "num_topics")
138+
check_string(x$prefix, arg = "prefix")
139+
137140
check_lda_character(training[, col_names])
138141

139142
check_type(training[, col_names], types = "tokenlist")

R/ngram.R

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,10 @@ step_ngram_new <-
119119
prep.step_ngram <- function(x, training, info = NULL, ...) {
120120
col_names <- recipes_eval_select(x$terms, training, info)
121121

122+
check_number_whole(x$num_tokens, min = 0, arg = "num_tokens")
123+
check_number_whole(x$min_num_tokens, min = 0, arg = "min_num_tokens")
124+
check_string(x$delim, arg = "delim")
125+
122126
check_type(training[, col_names], types = "tokenlist")
123127

124128
step_ngram_new(

R/pos_filter.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,8 @@ step_pos_filter_new <-
9999
prep.step_pos_filter <- function(x, training, info = NULL, ...) {
100100
col_names <- recipes_eval_select(x$terms, training, info)
101101

102+
check_character(x$keep_tags, arg = "keep_tags")
103+
102104
check_type(training[, col_names], types = "tokenlist")
103105

104106
step_pos_filter_new(

R/sequence_onehot.R

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -84,13 +84,8 @@ step_sequence_onehot <-
8484
keep_original_cols = FALSE,
8585
skip = FALSE,
8686
id = rand_id("sequence_onehot")) {
87-
if (length(padding) != 1 || !(padding %in% c("pre", "post"))) {
88-
cli::cli_abort("{.arg padding} should be one of: {.val pre}, {.val post}")
89-
}
90-
91-
if (length(truncating) != 1 || !(truncating %in% c("pre", "post"))) {
92-
cli::cli_abort("{.code truncating} should be {.val pre} or {.val post}.")
93-
}
87+
rlang::arg_match0(padding, c("pre", "post"))
88+
rlang::arg_match0(truncating, c("pre", "post"))
9489

9590
add_step(
9691
recipe,
@@ -135,6 +130,9 @@ step_sequence_onehot_new <-
135130
prep.step_sequence_onehot <- function(x, training, info = NULL, ...) {
136131
col_names <- recipes_eval_select(x$terms, training, info)
137132

133+
check_number_whole(x$sequence_length, min = 0, arg = "sequence_length")
134+
check_string(x$prefix, arg = "prefix")
135+
138136
check_type(training[, col_names], types = "tokenlist")
139137

140138
token_list <- list()

R/show_tokens.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
#' step_tokenize(medium) %>%
2727
#' show_tokens(medium)
2828
show_tokens <- function(rec, var, n = 6L) {
29+
check_number_whole(n, min = 0, nrow(rec$template))
30+
2931
res <- rec %>%
3032
prep() %>%
3133
bake(new_data = NULL) %>%

R/stem.R

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,9 @@ step_stem <-
9292
custom_stemmer = NULL,
9393
skip = FALSE,
9494
id = rand_id("stem")) {
95+
96+
check_function(custom_stemmer, allow_null = TRUE)
97+
9598
add_step(
9699
recipe,
97100
step_stem_new(

R/stopwords.R

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,13 @@ step_stopwords_new <-
133133
prep.step_stopwords <- function(x, training, info = NULL, ...) {
134134
col_names <- recipes_eval_select(x$terms, training, info)
135135

136+
check_string(x$language, arg = "language")
137+
check_bool(x$keep, arg = "keep")
138+
check_string(x$stopword_source, arg = "stopword_source")
139+
check_character(
140+
x$custom_stopword_source, allow_null = TRUE, arg = "custom_stopword_source"
141+
)
142+
136143
check_type(training[, col_names], types = "tokenlist")
137144

138145
step_stopwords_new(

R/text_normalization.R

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,12 @@ step_text_normalization_new <-
9999
prep.step_text_normalization <- function(x, training, info = NULL, ...) {
100100
col_names <- recipes_eval_select(x$terms, training, info)
101101

102+
rlang::arg_match0(
103+
x$normalization_form,
104+
c("nfc", "nfd", "nfkd", "nfkc", "nfkc_casefold"),
105+
arg_nm = "normalization_form"
106+
)
107+
102108
training <- factor_to_text(training, col_names)
103109

104110
check_type(training[, col_names], types = c("string", "factor", "ordered"))

R/textfeature.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,8 @@ step_textfeature_new <-
122122
prep.step_textfeature <- function(x, training, info = NULL, ...) {
123123
col_names <- recipes_eval_select(x$terms, training, info)
124124

125+
check_string(x$prefix, arg = "prefix")
126+
125127
training <- factor_to_text(training, col_names)
126128

127129
check_type(training[, col_names], types = c("string", "factor", "ordered"))

R/texthash.R

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,10 @@ step_texthash_new <-
141141
prep.step_texthash <- function(x, training, info = NULL, ...) {
142142
col_names <- recipes_eval_select(x$terms, training, info)
143143

144+
check_bool(x$signed, arg = "signed")
145+
check_number_whole(x$num_terms, min = 0, arg = "num_terms")
146+
check_string(x$prefix, arg = "prefix")
147+
144148
check_type(training[, col_names], types = "tokenlist")
145149

146150
step_texthash_new(

R/tf.R

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,11 @@ step_tf_new <-
156156
prep.step_tf <- function(x, training, info = NULL, ...) {
157157
col_names <- recipes_eval_select(x$terms, training, info)
158158

159+
rlang::arg_match0(x$weight_scheme, tf_funs, arg_nm = "weight_scheme")
160+
check_number_decimal(x$weight, arg = "weight")
161+
check_character(x$vocabulary, allow_null = TRUE, arg = "vocabulary")
162+
check_string(x$prefix, arg = "prefix")
163+
159164
check_type(training[, col_names], types = "tokenlist")
160165

161166
token_list <- list()

R/tfidf.R

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,12 @@ step_tfidf_new <-
148148
prep.step_tfidf <- function(x, training, info = NULL, ...) {
149149
col_names <- recipes_eval_select(x$terms, training, info)
150150

151+
check_character(x$vocabulary, allow_null = TRUE, arg = "vocabulary")
152+
check_bool(x$smooth_idf, arg = "smooth_idf")
153+
rlang::arg_match0(x$norm, c("l1", "l2", "none"), arg_nm = "norm")
154+
check_bool(x$sublinear_tf, arg = "sublinear_tf")
155+
check_string(x$prefix, arg = "prefix")
156+
151157
check_type(training[, col_names], types = "tokenlist")
152158

153159
idf_weights <- list()

tests/testthat/_snaps/R4.4/tokenize_bpe.new.md

Lines changed: 0 additions & 16 deletions
This file was deleted.

tests/testthat/_snaps/dummy_hash.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,33 @@
88
! Name collision occurred. The following variable names already exist:
99
* `dummyhash_text_01`
1010

11+
# bad args
12+
13+
Code
14+
recipe(~., data = mtcars) %>% step_dummy_hash(signed = "yes") %>% prep()
15+
Condition
16+
Error in `step_dummy_hash()`:
17+
Caused by error in `prep()`:
18+
! `signed` must be `TRUE` or `FALSE`, not the string "yes".
19+
20+
---
21+
22+
Code
23+
recipe(~., data = mtcars) %>% step_dummy_hash(num_terms = -4) %>% prep()
24+
Condition
25+
Error in `step_dummy_hash()`:
26+
Caused by error in `prep()`:
27+
! `num_terms` must be a whole number larger than or equal to 0, not the number -4.
28+
29+
---
30+
31+
Code
32+
recipe(~., data = mtcars) %>% step_dummy_hash(collapse = "yes") %>% prep()
33+
Condition
34+
Error in `step_dummy_hash()`:
35+
Caused by error in `prep()`:
36+
! `collapse` must be `TRUE` or `FALSE`, not the string "yes".
37+
1138
# bake method errors when needed non-standard role columns are missing
1239

1340
Code

tests/testthat/_snaps/lda.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,24 @@
88
! Name collision occurred. The following variable names already exist:
99
* `lda_text_1`
1010

11+
# bad args
12+
13+
Code
14+
recipe(~., data = mtcars) %>% step_lda(num_topics = -4) %>% prep()
15+
Condition
16+
Error in `step_lda()`:
17+
Caused by error in `prep()`:
18+
! `num_topics` must be a whole number larger than or equal to 0, not the number -4.
19+
20+
---
21+
22+
Code
23+
recipe(~., data = mtcars) %>% step_lda(prefix = NULL) %>% prep()
24+
Condition
25+
Error in `step_lda()`:
26+
Caused by error in `prep()`:
27+
! `prefix` must be a single string, not `NULL`.
28+
1129
# bake method errors when needed non-standard role columns are missing
1230

1331
Code

tests/testthat/_snaps/ngram.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,33 @@
1414
Error:
1515
! n must be a positive integer.
1616

17+
# bad args
18+
19+
Code
20+
recipe(~., data = mtcars) %>% step_ngram(num_tokens = -4) %>% prep()
21+
Condition
22+
Error in `step_ngram()`:
23+
Caused by error in `prep()`:
24+
! `num_tokens` must be a whole number larger than or equal to 0, not the number -4.
25+
26+
---
27+
28+
Code
29+
recipe(~., data = mtcars) %>% step_ngram(min_num_tokens = -4) %>% prep()
30+
Condition
31+
Error in `step_ngram()`:
32+
Caused by error in `prep()`:
33+
! `min_num_tokens` must be a whole number larger than or equal to 0, not the number -4.
34+
35+
---
36+
37+
Code
38+
recipe(~., data = mtcars) %>% step_ngram(delim = -4) %>% prep()
39+
Condition
40+
Error in `step_ngram()`:
41+
Caused by error in `prep()`:
42+
! `delim` must be a single string, not the number -4.
43+
1744
# bake method errors when needed non-standard role columns are missing
1845

1946
Code

tests/testthat/_snaps/pos_filter.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,15 @@
88
! `text` doesn't have a pos attribute.
99
i Make sure the tokenization step includes part of speech tagging.
1010

11+
# bad args
12+
13+
Code
14+
recipe(~., data = mtcars) %>% step_pos_filter(keep_tags = -4) %>% prep()
15+
Condition
16+
Error in `step_pos_filter()`:
17+
Caused by error in `prep()`:
18+
! `keep_tags` must be a character vector, not the number -4.
19+
1120
# bake method errors when needed non-standard role columns are missing
1221

1322
Code

tests/testthat/_snaps/sequence_onehot.md

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,15 @@
44
rec %>% step_tokenize(text) %>% step_sequence_onehot(text, padding = "not pre")
55
Condition
66
Error in `step_sequence_onehot()`:
7-
! `padding` should be one of: "pre", "post"
7+
! `padding` must be one of "pre" or "post", not "not pre".
88

99
---
1010

1111
Code
1212
rec %>% step_tokenize(text) %>% step_sequence_onehot(text, truncating = "Wrong")
1313
Condition
1414
Error in `step_sequence_onehot()`:
15-
! `truncating` should be "pre" or "post".
15+
! `truncating` must be one of "pre" or "post", not "Wrong".
1616

1717
---
1818

@@ -21,15 +21,15 @@
2121
"pre"))
2222
Condition
2323
Error in `step_sequence_onehot()`:
24-
! `padding` should be one of: "pre", "post"
24+
! `arg` must be length 1 or a permutation of `c("pre", "post")`.
2525

2626
---
2727

2828
Code
2929
rec %>% step_tokenize(text) %>% step_sequence_onehot(text, truncating = "Wrong")
3030
Condition
3131
Error in `step_sequence_onehot()`:
32-
! `truncating` should be "pre" or "post".
32+
! `truncating` must be one of "pre" or "post", not "Wrong".
3333

3434
# check_name() is used
3535

@@ -41,6 +41,41 @@
4141
! Name collision occurred. The following variable names already exist:
4242
* `seq1hot_text_1`
4343

44+
# bad args
45+
46+
Code
47+
recipe(~., data = mtcars) %>% step_sequence_onehot(padding = "yes")
48+
Condition
49+
Error in `step_sequence_onehot()`:
50+
! `padding` must be one of "pre" or "post", not "yes".
51+
52+
---
53+
54+
Code
55+
recipe(~., data = mtcars) %>% step_sequence_onehot(truncating = "yes")
56+
Condition
57+
Error in `step_sequence_onehot()`:
58+
! `truncating` must be one of "pre" or "post", not "yes".
59+
60+
---
61+
62+
Code
63+
recipe(~., data = mtcars) %>% step_sequence_onehot(sequence_length = -4) %>%
64+
prep()
65+
Condition
66+
Error in `step_sequence_onehot()`:
67+
Caused by error in `prep()`:
68+
! `sequence_length` must be a whole number larger than or equal to 0, not the number -4.
69+
70+
---
71+
72+
Code
73+
recipe(~., data = mtcars) %>% step_sequence_onehot(prefix = NULL) %>% prep()
74+
Condition
75+
Error in `step_sequence_onehot()`:
76+
Caused by error in `prep()`:
77+
! `prefix` must be a single string, not `NULL`.
78+
4479
# bake method errors when needed non-standard role columns are missing
4580

4681
Code

tests/testthat/_snaps/stem.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
# bad args
2+
3+
Code
4+
recipe(~., data = mtcars) %>% step_stem(custom_stemmer = "yes") %>% prep()
5+
Condition
6+
Error in `step_stem()`:
7+
! `custom_stemmer` must be a function or `NULL`, not the string "yes".
8+
19
# bake method errors when needed non-standard role columns are missing
210

311
Code

0 commit comments

Comments
 (0)