tidymodels
diff --git a/R/dummy_hash.R b/R/dummy_hash.R
Lines changed: 4 additions & 0 deletions
diff --git a/R/lda.R b/R/lda.R
Lines changed: 3 additions & 0 deletions
diff --git a/R/ngram.R b/R/ngram.R
Lines changed: 4 additions & 0 deletions
diff --git a/R/pos_filter.R b/R/pos_filter.R
Lines changed: 2 additions & 0 deletions
diff --git a/R/sequence_onehot.R b/R/sequence_onehot.R
Lines changed: 5 additions & 7 deletions
diff --git a/R/show_tokens.R b/R/show_tokens.R
Lines changed: 2 additions & 0 deletions
diff --git a/R/stem.R b/R/stem.R
Lines changed: 3 additions & 0 deletions
diff --git a/R/stopwords.R b/R/stopwords.R
Lines changed: 7 additions & 0 deletions
diff --git a/R/text_normalization.R b/R/text_normalization.R
Lines changed: 6 additions & 0 deletions
diff --git a/R/textfeature.R b/R/textfeature.R
Lines changed: 2 additions & 0 deletions
diff --git a/R/texthash.R b/R/texthash.R
Lines changed: 4 additions & 0 deletions
diff --git a/R/tf.R b/R/tf.R
Lines changed: 5 additions & 0 deletions
diff --git a/R/tfidf.R b/R/tfidf.R
Lines changed: 6 additions & 0 deletions
diff --git a/tests/testthat/_snaps/R4.4/tokenize_bpe.new.md b/tests/testthat/_snaps/R4.4/tokenize_bpe.new.md
Lines changed: 0 additions & 16 deletions
diff --git a/tests/testthat/_snaps/dummy_hash.md b/tests/testthat/_snaps/dummy_hash.md
Lines changed: 27 additions & 0 deletions
diff --git a/tests/testthat/_snaps/lda.md b/tests/testthat/_snaps/lda.md
Lines changed: 18 additions & 0 deletions
diff --git a/tests/testthat/_snaps/ngram.md b/tests/testthat/_snaps/ngram.md
Lines changed: 27 additions & 0 deletions
diff --git a/tests/testthat/_snaps/pos_filter.md b/tests/testthat/_snaps/pos_filter.md
Lines changed: 9 additions & 0 deletions
diff --git a/tests/testthat/_snaps/sequence_onehot.md b/tests/testthat/_snaps/sequence_onehot.md
Lines changed: 39 additions & 4 deletions
diff --git a/tests/testthat/_snaps/stem.md b/tests/testthat/_snaps/stem.md
Lines changed: 8 additions & 0 deletions
@@ -157,6 +157,10 @@ step_dummy_hash_new <-
 prep.step_dummy_hash <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
 
+  check_bool(x$signed, arg = "signed")
+  check_number_whole(x$num_terms, min = 0, arg = "num_terms")
+  check_bool(x$collapse, arg = "collapse")
+
   check_type(training[, col_names], types = c("string", "factor", "ordered"))
 
   step_dummy_hash_new(
 
@@ -134,6 +134,9 @@ step_lda_new <-
 prep.step_lda <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
 
+  check_number_whole(x$num_topics, min = 0, arg = "num_topics")
+  check_string(x$prefix, arg = "prefix")
+
   check_lda_character(training[, col_names])
 
   check_type(training[, col_names], types = "tokenlist")
 
@@ -119,6 +119,10 @@ step_ngram_new <-
 prep.step_ngram <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
 
+  check_number_whole(x$num_tokens, min = 0, arg = "num_tokens")
+  check_number_whole(x$min_num_tokens, min = 0, arg = "min_num_tokens")
+  check_string(x$delim, arg = "delim")
+
   check_type(training[, col_names], types = "tokenlist")
 
   step_ngram_new(
 
@@ -99,6 +99,8 @@ step_pos_filter_new <-
 prep.step_pos_filter <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
 
+  check_character(x$keep_tags, arg = "keep_tags")
+
   check_type(training[, col_names], types = "tokenlist")
 
   step_pos_filter_new(
 
@@ -84,13 +84,8 @@ step_sequence_onehot <-
            keep_original_cols = FALSE,
            skip = FALSE,
            id = rand_id("sequence_onehot")) {
-    if (length(padding) != 1 || !(padding %in% c("pre", "post"))) {
-      cli::cli_abort("{.arg padding} should be one of: {.val pre}, {.val post}")
-    }
-
-    if (length(truncating) != 1 || !(truncating %in% c("pre", "post"))) {
-      cli::cli_abort("{.code truncating} should be {.val pre} or {.val post}.")
-    }
+    rlang::arg_match0(padding, c("pre", "post"))
+    rlang::arg_match0(truncating, c("pre", "post"))
 
     add_step(
       recipe,
@@ -135,6 +130,9 @@ step_sequence_onehot_new <-
 prep.step_sequence_onehot <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
 
+  check_number_whole(x$sequence_length, min = 0, arg = "sequence_length")
+  check_string(x$prefix, arg = "prefix")
+
   check_type(training[, col_names], types = "tokenlist")
 
   token_list <- list()
 
@@ -26,6 +26,8 @@
 #'   step_tokenize(medium) %>%
 #'   show_tokens(medium)
 show_tokens <- function(rec, var, n = 6L) {
+  check_number_whole(n, min = 0, nrow(rec$template))
+  
   res <- rec %>%
     prep() %>%
     bake(new_data = NULL) %>%
 
@@ -92,6 +92,9 @@ step_stem <-
            custom_stemmer = NULL,
            skip = FALSE,
            id = rand_id("stem")) {
+    
+    check_function(custom_stemmer, allow_null = TRUE)
+    
     add_step(
       recipe,
       step_stem_new(
 
@@ -133,6 +133,13 @@ step_stopwords_new <-
 prep.step_stopwords <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
 
+  check_string(x$language, arg = "language")
+  check_bool(x$keep, arg = "keep")
+  check_string(x$stopword_source, arg = "stopword_source")
+  check_character(
+    x$custom_stopword_source, allow_null = TRUE, arg = "custom_stopword_source"
+  )
+
   check_type(training[, col_names], types = "tokenlist")
 
   step_stopwords_new(
 
@@ -99,6 +99,12 @@ step_text_normalization_new <-
 prep.step_text_normalization <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
 
+  rlang::arg_match0(
+    x$normalization_form, 
+    c("nfc", "nfd", "nfkd", "nfkc", "nfkc_casefold"),
+    arg_nm = "normalization_form"
+  )
+
   training <- factor_to_text(training, col_names)
 
   check_type(training[, col_names], types = c("string", "factor", "ordered"))
 
@@ -122,6 +122,8 @@ step_textfeature_new <-
 prep.step_textfeature <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
 
+  check_string(x$prefix, arg = "prefix")
+
   training <- factor_to_text(training, col_names)
 
   check_type(training[, col_names], types = c("string", "factor", "ordered"))
 
@@ -141,6 +141,10 @@ step_texthash_new <-
 prep.step_texthash <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
 
+  check_bool(x$signed, arg = "signed")
+  check_number_whole(x$num_terms, min = 0, arg = "num_terms")
+  check_string(x$prefix, arg = "prefix")
+
   check_type(training[, col_names], types = "tokenlist")
 
   step_texthash_new(
 
@@ -156,6 +156,11 @@ step_tf_new <-
 prep.step_tf <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
 
+  rlang::arg_match0(x$weight_scheme, tf_funs, arg_nm = "weight_scheme")
+  check_number_decimal(x$weight, arg = "weight")
+  check_character(x$vocabulary, allow_null = TRUE, arg = "vocabulary")
+  check_string(x$prefix, arg = "prefix")
+
   check_type(training[, col_names], types = "tokenlist")
 
   token_list <- list()
 
@@ -148,6 +148,12 @@ step_tfidf_new <-
 prep.step_tfidf <- function(x, training, info = NULL, ...) {
   col_names <- recipes_eval_select(x$terms, training, info)
 
+  check_character(x$vocabulary, allow_null = TRUE, arg = "vocabulary")
+  check_bool(x$smooth_idf, arg = "smooth_idf")
+  rlang::arg_match0(x$norm, c("l1", "l2", "none"), arg_nm = "norm")
+  check_bool(x$sublinear_tf, arg = "sublinear_tf")
+  check_string(x$prefix, arg = "prefix")
+
   check_type(training[, col_names], types = "tokenlist")
 
   idf_weights <- list()
 
@@ -8,6 +8,33 @@
       ! Name collision occurred. The following variable names already exist:
       * `dummyhash_text_01`
 
+# bad args
+
+    Code
+      recipe(~., data = mtcars) %>% step_dummy_hash(signed = "yes") %>% prep()
+    Condition
+      Error in `step_dummy_hash()`:
+      Caused by error in `prep()`:
+      ! `signed` must be `TRUE` or `FALSE`, not the string "yes".
+
+---
+
+    Code
+      recipe(~., data = mtcars) %>% step_dummy_hash(num_terms = -4) %>% prep()
+    Condition
+      Error in `step_dummy_hash()`:
+      Caused by error in `prep()`:
+      ! `num_terms` must be a whole number larger than or equal to 0, not the number -4.
+
+---
+
+    Code
+      recipe(~., data = mtcars) %>% step_dummy_hash(collapse = "yes") %>% prep()
+    Condition
+      Error in `step_dummy_hash()`:
+      Caused by error in `prep()`:
+      ! `collapse` must be `TRUE` or `FALSE`, not the string "yes".
+
 # bake method errors when needed non-standard role columns are missing
 
     Code
 
@@ -8,6 +8,24 @@
       ! Name collision occurred. The following variable names already exist:
       * `lda_text_1`
 
+# bad args
+
+    Code
+      recipe(~., data = mtcars) %>% step_lda(num_topics = -4) %>% prep()
+    Condition
+      Error in `step_lda()`:
+      Caused by error in `prep()`:
+      ! `num_topics` must be a whole number larger than or equal to 0, not the number -4.
+
+---
+
+    Code
+      recipe(~., data = mtcars) %>% step_lda(prefix = NULL) %>% prep()
+    Condition
+      Error in `step_lda()`:
+      Caused by error in `prep()`:
+      ! `prefix` must be a single string, not `NULL`.
+
 # bake method errors when needed non-standard role columns are missing
 
     Code
 
@@ -14,6 +14,33 @@
       Error:
       ! n must be a positive integer.
 
+# bad args
+
+    Code
+      recipe(~., data = mtcars) %>% step_ngram(num_tokens = -4) %>% prep()
+    Condition
+      Error in `step_ngram()`:
+      Caused by error in `prep()`:
+      ! `num_tokens` must be a whole number larger than or equal to 0, not the number -4.
+
+---
+
+    Code
+      recipe(~., data = mtcars) %>% step_ngram(min_num_tokens = -4) %>% prep()
+    Condition
+      Error in `step_ngram()`:
+      Caused by error in `prep()`:
+      ! `min_num_tokens` must be a whole number larger than or equal to 0, not the number -4.
+
+---
+
+    Code
+      recipe(~., data = mtcars) %>% step_ngram(delim = -4) %>% prep()
+    Condition
+      Error in `step_ngram()`:
+      Caused by error in `prep()`:
+      ! `delim` must be a single string, not the number -4.
+
 # bake method errors when needed non-standard role columns are missing
 
     Code
 
@@ -8,6 +8,15 @@
       ! `text` doesn't have a pos attribute.
       i Make sure the tokenization step includes part of speech tagging.
 
+# bad args
+
+    Code
+      recipe(~., data = mtcars) %>% step_pos_filter(keep_tags = -4) %>% prep()
+    Condition
+      Error in `step_pos_filter()`:
+      Caused by error in `prep()`:
+      ! `keep_tags` must be a character vector, not the number -4.
+
 # bake method errors when needed non-standard role columns are missing
 
     Code
 
@@ -4,15 +4,15 @@
       rec %>% step_tokenize(text) %>% step_sequence_onehot(text, padding = "not pre")
     Condition
       Error in `step_sequence_onehot()`:
-      ! `padding` should be one of: "pre", "post"
+      ! `padding` must be one of "pre" or "post", not "not pre".
 
 ---
 
     Code
       rec %>% step_tokenize(text) %>% step_sequence_onehot(text, truncating = "Wrong")
     Condition
       Error in `step_sequence_onehot()`:
-      ! `truncating` should be "pre" or "post".
+      ! `truncating` must be one of "pre" or "post", not "Wrong".
 
 ---
 
@@ -21,15 +21,15 @@
         "pre"))
     Condition
       Error in `step_sequence_onehot()`:
-      ! `padding` should be one of: "pre", "post"
+      ! `arg` must be length 1 or a permutation of `c("pre", "post")`.
 
 ---
 
     Code
       rec %>% step_tokenize(text) %>% step_sequence_onehot(text, truncating = "Wrong")
     Condition
       Error in `step_sequence_onehot()`:
-      ! `truncating` should be "pre" or "post".
+      ! `truncating` must be one of "pre" or "post", not "Wrong".
 
 # check_name() is used
 
@@ -41,6 +41,41 @@
       ! Name collision occurred. The following variable names already exist:
       * `seq1hot_text_1`
 
+# bad args
+
+    Code
+      recipe(~., data = mtcars) %>% step_sequence_onehot(padding = "yes")
+    Condition
+      Error in `step_sequence_onehot()`:
+      ! `padding` must be one of "pre" or "post", not "yes".
+
+---
+
+    Code
+      recipe(~., data = mtcars) %>% step_sequence_onehot(truncating = "yes")
+    Condition
+      Error in `step_sequence_onehot()`:
+      ! `truncating` must be one of "pre" or "post", not "yes".
+
+---
+
+    Code
+      recipe(~., data = mtcars) %>% step_sequence_onehot(sequence_length = -4) %>%
+        prep()
+    Condition
+      Error in `step_sequence_onehot()`:
+      Caused by error in `prep()`:
+      ! `sequence_length` must be a whole number larger than or equal to 0, not the number -4.
+
+---
+
+    Code
+      recipe(~., data = mtcars) %>% step_sequence_onehot(prefix = NULL) %>% prep()
+    Condition
+      Error in `step_sequence_onehot()`:
+      Caused by error in `prep()`:
+      ! `prefix` must be a single string, not `NULL`.
+
 # bake method errors when needed non-standard role columns are missing
 
     Code
 
@@ -1,3 +1,11 @@
+# bad args
+
+    Code
+      recipe(~., data = mtcars) %>% step_stem(custom_stemmer = "yes") %>% prep()
+    Condition
+      Error in `step_stem()`:
+      ! `custom_stemmer` must be a function or `NULL`, not the string "yes".
+
 # bake method errors when needed non-standard role columns are missing
 
     Code