diff --git a/.github/workflows/R-CMD-check-hard.yaml b/.github/workflows/R-CMD-check-hard.yaml new file mode 100644 index 00000000..ac3bc0fd --- /dev/null +++ b/.github/workflows/R-CMD-check-hard.yaml @@ -0,0 +1,59 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +# +# NOTE: This workflow only directly installs "hard" dependencies, i.e. Depends, +# Imports, and LinkingTo dependencies. Notably, Suggests dependencies are never +# installed, with the exception of testthat, knitr, and rmarkdown. The cache is +# never used to avoid accidentally restoring a cache containing a suggested +# dependency. +on: + push: + branches: [main, master] + pull_request: + +name: R-CMD-check-hard.yaml + +permissions: read-all + +jobs: + check-no-suggests: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: ubuntu-latest, r: 'release'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + dependencies: '"hard"' + cache: false + extra-packages: | + any::rcmdcheck + any::testthat + any::knitr + any::rmarkdown + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' diff --git a/R/clean_levels.R b/R/clean_levels.R index c6f8683e..8ab1d8a7 100644 --- a/R/clean_levels.R +++ b/R/clean_levels.R @@ -41,7 +41,7 @@ #' [recipes::step_unknown()], [recipes::step_novel()], [recipes::step_other()] #' @family Steps for Text Cleaning #' -#' @examplesIf rlang::is_installed("janitor") +#' @examplesIf rlang::is_installed(c("modeldata", "janitor")) #' library(recipes) #' library(modeldata) #' data(Smithsonian) @@ -139,7 +139,7 @@ bake.step_clean_levels <- function(object, new_data, ...) { new_data[[col_name]] <- janitor::make_clean_names(new_data[[col_name]]) } - } +} new_data } diff --git a/R/dummy_hash.R b/R/dummy_hash.R index 3cfc8aad..bedd246b 100644 --- a/R/dummy_hash.R +++ b/R/dummy_hash.R @@ -72,7 +72,7 @@ #' @seealso [recipes::step_dummy()] #' @family Steps for Numeric Variables From Characters #' -#' @examplesIf all(c("text2vec", "data.table") %in% rownames(installed.packages())) +#' @examplesIf all(c("modeldata", "text2vec", "data.table") %in% rownames(installed.packages())) #' \dontshow{library(data.table)} #' \dontshow{data.table::setDTthreads(2)} #' \dontshow{Sys.setenv("OMP_NUM_THREADS" = 1)} diff --git a/R/lda.R b/R/lda.R index 2bd2c334..c767f75b 100644 --- a/R/lda.R +++ b/R/lda.R @@ -38,7 +38,7 @@ #' #' @family Steps for Numeric Variables From Tokens #' -#' @examplesIf all(c("text2vec", "data.table") %in% rownames(installed.packages())) +#' @examplesIf all(c("modeldata", "text2vec", "data.table") %in% rownames(installed.packages())) #' \dontshow{library(data.table)} #' \dontshow{data.table::setDTthreads(2)} #' \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} diff --git a/R/ngram.R b/R/ngram.R index 6fa8d1c0..eabfef85 100644 --- a/R/ngram.R +++ b/R/ngram.R @@ -49,7 +49,7 @@ #' @seealso [step_tokenize()] to turn characters into [`tokens`][tokenlist()] #' @family Steps for Token Modification #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/sequence_onehot.R b/R/sequence_onehot.R index 223f852c..b38f0af8 100644 --- a/R/sequence_onehot.R +++ b/R/sequence_onehot.R @@ -52,7 +52,7 @@ #' #' @family Steps for Numeric Variables From Characters #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/show_tokens.R b/R/show_tokens.R index bcbfb6d8..49e302d5 100644 --- a/R/show_tokens.R +++ b/R/show_tokens.R @@ -12,7 +12,7 @@ #' @return A list of character vectors #' @export #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' text_tibble <- tibble(text = c("This is words", "They are nice!")) #' #' recipe(~text, data = text_tibble) %>% diff --git a/R/stem.R b/R/stem.R index c49b4bde..a3c285a6 100644 --- a/R/stem.R +++ b/R/stem.R @@ -42,7 +42,7 @@ #' @seealso [step_tokenize()] to turn characters into [`tokens`][tokenlist()] #' @family Steps for Token Modification #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/stopwords.R b/R/stopwords.R index a00b4eb8..ae8ca7cb 100644 --- a/R/stopwords.R +++ b/R/stopwords.R @@ -49,7 +49,7 @@ #' @seealso [step_tokenize()] to turn characters into [`tokens`][tokenlist()] #' @family Steps for Token Modification #' -#' @examplesIf rlang::is_installed("stopwords") +#' @examplesIf rlang::is_installed(c("modeldata", "stopwords")) #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/textfeature.R b/R/textfeature.R index 57e2b313..02857b95 100644 --- a/R/textfeature.R +++ b/R/textfeature.R @@ -42,7 +42,7 @@ #' #' @family Steps for Numeric Variables From Characters #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/texthash.R b/R/texthash.R index 48a549fa..8178dbcf 100644 --- a/R/texthash.R +++ b/R/texthash.R @@ -62,7 +62,7 @@ #' [step_text_normalization()] to perform text normalization. #' @family Steps for Numeric Variables From Tokens #' -#' @examplesIf all(c("text2vec", "data.table") %in% rownames(installed.packages())) +#' @examplesIf all(c("modeldata", "text2vec", "data.table") %in% rownames(installed.packages())) #' \dontshow{library(data.table)} #' \dontshow{data.table::setDTthreads(2)} #' \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} diff --git a/R/tf.R b/R/tf.R index f1502632..f8098bb2 100644 --- a/R/tf.R +++ b/R/tf.R @@ -74,7 +74,7 @@ #' @seealso [step_tokenize()] to turn characters into [`tokens`][tokenlist()] #' @family Steps for Numeric Variables From Tokens #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' \donttest{ #' library(recipes) #' library(modeldata) diff --git a/R/tfidf.R b/R/tfidf.R index 73ad9a72..437a4606 100644 --- a/R/tfidf.R +++ b/R/tfidf.R @@ -68,7 +68,7 @@ #' @seealso [step_tokenize()] to turn characters into [`tokens`][tokenlist()] #' @family Steps for Numeric Variables From Tokens #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' \donttest{ #' library(recipes) #' library(modeldata) diff --git a/R/tokenfilter.R b/R/tokenfilter.R index 669a325e..b131aa5f 100644 --- a/R/tokenfilter.R +++ b/R/tokenfilter.R @@ -64,7 +64,7 @@ #' @seealso [step_tokenize()] to turn characters into [`tokens`][tokenlist()] #' @family Steps for Token Modification #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/tokenize.R b/R/tokenize.R index c2e60611..a39fe757 100644 --- a/R/tokenize.R +++ b/R/tokenize.R @@ -202,7 +202,7 @@ #' @seealso [step_untokenize()] to untokenize. #' @family Steps for Tokenization #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/tokenize_bpe.R b/R/tokenize_bpe.R index 57a2f351..f8e79752 100644 --- a/R/tokenize_bpe.R +++ b/R/tokenize_bpe.R @@ -42,7 +42,7 @@ #' @seealso [step_untokenize()] to untokenize. #' @family Steps for Tokenization #' -#' @examplesIf rlang::is_installed("tokenizers.bpe") +#' @examplesIf rlang::is_installed(c("modeldata", "tokenizers.bpe")) #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/tokenize_sentencepiece.R b/R/tokenize_sentencepiece.R index 88590c74..783dba0c 100644 --- a/R/tokenize_sentencepiece.R +++ b/R/tokenize_sentencepiece.R @@ -41,7 +41,7 @@ #' @seealso [step_untokenize()] to untokenize. #' @family Steps for Tokenization #' -#' @examplesIf rlang::is_installed("sentencepiece") +#' @examplesIf rlang::is_installed(c("modeldata", "sentencepiece")) #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/tokenize_wordpiece.R b/R/tokenize_wordpiece.R index 48d8996f..13f57296 100644 --- a/R/tokenize_wordpiece.R +++ b/R/tokenize_wordpiece.R @@ -35,7 +35,7 @@ #' @seealso [step_untokenize()] to untokenize. #' @family Steps for Tokenization #' -#' @examplesIf rlang::is_installed("wordpiece") +#' @examplesIf rlang::is_installed(c("modeldata", "wordpiece")) #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/tokenlist.R b/R/tokenlist.R index 81d262ae..bb267160 100644 --- a/R/tokenlist.R +++ b/R/tokenlist.R @@ -9,7 +9,7 @@ #' #' @return a [tokenlist] object. #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' abc <- list(letters, LETTERS) #' tokenlist(abc) #' diff --git a/R/tokenmerge.R b/R/tokenmerge.R index 2f4738ad..81292f75 100644 --- a/R/tokenmerge.R +++ b/R/tokenmerge.R @@ -33,7 +33,7 @@ #' @seealso [step_tokenize()] to turn characters into [`tokens`][tokenlist()] #' @family Steps for Token Modification #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/untokenize.R b/R/untokenize.R index 024c440d..074783b4 100644 --- a/R/untokenize.R +++ b/R/untokenize.R @@ -37,7 +37,7 @@ #' @seealso [step_tokenize()] to turn characters into [`tokens`][tokenlist()] #' @family Steps for Un-Tokenization #' -#' @examples +#' @examplesIf rlang::is_installed("modeldata") #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/man/show_tokens.Rd b/man/show_tokens.Rd index 4a6d3177..1bb178d1 100644 --- a/man/show_tokens.Rd +++ b/man/show_tokens.Rd @@ -23,6 +23,7 @@ used in final recipe steps. Note that this function will both prep() and bake() the recipe it is used on. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} text_tibble <- tibble(text = c("This is words", "They are nice!")) recipe(~text, data = text_tibble) \%>\% @@ -35,4 +36,5 @@ data(tate_text) recipe(~., data = tate_text) \%>\% step_tokenize(medium) \%>\% show_tokens(medium) +\dontshow{\}) # examplesIf} } diff --git a/man/step_clean_levels.Rd b/man/step_clean_levels.Rd index 04ec607c..eff12e58 100644 --- a/man/step_clean_levels.Rd +++ b/man/step_clean_levels.Rd @@ -74,7 +74,7 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (rlang::is_installed("janitor")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed(c("modeldata", "janitor"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(Smithsonian) diff --git a/man/step_dummy_hash.Rd b/man/step_dummy_hash.Rd index 09d42a53..8e3c3345 100644 --- a/man/step_dummy_hash.Rd +++ b/man/step_dummy_hash.Rd @@ -122,7 +122,7 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (all(c("text2vec", "data.table") \%in\% rownames(installed.packages()))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (all(c("modeldata", "text2vec", "data.table") \%in\% rownames(installed.packages()))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontshow{library(data.table)} \dontshow{data.table::setDTthreads(2)} \dontshow{Sys.setenv("OMP_NUM_THREADS" = 1)} diff --git a/man/step_lda.Rd b/man/step_lda.Rd index 6447c422..a4a710e4 100644 --- a/man/step_lda.Rd +++ b/man/step_lda.Rd @@ -87,7 +87,7 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (all(c("text2vec", "data.table") \%in\% rownames(installed.packages()))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (all(c("modeldata", "text2vec", "data.table") \%in\% rownames(installed.packages()))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontshow{library(data.table)} \dontshow{data.table::setDTthreads(2)} \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} diff --git a/man/step_ngram.Rd b/man/step_ngram.Rd index 08c443af..8d20d9d2 100644 --- a/man/step_ngram.Rd +++ b/man/step_ngram.Rd @@ -93,6 +93,7 @@ The underlying operation does not allow for case weights. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) @@ -113,6 +114,7 @@ bake(tate_obj, new_data = NULL) \%>\% tidy(tate_rec, number = 2) tidy(tate_obj, number = 2) +\dontshow{\}) # examplesIf} } \seealso{ \code{\link[=step_tokenize]{step_tokenize()}} to turn characters into \code{\link[=tokenlist]{tokens}} diff --git a/man/step_sequence_onehot.Rd b/man/step_sequence_onehot.Rd index 0259b0cf..dcbfb7d4 100644 --- a/man/step_sequence_onehot.Rd +++ b/man/step_sequence_onehot.Rd @@ -104,6 +104,7 @@ The underlying operation does not allow for case weights. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) @@ -120,6 +121,7 @@ bake(tate_obj, new_data = NULL) tidy(tate_rec, number = 3) tidy(tate_obj, number = 3) +\dontshow{\}) # examplesIf} } \seealso{ Other Steps for Numeric Variables From Characters: diff --git a/man/step_stem.Rd b/man/step_stem.Rd index 8b753583..4a22b496 100644 --- a/man/step_stem.Rd +++ b/man/step_stem.Rd @@ -83,6 +83,7 @@ The underlying operation does not allow for case weights. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) @@ -121,6 +122,7 @@ bake(tate_obj, new_data = NULL, medium) \%>\% bake(tate_obj, new_data = NULL) \%>\% slice(2) \%>\% pull(medium) +\dontshow{\}) # examplesIf} } \seealso{ \code{\link[=step_tokenize]{step_tokenize()}} to turn characters into \code{\link[=tokenlist]{tokens}} diff --git a/man/step_stopwords.Rd b/man/step_stopwords.Rd index afc6d9b6..7ded7972 100644 --- a/man/step_stopwords.Rd +++ b/man/step_stopwords.Rd @@ -94,7 +94,7 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (rlang::is_installed("stopwords")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed(c("modeldata", "stopwords"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) diff --git a/man/step_textfeature.Rd b/man/step_textfeature.Rd index a216f54d..d7997c42 100644 --- a/man/step_textfeature.Rd +++ b/man/step_textfeature.Rd @@ -88,6 +88,7 @@ The underlying operation does not allow for case weights. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) @@ -116,6 +117,7 @@ recipe(~., data = tate_text) \%>\% ) \%>\% prep() \%>\% bake(new_data = NULL) +\dontshow{\}) # examplesIf} } \seealso{ Other Steps for Numeric Variables From Characters: diff --git a/man/step_texthash.Rd b/man/step_texthash.Rd index a9a7ea8c..ee9189ab 100644 --- a/man/step_texthash.Rd +++ b/man/step_texthash.Rd @@ -114,7 +114,7 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (all(c("text2vec", "data.table") \%in\% rownames(installed.packages()))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (all(c("modeldata", "text2vec", "data.table") \%in\% rownames(installed.packages()))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontshow{library(data.table)} \dontshow{data.table::setDTthreads(2)} \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} diff --git a/man/step_tf.Rd b/man/step_tf.Rd index c0a21373..07e4cfa4 100644 --- a/man/step_tf.Rd +++ b/man/step_tf.Rd @@ -132,6 +132,7 @@ The underlying operation does not allow for case weights. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \donttest{ library(recipes) library(modeldata) @@ -149,7 +150,7 @@ bake(tate_obj, tate_text) tidy(tate_rec, number = 2) tidy(tate_obj, number = 2) } - +\dontshow{\}) # examplesIf} } \seealso{ \code{\link[=step_tokenize]{step_tokenize()}} to turn characters into \code{\link[=tokenlist]{tokens}} diff --git a/man/step_tfidf.Rd b/man/step_tfidf.Rd index fe8208cf..297993c1 100644 --- a/man/step_tfidf.Rd +++ b/man/step_tfidf.Rd @@ -128,6 +128,7 @@ The underlying operation does not allow for case weights. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \donttest{ library(recipes) library(modeldata) @@ -145,7 +146,7 @@ bake(tate_obj, tate_text) tidy(tate_rec, number = 2) tidy(tate_obj, number = 2) } - +\dontshow{\}) # examplesIf} } \seealso{ \code{\link[=step_tokenize]{step_tokenize()}} to turn characters into \code{\link[=tokenlist]{tokens}} diff --git a/man/step_tokenfilter.Rd b/man/step_tokenfilter.Rd index 48d82350..42f0b858 100644 --- a/man/step_tokenfilter.Rd +++ b/man/step_tokenfilter.Rd @@ -116,6 +116,7 @@ The underlying operation does not allow for case weights. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) @@ -136,6 +137,7 @@ bake(tate_obj, new_data = NULL) \%>\% tidy(tate_rec, number = 2) tidy(tate_obj, number = 2) +\dontshow{\}) # examplesIf} } \seealso{ \code{\link[=step_tokenize]{step_tokenize()}} to turn characters into \code{\link[=tokenlist]{tokens}} diff --git a/man/step_tokenize.Rd b/man/step_tokenize.Rd index 0da6c165..0515caa5 100644 --- a/man/step_tokenize.Rd +++ b/man/step_tokenize.Rd @@ -269,6 +269,7 @@ The underlying operation does not allow for case weights. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) @@ -296,6 +297,7 @@ tate_obj_chars <- recipe(~., data = tate_text) \%>\% bake(tate_obj, new_data = NULL) \%>\% slice(2) \%>\% pull(medium) +\dontshow{\}) # examplesIf} } \seealso{ \code{\link[=step_untokenize]{step_untokenize()}} to untokenize. diff --git a/man/step_tokenize_bpe.Rd b/man/step_tokenize_bpe.Rd index cd6d1d07..cedfe951 100644 --- a/man/step_tokenize_bpe.Rd +++ b/man/step_tokenize_bpe.Rd @@ -85,7 +85,7 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (rlang::is_installed("tokenizers.bpe")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed(c("modeldata", "tokenizers.bpe"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) diff --git a/man/step_tokenize_sentencepiece.Rd b/man/step_tokenize_sentencepiece.Rd index ead33b8d..5c48788b 100644 --- a/man/step_tokenize_sentencepiece.Rd +++ b/man/step_tokenize_sentencepiece.Rd @@ -84,7 +84,7 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (rlang::is_installed("sentencepiece")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed(c("modeldata", "sentencepiece"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) diff --git a/man/step_tokenize_wordpiece.Rd b/man/step_tokenize_wordpiece.Rd index 72cefe6a..72295135 100644 --- a/man/step_tokenize_wordpiece.Rd +++ b/man/step_tokenize_wordpiece.Rd @@ -77,7 +77,7 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (rlang::is_installed("wordpiece")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed(c("modeldata", "wordpiece"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) diff --git a/man/step_tokenmerge.Rd b/man/step_tokenmerge.Rd index 383dee36..f39cb657 100644 --- a/man/step_tokenmerge.Rd +++ b/man/step_tokenmerge.Rd @@ -76,6 +76,7 @@ The underlying operation does not allow for case weights. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) @@ -91,6 +92,7 @@ bake(tate_obj, new_data = NULL) tidy(tate_rec, number = 2) tidy(tate_obj, number = 2) +\dontshow{\}) # examplesIf} } \seealso{ \code{\link[=step_tokenize]{step_tokenize()}} to turn characters into \code{\link[=tokenlist]{tokens}} diff --git a/man/step_untokenize.Rd b/man/step_untokenize.Rd index 070f3ca4..8d03841c 100644 --- a/man/step_untokenize.Rd +++ b/man/step_untokenize.Rd @@ -76,6 +76,7 @@ The underlying operation does not allow for case weights. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(recipes) library(modeldata) data(tate_text) @@ -96,6 +97,7 @@ bake(tate_obj, new_data = NULL) \%>\% tidy(tate_rec, number = 2) tidy(tate_obj, number = 2) +\dontshow{\}) # examplesIf} } \seealso{ \code{\link[=step_tokenize]{step_tokenize()}} to turn characters into \code{\link[=tokenlist]{tokens}} diff --git a/man/tokenlist.Rd b/man/tokenlist.Rd index 102bb0bb..af8429ae 100644 --- a/man/tokenlist.Rd +++ b/man/tokenlist.Rd @@ -21,6 +21,7 @@ A \link{tokenlist} object is a thin wrapper around a list of character vectors, with a few attributes. } \examples{ +\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} abc <- list(letters, LETTERS) tokenlist(abc) @@ -34,4 +35,5 @@ data(tate_text) tokens <- tokenize_words(as.character(tate_text$medium)) tokenlist(tokens) +\dontshow{\}) # examplesIf} } diff --git a/tests/embeddings/embeddings-references.R b/tests/embeddings/embeddings-references.R index 7709f178..b0e089cf 100644 --- a/tests/embeddings/embeddings-references.R +++ b/tests/embeddings/embeddings-references.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(testthat) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/_snaps/R4.4/tokenize_bpe.new.md b/tests/testthat/_snaps/R4.4/tokenize_bpe.new.md new file mode 100644 index 00000000..0f4258f8 --- /dev/null +++ b/tests/testthat/_snaps/R4.4/tokenize_bpe.new.md @@ -0,0 +1,16 @@ +# Errors if vocabulary size is set to low. + + Code + recipe(~text1, data = test_data) %>% step_tokenize_bpe(text1, vocabulary_size = 10) %>% + prep() + Condition + Warning in `read.dcf()`: + cannot open compressed file '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/tokenizers.bpe/DESCRIPTION', probable reason 'No such file or directory' + Message + 1 package (tokenizers.bpe) is needed for this step but is not installed. + To install run: `install.packages("tokenizers.bpe")` + Condition + Error in `step_tokenize_bpe()`: + Caused by error in `prep()`: + ! `vocabulary_size` of 10 is too small for column `text1` which has a unique character count of 23 + diff --git a/tests/testthat/_snaps/clean_levels.md b/tests/testthat/_snaps/clean_levels.md index fe29e220..a3077070 100644 --- a/tests/testthat/_snaps/clean_levels.md +++ b/tests/testthat/_snaps/clean_levels.md @@ -51,10 +51,10 @@ -- Inputs Number of variables by role - predictor: 3 + predictor: 5 -- Operations - * Cleaning factor levels for: name + * Cleaning factor levels for: Species --- @@ -66,11 +66,11 @@ -- Inputs Number of variables by role - predictor: 3 + predictor: 5 -- Training information - Training data contained 15 data points and no incomplete rows. + Training data contained 150 data points and no incomplete rows. -- Operations - * Cleaning factor levels for: name | Trained + * Cleaning factor levels for: Species | Trained diff --git a/tests/testthat/_snaps/clean_names.md b/tests/testthat/_snaps/clean_names.md index e6426902..3db2bb79 100644 --- a/tests/testthat/_snaps/clean_names.md +++ b/tests/testthat/_snaps/clean_names.md @@ -51,7 +51,7 @@ -- Inputs Number of variables by role - predictor: 6 + predictor: 11 -- Operations * Cleaning variable names for: all_predictors() @@ -66,11 +66,11 @@ -- Inputs Number of variables by role - predictor: 6 + predictor: 11 -- Training information - Training data contained 20 data points and 4 incomplete rows. + Training data contained 32 data points and no incomplete rows. -- Operations - * Cleaning variable names for: Ozone, Solar.R, Wind, Temp, ... | Trained + * Cleaning variable names for: mpg, cyl, disp, hp, drat, wt, ... | Trained diff --git a/tests/testthat/_snaps/dummy_hash.md b/tests/testthat/_snaps/dummy_hash.md index 916a3164..12a1d82e 100644 --- a/tests/testthat/_snaps/dummy_hash.md +++ b/tests/testthat/_snaps/dummy_hash.md @@ -70,10 +70,10 @@ -- Inputs Number of variables by role - predictor: 2 + predictor: 5 -- Operations - * Feature hashing with: sponsor_code + * Feature hashing with: Species --- @@ -85,11 +85,11 @@ -- Inputs Number of variables by role - predictor: 2 + predictor: 5 -- Training information - Training data contained 20 data points and no incomplete rows. + Training data contained 150 data points and no incomplete rows. -- Operations - * Feature hashing with: sponsor_code | Trained + * Feature hashing with: Species | Trained diff --git a/tests/testthat/_snaps/lda.md b/tests/testthat/_snaps/lda.md index 39bb3bd2..7c293622 100644 --- a/tests/testthat/_snaps/lda.md +++ b/tests/testthat/_snaps/lda.md @@ -70,28 +70,31 @@ -- Inputs Number of variables by role - predictor: 2 + predictor: 5 -- Operations - * Tokenization for: medium - * Text feature extraction for: medium + * Tokenization for: Species + * Text feature extraction for: Species --- Code prep(rec) + Condition + Warning in `get_dtm()`: + dtm has 0 rows. Empty iterator? Message -- Recipe ---------------------------------------------------------------------- -- Inputs Number of variables by role - predictor: 2 + predictor: 5 -- Training information - Training data contained 100 data points and no incomplete rows. + Training data contained 150 data points and no incomplete rows. -- Operations - * Tokenization for: medium | Trained - * Text feature extraction for: medium | Trained + * Tokenization for: Species | Trained + * Text feature extraction for: Species | Trained diff --git a/tests/testthat/test-clean_levels.R b/tests/testthat/test-clean_levels.R index f7ad597e..8016fa63 100644 --- a/tests/testthat/test-clean_levels.R +++ b/tests/testthat/test-clean_levels.R @@ -1,15 +1,13 @@ -library(testthat) -library(textrecipes) -library(modeldata) -data("Smithsonian") -smith_tr <- Smithsonian[1:15, ] -smith_te <- Smithsonian[16:20, ] - -rec <- recipe(~., data = smith_tr) - test_that("character input", { skip_if_not_installed("janitor") - cleaned <- rec %>% step_clean_levels(name, id = "") + skip_if_not_installed("modeldata") + + data("Smithsonian", package = "modeldata") + smith_tr <- Smithsonian[1:15, ] + smith_te <- Smithsonian[16:20, ] + + cleaned <- recipe(~., data = smith_tr) %>% + step_clean_levels(name, id = "") tidy_exp_un <- tibble( terms = c("name"), @@ -50,6 +48,9 @@ test_that("character input", { test_that("factor input", { skip_if_not_installed("janitor") + skip_if_not_installed("modeldata") + + data("Smithsonian", package = "modeldata") smith_tr <- Smithsonian[1:15, ] smith_tr$name <- as.factor(smith_tr$name) smith_te <- Smithsonian[16:20, ] @@ -71,6 +72,11 @@ test_that("factor input", { test_that("bake method errors when needed non-standard role columns are missing", { skip_if_not_installed("janitor") + skip_if_not_installed("modeldata") + + data("Smithsonian", package = "modeldata") + smith_tr <- Smithsonian[1:15, ] + rec <- recipe(~name, data = smith_tr) %>% step_clean_levels(name) %>% update_role(name, new_role = "potato") %>% @@ -123,7 +129,8 @@ test_that("empty selection tidy method works", { test_that("printing", { skip_if_not_installed("janitor") - rec <- rec %>% step_clean_levels(name) + rec <- recipe(~., data = iris) %>% + step_clean_levels(Species) expect_snapshot(print(rec)) expect_snapshot(prep(rec)) diff --git a/tests/testthat/test-clean_names.R b/tests/testthat/test-clean_names.R index b7b80310..475e0d5c 100644 --- a/tests/testthat/test-clean_names.R +++ b/tests/testthat/test-clean_names.R @@ -1,15 +1,12 @@ -library(testthat) -library(textrecipes) -data(airquality) - -air_tr <- airquality[1:20, ] -air_te <- airquality[101:110, ] - -rec <- recipe(~., data = air_tr) - test_that("can clean names", { skip_if_not_installed("janitor") - cleaned <- rec %>% step_clean_names(all_predictors(), id = "") + skip_if_not_installed("modeldata") + + air_tr <- airquality[1:20, ] + air_te <- airquality[101:110, ] + + cleaned <- recipe(~., data = air_tr) %>% + step_clean_names(all_predictors(), id = "") tidy_exp_un <- tibble( terms = c("all_predictors()"), @@ -35,6 +32,8 @@ test_that("can clean names", { # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { + skip_if_not_installed("janitor") + rec <- recipe(mtcars) %>% step_clean_names(disp) %>% update_role(disp, new_role = "potato") %>% @@ -87,7 +86,9 @@ test_that("empty selection tidy method works", { test_that("printing", { skip_if_not_installed("janitor") - rec <- rec %>% step_clean_names(all_predictors()) + + rec <- recipe(~., data = mtcars) %>% + step_clean_names(all_predictors()) expect_snapshot(print(rec)) expect_snapshot(prep(rec)) diff --git a/tests/testthat/test-dummy_hash.R b/tests/testthat/test-dummy_hash.R index 112c8f2b..c4cbb775 100644 --- a/tests/testthat/test-dummy_hash.R +++ b/tests/testthat/test-dummy_hash.R @@ -1,18 +1,15 @@ -library(textrecipes) -library(recipes) -data(grants, package = "modeldata") - -test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] -test_data <- tibble::as_tibble(test_data) - -rec <- recipe(~., data = test_data) - test_that("hashing gives double outputs", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default - rec <- rec %>% + data("grants", package = "modeldata") + + test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] + test_data <- tibble::as_tibble(test_data) + + rec <- recipe(~., data = test_data) %>% step_dummy_hash(sponsor_code) obj <- rec %>% @@ -32,9 +29,16 @@ test_that("hashing gives double outputs", { test_that("hashing multiple factors", { skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") + skip_if_not_installed("text2vec") data.table::setDTthreads(2) # because data.table uses all cores by default - res <- rec %>% + data("grants", package = "modeldata") + + test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] + test_data <- tibble::as_tibble(test_data) + + res <- recipe(~., data = test_data) %>% step_dummy_hash(all_nominal_predictors(), num_terms = 12) %>% prep() %>% bake(new_data = NULL) @@ -46,9 +50,16 @@ test_that("hashing multiple factors", { test_that("hashing collapsed multiple factors", { skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") + skip_if_not_installed("text2vec") data.table::setDTthreads(2) # because data.table uses all cores by default - res <- rec %>% + data("grants", package = "modeldata") + + test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] + test_data <- tibble::as_tibble(test_data) + + res <- recipe(~., data = test_data) %>% step_dummy_hash(all_nominal_predictors(), num_terms = 4, collapse = TRUE) %>% prep() %>% bake(new_data = NULL) @@ -60,9 +71,15 @@ test_that("hashing collapsed multiple factors", { test_that("hashing output width changes accordingly with num_terms", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default - rec <- rec %>% + data("grants", package = "modeldata") + + test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] + test_data <- tibble::as_tibble(test_data) + + rec <- recipe(~., data = test_data) %>% step_dummy_hash(sponsor_code, num_terms = 256) %>% prep() @@ -77,7 +94,13 @@ test_that("hashing output width changes accordingly with num_terms", { test_that("hashing output width changes accordingly with num_terms", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default + + data("grants", package = "modeldata") + + test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] + test_data <- tibble::as_tibble(test_data) signed <- recipe(~., data = test_data) %>% step_dummy_hash(all_predictors(), num_terms = 2) %>% @@ -98,8 +121,14 @@ test_that("hashing output width changes accordingly with num_terms", { test_that("check_name() is used", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default + data("grants", package = "modeldata") + + test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] + test_data <- tibble::as_tibble(test_data) + dat <- test_data dat$text <- dat$sponsor_code dat$dummyhash_text_01 <- dat$sponsor_code @@ -131,6 +160,15 @@ test_that("tunable", { # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { + skip_if_not_installed("modeldata") + skip_if_not_installed("text2vec") + data.table::setDTthreads(2) # because data.table uses all cores by default + + data("grants", package = "modeldata") + + test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] + test_data <- tibble::as_tibble(test_data) + rec <- recipe(~sponsor_code, data = test_data) %>% step_dummy_hash(sponsor_code) %>% update_role(sponsor_code, new_role = "potato") %>% @@ -190,8 +228,14 @@ test_that("empty selection tidy method works", { test_that("keep_original_cols works", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default + data("grants", package = "modeldata") + + test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] + test_data <- tibble::as_tibble(test_data) + new_names <- paste0("dummyhash_sponsor_code_", 1:5) rec <- recipe(~ sponsor_code, data = test_data) %>% @@ -220,8 +264,14 @@ test_that("keep_original_cols works", { test_that("keep_original_cols - can prep recipes with it missing", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default + data("grants", package = "modeldata") + + test_data <- grants_test[1:20, c("contract_value_band", "sponsor_code")] + test_data <- tibble::as_tibble(test_data) + rec <- recipe(~ sponsor_code, data = test_data) %>% step_dummy_hash(sponsor_code) @@ -242,8 +292,8 @@ test_that("printing", { skip_if_not_installed("data.table") data.table::setDTthreads(2) # because data.table uses all cores by default - rec <- rec %>% - step_dummy_hash(sponsor_code) + rec <- recipe(~., data = iris) %>% + step_dummy_hash(Species) expect_snapshot(print(rec)) expect_snapshot(prep(rec)) diff --git a/tests/testthat/test-lda.R b/tests/testthat/test-lda.R index 40ec2dba..81c1b27b 100644 --- a/tests/testthat/test-lda.R +++ b/tests/testthat/test-lda.R @@ -1,19 +1,15 @@ -set.seed(1234) -library(recipes) -library(textrecipes) -library(modeldata) -data(tate_text) - -n_rows <- 100 -rec <- recipe(~ medium + artist, data = tate_text[seq_len(n_rows), ]) - test_that("step_lda works as intended", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default + data("tate_text", package = "modeldata") + + n_rows <- 100 n_top <- 10 - rec1 <- rec %>% + + rec1 <- recipe(~ medium + artist, data = tate_text[seq_len(n_rows), ]) %>% step_tokenize(medium) %>% step_lda(medium, num_topics = n_top) @@ -29,10 +25,14 @@ test_that("step_lda works as intended", { test_that("step_lda works with num_topics argument", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default + data("tate_text", package = "modeldata") + + n_rows <- 100 n_top <- 100 - rec1 <- rec %>% + rec1 <- recipe(~ medium + artist, data = tate_text[seq_len(n_rows), ]) %>% step_tokenize(medium) %>% step_lda(medium, num_topics = n_top) @@ -45,8 +45,11 @@ test_that("step_lda works with num_topics argument", { test_that("check_name() is used", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default + data("tate_text", package = "modeldata") + dat <- tate_text[seq_len(100), ] dat$text <- dat$medium dat$lda_text_1 <- dat$text @@ -66,9 +69,15 @@ test_that("check_name() is used", { test_that("bake method errors when needed non-standard role columns are missing", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default - tokenized_test_data <- rec %>% + data("tate_text", package = "modeldata") + + n_rows <- 100 + + tokenized_test_data <- recipe(~ medium + artist, + data = tate_text[seq_len(n_rows), ]) %>% step_tokenize(medium) %>% prep() %>% bake(new_data = NULL) @@ -131,10 +140,15 @@ test_that("empty selection tidy method works", { test_that("keep_original_cols works", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default + data("tate_text", package = "modeldata") + new_names <- paste0("lda_medium_", 1:10) + n_rows <- 100 + rec <- recipe(~ medium, data = tate_text[seq_len(n_rows), ]) %>% step_tokenize(medium) %>% step_lda(medium, keep_original_cols = FALSE) @@ -163,8 +177,13 @@ test_that("keep_original_cols works", { test_that("keep_original_cols - can prep recipes with it missing", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") + skip_if_not_installed("modeldata") data.table::setDTthreads(2) # because data.table uses all cores by default + data("tate_text", package = "modeldata") + + n_rows <- 100 + rec <- recipe(~ medium, data = tate_text[seq_len(n_rows), ]) %>% step_tokenize(medium) %>% step_lda(medium, keep_original_cols = TRUE) @@ -180,15 +199,14 @@ test_that("keep_original_cols - can prep recipes with it missing", { ) }) - test_that("printing", { skip_if_not_installed("text2vec") skip_if_not_installed("data.table") data.table::setDTthreads(2) # because data.table uses all cores by default - rec <- rec %>% - step_tokenize(medium) %>% - step_lda(medium) + rec <- recipe(~., data = iris) %>% + step_tokenize(Species) %>% + step_lda(Species) expect_snapshot(print(rec)) expect_snapshot(prep(rec)) diff --git a/tests/testthat/test-lemma.R b/tests/testthat/test-lemma.R index a7956383..ddcd59cf 100644 --- a/tests/testthat/test-lemma.R +++ b/tests/testthat/test-lemma.R @@ -1,7 +1,3 @@ -library(textrecipes) -library(recipes) -library(tibble) - text <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", @@ -11,6 +7,7 @@ text <- tibble(text = c( test_that("lemmatization works", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() rec <- recipe(~text, data = text) %>% @@ -53,6 +50,7 @@ test_that("lemmatization errors if lemma attribute doesn't exists", { test_that("bake method errors when needed non-standard role columns are missing", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() tokenized_test_data <- recipe(~text, data = text) %>% @@ -112,7 +110,9 @@ test_that("empty selection tidy method works", { test_that("printing", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() + rec <- recipe(~text, data = text) %>% step_tokenize(all_predictors(), engine = "spacyr") %>% step_lemma(all_predictors()) diff --git a/tests/testthat/test-ngram.R b/tests/testthat/test-ngram.R index e396a35e..4fc4ac31 100644 --- a/tests/testthat/test-ngram.R +++ b/tests/testthat/test-ngram.R @@ -135,9 +135,6 @@ test_that("ngram returns length zero vectors when length(x) < n", { ) }) -library(recipes) -library(textrecipes) - test_tibble <- tibble(text = c( "not eat them here or there.", "not eat them anywhere." diff --git a/tests/testthat/test-pos_filter.R b/tests/testthat/test-pos_filter.R index 71722648..9d5e826f 100644 --- a/tests/testthat/test-pos_filter.R +++ b/tests/testthat/test-pos_filter.R @@ -1,7 +1,3 @@ -library(textrecipes) -library(recipes) -library(tibble) - text <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", @@ -11,6 +7,7 @@ text <- tibble(text = c( test_that("part of speech filtering works", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() rec <- recipe(~text, data = text) %>% @@ -36,6 +33,7 @@ test_that("part of speech filtering works", { test_that("part of speech filtering removes everything", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() rec <- recipe(~text, data = text) %>% @@ -61,6 +59,7 @@ test_that("part of speech filtering removes everything", { test_that("part of speech filtering works with multiple tags", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() rec <- recipe(~text, data = text) %>% @@ -99,6 +98,7 @@ test_that("lemmatization errors if lemma attribute doesn't exists", { test_that("bake method errors when needed non-standard role columns are missing", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() tokenized_test_data <- recipe(~text, data = text) %>% @@ -158,7 +158,9 @@ test_that("empty selection tidy method works", { test_that("printing", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() + rec <- recipe(~text, data = text) %>% step_tokenize(all_predictors(), engine = "spacyr") %>% step_pos_filter(all_predictors()) diff --git a/tests/testthat/test-s3-methods.R b/tests/testthat/test-s3-methods.R index f00c1f20..9c0ce5a3 100644 --- a/tests/testthat/test-s3-methods.R +++ b/tests/testthat/test-s3-methods.R @@ -1,6 +1,3 @@ -library(recipes) -library(textrecipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-sequence_onehot.R b/tests/testthat/test-sequence_onehot.R index de1d7236..3c55a1d2 100644 --- a/tests/testthat/test-sequence_onehot.R +++ b/tests/testthat/test-sequence_onehot.R @@ -1,7 +1,3 @@ -library(testthat) -library(recipes) -library(textrecipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-stem.R b/tests/testthat/test-stem.R index 8eaca0ec..96069f7c 100644 --- a/tests/testthat/test-stem.R +++ b/tests/testthat/test-stem.R @@ -1,6 +1,3 @@ -library(recipes) -library(textrecipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-stopwords.R b/tests/testthat/test-stopwords.R index 575f918f..bfdd3028 100644 --- a/tests/testthat/test-stopwords.R +++ b/tests/testthat/test-stopwords.R @@ -1,6 +1,3 @@ -library(recipes) -library(textrecipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", @@ -79,6 +76,8 @@ test_that("custom stopwords are supported", { # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { + skip_if_not_installed("stopwords") + tokenized_test_data <- recipe(~text, data = test_data) %>% step_tokenize(text) %>% prep() %>% @@ -99,6 +98,8 @@ test_that("bake method errors when needed non-standard role columns are missing" }) test_that("empty printing", { + skip_if_not_installed("stopwords") + rec <- recipe(mpg ~ ., mtcars) rec <- step_stopwords(rec) @@ -110,6 +111,8 @@ test_that("empty printing", { }) test_that("empty selection prep/bake is a no-op", { + skip_if_not_installed("stopwords") + rec1 <- recipe(mpg ~ ., mtcars) rec2 <- step_stopwords(rec1) @@ -123,6 +126,8 @@ test_that("empty selection prep/bake is a no-op", { }) test_that("empty selection tidy method works", { + skip_if_not_installed("stopwords") + rec <- recipe(mpg ~ ., mtcars) rec <- step_stopwords(rec) diff --git a/tests/testthat/test-text_normalization.R b/tests/testthat/test-text_normalization.R index d95e2322..25774994 100644 --- a/tests/testthat/test-text_normalization.R +++ b/tests/testthat/test-text_normalization.R @@ -1,7 +1,3 @@ -library(testthat) -library(recipes) -library(tibble) - ex_dat <- tibble(text = c("sch\U00f6n", "scho\U0308n")) test_that("simple sqrt trans", { diff --git a/tests/testthat/test-textfeature.R b/tests/testthat/test-textfeature.R index cfd4990f..0c755464 100644 --- a/tests/testthat/test-textfeature.R +++ b/tests/testthat/test-textfeature.R @@ -1,6 +1,3 @@ -library(recipes) -library(textrecipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-texthash.R b/tests/testthat/test-texthash.R index 2b57339d..e36d24f4 100644 --- a/tests/testthat/test-texthash.R +++ b/tests/testthat/test-texthash.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(recipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", @@ -111,6 +108,8 @@ test_that("tunable", { # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { + skip_if_not_installed("text2vec") + tokenized_test_data <- recipe(~text, data = test_data) %>% step_tokenize(text) %>% prep() %>% @@ -205,6 +204,8 @@ test_that("keep_original_cols works", { }) test_that("keep_original_cols - can prep recipes with it missing", { + skip_if_not_installed("text2vec") + rec <- recipe(~text, data = test_data) %>% step_tokenize(text) %>% step_texthash(text) diff --git a/tests/testthat/test-tf.R b/tests/testthat/test-tf.R index 69ab9b9f..0affe2e1 100644 --- a/tests/testthat/test-tf.R +++ b/tests/testthat/test-tf.R @@ -1,6 +1,3 @@ -library(recipes) -library(textrecipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-tfidf.R b/tests/testthat/test-tfidf.R index e0b2e37a..c9a846d1 100644 --- a/tests/testthat/test-tfidf.R +++ b/tests/testthat/test-tfidf.R @@ -1,6 +1,3 @@ -library(recipes) -library(textrecipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-tokenfilter.R b/tests/testthat/test-tokenfilter.R index 39295d99..e2bb0af9 100644 --- a/tests/testthat/test-tokenfilter.R +++ b/tests/testthat/test-tokenfilter.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(recipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-tokenize.R b/tests/testthat/test-tokenize.R index 2d7699e2..c6691b86 100644 --- a/tests/testthat/test-tokenize.R +++ b/tests/testthat/test-tokenize.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(recipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", @@ -102,6 +99,7 @@ test_that("tokenization errors with wrong engines", { test_that("tokenization includes lemma attribute when avaliable", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() expect_type( diff --git a/tests/testthat/test-tokenize_bpe.R b/tests/testthat/test-tokenize_bpe.R index 52afd52b..8ab94853 100644 --- a/tests/testthat/test-tokenize_bpe.R +++ b/tests/testthat/test-tokenize_bpe.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(recipes) - r_version <- function() paste0("R", getRversion()[, 1:2]) text1 <- c( @@ -62,6 +59,8 @@ text2_out <- list( ) test_that("output is list when length is 1 or 0", { + skip_if_not_installed("tokenizers.bpe") + data <- tibble(a = rep(c("a", ""), 20)) data_rec <- recipe(~., data = data) %>% @@ -72,6 +71,8 @@ test_that("output is list when length is 1 or 0", { }) test_that("step_tokenize_bpe works", { + skip_if_not_installed("tokenizers.bpe") + res <- recipe(~text1, data = test_data) %>% step_tokenize_bpe(text1) %>% prep() %>% @@ -84,6 +85,8 @@ test_that("step_tokenize_bpe works", { }) test_that("step_tokenize_bpe works with tokenizers.bpe and multiple colunms", { + skip_if_not_installed("tokenizers.bpe") + res <- recipe(~., data = test_data) %>% step_tokenize_bpe(all_predictors()) %>% prep() %>% @@ -101,6 +104,8 @@ test_that("step_tokenize_bpe works with tokenizers.bpe and multiple colunms", { }) test_that("arguments are passed to tokenizers.bpe", { + skip_if_not_installed("tokenizers.bpe") + res <- recipe(~text1, data = test_data) %>% step_tokenize_bpe(text1, vocabulary_size = 60) %>% prep() %>% @@ -123,6 +128,8 @@ test_that("arguments are passed to tokenizers.bpe", { }) test_that("Errors if vocabulary size is set to low.", { + skip_if_not_installed("tokenizers.bpe") + expect_snapshot( error = TRUE, variant = r_version(), @@ -151,6 +158,8 @@ test_that("tunable", { # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { + skip_if_not_installed("tokenizers.bpe") + rec <- recipe(~text1, data = test_data) %>% step_tokenize_bpe(text1) %>% update_role(text1, new_role = "potato") %>% @@ -202,6 +211,8 @@ test_that("empty selection tidy method works", { }) test_that("printing", { + skip_if_not_installed("tokenizers.bpe") + rec <- recipe(~., data = test_data) %>% step_tokenize_bpe(text1) diff --git a/tests/testthat/test-tokenize_sentencepiece.R b/tests/testthat/test-tokenize_sentencepiece.R index 1d6fd52e..5a5da763 100644 --- a/tests/testthat/test-tokenize_sentencepiece.R +++ b/tests/testthat/test-tokenize_sentencepiece.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(recipes) - text1 <- c( "I would not eat them here or there.", "I would not eat them anywhere.", @@ -60,6 +57,8 @@ text2_out <- list( ) test_that("step_tokenize_sentencepiece works", { + skip_if_not_installed("sentencepiece") + res <- recipe(~text1, data = test_data) %>% step_tokenize_sentencepiece(text1, vocabulary_size = 80) %>% prep() %>% @@ -72,6 +71,8 @@ test_that("step_tokenize_sentencepiece works", { }) test_that("step_tokenize_sentencepiece works with tokenizers.sentencepiece and multiple colunms", { + skip_if_not_installed("sentencepiece") + res <- recipe(~., data = test_data) %>% step_tokenize_sentencepiece(all_predictors(), vocabulary_size = 80) %>% prep() %>% @@ -89,6 +90,8 @@ test_that("step_tokenize_sentencepiece works with tokenizers.sentencepiece and m }) test_that("arguments are passed to tokenizers.sentencepiece", { + skip_if_not_installed("sentencepiece") + res <- recipe(~text1, data = test_data) %>% step_tokenize_sentencepiece(text1, vocabulary_size = 60) %>% prep() %>% @@ -111,6 +114,8 @@ test_that("arguments are passed to tokenizers.sentencepiece", { }) test_that("Errors if vocabulary size is set to low.", { + skip_if_not_installed("sentencepiece") + expect_snapshot( error = TRUE, recipe(~text1, data = test_data) %>% @@ -122,6 +127,8 @@ test_that("Errors if vocabulary size is set to low.", { # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { + skip_if_not_installed("sentencepiece") + rec <- recipe(~text1, data = test_data) %>% step_tokenize_sentencepiece(text1, vocabulary_size = 100) %>% update_role(text1, new_role = "potato") %>% @@ -173,6 +180,8 @@ test_that("empty selection tidy method works", { }) test_that("printing", { + skip_if_not_installed("sentencepiece") + rec <- recipe(~., data = test_data) %>% step_tokenize_sentencepiece(text1, vocabulary_size = 100) diff --git a/tests/testthat/test-tokenize_wordpiece.R b/tests/testthat/test-tokenize_wordpiece.R index aa351753..3989c38e 100644 --- a/tests/testthat/test-tokenize_wordpiece.R +++ b/tests/testthat/test-tokenize_wordpiece.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(recipes) - text1 <- c( "I would not eat them here or there.", "I would not eat them anywhere.", @@ -32,6 +29,8 @@ text2_out <- list( ) test_that("step_tokenize_wordpiece works", { + skip_if_not_installed("wordpiece") + res <- recipe(~text1, data = test_data) %>% step_tokenize_wordpiece(text1) %>% prep() %>% @@ -44,6 +43,8 @@ test_that("step_tokenize_wordpiece works", { }) test_that("step_tokenize_wordpiece works with tokenizers.wordpiece and multiple colunms", { + skip_if_not_installed("wordpiece") + res <- recipe(~., data = test_data) %>% step_tokenize_wordpiece(all_predictors()) %>% prep() %>% @@ -63,6 +64,8 @@ test_that("step_tokenize_wordpiece works with tokenizers.wordpiece and multiple # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { + skip_if_not_installed("wordpiece") + rec <- recipe(~ text1 + text2, data = test_data) %>% step_tokenize_wordpiece(text1, text2) %>% update_role(text1, new_role = "potato") %>% @@ -77,6 +80,8 @@ test_that("bake method errors when needed non-standard role columns are missing" }) test_that("empty printing", { + skip_if_not_installed("wordpiece") + rec <- recipe(mpg ~ ., mtcars) rec <- step_tokenize_wordpiece(rec) @@ -88,6 +93,8 @@ test_that("empty printing", { }) test_that("empty selection prep/bake is a no-op", { + skip_if_not_installed("wordpiece") + rec1 <- recipe(mpg ~ ., mtcars) rec2 <- step_tokenize_wordpiece(rec1) @@ -101,6 +108,8 @@ test_that("empty selection prep/bake is a no-op", { }) test_that("empty selection tidy method works", { + skip_if_not_installed("wordpiece") + rec <- recipe(mpg ~ ., mtcars) rec <- step_tokenize_wordpiece(rec) @@ -114,6 +123,8 @@ test_that("empty selection tidy method works", { }) test_that("printing", { + skip_if_not_installed("wordpiece") + rec <- recipe(~., data = test_data) %>% step_tokenize_wordpiece(text1) diff --git a/tests/testthat/test-tokenizer-spacyr.R b/tests/testthat/test-tokenizer-spacyr.R index a0365e5e..0b281c3d 100644 --- a/tests/testthat/test-tokenizer-spacyr.R +++ b/tests/testthat/test-tokenizer-spacyr.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(recipes) - text <- c( "I would not eat them here or there.", "I would not eat them anywhere.", @@ -10,6 +7,7 @@ text <- c( test_that("tokenizer works", { skip_on_cran() + skip_if_not_installed("spacyr") skip_if_no_python_or_no_spacy() out <- spacyr_tokenizer_words(text) diff --git a/tests/testthat/test-tokenizer-tokenizersbpe.R b/tests/testthat/test-tokenizer-tokenizersbpe.R index afe28153..0a56b73e 100644 --- a/tests/testthat/test-tokenizer-tokenizersbpe.R +++ b/tests/testthat/test-tokenizer-tokenizersbpe.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(recipes) - r_version <- function() paste0("R", getRversion()[, 1:2]) text1 <- c( @@ -97,6 +94,8 @@ test_that("tokenizer works", { }) test_that("step_tokenize works with tokenizers.bpe", { + skip_if_not_installed("tokenizers.bpe") + res <- recipe(~text1, data = test_data) %>% step_tokenize(text1, engine = "tokenizers.bpe") %>% prep() %>% @@ -109,6 +108,8 @@ test_that("step_tokenize works with tokenizers.bpe", { }) test_that("step_tokenize works with tokenizers.bpe and multiple colunms", { + skip_if_not_installed("tokenizers.bpe") + res <- recipe(~., data = test_data) %>% step_tokenize(all_predictors(), engine = "tokenizers.bpe") %>% prep() %>% @@ -126,6 +127,8 @@ test_that("step_tokenize works with tokenizers.bpe and multiple colunms", { }) test_that("arguments are passed to tokenizers.bpe", { + skip_if_not_installed("tokenizers.bpe") + res <- recipe(~text1, data = test_data) %>% step_tokenize(text1, engine = "tokenizers.bpe", diff --git a/tests/testthat/test-tokenlist.R b/tests/testthat/test-tokenlist.R index 94058803..d4c3e193 100644 --- a/tests/testthat/test-tokenlist.R +++ b/tests/testthat/test-tokenlist.R @@ -1,6 +1,3 @@ -library(testthat) -library(vctrs) - ## Creation ------------------------------------------------------------------- test_that("tokenlist creation works", { @@ -10,12 +7,12 @@ test_that("tokenlist creation works", { expect_s3_class(tkn_list, "textrecipes_tokenlist") expect_equal( - fields(tkn_list), + vctrs::fields(tkn_list), "tokens" ) expect_equal( - field(tkn_list, "tokens"), + vctrs::field(tkn_list, "tokens"), list(letters, letters) ) @@ -30,17 +27,17 @@ test_that("tokenlist creation works", { expect_s3_class(tkn_list, "textrecipes_tokenlist") expect_equal( - fields(tkn_list), + vctrs::fields(tkn_list), c("tokens", "lemma") ) expect_equal( - field(tkn_list, "tokens"), + vctrs::field(tkn_list, "tokens"), list(letters, letters) ) expect_equal( - field(tkn_list, "lemma"), + vctrs::field(tkn_list, "lemma"), list(LETTERS, LETTERS) ) @@ -55,17 +52,17 @@ test_that("tokenlist creation works", { expect_s3_class(tkn_list, "textrecipes_tokenlist") expect_equal( - fields(tkn_list), + vctrs::fields(tkn_list), c("tokens", "pos") ) expect_equal( - field(tkn_list, "tokens"), + vctrs::field(tkn_list, "tokens"), list(letters, letters) ) expect_equal( - field(tkn_list, "pos"), + vctrs::field(tkn_list, "pos"), list(LETTERS, LETTERS) ) @@ -83,22 +80,22 @@ test_that("tokenlist creation works", { expect_s3_class(tkn_list, "textrecipes_tokenlist") expect_equal( - fields(tkn_list), + vctrs::fields(tkn_list), c("tokens", "lemma", "pos") ) expect_equal( - field(tkn_list, "tokens"), + vctrs::field(tkn_list, "tokens"), list(letters, letters) ) expect_equal( - field(tkn_list, "lemma"), + vctrs::field(tkn_list, "lemma"), list(letters, LETTERS) ) expect_equal( - field(tkn_list, "pos"), + vctrs::field(tkn_list, "pos"), list(LETTERS, LETTERS) ) diff --git a/tests/testthat/test-tokenmerge.R b/tests/testthat/test-tokenmerge.R index 30596df9..f2509115 100644 --- a/tests/testthat/test-tokenmerge.R +++ b/tests/testthat/test-tokenmerge.R @@ -1,6 +1,3 @@ -library(textrecipes) -library(recipes) - test_data <- tibble( text1 = c( "I would not eat them here or there.", diff --git a/tests/testthat/test-untokenize.R b/tests/testthat/test-untokenize.R index 4a71461f..0c993900 100644 --- a/tests/testthat/test-untokenize.R +++ b/tests/testthat/test-untokenize.R @@ -1,6 +1,3 @@ -library(recipes) -library(textrecipes) - test_data <- tibble(text = c( "I would not eat them here or there.", "I would not eat them anywhere.", diff --git a/tests/testthat/test-word_embeddings.R b/tests/testthat/test-word_embeddings.R index 06330a86..64509d5f 100644 --- a/tests/testthat/test-word_embeddings.R +++ b/tests/testthat/test-word_embeddings.R @@ -1,5 +1,3 @@ -library(recipes) - embeddings <- readRDS(test_path("emb-data", "embeddings.rds")) sentence_embeddings_long <- readRDS(test_path("emb-data", "long.rds")) diff --git a/vignettes/cookbook---using-more-complex-recipes-involving-text.Rmd b/vignettes/cookbook---using-more-complex-recipes-involving-text.Rmd index b14b2c5b..3a64042d 100644 --- a/vignettes/cookbook---using-more-complex-recipes-involving-text.Rmd +++ b/vignettes/cookbook---using-more-complex-recipes-involving-text.Rmd @@ -10,7 +10,14 @@ vignette: > --- ```{r setup, include = FALSE} +if (rlang::is_installed(c("modeldata"))) { + run <- TRUE +} else { + run <- FALSE +} + knitr::opts_chunk$set( + eval = run, collapse = TRUE, comment = "#>" )