Skip to content

Commit 31ffaf8

Browse files
updating TRUE~ to .default =
1 parent 97a305a commit 31ffaf8

File tree

2 files changed

+36
-99
lines changed

2 files changed

+36
-99
lines changed

modules/Data_Cleaning/Data_Cleaning.Rmd

Lines changed: 33 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -74,16 +74,16 @@ Types of "missing" data:
7474
## Finding Missing data {.small}
7575

7676
- `is.na` - looks for `NaN` and `NA`
77-
- `is.nan`- looks for `NAN`
7877
- `is.infinite` - looks for `Inf` or `-Inf`
7978

79+
```{r, echo=FALSE}
80+
NA_vect<- c(0,NA, -1)
81+
NA_vect <- NA_vect/0
82+
```
83+
8084
```{r}
81-
test <- c(0,NA, -1)
82-
test/0
83-
test <- test/0
84-
is.na(test)
85-
is.nan(test)
86-
is.infinite(test)
85+
is.na(NA_vect)
86+
is.infinite(NA_vect)
8787
```
8888

8989

@@ -92,8 +92,8 @@ is.infinite(test)
9292
`any()` can help you check if there are any `NA` values in a vector
9393

9494
```{r}
95-
test
96-
any(is.na(test))
95+
NA_vect
96+
any(is.na(NA_vect))
9797
```
9898

9999

@@ -512,15 +512,15 @@ Note that automatically values not reassigned explicitly by
512512
{data_input} %>%
513513
mutate({variable_to_fix} = case_when({Variable_fixing}
514514
/some condition/ ~ {value_for_con},
515-
TRUE ~ {value_for_not_meeting_condition})
515+
.default = {value_for_not_meeting_condition})
516516
517517
```
518518
:::
519519

520520
{value_for_not_meeting_condition} could be something new
521521
or it can be the original values of the column
522522

523-
## case_when with TRUE ~ original variable name
523+
## case_when with .default = original variable name
524524

525525
```{r}
526526
data_ginger_mint %>%
@@ -529,7 +529,7 @@ data_ginger_mint %>%
529529
Treatment == "Mint" ~ "Peppermint",
530530
Treatment == "mint" ~ "Peppermint",
531531
Treatment == "peppermint" ~ "Peppermint",
532-
TRUE ~ Treatment)) %>%
532+
.default = Treatment)) %>%
533533
count(Treatment, Treatment_recoded)
534534
```
535535

@@ -544,35 +544,23 @@ data_ginger_mint %>%
544544
Treatment == "Mint" ~ "Peppermint",
545545
Treatment == "mint" ~ "Peppermint",
546546
Treatment == "peppermint" ~ "Peppermint",
547-
TRUE ~ Treatment)) %>%
547+
.default = Treatment)) %>%
548548
count(Treatment, Treatment_recoded)
549549
```
550550

551551

552-
## But maybe we want NA?
553-
554-
Perhaps we want values that are O or Other to actually be NA, then `case_when` can be helpful for this. We simply specify everything else.
555552

556-
```{r}
557-
data_ginger_mint %>%
558-
mutate(Treatment_recoded = case_when(
559-
Treatment == "Ginger" ~ "Ginger",
560-
Treatment == "Mint" ~ "Peppermint",
561-
Treatment == "mint" ~ "Peppermint",
562-
Treatment == "peppermint" ~ "Peppermint")) %>%
563-
count(Treatment, Treatment_recoded)
564-
```
565553
## case_when() can also overwrite/update a variable
566554

567555
To overwrite a variable, put its existing name on the left-hand side of `=` inside `mutate()`.
568556

569557
```{r}
570558
data_ginger_mint %>%
571559
mutate(Treatment = case_when(
572-
Treatment == "Ginger" ~ "Ginger",
573560
Treatment == "Mint" ~ "Peppermint",
574561
Treatment == "mint" ~ "Peppermint",
575-
Treatment == "peppermint" ~ "Peppermint")) %>%
562+
Treatment == "peppermint" ~ "Peppermint",
563+
.default = Treatment)) %>%
576564
count(Treatment)
577565
578566
```
@@ -584,16 +572,29 @@ data_ginger_mint %>%
584572
```{r}
585573
data_ginger_mint %>%
586574
mutate(Treatment_recoded = case_when(
587-
Treatment == "Ginger" ~ "Ginger", # keep it the same!
588575
Treatment %in%
589576
c("Mint", "mint", "Peppermint", "peppermint") ~ "Peppermint",
590-
Treatment %in% c("O", "Other") ~ "Other")) %>%
577+
Treatment %in% c("O", "Other") ~ "Other",
578+
.default = Treatment)) %>%
591579
592580
count(Treatment, Treatment_recoded)
593581
594582
```
595583

584+
## But maybe we want NA?
596585

586+
Perhaps we want values that are O or Other to actually be `NA`. `case_when()` can help with this: we could specify every other value explicitly and drop `.default = Treatment` (so that unmatched values become `NA`), or we could assign `NA` directly with `NA_character_`.
587+
588+
```{r}
589+
data_ginger_mint %>%
590+
mutate(Treatment_recoded = case_when(
591+
Treatment %in%
592+
c("Mint", "mint", "Peppermint", "peppermint") ~ "Peppermint",
593+
Treatment %in% c("O", "Other") ~ NA_character_,
594+
.default = Treatment)) %>%
595+
596+
count(Treatment, Treatment_recoded)
597+
```
597598

598599
## Another reason for `case_when()`
599600

@@ -619,7 +620,7 @@ data_ginger_mint %>%
619620
count(Group, Effect)
620621
```
621622

622-
## GUT CHECK: If we want all unspecified values to remain the same with `case_when()`, how should we complete the `TRUE ~` statement?
623+
## GUT CHECK: If we want all unspecified values to remain the same with `case_when()`, how should we complete the `.default =` argument?
623624

624625
A. With the name of the variable we are modifying or using as source
625626

@@ -726,7 +727,7 @@ data_ginger_mint %>%
726727
Treatment %in%
727728
c("Mint", "mint", "Peppermint", "peppermint") ~ "Peppermint",
728729
Treatment %in% c("O", "Other") ~ "Other",
729-
TRUE ~ Treatment))
730+
.default = Treatment))
730731
```
731732

732733
## `case_when()` improved with `stringr`
@@ -738,7 +739,7 @@ data_ginger_mint %>%
738739
mutate(Treatment_recoded = case_when(
739740
str_detect(string = Treatment, pattern = "int") ~ "Peppermint",
740741
str_detect(string = Treatment, pattern = "^o|^O") ~ "Other",
741-
TRUE ~ Treatment)) %>%
742+
.default = Treatment)) %>%
742743
count(Treatment, Treatment_recoded)
743744
```
744745

@@ -777,71 +778,7 @@ data_comb <- data_ginger_mint %>%
777778
data_comb
778779
```
779780

780-
## Separating columns based on a separator
781-
782-
The `separate()` function from `tidyr` can split a column into multiple columns.
783-
The `col` argument specifies what column to work with
784-
The `into` argument specifies names of new columns
785-
The `sep` argument specifies what to separate by
786-
787-
```{r}
788-
data_comb <- data_comb %>%
789-
separate(col = change, into = c("Group", "Change"), sep = "_" )
790-
data_comb
791-
```
792-
793-
794-
## Summary
795-
- `case_when()` requires `mutate()` when working with dataframes/tibbles
796-
- `case_when()` can recode **entire values** based on **conditions** (need quotes for conditions and new values)
797-
- remember `case_when()` needs `TRUE ~ variable` to keep values that aren't specified by conditions, otherwise they will be `NA`
798-
799-
**Note:** you might see the `recode()` function, it only does some of what `case_when()` can do, so we skipped it, but it is in the extra slides at the end.
800-
801-
## Summary continued
802-
803-
```{r, fig.alt="dplyr", out.width = "70%", echo = FALSE, fig.align='center'}
804-
knitr::include_graphics("images/case_when.png")
805-
```
806-
"Artwork by @allison_horst". https://allisonhorst.com/
807-
808-
809-
## Summary Continued
810-
811-
- `stringr` package has great functions for looking for specific **parts of values** especially `filter()` and `str_detect()` combined
812-
- `stringr` also has other useful string functions like `str_detect()` (finding patterns in a column or vector), `str_subset()` (parsing text), `str_replace()` (replacing the first instance in values), `str_replace_all()` (replacing all instances in each value) and **more**!
813-
- `separate()` can split columns into additional columns
814-
- `unite()` can combine columns
815-
- `:` can indicate when you want to start and end with columns next to one another
816-
817-
818-
## Lab Part 2
819-
820-
🏠 [Class Website](https://jhudatascience.org/intro_to_r/)
821-
822-
💻[Lab](https://jhudatascience.org/intro_to_r/modules/Data_Cleaning/lab/Data_Cleaning_Lab.Rmd)
823781

824-
📃 [Day 5 Cheatsheet](https://jhudatascience.org/intro_to_r/modules/cheatsheets/Day-5.pdf)
825-
826-
📃 [Posit's `stringr` Cheatsheet](https://evoldyn.gitlab.io/evomics-2018/ref-sheets/R_strings.pdf)
827-
828-
```{r, fig.alt="The End", out.width = "50%", echo = FALSE, fig.align='center'}
829-
knitr::include_graphics(here::here("images/the-end-g23b994289_1280.jpg"))
830-
```
831-
832-
Image by <a href="https://pixabay.com/users/geralt-9301/?utm_source=link-attribution&amp;utm_medium=referral&amp;utm_campaign=image&amp;utm_content=812226">Gerd Altmann</a> from <a href="https://pixabay.com//?utm_source=link-attribution&amp;utm_medium=referral&amp;utm_campaign=image&amp;utm_content=812226">Pixabay</a>
833-
834-
# Extra Slides
835-
836-
## `recode()` function
837-
838-
This is similar to `case_when()` but it can't do as much.
839-
840-
::: {style="color: red;"}
841-
(need `mutate` for data frames/tibbles!)
842-
:::
843-
::: codeexample
844-
```{r, eval = FALSE}
845782
# General Format - this is not code!
846783
{data_input} %>%
847784
mutate({variable_to_fix_or_new} = recode({Variable_fixing}, {old_value} = {new_value},

modules/Data_Cleaning/lab/Data_Cleaning_Lab_Key.Rmd

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ NEW_TIBBLE <- OLD_TIBBLE %>%
148148
mutate(NEW_COLUMN = case_when(
149149
OLD_COLUMN %in% c( ... ) ~ ... ,
150150
OLD_COLUMN %in% c( ... ) ~ ... ,
151-
TRUE ~ OLD_COLUMN
151+
.default = OLD_COLUMN
152152
))
153153
```
154154

@@ -158,7 +158,7 @@ BloodType <- BloodType %>%
158158
mutate(exposure = case_when(
159159
exposure %in% c("N", "n", "No", "no") ~ "No",
160160
exposure %in% c("Y", "y", "Yes", "yes") ~ "Yes",
161-
TRUE ~ exposure # the only other value is an NA so we could include this or we don't need to (it's generally good practice unless we want to create NAs)
161+
.default = exposure # the only other value is an NA so we could include this or we don't need to (it's generally good practice unless we want to create NAs)
162162
))
163163
164164
count(BloodType, exposure)
@@ -181,7 +181,7 @@ BloodType <- BloodType %>%
181181
mutate(type = case_when(
182182
type == "o.-" ~ "O.-",
183183
type == "o.+" ~ "O.+",
184-
TRUE ~ type))
184+
.default = type))
185185
BloodType
186186
```
187187

0 commit comments

Comments
 (0)