Merge pull request #536 from cmu-delphi/ds/file

brookslogan · web-flow · commit 63cb8200a40c · 2024-10-04T10:55:17.000-07:00
refactor: remove Suggests dependence on covidcast
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: epiprocess
 Title: Tools for basic signal processing in epidemiology
-Version: 0.9.3
+Version: 0.9.4
 Authors@R: c(
     person("Jacob", "Bien", role = "ctb"),
     person("Logan", "Brooks", , "lcbrooks@andrew.cmu.edu", role = c("aut", "cre")),
@@ -35,7 +35,7 @@ Imports:
     checkmate,
     cli,
     data.table,
-    dplyr (>= 1.0.8),
+    dplyr (>= 1.1.0),
     genlasso,
     ggplot2,
     glue,
@@ -53,11 +53,11 @@ Imports:
     vctrs,
     waldo
 Suggests:
-    covidcast,
     devtools,
     epidatr,
     knitr,
     outbreaks,
+    readr,
     rmarkdown,
     testthat (>= 3.1.5),
     withr
diff --git a/NEWS.md b/NEWS.md
@@ -17,6 +17,10 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat
   syntax.
 - Improved validation of `.window_size` arguments.
 
+## Cleanup
+
+- Removed vignette dependency on `covidcast`.
+
 # epiprocess 0.9
 
 ## Breaking changes
diff --git a/data-raw/jhu_csse_county_level_subset.R b/data-raw/jhu_csse_county_level_subset.R
@@ -1,10 +1,15 @@
+library(readr)
 library(epidatr)
-library(covidcast)
 library(epiprocess)
 library(dplyr)
 
-# Use covidcast::county_census to get the county and state names
-y <- covidcast::county_census %>%
+y <- read_csv("https://github.com/cmu-delphi/covidcast/raw/c89e4d295550ba1540d64d2cc991badf63ad04e5/Python-packages/covidcast-py/covidcast/geo_mappings/county_census.csv", # nolint: line_length_linter
+  col_types = cols(
+    FIPS = col_character(),
+    STNAME = col_character(),
+    CTYNAME = col_character()
+  )
+) %>%
   filter(STNAME %in% c("Massachusetts", "Vermont"), STNAME != CTYNAME) %>%
   select(geo_value = FIPS, county_name = CTYNAME, state_name = STNAME)
 
@@ -18,7 +23,7 @@ jhu_csse_county_level_subset <- pub_covidcast(
   time_values = epirange(20200601, 20211231),
 ) %>%
   select(geo_value, time_value, cases = value) %>%
-  full_join(y, by = "geo_value") %>%
+  inner_join(y, by = "geo_value", relationship = "many-to-one", unmatched = c("error", "drop")) %>%
   as_epi_df()
 
 usethis::use_data(jhu_csse_county_level_subset, overwrite = TRUE)
diff --git a/vignettes/aggregation.Rmd b/vignettes/aggregation.Rmd
@@ -13,13 +13,19 @@ kinds of tasks with `epi_df` objects. We'll work with county-level reported
 COVID-19 cases in MA and VT.
 
 ```{r, message = FALSE, eval= FALSE, warning= FALSE}
+library(readr)
 library(epidatr)
-library(covidcast)
 library(epiprocess)
 library(dplyr)
 
-# Use covidcast::county_census to get the county and state names
-y <- covidcast::county_census %>%
+# Map FIPS codes to county and state names; force these columns to character:
+y <- read_csv("https://github.com/cmu-delphi/covidcast/raw/c89e4d295550ba1540d64d2cc991badf63ad04e5/Python-packages/covidcast-py/covidcast/geo_mappings/county_census.csv", # nolint: line_length_linter
+  col_types = cols(
+    FIPS = col_character(),
+    CTYNAME = col_character(),
+    STNAME = col_character()
+  )
+) %>%
   filter(STNAME %in% c("Massachusetts", "Vermont"), STNAME != CTYNAME) %>%
   select(geo_value = FIPS, county_name = CTYNAME, state_name = STNAME)
 
@@ -33,15 +39,15 @@ x <- pub_covidcast(
   time_values = epirange(20200601, 20211231),
 ) %>%
   select(geo_value, time_value, cases = value) %>%
-  full_join(y, by = "geo_value") %>%
+  inner_join(y, by = "geo_value", relationship = "many-to-one", unmatched = c("error", "drop")) %>%
   as_epi_df(as_of = as.Date("2024-03-20"))
 ```
 
 The data contains 16,212 rows and 5 columns.
 
 ```{r, echo=FALSE, warning=FALSE, message=FALSE}
+library(readr)
 library(epidatr)
-library(covidcast)
 library(epiprocess)
 library(dplyr)
 
@@ -110,15 +116,16 @@ help avoid bugs in further downstream data processing tasks.
 Let's first remove certain dates from our data set to create gaps:
 
 ```{r}
+state_naming <- read_csv("https://github.com/cmu-delphi/covidcast/raw/c89e4d295550ba1540d64d2cc991badf63ad04e5/Python-packages/covidcast-py/covidcast/geo_mappings/state_census.csv", # nolint: line_length_linter
+  col_types = cols(NAME = col_character(), ABBR = col_character())
+) %>%
+  transmute(state_name = NAME, abbr = tolower(ABBR)) %>%
+  as_tibble()
+
 # First make geo value more readable for tables, plots, etc.
 x <- x %>%
-  mutate(
-    geo_value = paste(
-      substr(county_name, 1, nchar(county_name) - 7),
-      name_to_abbr(state_name),
-      sep = ", "
-    )
-  ) %>%
+  inner_join(state_naming, by = "state_name", relationship = "many-to-one", unmatched = c("error", "drop")) %>%
+  mutate(geo_value = paste(substr(county_name, 1, nchar(county_name) - 7), toupper(abbr), sep = ", ")) %>%
   select(geo_value, time_value, cases)
 
 xt <- as_tsibble(x) %>% filter(cases >= 3)