ReasonFoundation
diff --git a/‎.DS_Store
0 Bytes b/‎.DS_Store
0 Bytes
diff --git a/‎California-State-Flag.jpeg
-15.7 KB b/‎California-State-Flag.jpeg
-15.7 KB
diff --git a/‎Flag_of_California.png
-1 b/‎Flag_of_California.png
-1
diff --git a/‎Flag_of_New_York_City.png
-43.2 KB b/‎Flag_of_New_York_City.png
-43.2 KB
diff --git a/‎Rplots.pdf
-3.53 KB b/‎Rplots.pdf
-3.53 KB
diff --git a/‎acfrs_8gov_type.RDS
2.22 MB b/‎acfrs_8gov_type.RDS
2.22 MB
diff --git a/‎acfrs_city_pop_added_char.RDS
625 KB b/‎acfrs_city_pop_added_char.RDS
625 KB
diff --git a/‎acfrs_school_districts_clean.csv
+9,479 b/‎acfrs_school_districts_clean.csv
+9,479
diff --git a/‎acfrs_census_matching_cities_population.Rmd ‎city_gov.Rmd
+15-40 b/‎acfrs_census_matching_cities_population.Rmd ‎city_gov.Rmd
+15-40
diff --git a/‎acfrs_census_matching_county_population.Rmd ‎county_gov.Rmd
+50-65 b/‎acfrs_census_matching_county_population.Rmd ‎county_gov.Rmd
+50-65
diff --git a/‎county_pop_census_acfrs.RDS
200 KB b/‎county_pop_census_acfrs.RDS
200 KB
@@ -72,9 +72,8 @@ pop_governmentID %>%
 ```{r}
 options(scipen = 999)
 # census_id in ACFRs is government_ID used in file "City and Town Mapping.xlsx"
-acfrs <- readRDS("data_from_dbsite.RDS") 
 
-acfrs_governmentID <- acfrs %>% drop_na(census_id) %>% 
+acfrs_governmentID <- readRDS("data_from_dbsite.RDS") %>% drop_na(census_id) %>% 
   filter(category == "General Purpose") %>% 
   rename(government_ID = census_id)
 
@@ -98,17 +97,17 @@ acfrs_governmentID_14char <- acfrs_governmentID %>%
 
 acfrs_governmentID_1314_char <- rbind(acfrs_governmentID_13char, acfrs_governmentID_14char)
 
-#Joining 1
+#Joining method 1
 
 acfrs_city_pop <- acfrs_governmentID %>% 
-  left_join(pop_governmentID) %>% drop_na(population) %>% 
+  left_join(pop_governmentID) %>% #drop_na(population) %>% 
   select(-c(state, STATE_AB, year)) %>% 
   arrange(desc(population)) 
 
-# joining 2: add 0 to those with 13 chars --> # once added extra 0, have more 695 matches 
+# joining method 2: pad the 13-character government_IDs with a 0 --> once the extra 0 is added, we get 695 more matches 
 
 acfrs_city_pop_added_char <- acfrs_governmentID_1314_char %>% 
-  left_join(pop_governmentID) %>% drop_na(population) %>% 
+  left_join(pop_governmentID) %>% #drop_na(population) %>% 
   select(-c(state, STATE_AB, year)) %>% 
   arrange(desc(population)) 
 
@@ -119,6 +118,9 @@ acfrs_city_pop %>%
 # Now got matched
 acfrs_city_pop_added_char %>% 
  filter(grepl("Los Angeles", name)) 
+
+#10412 cities, of which 4112 do not have population data
+#acfrs_city_pop_added_char %>% filter(is.na(population))
 ```
 
 ```{r}
@@ -130,43 +132,16 @@ saveRDS(acfrs_city_pop_added_char, "acfrs_city_pop_added_char.RDS")
 ```{r}
 
 # cities with population over 100,000 
-cities_100Kpop_NOT_acfrs <- pop %>% 
-  filter(population >= 100000) %>% 
-  #filter(!geo_id %in% acfrs_city_pop$geo_id) %>% 
-  
-  # adding 0 to those with 13 char in government_ID in ACFRs get more matches --> now only 33 cities > 100k in pop but NOT in acfrs
-  filter(!geo_id %in% acfrs_city_pop_added_char$geo_id) %>% 
-  arrange(desc(population)) %>% distinct()
+# cities_100Kpop_NOT_acfrs <- pop %>% 
+#   filter(population >= 100000) %>% 
+#   #filter(!geo_id %in% acfrs_city_pop$geo_id) %>% 
+#   
+#   # adding 0 to those with 13 char in government_ID in ACFRs get more matches --> now only 33 cities > 100k in pop but NOT in acfrs
+#   filter(!geo_id %in% acfrs_city_pop_added_char$geo_id) %>% 
+#   arrange(desc(population)) %>% distinct()
 
 #write.csv(cities_100Kpop_NOT_acfrs, "cities_100Kpop_NOT_acfrs.csv")
 
 ```
 
 
-
-# Some special cases
-
-```{r}
-  
-#Counties that are also Cities:
-#San Francisco, CA
-#Jacksonville, FL and Dupage County, FL
-#Nashville, TN and Davidson County, TN
-# Check Nashville
-acfrs %>% filter(state == "TN") %>% 
-  filter(grepl("Nashville", name)) %>% select(state, name, total_liabilities, revenues) %>% 
-  filter(name == "Nashville-Davidson County")
-```
-
-```{r}
-pop %>% filter(state.abb == "TN") %>% 
-  filter(grepl("Nashville", city_town))
-
-
-# Combined city/ city government: Jacksonville city, FL  = Duval County, FL
-jacksonville_city_fl <- acfrs_city_pop_added_char %>% 
-  filter(state.abb == "FL" & name == "Jacksonville") 
-
-
-```
-
@@ -16,16 +16,16 @@ library(dplyr)
 # Census Population - County Level 
 
 ```{r}
-d <- rio::import(here::here("data", "DECENNIALPL2020.P1_data_with_overlays_2021-12-16T123049.csv"), skip = 1) %>% 
+d_county <- rio::import(here::here("data", "DECENNIALPL2020.P1_data_with_overlays_2021-12-16T123049.csv"), skip = 1) %>% 
   select(id, `Geographic Area Name`, `!!Total:`) %>% 
   rename(population = `!!Total:`)
 
-# join with df_state to get state.abb and state.name --> to have a common col with acfrs to join
+# join with df_state to get state.abb and state.name --> to have a common col to join with acfrs
 df_state <- data.frame(state.abb, state.name) %>% 
   add_row(state.abb = "PR", state.name = "Puerto Rico") %>% 
   add_row(state.abb = "DC", state.name = "District of Columbia")
 
-pop <- d %>% 
+pop_county <- d_county %>% 
   separate(`Geographic Area Name`, c("county", "state.name"), sep = ",")  %>%  
   mutate(state.name = str_trim(state.name)) %>% 
   left_join(df_state) %>% 
@@ -34,10 +34,10 @@ pop <- d %>%
   mutate(county = str_to_lower(county),
          county = str_remove(county,"\\.|'|‘")) 
 
-  pop %>% 
+## Special cases
+  pop_county %>% 
   filter(state.abb == "TN" | state.abb == "KY") %>% 
   filter(str_detect(county, "davidson|jefferson")) #jefferson county ; davidson county
-## Special cases
 
 # # Nashville-Davidson metropolitan government (balance) == "Nashville-Davidson County" in ACFRS--> rename to match
 # mutate(county = ifelse(county == "nashville-davidson metropolitan government (balance)", "nashville-davidson county", county)) %>%  
@@ -80,13 +80,12 @@ acfrs <- readRDS("data_from_dbsite.RDS") %>%
 ACFRs has `r nrow(acfrs)` observations. 
 
 
-## Find "County" in ACFRs
+## Find "County" or "Municipality" in ACFRs
 ```{r}
 # ACFRs entities whose names contain the word "county" or "municipality"
   acfrs_county <- acfrs %>% 
   filter(category == "General Purpose") %>% 
-  filter(grepl("county", county))
-
+  filter(grepl("county|municipality", county)) 
 ```
 
 There are `r nrow(acfrs_county)` entities that contain the word "county" or "municipality" in their names. 
@@ -115,15 +114,6 @@ alaska_borough <- acfrs %>%
   filter(category == "General Purpose") %>%  
   filter(state.abb == "AK") %>% 
   filter(grepl("borough", county)) %>% arrange(county)
-
-
-# double check, find any ACFRs entities that has "Municipality" in their names.
-acfrs %>% filter(grepl("municipality", county))
-
-# Found 2: SD Municipality of Dell Rapids & WV Municipality of Parkersburg.
-# However, they're not in Census population data
-pop %>%
-filter(county == "municipality of parkersburg" | county == "municipality of dell rapids" )
           
 ```
 
@@ -143,7 +133,7 @@ Potential risk: Not all Puerto Rico entities in ACFRs without the word "Municipi
 
 ```{r}
 # in Census population, find county that has the word "Municipio" --> remove the word "Municipio" to match with acfrs 
-puertorico_census_pop <- pop %>% 
+puertorico_census_pop <- pop_county %>% 
           filter(state.name == "Puerto Rico") %>% 
   filter(grepl("municipio", county)) %>% 
   mutate(county = str_remove(county, " municipio")) 
@@ -155,15 +145,14 @@ puertorico_census_pop <- pop %>%
           filter(category == "General Purpose") %>% 
           filter(state.abb == "PR") %>% 
           #join Puerto Rico in acfrs with PR in census -
-  left_join(puertorico_census_pop, by = c("state.abb", "county")) %>% 
-          drop_na(population) 
+  left_join(puertorico_census_pop, by = c("state.abb", "county")) 
 ```
 
 # Join ACFRs and Census population data to get population for ACFRs counties 
 
 
 Joining these components: 
-* acfrs entities contain word "County" in their names
+* ACFRs entities that contain the word "County" or "Municipality" in their names
 * ACFRs entities of Louisiana that contain the word "Parish" in their names
 * Puerto Rico in ACFRs and Census. Note that in ACFRs, no Puerto Rico entity has "Municipio" in its name. To match with PR in Census, we need to remove this word from the Census population data. This introduces a risk: some PR entities in ACFRs without the word "Municipio" may NOT actually be municipios.
 
@@ -172,22 +161,18 @@ Joining these components:
 # first, stack (rbind) the ACFRs entities whose names contain "County" or "Municipality", the Louisiana entities whose names contain "Parish", and the Alaska entities whose names contain "Borough"
 acfrs_county_parish_borough <- rbind(acfrs_county, louisiana_parish) %>% 
                               rbind(alaska_borough) 
-                              
 
 # next, join with census Census population by = c("state.abb", "county")
 county_pop_census_acfrs <- acfrs_county_parish_borough %>% 
-            left_join(pop, by = c("state.abb", "county")) %>% 
-            drop_na(population) %>% 
+            left_join(pop_county, by = c("state.abb", "county")) %>% 
             
 # third, bind with puertorico 
            rbind(puertorico_afrs_census_pop) %>% 
             arrange(desc(population)) %>% distinct() 
 
-# PROBLEM: Need to explore more why others do not get matched
-setdiff(acfrs_county_parish_borough$county, county_pop_census_acfrs$county)
+# --> 2503 county level entities, of which 255 do not have population data
+#county_pop_census_acfrs %>% filter(is.na(population)) 
 
-acfrs_county_parish_borough %>% 
-            left_join(pop, by = c("state.abb", "county")) %>% filter(is.na(population)) 
 ```
 
 
@@ -197,7 +182,7 @@ county_pop_census_acfrs %>%
   filter(state.abb == "KY" | state.abb == "TN") %>% 
   filter(str_detect(county, "davidson|jefferson")) %>% select(state.abb, population, total_liabilities, id.y, county)
   
-write.csv(county_pop_census_acfrs, "county_pop_census_acfrs.csv")
+#write.csv(county_pop_census_acfrs, "county_pop_census_acfrs.csv")
 saveRDS(county_pop_census_acfrs, "county_pop_census_acfrs.RDS")
 
 ```
@@ -210,41 +195,41 @@ The matched dataset has `r nrow(county_pop_census_acfrs)` observations/ counties
 ```{r}
 # stoplist
 
-stoplist1 <-  pop %>% 
-  filter(state.name == "New York" & county %in% c("Bronx County", "Richmond County", "Kings County", "Queens County", "New York County"))
-  
-stoplist2 <- pop %>% 
-  filter(state.name == "Florida" & county == "Duval County") # FL Duval County's government is combined with FL Jacksonville,
-
-stoplist3 <- pop %>% 
-    filter(state.name == "Tennessee" & county == "Davidson County") 
-    
-stoplist4 <-  pop %>% 
-  filter(state.name == "Kentucky" & county == "Jefferson County")
-
-stoplist5 <- pop %>% 
-    filter(state.name == "Ohio" & county == "Franklin County")
-  
-stoplist <- rbind(stoplist1, stoplist2, stoplist3, stoplist4, stoplist5)
-
-# get counties in Census population data that have > 100k pop, county level, not Connecticut, not in stoplist
-
-census_pop_100k <- pop %>% 
-  # counties with > 100k  pop in Census
-  filter(population > 100000) %>% 
-  
-  # only get county level 
-  filter(grepl("County|Parish", county)) %>%  # how about |Borough?
-  filter(state.name != "Connecticut") %>% 
-  filter(!id %in% stoplist$id) 
+# stoplist1 <-  pop %>% 
+#   filter(state.name == "New York" & county %in% c("Bronx County", "Richmond County", "Kings County", "Queens County", "New York County"))
+#   
+# stoplist2 <- pop %>% 
+#   filter(state.name == "Florida" & county == "Duval County") # FL Duval County's government is combined with FL Jacksonville,
+# 
+# stoplist3 <- pop %>% 
+#     filter(state.name == "Tennessee" & county == "Davidson County") 
+#     
+# stoplist4 <-  pop %>% 
+#   filter(state.name == "Kentucky" & county == "Jefferson County")
+# 
+# stoplist5 <- pop %>% 
+#     filter(state.name == "Ohio" & county == "Franklin County")
+#   
+# stoplist <- rbind(stoplist1, stoplist2, stoplist3, stoplist4, stoplist5)
+# 
+# # get counties in Census population data that have > 100k pop, county level, not Connecticut, not in stoplist
+# 
+# census_pop_100k <- pop %>% 
+#   # counties with > 100k  pop in Census
+#   filter(population > 100000) %>% 
+#   
+#   # only get county level 
+#   filter(grepl("County|Parish", county)) %>%  # how about |Borough?
+#   filter(state.name != "Connecticut") %>% 
+#   filter(!id %in% stoplist$id) 
 
 ```
 
 ## Counties in Census with > 100k population above that are not yet matched with ACFRs
 
 ```{r}
-census_pop_NOT_match_acfrs_100k <- census_pop_100k %>% 
-  filter(!id %in% county_pop_census_acfrs$id ) %>% arrange(desc(population)) %>% distinct()
+# census_pop_NOT_match_acfrs_100k <- census_pop_100k %>% 
+#   filter(!id %in% county_pop_census_acfrs$id ) %>% arrange(desc(population)) %>% distinct()
 #write.csv(census_pop_NOT_match_acfrs_100k, "census_pop_NOT_match_acfrs_100k.csv")
 ```
 
@@ -255,13 +240,13 @@ There are `r nrow(census_pop_NOT_match_acfrs_100k)` counties in Census with more
 county_pop_census_acfrs
 
 ```{r}
-census_pop_NOT_match_acfrs_all <- pop %>% 
-
-  # only get county level 
-  filter(grepl("County|Parish", county)) %>%  # how about |Borough?
-  filter(state.name != "Connecticut") %>%
-  filter(!id %in% stoplist$id) %>% 
-  filter(!id %in% county_pop_census_acfrs$id) %>% arrange(desc(population))
+# census_pop_NOT_match_acfrs_all <- pop %>% 
+# 
+#   # only get county level 
+#   filter(grepl("County|Parish", county)) %>%  # how about |Borough?
+#   filter(state.name != "Connecticut") %>%
+#   filter(!id %in% stoplist$id) %>% 
+#   filter(!id %in% county_pop_census_acfrs$id) %>% arrange(desc(population))
 
 #write.csv(census_pop_NOT_match_acfrs_all, "census_pop_NOT_match_acfrs_all.csv")
 ```