-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathveg_cover_by_date.R
executable file
·231 lines (220 loc) · 10.5 KB
/
veg_cover_by_date.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
#' ---
#' title: "Effect of sampling date on cover estimate"
#' author: "Beau Larkin"
#' date: "2021-12-06"
#' output:
#' github_document:
#' toc: true
#' toc_depth: 2
#' ---
#'
#' # Description
#' This is an addendum to the vegetation sampling guidance report produced in early 2021.
#' The purpose here is to produce some graphics and supporting summaries about how
#' vegetation cover changes over the course of a season, and how we can handle, reduce,
#' or otherwise manage it with surveys at MPG Ranch.
#'
#' # Resources
#'
#' ## Packages, libraries, and functions
#'
#' Packages and multiple data sources must be added to the local environment before knitting
#' this notebook.
#+ install_1,message=FALSE
# Quick-loading resources
packages_needed = c("tidyverse", "knitr", "rjson", "lubridate", "bigrquery", "devtools", "ggmap", "colorspace")
packages_installed = packages_needed %in% rownames(installed.packages())
if (any(!packages_installed))
install.packages(packages_needed[!packages_installed])
for (i in 1:length(packages_needed)) {
library(packages_needed[i], character.only = T)
}
#' ## API keys
#' API keys for data access are pulled from local resources and are not available in the hosted environment. Code not shown here.
#+ BQ_api_keys,echo=FALSE
bq_auth(
path = paste0(getwd(), "/mpg-data-warehouse-api_key-master.json"),
cache = NULL
)
Sys.setenv(BIGQUERY_TEST_PROJECT = "mpg-data-warehouse")
billing <- bq_test_project()
#+ Google_maps_key,echo=FALSE
mapKey <- fromJSON(file = paste0(getwd(), "/R_globalKeys.json"))$mapKey
register_google(key = mapKey)
#' ## Global functions and styles: `theme_bgl`
# Load text file from local working directory
source(paste0(getwd(), "/styles.txt"))
#' # Data
#' ## Survey metadata
#+ survey_metadata,echo=TRUE
meta_sql <-
"
SELECT *
FROM `mpg-data-warehouse.vegetation_point_intercept_gridVeg.gridVeg_survey_metadata`
"
meta_bq <- bq_project_query(billing, meta_sql)
meta_tb <- bq_table_download(meta_bq)
meta_df <- as.data.frame(meta_tb)
#' ## Grid point metadata
#+ gpmeta,echo=TRUE
gp_meta_sql <-
"
SELECT *
FROM `mpg-data-warehouse.grid_point_summaries.location_position_classification`
"
gp_meta_bq <- bq_project_query(billing, gp_meta_sql)
gp_meta_tb <- bq_table_download(gp_meta_bq)
gp_meta_df <- as.data.frame(gp_meta_tb)
#' ## Any-hit plant cover data
#' Plant species and cover data from point-intercept surveys in 2011-12, 2016, and 2021.
#' Plant data are joined with survey metadata to filter the data to the survey periods with the
#' greatest effort (2011-12, 2016, and 2021). Data are simplified and summarized to show sums of
#' cover in plant functional groups at each date of annual surveys.
#+ plant_cover_data,echo=TRUE
cvr_sql <-
"
SELECT *
FROM `mpg-data-warehouse.vegetation_gridVeg_summaries.gridVeg_foliar_cover_all`
WHERE type3_vegetation_indicators = 'uncultivated grassland native or degraded'
"
cvr_bq <- bq_project_query(billing, cvr_sql)
cvr_tb <- bq_table_download(cvr_bq)
cvr_df <-
as.data.frame(cvr_tb) %>%
filter(survey_sequence %in% c("2011-12", "2016", "2021"),
plant_native_status %in% c("native", "nonnative"),
plant_life_cycle %in% c("annual", "perennial"),
plant_life_form %in% c("forb", "graminoid")) %>%
select(survey_ID, survey_sequence, grid_point, plant_name_common, plant_native_status, plant_life_cycle, plant_life_form, intercepts_pct) %>%
left_join(meta_df %>% select(survey_ID, date), by = "survey_ID") %>%
mutate(doy = yday(date)) %>%
group_by(grid_point, survey_sequence, doy, plant_native_status, plant_life_cycle, plant_life_form) %>%
summarize(cvr_pct = sum(intercepts_pct), .groups = "drop")
#' ## Top-hit plant cover data
#+ plant_top_cover,echo=TRUE
top_sql <-
"
SELECT *
FROM `mpg-data-warehouse.vegetation_gridVeg_summaries.gridVeg_foliar_cover_top`
"
top_bq <- bq_project_query(billing, top_sql)
top_tb <- bq_table_download(top_bq)
top_df <- as.data.frame(top_tb)
#+ plant_spe_metadata,echo=TRUE
spe_meta_sql <-
"
SELECT key_plant_species, key_plant_code, plant_native_status, plant_life_cycle, plant_life_form, plant_name_sci, plant_name_common
FROM `mpg-data-warehouse.vegetation_species_metadata.vegetation_species_metadata`
"
spe_meta_bq <- bq_project_query(billing, spe_meta_sql)
spe_meta_tb <- bq_table_download(spe_meta_bq)
spe_meta_df <- as.data.frame(spe_meta_tb)
#+ top_cover_data,echo=TRUE
top_cvr_df <- top_df %>%
left_join(spe_meta_df, by = "key_plant_species") %>%
left_join(meta_df %>% select(survey_ID, date), by = "survey_ID") %>%
left_join(gp_meta_df %>% select(grid_point, type3_vegetation_indicators), by = "grid_point") %>%
filter(survey_sequence %in% c("2011-12", "2016", "2021"),
plant_native_status %in% c("native", "nonnative"),
plant_life_cycle %in% c("annual", "perennial"),
plant_life_form %in% c("forb", "graminoid"),
type3_vegetation_indicators == "uncultivated grassland native or degraded") %>%
mutate(doy = yday(date)) %>%
group_by(grid_point, survey_sequence, doy, plant_native_status, plant_life_cycle, plant_life_form) %>%
summarize(top_cvr_pct = sum(top_intercepts_pct), .groups = "drop")
#' I examined point-intercept cover data from 2011-12, 2016, and 2021. These were years with the most extensive
#' survey efforts. I filtered the data to include only points in uncultivated grassland to reduce the noise
#' associated with restoration activities. The removal of uncultivated grasslands probably obscures activitiy
#' of most of the exotic forage grass plantations, however.
#' Very little signal and generally low cover was observed with annual
#' plants (not shown), so they were filtered as well.
#'
#' ## Survey locations
#' The following map shows locations of points that appear at least once in this data set. The table after the map
#' details the number of points included per year.
#+ fig_map,echo=TRUE,message=FALSE
mpgr_map <-
ggmap(
get_googlemap(
center = c(lon = -114.008, lat = 46.700006),
zoom = 13,
scale = 2,
maptype ='terrain')
)
mpgr_map +
geom_point(
data = cvr_df %>%
select(grid_point) %>%
distinct() %>%
left_join(gp_meta_df %>% select(grid_point, lat, long), by = "grid_point"),
aes(x = long, y = lat)
) +
theme_void()
#+ table_points_years,echo=TRUE
cvr_df %>%
distinct(grid_point, survey_sequence) %>%
count(survey_sequence) %>%
kable(format = "pandoc")
#'
#' # Results
#' ## Any-hit cover data
#' Plant cover can vary substantially over a season, but this depends on the group of plants and the year.
#' The 2011-12 season preceded several years of drought, and cover appears generally higher as a result,
#' with native forbs slowly increasing and nonnative forbs strongly increasing throughout the season. Native perennial
#' grasses were abundant in 2011-12, with a unimodal peak in cover near mid-summer. Cover retracted slightly in 2016 for most
#' groups, with native forbs declining from mid-summer on and nonnative forbs holding flat. Perennial grasses
#' in 2016 were smaller in 2016 and showed a briefer peak in abundance near the end of June. Nonnative grasses were flat and
#' similar to 2011-12. 2021 was a shorter season, with much less change in cover over time. Cover was flat
#' for all functional groups except nonnative perennial grasses, which declined sharply. I suspect that this was
#' driven more by a difference in species/locations surveyed that an actual decline in cover.
#'
#' In any case, variation in cover among locations is much greater than variation across the season.
#' A shorter sampling season might be good for a number of reasons, but we shouldn't worry much about
#' using the data we already have.
#+ fig_seasonal_cover,echo=TRUE
cvr_df %>%
filter(plant_life_cycle == "perennial") %>%
ggplot(., aes(x = doy, y = cvr_pct, group = survey_sequence)) +
facet_grid(cols = vars(plant_native_status), rows = vars(plant_life_form), scales = "free_y") +
geom_point(aes(color = as.factor(survey_sequence)), alpha = 0.8) +
geom_smooth(aes(color = as.factor(survey_sequence)), method = "gam", formula = y ~ s(x, bs = "cs"), se = FALSE) +
labs(x = "", y = "Any-hit percent cover of perennials", caption = "Lines produced by GAM smoother with default parameters.") +
scale_color_discrete_qualitative(name = "year", palette = "Harmonic") +
scale_x_continuous(breaks = c(121, 152, 182, 213, 244), labels = c("May", "Jun", "Jul", "Aug","Sep")) +
theme_bgl
#+ fig_average_cover,echo=TRUE
cvr_df %>%
filter(plant_life_cycle == "perennial") %>%
ggplot(., aes(x = survey_sequence, y = cvr_pct)) +
facet_grid(cols = vars(plant_native_status), rows = vars(plant_life_form), scales = "free_y") +
geom_boxplot(fill = "gray95") +
labs(x = "", y = "Any-hit percent cover of perennials") +
theme_bgl
#' The previous analysis used all hits from the point-intercept data. It could be that top-cover would
#' be more responsive to seasonal change, so let's have a look at that here.
#'
#' ## Top-hit cover data
#' Restricting the plant data to top-hit only changes little of the overall pattern, except in most
#' cases to flatten things out over the season. It's possible this is because the largest-statured plants
#' are most likely to be hit in top-cover, and these aren't as responsive to seasonal change. The exception
#' is with perennial native grasses. With any-hit data, the seasonal curve in 2011-12 is pronounced, but
#' it is flat with top-hit data. Possibly 2011-12 was a wetter period and smaller grasses were expanding.
#+ fig_seasonal_top_cover,echo=TRUE
top_cvr_df %>%
filter(plant_life_cycle == "perennial") %>%
ggplot(., aes(x = doy, y = top_cvr_pct, group = survey_sequence)) +
facet_grid(cols = vars(plant_native_status), rows = vars(plant_life_form), scales = "free_y") +
geom_point(aes(color = as.factor(survey_sequence)), alpha = 0.8) +
geom_smooth(aes(color = as.factor(survey_sequence)), method = "gam", formula = y ~ s(x, bs = "cs"), se = FALSE) +
labs(x = "", y = "Top-hit percent cover of perennials", caption = "Lines produced by GAM smoother with default parameters.") +
scale_color_discrete_qualitative(name = "year", palette = "Harmonic") +
scale_x_continuous(breaks = c(121, 152, 182, 213, 244), labels = c("May", "Jun", "Jul", "Aug","Sep")) +
theme_bgl
#+ fig_top_average_cover,echo=TRUE
top_cvr_df %>%
filter(plant_life_cycle == "perennial") %>%
ggplot(., aes(x = survey_sequence, y = top_cvr_pct)) +
facet_grid(cols = vars(plant_native_status), rows = vars(plant_life_form), scales = "free_y") +
geom_boxplot(fill = "gray95") +
labs(x = "", y = "Top-hit percent cover of perennials") +
theme_bgl