Skip to content

Commit 44b7098

Browse files
committed
added two functions
1 parent bad9741 commit 44b7098

File tree

3 files changed

+216
-1
lines changed

3 files changed

+216
-1
lines changed

R/align_events.R

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
#' Aligns data by events
2+
#'
3+
#' Uses a vectory specifying whether data falls into an event to reshape data, aligning by the onset of the event
4+
#'
5+
#' @param df A data frame containing all data continuously along time, required columns: \code{"site", "date"}.
6+
#' @param events A data frame with columns \code{idx_start} and \code{len}, specifying event start and length, referring to the row index of \code{df}.
7+
#' \code{events} is the output of a function call to \code{get_consecutive}.
8+
#' @param dovars A vector of character strings specifying which columns (by column name) of \code{df} to re-arrange.
9+
#' @param leng_threshold An integer specifying the minum number of consecutive dates required to define an event.
10+
#' All events of length lower than \code{leng_threshold} are dropped.
11+
#' @param before An integer specifying the number of days before the event onset to be retained in re-arranged data
12+
#' @param after An integer specifying the number of days after the event onset to be retained in re-arranged data
13+
#' @param do_norm A logical specifying whether re-arranged data is to be normalised by the median value of the bin
14+
#' (number of bins given by argument \code{nbins}) before the event onset, given by argument \code{normbin}. Defaults to \code{FALSE}.
15+
#' @param nbins An integer, specifying the number of bins used to determine median values before event onset. Only used when code{do_norm=TRUE}. Defaults to 6.
16+
#' @param normbin An integer, specifying the bin number just before the event onset, used for normalisation. Only used when code{do_norm=TRUE}. Defaults to 2.
17+
#'
18+
#' @return A named list of data frames (\code{list( "df_idx_event", "df_idx_event_aggbyidx_event")}) containing data from all events and \code{before + after}
19+
#' dates (relative to event onset) with additional columns named \code{"inst"}, defining the event number (instance), and \code{"idx_event"}, defining
20+
#' the date relative to the respective event onset. The data frame \code{"df_idx_event"} contains rearranged, but otherwise unchanged data (unless
21+
#' \code{do_norm}=TRUE). The data frame \code{"df_idx_event_aggbyidx_event"} containes data aggregated across events with the mean and quantiles given for each
22+
#' \code{"idx_event"}.
23+
#' @export
24+
#'
25+
#' @examples df_alg <- align_events( df, truefalse, before=30, after=300 )
26+
#'
27+
align_events <- function(
28+
df,
29+
events,
30+
dovars = names(df),
31+
leng_threshold,
32+
before,
33+
after,
34+
do_norm=FALSE,
35+
nbins=6,
36+
normbin=2
37+
){
38+
##--------------------------------------------------------
39+
## Re-arrange data, aligning by beginning of events
40+
## Creates data frame where not all rows are retained from df
41+
## and columns added for 'idx_event' (number of day relative to onset of event)
42+
## and 'iinst' number of event to which row belongs.
43+
##--------------------------------------------------------
44+
if (nrow(events) > 1){
45+
46+
df_idx_event <- tibble()
47+
for ( iinst in 1:nrow(events) ){
48+
# idx_event = 0 is beginning of event
49+
idx_event <- seq( from = -before,
50+
to = events$len[iinst],
51+
by = 1
52+
)
53+
idxs <- idx_event + events$idx_start[iinst]
54+
55+
# avoid negative row indexes (possible consequence of using 'before > 0')
56+
drophead <- which( idxs < 1 )
57+
if (length(drophead) > 0){
58+
idxs <- idxs[ -drophead ]
59+
idx_event <- idx_event[ -drophead ]
60+
}
61+
addrows <- df |>
62+
slice( idxs ) |>
63+
mutate( idx_event = idx_event,
64+
inst = iinst
65+
)
66+
df_idx_event <- df_idx_event |>
67+
bind_rows( addrows )
68+
}
69+
70+
##--------------------------------------------------------
71+
## Normalise re-arranged data relative to a certain bin's median
72+
##--------------------------------------------------------
73+
if (do_norm){
74+
## Bins for different variables
75+
bins <- seq(
76+
from = -before,
77+
to = after,
78+
by = (after + before + 1)/nbins )
79+
80+
## add bin information based on idx_event to expanded df
81+
df_idx_event <- df_idx_event |>
82+
mutate(
83+
inbin = cut( as.numeric(idx_event), breaks = bins )
84+
)
85+
86+
tmp <- df_idx_event |>
87+
dplyr::filter(!is.na(inbin)) |>
88+
group_by( inbin ) |>
89+
summarise_at( vars(one_of(dovars)), funs(median( ., na.rm=TRUE )) )
90+
91+
norm <- slice(tmp, normbin)
92+
93+
## subtract from all values
94+
df_idx_event <- df_idx_event |> mutate_at( vars(one_of(dovars)), funs(. - norm$.) )
95+
96+
}
97+
98+
} else {
99+
100+
df_idx_event <- NULL
101+
102+
}
103+
104+
return( df_idx_event )
105+
106+
}
107+
108+
q33 <- function( vec, ... ){
109+
quantile( vec, 0.33, ...)
110+
}
111+
112+
q66 <- function( vec, ... ){
113+
quantile( vec, 0.66, ...)
114+
}
115+
116+
117+

R/get_consecutive.R

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
#' Identify events
2+
#'
3+
#' Identifies events as periods where of consecutively TRUE values in a boolean
4+
#' vector.
5+
#'
6+
#' @param vec A vector of boolean values. Consecutive TRUE vakues designate an event.
7+
#' @param merge_threshold An integer value specifying the threshold of the gap
8+
#' length below in units of time steps which
9+
#' gaps between events are ignored and the two events on either side of the gap
10+
#' are merged into a single events. Defaults to NA (ignored). Is ignored if
11+
#' \code{do_merge=FALSE}
12+
#' @param leng_threshold An integer specifying the minimum length required for
13+
#' creating an event. Defaults to 3.
14+
#' @param do_merge A logical specifying whether to merge events if the gap between
15+
#' them is small (smaller than \code{merge_threshold}).
16+
#'
17+
#' @return A data frame containing information about the start date and length
18+
#' of each detected event
19+
#' @export
20+
#'
21+
get_consecutive <- function(
22+
vec,
23+
merge_threshold = NA,
24+
leng_threshold = 3,
25+
do_merge = FALSE
26+
){
27+
28+
## replace NAs with FALSE (no drought). This is needed because of NAs at head or tail
29+
vec[ which(is.na(vec)) ] <- FALSE
30+
31+
## identifies periods where 'vec' true for consecutive days of length>leng_threshold and
32+
## creates data frame holding each instance's info: start of drought by index
33+
## in 'vec' and length (number of days thereafter)
34+
instances <- data.frame( idx_start=c(), len=c() )
35+
consecutive_vec <- rep( NA, length( vec ) )
36+
nvec <- 0
37+
ninst <- 0
38+
for ( idx in 1:length( vec ) ){
39+
if (vec[idx]){
40+
nvec <- nvec + 1
41+
} else {
42+
if (nvec >= leng_threshold) {
43+
## create instance
44+
ninst <- ninst + 1
45+
addrow <- data.frame( idx_start = idx-(nvec), len = nvec )
46+
instances <- rbind( instances, addrow )
47+
}
48+
nvec <- 0
49+
}
50+
consecutive_vec[idx] <- nvec
51+
}
52+
if (nvec > leng_threshold){
53+
## create a last instance if the last vec period extends to the end of the time series
54+
ninst <- ninst + 1
55+
addrow <- data.frame( idx_start=idx-(nvec), len=nvec )
56+
instances <- rbind( instances, addrow )
57+
}
58+
59+
# get info about gap between events
60+
instances <- instances |>
61+
mutate(gap_before = idx_start - (lag(idx_start) + lag(len)))
62+
63+
if (nrow(instances) > 0){
64+
if (do_merge && nrow(instances) > 1){
65+
66+
instances_merged <- instances[1,]
67+
idx_merged <- 1
68+
for (idx in 2:nrow(instances)){
69+
if (instances$gap_before[idx] > merge_threshold){
70+
71+
# create new sequence
72+
idx_merged <- idx_merged + 1
73+
instances_merged <- bind_rows(
74+
instances_merged,
75+
instances[idx,]
76+
)
77+
78+
# edit length of previously recorded instance
79+
instances_merged$len[idx_merged - 1] <- instances$idx_start[idx - 1] + instances$len[idx - 1] - instances_merged$idx_start[idx_merged - 1]
80+
}
81+
}
82+
83+
# if all is merged until the end
84+
instances_merged$len[idx_merged] <- instances$idx_start[idx] + instances$len[idx] - instances_merged$idx_start[idx_merged]
85+
86+
instances <- instances_merged[,c("idx_start", "len")]
87+
} else {
88+
instances <- instances[,c("idx_start", "len")]
89+
if (nrow(instances) == 1){
90+
if (instances$idx_start == 0)
91+
instances$idx_start <- 1
92+
}
93+
}
94+
95+
}
96+
97+
return( instances )
98+
}

vignettes/event_detection.Rmd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ knitr::opts_chunk$set(echo = TRUE)
1616
```{r message=FALSE}
1717
library(dplyr)
1818
library(ggplot2)
19-
library(GECOr)
19+
library(rgeco)
2020
library(here)
2121
```
2222

0 commit comments

Comments
 (0)