Skip to content

Commit

Permalink
added two functions
Browse files Browse the repository at this point in the history
  • Loading branch information
stineb committed Mar 28, 2024
1 parent bad9741 commit 44b7098
Show file tree
Hide file tree
Showing 3 changed files with 216 additions and 1 deletion.
117 changes: 117 additions & 0 deletions R/align_events.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
#' Aligns data by events
#'
#' Uses a vector specifying whether data falls within an event to reshape data, aligning by the onset of the event
#'
#' @param df A data frame containing all data continuously along time, required columns: \code{"site", "date"}.
#' @param events A data frame with columns \code{idx_start} and \code{len}, specifying event start and length, referring to the row index of \code{df}.
#' \code{events} is the output of a function call to \code{get_consecutive}.
#' @param dovars A vector of character strings specifying which columns (by column name) of \code{df} to re-arrange.
#' @param leng_threshold An integer specifying the minimum number of consecutive dates required to define an event.
#' All events of length lower than \code{leng_threshold} are dropped.
#' @param before An integer specifying the number of days before the event onset to be retained in re-arranged data
#' @param after An integer specifying the number of days after the event onset to be retained in re-arranged data
#' @param do_norm A logical specifying whether re-arranged data is to be normalised by the median value of the bin
#' (number of bins given by argument \code{nbins}) before the event onset, given by argument \code{normbin}. Defaults to \code{FALSE}.
#' @param nbins An integer, specifying the number of bins used to determine median values before event onset. Only used when \code{do_norm=TRUE}. Defaults to 6.
#' @param normbin An integer, specifying the bin number just before the event onset, used for normalisation. Only used when \code{do_norm=TRUE}. Defaults to 2.
#'
#' @return A named list of data frames (\code{list( "df_idx_event", "df_idx_event_aggbyidx_event")}) containing data from all events and \code{before + after}
#' dates (relative to event onset) with additional columns named \code{"inst"}, defining the event number (instance), and \code{"idx_event"}, defining
#' the date relative to the respective event onset. The data frame \code{"df_idx_event"} contains rearranged, but otherwise unchanged data (unless
#' \code{do_norm}=TRUE). The data frame \code{"df_idx_event_aggbyidx_event"} contains data aggregated across events with the mean and quantiles given for each
#' \code{"idx_event"}.
#' @export
#'
#' @examples df_alg <- align_events( df, events, leng_threshold = 3, before = 30, after = 300 )
#'
align_events <- function(
    df,
    events,
    dovars = names(df),
    leng_threshold,
    before,
    after,
    do_norm = FALSE,
    nbins = 6,
    normbin = 2
){
  ##--------------------------------------------------------
  ## Re-arranges data, aligning by the beginning of events.
  ## Produces a data frame in which not all rows of 'df' are retained, with
  ## columns added for 'idx_event' (day relative to the event onset) and
  ## 'inst' (number of the event to which the row belongs).
  ## NOTE(review): 'leng_threshold' is accepted but never used here; events
  ## are presumably pre-filtered by length in get_consecutive() — confirm.
  ##--------------------------------------------------------
  if (nrow(events) > 1){

    ## collect per-event slices in a preallocated list and bind once,
    ## instead of growing a data frame inside the loop
    list_slices <- vector("list", nrow(events))
    for (iinst in seq_len(nrow(events))){

      ## idx_event = 0 marks the beginning of the event
      ## NOTE(review): the window extends to events$len[iinst], not 'after';
      ## 'after' is only used for binning below — confirm intended.
      idx_event <- seq(from = -before, to = events$len[iinst], by = 1)
      idxs <- idx_event + events$idx_start[iinst]

      ## avoid non-positive row indexes (possible when 'before' > 0 and the
      ## event starts near the head of 'df')
      keep <- idxs >= 1
      idxs <- idxs[keep]
      idx_event <- idx_event[keep]

      list_slices[[iinst]] <- df |>
        slice(idxs) |>
        mutate(idx_event = idx_event,
               inst = iinst)
    }
    df_idx_event <- bind_rows(list_slices)

    ##--------------------------------------------------------
    ## Normalise re-arranged data relative to a certain bin's median
    ##--------------------------------------------------------
    if (do_norm){

      ## bin boundaries spanning -before .. after
      bins <- seq(from = -before,
                  to = after,
                  by = (after + before + 1) / nbins)

      ## add bin information based on idx_event to the expanded df
      df_idx_event <- df_idx_event |>
        mutate(inbin = cut(as.numeric(idx_event), breaks = bins))

      ## per-bin medians of all 'dovars'; row 'normbin' is the reference
      ## bin just before the event onset
      norm <- df_idx_event |>
        dplyr::filter(!is.na(inbin)) |>
        group_by(inbin) |>
        summarise(across(all_of(dovars), ~ median(.x, na.rm = TRUE))) |>
        slice(normbin)

      ## Subtract each variable's own reference median from all its values.
      ## Bug fix: the original funs(. - norm$.) referenced a literal column
      ## named '.' in 'norm', which does not exist; each variable must be
      ## normalised by its own median. This also replaces the deprecated
      ## summarise_at()/mutate_at()/funs() API with across().
      df_idx_event <- df_idx_event |>
        mutate(across(all_of(dovars), ~ .x - norm[[cur_column()]]))

    }

  } else {

    ## fewer than two events: nothing to align
    ## NOTE(review): a single event (nrow(events) == 1) also yields NULL —
    ## confirm this is intended rather than nrow(events) > 0.
    df_idx_event <- NULL

  }

  return(df_idx_event)

}

#' Lower-tercile quantile
#'
#' @param vec A numeric vector.
#' @param ... Further arguments passed on to \code{quantile()}, e.g.
#'   \code{na.rm = TRUE}.
#' @return The 0.33 quantile of \code{vec} (named numeric of length one).
q33 <- function(vec, ...) {
  quantile(vec, probs = 0.33, ...)
}

#' Upper-tercile quantile
#'
#' @param vec A numeric vector.
#' @param ... Further arguments passed on to \code{quantile()}, e.g.
#'   \code{na.rm = TRUE}.
#' @return The 0.66 quantile of \code{vec} (named numeric of length one).
q66 <- function(vec, ...) {
  quantile(vec, probs = 0.66, ...)
}



98 changes: 98 additions & 0 deletions R/get_consecutive.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#' Identify events
#'
#' Identifies events as periods of consecutive TRUE values in a boolean
#' vector.
#'
#' @param vec A vector of boolean values. Consecutive TRUE values designate an event.
#' @param merge_threshold An integer specifying the maximum gap length, in
#' units of time steps, below which the gap between two events is ignored
#' and the events on either side of the gap are merged into a single event.
#' Defaults to NA (ignored). Is ignored if
#' \code{do_merge=FALSE}
#' @param leng_threshold An integer specifying the minimum length required for
#' creating an event. Defaults to 3.
#' @param do_merge A logical specifying whether to merge events if the gap between
#' them is small (smaller than \code{merge_threshold}).
#'
#' @return A data frame containing information about the start date and length
#' of each detected event
#' @export
#'
get_consecutive <- function(
    vec,
    merge_threshold = NA,
    leng_threshold = 3,
    do_merge = FALSE
){

  ## replace NAs with FALSE (no event). This is needed because of NAs at
  ## the head or tail of the series.
  vec[is.na(vec)] <- FALSE

  ## Scan for runs of consecutive TRUE of length >= leng_threshold and
  ## record each run's start (row index into 'vec') and its length.
  ## Bug fix: initialise with typed zero-length columns — the original
  ## data.frame(idx_start=c(), len=c()) created a ZERO-COLUMN frame, which
  ## made the gap computation below error whenever no event was found.
  instances <- data.frame(idx_start = integer(0), len = integer(0))
  nvec <- 0
  for (idx in seq_along(vec)){
    if (vec[idx]){
      nvec <- nvec + 1
    } else {
      ## 'idx' is the first FALSE after a run, so the run started at idx - nvec
      if (nvec >= leng_threshold){
        instances <- rbind(instances,
                           data.frame(idx_start = idx - nvec, len = nvec))
      }
      nvec <- 0
    }
  }
  ## Handle a run that extends to the end of the series.
  ## Bug fixes: use >= (not >) so a trailing run of exactly leng_threshold
  ## is kept, consistent with the in-loop test; start at idx - nvec + 1
  ## because 'idx' here is the LAST TRUE position, not the first FALSE after
  ## the run (the original idx - nvec was off by one, which is also why the
  ## old idx_start == 0 patch existed — no longer needed).
  if (nvec >= leng_threshold){
    instances <- rbind(instances,
                       data.frame(idx_start = idx - nvec + 1, len = nvec))
  }

  if (do_merge && nrow(instances) > 1){

    ## gap (time steps) between each event and the end of the previous one;
    ## base-R equivalent of idx_start - (lag(idx_start) + lag(len))
    n_inst <- nrow(instances)
    gap_before <- instances$idx_start -
      c(NA, instances$idx_start[-n_inst] + instances$len[-n_inst])

    instances_merged <- instances[1, ]
    idx_merged <- 1
    for (idx in 2:n_inst){
      if (gap_before[idx] > merge_threshold){

        ## gap too large to merge: start a new merged event ...
        idx_merged <- idx_merged + 1
        instances_merged <- rbind(instances_merged, instances[idx, ])

        ## ... and extend the previous merged event so it covers everything
        ## merged into it so far (through instances[idx - 1, ])
        instances_merged$len[idx_merged - 1] <-
          instances$idx_start[idx - 1] + instances$len[idx - 1] -
          instances_merged$idx_start[idx_merged - 1]
      }
    }

    ## extend the last merged event in case the tail of 'instances' was
    ## merged into it (no-op when the last event was not merged)
    instances_merged$len[idx_merged] <-
      instances$idx_start[n_inst] + instances$len[n_inst] -
      instances_merged$idx_start[idx_merged]

    instances <- instances_merged
  }

  ## return only start index and length
  instances <- instances[, c("idx_start", "len")]

  return(instances)
}
2 changes: 1 addition & 1 deletion vignettes/event_detection.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ knitr::opts_chunk$set(echo = TRUE)
```{r message=FALSE}
library(dplyr)
library(ggplot2)
library(GECOr)
library(rgeco)
library(here)
```

Expand Down

0 comments on commit 44b7098

Please sign in to comment.