generated from geco-bern/R_proj_template
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
216 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
#' Aligns data by events | ||
#' | ||
#' Uses a vectory specifying whether data falls into an event to reshape data, aligning by the onset of the event | ||
#' | ||
#' @param df A data frame containing all data continuously along time, required columns: \code{"site", "date"}. | ||
#' @param events A data frame with columns \code{idx_start} and \code{len}, specifying event start and length, referring to the row index of \code{df}. | ||
#' \code{events} is the output of a function call to \code{get_consecutive}. | ||
#' @param dovars A vector of character strings specifying which columns (by column name) of \code{df} to re-arrange. | ||
#' @param leng_threshold An integer specifying the minum number of consecutive dates required to define an event. | ||
#' All events of length lower than \code{leng_threshold} are dropped. | ||
#' @param before An integer specifying the number of days before the event onset to be retained in re-arranged data | ||
#' @param after An integer specifying the number of days after the event onset to be retained in re-arranged data | ||
#' @param do_norm A logical specifying whether re-arranged data is to be normalised by the median value of the bin | ||
#' (number of bins given by argument \code{nbins}) before the event onset, given by argument \code{normbin}. Defaults to \code{FALSE}. | ||
#' @param nbins An integer, specifying the number of bins used to determine median values before event onset. Only used when code{do_norm=TRUE}. Defaults to 6. | ||
#' @param normbin An integer, specifying the bin number just before the event onset, used for normalisation. Only used when code{do_norm=TRUE}. Defaults to 2. | ||
#' | ||
#' @return A named list of data frames (\code{list( "df_idx_event", "df_idx_event_aggbyidx_event")}) containing data from all events and \code{before + after} | ||
#' dates (relative to event onset) with additional columns named \code{"inst"}, defining the event number (instance), and \code{"idx_event"}, defining | ||
#' the date relative to the respective event onset. The data frame \code{"df_idx_event"} contains rearranged, but otherwise unchanged data (unless | ||
#' \code{do_norm}=TRUE). The data frame \code{"df_idx_event_aggbyidx_event"} containes data aggregated across events with the mean and quantiles given for each | ||
#' \code{"idx_event"}. | ||
#' @export | ||
#' | ||
#' @examples df_alg <- align_events( df, truefalse, before=30, after=300 ) | ||
#' | ||
align_events <- function( | ||
df, | ||
events, | ||
dovars = names(df), | ||
leng_threshold, | ||
before, | ||
after, | ||
do_norm=FALSE, | ||
nbins=6, | ||
normbin=2 | ||
){ | ||
##-------------------------------------------------------- | ||
## Re-arrange data, aligning by beginning of events | ||
## Creates data frame where not all rows are retained from df | ||
## and columns added for 'idx_event' (number of day relative to onset of event) | ||
## and 'iinst' number of event to which row belongs. | ||
##-------------------------------------------------------- | ||
if (nrow(events) > 1){ | ||
|
||
df_idx_event <- tibble() | ||
for ( iinst in 1:nrow(events) ){ | ||
# idx_event = 0 is beginning of event | ||
idx_event <- seq( from = -before, | ||
to = events$len[iinst], | ||
by = 1 | ||
) | ||
idxs <- idx_event + events$idx_start[iinst] | ||
|
||
# avoid negative row indexes (possible consequence of using 'before > 0') | ||
drophead <- which( idxs < 1 ) | ||
if (length(drophead) > 0){ | ||
idxs <- idxs[ -drophead ] | ||
idx_event <- idx_event[ -drophead ] | ||
} | ||
addrows <- df |> | ||
slice( idxs ) |> | ||
mutate( idx_event = idx_event, | ||
inst = iinst | ||
) | ||
df_idx_event <- df_idx_event |> | ||
bind_rows( addrows ) | ||
} | ||
|
||
##-------------------------------------------------------- | ||
## Normalise re-arranged data relative to a certain bin's median | ||
##-------------------------------------------------------- | ||
if (do_norm){ | ||
## Bins for different variables | ||
bins <- seq( | ||
from = -before, | ||
to = after, | ||
by = (after + before + 1)/nbins ) | ||
|
||
## add bin information based on idx_event to expanded df | ||
df_idx_event <- df_idx_event |> | ||
mutate( | ||
inbin = cut( as.numeric(idx_event), breaks = bins ) | ||
) | ||
|
||
tmp <- df_idx_event |> | ||
dplyr::filter(!is.na(inbin)) |> | ||
group_by( inbin ) |> | ||
summarise_at( vars(one_of(dovars)), funs(median( ., na.rm=TRUE )) ) | ||
|
||
norm <- slice(tmp, normbin) | ||
|
||
## subtract from all values | ||
df_idx_event <- df_idx_event |> mutate_at( vars(one_of(dovars)), funs(. - norm$.) ) | ||
|
||
} | ||
|
||
} else { | ||
|
||
df_idx_event <- NULL | ||
|
||
} | ||
|
||
return( df_idx_event ) | ||
|
||
} | ||
|
||
q33 <- function( vec, ... ){ | ||
quantile( vec, 0.33, ...) | ||
} | ||
|
||
q66 <- function( vec, ... ){ | ||
quantile( vec, 0.66, ...) | ||
} | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
#' Identify events | ||
#' | ||
#' Identifies events as periods where of consecutively TRUE values in a boolean | ||
#' vector. | ||
#' | ||
#' @param vec A vector of boolean values. Consecutive TRUE vakues designate an event. | ||
#' @param merge_threshold An integer value specifying the threshold of the gap | ||
#' length below in units of time steps which | ||
#' gaps between events are ignored and the two events on either side of the gap | ||
#' are merged into a single events. Defaults to NA (ignored). Is ignored if | ||
#' \code{do_merge=FALSE} | ||
#' @param leng_threshold An integer specifying the minimum length required for | ||
#' creating an event. Defaults to 3. | ||
#' @param do_merge A logical specifying whether to merge events if the gap between | ||
#' them is small (smaller than \code{merge_threshold}). | ||
#' | ||
#' @return A data frame containing information about the start date and length | ||
#' of each detected event | ||
#' @export | ||
#' | ||
get_consecutive <- function( | ||
vec, | ||
merge_threshold = NA, | ||
leng_threshold = 3, | ||
do_merge = FALSE | ||
){ | ||
|
||
## replace NAs with FALSE (no drought). This is needed because of NAs at head or tail | ||
vec[ which(is.na(vec)) ] <- FALSE | ||
|
||
## identifies periods where 'vec' true for consecutive days of length>leng_threshold and | ||
## creates data frame holding each instance's info: start of drought by index | ||
## in 'vec' and length (number of days thereafter) | ||
instances <- data.frame( idx_start=c(), len=c() ) | ||
consecutive_vec <- rep( NA, length( vec ) ) | ||
nvec <- 0 | ||
ninst <- 0 | ||
for ( idx in 1:length( vec ) ){ | ||
if (vec[idx]){ | ||
nvec <- nvec + 1 | ||
} else { | ||
if (nvec >= leng_threshold) { | ||
## create instance | ||
ninst <- ninst + 1 | ||
addrow <- data.frame( idx_start = idx-(nvec), len = nvec ) | ||
instances <- rbind( instances, addrow ) | ||
} | ||
nvec <- 0 | ||
} | ||
consecutive_vec[idx] <- nvec | ||
} | ||
if (nvec > leng_threshold){ | ||
## create a last instance if the last vec period extends to the end of the time series | ||
ninst <- ninst + 1 | ||
addrow <- data.frame( idx_start=idx-(nvec), len=nvec ) | ||
instances <- rbind( instances, addrow ) | ||
} | ||
|
||
# get info about gap between events | ||
instances <- instances |> | ||
mutate(gap_before = idx_start - (lag(idx_start) + lag(len))) | ||
|
||
if (nrow(instances) > 0){ | ||
if (do_merge && nrow(instances) > 1){ | ||
|
||
instances_merged <- instances[1,] | ||
idx_merged <- 1 | ||
for (idx in 2:nrow(instances)){ | ||
if (instances$gap_before[idx] > merge_threshold){ | ||
|
||
# create new sequence | ||
idx_merged <- idx_merged + 1 | ||
instances_merged <- bind_rows( | ||
instances_merged, | ||
instances[idx,] | ||
) | ||
|
||
# edit length of previously recorded instance | ||
instances_merged$len[idx_merged - 1] <- instances$idx_start[idx - 1] + instances$len[idx - 1] - instances_merged$idx_start[idx_merged - 1] | ||
} | ||
} | ||
|
||
# if all is merged until the end | ||
instances_merged$len[idx_merged] <- instances$idx_start[idx] + instances$len[idx] - instances_merged$idx_start[idx_merged] | ||
|
||
instances <- instances_merged[,c("idx_start", "len")] | ||
} else { | ||
instances <- instances[,c("idx_start", "len")] | ||
if (nrow(instances) == 1){ | ||
if (instances$idx_start == 0) | ||
instances$idx_start <- 1 | ||
} | ||
} | ||
|
||
} | ||
|
||
return( instances ) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters