|
| 1 | +#' Aligns data by events |
| 2 | +#' |
| 3 | +#' Uses a vectory specifying whether data falls into an event to reshape data, aligning by the onset of the event |
| 4 | +#' |
| 5 | +#' @param df A data frame containing all data continuously along time, required columns: \code{"site", "date"}. |
| 6 | +#' @param events A data frame with columns \code{idx_start} and \code{len}, specifying event start and length, referring to the row index of \code{df}. |
| 7 | +#' \code{events} is the output of a function call to \code{get_consecutive}. |
| 8 | +#' @param dovars A vector of character strings specifying which columns (by column name) of \code{df} to re-arrange. |
| 9 | +#' @param leng_threshold An integer specifying the minum number of consecutive dates required to define an event. |
| 10 | +#' All events of length lower than \code{leng_threshold} are dropped. |
| 11 | +#' @param before An integer specifying the number of days before the event onset to be retained in re-arranged data |
| 12 | +#' @param after An integer specifying the number of days after the event onset to be retained in re-arranged data |
| 13 | +#' @param do_norm A logical specifying whether re-arranged data is to be normalised by the median value of the bin |
| 14 | +#' (number of bins given by argument \code{nbins}) before the event onset, given by argument \code{normbin}. Defaults to \code{FALSE}. |
| 15 | +#' @param nbins An integer, specifying the number of bins used to determine median values before event onset. Only used when code{do_norm=TRUE}. Defaults to 6. |
| 16 | +#' @param normbin An integer, specifying the bin number just before the event onset, used for normalisation. Only used when code{do_norm=TRUE}. Defaults to 2. |
| 17 | +#' |
| 18 | +#' @return A named list of data frames (\code{list( "df_idx_event", "df_idx_event_aggbyidx_event")}) containing data from all events and \code{before + after} |
| 19 | +#' dates (relative to event onset) with additional columns named \code{"inst"}, defining the event number (instance), and \code{"idx_event"}, defining |
| 20 | +#' the date relative to the respective event onset. The data frame \code{"df_idx_event"} contains rearranged, but otherwise unchanged data (unless |
| 21 | +#' \code{do_norm}=TRUE). The data frame \code{"df_idx_event_aggbyidx_event"} containes data aggregated across events with the mean and quantiles given for each |
| 22 | +#' \code{"idx_event"}. |
| 23 | +#' @export |
| 24 | +#' |
| 25 | +#' @examples df_alg <- align_events( df, truefalse, before=30, after=300 ) |
| 26 | +#' |
| 27 | +align_events <- function( |
| 28 | + df, |
| 29 | + events, |
| 30 | + dovars = names(df), |
| 31 | + leng_threshold, |
| 32 | + before, |
| 33 | + after, |
| 34 | + do_norm=FALSE, |
| 35 | + nbins=6, |
| 36 | + normbin=2 |
| 37 | + ){ |
| 38 | + ##-------------------------------------------------------- |
| 39 | + ## Re-arrange data, aligning by beginning of events |
| 40 | + ## Creates data frame where not all rows are retained from df |
| 41 | + ## and columns added for 'idx_event' (number of day relative to onset of event) |
| 42 | + ## and 'iinst' number of event to which row belongs. |
| 43 | + ##-------------------------------------------------------- |
| 44 | + if (nrow(events) > 1){ |
| 45 | + |
| 46 | + df_idx_event <- tibble() |
| 47 | + for ( iinst in 1:nrow(events) ){ |
| 48 | + # idx_event = 0 is beginning of event |
| 49 | + idx_event <- seq( from = -before, |
| 50 | + to = events$len[iinst], |
| 51 | + by = 1 |
| 52 | + ) |
| 53 | + idxs <- idx_event + events$idx_start[iinst] |
| 54 | + |
| 55 | + # avoid negative row indexes (possible consequence of using 'before > 0') |
| 56 | + drophead <- which( idxs < 1 ) |
| 57 | + if (length(drophead) > 0){ |
| 58 | + idxs <- idxs[ -drophead ] |
| 59 | + idx_event <- idx_event[ -drophead ] |
| 60 | + } |
| 61 | + addrows <- df |> |
| 62 | + slice( idxs ) |> |
| 63 | + mutate( idx_event = idx_event, |
| 64 | + inst = iinst |
| 65 | + ) |
| 66 | + df_idx_event <- df_idx_event |> |
| 67 | + bind_rows( addrows ) |
| 68 | + } |
| 69 | + |
| 70 | + ##-------------------------------------------------------- |
| 71 | + ## Normalise re-arranged data relative to a certain bin's median |
| 72 | + ##-------------------------------------------------------- |
| 73 | + if (do_norm){ |
| 74 | + ## Bins for different variables |
| 75 | + bins <- seq( |
| 76 | + from = -before, |
| 77 | + to = after, |
| 78 | + by = (after + before + 1)/nbins ) |
| 79 | + |
| 80 | + ## add bin information based on idx_event to expanded df |
| 81 | + df_idx_event <- df_idx_event |> |
| 82 | + mutate( |
| 83 | + inbin = cut( as.numeric(idx_event), breaks = bins ) |
| 84 | + ) |
| 85 | + |
| 86 | + tmp <- df_idx_event |> |
| 87 | + dplyr::filter(!is.na(inbin)) |> |
| 88 | + group_by( inbin ) |> |
| 89 | + summarise_at( vars(one_of(dovars)), funs(median( ., na.rm=TRUE )) ) |
| 90 | + |
| 91 | + norm <- slice(tmp, normbin) |
| 92 | + |
| 93 | + ## subtract from all values |
| 94 | + df_idx_event <- df_idx_event |> mutate_at( vars(one_of(dovars)), funs(. - norm$.) ) |
| 95 | + |
| 96 | + } |
| 97 | + |
| 98 | + } else { |
| 99 | + |
| 100 | + df_idx_event <- NULL |
| 101 | + |
| 102 | + } |
| 103 | + |
| 104 | + return( df_idx_event ) |
| 105 | + |
| 106 | +} |
| 107 | + |
| 108 | +q33 <- function( vec, ... ){ |
| 109 | + quantile( vec, 0.33, ...) |
| 110 | +} |
| 111 | + |
| 112 | +q66 <- function( vec, ... ){ |
| 113 | + quantile( vec, 0.66, ...) |
| 114 | +} |
| 115 | + |
| 116 | + |
| 117 | + |
0 commit comments