improve dtw using a vector-based algorithm

M3nin0 · M3nin0 · commit bf6fd8e8e324 · 2024-03-03T02:14:10.000Z
diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -37,6 +37,10 @@ C_kernel_modal <- function(x, ncols, nrows, band, window_size) {
     .Call(`_sits_C_kernel_modal`, x, ncols, nrows, band, window_size)
 }
 
+dtw2vec <- function(x, y) {
+    .Call(`_sits_dtw2vec`, x, y)
+}
+
 dtw <- function() {
     .Call(`_sits_dtw`)
 }
diff --git a/R/sits_som.R b/R/sits_som.R
@@ -54,9 +54,11 @@
 #' @param som_radius     Radius of SOM neighborhood.
 #' @param mode           Type of learning algorithm (default = "online").
 #'
-#' @note The sits package implements the \code{"dtw"} (Dynamic Time Warping)
-#'       similarity measure. All other similarity measurements are from
-#'       the \code{\link[kohonen:supersom]{kohonen::supersom (dist.fcts)}}
+#' @note The \code{sits} package implements the \code{"dtw"} (Dynamic Time
+#'       Warping) similarity measure using the vector-based DTW algorithm
+#'       from the \code{IncDTW} R package. All other similarity measurements
+#'       come from the
+#'       \code{\link[kohonen:supersom]{kohonen::supersom (dist.fcts)}}
 #'       function.
 #'
 #' @return
diff --git a/man/sits_som.Rd b/man/sits_som.Rd
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
@@ -141,6 +141,18 @@ BEGIN_RCPP
     return rcpp_result_gen;
 END_RCPP
 }
+// dtw2vec
+double dtw2vec(const arma::vec& x, const arma::vec& y);
+RcppExport SEXP _sits_dtw2vec(SEXP xSEXP, SEXP ySEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< const arma::vec& >::type x(xSEXP);
+    Rcpp::traits::input_parameter< const arma::vec& >::type y(ySEXP);
+    rcpp_result_gen = Rcpp::wrap(dtw2vec(x, y));
+    return rcpp_result_gen;
+END_RCPP
+}
 // dtw
 Rcpp::XPtr<DistanceFunctionPtr> dtw();
 RcppExport SEXP _sits_dtw() {
@@ -475,6 +487,7 @@ static const R_CallMethodDef CallEntries[] = {
     {"_sits_C_kernel_max", (DL_FUNC) &_sits_C_kernel_max, 5},
     {"_sits_C_kernel_var", (DL_FUNC) &_sits_C_kernel_var, 5},
     {"_sits_C_kernel_modal", (DL_FUNC) &_sits_C_kernel_modal, 5},
+    {"_sits_dtw2vec", (DL_FUNC) &_sits_dtw2vec, 2},
     {"_sits_dtw", (DL_FUNC) &_sits_dtw, 0},
     {"_sits_C_label_max_prob", (DL_FUNC) &_sits_C_label_max_prob, 1},
     {"_sits_linear_interp", (DL_FUNC) &_sits_linear_interp, 1},
diff --git a/src/kohonen_dtw.cpp b/src/kohonen_dtw.cpp
@@ -1,160 +1,163 @@
-#include <Rcpp.h>
-
-#include <cstdlib>
-#include <vector>
-#include <cmath>
-#include <algorithm>
-#include <stdexcept>
+#include <RcppArmadillo.h>
+// [[Rcpp::depends(RcppArmadillo)]]
 
 #include "./sits_types.h"
 
 using namespace Rcpp;
 
+
 /**
- * Compute the p-norm distance between two 1D C++ vectors.
+ * Minimum of 2 values.
  *
  * @description
- * The p-norm, also known as the Minkowski norm, is a generalized norm
- * calculation that includes several types of distances based on the value of p.
- *
- * Common values of p include:
+ * Auxiliary function to calculate the minimum value of `x` and `y`.
+ */
+double minval(double x, double y)
+{
+    // z > nan for z != nan is required by C the standard
+    int xnan = std::isnan(x), ynan = std::isnan(y);
+    if(xnan || ynan) {
+        if(xnan && !ynan) return y;
+        if(!xnan && ynan) return x;
+        return x;
+    }
+    return std::min(x,y);
+}
+
+
+/**
+ * Calculate the `symmetric2` step pattern.
  *
- *  - p = 1 for the Manhattan (city block) distance;
- *  - p = 2 for the Euclidean norm (distance).
+ * @description
+ * This function calculates the `symmetric2` step pattern, which uses a weight
+ * of 2 for the diagonal step and 1 for the vertical and horizontal to
+ * compensate for the favor of diagonal steps.
  *
- * More details about p-norms can be found on Wikipedia:
- * https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm
+ * @note
+ * For more information on this step pattern, visit the `IncDTW` package
+ * documentation: https://www.rdocumentation.org/packages/IncDTW/versions/1.1.4.4/topics/dtw2vec
  *
- * @param a A 1D vector representing the first point in an m-dimensional space.
- * @param b A 1D vector representing the second point in an m-dimensional space.
- * @param p The value of the norm to use, determining the type of distance
- *          calculated.
+ * @reference
+ * Leodolter, M., Plant, C., & Brändle, N. (2021). IncDTW: An R Package for
+ * Incremental Calculation of Dynamic Time Warping. Journal of Statistical
+ * Software, 99(9), 1–23. https://doi.org/10.18637/jss.v099.i09
  *
- * @note Both vectors 'a' and 'b' must have the same number of dimensions.
- * @note This function was adapted from the DTW implementation found at:
- *       https://github.com/cjekel/DTW_cpp
+ * Giorgino, T. (2009). Computing and Visualizing Dynamic Time Warping
+ * Alignments in R: The dtw Package. Journal of Statistical Software, 31(7),
+ * 1–24. https://doi.org/10.18637/jss.v031.i07
  *
- * @return The p-norm distance between vectors 'a' and 'b'.
+ * @return `symmetric2` step pattern value.
  */
-double p_norm(std::vector<double> a, std::vector<double> b, double p)
-{
-    double d = 0;
-
-    size_t index;
-    size_t a_size = a.size();
-
-    for (index = 0; index < a_size; index++)
-    {
-        d += std::pow(std::abs(a[index] - b[index]), p);
-    }
-    return std::pow(d, 1.0 / p);
+double calculate_step_pattern_symmetric2(
+    const double gcm10, // vertical
+    const double gcm11, // diagonal
+    const double gcm01, // horizontal
+    const double cm00
+) {
+    return(cm00 + minval(gcm10, minval(cm00 + gcm11, gcm01)));
 }
 
+
 /**
- * Compute the Dynamic Time Warping (DTW) distance between two 2D C++ vectors.
+ * Vector-based Dynamic Time Warping (DTW) distance.
  *
  * @description
  * This function calculates the Dynamic Time Warping (DTW) distance between
- * two sequences that can have a different number of data points but must
- * share the same number of dimensions. An exception is thrown if the dimensions
- * of the input vectors do not match.
+ * two sequences using the vector-based algorithm proposed by Leodolter
+ * et al. (2021).
  *
- * For more information on DTW, visit:
- * https://en.wikipedia.org/wiki/Dynamic_time_warping
+ * The complexity of this function, as presented by Leodolter et al. (2021), is
+ * equal to O(n).
  *
- * @param a A 2D vector representing the first sequence
- * @param b A 2D vector representing the second sequence.
- * @param p The value of p-norm to use for distance calculation.
+ * For more information on vector-based DTW, visit:
+ * https://doi.org/10.18637/jss.v099.i09
  *
- * @throws std::invalid_argument If the dimensions of 'a' and 'b' do not match.
+ * @param x A `arma::vec` with time-series values.
+ * @param y A `arma::vec` with time-series values.
  *
- * @note
- * Both vectors 'a', and 'b' should be structured as follows:
+ * @reference
+ * Leodolter, M., Plant, C., & Brändle, N. (2021). IncDTW: An R Package for
+ * Incremental Calculation of Dynamic Time Warping. Journal of Statistical
+ * Software, 99(9), 1–23. https://doi.org/10.18637/jss.v099.i09
  *
- *  [number_of_data_points][number_of_dimensions]
- *
- * allowing the DTW distance computation to adapt to any p-norm value specified.
- *
- * @note The implementation of this DTW distance calculation was adapted from:
- *       https://github.com/cjekel/DTW_cpp
+ * @note
+ * The implementation of this DTW distance calculation was adapted from the
+ * `IncDTW` R package.
  *
- * @return The DTW distance between the two input sequences.
+ * @return DTW distance.
  */
-double distance_dtw_op(std::vector<std::vector<double>> a,
-                       std::vector<std::vector<double>> b,
-                       double p)
+// [[Rcpp::export]]
+double dtw2vec(const arma::vec &x, const arma::vec &y)
 {
-    int n = a.size();
-    int o = b.size();
+    int nx = x.size();
+    int ny = y.size();
 
-    int a_m = a[0].size();
-    int b_m = b[0].size();
+    double *p1 = new double[nx];
+    double *p2 = new double[nx];
 
-    if (a_m != b_m)
-    {
-        throw std::invalid_argument(
-            "a and b must have the same number of dimensions!"
-        );
-    }
-    std::vector<std::vector<double>> d(n, std::vector<double>(o, 0.0));
-
-    d[0][0] = p_norm(a[0], b[0], p);
+    double *ptmp;
+    double ret;
 
-    for (int i = 1; i < n; i++)
-    {
-        d[i][0] = d[i - 1][0] + p_norm(a[i], b[0], p);
-    }
-    for (int i = 1; i < o; i++)
+    // first column
+    *p1 = std::abs(x[0] - y[0]);
+    for (int i = 1; i < nx; i++)
     {
-        d[0][i] = d[0][i - 1] + p_norm(a[0], b[i], p);
+        p1[i] = std::abs(x[i] - y[0]) + p1[i - 1];
     }
-    for (int i = 1; i < n; i++)
+
+    for (int j = 1; j < ny; j++)
     {
-        for (int j = 1; j < o; j++)
+        *p2 = std::abs(x[0] - y[j]) + *(p1);
+
+        for (int i = 1; i < nx; i++)
         {
-            d[i][j] = p_norm(a[i], b[j], p) + std::fmin(
-                std::fmin(d[i - 1][j], d[i][j - 1]), d[i - 1][j - 1]
-            );
+            *(p2 + i) = calculate_step_pattern_symmetric2(*(p2 + i - 1), *(p1 + i - 1), *(p1 + i), std::abs(x[i] - y[j]));
         }
+        ptmp = p1;
+        p1 = p2;
+        p2 = ptmp;
     }
-    return d[n - 1][o - 1];
+
+    ret = *(p1 + nx - 1); // p1[nx-1]
+
+    delete[] p1;
+    delete[] p2;
+
+    return (ret);
 }
 
+
 /**
  * Dynamic Time Warping (DTW) distance wrapper.
  *
  * @description
  * This function calculates prepare data from `Kohonen` package and calculate
- * the DTW distance between two array of points.
+ * the DTW distance between two 1D time-series.
  *
- * @param a A 2D vector representing the first sequence.
- * @param b A 2D vector representing the second sequence.
- * @param np Number of points in vectors `a` and `b`.
- * @param nNA Number of NA values in the vectors `a` and `b`.
+ * @param p1 A 1D array representing the first time-series.
+ * @param p2 A 1D array representing the second time-series.
+ * @param np Number of points in arrays `p1` and `p2`.
+ * @param nNA Number of NA values in the arrays `p1` and `p2`.
  *
  * @note The function signature was created following the `Kohonen` R package
  *        specifications for custom distance functions.
  *
- *
- * @return The DTW distance between the two input sequences.
+ * @return The DTW distance between two time-series.
  */
 double kohonen_dtw(double *p1, double *p2, int np, int nNA)
 {
-    std::vector<double> p1_data(p1, p1 + np);
-    std::vector<double> p2_data(p2, p2 + np);
+    arma::vec p1_vec(p1, np, false);
+    arma::vec p2_vec(p2, np, false);
 
-    std::vector<std::vector<double>> p1_vec = {p1_data};
-    std::vector<std::vector<double>> p2_vec = {p2_data};
-
-    // p-norm fixed in 2 (equivalent to euclidean distance)
-    return (distance_dtw_op(p1_vec, p2_vec, 2));
+    return dtw2vec(p1_vec, p2_vec);
 }
 
+
 // [[Rcpp::export]]
 Rcpp::XPtr<DistanceFunctionPtr> dtw()
 {
     // Returns a External Pointer, which is used by the `kohonen` package
     // https://cran.r-project.org/doc/manuals/R-exts.html#External-pointers-and-weak-references
     return (Rcpp::XPtr<DistanceFunctionPtr>(new DistanceFunctionPtr(
-            &kohonen_dtw)));
+        &kohonen_dtw)));
 }
diff --git a/tests/testthat/test-som.R b/tests/testthat/test-som.R
@@ -1,3 +1,21 @@
+test_that("Calculating time-series distances using DTW", {
+    set.seed(2903)
+
+    # test 1 (example from `IncDTW` article)
+    t1 <- c(3, 4, 5, 6)
+    t2 <- c(1, 3, 3, 5, 6)
+
+    expect_equal(dtw2vec(t1, t2), 3)
+
+    # test 2 (using ndvi time-series)
+    t1 <- c(0.5001, 0.5060, 0.5079, 0.7141, 0.4262)
+    t2 <- c(0.3974, 0.5144, 0.5052, 0.6463, 0.5139)
+
+    # 0.4323 -> Value confirmed using `dtw` and `IncDTW` packages as reference
+    expect_equal(dtw2vec(t1, t2), 0.4323)
+})
+
+
 test_that("Creating clustering using Self-organizing Maps with DTW distance", {
     set.seed(2903)
 

Original file line number	Diff line number	Diff line change
`@@ -37,6 +37,10 @@ C_kernel_modal <- function(x, ncols, nrows, band, window_size) {`
`37`	`37`	.Call(`_sits_C_kernel_modal`, x, ncols, nrows, band, window_size)
`38`	`38`	`}`
`39`	`39`
	`40`	`+dtw2vec <- function(x, y) {`
	`41`	+ .Call(`_sits_dtw2vec`, x, y)
	`42`	`+}`
	`43`	`+`
`40`	`44`	`dtw <- function() {`
`41`	`45`	.Call(`_sits_dtw`)
`42`	`46`	`}`