Skip to content

Commit bf6fd8e

Browse files
committed
improve dtw using a vector-based algorithm
1 parent 3387dca commit bf6fd8e

File tree

6 files changed

+146
-104
lines changed

6 files changed

+146
-104
lines changed

R/RcppExports.R

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ C_kernel_modal <- function(x, ncols, nrows, band, window_size) {
3737
.Call(`_sits_C_kernel_modal`, x, ncols, nrows, band, window_size)
3838
}
3939

40+
dtw2vec <- function(x, y) {
41+
.Call(`_sits_dtw2vec`, x, y)
42+
}
43+
4044
dtw <- function() {
4145
.Call(`_sits_dtw`)
4246
}

R/sits_som.R

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,11 @@
5454
#' @param som_radius Radius of SOM neighborhood.
5555
#' @param mode Type of learning algorithm (default = "online").
5656
#'
57-
#' @note The sits package implements the \code{"dtw"} (Dynamic Time Warping)
58-
#' similarity measure. All other similarity measurements are from
59-
#' the \code{\link[kohonen:supersom]{kohonen::supersom (dist.fcts)}}
57+
#' @note The \code{sits} package implements the \code{"dtw"} (Dynamic Time
58+
#' Warping) similarity measure using the vector-based DTW algorithm
59+
#' from the \code{IncDTW} R package. All other similarity measurements
60+
#' come from the
61+
#' \code{\link[kohonen:supersom]{kohonen::supersom (dist.fcts)}}
6062
#' function.
6163
#'
6264
#' @return

man/sits_som.Rd

Lines changed: 5 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/RcppExports.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,18 @@ BEGIN_RCPP
141141
return rcpp_result_gen;
142142
END_RCPP
143143
}
144+
// dtw2vec
145+
double dtw2vec(const arma::vec& x, const arma::vec& y);
146+
RcppExport SEXP _sits_dtw2vec(SEXP xSEXP, SEXP ySEXP) {
147+
BEGIN_RCPP
148+
Rcpp::RObject rcpp_result_gen;
149+
Rcpp::RNGScope rcpp_rngScope_gen;
150+
Rcpp::traits::input_parameter< const arma::vec& >::type x(xSEXP);
151+
Rcpp::traits::input_parameter< const arma::vec& >::type y(ySEXP);
152+
rcpp_result_gen = Rcpp::wrap(dtw2vec(x, y));
153+
return rcpp_result_gen;
154+
END_RCPP
155+
}
144156
// dtw
145157
Rcpp::XPtr<DistanceFunctionPtr> dtw();
146158
RcppExport SEXP _sits_dtw() {
@@ -475,6 +487,7 @@ static const R_CallMethodDef CallEntries[] = {
475487
{"_sits_C_kernel_max", (DL_FUNC) &_sits_C_kernel_max, 5},
476488
{"_sits_C_kernel_var", (DL_FUNC) &_sits_C_kernel_var, 5},
477489
{"_sits_C_kernel_modal", (DL_FUNC) &_sits_C_kernel_modal, 5},
490+
{"_sits_dtw2vec", (DL_FUNC) &_sits_dtw2vec, 2},
478491
{"_sits_dtw", (DL_FUNC) &_sits_dtw, 0},
479492
{"_sits_C_label_max_prob", (DL_FUNC) &_sits_C_label_max_prob, 1},
480493
{"_sits_linear_interp", (DL_FUNC) &_sits_linear_interp, 1},

src/kohonen_dtw.cpp

Lines changed: 101 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -1,160 +1,163 @@
1-
#include <Rcpp.h>
2-
3-
#include <cstdlib>
4-
#include <vector>
5-
#include <cmath>
6-
#include <algorithm>
7-
#include <stdexcept>
1+
#include <RcppArmadillo.h>
2+
// [[Rcpp::depends(RcppArmadillo)]]
83

94
#include "./sits_types.h"
105

116
using namespace Rcpp;
127

8+
139
/**
14-
* Compute the p-norm distance between two 1D C++ vectors.
10+
* Minimum of 2 values.
1511
*
1612
* @description
17-
* The p-norm, also known as the Minkowski norm, is a generalized norm
18-
* calculation that includes several types of distances based on the value of p.
19-
*
20-
* Common values of p include:
13+
* Auxiliary function to calculate the minimum value of `x` and `y`.
14+
*/
15+
double minval(double x, double y)
16+
{
17+
// z > nan for z != nan is required by C the standard
18+
int xnan = std::isnan(x), ynan = std::isnan(y);
19+
if(xnan || ynan) {
20+
if(xnan && !ynan) return y;
21+
if(!xnan && ynan) return x;
22+
return x;
23+
}
24+
return std::min(x,y);
25+
}
26+
27+
28+
/**
29+
* Calculate the `symmetric2` step pattern.
2130
*
22-
* - p = 1 for the Manhattan (city block) distance;
23-
* - p = 2 for the Euclidean norm (distance).
31+
* @description
32+
* This function calculates the `symmetric2` step pattern, which uses a weight
33+
* of 2 for the diagonal step and 1 for the vertical and horizontal to
34+
* compensate for the favor of diagonal steps.
2435
*
25-
* More details about p-norms can be found on Wikipedia:
26-
* https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm
36+
* @note
37+
* For more information on this step pattern, visit the `IncDTW` package
38+
* documentation: https://www.rdocumentation.org/packages/IncDTW/versions/1.1.4.4/topics/dtw2vec
2739
*
28-
* @param a A 1D vector representing the first point in an m-dimensional space.
29-
* @param b A 1D vector representing the second point in an m-dimensional space.
30-
* @param p The value of the norm to use, determining the type of distance
31-
* calculated.
40+
* @reference
41+
* Leodolter, M., Plant, C., & Brändle, N. (2021). IncDTW: An R Package for
42+
* Incremental Calculation of Dynamic Time Warping. Journal of Statistical
43+
* Software, 99(9), 1–23. https://doi.org/10.18637/jss.v099.i09
3244
*
33-
* @note Both vectors 'a' and 'b' must have the same number of dimensions.
34-
* @note This function was adapted from the DTW implementation found at:
35-
* https://github.com/cjekel/DTW_cpp
45+
* Giorgino, T. (2009). Computing and Visualizing Dynamic Time Warping
46+
* Alignments in R: The dtw Package. Journal of Statistical Software, 31(7),
47+
* 1–24. https://doi.org/10.18637/jss.v031.i07
3648
*
37-
* @return The p-norm distance between vectors 'a' and 'b'.
49+
* @return `symmetric2` step pattern value.
3850
*/
39-
double p_norm(std::vector<double> a, std::vector<double> b, double p)
40-
{
41-
double d = 0;
42-
43-
size_t index;
44-
size_t a_size = a.size();
45-
46-
for (index = 0; index < a_size; index++)
47-
{
48-
d += std::pow(std::abs(a[index] - b[index]), p);
49-
}
50-
return std::pow(d, 1.0 / p);
51+
double calculate_step_pattern_symmetric2(
52+
const double gcm10, // vertical
53+
const double gcm11, // diagonal
54+
const double gcm01, // horizontal
55+
const double cm00
56+
) {
57+
return(cm00 + minval(gcm10, minval(cm00 + gcm11, gcm01)));
5158
}
5259

60+
5361
/**
54-
* Compute the Dynamic Time Warping (DTW) distance between two 2D C++ vectors.
62+
* Vector-based Dynamic Time Warping (DTW) distance.
5563
*
5664
* @description
5765
* This function calculates the Dynamic Time Warping (DTW) distance between
58-
* two sequences that can have a different number of data points but must
59-
* share the same number of dimensions. An exception is thrown if the dimensions
60-
* of the input vectors do not match.
66+
* two sequences using the vector-based algorithm proposed by Leodolter
67+
* et al. (2021).
6168
*
62-
* For more information on DTW, visit:
63-
* https://en.wikipedia.org/wiki/Dynamic_time_warping
69+
* The complexity of this function, as presented by Leodolter et al. (2021), is
70+
* equal to O(n).
6471
*
65-
* @param a A 2D vector representing the first sequence
66-
* @param b A 2D vector representing the second sequence.
67-
* @param p The value of p-norm to use for distance calculation.
72+
* For more information on vector-based DTW, visit:
73+
* https://doi.org/10.18637/jss.v099.i09
6874
*
69-
* @throws std::invalid_argument If the dimensions of 'a' and 'b' do not match.
75+
* @param x A `arma::vec` with time-series values.
76+
* @param y A `arma::vec` with time-series values.
7077
*
71-
* @note
72-
* Both vectors 'a', and 'b' should be structured as follows:
78+
* @reference
79+
* Leodolter, M., Plant, C., & Brändle, N. (2021). IncDTW: An R Package for
80+
* Incremental Calculation of Dynamic Time Warping. Journal of Statistical
81+
* Software, 99(9), 1–23. https://doi.org/10.18637/jss.v099.i09
7382
*
74-
* [number_of_data_points][number_of_dimensions]
75-
*
76-
* allowing the DTW distance computation to adapt to any p-norm value specified.
77-
*
78-
* @note The implementation of this DTW distance calculation was adapted from:
79-
* https://github.com/cjekel/DTW_cpp
83+
* @note
84+
* The implementation of this DTW distance calculation was adapted from the
85+
* `IncDTW` R package.
8086
*
81-
* @return The DTW distance between the two input sequences.
87+
* @return DTW distance.
8288
*/
83-
double distance_dtw_op(std::vector<std::vector<double>> a,
84-
std::vector<std::vector<double>> b,
85-
double p)
89+
// [[Rcpp::export]]
90+
double dtw2vec(const arma::vec &x, const arma::vec &y)
8691
{
87-
int n = a.size();
88-
int o = b.size();
92+
int nx = x.size();
93+
int ny = y.size();
8994

90-
int a_m = a[0].size();
91-
int b_m = b[0].size();
95+
double *p1 = new double[nx];
96+
double *p2 = new double[nx];
9297

93-
if (a_m != b_m)
94-
{
95-
throw std::invalid_argument(
96-
"a and b must have the same number of dimensions!"
97-
);
98-
}
99-
std::vector<std::vector<double>> d(n, std::vector<double>(o, 0.0));
100-
101-
d[0][0] = p_norm(a[0], b[0], p);
98+
double *ptmp;
99+
double ret;
102100

103-
for (int i = 1; i < n; i++)
104-
{
105-
d[i][0] = d[i - 1][0] + p_norm(a[i], b[0], p);
106-
}
107-
for (int i = 1; i < o; i++)
101+
// first column
102+
*p1 = std::abs(x[0] - y[0]);
103+
for (int i = 1; i < nx; i++)
108104
{
109-
d[0][i] = d[0][i - 1] + p_norm(a[0], b[i], p);
105+
p1[i] = std::abs(x[i] - y[0]) + p1[i - 1];
110106
}
111-
for (int i = 1; i < n; i++)
107+
108+
for (int j = 1; j < ny; j++)
112109
{
113-
for (int j = 1; j < o; j++)
110+
*p2 = std::abs(x[0] - y[j]) + *(p1);
111+
112+
for (int i = 1; i < nx; i++)
114113
{
115-
d[i][j] = p_norm(a[i], b[j], p) + std::fmin(
116-
std::fmin(d[i - 1][j], d[i][j - 1]), d[i - 1][j - 1]
117-
);
114+
*(p2 + i) = calculate_step_pattern_symmetric2(*(p2 + i - 1), *(p1 + i - 1), *(p1 + i), std::abs(x[i] - y[j]));
118115
}
116+
ptmp = p1;
117+
p1 = p2;
118+
p2 = ptmp;
119119
}
120-
return d[n - 1][o - 1];
120+
121+
ret = *(p1 + nx - 1); // p1[nx-1]
122+
123+
delete[] p1;
124+
delete[] p2;
125+
126+
return (ret);
121127
}
122128

129+
123130
/**
124131
* Dynamic Time Warping (DTW) distance wrapper.
125132
*
126133
* @description
127134
* This function calculates prepare data from `Kohonen` package and calculate
128-
* the DTW distance between two array of points.
135+
* the DTW distance between two 1D time-series.
129136
*
130-
* @param a A 2D vector representing the first sequence.
131-
* @param b A 2D vector representing the second sequence.
132-
* @param np Number of points in vectors `a` and `b`.
133-
* @param nNA Number of NA values in the vectors `a` and `b`.
137+
* @param p1 A 1D array representing the first time-series.
138+
* @param p2 A 1D array representing the second time-series.
139+
* @param np Number of points in arrays `p1` and `p2`.
140+
* @param nNA Number of NA values in the arrays `p1` and `p2`.
134141
*
135142
* @note The function signature was created following the `Kohonen` R package
136143
* specifications for custom distance functions.
137144
*
138-
*
139-
* @return The DTW distance between the two input sequences.
145+
* @return The DTW distance between two time-series.
140146
*/
141147
double kohonen_dtw(double *p1, double *p2, int np, int nNA)
142148
{
143-
std::vector<double> p1_data(p1, p1 + np);
144-
std::vector<double> p2_data(p2, p2 + np);
149+
arma::vec p1_vec(p1, np, false);
150+
arma::vec p2_vec(p2, np, false);
145151

146-
std::vector<std::vector<double>> p1_vec = {p1_data};
147-
std::vector<std::vector<double>> p2_vec = {p2_data};
148-
149-
// p-norm fixed in 2 (equivalent to euclidean distance)
150-
return (distance_dtw_op(p1_vec, p2_vec, 2));
152+
return dtw2vec(p1_vec, p2_vec);
151153
}
152154

155+
153156
// [[Rcpp::export]]
154157
Rcpp::XPtr<DistanceFunctionPtr> dtw()
155158
{
156159
// Returns a External Pointer, which is used by the `kohonen` package
157160
// https://cran.r-project.org/doc/manuals/R-exts.html#External-pointers-and-weak-references
158161
return (Rcpp::XPtr<DistanceFunctionPtr>(new DistanceFunctionPtr(
159-
&kohonen_dtw)));
162+
&kohonen_dtw)));
160163
}

tests/testthat/test-som.R

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,21 @@
1+
test_that("Calculating time-series distances using DTW", {
2+
set.seed(2903)
3+
4+
# test 1 (example from `IncDTW` article)
5+
t1 <- c(3, 4, 5, 6)
6+
t2 <- c(1, 3, 3, 5, 6)
7+
8+
expect_equal(dtw2vec(t1, t2), 3)
9+
10+
# test 2 (using ndvi time-series)
11+
t1 <- c(0.5001, 0.5060, 0.5079, 0.7141, 0.4262)
12+
t2 <- c(0.3974, 0.5144, 0.5052, 0.6463, 0.5139)
13+
14+
# 0.4323 -> Value confirmed using `dtw` and `IncDTW` packages as reference
15+
expect_equal(dtw2vec(t1, t2), 0.4323)
16+
})
17+
18+
119
test_that("Creating clustering using Self-organizing Maps with DTW distance", {
220
set.seed(2903)
321

0 commit comments

Comments
 (0)