|
1 |
| -#include <Rcpp.h> |
2 |
| - |
3 |
| -#include <cstdlib> |
4 |
| -#include <vector> |
5 |
| -#include <cmath> |
6 |
| -#include <algorithm> |
7 |
| -#include <stdexcept> |
| 1 | +#include <RcppArmadillo.h> |
| 2 | +// [[Rcpp::depends(RcppArmadillo)]] |
8 | 3 |
|
9 | 4 | #include "./sits_types.h"
|
10 | 5 |
|
11 | 6 | using namespace Rcpp;
|
12 | 7 |
|
| 8 | + |
13 | 9 | /**
|
14 |
| - * Compute the p-norm distance between two 1D C++ vectors. |
| 10 | + * Minimum of 2 values. |
15 | 11 | *
|
16 | 12 | * @description
|
17 |
| - * The p-norm, also known as the Minkowski norm, is a generalized norm |
18 |
| - * calculation that includes several types of distances based on the value of p. |
19 |
| - * |
20 |
| - * Common values of p include: |
| 13 | + * Auxiliary function to calculate the minimum value of `x` and `y`. |
| 14 | + */ |
double minval(double x, double y)
{
    // Any ordered comparison involving NaN evaluates to false, so `std::min`
    // alone would simply hand back its first argument when a NaN is present.
    // The NaN cases are therefore resolved explicitly before comparing.
    const bool x_is_nan = std::isnan(x);
    const bool y_is_nan = std::isnan(y);

    // `y` is NaN: `x` is the answer, whether it is a number or also NaN
    // (when both are NaN the result is NaN).
    if (y_is_nan) {
        return x;
    }
    // Only `x` is NaN: ignore it and return the valid value.
    if (x_is_nan) {
        return y;
    }
    return std::min(x, y);
}
| 26 | + |
| 27 | + |
| 28 | +/** |
| 29 | + * Calculate the `symmetric2` step pattern. |
21 | 30 | *
|
22 |
| - * - p = 1 for the Manhattan (city block) distance; |
23 |
| - * - p = 2 for the Euclidean norm (distance). |
| 31 | + * @description |
| 32 | + * This function calculates the `symmetric2` step pattern, which uses a weight |
| 33 | + * of 2 for the diagonal step and 1 for the vertical and horizontal to |
| 34 | + * compensate for the favor of diagonal steps. |
24 | 35 | *
|
25 |
| - * More details about p-norms can be found on Wikipedia: |
26 |
| - * https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm |
| 36 | + * @note |
| 37 | + * For more information on this step pattern, visit the `IncDTW` package |
| 38 | + * documentation: https://www.rdocumentation.org/packages/IncDTW/versions/1.1.4.4/topics/dtw2vec |
27 | 39 | *
|
28 |
| - * @param a A 1D vector representing the first point in an m-dimensional space. |
29 |
| - * @param b A 1D vector representing the second point in an m-dimensional space. |
30 |
| - * @param p The value of the norm to use, determining the type of distance |
31 |
| - * calculated. |
| 40 | + * @reference |
| 41 | + * Leodolter, M., Plant, C., & Brändle, N. (2021). IncDTW: An R Package for |
| 42 | + * Incremental Calculation of Dynamic Time Warping. Journal of Statistical |
| 43 | + * Software, 99(9), 1–23. https://doi.org/10.18637/jss.v099.i09 |
32 | 44 | *
|
33 |
| - * @note Both vectors 'a' and 'b' must have the same number of dimensions. |
34 |
| - * @note This function was adapted from the DTW implementation found at: |
35 |
| - * https://github.com/cjekel/DTW_cpp |
| 45 | + * Giorgino, T. (2009). Computing and Visualizing Dynamic Time Warping |
| 46 | + * Alignments in R: The dtw Package. Journal of Statistical Software, 31(7), |
| 47 | + * 1–24. https://doi.org/10.18637/jss.v031.i07 |
36 | 48 | *
|
37 |
| - * @return The p-norm distance between vectors 'a' and 'b'. |
| 49 | + * @return `symmetric2` step pattern value. |
38 | 50 | */
|
39 |
| -double p_norm(std::vector<double> a, std::vector<double> b, double p) |
40 |
| -{ |
41 |
| - double d = 0; |
42 |
| - |
43 |
| - size_t index; |
44 |
| - size_t a_size = a.size(); |
45 |
| - |
46 |
| - for (index = 0; index < a_size; index++) |
47 |
| - { |
48 |
| - d += std::pow(std::abs(a[index] - b[index]), p); |
49 |
| - } |
50 |
| - return std::pow(d, 1.0 / p); |
| 51 | +double calculate_step_pattern_symmetric2( |
| 52 | + const double gcm10, // vertical |
| 53 | + const double gcm11, // diagonal |
| 54 | + const double gcm01, // horizontal |
| 55 | + const double cm00 |
| 56 | +) { |
| 57 | + return(cm00 + minval(gcm10, minval(cm00 + gcm11, gcm01))); |
51 | 58 | }
|
52 | 59 |
|
| 60 | + |
53 | 61 | /**
|
54 |
| - * Compute the Dynamic Time Warping (DTW) distance between two 2D C++ vectors. |
| 62 | + * Vector-based Dynamic Time Warping (DTW) distance. |
55 | 63 | *
|
56 | 64 | * @description
|
57 | 65 | * This function calculates the Dynamic Time Warping (DTW) distance between
|
58 |
| - * two sequences that can have a different number of data points but must |
59 |
| - * share the same number of dimensions. An exception is thrown if the dimensions |
60 |
| - * of the input vectors do not match. |
| 66 | + * two sequences using the vector-based algorithm proposed by Leodolter |
| 67 | + * et al. (2021). |
61 | 68 | *
|
62 |
| - * For more information on DTW, visit: |
63 |
| - * https://en.wikipedia.org/wiki/Dynamic_time_warping |
| 69 | + * The memory complexity of this function, as presented by Leodolter et al. |
| 70 | + * (2021), is O(n); the running time remains O(n * m). |
64 | 71 | *
|
65 |
| - * @param a A 2D vector representing the first sequence |
66 |
| - * @param b A 2D vector representing the second sequence. |
67 |
| - * @param p The value of p-norm to use for distance calculation. |
| 72 | + * For more information on vector-based DTW, visit: |
| 73 | + * https://doi.org/10.18637/jss.v099.i09 |
68 | 74 | *
|
69 |
| - * @throws std::invalid_argument If the dimensions of 'a' and 'b' do not match. |
| 75 | + * @param x A `arma::vec` with time-series values. |
| 76 | + * @param y A `arma::vec` with time-series values. |
70 | 77 | *
|
71 |
| - * @note |
72 |
| - * Both vectors 'a', and 'b' should be structured as follows: |
| 78 | + * @reference |
| 79 | + * Leodolter, M., Plant, C., & Brändle, N. (2021). IncDTW: An R Package for |
| 80 | + * Incremental Calculation of Dynamic Time Warping. Journal of Statistical |
| 81 | + * Software, 99(9), 1–23. https://doi.org/10.18637/jss.v099.i09 |
73 | 82 | *
|
74 |
| - * [number_of_data_points][number_of_dimensions] |
75 |
| - * |
76 |
| - * allowing the DTW distance computation to adapt to any p-norm value specified. |
77 |
| - * |
78 |
| - * @note The implementation of this DTW distance calculation was adapted from: |
79 |
| - * https://github.com/cjekel/DTW_cpp |
| 83 | + * @note |
| 84 | + * The implementation of this DTW distance calculation was adapted from the |
| 85 | + * `IncDTW` R package. |
80 | 86 | *
|
81 |
| - * @return The DTW distance between the two input sequences. |
| 87 | + * @return DTW distance. |
82 | 88 | */
|
83 |
| -double distance_dtw_op(std::vector<std::vector<double>> a, |
84 |
| - std::vector<std::vector<double>> b, |
85 |
| - double p) |
| 89 | +// [[Rcpp::export]] |
| 90 | +double dtw2vec(const arma::vec &x, const arma::vec &y) |
86 | 91 | {
|
87 |
| - int n = a.size(); |
88 |
| - int o = b.size(); |
| 92 | + int nx = x.size(); |
| 93 | + int ny = y.size(); |
89 | 94 |
|
90 |
| - int a_m = a[0].size(); |
91 |
| - int b_m = b[0].size(); |
| 95 | + double *p1 = new double[nx]; |
| 96 | + double *p2 = new double[nx]; |
92 | 97 |
|
93 |
| - if (a_m != b_m) |
94 |
| - { |
95 |
| - throw std::invalid_argument( |
96 |
| - "a and b must have the same number of dimensions!" |
97 |
| - ); |
98 |
| - } |
99 |
| - std::vector<std::vector<double>> d(n, std::vector<double>(o, 0.0)); |
100 |
| - |
101 |
| - d[0][0] = p_norm(a[0], b[0], p); |
| 98 | + double *ptmp; |
| 99 | + double ret; |
102 | 100 |
|
103 |
| - for (int i = 1; i < n; i++) |
104 |
| - { |
105 |
| - d[i][0] = d[i - 1][0] + p_norm(a[i], b[0], p); |
106 |
| - } |
107 |
| - for (int i = 1; i < o; i++) |
| 101 | + // first column |
| 102 | + *p1 = std::abs(x[0] - y[0]); |
| 103 | + for (int i = 1; i < nx; i++) |
108 | 104 | {
|
109 |
| - d[0][i] = d[0][i - 1] + p_norm(a[0], b[i], p); |
| 105 | + p1[i] = std::abs(x[i] - y[0]) + p1[i - 1]; |
110 | 106 | }
|
111 |
| - for (int i = 1; i < n; i++) |
| 107 | + |
| 108 | + for (int j = 1; j < ny; j++) |
112 | 109 | {
|
113 |
| - for (int j = 1; j < o; j++) |
| 110 | + *p2 = std::abs(x[0] - y[j]) + *(p1); |
| 111 | + |
| 112 | + for (int i = 1; i < nx; i++) |
114 | 113 | {
|
115 |
| - d[i][j] = p_norm(a[i], b[j], p) + std::fmin( |
116 |
| - std::fmin(d[i - 1][j], d[i][j - 1]), d[i - 1][j - 1] |
117 |
| - ); |
| 114 | + *(p2 + i) = calculate_step_pattern_symmetric2(*(p2 + i - 1), *(p1 + i - 1), *(p1 + i), std::abs(x[i] - y[j])); |
118 | 115 | }
|
| 116 | + ptmp = p1; |
| 117 | + p1 = p2; |
| 118 | + p2 = ptmp; |
119 | 119 | }
|
120 |
| - return d[n - 1][o - 1]; |
| 120 | + |
| 121 | + ret = *(p1 + nx - 1); // p1[nx-1] |
| 122 | + |
| 123 | + delete[] p1; |
| 124 | + delete[] p2; |
| 125 | + |
| 126 | + return (ret); |
121 | 127 | }
|
122 | 128 |
|
| 129 | + |
123 | 130 | /**
|
124 | 131 | * Dynamic Time Warping (DTW) distance wrapper.
|
125 | 132 | *
|
126 | 133 | * @description
|
127 | 134 | * This function prepares the data received from the `Kohonen` package and calculates
|
128 |
| - * the DTW distance between two array of points. |
| 135 | + * the DTW distance between two 1D time-series. |
129 | 136 | *
|
130 |
| - * @param a A 2D vector representing the first sequence. |
131 |
| - * @param b A 2D vector representing the second sequence. |
132 |
| - * @param np Number of points in vectors `a` and `b`. |
133 |
| - * @param nNA Number of NA values in the vectors `a` and `b`. |
| 137 | + * @param p1 A 1D array representing the first time-series. |
| 138 | + * @param p2 A 1D array representing the second time-series. |
| 139 | + * @param np Number of points in arrays `p1` and `p2`. |
| 140 | + * @param nNA Number of NA values in the arrays `p1` and `p2`. |
134 | 141 | *
|
135 | 142 | * @note The function signature was created following the `Kohonen` R package
|
136 | 143 | * specifications for custom distance functions.
|
137 | 144 | *
|
138 |
| - * |
139 |
| - * @return The DTW distance between the two input sequences. |
| 145 | + * @return The DTW distance between two time-series. |
140 | 146 | */
|
141 | 147 | double kohonen_dtw(double *p1, double *p2, int np, int nNA)
|
142 | 148 | {
|
143 |
| - std::vector<double> p1_data(p1, p1 + np); |
144 |
| - std::vector<double> p2_data(p2, p2 + np); |
| 149 | + arma::vec p1_vec(p1, np, false); |
| 150 | + arma::vec p2_vec(p2, np, false); |
145 | 151 |
|
146 |
| - std::vector<std::vector<double>> p1_vec = {p1_data}; |
147 |
| - std::vector<std::vector<double>> p2_vec = {p2_data}; |
148 |
| - |
149 |
| - // p-norm fixed in 2 (equivalent to euclidean distance) |
150 |
| - return (distance_dtw_op(p1_vec, p2_vec, 2)); |
| 152 | + return dtw2vec(p1_vec, p2_vec); |
151 | 153 | }
|
152 | 154 |
|
| 155 | + |
153 | 156 | // [[Rcpp::export]]
|
154 | 157 | Rcpp::XPtr<DistanceFunctionPtr> dtw()
|
155 | 158 | {
|
156 | 159 | // Returns a External Pointer, which is used by the `kohonen` package
|
157 | 160 | // https://cran.r-project.org/doc/manuals/R-exts.html#External-pointers-and-weak-references
|
158 | 161 | return (Rcpp::XPtr<DistanceFunctionPtr>(new DistanceFunctionPtr(
|
159 |
| - &kohonen_dtw))); |
| 162 | + &kohonen_dtw))); |
160 | 163 | }
|
0 commit comments