Line data Source code
1 : /**
2 : * @file warping.hpp
3 : * @brief Time warping functions.
4 : *
5 : * @details This file contains functions for dynamic time warping, which is a method to
6 : * compare two temporal sequences that may vary in time or speed. It includes
7 : * different versions of the algorithm for full, light (L), and banded computations.
8 : *
9 : * @author Volkan Kumtepeli
10 : * @author Becky Perriment
11 : * @date 08 Dec 2022
12 : */
13 :
14 : #pragma once
15 :
16 : #include <cstdlib> // for abs, size_t
17 : #include <algorithm> // for min, max
18 : #include <cmath> // for floor, round
19 : #include <limits> // for numeric_limits
20 : #include <vector> // for vector
21 : #include <utility> // for pair
22 :
23 : #include <armadillo>
24 :
25 : namespace dtwc {
26 :
27 : /**
28 : * @brief Computes the full dynamic time warping distance between two sequences.
29 : *
30 : * @tparam data_t Data type of the elements in the sequences.
31 : * @param x First sequence.
32 : * @param y Second sequence.
33 : * @return The dynamic time warping distance.
34 : */
35 : template <typename data_t>
36 7 : data_t dtwFull(const std::vector<data_t> &x, const std::vector<data_t> &y)
37 : {
38 7 : thread_local arma::Mat<data_t> C;
39 7 : constexpr data_t maxValue = std::numeric_limits<data_t>::max();
40 :
41 7 : if (&x == &y) return 0; // If they are the same data then distance is 0.
42 :
43 6 : const int mx = x.size();
44 6 : const int my = y.size();
45 :
46 6 : C.resize(mx, my);
47 :
48 48 : auto distance = [](data_t x, data_t y) { return std::abs(x - y); };
49 :
50 6 : if ((mx == 0) || (my == 0)) return maxValue;
51 :
52 4 : C(0, 0) = distance(x[0], y[0]);
53 :
54 14 : for (int i = 1; i < mx; i++)
55 30 : C(i, 0) = C(i - 1, 0) + distance(x[i], y[0]); // j = 0
56 :
57 14 : for (int j = 1; j < my; j++)
58 30 : C(0, j) = C(0, j - 1) + distance(x[0], y[j]); // i = 0
59 :
60 :
61 14 : for (int j = 1; j < my; j++) {
62 34 : for (int i = 1; i < mx; i++) {
63 96 : const auto minimum = std::min({ C(i - 1, j), C(i, j - 1), C(i - 1, j - 1) });
64 48 : C(i, j) = minimum + distance(x[i], y[j]);
65 : }
66 : }
67 :
68 8 : return C(mx - 1, my - 1);
69 : }
70 :
71 : /**
72 : * @brief Computes the dynamic time warping distance using the light method.
73 : *
74 : * This function uses the light method for computation but cannot backtrack.
75 : * It only uses one vector to traverse instead of matrices.
76 : *
77 : * @tparam data_t Data type of the elements in the sequences.
78 : * @param x First sequence.
79 : * @param y Second sequence.
80 : * @return The dynamic time warping distance.
81 : */
82 : template <typename data_t>
83 28 : data_t dtwFull_L(const std::vector<data_t> &x, const std::vector<data_t> &y)
84 : {
85 28 : if (&x == &y) return 0; // If they are the same data then distance is 0.
86 24 : constexpr data_t maxValue = std::numeric_limits<data_t>::max();
87 24 : thread_local static std::vector<data_t> short_side(10000);
88 :
89 24 : const auto &[short_vec, long_vec] = (x.size() < y.size()) ? std::tie(x, y) : std::tie(y, x);
90 24 : const auto m_short{ short_vec.size() }, m_long{ long_vec.size() };
91 :
92 24 : short_side.resize(m_short);
93 :
94 96 : auto distance = [](data_t x, data_t y) { return std::abs(x - y); };
95 :
96 24 : if ((m_short == 0) || (m_long == 0)) return maxValue;
97 :
98 16 : short_side[0] = distance(short_vec[0], long_vec[0]);
99 :
100 48 : for (size_t i = 1; i < m_short; i++)
101 32 : short_side[i] = short_side[i - 1] + distance(short_vec[i], long_vec[0]);
102 :
103 64 : for (size_t j = 1; j < m_long; j++) {
104 48 : auto diag = short_side[0];
105 48 : short_side[0] += distance(short_vec[0], long_vec[j]);
106 :
107 144 : for (size_t i = 1; i < m_short; i++) {
108 96 : const data_t min1 = std::min(short_side[i - 1], short_side[i]);
109 96 : const auto shr = short_vec.at(i);
110 96 : const auto lng = long_vec.at(j);
111 96 : const data_t dist = std::abs(shr - lng);
112 96 : const data_t next = std::min(diag, min1) + dist;
113 :
114 96 : diag = short_side[i];
115 96 : short_side[i] = next;
116 : }
117 : }
118 :
119 16 : return short_side.back();
120 : }
121 :
122 :
123 : /**
124 : * @brief Computes the banded dynamic time warping distance between two sequences.
125 : *
126 : * @details This version of the algorithm introduces banding to limit the computation to
127 : * a certain vicinity around the diagonal, reducing computational complexity.
128 : *
129 : * Actual banding with skewness. Uses Sakoe-Chiba band.
130 : * Reference: H. Sakoe and S. Chiba, "Dynamic programming algorithm optimization
131 : * for spoken word recognition". IEEE Transactions on Acoustics,
132 : * Speech, and Signal Processing, 26(1), 43-49 (1978).
133 : *
134 : * Code is inspired from pyts.
135 : * See https://pyts.readthedocs.io/en/stable/auto_examples/metrics/plot_sakoe_chiba.html
136 : * for a detailed explanation.
137 : *
138 : * @tparam data_t Data type of the elements in the sequences.
139 : * @param x First sequence.
140 : * @param y Second sequence.
141 : * @param band The bandwidth parameter that controls the vicinity around the diagonal.
142 : * @return The dynamic time warping distance.
143 : */
144 : template <typename data_t = float>
145 18 : data_t dtwBanded(const std::vector<data_t> &x, const std::vector<data_t> &y, int band = settings::DEFAULT_BAND_LENGTH)
146 : {
147 18 : if (band < 0) return dtwFull_L<data_t>(x, y); //<! Band is negative, so returning full dtw.
148 :
149 8 : thread_local arma::Mat<data_t> C;
150 8 : constexpr data_t maxValue = std::numeric_limits<data_t>::max();
151 :
152 8 : const auto &[short_vec, long_vec] = (x.size() < y.size()) ? std::tie(x, y) : std::tie(y, x);
153 8 : const int m_short(short_vec.size()), m_long(long_vec.size());
154 :
155 8 : C.resize(m_long, m_short);
156 8 : C.fill(maxValue);
157 :
158 44 : auto distance = [](data_t xi, data_t yi) { return std::abs(xi - yi); };
159 :
160 8 : if ((m_short == 0) || (m_long == 0)) return maxValue;
161 8 : if ((m_short == 1) || (m_long == 1)) return dtwFull_L<data_t>(x, y); //<! Band is meaningless when one length is one, so return full DTW
162 8 : if (m_long <= (band + 1)) return dtwFull_L<data_t>(x, y); //<! Band is bigger than long side so full DTW can be done.
163 :
164 :
165 4 : const double slope = static_cast<double>(m_long - 1) / (m_short - 1);
166 4 : const auto window = std::max((double)band, slope / 2);
167 :
168 16 : auto get_bounds = [slope, window](int x) {
169 12 : const auto y = slope * x;
170 12 : const int low = std::ceil(std::round(100 * (y - window)) / 100.0);
171 12 : const int high = std::floor(std::round(100 * (y + window)) / 100.0) + 1;
172 12 : return std::pair(low, high);
173 : };
174 :
175 4 : C(0, 0) = distance(long_vec[0], short_vec[0]);
176 :
177 : {
178 4 : const auto [lo, hi] = get_bounds(0);
179 12 : for (int i = 1; i < hi; ++i)
180 24 : C(i, 0) = C(i - 1, 0) + distance(long_vec[i], short_vec[0]);
181 : }
182 :
183 12 : for (int j = 1; j < m_short; j++) //<! Scan the short part!
184 : {
185 8 : const auto [lo, hi] = get_bounds(j);
186 8 : if (lo <= 0)
187 12 : C(0, j) = C(0, j - 1) + distance(long_vec[0], short_vec[j]);
188 :
189 8 : const auto high = std::min(hi, m_long);
190 36 : for (int i = std::max(lo, 1); i < high; ++i) {
191 112 : const auto minimum = std::min({ C(i - 1, j), C(i, j - 1), C(i - 1, j - 1) });
192 56 : C(i, j) = minimum + distance(long_vec[i], short_vec[j]);
193 : }
194 : }
195 :
196 8 : return C(m_long - 1, m_short - 1);
197 : }
198 : } // namespace dtwc
|