-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsax_demo.m
145 lines (117 loc) · 4.94 KB
/
sax_demo.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
% Copyright and terms of use (DO NOT REMOVE):
% The code is made freely available for non-commercial uses only, provided that the copyright
% header in each file not be removed, and suitable citation(s) (see below) be made for papers
% published based on the code.
%
% The code is not optimized for speed, and we are not responsible for any errors that might
% occur in the code.
%
% The copyright of the code is retained by the authors. By downloading/using this code you
% agree to all the terms stated above.
%
% Lin, J., Keogh, E., Lonardi, S. & Chiu, B.
% "A Symbolic Representation of Time Series, with Implications for Streaming Algorithms."
% In proceedings of the 8th ACM SIGMOD Workshop on Research Issues in Data Mining and
% Knowledge Discovery. San Diego, CA. June 13, 2003.
%
%
% Lin, J., Keogh, E., Patel, P. & Lonardi, S.
% "Finding Motifs in Time Series". In proceedings of the 2nd Workshop on Temporal Data Mining,
% at the 8th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining.
% Edmonton, Alberta, Canada. July 23-26, 2002
%
%
% This code provides a step-by-step demo of SAX (Symbolic Aggregate approXimation). Press enter
% for the next step.
%
% usage: [str] = sax_demo
% [str] = sax_demo(data)
%
% Copyright (c) 2003, Eamonn Keogh, Jessica Lin, Stefano Lonardi, Pranav Patel, Li Wei. All rights reserved.
%
function [sax_string] = sax_demo(data)
if nargin == 0
data_len = 100;
data = random_walk(data_len);
else
data_len = length(data);
end
nseg = 10;
alphabet_size = 10;
if alphabet_size > 10
disp('Currently alphabet_size cannot be larger than 10. Please update the breakpoint table if you wish to do so');
return;
end
data_len
nseg
% nseg must be divisible by data length
if (mod(data_len, nseg))
disp('nseg must be divisible by the data length. Aborting ');
return;
end
% win_size is the number of data points on the raw time series that will be mapped to a
% single symbol
win_size = floor(data_len/nseg);
data = (data - mean(data))/std(data);
plot(data);
% special case: no dimensionality reduction
if data_len == nseg
PAA = data;
% Convert to PAA. Note that this line is also in timeseries2symbol, which will be
% called later. So it's redundant here and is for the purpose of plotting only.
else
PAA = [mean(reshape(data,win_size,nseg))];
end
% plot the PAA segments
PAA_plot = repmat(PAA', 1, win_size);
PAA_plot = reshape(PAA_plot', 1, data_len)';
hold on;
plot(PAA_plot,'r');
% map the segments to string
str = timeseries2symbol(data, data_len, nseg, alphabet_size);
% get the breakpoints
switch alphabet_size
case 2, cutlines = [0];
case 3, cutlines = [-0.43 0.43];
case 4, cutlines = [-0.67 0 0.67];
case 5, cutlines = [-0.84 -0.25 0.25 0.84];
case 6, cutlines = [-0.97 -0.43 0 0.43 0.97];
case 7, cutlines = [-1.07 -0.57 -0.18 0.18 0.57 1.07];
case 8, cutlines = [-1.15 -0.67 -0.32 0 0.32 0.67 1.15];
case 9, cutlines = [-1.22 -0.76 -0.43 -0.14 0.14 0.43 0.76 1.22];
case 10, cutlines = [-1.28 -0.84 -0.52 -0.25 0. 0.25 0.52 0.84 1.28];
otherwise, disp('WARNING:: Alphabet size too big');
end
% draw the gray guide lines in the background
guidelines = repmat(cutlines', 1, data_len);
plot(guidelines', 'color', [0.8 0.8 0.8]);
hold on
color = {'g', 'y', 'm', 'c', 'b'};
symbols = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'};
% high-light the segments and assign them to symbols
for i = 1 : nseg
% get the x coordinates for the segments
x_start = (i-1) * win_size + 1;
x_end = x_start + win_size - 1;
x_mid = x_start + (x_end - x_start) / 2;
% color-code each segment
colorIndex = rem(str(i),length(color))+1;
% draw the segments
plot([x_start:x_end],PAA_plot([x_start:x_end]), 'color', color{colorIndex}, 'linewidth', 3);
% show symbols
text(x_mid, PAA_plot(x_start), symbols{str(i)}, 'fontsize', 14);
end
sax_string = symbols(str);
%end
%------------------------------------------------------------------------------------------
% Make random walk data
%------------------------------------------------------------------------------------------
function r = random_walk(n)
% r = random_walk(n)
% n: length of random walk time series
%
% This is the continuous analog of symmetric random walk, each increment y(s+t)-y(s) is
% Gaussian with distribution N(0,t^2) and increments over disjoint intervals are independent.
% It is typically simulated as an approximating random walk in discrete time.
sigma=1;
r=[0 cumsum(sigma.*randn(1,n-1))]; % standard Brownian motion