-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHT.py
216 lines (183 loc) · 7.48 KB
/
HT.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
:Author: Tomatis D.
:Date: 29/01/2018
:Company: CEA/DEN/DANS/DM2S/SERMA/LPEC
Implementation of the Hotelling transform, which is the
discrete version of the Karhunen-Loeve transform, according
to chap. 4 of [1].
References
----------
[1] SUN, Huifang et SHI, Yun Q. Image and Video Compression
for Multimedia Engineering: Fundamentals, Algorithms,
and standards. CRC press, 2008.
"""
import numpy as np
import warnings
# set the machine precision to compare to zero
# (single-precision epsilon; NOTE(review): not referenced anywhere in the
# code visible in this file — presumably kept for importers of this module)
numerical_zero=np.finfo(np.float32).eps
def HTI(Y, t, E):
    """Inverse Hotelling transform.
    Back-projects the transformed data with the (truncated) eigenvector
    basis, restores the row-wise averages and, when available, re-scales
    each row by its original standard deviation.
    :param Y: real transformed matrix
    :param t: tuple of real arrays with original average
              and standard deviation values (per row)
    :param E: matrix with eigenvectors
    :type Y: ndarray, 2D array of `float` type
    :type t: tuple of 2 ndarray, both of `float` type
    :type E: ndarray, 2D array of `float` type
    :return Zp: anti-transformed matrix
    :rtype Zp: ndarray, 2D array of `float` type
    """
    avg, std = t
    ncols = Y.shape[1]
    # back-project and re-add the per-row averages
    recon = np.dot(E.T, Y)
    recon = recon + np.outer(avg, np.ones((ncols), dtype=type(avg[0])))
    if std is None:
        return recon
    # undo the whitening (per-row division by std) applied at transform time
    return np.dot(np.diag(std), recon)
def get_c_hat(n1, n2, std=None):
    """Return the break-even number of coding terms for an n1-by-n2
    matrix: keeping more terms than this yields no storage savings.
    When *std* is given, each row also stores its standard deviation
    (whitened transform), which doubles the per-row overhead.
    """
    overhead = n1 if std is None else 2 * n1
    return (n1 * n2 - overhead) // (n1 + n2)
def HT(Z, t=-.01, c=None, chk=False, d=None, force_SVD=False,
       whiten=False):
    """Apply the direct Hotelling transform to the input
    matrix Z. Note that eigenvalues and the corresponding
    eigenvectors (arranged per column) are set in decreasing
    order. The threshold for the allowed minimum mean squared
    reconstruction error (MSRE) is the input t, and it is equal
    to the sum of the discarded variances, i.e. the eigenvalues);
    it is given here as a fraction to 1, thus all variances are
    compared to their sum. Alternatively, an input fixed number
    of terms can be selected.
    This transform can also achieve the PCA, and it is a particular
    case of SVD.
    .. note:: t=0. disables the truncation after the transform.
    :param Z: input real 2D matrix
    :param t: threshold for the MSRE
    :param c: number of coding terms
    :param chk: verify that the sum of squared error equals the
                sum of discarded variances
    :param d: number of decimals used to check the property
    :param force_SVD: force the use of SVD
    :param whiten: whiten features by std division
    :type Z: ndarray, 2D array of `float` type
    :type t: float
    :type c: int
    :type chk: bool
    :type d: int
    :type force_SVD: bool
    :type whiten: bool
    :return Y: transformed matrix (with column-wise vectors),
    :return m: vectors of average values (and std) per row
    :return Et: truncated matrix of eigenvectors
    :rtype Y: ndarray, 2D array of `float` type
    :rtype m: tuple of ndarray, 2 1D arrays of `float` type
    :rtype Ep: ndarray, 2D array of `float` type
    """
    n1, n2 = Z.shape
    nm = min(n1, n2)
    if c is not None:
        if not isinstance(c, int):
            raise ValueError("input c must be integer")
        if not (0 < c <= nm):
            raise ValueError("invalid number of coding terms")
        if (t > 0.):
            # fix: the original concatenation read "will beused."
            warnings.warn(("Found both threshold and fixed coding" +
                           ". Fixed coding with {:d} terms will be " +
                           "used.").format(c))
    mtype = type(Z[0, 0])
    if whiten:
        # attempt to improve accuracy
        # NOTE(review): a constant row (zero std) would divide by zero
        # here — confirm inputs exclude that case
        std = np.std(Z, axis=1)
        Z = np.dot(np.diag(1. / std), Z)
    else:
        std = None
    # compute the average values per row and center the data
    m = np.mean(Z, axis=1)
    M = Z - np.outer(m, np.ones((n2), dtype=mtype))
    # eigen-decomposition on the symmetric and positive semi-definite
    # matrix np.dot(M, M.T). This can be performed by SVD on the matrix
    # M itself. The eigenvalues in L will then be the squared singular
    # values of M, and E will contain the left-singular vectors.
    if n1 > n2 or force_SVD:
        E, S, V = np.linalg.svd(M, full_matrices=False)
        L = S**2
        del V
    else:
        # the division of the matrix np.dot(M, M.T) by n1 is neglected
        # here; note that the eigenvectors are already ortho-normalized
        L, E = np.linalg.eigh(np.dot(M, M.T))
    # sort the eigenpairs in decreasing order of eigenvalue
    idx = L.argsort()[::-1]
    L, E = L[idx], E[:, idx]
    # note that L contains variances, and it is expected as a
    # positive array; any negative element (rounding error) must be
    # very small
    if len(L) != nm:
        raise RuntimeError("unexpected nb. of eigenvalues")
    if c is None:
        # threshold coding: keep terms until the discarded variance
        # fraction would exceed t (relative variances, summed from the
        # smallest upwards)
        Lr = L / np.sum(L)
        s, c = 0., nm - 1
        while (s <= t) and (c > -1):
            # abs guards against tiny negative eigenvalues produced by
            # rounding errors
            s += abs(Lr[c])
            c -= 1
        c += 2
    # else apply the input fixed coding by truncation
    Ep = E[:, :c].T
    c_hat = get_c_hat(n1, n2, std)
    if c_hat < c < nm:
        warnings.warn("Negative savings, L=%d, I=%d." % (c - 1, nm),
                      RuntimeWarning)
    if chk:
        # if whiten, std must be None in the HTI call because Z was
        # already divided here by std
        err = Z - HTI(np.dot(Ep, M), (m, None), Ep)
        sse = np.linalg.norm(err, ord='fro')**2
        if d is None:
            d = np.finfo(type(sse)).precision - 1
        elif not isinstance(d, int):
            raise ValueError("input d must be integer")
        sum_discarded_vars = np.sum(L[c:])
        np.testing.assert_almost_equal(sse, sum_discarded_vars, decimal=d,
            err_msg="The sum of squared errors must be equal " +
                    "to the sum of the discarded variances (eigs)")
    return np.dot(Ep, M), (m, std), Ep
if __name__ == "__main__":
    import os

    # self-tests on a small random square matrix (the original code
    # generated this matrix and then immediately overwrote it with a
    # np.load of a machine-specific file, crashing everywhere else)
    dim = 10
    A = np.random.rand(dim, dim)
    # test the HT without threshold coding (or truncation of terms)
    np.testing.assert_array_almost_equal(A, HTI(*HT(A, chk=True)),
        err_msg="consistency check of direct-inverse Hotelling Transform")
    # verify threshold coding: a larger threshold discards more terms
    # and so cannot reconstruct better than a smaller one
    assert np.linalg.norm(A - HTI(*HT(A, t=0.10)), ord='fro') >= \
        np.linalg.norm(A - HTI(*HT(A, t=0.01)), ord='fro'), \
        "Higher threshold provides better accuracy after reconstruction?!?"
    try:
        Y, (m, std), Ep = HT(A, t=0.01, chk=True, d=6)
    except AssertionError as e:
        print(e)

    # optional tests on an external (large, rectangular) data set;
    # skipped instead of crashing when the file is not available
    data_file = '../yama_uox/Z.npy'
    if os.path.isfile(data_file):
        A = np.load(data_file).astype(np.float64)
        try:
            Y, t, Ep = HT(A, t=0.0005, chk=True, d=6)
        except AssertionError as e:
            print("Test large rectangular matrix failed.")
            print(e)
        try:
            Y, t, Ep = HT(A, t=0.0005, chk=True, d=10, whiten=True)
        except AssertionError as e:
            print("Test large rectangular matrix failed.")
            print(e)
    else:
        print("Skipping external data tests: %s not found." % data_file)