-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHT_04032020.py
190 lines (160 loc) · 6.69 KB
/
HT_04032020.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
:Author: Tomatis D.
:Date: 29/01/2018
:Company: CEA/DEN/DANS/DM2S/SERMA/LPEC
Implementation of the Hotelling transform, which is the
discrete version of the Karhunen-Loeve transform, according
to chap. 4 of [1].
References
----------
[1] SUN, Huifang et SHI, Yun Q. Image and Video Compression
for Multimedia Engineering: Fundamentals, Algorithms,
and standards. CRC press, 2008.
"""
import numpy as np
import warnings
# set the machine precision to compare to zero
numerical_zero=np.finfo(np.float32).eps
def HTI(Y, m, E):
"""Inverse Hotelling transform.
:param Y: real transformed matrix
:param m: real array with original average values (per row)
:param E: matrix with eigenvectors
:type Y: ndarray, 2D array of `float` type
:type m: ndarray, 1D array of `float` type
:type E: ndarray, 2D array of `float` type
:return Zp: anti-transformed matrix
:rtype Zp: ndarray, 2D array of `float` type
"""
mtype, n = type(m[0]), Y.shape[1]
return (np.dot(E.T, Y) + np.outer(m, np.ones((n), dtype=mtype)))
def HT(Z, t=-.01, c=None, chk=False, d=None, force_SVD=False):
"""Apply the direct Hotelling transform to the input
matrix Z. Note that eigenvectors and the corresponding
eigenvectors (arranged per column) are set in decreasing
order. The threshold for the allowed minimum mean squared
reconstruction error (MSRE) is the input t, and it is equal
to the sum of the discarded variances, i.e. the eigenvalues);
it is given here as a fraction to 1, thus all variances are
compared to their sum. Alternatively, an input fixed number
of terms can be selected.
This transform can also achieve the PCA, and it is a particular
case of SVD.
.. note:: t=0. disables the truncation after the transform.
:param Z: input real 2D matrixM
:param t: threshold for the MSRE
:param c: number of coding terms
:param chk: verify that the sum of squared error equals the
sum of discarded variances
:param d: number of decimals used to check the property
:param force_SVD: force the use of SVD
:type Z: ndarray, 2D array of `float` type
:type t: float
:type c: int
:type chk: bool
:type d: int
:type force_SVD: bool
:return Y: transformed matrix (with column-wise vectors),
:return m: vector of average values per row
:return Et: truncated matrix of eigenvectors
:rtype Y: ndarray, 2D array of `float` type
:rtype m: ndarray, 1D array of `float` type
:rtype Ep: ndarray, 2D array of `float` type
"""
n1, n2 = Z.shape
nm = min(n1, n2)
if c is not None:
if type(c) != int:
raise ValueError("input c must be integer")
if not (0 < c <= nm):
raise ValueError("invalid number of coding terms")
if (t > 0.):
warnings.warn(("Found both threshold and fixed coding"+
". Fixed coding with {:d} terms will be"+
"used.").format(c))
mtype = type(Z[0, 0])
# compute the average values per row
m = np.mean(Z, axis=1)
M = Z - np.outer(m, np.ones((n2), dtype=mtype))
# eigen-decomposition on the symmetric and positive semi-definite
# matrix np.dot(M, M.T). This can be performed by SVD on the matrix
# M itself. The eigenvalues in L will then be the squared singular
# values of M, and E will contain the left-singular vectors.
if n1 > n2 or force_SVD:
E, S, V = np.linalg.svd(M, full_matrices=False)
L = S**2; del V
else:
# the division of the matrix np.dot(M, M.T) by n1 is neglected here
# note that the eigenvectors are already ortho-normalized,
# i.e. np.dot(E.T, E) == np.diag(n1)
L, E = np.linalg.eigh( np.dot(M, M.T) )
# C = np.dot(M, M.T) # covariance matrix (times n1)
# np.testing.assert_array_almost_equal(C,
# np.dot(Z, Z.T) - np.outer(m, m) * n1,
# err_msg="property of covariance matrix not verified")
# sort the eigenvalues and corresponding eigenvectors
idx = L.argsort()[::-1]
# and reorder the eigenpairs
L, E = L[idx], E[:,idx]
# remove possible rounding-off error
#if np.any( L < 0.): L = np.where( np.isclose(L, 0.), 0., L )
# note that L contains variances, and it is expected as a
# positive array. Any negative element must be very small.
if len(L) != nm:
raise RuntimeError("unexpected nb. of eigenvalues")
Y = lambda E, M: np.dot(E, M)
if c is None:
# threshold coding when outputing
Lr = L / np.sum(L)
# print('check:', Lr); print(np.cumsum(Lr[::-1]))
s, c = 0., nm - 1
while (s <= t) and (c > -1):
# the following abs is redundant if the negative-value-fix
# above is enabled.
s += abs(Lr[c])
c -= 1
c += 2
#else apply the input fixed coding by truncation
Ep = E[:,:c].T
c_hat = (n1 * n2 - n1) // (n1 + n2)
# if ((t > 0.) and (c is not None)) and (c > c_hat):
if c > c_hat:
warnings.warn("Negative savings, L=%d, I=%d." % (c, nm),
RuntimeWarning)
if chk:
err = Z - HTI(Y(Ep, M), m, Ep)
sse = np.linalg.norm(err, ord='fro')**2
if d is None:
d = np.finfo(type(sse)).precision - 1
elif not isinstance(d, int):
raise ValueError("input d must be integer")
sum_discarded_vars = np.sum(L[c:])
#if n1 != n2: # mistaken
# sum_discarded_vars *= n2 / n1
np.testing.assert_almost_equal(sse, sum_discarded_vars, decimal=d,
err_msg="The sum of squared errors must be equal "+
"to the sum of the discared variances (eigs)")
return Y(Ep, M), m, Ep
if __name__ == "__main__":
dim = 10
A = np.random.rand(dim, dim)
A = np.load('../yama_uox/Z.npy').astype(np.float64)
# test the HT without threshold coding (or truncation of terms)
np.testing.assert_array_almost_equal(A, HTI( *list(HT(A, chk=True)) ), \
err_msg="consistency check of direct-inverse Hotelling Transform")
# verify threshold coding
assert np.linalg.norm(A - HTI( *list(HT(A, t=0.10)) ), ord='fro') >= \
np.linalg.norm(A - HTI( *list(HT(A, t=0.01)) ), ord='fro'), \
"Higher threshold provides better accuracy after reconstruction?!?"
try:
Y, m, Ep = HT(A, t=0.01, chk=True, d=6)
except AssertionError as e:
print(e)
A = np.load('../yama_uox/Z.npy').astype(np.float64)
try:
Y, m, Ep = HT(A, t=0.0005, chk=True, d=6)
except AssertionError as e:
print("Test large rectangular matrix failed.")
print(e)