Commit

add tests
Leo-Simpson committed Dec 3, 2020
1 parent da168d8 commit 1e677db
Showing 22 changed files with 1,824 additions and 848 deletions.
1 change: 0 additions & 1 deletion classo/__init__.py
@@ -2,7 +2,6 @@
random_data,
csv_to_np,
mat_to_np,
rescale,
clr,
theoretical_lam,
to_zarr,
21 changes: 17 additions & 4 deletions classo/compact_func.py
@@ -26,9 +26,10 @@ def Classo(
get_lambdamax = False,
true_lam = False,
e = None,
rho_classification = -0.0,
rho_classification = -1.0,
w = None,
intercept = False,
return_sigm = True
):

if w is not None:
@@ -52,6 +53,7 @@
if e is None or e == len(matrices[0]) / 2:
r = 1.0
pb = problem_R3(matrices, meth)
e = len(matrices[0]) / 2
else:
r = np.sqrt(2 * e / len(matrices[0]))
pb = problem_R3((matrices[0] * r, matrices[1], matrices[2] * r), meth)
@@ -60,7 +62,6 @@
beta, s = Classo_R3(pb, lam / lambdamax)
else:
beta, s = Classo_R3(pb, lam)
s = s / np.sqrt(e)

if intercept:
betaO = ybar - np.vdot(Xbar, beta)
@@ -73,6 +74,7 @@
if e is None or e == len(matrices[0]):
r = 1.0
pb = problem_R4(matrices, meth, rho, intercept = intercept)
e = len(matrices[0])
else:
r = np.sqrt(e / len(matrices[0]))
pb = problem_R4(
@@ -101,6 +103,9 @@ def Classo(

elif typ == "C2":

assert set(matrices[2]).issubset({1, -1})


lambdamax = h_lambdamax(
matrices, rho_classification, typ="C2", intercept = intercept
)
@@ -118,6 +123,8 @@

elif typ == "C1":

assert set(matrices[2]).issubset({1, -1})

lambdamax = h_lambdamax(matrices, 0, typ="C1", intercept = intercept)
if true_lam:
out = solve_path(
@@ -159,7 +166,7 @@ def Classo(
else:
beta = beta / w

if typ in ["R3", "R4"]:
if typ in ["R3", "R4"] and return_sigm:
if get_lambdamax:
return (lambdamax, beta, s)
else:
@@ -181,7 +188,7 @@ def pathlasso(
true_lam = False,
e = None,
return_sigm = False,
rho_classification = 0.0,
rho_classification = -1.0,
w = None,
intercept = False,
):
@@ -257,6 +264,8 @@ def pathlasso(

elif typ == "C2":

assert set(matrices[2]).issubset({1, -1})

lambdamax = h_lambdamax(
matrices, rho_classification, typ="C2", intercept = intercept
)
@@ -273,6 +282,8 @@

elif typ == "C1":

assert set(matrices[2]).issubset({1, -1})

lambdamax = h_lambdamax(matrices, 0, typ="C1", intercept = intercept)
if true_lam:
lambdass = [lamb / lambdamax for lamb in lambdass]
@@ -297,6 +308,7 @@
if intercept:
BETA = np.array([[ybar - Xbar.dot(beta)] + list(beta) for beta in BETA])


real_path = [lam * lambdamax for lam in lambdass]

if w is not None:
@@ -307,6 +319,7 @@

BETA = np.array([beta / ww for beta in BETA])


if typ in ["R3", "R4"] and return_sigm:
return (np.array(BETA), real_path, S)
return (np.array(BETA), real_path)
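
Both classification branches (typ "C1" and "C2") now assert that the response vector passed as matrices[2] is encoded as ±1 labels before computing lambdamax. A minimal sketch of what that check accepts and rejects, using made-up label vectors (not data from the repository):

```python
import numpy as np

# Hypothetical label vectors illustrating the new precondition.
y_ok = np.array([1, -1, 1, 1, -1])
assert set(y_ok).issubset({1, -1})      # passes: only +/-1 labels

y_bad = np.array([1, 0, -1])
print(set(y_bad).issubset({1, -1}))     # False -> the C1/C2 assertion would fail
```
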
213 changes: 104 additions & 109 deletions classo/misc_functions.py
@@ -43,37 +43,10 @@


"""functions required in solver :
rescale, theoretical_lam, min_LS, affichage, check_size, tree_to_matrix
theoretical_lam, min_LS, affichage, check_size, tree_to_matrix
"""


def rescale(matrices):
"""Function that rescale the matrix and returns its scale
Substract the mean of y, then divides by its norm. Also divide each colomn of X by its norm.
This will change the solution, not only by scaling it, because then the L1 norm will affect every component equally (and not only the variables with big size)
Args:
matrices (tuple) : tuple of three ndarray matrices corresponding to (X,C,y)
Returns:
tuple : tuple of the three corresponding matrices after normalization
tuple : tuple of the three information one need to recover the initial data : lX (list of initial colomn-norms of X), ly (initial norm of y), my (initial mean of y)
"""
(X, C, y) = matrices

my = sum(y) / len(y)
lX = [LA.norm(X[:, j]) for j in range(len(X[0]))]
ly = LA.norm(y - my * np.ones(len(y)))
Xn = np.array(
[[X[i, j] / (lX[j]) for j in range(len(X[0]))] for i in range(len(X))]
)
yn = np.array([(y[j] - my) / ly for j in range(len(y))])
Cn = np.array(
[[C[i, j] * ly / lX[j] for j in range(len(X[0]))] for i in range(len(C))]
)
return ((Xn, Cn, yn), (lX, ly, my))


def theoretical_lam(n, d):
@@ -131,26 +104,16 @@ def affichage(
path,
title = " ",
labels = False,
pix = False,
xlabel = " ",
ylabel = " ",
naffichage = 10,
):
BETAS = np.array(LISTE_BETA)
l_index = influence(BETAS, naffichage)
plt.figure(figsize = (10, 3), dpi = 80)
if pix == "path":
plt.plot(path, [0] * len(path), "r+")
plot_betai(labels, l_index, path, BETAS)
plt.title(title), plt.legend(loc = 4, borderaxespad = 0.0)
plt.xlabel(xlabel), plt.ylabel(ylabel)
if type(pix) == bool and pix:
plt.matshow(
[
[(abs(LISTE_BETA[i][j]) > 1e-2) for i in range(len(LISTE_BETA))]
for j in range(len(LISTE_BETA[0]))
]
), plt.show()



def check_size(X, y, C):
@@ -175,47 +138,6 @@ def check_size(X, y, C):
return X2, y2, C2


def tree_to_matrix(tree, label, with_repr = False):
# to do here : given a skbio tree and the beta-labels in a given order, return the matrix A and the new labels corresponding
dicti = dict()
d = len(label)
LEAVES = [tip.name for tip in tree.tips()]
order = (
[]
) # list that will give the order in which the codes are added in the dicti, such that it will be easy to remove similar nodes
for i in range(d):
name_leaf = label[i]
dicti[name_leaf] = np.zeros(d)
dicti[name_leaf][i] = 1
order.append(name_leaf)
if name_leaf in LEAVES:
for n in tree.find(name_leaf).ancestors():
ancest = n.name
if ancest[-1] != "_":
if ancest not in dicti:
dicti[ancest] = np.zeros(d)
order.append(ancest)
dicti[ancest][i] = 1

L, label2, tree_repr = [], [], []

if with_repr:
for n, l in tree.to_taxonomy():
nam = n.name
if nam in label:
tree_repr.append((nam, l))

for node in tree.levelorder():
nam = node.name
if nam in dicti and nam not in label2:
label2.append(nam)
L.append(dicti[nam])

to_keep = remove_same_vect(L, label2, order)

return np.array(L)[to_keep].T, np.array(label2)[to_keep], tree_repr


"""functions required in init() :
random_data, csv_to_np, mat_to_np, clr, theoretical_lam, to_zarr
"""
@@ -394,29 +316,26 @@ def to_zarr(obj, name, root, first = True):


"""
misc of compact func
misc of solve_R.. functions
"""


def unpenalized(matrix, eps = 1e-3, intercept = False):
def unpenalized(cmatrices, intercept = False):

if intercept:
A1, C1, y = matrix
A1, C1, y = cmatrices
A = np.concatenate([np.ones((len(A1), 1)), A1], axis=1)
C = np.concatenate([np.zeros((len(C1), 1)), C1], axis=1)

else:
A, C, y = matrix
A, C, y = cmatrices

M1 = np.concatenate([A.T.dot(A), C.T], axis=1)
M2 = np.concatenate([C, np.zeros((len(C), len(C)))], axis=1)
M = np.concatenate([M1, M2], axis=0)
b = np.concatenate([A.T.dot(y), np.zeros(len(C))])

try:
return LA.inv(M).dot(b)[: len(A[0])]
except LA.LinAlgError:
return LA.inv(M + eps * np.eye(len(M))).dot(b)[: len(A[0])]
sol = LA.lstsq( M, b, rcond=None)[0]
return sol[: len(A[0])]


"""
@@ -450,8 +369,59 @@ def normalize(lb, lna, ly):
return lb


def denorm(B, lna, ly):
return np.array([ly * B[j] / (np.sqrt(len(B)) * lna[j]) for j in range(len(B))])
def proj_c(M, d):
# Compute I - C^t (C.C^t)^-1 . C : the projection on Ker(C)
if LA.matrix_rank(M) == 0:
return np.eye(d)
return np.eye(d) - LA.multi_dot([M.T, np.linalg.inv(M.dot(M.T)), M])


def remove_same_vect(L, label, order):
K = len(L)
to_keep = np.array([True] * K)
j = label.index(order[0])
col = L[j]
for i in range(K - 1):
new_j = label.index(order[i + 1])
new_col = L[new_j]
if np.array_equal(col, new_col):
to_keep[new_j] = False
else:
j, col = new_j, new_col

return to_keep



"""
def rescale(matrices):
""Function that rescale the matrix and returns its scale
Substract the mean of y, then divides by its norm. Also divide each colomn of X by its norm.
This will change the solution, not only by scaling it, because then the L1 norm will affect every component equally (and not only the variables with big size)
Args:
matrices (tuple) : tuple of three ndarray matrices corresponding to (X,C,y)
Returns:
tuple : tuple of the three corresponding matrices after normalization
tuple : tuple of the three information one need to recover the initial data : lX (list of initial colomn-norms of X), ly (initial norm of y), my (initial mean of y)
""
(X, C, y) = matrices
my = sum(y) / len(y)
lX = [LA.norm(X[:, j]) for j in range(len(X[0]))]
ly = LA.norm(y - my * np.ones(len(y)))
Xn = np.array(
[[X[i, j] / (lX[j]) for j in range(len(X[0]))] for i in range(len(X))]
)
yn = np.array([(y[j] - my) / ly for j in range(len(y))])
Cn = np.array(
[[C[i, j] * ly / lX[j] for j in range(len(X[0]))] for i in range(len(C))]
)
return ((Xn, Cn, yn), (lX, ly, my))
def hub(r, rho):
@@ -478,24 +448,49 @@ def L_H(A, y, lamb, x, rho):
return hub(A.dot(x) - y, rho) + lamb * LA.norm(x, 1)
def proj_c(M, d):
# Compute I - C^t (C.C^t)^-1 . C : the projection on Ker(C)
if LA.matrix_rank(M) == 0:
return np.eye(d)
return np.eye(d) - LA.multi_dot([M.T, np.linalg.inv(M.dot(M.T)), M])
def denorm(B, lna, ly):
return np.array([ly * B[j] / (np.sqrt(len(B)) * lna[j]) for j in range(len(B))])
def remove_same_vect(L, label, order):
K = len(L)
to_keep = np.array([True] * K)
j = label.index(order[0])
col = L[j]
for i in range(K - 1):
new_j = label.index(order[i + 1])
new_col = L[new_j]
if np.array_equal(col, new_col):
to_keep[new_j] = False
else:
j, col = new_j, new_col
def tree_to_matrix(tree, label, with_repr = False):
# to do here : given a skbio tree and the beta-labels in a given order, return the matrix A and the new labels corresponding
dicti = dict()
d = len(label)
LEAVES = [tip.name for tip in tree.tips()]
order = (
[]
) # list that will give the order in which the codes are added in the dicti, such that it will be easy to remove similar nodes
for i in range(d):
name_leaf = label[i]
dicti[name_leaf] = np.zeros(d)
dicti[name_leaf][i] = 1
order.append(name_leaf)
if name_leaf in LEAVES:
for n in tree.find(name_leaf).ancestors():
ancest = n.name
if ancest[-1] != "_":
if ancest not in dicti:
dicti[ancest] = np.zeros(d)
order.append(ancest)
dicti[ancest][i] = 1
return to_keep
L, label2, tree_repr = [], [], []
if with_repr:
for n, l in tree.to_taxonomy():
nam = n.name
if nam in label:
tree_repr.append((nam, l))
for node in tree.levelorder():
nam = node.name
if nam in dicti and nam not in label2:
label2.append(nam)
L.append(dicti[nam])
to_keep = remove_same_vect(L, label2, order)
return np.array(L)[to_keep].T, np.array(label2)[to_keep], tree_repr
"""
