diff --git a/benchmark/C1/bm-C1-plot.py b/benchmark/C1/bm-C1-plot.py new file mode 100644 index 0000000..1a1e45d --- /dev/null +++ b/benchmark/C1/bm-C1-plot.py @@ -0,0 +1,58 @@ +import numpy as np +import matplotlib.pyplot as plt + +import os +my_path = os.path.dirname(__file__) + +output = os.path.join(my_path, "output/") + + +data = np.load(os.path.join(my_path, 'bm-C1.npz')) + +labels = [str(size) for size in data["SIZES"]] + + +fig = plt.figure() + +for name in ["T_pa", "T_cvx"]: + T = data[name] + N_size, N_data = T.shape + mean = np.mean(T, axis=1) + std = np.std(T, axis=1)/np.sqrt(N_data) + label = name[2:] + plt.errorbar(range(N_size), mean, yerr=std, label=label) + +plt.title("Running time") +plt.xticks(range(N_size), labels) +plt.legend() +plt.savefig(os.path.join(output, "bm-C1-times.png")) +plt.show() + + +for name in ["L_pa", "L_cvx"]: + L = data[name] - data["L_pa"] + N_size, N_data = L.shape + mean = np.mean(L, axis=1) + std = np.std(L, axis=1)/np.sqrt(N_data) + label = name[2:] + plt.errorbar(range(N_size), mean, yerr=std, label=label) + +plt.title("Value of loss function") +plt.xticks(range(N_size), labels) +plt.legend() +plt.savefig(os.path.join(output, "bm-C1-losses.png")) +plt.show() + +for name in ["C_pa", "C_cvx"]: + L = data[name] + N_size, N_data = L.shape + mean = np.mean(L, axis=1) + std = np.std(L, axis=1)/np.sqrt(N_data) + label = name[2:] + plt.errorbar(range(N_size), mean, yerr=std, label=label) + +plt.title("Value of norm(Cbeta)") +plt.xticks(range(N_size), labels) +plt.legend() +plt.savefig(os.path.join(output, "bm-C1-constraint.png")) +plt.show() \ No newline at end of file diff --git a/benchmark/C1/bm-C1.py b/benchmark/C1/bm-C1.py new file mode 100644 index 0000000..ed3b163 --- /dev/null +++ b/benchmark/C1/bm-C1.py @@ -0,0 +1,100 @@ +import numpy as np +import numpy.linalg as LA +from classo import classo_problem, random_data +import cvxpy as cp +from time import time + +import os +my_path = os.path.dirname(__file__) + +l = [1, 2, 5, 7] + + + +def loss(X, y, lam, beta): + lamb = lam*2*LA.norm(X.T.dot(y),np.infty) + return np.sum((X.dot(beta) - y)**2) + lamb*np.sum(abs(beta)) + + +d_nonzero = 5 +sigma = 0.5 +lam = 0.1 + +N_per_data = 5 +N_data = 10 +SIZES = [ + (50, 100), + (100, 100), + (100, 200), + (200, 200) +] + +N_sizes = len(SIZES) + +T_pa = np.zeros((N_sizes, N_data)) +L_pa = np.zeros((N_sizes, N_data)) +C_pa = np.zeros((N_sizes, N_data)) + +T_cvx = np.zeros((N_sizes, N_data)) +L_cvx = np.zeros((N_sizes, N_data)) +C_cvx = np.zeros((N_sizes, N_data)) + + + +for s in range(N_sizes): + + m, d = SIZES[s] + + + for i in range(N_data): + (X, C, y), sol = random_data(m, d, d_nonzero, 1, sigma, zerosum=True, seed=i) + lamb = lam*2*LA.norm(X.T.dot(y),np.infty) + + t0 = time() + # classo Path-Alg + b_pa = [] + for j in range(N_per_data): + problem = classo_problem(X, y, C) + problem.formulation.concomitant = False + problem.model_selection.StabSel = False + problem.model_selection.LAMfixed = True + problem.model_selection.LAMfixedparameters.rescaled_lam = True + problem.model_selection.LAMfixedparameters.lam = lam + problem.model_selection.LAMfixedparameters.numerical_method = 'Path-Alg' + problem.solve() + b_pa.append(problem.solution.LAMfixed.beta) + b_pa = np.array(b_pa) + + t1 = time() + + # cvx + b_cvx = [] + for j in range(N_per_data): + beta = cp.Variable(d) + objective, constraints = cp.Minimize(cp.sum_squares(X@beta-y)+ lamb*cp.norm(beta, 1)), [C@beta == 0] + prob = cp.Problem(objective, constraints) + result = prob.solve(warm_start = False, eps_abs = 1e-5) + b_cvx.append(beta.value) + b_cvx = np.array(b_cvx) + + t2 = time() + + + T_pa[s, i] = (t1 - t0) / N_per_data + L_pa[s, i] = loss(X, y, lam, np.mean(b_pa, axis=0)) + C_pa[s, i] = np.linalg.norm(C.dot(np.mean(b_pa, axis=0))) + + T_cvx[s, i] = (t2 - t1) / N_per_data + L_cvx[s, i] = loss(X, y, lam, np.mean(b_cvx, axis=0)) + C_cvx[s, i] = np.linalg.norm(C.dot(np.mean(b_cvx, axis=0))) + +np.savez( + os.path.join(my_path, 'bm-C1.npz'), + T_pa = T_pa, + L_pa = L_pa, + C_pa = C_pa, + T_cvx = T_cvx, + L_cvx = L_cvx, + C_cvx = C_cvx, + SIZES = np.array(SIZES) +) diff --git a/benchmark/C2/bm-C2-plot.py b/benchmark/C2/bm-C2-plot.py new file mode 100644 index 0000000..8b0e0ac --- /dev/null +++ b/benchmark/C2/bm-C2-plot.py @@ -0,0 +1,59 @@ +import numpy as np +import matplotlib.pyplot as plt + + +import os +my_path = os.path.dirname(__file__) + +output = os.path.join(my_path, "output/") + +data = np.load(os.path.join(my_path, 'bm-C2.npz')) + +labels = [str(size) for size in data["SIZES"]] + + + +fig = plt.figure() + +for name in ["T_pa", "T_cvx"]: + T = data[name] + N_size, N_data = T.shape + mean = np.mean(T, axis=1) + std = np.std(T, axis=1)/np.sqrt(N_data) + label = name[2:] + plt.errorbar(range(N_size), mean, yerr=std, label=label) + +plt.title("Running time") +plt.xticks(range(N_size), labels) +plt.legend() +plt.savefig(os.path.join(output, "bm-C2-times.png")) +plt.show() + + +for name in ["L_pa", "L_cvx"]: + L = data[name] - data["L_pa"] + N_size, N_data = L.shape + mean = np.mean(L, axis=1) + std = np.std(L, axis=1)/np.sqrt(N_data) + label = name[2:] + plt.errorbar(range(N_size), mean, yerr=std, label=label) + +plt.title("Value of loss function") +plt.xticks(range(N_size), labels) +plt.legend() +plt.savefig(os.path.join(output, "bm-C2-losses.png")) +plt.show() + +for name in ["C_pa", "C_cvx"]: + L = data[name] + N_size, N_data = L.shape + mean = np.mean(L, axis=1) + std = np.std(L, axis=1)/np.sqrt(N_data) + label = name[2:] + plt.errorbar(range(N_size), mean, yerr=std, label=label) + +plt.title("Value of norm(Cbeta)") +plt.xticks(range(N_size), labels) +plt.legend() +plt.savefig(os.path.join(output, "bm-C2-constraint.png")) +plt.show() \ No newline at end of file diff --git a/benchmark/C2/bm-C2.py b/benchmark/C2/bm-C2.py new file mode 100644 index 0000000..08067c9 --- /dev/null +++ b/benchmark/C2/bm-C2.py @@ -0,0 +1,119 @@ +import numpy as np +import numpy.linalg as LA +from classo import classo_problem, random_data +import cvxpy as cp +from time import time + +import os +my_path = os.path.dirname(__file__) + +l = [1, 2, 5, 7] + +def huber(r, rho): + F = abs(r) >= rho + h = r**2 + h[F] = 2*rho*abs(r)[F] - rho**2 + return h + + + +def loss(X, y, lam, rho, beta): + lamb = lam*2*LA.norm(X.T.dot(y),np.infty) + r = X.dot(beta) - y + return np.sum(huber(r,rho)) + lamb*np.sum(abs(beta)) + + +d_nonzero = 5 +sigma = 0.5 +lam = 0.1 + +N_per_data = 5 +N_data = 10 +SIZES = [ + (50, 100), + (100, 100), + (100, 200), + (200, 200) +] + + +N_sizes = len(SIZES) + + +T_pa = np.zeros((N_sizes, N_data)) +L_pa = np.zeros((N_sizes, N_data)) +C_pa = np.zeros((N_sizes, N_data)) + +T_cvx = np.zeros((N_sizes, N_data)) +L_cvx = np.zeros((N_sizes, N_data)) +C_cvx = np.zeros((N_sizes, N_data)) + + + +for s in range(N_sizes): + + m, d = SIZES[s] + + + for i in range(N_data): + (X, C, y), sol = random_data(m, d, d_nonzero, 1, sigma, zerosum=True, seed=i) + rho = 1.345 * np.sqrt(np.mean(y**2)) + lamb = lam*2*LA.norm(X.T.dot(y),np.infty) + + t0 = time() + # classo Path-Alg + b_pa = [] + for j in range(N_per_data): + problem = classo_problem(X, y, C) + problem.formulation.concomitant = False + problem.formulation.huber = True + problem.formulation.scale_rho = False + problem.formulation.rho = rho + problem.model_selection.StabSel = False + problem.model_selection.LAMfixed = True + problem.model_selection.LAMfixedparameters.rescaled_lam = True + problem.model_selection.LAMfixedparameters.lam = lam + problem.model_selection.LAMfixedparameters.numerical_method = 'Path-Alg' + problem.solve() + b_pa.append(problem.solution.LAMfixed.beta) + b_pa = np.array(b_pa) + + t1 = time() + # cvx + b_cvx = [] + for j in range(N_per_data): + beta = cp.Variable(d) + objective = cp.Minimize(cp.sum(cp.huber(X@beta-y, rho))+ lamb*cp.norm(beta, 1)) + constraints = [C@beta == 0] + prob = cp.Problem(objective, constraints) + result = prob.solve(warm_start = False, eps_abs = 1e-5) + b_cvx.append(beta.value) + b_cvx = np.array(b_cvx) + + t2 = time() + + + T_pa[s, i] = (t1 - t0) / N_per_data + L_pa[s, i] = loss(X, y, lam, rho, np.mean(b_pa, axis=0)) + C_pa[s, i] = np.linalg.norm(C.dot(np.mean(b_pa, axis=0))) + + T_cvx[s, i] = (t2 - t1) / N_per_data + L_cvx[s, i] = loss(X, y, lam, rho, np.mean(b_cvx, axis=0)) + C_cvx[s, i] = np.linalg.norm(C.dot(np.mean(b_cvx, axis=0))) + +np.savez( + os.path.join(my_path, 'bm-C2.npz'), + T_pa = T_pa, + L_pa = L_pa, + C_pa = C_pa, + T_pds = T_pds, + L_pds = L_pds, + C_pds = C_pds, + T_dr = T_dr, + L_dr = L_dr, + C_dr = C_dr, + T_cvx = T_cvx, + L_cvx = L_cvx, + C_cvx = C_cvx, + SIZES = np.array(SIZES) +) diff --git a/benchmark/Makefile b/benchmark/Makefile index 31f8893..b2013c8 100644 --- a/benchmark/Makefile +++ b/benchmark/Makefile @@ -1,9 +1,13 @@ # Minimal makefile for benchmark # -python3.9 R1/bm-R1.py -python3.9 R1/bm-R1-plot.py - -python3.9 R2/bm-R2.py -python3.9 R2/bm-R2-plot.py +benchmark: + python3.9 R1/bm-R1.py + python3.9 R1/bm-R1-plot.py + python3.9 R2/bm-R2.py + python3.9 R2/bm-R2-plot.py + python3.9 C1/bm-C1.py + python3.9 C1/bm-C1-plot.py + python3.9 C2/bm-C2.py + python3.9 C2/bm-C2-plot.py diff --git a/benchmark/R1/bm-R1-losses.png b/benchmark/R1/bm-R1-losses.png deleted file mode 100644 index bfe4a02..0000000 Binary files a/benchmark/R1/bm-R1-losses.png and /dev/null differ diff --git a/benchmark/R1/bm-R1-plot.py b/benchmark/R1/bm-R1-plot.py index 878e4e4..6e35a0e 100644 --- a/benchmark/R1/bm-R1-plot.py +++ b/benchmark/R1/bm-R1-plot.py @@ -1,11 +1,15 @@ import numpy as np import matplotlib.pyplot as plt -data = np.load('bm-R1.npz') +import os +my_path = os.path.dirname(__file__) + +output = os.path.join(my_path, "output/") + + +data = np.load(os.path.join(my_path, 'bm-R1.npz')) -print(data["SIZES"]) labels = [str(size) for size in data["SIZES"]] -print(labels) fig = plt.figure() @@ -18,9 +22,10 @@ label = name[2:] plt.errorbar(range(N_size), mean, yerr=std, label=label) +plt.title("Running time") plt.xticks(range(N_size), labels) plt.legend() -plt.savefig("bm-R1-times.png") +plt.savefig(os.path.join(output, "bm-R1-times.png")) plt.show() @@ -32,13 +37,14 @@ label = name[2:] plt.errorbar(range(N_size), mean, yerr=std, label=label) +plt.title("Value of loss function") plt.xticks(range(N_size), labels) plt.legend() -plt.savefig("bm-R1-losses.png") +plt.savefig(os.path.join(output, "bm-R1-losses.png")) plt.show() -for name in ["C_pa", "C_pds", "C_dr", "Cs_cvx"]: - L = data[name] - data["L_pa"] +for name in ["C_pa", "C_pds", "C_dr", "C_cvx"]: + L = data[name] N_size, N_data = L.shape mean = np.mean(L, axis=1) std = np.std(L, axis=1)/np.sqrt(N_data) @@ -48,5 +54,5 @@ plt.title("Value of norm(Cbeta)") plt.xticks(range(N_size), labels) plt.legend() -plt.savefig("bm-R1-constraint.png") +plt.savefig(os.path.join(output, "bm-R1-constraint.png")) plt.show() \ No newline at end of file diff --git a/benchmark/R1/bm-R1-times.png b/benchmark/R1/bm-R1-times.png deleted file mode 100644 index b4efa58..0000000 Binary files a/benchmark/R1/bm-R1-times.png and /dev/null differ diff --git a/benchmark/R1/bm-R1.npz b/benchmark/R1/bm-R1.npz index 51b4b9e..ba0fb96 100644 Binary files a/benchmark/R1/bm-R1.npz and b/benchmark/R1/bm-R1.npz differ diff --git a/benchmark/R1/bm-R1.py b/benchmark/R1/bm-R1.py index 220541f..16b332e 100644 --- a/benchmark/R1/bm-R1.py +++ b/benchmark/R1/bm-R1.py @@ -4,6 +4,9 @@ import cvxpy as cp from time import time +import os +my_path = os.path.dirname(__file__) + l = [1, 2, 5, 7] @@ -107,7 +110,7 @@ def loss(X, y, lam, beta): beta = cp.Variable(d) objective, constraints = cp.Minimize(cp.sum_squares(X@beta-y)+ lamb*cp.norm(beta, 1)), [C@beta == 0] prob = cp.Problem(objective, constraints) - result = prob.solve(warm_start = False,eps_abs = 1e-5) + result = prob.solve(warm_start = False, eps_abs = 1e-5) b_cvx.append(beta.value) b_cvx = np.array(b_cvx) @@ -131,7 +134,7 @@ def loss(X, y, lam, beta): C_cvx[s, i] = np.linalg.norm(C.dot(np.mean(b_cvx, axis=0))) np.savez( - 'bm-R1.npz', + os.path.join(my_path, 'bm-R1.npz'), T_pa = T_pa, L_pa = L_pa, C_pa = C_pa, diff --git a/benchmark/R1/output/bm-R1-constraint.png b/benchmark/R1/output/bm-R1-constraint.png new file mode 100644 index 0000000..a8dfc64 Binary files /dev/null and b/benchmark/R1/output/bm-R1-constraint.png differ diff --git a/benchmark/R1/output/bm-R1-losses.png b/benchmark/R1/output/bm-R1-losses.png new file mode 100644 index 0000000..02e297a Binary files /dev/null and b/benchmark/R1/output/bm-R1-losses.png differ diff --git a/benchmark/R1/output/bm-R1-times.png b/benchmark/R1/output/bm-R1-times.png new file mode 100644 index 0000000..82ebcfa Binary files /dev/null and b/benchmark/R1/output/bm-R1-times.png differ diff --git a/benchmark/R2/bm-R2-plot.py b/benchmark/R2/bm-R2-plot.py index 775c0af..8eed0f4 100644 --- a/benchmark/R2/bm-R2-plot.py +++ b/benchmark/R2/bm-R2-plot.py @@ -1,11 +1,18 @@ import numpy as np import matplotlib.pyplot as plt -data = np.load('bm-R2.npz') -print(data["SIZES"]) +import os +my_path = os.path.dirname(__file__) + +output = os.path.join(my_path, "output/") + + + +data = np.load(os.path.join(my_path, 'bm-R2.npz')) + labels = [str(size) for size in data["SIZES"]] -print(labels) + fig = plt.figure() @@ -18,9 +25,10 @@ label = name[2:] plt.errorbar(range(N_size), mean, yerr=std, label=label) +plt.title("Running time") plt.xticks(range(N_size), labels) plt.legend() -plt.savefig("bm-R2-times.png") +plt.savefig(os.path.join(output, "bm-R2-times.png")) plt.show() @@ -32,13 +40,14 @@ label = name[2:] plt.errorbar(range(N_size), mean, yerr=std, label=label) +plt.title("Value of loss function") plt.xticks(range(N_size), labels) plt.legend() -plt.savefig("bm-R2-losses.png") +plt.savefig(os.path.join(output, "bm-R2-losses.png")) plt.show() -for name in ["C_pa", "C_pds", "C_dr", "Cs_cvx"]: - L = data[name] - data["L_pa"] +for name in ["C_pa", "C_pds", "C_dr", "C_cvx"]: + L = data[name] N_size, N_data = L.shape mean = np.mean(L, axis=1) std = np.std(L, axis=1)/np.sqrt(N_data) @@ -48,5 +57,5 @@ plt.title("Value of norm(Cbeta)") plt.xticks(range(N_size), labels) plt.legend() -plt.savefig("bm-R2-constraint.png") +plt.savefig(os.path.join(output, "bm-R2-constraint.png")) plt.show() \ No newline at end of file diff --git a/benchmark/R2/bm-R2.npz b/benchmark/R2/bm-R2.npz new file mode 100644 index 0000000..dee71e4 Binary files /dev/null and b/benchmark/R2/bm-R2.npz differ diff --git a/benchmark/R2/bm-R2.py b/benchmark/R2/bm-R2.py index dfb6ca3..231efcb 100644 --- a/benchmark/R2/bm-R2.py +++ b/benchmark/R2/bm-R2.py @@ -4,6 +4,9 @@ import cvxpy as cp from time import time +import os +my_path = os.path.dirname(__file__) + l = [1, 2, 5, 7] def huber(r, rho): @@ -124,9 +127,10 @@ def loss(X, y, lam, rho, beta): b_cvx = [] for j in range(N_per_data): beta = cp.Variable(d) - objective, constraints = cp.Minimize(cp.sum_squares(cp.huber(X@beta-y), rho)+ lamb*cp.norm(beta, 1)), [C@beta == 0] + objective = cp.Minimize(cp.sum(cp.huber(X@beta-y, rho))+ lamb*cp.norm(beta, 1)) + constraints = [C@beta == 0] prob = cp.Problem(objective, constraints) - result = prob.solve(warm_start = False,eps_abs = 1e-5) + result = prob.solve(warm_start = False, eps_abs = 1e-5) b_cvx.append(beta.value) b_cvx = np.array(b_cvx) @@ -150,7 +154,7 @@ def loss(X, y, lam, rho, beta): C_cvx[s, i] = np.linalg.norm(C.dot(np.mean(b_cvx, axis=0))) np.savez( - 'bm-R1.npz', + os.path.join(my_path, 'bm-R2.npz'), T_pa = T_pa, L_pa = L_pa, C_pa = C_pa, diff --git a/benchmark/R2/output/bm-R2-constraint.png b/benchmark/R2/output/bm-R2-constraint.png new file mode 100644 index 0000000..0e959b7 Binary files /dev/null and b/benchmark/R2/output/bm-R2-constraint.png differ diff --git a/benchmark/R2/output/bm-R2-losses.png b/benchmark/R2/output/bm-R2-losses.png new file mode 100644 index 0000000..c0a0fa4 Binary files /dev/null and b/benchmark/R2/output/bm-R2-losses.png differ diff --git a/benchmark/R2/output/bm-R2-times.png b/benchmark/R2/output/bm-R2-times.png new file mode 100644 index 0000000..ebd614a Binary files /dev/null and b/benchmark/R2/output/bm-R2-times.png differ