
Commit 1cc49d2

Author: Pavelrst
Committed: implementation started
1 parent 235a3e3 commit 1cc49d2

9 files changed: +448 -0 lines changed
Binary file (552 KB) not shown.

Diff for: Optimizer_utils.py

+109
@@ -0,0 +1,109 @@
import matplotlib.pyplot as plt
from functions_utils import *
from armijo_utils import Armijo_method
from newton_utils import NewtonMethod
from penalty_method import AugmentedLagrangian


class Gradient_descent():
    def __init__(self, method_type='steepest_descent', threshold=0.00001, step_size_estimator=Armijo_method(),
                 max_steps=100000, verbose=True):
        self.threshold = threshold
        self.max_steps = max_steps
        self.verbose = verbose
        self.f_val_list = []
        self.step_sizes_list = []
        self.step_size_estimator = step_size_estimator
        self.method_type = method_type

    def optimize(self, func, start_point):
        self.f_val_list.append(func.val(start_point))

        x = start_point

        for step in range(self.max_steps):
            print("step:", step)
            prev_x = x
            if self.method_type == 'steepest_descent':
                x = self.optimizer_step(x, func)
            elif self.method_type == 'newton_method':
                x = self.optimizer_step_newton(x, func)
            else:
                print("Direction method not selected")
                break
            self.f_val_list.append(func.val(x))

            # if self.verbose:
            #     print("f(x)=", func.val(x), " current point= ~", np.round(x, 5))

            # Stop once the gradient norm drops below the accuracy threshold.
            if np.linalg.norm(func.grad(x)) < self.threshold:
                print("Optimizer reached accuracy threshold after", step, "iterations!")
                break
        return x

    def optimizer_step(self, x, func):
        # Steepest descent: step along the negative gradient with an Armijo step size.
        step_size = self.step_size_estimator.calc_step_size(x, func, direction=func.grad(x))
        x = x - step_size * func.grad(x)
        # self.step_size_estimator.armijo_plot()
        self.step_sizes_list.append(step_size)
        return x

    def optimizer_step_newton(self, x, func):
        # Newton step: move along the Hessian-preconditioned gradient direction.
        newton = NewtonMethod()
        d = newton.direction(x, func)
        step_size = self.step_size_estimator.calc_step_size(x, func, direction=d)
        x = x - step_size * d
        self.step_sizes_list.append(step_size)
        return x

    def plot_step_sizes(self):
        iterations_list = range(len(self.step_sizes_list))

        a, = plt.plot(iterations_list, self.step_sizes_list, label='step size')
        plt.legend(handles=[a])
        plt.ylabel('step size')
        plt.xlabel('iterations')
        plt.show()

    def get_convergence(self, val_optimal):
        '''
        Returns the convergence data recorded during the run.
        :param val_optimal: the global minimum value of the function
        :return: (iterations_list, converg_list) with converg_list[i] = f(x_i) - f*
        '''
        converg_list = []
        iterations_list = []
        for idx, val in enumerate(self.f_val_list):
            converg_list.append(val - val_optimal)
            iterations_list.append(idx)

        return iterations_list, converg_list

    def plot_convergence(self, val_optimal, f_name='plot title', marker=None, save=True):
        '''
        Plots the convergence rate.
        :param val_optimal: the global minimum value of the function
        '''
        converg_list = []
        iterations_list = []
        for idx, val in enumerate(self.f_val_list):
            # converg_list.append(abs(val - val_optimal))
            converg_list.append(val)
            iterations_list.append(idx)

        plt.plot(iterations_list, converg_list)
        plt.ylabel('f(x)')
        plt.xlabel('iterations')
        # plt.yscale('log')
        label = f_name + ' - ' + self.method_type + ' convergence rate'
        plt.title(label)
        if marker is not None:
            x, y = marker
            plt.plot(x, y, 'ro')
        plt.gcf()
        if save:
            name = label + '_fig.JPEG'
            plt.savefig(name, bbox_inches='tight')
        plt.show()

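A minimal usage sketch (not part of this commit), assuming the Quadratic_general class from functions_utils.py below; it runs steepest descent with the default Armijo step-size rule on an unconstrained quadratic and plots the recorded step sizes.

# Hypothetical usage sketch: minimize f(x) = 1/2 * xT Q x + dT x + e
# with the Gradient_descent class above.
import numpy as np
from functions_utils import Quadratic_general
from Optimizer_utils import Gradient_descent

Q = np.array([[2.0, 0.0],
              [0.0, 1.0]])
d = np.array([[-20.0],
              [-2.0]])
f = Quadratic_general(Q, d, 51.0)

opt = Gradient_descent(method_type='steepest_descent', threshold=1e-5, max_steps=1000)
x0 = np.array([[1.0],
               [1.0]])
x_star = opt.optimize(f, x0)   # expected to approach [10, 2], where grad(x) = 0
print("x* =", x_star.ravel())
opt.plot_step_sizes()          # Armijo step sizes recorded during the run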
Diff for: armijo_utils.py

+93
@@ -0,0 +1,93 @@
import numpy as np
import matplotlib.pyplot as plt


class Armijo_method():
    def __init__(self, initial_alpha=1, sigma=0.25, beta=0.5):
        self.initial_alpha = initial_alpha
        self.sigma = sigma
        self.beta = beta

    def calc_step_size(self, x, func, direction):
        '''
        Denote Phi(alpha) as a function of the step size: Phi(alpha) = f(x + alpha*d) - f(x),
        where d is the search direction. Differentiating Phi with respect to alpha gives
        Phi'(alpha) = f'(x + alpha*d) * d, so
        Phi'(0) = f'(x) * d, i.e. the directional derivative (here, along the gradient).
        Denote c = Phi'(0), so alpha*c is the tangent line to Phi(alpha) at alpha = 0.
        Scaling its slope gives the line sigma*alpha*c, and the Armijo condition reads:
        f(x + alpha*d) - f(x) <= sigma*alpha*c
        Here the search direction d is assumed to be the gradient direction.
        :param x: the point from which we want to make a step
        :param func: function object providing val(x) and grad(x)
        :param direction: search direction d
        :return: step size fulfilling the Armijo condition.
        '''

        self.armijo = ArmijoPhiFunc(x, func.val, func.grad,
                                    direction=direction, sigma=self.sigma, beta=self.beta)

        alpha = self.initial_alpha
        # Backtrack until f(x + alpha*d) - f(x) <= sigma*alpha*c holds.
        while self.armijo.phi_val(alpha) > self.armijo.elevated_tangent_val(alpha):
            alpha = alpha * self.beta
        return alpha

    def armijo_plot(self):
        alphas = np.linspace(0, 1, num=1000)
        phi_list = []
        tangent_list = []
        elevated_tangent_list = []
        for idx, alpha in enumerate(alphas):
            phi_list.append(self.armijo.phi_val(alpha))
            tangent_list.append(self.armijo.tanget_val(alpha))
            elevated_tangent_list.append(self.armijo.elevated_tangent_val(alpha))

        a, = plt.plot(alphas, phi_list, label='phi')
        b, = plt.plot(alphas, tangent_list, label='tangent')
        c, = plt.plot(alphas, elevated_tangent_list, label='elevated_tangent')
        plt.legend(handles=[a, b, c])
        plt.ylabel('Phi(alpha)')
        plt.xlabel('alpha')
        plt.grid()
        plt.show()


class ArmijoPhiFunc:
    def __init__(self, x, func_val, func_grad, direction, sigma=0.25, beta=0.5):
        self.f = func_val
        self.g = func_grad
        self.x = x
        self.direction = direction
        self.sigma = sigma
        self.beta = beta

    def phi_val(self, alpha):
        # The step is taken opposite to the given direction (here, the gradient).
        alpha_d = -alpha * self.direction
        val = self.f(self.x + alpha_d) - self.f(self.x)
        return val

    def tanget_val(self, alpha):
        '''
        :param alpha: given step size.
        :return: value of the tangent to phi at the given alpha.
        '''
        c = -np.matmul(np.transpose(self.g(self.x)), self.direction)
        return alpha * c

    def elevated_tangent_val(self, alpha):
        '''
        :param alpha: given step size.
        :return: value of the sigma-scaled tangent to phi at the given alpha.
        '''
        c = -np.matmul(np.transpose(self.g(self.x)), self.direction)
        return self.sigma * alpha * c

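For reference, a compact restatement of the condition that calc_step_size backtracks on, using the descent step x - alpha * grad f(x) taken by phi_val above: the first alpha in the sequence alpha_0, beta*alpha_0, beta^2*alpha_0, ... is accepted once

f(x - \alpha \nabla f(x)) - f(x) \;\le\; -\sigma \, \alpha \, \lVert \nabla f(x) \rVert^2 .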
Diff for: functions_utils.py

+28
@@ -0,0 +1,28 @@
from scipy.optimize import rosen
import numpy as np
from numdiff_utils import numdiff
from scipy.io import loadmat


class Quadratic_general:
    def __init__(self, Q, d, e):
        '''
        f(x) = 1/2 * xT Q x + dT x + e
        '''
        self.Q = Q
        self.d = d
        self.e = e

    def val(self, x):
        xT = np.transpose(x)
        xT_Q_x = np.matmul(np.matmul(xT, self.Q), x)
        dT_x = np.matmul(np.transpose(self.d), x)
        return (1/2 * xT_Q_x + dT_x + self.e)[0]

    def grad(self, x):
        # Gradient of 1/2 * xT Q x + dT x + e is 1/2 * (Q + QT) x + d.
        Q_QT = self.Q + np.transpose(self.Q)
        return 1/2 * np.matmul(Q_QT, x) + self.d

    def hess(self, x):
        # Hessian is the constant matrix 1/2 * (Q + QT).
        return 1/2 * (self.Q + np.transpose(self.Q))

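A hypothetical quick check (not part of this commit): evaluate val, grad, and hess of Quadratic_general at a point and compare against hand-computed values.

# Hypothetical check of Quadratic_general against hand-computed values.
import numpy as np
from functions_utils import Quadratic_general

Q = np.array([[2.0, 1.0],
              [1.0, 3.0]])
d = np.array([[1.0],
              [-1.0]])
f = Quadratic_general(Q, d, 0.0)

x = np.array([[1.0],
              [2.0]])
print(f.val(x))            # 1/2 * 18 + (-1) + 0 = 8
print(f.grad(x).ravel())   # (Q + QT)/2 @ x + d = [5, 6]
print(f.hess(x))           # (Q + QT)/2 = Q, since Q is symmetric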
Diff for: main.py

+67
@@ -0,0 +1,67 @@
from functions_utils import Quadratic_general
from Optimizer_utils import Gradient_descent
from penalty_method import AugmentedLagrangian
import numpy as np


def main():
    # opt_grad = Gradient_descent(method_type='steepest_descent', max_steps=50, verbose=False)
    objective, constraints_list, start_point, optimal, name = create_problem()
    f = AugmentedLagrangian(objective, constraints_list, start_point, optimal, name)

    # opt_grad.optimize(f, f.starting_point())
    # opt_grad.plot_convergence(f.optimal, f.name)

    opt_newton = Gradient_descent(method_type='newton_method', max_steps=50, verbose=True)

    # Outer augmented-Lagrangian loop: minimize, then update the multipliers and penalty.
    x = f.starting_point()
    for i in range(5):
        print("x=", x)
        x = opt_newton.optimize(f, x)
        f.update_mu(x)
        f.update_p()

    opt_newton.plot_convergence(f.optimal, f.name)
    # opt_newton.optimize(f, f.starting_point())
    # opt_newton.plot_convergence(f.optimal(), f.name)


def create_problem():
    start_point = np.array([[1],
                            [1]])
    optimal = 37 + 2/3
    name = 'Task 2'

    # Objective: 1/2 * xT Q x + bT x + e
    Q = np.array([[2, 0],
                  [0, 1]])
    b = np.array([[-20],
                  [-2]])
    e = 51
    objective = Quadratic_general(Q, b, e)

    # Constraints (linear, expressed as Quadratic_general with Q = 0)
    constraints_list = []

    # 1
    Q_zeros = np.array([[0, 0],
                        [0, 0]])
    b = np.array([[0.5],
                  [1]])
    e = -1
    constraints_list.append(Quadratic_general(Q_zeros, b, e))

    # 2
    b = np.array([[1],
                  [-1]])
    e = 0
    constraints_list.append(Quadratic_general(Q_zeros, b, e))

    # 3
    b = np.array([[-1],
                  [-1]])
    e = 0
    constraints_list.append(Quadratic_general(Q_zeros, b, e))

    return objective, constraints_list, start_point, optimal, name


if __name__ == "__main__":
    main()

Diff for: newton_solver.py

Whitespace-only changes.

Diff for: newton_utils.py

+10
@@ -0,0 +1,10 @@
import numpy as np


class NewtonMethod:
    def direction(self, x, func):
        # Newton direction: H^-1 @ grad. Fall back to the pseudo-inverse
        # when the Hessian is singular.
        try:
            H_inv = np.linalg.inv(func.hess(x))
        except np.linalg.LinAlgError:
            H_inv = np.linalg.pinv(func.hess(x))
        return np.matmul(H_inv, func.grad(x))

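A hypothetical check (not part of this commit): for a quadratic objective, a single full Newton step (step size 1) lands on the unconstrained minimizer, which makes the direction easy to sanity-test.

# Hypothetical check: one full Newton step on a quadratic reaches grad(x) = 0.
import numpy as np
from functions_utils import Quadratic_general
from newton_utils import NewtonMethod

Q = np.array([[2.0, 0.0],
              [0.0, 1.0]])
d = np.array([[-20.0],
              [-2.0]])
f = Quadratic_general(Q, d, 51.0)

x = np.array([[1.0],
              [1.0]])
direction = NewtonMethod().direction(x, f)   # H^-1 @ grad
x_next = x - direction
print(x_next.ravel())   # expected [10, 2], where grad(x) = 0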
Diff for: numdiff_utils.py

+61
@@ -0,0 +1,61 @@
import numpy as np


def numdiff(myfunc, x, par, nargout=1):
    '''
    Computes the gradient and Hessian of myfunc numerically.

    :param myfunc: pointer to either of f1 or f2
    :param x: input vector in R^(m x 1)
    :param par: a dictionary including keys:
        'epsilon' : the increment of x
        'f_par' : parameters dictionary given to the function
        'gradient' : gradient function of f
    :param nargout: like nargout in MATLAB, can be 1 or 2
    :return: [gnum, Hnum]
        gnum : numerical estimate of the function gradient
        Hnum : numerical estimate of the function Hessian
    '''
    assert callable(myfunc)
    assert isinstance(x, np.ndarray)
    assert isinstance(par, dict)
    assert 'epsilon' in par.keys()
    assert isinstance(nargout, int)
    assert nargout in range(1, 3)

    epsilon_tot = par['epsilon']
    assert isinstance(epsilon_tot, float)
    max_abs_val_of_x = max(x.min(), x.max(), key=abs)
    if max_abs_val_of_x != 0:
        epsilon = pow(epsilon_tot, 1 / 3) * max_abs_val_of_x
    else:
        epsilon = epsilon_tot ** 2

    standard_base = np.identity(len(x))

    # Central differences of the function values give the gradient estimate.
    grad = []
    for i in range(0, len(x)):
        right_g_i = myfunc(x + epsilon * standard_base[i])
        left_g_i = myfunc(x - epsilon * standard_base[i])
        g_i = (right_g_i - left_g_i) / (2 * epsilon)
        grad.append(g_i)
    grad = np.array(grad)

    if nargout == 1:
        return grad

    # Central differences of the analytic gradient give the Hessian estimate.
    hess = []
    analytic_grad = par['gradient']
    assert callable(analytic_grad)
    for i in range(0, len(x)):
        right_sample = analytic_grad(x + epsilon * standard_base[i], par['f_par'])
        left_sample = analytic_grad(x - epsilon * standard_base[i], par['f_par'])
        h_i = (right_sample - left_sample) / (2 * epsilon)
        hess.append(h_i)
    hess = np.array(hess)

    return grad, hess

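A hypothetical check (not part of this commit): compare the analytic gradient of a quadratic with the central-difference estimate from numdiff. The point passed to numdiff is kept 1-D so that x +/- epsilon * e_i keeps its shape, while the analytic gradient uses the column-vector form expected by Quadratic_general.

# Hypothetical check: numdiff gradient vs. analytic gradient of a quadratic.
import numpy as np
from functions_utils import Quadratic_general
from numdiff_utils import numdiff

Q = np.array([[2.0, 1.0],
              [1.0, 3.0]])
d = np.array([[1.0],
              [-1.0]])
f = Quadratic_general(Q, d, 0.0)

x_flat = np.array([0.5, 2.0])
g_numeric = numdiff(lambda v: f.val(v), x_flat, {'epsilon': 1e-6}, nargout=1)
g_analytic = f.grad(x_flat.reshape(-1, 1)).ravel()
print(g_numeric)    # ~[4.0, 5.5] (central differences are exact for quadratics)
print(g_analytic)   # [4.0, 5.5]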