最优化大作业(二): 常用无约束最优化方法
- 问题描述
对以下优化问题
min f(x₁, x₂) = x₁² + x₂² − x₁x₂ − 10x₁ − 4x₂ + 60
选取初始点 x⁽⁰⁾ = (1, 1),分别用以下方法求解
(1)最速下降法;
(2)Newton法或修正Newton法;
(3)共轭梯度法。
- 基本原理
(1)最速下降法
图1 最速下降法流程图
(2)Newton法
图2 Newton法流程图
(3)共轭梯度法
图3 共轭梯度法流程图
- 实验结果
(1)最速下降法
| 迭代次数 | 梯度模值 | 搜索方向 | 当前迭代点 | 当前迭代点值 |
|---|---|---|---|---|
| 1 | — | — | (1.00, 1.00) | 47.00 |
| 2 | 5.4210 | [9.00, 3.00] | (7.43, 3.14) | 14.8571 |
| 3 | 1.6680 | [-1.71, 5.14] | (6.77, 5.12) | 9.2057 |
| 4 | 0.9532 | [1.58, 0.53] | (7.90, 5.50) | 8.2120 |
| 5 | 0.2933 | [-0.30, 0.90] | (7.78, 5.85) | 8.0373 |
| 6 | 0.1676 | [0.28, 0.09] | (7.98, 5.91) | 8.0066 |
| 7 | 0.0516 | [-0.05, 0.16] | (7.96, 5.97) | 8.0012 |
| 8 | 0.0295 | [0.05, 0.02] | (8.00, 5.98) | 8.0002 |
| 9 | 0.0091 | [-0.01, 0.03] | (7.99, 6.00) | 8.0000 |
表1 最速下降法迭代过程
图4 最速下降法迭代过程图
(2)Newton法
| 迭代次数 | 梯度模值 | 搜索方向 | 当前迭代点 | 当前迭代点值 |
|---|---|---|---|---|
| 1 | — | — | (1.00, 1.00) | 47.00 |
| 2 | 0.0000 | [7.00, 5.00] | (8.00, 6.00) | 8.0000 |
表2 Newton法迭代过程
图5 Newton法迭代过程图
(3)共轭梯度法
| 迭代次数 | 梯度模值 | 搜索方向 | 当前迭代点 | 当前迭代点值 |
|---|---|---|---|---|
| 1 | — | — | (1.00, 1.00) | 47.00 |
| 2 | 5.4210 | [9.00, 3.00] | (7.43, 3.14) | 14.8571 |
| 3 | 0.0000 | [1.22, 6.12] | (8.00, 6.00) | 8.0000 |
表3 共轭梯度法迭代过程
图6 共轭梯度法迭代过程图
对比结果可得,三种算法均得到同一个极值点(8, 6)。
- 代码展示
import matplotlib.pyplot as plt
from sympy import *
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
# Global sympy symbol for the line-search step length used in X + t*P.
t = symbols('t')


def fun1():
    """Build the symbolic objective f = x1^2 + x2^2 - x1*x2 - 10*x1 - 4*x2 + 60.

    Returns:
        A sympy expression in the symbols x1, x2.
    """
    x1, x2 = symbols('x1 x2')
    # Use native ** on sympy symbols: np.power on symbols only worked by
    # accident of NumPy's object-dispatch and is fragile/non-idiomatic here.
    return x1**2 + x2**2 - x1 * x2 - 10 * x1 - 4 * x2 + 60
def fun2(x1, x2):
    """Numerically evaluate f(x1, x2) = x1^2 + x2^2 - x1*x2 - 10*x1 - 4*x2 + 60.

    Accepts scalars or NumPy arrays (used for surface plotting).
    """
    quadratic = np.power(x1, 2) + np.power(x2, 2) - x1 * x2
    linear = -10 * x1 - 4 * x2
    return quadratic + linear + 60
def cal_gradient_cur(X_cur):
    """Evaluate the gradient (df/dx1, df/dx2) of fun1 at the point X_cur.

    Returns an object-dtype NumPy array of two evaluated sympy numbers.
    """
    x1, x2 = symbols('x1 x2')
    objective = fun1()
    point = {x1: X_cur[0], x2: X_cur[1]}
    components = [diff(objective, var).evalf(subs=point) for var in (x1, x2)]
    return np.array(components)
def cal_lambda(X1, X2):
    """Return ||g(X2)||^2 / ||g(X1)||^2, the conjugate-gradient beta
    (Fletcher-Reeves form).

    X1: previous iterate, X2: current iterate.
    """
    g_prev = cal_gradient_cur(X1)
    g_cur = cal_gradient_cur(X2)
    return np.sum(g_cur ** 2, axis=0) / np.sum(g_prev ** 2, axis=0)
def update_X(X, P):
    """Form the symbolic trial point X + t*P, where t is the module-level
    sympy step symbol (resolved later by cal_best_t)."""
    trial = X + t * P
    return np.array(trial)
def update_X_cur(X, t, P):
    """Return the concrete next iterate X + t*P.

    Note: the parameter t (a scalar, or the one-element list returned by
    sympy's solve) shadows the module-level step symbol on purpose.
    """
    displacement = np.multiply(t, P)
    return np.array(np.add(X, displacement))
def cal_best_t(X_cur):
    """Exact line search: solve d/dt f(X_cur) = 0 for the step length.

    X_cur holds coordinates that are symbolic in t (built by update_X);
    returns the list of roots produced by sympy's solve().
    """
    x1, x2 = symbols('x1 x2')
    restricted = fun1().subs({x1: X_cur[0], x2: X_cur[1]})
    return solve(diff(restricted, t), t)
def cal_g_norm_cur(X):
    """Return the Euclidean norm of the gradient at X (as float32)."""
    grad = np.array(cal_gradient_cur(X), dtype=np.float32)
    return np.linalg.norm(grad)
def draw(X0):
    """Plot the objective surface on [-20, 20]^2 plus the start point X0.

    Args:
        X0: length-2 sequence (x1, x2) — the initial iterate.
    Returns:
        The 3-D matplotlib axes, so callers can scatter the iterate path on it.
    """
    plt.figure()
    ax = plt.axes(projection='3d')
    xx = np.arange(-20, 20, 0.1)
    yy = np.arange(-20, 20, 0.1)
    x1, x2 = np.meshgrid(xx, yy)
    Z = fun2(x1, x2)
    ax.plot_surface(x1, x2, Z, cmap='rainbow', alpha=0.5)
    X = np.array([X0[0]])
    Y = np.array([X0[1]])
    X, Y = np.meshgrid(X, Y)
    Z = fun2(X, Y)
    # X/Y/Z are (1, 1) arrays here; "%f" % array relies on the deprecated
    # size-1-array-to-scalar conversion (warns on NumPy >= 1.25, errors
    # later), so extract the scalars explicitly with .item().
    print("初始点:(%0.2f,%0.2f,%0.2f)" % (X.item(), Y.item(), Z.item()))
    ax.scatter(X, Y, Z, c='k', s=20)
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    return ax
class C_gradient(object):
    """Conjugate-gradient (Fletcher-Reeves) minimizer with exact line search."""

    def __init__(self, X0):
        self.X0 = X0  # starting iterate

    def cal_P(self, g_cur, P1, lamda):
        """Next conjugate direction: -g_cur + lamda * previous direction P1."""
        return np.array(lamda * P1 - g_cur)

    def search(self):
        """Iterate until ||grad|| <= 0.01; return history [[x1, x2, f], ...].

        If the start point already satisfies the tolerance, only a report is
        printed and None is returned (original behavior preserved).
        """
        point = self.X0
        norm_now = cal_g_norm_cur(point)
        step_idx = 0
        history = []
        if norm_now <= 0.01:
            # Already at the (numerical) optimum — just report it.
            print("极值点为({:.2f},{:.2f})".format(point[0], point[1]))
            x1, x2 = symbols('x1 x2')
            objective = fun1()
            best = objective.evalf(subs={x1: point[0], x2: point[1]})
            print("极小值为{:.4f}".format(best))
        else:
            direction = -1 * cal_gradient_cur(point)  # initial steepest descent
            while True:
                symbolic_next = update_X(point, direction)
                best_t = cal_best_t(symbolic_next)
                nxt = update_X_cur(point, best_t, direction)
                grad_next = cal_gradient_cur(nxt)
                norm_now = cal_g_norm_cur(nxt)
                x1, x2 = symbols('x1 x2')
                objective = fun1()
                value = objective.evalf(subs={x1: nxt[0], x2: nxt[1]})
                history.append([float(nxt[0]), float(nxt[1]), float(value)])
                print("当前梯度模值为{:.4f},搜索方向为[{:.2f},{:.2f}]".format(norm_now, direction[0], direction[1]), end=" ")
                print("极值点为({:.2f},{:.2f}), 极小值为{:.4f}".format(history[step_idx][0], history[step_idx][1], history[step_idx][2]))
                if norm_now <= 0.01:
                    return np.array(history)
                lamda = cal_lambda(point, nxt)
                direction = self.cal_P(grad_next, direction, lamda)
                point = nxt
                step_idx += 1
def C_gradient_main():
    """Demo driver: run conjugate gradient from (1, 1) and plot the path."""
    print("当前搜索方法为共轭梯度法")
    start = np.array([1, 1])
    axes = draw(start)
    solver = C_gradient(start)
    solver.ax = axes
    path = solver.search()
    axes.scatter(np.array([path[:, 0]]), np.array([path[:, 1]]), np.array([path[:, 2]]), c='k', s=20)
    plt.show()
class steepest_gradient(object):
    """Steepest-descent minimizer with exact line search along -gradient."""

    def __init__(self, X0):
        self.X0 = X0  # starting iterate

    def search(self):
        """Iterate until ||grad|| <= 0.01; return history [[x1, x2, f], ...]."""
        point = self.X0
        history = []
        step_idx = 0
        while True:
            direction = -1 * cal_gradient_cur(point)  # negative gradient
            symbolic_next = update_X(point, direction)
            best_t = cal_best_t(symbolic_next)
            nxt = update_X_cur(point, best_t, direction)
            norm_now = cal_g_norm_cur(nxt)
            x1, x2 = symbols('x1 x2')
            objective = fun1()
            value = objective.evalf(subs={x1: nxt[0], x2: nxt[1]})
            history.append([float(nxt[0]), float(nxt[1]), float(value)])
            print("当前梯度模值为{:.4f},搜索方向为[{:.2f},{:.2f}]".format(norm_now, direction[0], direction[1]), end=" ")
            print("极值点为({:.2f},{:.2f}), 极小值为{:.4f}".format(history[step_idx][0], history[step_idx][1], history[step_idx][2]))
            if norm_now <= 0.01:
                return np.array(history)
            point = nxt
            step_idx += 1
def steepest_gradient_main():
    """Demo driver: run steepest descent from (1, 1) and plot the path."""
    print("当前搜索方法为最速下降法")
    start = np.array([1, 1])
    axes = draw(start)
    solver = steepest_gradient(start)
    solver.ax = axes
    path = solver.search()
    axes.scatter(np.array([path[:, 0]]), np.array([path[:, 1]]), np.array([path[:, 2]]), c='k', s=20)
    plt.show()
class Newton(object):
    """Newton's method with exact line search on the Newton direction."""

    def __init__(self, X0):
        self.X0 = X0  # starting iterate

    def cal_hesse(self):
        """Hessian of the quadratic objective — constant everywhere."""
        return np.array([[2, -1], [-1, 2]])

    def search(self):
        """Iterate x <- x + t * (-H^-1 g) until ||grad|| <= 0.01; return history."""
        point = self.X0
        step_idx = 0
        history = []
        while True:
            grad_col = cal_gradient_cur(point).reshape((1, 2)).T
            hesse_inv = np.linalg.inv(self.cal_hesse())
            direction = -1 * np.dot(hesse_inv, grad_col).ravel()
            symbolic_next = update_X(point, direction)
            best_t = cal_best_t(symbolic_next)
            nxt = update_X_cur(point, best_t, direction)
            x1, x2 = symbols('x1 x2')
            objective = fun1()
            value = objective.evalf(subs={x1: nxt[0], x2: nxt[1]})
            norm_now = cal_g_norm_cur(nxt)
            history.append([float(nxt[0]), float(nxt[1]), float(value)])
            print("当前梯度模值为{:.4f},搜索方向为[{:.2f},{:.2f}]".format(norm_now, direction[0], direction[1]), end=" ")
            print("极值点为({:.2f},{:.2f}), 极小值为{:.4f}".format(history[step_idx][0], history[step_idx][1], history[step_idx][2]))
            if norm_now <= 0.01:
                return np.array(history)
            point = nxt
            step_idx += 1
def newton_main():
    """Demo driver: run Newton's method from (1, 1) and plot the path."""
    print("当前搜索方法为newton法")
    start = np.array([1, 1])
    axes = draw(start)
    solver = Newton(start)
    path = solver.search()
    axes.scatter(np.array([path[:, 0]]), np.array([path[:, 1]]), np.array([path[:, 2]]), c='k', s=20)
    plt.show()
# Run all three solvers in sequence from the same start point (1, 1).
if __name__ == '__main__':
    steepest_gradient_main()
    newton_main()
    C_gradient_main()