
Andrew Ng's Machine Learning course (Coursera): Programming Exercise 4 (Neural Network Back Propagation) in Python


This post covers the fourth programming exercise of Andrew Ng's Machine Learning course, Neural Network Back Propagation, implemented in Python.
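
For reference, the network in this exercise has 400 input units (the 20x20 pixel images), 25 hidden units and 10 output units (classes 1 to 10, where 10 stands for the digit 0), so the unrolled parameter vector that the code passes around has

$$25 \times 401 + 10 \times 26 = 10285$$

elements, bias terms included.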

 

ex4.py is the main entry point.

The exercise files and training data can be downloaded from: https://github.com/toanoyx/MachineLearning-AndrewNg-coursera-python/tree/master/ex4%20NN%20back%20propagation

 

The source code for each file follows.

ex4.py

import scipy.optimize as opt
from sklearn.metrics import classification_report
from loadData import *
from displayData import *
from feedForward import *
from nnCostFunction import *
from computeNumericalGradient import *
from checkNNGradients import *


""" 第1部分 可视化数据集 """
X, _ = loadData('ex4data1.mat')
displayData(X)
plt.show()

""" 第2部分 模型表示 """
X_raw, y_raw = loadData('ex4data1.mat', transpose=False)
X = np.insert(X_raw, 0, np.ones(X_raw.shape[0]), axis=1)


def expand_y(y):
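    # One-hot encode the labels: label k (1..10) becomes a 10-vector with a 1
    # at index k-1, so label 10 (the digit 0) maps to the last position.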
    res = []
    for i in y:
        y_array = np.zeros(10)
        y_array[i - 1] = 1
        res.append(y_array)
    return np.array(res)


y = expand_y(y_raw)


def load_weight(path):
    data = sio.loadmat(path)
    return data['Theta1'], data['Theta2']


t1, t2 = load_weight('ex4weights.mat')

""" 第3部分 前向传播和代价函数 """
theta = np.concatenate((np.ravel(t1), np.ravel(t2)))
_, _, _, _, h = feedForward(theta, X)
print("cost function: " + str(nnCostFunction(theta, X, y)) + "(this should be 0.287629)")

""" 第4部分 正则化代价函数 """
t1, t2 = deserialize(theta)
m = X.shape[0]
l = 1
reg_t1 = (l / (2 * m)) * np.power(t1[:, 1:], 2).sum()
reg_t2 = (l / (2 * m)) * np.power(t2[:, 1:], 2).sum()
regularizedCost = nnCostFunction(theta, X, y) + reg_t1 + reg_t2
print("regularized cost function: " + str(regularizedCost) + "(this should be 0.383770)")

""" 第5部分 反向传播 """
sigmoid_gradient(0)
d1, d2 = deserialize(computeNumericalGradient(theta, X, y))

checkNNGradients(theta, X, y, epsilon= 0.0001)

checkNNGradients(theta, X, y, epsilon=0.0001, regularized=True)

""" 第6部分 """


def random_init(size):
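    # Draw each initial weight uniformly from [-0.12, 0.12] to break symmetry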
    return np.random.uniform(-0.12, 0.12, size)


def nn_training(X, y):
    init_theta = random_init(10285)  # 25*401 + 10*26

    res = opt.minimize(fun=regularized_cost,
                       x0=init_theta,
                       args=(X, y, 1),
                       method='TNC',
                       jac=regularized_gradient,
                       options={'maxiter': 400})
    return res


res = nn_training(X, y)
print(str(res))
_, y_answer = loadData('ex4data1.mat')
print(str(y_answer[:20]))
final_theta = res.x


def show_accuracy(theta, X, y):
    _, _, _, _, h = feedForward(theta, X)
    y_pred = np.argmax(h, axis=1) + 1
    print(classification_report(y, y_pred))


def plot_hidden_layer(theta):
    final_theta1, _ = deserialize(theta)
    hidden_layer = final_theta1[:, 1:]  # get rid of the bias term

    fig, ax_array = plt.subplots(nrows=5, ncols=5, sharey=True, sharex=True, figsize=(5, 5))

    for r in range(5):
        for c in range(5):
            ax_array[r, c].matshow(hidden_layer[5 * r + c].reshape((20, 20)),
                                   cmap=matplotlib.cm.binary)
            plt.xticks(np.array([]))
            plt.yticks(np.array([]))


show_accuracy(final_theta, X, y_answer)
plot_hidden_layer(final_theta)
plt.show()
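
For a quick accuracy check without sklearn, the following sketch (not part of the original script; it assumes final_theta, X and y_answer as defined above) prints the training accuracy directly:

y_pred = np.argmax(feedForward(final_theta, X)[-1], axis=1) + 1
print('training accuracy: {:.2%}'.format((y_pred == y_answer).mean()))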

 

checkNNGradients.py
from nnCostFunction import *
from computeNumericalGradient import *


def checkNNGradients(theta, X, y, epsilon, regularized=False):
    def a_numeric_grad(plus, minus, regularized=False):
        if regularized:
            return (regularized_cost(plus, X, y) - regularized_cost(minus, X, y)) / (epsilon * 2)
        else:
            return (nnCostFunction(plus, X, y) - nnCostFunction(minus, X, y)) / (epsilon * 2)

    theta_matrix = expand_array(theta)  # expand to (10285, 10285)
    epsilon_matrix = np.identity(len(theta)) * epsilon

    plus_matrix = theta_matrix + epsilon_matrix
    minus_matrix = theta_matrix - epsilon_matrix

    numeric_grad = np.array([a_numeric_grad(plus_matrix[i], minus_matrix[i], regularized)
                             for i in range(len(theta))])

    analytic_grad = regularized_gradient(theta, X, y) if regularized else computeNumericalGradient(theta, X, y)
    diff = np.linalg.norm(numeric_grad - analytic_grad) / np.linalg.norm(numeric_grad + analytic_grad)

    print(
        'If your backpropagation implementation is correct,\nthe relative difference will be smaller than 1e-9 (assuming epsilon=0.0001).\nRelative Difference: {}\n'.format(
            diff))


def regularized_cost(theta, X, y, l=1):
    t1, t2 = deserialize(theta)
    m = X.shape[0]
    reg_t1 = (l / (2 * m)) * np.power(t1[:, 1:], 2).sum()  # skip the first column (the bias weights)
    reg_t2 = (l / (2 * m)) * np.power(t2[:, 1:], 2).sum()
    return nnCostFunction(theta, X, y) + reg_t1 + reg_t2


def expand_array(arr):
    # Replicate the 1-D parameter vector into an (n, n) array, one copy per row
    return np.tile(arr, (arr.shape[0], 1))


def regularized_gradient(theta, X, y, l=1):
    m = X.shape[0]
    delta1, delta2 = deserialize(computeNumericalGradient(theta, X, y))
    t1, t2 = deserialize(theta.copy())  # copy: deserialize returns views, so zeroing the bias columns below must not mutate the caller's theta

    t1[:, 0] = 0
    reg_term_d1 = (l / m) * t1
    delta1 = delta1 + reg_term_d1

    t2[:, 0] = 0
    reg_term_d2 = (l / m) * t2
    delta2 = delta2 + reg_term_d2

    return np.concatenate((np.ravel(delta1), np.ravel(delta2)))
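
checkNNGradients implements the standard two-sided finite-difference check: each partial derivative is approximated by

$$\frac{\partial J}{\partial \theta_i} \approx \frac{J(\theta + \epsilon e_i) - J(\theta - \epsilon e_i)}{2\epsilon}, \qquad \epsilon = 10^{-4},$$

where $e_i$ is the i-th unit vector (one row of the identity matrix built above), and the reported relative difference is $\|g_{num} - g_{bp}\| / \|g_{num} + g_{bp}\|$.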

 

computeNumericalGradient.py
from feedForward import *


def computeNumericalGradient(theta, X, y):
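    # NOTE: despite the file and function name, this computes the *analytic*
    # gradient via backpropagation; the numerical finite-difference estimate
    # used for checking lives in checkNNGradients.py.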
    t1, t2 = deserialize(theta)  # t1: (25,401) t2: (10,26)
    m = X.shape[0]

    delta1 = np.zeros(t1.shape)  # (25, 401)
    delta2 = np.zeros(t2.shape)  # (10, 26)

    a1, z2, a2, z3, h = feedForward(theta, X)

    for i in range(m):
        a1i = a1[i, :]  # (401,)
        z2i = z2[i, :]  # (25,)
        a2i = a2[i, :]  # (26,)

        hi = h[i, :]  # (10,)
        yi = y[i, :]  # (10,)

        d3i = hi - yi  # (10,)

        z2i = np.insert(z2i, 0, np.ones(1))  # prepend the bias unit so it is (26,), matching t2's columns
        d2i = np.multiply(t2.T @ d3i, sigmoid_gradient(z2i))  # (26,)

        # careful with np vector transpose
        delta2 += np.matrix(d3i).T @ np.matrix(a2i)  # (1, 10).T @ (1, 26) -> (10, 26)
        delta1 += np.matrix(d2i[1:]).T @ np.matrix(a1i)  # (1, 25).T @ (1, 401) -> (25, 401)

    delta1 = delta1 / m
    delta2 = delta2 / m
    return np.concatenate((np.ravel(delta1), np.ravel(delta2)))
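
The per-example updates in the loop above are the standard backpropagation equations for this architecture:

$$\delta^{(3)} = h - y, \qquad \delta^{(2)} = (\Theta^{(2)})^T \delta^{(3)} \odot g'(z^{(2)}),$$

$$\Delta^{(l)} \mathrel{+}= \delta^{(l+1)} (a^{(l)})^T, \qquad D^{(l)} = \frac{1}{m} \Delta^{(l)},$$

where $g'$ is the sigmoid gradient and the bias component of $\delta^{(2)}$ is dropped before accumulating $\Delta^{(1)}$.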

 

displayData.py
import numpy as np
import matplotlib.pyplot as plt
import matplotlib


def displayData(X):
    size = int(np.sqrt(X.shape[1]))

    sample_idx = np.random.choice(np.arange(X.shape[0]), 100, replace=False)  # pick 100 distinct examples
    sample_images = X[sample_idx, :]

    fig, ax_array = plt.subplots(nrows=10, ncols=10, sharey=True, sharex=True, figsize=(8, 8))

    for r in range(10):
        for c in range(10):
            ax_array[r, c].matshow(sample_images[10 * r + c].reshape((size, size)),
                                   cmap=matplotlib.cm.binary)
            plt.xticks(np.array([]))
            plt.yticks(np.array([]))

 

feedForward.py
import numpy as np
from sigmoid import *


def feedForward(theta, X):
    t1, t2 = deserialize(theta)
    m = X.shape[0]
    a1 = X  # input layer activations; the bias column is already in X: (m, 401)

    z2 = a1 @ t1.T  # (m, 25)
    a2 = np.insert(sigmoid(z2), 0, np.ones(m), axis=1)  # hidden activations plus bias unit: (m, 26)

    z3 = a2 @ t2.T  # (m, 10)
    h = sigmoid(z3)  # one probability per class and example

    return a1, z2, a2, z3, h


def deserialize(seq):
    # Split the flat parameter vector back into Theta1 (25, 401) and Theta2 (10, 26)
    return seq[:25 * 401].reshape(25, 401), seq[25 * 401:].reshape(10, 26)
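
In the notation of the course, feedForward computes, for every example at once:

$$a^{(1)} = x, \quad z^{(2)} = \Theta^{(1)} a^{(1)}, \quad a^{(2)} = \begin{bmatrix} 1 \\ g(z^{(2)}) \end{bmatrix}, \quad z^{(3)} = \Theta^{(2)} a^{(2)}, \quad h_\theta(x) = g(z^{(3)}),$$

with the bias unit already included in $a^{(1)}$ (it is added to X in ex4.py).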

 

loadData.py
import scipy.io as sio
import numpy as np


def loadData(path, transpose=True):
    data = sio.loadmat(path)
    y = data.get('y')
    y = y.reshape(y.shape[0])
    X = data.get('X')

    if transpose:
        # The .mat file stores each 20x20 image in MATLAB's column-major order;
        # transpose every image so it displays upright with matplotlib.
        X = np.array([im.reshape((20, 20)).T for im in X])
        X = np.array([im.reshape(400) for im in X])

    return X, y
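
As a sanity check, the shapes returned for ex4data1.mat (5000 labelled 20x20 grayscale digits) should look like this:

X, y = loadData('ex4data1.mat')
print(X.shape)  # (5000, 400)
print(y.shape)  # (5000,)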

 

nnCostFunction.py
from feedForward import *


def nnCostFunction(theta, X, y):
    m = X.shape[0]
    _, _, _, _, h = feedForward(theta, X)

    pair_computation = -np.multiply(y, np.log(h)) - np.multiply((1 - y), np.log(1 - h))

    return pair_computation.sum() / m
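
This is the unregularized cross-entropy cost from the exercise,

$$J(\theta) = \frac{1}{m} \sum_{i=1}^{m} \sum_{k=1}^{10} \left[ -y_k^{(i)} \log\left(h_\theta(x^{(i)})_k\right) - \left(1 - y_k^{(i)}\right) \log\left(1 - h_\theta(x^{(i)})_k\right) \right],$$

computed element-wise over the whole label matrix at once; the regularization term $\frac{\lambda}{2m}\left(\sum \left(\Theta^{(1)}_{j,k \ge 1}\right)^2 + \sum \left(\Theta^{(2)}_{j,k \ge 1}\right)^2\right)$ is added separately in regularized_cost.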

 

sigmoid.py
import numpy as np


def sigmoid(z):
    return 1 / (1 + np.exp(-z))


def sigmoid_gradient(z):
    return np.multiply(sigmoid(z), 1 - sigmoid(z))
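
The gradient follows directly from the definition of the sigmoid:

$$g(z) = \frac{1}{1 + e^{-z}}, \qquad g'(z) = g(z)\left(1 - g(z)\right),$$

so $g'(0) = 0.5 \times 0.5 = 0.25$, which is what the sanity check in Part 5 of ex4.py should print.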

 
