
Ng Deep Learning Notes: Implementing a Single-Hidden-Layer Neural Network in Python


Files needed for this exercise:

Link: https://pan.baidu.com/s/1nceEZvtnu1ZMxarxdAeaiw
Extraction code: ebk5

After downloading, put the files in the same directory as the code for this exercise.

Diagram of the single-hidden-layer neural network model:

[figure: network with one input layer, one hidden layer and one output layer]

Implementation outline:

Data preprocessing -> build the network model -> train the model to obtain the parameters -> predict

Building the network model breaks down into the following steps (a sketch of the full training loop follows this list):

  • Initialize the dimensions of each layer

  • Initialize the parameters (random initialization)

  • Training loop:

    • Forward propagation

    • Compute the cost

    • Backward propagation

    • Update the parameters
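
As a preview, here is a minimal sketch of how these steps fit together. It assumes the helper functions layer_size, initialize_parameters, forward_propagation, compute_cost, backward_propagation and update_parameters implemented in the sections below, so it only runs after those definitions:

# Minimal training-loop sketch; the hyperparameters (4 hidden units,
# 10000 iterations, learning rate 0.5) match the values used later in this post.
n_x, n_h, n_y = layer_size(X, Y, 4)
parameters = initialize_parameters(n_x, n_h, n_y)
for i in range(10000):
    A2, cache = forward_propagation(X, parameters)           # forward pass
    cost = compute_cost(A2, Y)                               # track progress
    grads = backward_propagation(parameters, cache, X, Y)    # backward pass
    parameters = update_parameters(parameters, grads, learning_rate=0.5)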

Code implementation:

Import the modules:

import numpy as np
import matplotlib.pyplot as plt
from testCases import *  # this module is included in the experiment files
import sklearn
import sklearn.datasets
import sklearn.linear_model
from planar_utils import plot_decision_boundary, sigmoid, load_planar_dataset, load_extra_datasets  # this module is included in the experiment files

Data preprocessing:

np.random.seed(1)
X,Y = load_planar_dataset()
plt.scatter(X[0,:], X[1,:], c=np.squeeze(Y), s=40, cmap=plt.cm.Spectral)  # squeeze Y to 1-D; newer matplotlib versions reject a (1, m) color array
shape_X = X.shape
shape_Y = Y.shape
m = Y.shape[1]
print("Shape of X: " + str(shape_X))
print("Shape of Y: " + str(shape_Y))
print("Number of samples: " + str(m))

Train a logistic regression model with sklearn as a baseline and look at the result. The dot-product expression in the last line counts correctly classified positives (Y·pred) plus correctly classified negatives ((1-Y)·(1-pred)):

clf = sklearn.linear_model.LogisticRegression()
clf.fit(X.T, Y.T.ravel())  # sklearn expects the labels as a 1-D array
plot_decision_boundary(lambda x: clf.predict(x), X, Y)
plt.title("Logistic Regression")
plt.show()
LR_prediction = clf.predict(X.T)
print("Logistic regression accuracy: %d " % float((np.dot(Y, LR_prediction) + np.dot(1 - Y, 1 - LR_prediction)) / float(m) * 100) + "%")

The result looks like this:

[figure: logistic regression decision boundary]

Initialize the dimensions of each layer, and test it:

def layer_size(X,Y,n_h):
    n_x = X.shape[0]  # size of the input layer (number of features)
    n_y = Y.shape[0]  # size of the output layer
    return (n_x,n_h,n_y)

print("=========================测试layer_sizes=========================")
X_asses,Y_asses = layer_sizes_test_case()
n_x,n_h,n_y = layer_size(X_asses,Y_asses,4)
print("输入层的节点数量n_x = %d "%n_x)
print("隐藏层的节点数量n_h = %d "%n_h)
print("输出层的节点数量n_y = %d "%n_y)

Randomly initialize the parameters. The weights start as small random values (scaled by 0.01) to break symmetry between hidden units, and the biases start at zero:

def initialize_parameters(n_x,n_h,n_y):
    np.random.seed(2)
    W1 = np.random.randn(n_h,n_x)*0.01  # small random values break symmetry without saturating tanh
    b1 = np.zeros(shape=(n_h,1))
    W2 = np.random.randn(n_y,n_h)*0.01
    b2 = np.zeros(shape=(n_y,1))
    assert(W1.shape == (n_h,n_x))
    assert(b1.shape == (n_h,1))
    assert(W2.shape == (n_y,n_h))
    assert(b2.shape == (n_y,1))
    parameters = {
        'W1' : W1,
        'b1' : b1,
        'W2' : W2,
        'b2' : b2
    }
    return parameters

print("=========================测试initialize_parameters=========================")
n_x,n_h,n_y = initialize_parameters_test_case()
parameters = initialize_parameters(n_x,n_h,n_y)
print("W1:"+str(parameters['W1']))
print("b1:"+str(parameters['b1']))
print("W2:"+str(parameters['W2']))
print("b2:"+str(parameters['b2']))

Forward propagation computes Z1 = W1·X + b1, A1 = tanh(Z1), Z2 = W2·A1 + b2 and A2 = sigmoid(Z2):

def forward_propagation(X,parameters):
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    Z1 = np.dot(W1,X)+b1   # hidden layer pre-activation
    A1 = np.tanh(Z1)       # hidden layer activation
    Z2 = np.dot(W2,A1)+b2  # output layer pre-activation
    A2 = sigmoid(Z2)       # output probability
    assert(A2.shape == (1,X.shape[1]))
    cache = {
        'Z1':Z1,
        'A1':A1,
        'Z2':Z2,
        'A2':A2
    }
    return (A2,cache)

print("=========================测试forward_propagation=========================")
X_asses,parameters = forward_propagation_test_case()
A2,cache = forward_propagation(X_asses,parameters)
print(np.mean(cache['Z1']),np.mean(cache['A1']),np.mean(cache['Z2']),np.mean(cache['A2']))

Compute the cross-entropy cost, J = -(1/m) * sum(Y*log(A2) + (1-Y)*log(1-A2)):

def compute_cost(A2,Y):
    m = Y.shape[1]
    logprobs = np.multiply(Y,np.log(A2))+np.multiply((1-Y),np.log(1-A2))
    cost = -np.sum(logprobs)/m
    cost = float(np.squeeze(cost))
    return cost

print("=========================测试compute_cost=========================")
A2,Y_asses,parameters = compute_cost_test_case()
cost = compute_cost(A2,Y_asses)
print("cost = %f"% cost)

Backward propagation. Because the hidden activation is tanh, its derivative is 1 - A1^2, which gives dZ2 = A2 - Y, dW2 = dZ2·A1.T / m, dZ1 = (W2.T·dZ2) * (1 - A1^2) and dW1 = dZ1·X.T / m:

def backward_propagation(parameters,cache,X,Y):
    m = X.shape[1]
    A1 = cache['A1']
    A2 = cache['A2']
    W2 = parameters['W2']
    W1 = parameters['W1']
    dZ2 = A2-Y  # gradient of the cross-entropy cost w.r.t. Z2
    dW2 = (1/m)*np.dot(dZ2,A1.T)
    db2 = (1/m)*np.sum(dZ2,axis=1,keepdims=True)
    dZ1 = np.multiply(np.dot(W2.T,dZ2),1-np.power(A1,2))  # 1-A1^2 is the derivative of tanh
    dW1 = (1/m)*np.dot(dZ1,X.T)
    db1 = (1/m)*np.sum(dZ1,axis=1,keepdims=True)
    grads = {
        'dW1':dW1,
        'dW2':dW2,
        'db1':db1,
        'db2':db2
    }
    return grads

print("=========================测试backward_propagation=========================")
parameters,cache,X_asses,Y_asses = backward_propagation_test_case()
grads = backward_propagation(parameters,cache,X_asses,Y_asses)
print("dW1 =  " + str(grads['dW1']))
print("db1 =  " + str(grads['db1']))
print("dW2 =  " + str(grads['dW2']))
print("db2 =  " + str(grads['db2']))

Update the parameters with one gradient descent step, parameter = parameter - learning_rate * gradient:

def update_parameters(parameters,grads,learning_rate=1.2):
    W1,b1 = parameters['W1'],parameters['b1']
    W2,b2 = parameters['W2'],parameters['b2']
    dW1,db1 = grads['dW1'],grads['db1']
    dW2,db2 = grads['dW2'],grads['db2']
    W1 = W1 - learning_rate*dW1
    b1 = b1 - learning_rate*db1
    W2 = W2 - learning_rate*dW2
    b2 = b2 - learning_rate*db2
    parameters = {
        'W1':W1,
        'b1':b1,
        'W2':W2,
        'b2':b2
    }
    return parameters

print("=========================测试update_parameters=========================")
parameters,grads = update_parameters_test_case()
parameters = update_parameters(parameters,grads)
print(str(parameters))

Combine everything above into the model nn_model:

def nn_model(X,Y,n_h,num_iterations,print_cost=False):
    np.random.seed(3)
    n_x,n_h,n_y = layer_size(X,Y,n_h)
    parameters = initialize_parameters(n_x,n_h,n_y)
    for i in range(num_iterations):
        A2,cache = forward_propagation(X,parameters)
        cost = compute_cost(A2,Y)
        grads = backward_propagation(parameters,cache,X,Y)
        parameters = update_parameters(parameters,grads,learning_rate=0.5)
        if print_cost and i % 1000 == 0:
            print("Cost after iteration %d: %f" % (i, cost))

    return parameters


print("=========================测试nn_model=========================")
X_asses,Y_asses = nn_model_test_case()
parameters = nn_model(X_asses,Y_asses,4,num_iterations=10000,print_cost=False)
print(parameters)
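
If you want to see the whole training curve rather than occasional printouts, a small variant of the model can record the cost every 100 iterations and plot it. nn_model_with_costs is my own addition, not from the notebook:

def nn_model_with_costs(X, Y, n_h, num_iterations):
    n_x, n_h, n_y = layer_size(X, Y, n_h)
    parameters = initialize_parameters(n_x, n_h, n_y)
    costs = []
    for i in range(num_iterations):
        A2, cache = forward_propagation(X, parameters)
        if i % 100 == 0:
            costs.append(compute_cost(A2, Y))  # record the cost every 100 iterations
        grads = backward_propagation(parameters, cache, X, Y)
        parameters = update_parameters(parameters, grads, learning_rate=0.5)
    return parameters, costs

params_c, costs = nn_model_with_costs(X, Y, 4, num_iterations=5000)
plt.plot(np.arange(len(costs)) * 100, costs)
plt.xlabel("iteration")
plt.ylabel("cost")
plt.show()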

Prediction function. A2 is the sigmoid output probability, so np.round thresholds it at 0.5:

def predict(parameters,X):
    A2,cache = forward_propagation(X,parameters)
    predictions = np.round(A2)
    return predictions

print("=========================测试predict=========================")
parameters,X_asses = predict_test_case()
predictions = predict(parameters,X_asses)
print("预测的平均值为 = "+str(np.mean(predictions)))

Now train on the full planar dataset and predict:

parameters = nn_model(X,Y,4,num_iterations=10000,print_cost=True)
plot_decision_boundary(lambda x:predict(parameters,x.T),X,Y)
plt.show()
predictions = predict(parameters,X)
print("准确率为:",float((np.dot(Y,predictions.T) + np.dot(1-Y,1-predictions.T))/float(Y.shape[1])))

The prediction result is shown below:

[figure: decision boundary learned by the network with 4 hidden units]

Vary the hidden layer size and observe how the prediction changes:

plt.figure(figsize=(16,32))
hidden_layer_sizes=[1,2,3,4,5,20,50]
for i,n_h in enumerate(hidden_layer_sizes):
    plt.subplot(5,2,i+1)
    plt.title("Hidden Layer of size: %d "%n_h)
    parameters = nn_model(X,Y,n_h,num_iterations=5000)
    plot_decision_boundary(lambda x:predict(parameters,x.T),X,Y)
    predictions = predict(parameters,X)
    accuracy = float((np.dot(Y,predictions.T)+np.dot(1-Y,1-predictions.T))/float(Y.shape[1])*100)
    print("隐藏层的节点数量:",n_h,"准确率:",accuracy)
plt.show()

The results look like this:

[figure: decision boundaries for hidden layer sizes 1 through 50]

Training on other datasets:

noisy_circles, noisy_moons, blobs, gaussian_quantiles, no_structure = load_extra_datasets()

datasets = {"noisy_circles": noisy_circles,
            "noisy_moons": noisy_moons,
            "blobs": blobs,
            "gaussian_quantiles": gaussian_quantiles}

dataset = "noisy_moons"

X, Y = datasets[dataset]
X, Y = X.T, Y.reshape(1, Y.shape[0])

if dataset == "blobs":
    Y = Y % 2

plt.scatter(X[0, :], X[1, :], c=np.squeeze(Y), s=40, cmap=plt.cm.Spectral)  # same squeeze fix as above
plt.show()
parameters = nn_model(X,Y,4,num_iterations=5000,print_cost=True)
plot_decision_boundary(lambda x:predict(parameters,x.T),X,Y)
plt.show()
predictions = predict(parameters,X)
accuracy = float((np.dot(Y,predictions.T)+np.dot(1-Y,1-predictions.T))/float(Y.shape[1])*100)
print("准确率为:",accuracy)

The results:

[figure: noisy_moons scatter plot]

[figure: decision boundary learned on noisy_moons]