Notes on Ng's Deep Learning Course: Implementing a Single-Hidden-Layer Neural Network in Python
Files required for this exercise:
Link: https://pan.baidu.com/s/1nceEZvtnu1ZMxarxdAeaiw
Extraction code: ebk5
After downloading, place the files in the same directory as the code for this exercise.
Diagram of the single-hidden-layer network model (image not reproduced here):
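In brief, the network implemented below takes an input x, passes it through one hidden layer with tanh activations, and produces a probability through a sigmoid output unit:

\hat{y} = \sigma\left( W^{[2]} \tanh\left( W^{[1]} x + b^{[1]} \right) + b^{[2]} \right)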
Implementation outline:
data preprocessing -> build the network model -> train the model to obtain the parameters -> predict
Building the network model breaks down into the following steps (a minimal sketch of the resulting training loop appears right after this list):
- initialize the dimensions of each layer
- initialize the parameters (random initialization)
- training loop:
  - forward propagation
  - compute the cost
  - backward propagation
  - update the parameters
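The sketch below strings these steps together using the function names that are implemented one by one in the rest of this post. It is a sketch only; it will not run until those functions (and X, Y, n_h, num_iterations) are defined:

# Sketch of the overall training procedure (all helpers are defined below).
n_x, n_h, n_y = layer_size(X, Y, n_h)              # dimensions of each layer
parameters = initialize_parameters(n_x, n_h, n_y)  # random initialization
for i in range(num_iterations):
    A2, cache = forward_propagation(X, parameters)         # forward pass
    cost = compute_cost(A2, Y)                             # cross-entropy cost
    grads = backward_propagation(parameters, cache, X, Y)  # gradients
    parameters = update_parameters(parameters, grads)      # gradient-descent step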
Code implementation:
Import the modules:
import numpy as np
import matplotlib.pyplot as plt
from testCases import * # included in the downloaded exercise files
import sklearn
import sklearn.datasets
import sklearn.linear_model
from planar_utils import plot_decision_boundary,sigmoid,load_planar_dataset,load_extra_datasets # included in the downloaded exercise files
Data preprocessing:
np.random.seed(1)
X,Y = load_planar_dataset()
plt.scatter(X[0,:],X[1,:],c=np.squeeze(Y),s=40,cmap=plt.cm.Spectral) # squeeze Y to 1-D so matplotlib accepts it as a color array
plt.show()
shape_X = X.shape
shape_Y = Y.shape
m = Y.shape[1]
print("X的维度为:"+str(shape_X))
print("Y的维度为:"+str(shape_Y))
print("样本数为:"+str(m))
Before building the network, train a logistic regression model with sklearn to see how a linear classifier does:
clf = sklearn.linear_model.LogisticRegression()
clf.fit(X.T,Y.T.ravel()) # ravel Y to the 1-D shape sklearn expects
plot_decision_boundary(lambda x:clf.predict(x),X,Y)
plt.title("Logistic Regression")
plt.show()
LR_prediction = clf.predict(X.T)
print("逻辑回归的准确性:%d "%float((np.dot(Y,LR_prediction)+np.dot(1-Y,1-LR_prediction))/float(m)*100) +"%")
As the plot (not reproduced here) shows, logistic regression can only draw a straight-line decision boundary, which fits this dataset poorly.
Initialize the dimensions of each layer, and test:
def layer_size(X,Y,n_h):
    n_x = X.shape[0]  # size of the input layer (number of features)
    n_y = Y.shape[0]  # size of the output layer
    return (n_x,n_h,n_y)
print("=========================测试layer_sizes=========================")
X_asses,Y_asses = layer_sizes_test_case()
n_x,n_h,n_y = layer_size(X_asses,Y_asses,4)
print("输入层的节点数量n_x = %d "%n_x)
print("隐藏层的节点数量n_h = %d "%n_h)
print("输出层的节点数量n_y = %d "%n_y)
Randomly initialize the parameters:
def initialize_parameters(n_x,n_h,n_y):
    np.random.seed(2)  # fixed seed so the results are reproducible
    W1 = np.random.randn(n_h,n_x)*0.01  # small random values break symmetry between hidden units
    b1 = np.zeros(shape=(n_h,1))        # biases can safely start at zero
    W2 = np.random.randn(n_y,n_h)*0.01
    b2 = np.zeros(shape=(n_y,1))
    assert(W1.shape == (n_h,n_x))
    assert(b1.shape == (n_h,1))
    assert(W2.shape == (n_y,n_h))
    assert(b2.shape == (n_y,1))
    parameters = {
        'W1' : W1,
        'b1' : b1,
        'W2' : W2,
        'b2' : b2
    }
    return parameters
print("=========================测试initialize_parameters=========================")
n_x,n_h,n_y = initialize_parameters_test_case()
parameters = initialize_parameters(n_x,n_h,n_y)
print("W1:"+str(parameters['W1']))
print("b1:"+str(parameters['b1']))
print("W2:"+str(parameters['W2']))
print("b2:"+str(parameters['b2']))
Forward propagation:
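The forward pass applies tanh in the hidden layer and a sigmoid at the output:

Z^{[1]} = W^{[1]} X + b^{[1]}, \quad A^{[1]} = \tanh(Z^{[1]})
Z^{[2]} = W^{[2]} A^{[1]} + b^{[2]}, \quad A^{[2]} = \sigma(Z^{[2]})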
def forward_propagation(X,parameters):
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    Z1 = np.dot(W1,X)+b1  # hidden-layer pre-activation
    A1 = np.tanh(Z1)      # hidden-layer activation
    Z2 = np.dot(W2,A1)+b2 # output-layer pre-activation
    A2 = sigmoid(Z2)      # predicted probabilities, one per example
    assert(A2.shape == (1,X.shape[1]))
    cache = {
        'Z1':Z1,
        'A1':A1,
        'Z2':Z2,
        'A2':A2
    }
    return (A2,cache)
print("=========================测试forward_propagation=========================")
X_asses,parameters = forward_propagation_test_case()
A2,cache = forward_propagation(X_asses,parameters)
print(np.mean(cache['Z1']),np.mean(cache['A1']),np.mean(cache['Z2']),np.mean(cache['A2']))
Compute the cost:
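The cost is the cross-entropy loss averaged over the m training examples:

J = -\frac{1}{m} \sum_{i=1}^{m} \left[ y^{(i)} \log a^{[2](i)} + (1 - y^{(i)}) \log\left(1 - a^{[2](i)}\right) \right]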
def compute_cost(A2,Y):
    m = Y.shape[1]
    logprobs = np.multiply(Y,np.log(A2))+np.multiply((1-Y),np.log(1-A2))  # per-example cross-entropy
    cost = -np.sum(logprobs)/m
    cost = float(np.squeeze(cost))  # make sure cost is a plain scalar, e.g. [[17]] -> 17.0
    return cost
print("=========================测试compute_cost=========================")
A2,Y_asses,parameters = compute_cost_test_case()
cost = compute_cost(A2,Y_asses)
print("cost = %f"% cost)
Backward propagation:
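The gradients implemented below follow from the chain rule, where \odot denotes elementwise multiplication and 1 - (A^{[1]})^2 is the derivative of tanh:

dZ^{[2]} = A^{[2]} - Y
dW^{[2]} = \frac{1}{m} \, dZ^{[2]} (A^{[1]})^{\top}, \quad db^{[2]} = \frac{1}{m} \sum_{i} dZ^{[2](i)}
dZ^{[1]} = \left( (W^{[2]})^{\top} dZ^{[2]} \right) \odot \left( 1 - (A^{[1]})^2 \right)
dW^{[1]} = \frac{1}{m} \, dZ^{[1]} X^{\top}, \quad db^{[1]} = \frac{1}{m} \sum_{i} dZ^{[1](i)}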
def backward_propagation(parameters,cache,X,Y):
    m = X.shape[1]
    A1 = cache['A1']
    A2 = cache['A2']
    W2 = parameters['W2']
    W1 = parameters['W1']
    dZ2 = A2-Y  # gradient of the cost w.r.t. Z2
    dW2 = (1/m)*np.dot(dZ2,A1.T)
    db2 = (1/m)*np.sum(dZ2,axis=1,keepdims=True)
    dZ1 = np.multiply(np.dot(W2.T,dZ2),1-np.power(A1,2))  # 1-A1**2 is the tanh derivative
    dW1 = (1/m)*np.dot(dZ1,X.T)
    db1 = (1/m)*np.sum(dZ1,axis=1,keepdims=True)
    grads = {
        'dW1':dW1,
        'dW2':dW2,
        'db1':db1,
        'db2':db2
    }
    return grads
print("=========================测试backward_propagation=========================")
parameters,cache,X_asses,Y_asses = backward_propagation_test_case()
grads = backward_propagation(parameters,cache,X_asses,Y_asses)
print("dW1 = " + str(grads['dW1']))
print("db1 = " + str(grads['db1']))
print("dW2 = " + str(grads['dW2']))
print("db2 = " + str(grads['db2']))
Update the parameters:
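Each parameter takes one gradient-descent step, where \alpha is the learning rate:

\theta := \theta - \alpha \, \frac{\partial J}{\partial \theta}, \quad \theta \in \{ W^{[1]}, b^{[1]}, W^{[2]}, b^{[2]} \}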
def update_parameters(parameters,grads,learning_rate=1.2):
    W1,b1 = parameters['W1'],parameters['b1']
    W2,b2 = parameters['W2'],parameters['b2']
    dW1,db1 = grads['dW1'],grads['db1']
    dW2,db2 = grads['dW2'],grads['db2']
    W1 = W1 - learning_rate*dW1
    b1 = b1 - learning_rate*db1
    W2 = W2 - learning_rate*dW2
    b2 = b2 - learning_rate*db2
    parameters = {
        'W1':W1,
        'b1':b1,
        'W2':W2,
        'b2':b2
    }
    return parameters
print("=========================测试update_parameters=========================")
parameters,grads = update_parameters_test_case()
parameters = update_parameters(parameters,grads)
print(str(parameters))
Assemble the pieces above into the full model nn_model:
def nn_model(X,Y,n_h,num_iterations,print_cost=False):
    np.random.seed(3)
    n_x,n_h,n_y = layer_size(X,Y,n_h)
    parameters = initialize_parameters(n_x,n_h,n_y)
    for i in range(num_iterations):
        A2,cache = forward_propagation(X,parameters)
        cost = compute_cost(A2,Y)
        grads = backward_propagation(parameters,cache,X,Y)
        parameters = update_parameters(parameters,grads,learning_rate=0.5)
        if print_cost and i%1000 == 0:
            print("Cost after iteration %i: %f"%(i,cost))
    return parameters
print("=========================测试nn_model=========================")
X_asses,Y_asses = nn_model_test_case()
parameters = nn_model(X_asses,Y_asses,4,num_iterations=10000,print_cost=False)
print(parameters)
Prediction function:
def predict(parameters,X):
    A2,cache = forward_propagation(X,parameters)
    predictions = np.round(A2)  # threshold the probabilities at 0.5
    return predictions
print("=========================测试predict=========================")
parameters,X_asses = predict_test_case()
predictions = predict(parameters,X_asses)
print("预测的平均值为 = "+str(np.mean(predictions)))
Now train on the actual dataset and predict:
parameters = nn_model(X,Y,4,num_iterations=10000,print_cost=True)
plot_decision_boundary(lambda x:predict(parameters,x.T),X,Y)
plt.show()
predictions = predict(parameters,X)
print("准确率为:",float((np.dot(Y,predictions.T) + np.dot(1-Y,1-predictions.T))/float(Y.shape[1])))
The learned decision boundary is shown in the plot (not reproduced here).
Vary the hidden layer size and observe how the results change:
plt.figure(figsize=(16,32))
hidden_layer_sizes=[1,2,3,4,5,20,50]
for i,n_h in enumerate(hidden_layer_sizes):
    plt.subplot(5,2,i+1)
    plt.title("Hidden Layer of size: %d "%n_h)
    parameters = nn_model(X,Y,n_h,num_iterations=5000)
    plot_decision_boundary(lambda x:predict(parameters,x.T),X,Y)
    predictions = predict(parameters,X)
    accuracy = float((np.dot(Y,predictions.T)+np.dot(1-Y,1-predictions.T))/float(Y.shape[1])*100)
    print("Hidden layer size:",n_h,"accuracy:",accuracy)
plt.show()
The resulting decision boundaries are shown in the subplot grid (not reproduced here).
Training on the other datasets:
noisy_circles, noisy_moons, blobs, gaussian_quantiles, no_structure = load_extra_datasets()
datasets = {"noisy_circles": noisy_circles,
"noisy_moons": noisy_moons,
"blobs": blobs,
"gaussian_quantiles": gaussian_quantiles}
dataset = "noisy_moons"
X, Y = datasets[dataset]
X, Y = X.T, Y.reshape(1, Y.shape[0])
if dataset == "blobs":
    Y = Y % 2  # blobs has more than two classes; fold them into binary labels
plt.scatter(X[0, :], X[1, :], c=np.squeeze(Y), s=40, cmap=plt.cm.Spectral)
plt.show()
parameters = nn_model(X,Y,4,num_iterations=5000,print_cost=True)
plot_decision_boundary(lambda x:predict(parameters,x.T),X,Y)
plt.show()
predictions = predict(parameters,X)
accuracy = float((np.dot(Y,predictions.T)+np.dot(1-Y,1-predictions.T))/float(Y.shape[1])*100)
print("准确率为:",accuracy)
The resulting decision boundary is shown in the figure (not reproduced here).