Deep Learning: A First Hands-On Practice with Neural Networks
Single-layer and multi-layer neural network implementations.
Source: a tutorial by a Kaggle expert.
Link: link.
Personal notes for review.
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory
import warnings
# filter warnings
warnings.filterwarnings('ignore')
from subprocess import check_output
# print(check_output(["ls", "../input"]).decode("utf8"))
# Any results you write to the current directory are saved as output.
Dataset link: https://pan.baidu.com/s/15jq7JF3aO10tEO6_S3XF6Q (extraction code: a0n4)
# load the dataset (adjust the paths to wherever you saved X.npy and Y.npy)
x_1 = np.load('E:/Anaconda/envs/torch160/Python_Test/Data/NLP/deep_learning_npy/X.npy')
Y_1 = np.load('E:/Anaconda/envs/torch160/Python_Test/Data/NLP/deep_learning_npy/Y.npy')
# display two sample images
img_size = 64
plt.subplot(1, 2, 1)
plt.imshow(x_1[260].reshape(img_size, img_size))
plt.axis('off')
plt.subplot(1, 2, 2)
plt.imshow(x_1[900].reshape(img_size, img_size))
plt.axis('off')
plt.show()
# 410 means that we have 410 images (zero and one signs)
# 64 means that our image size is 64x64 pixels
X = np.concatenate((x_1[204:409], x_1[822:1027]), axis=0)  # in X, indices 0-204 are the zero sign and 205-409 are the one sign
z = np.zeros(205)
o = np.ones(205)
Y = np.concatenate((z, o), axis=0).reshape(X.shape[0], 1)  # labels: 205 zeros followed by 205 ones, reshaped to (410, 1) rather than (410,)
print("X shape:", X.shape)
print("Y shape:", Y.shape)
Split X and Y into training and test sets, holding out 15% for testing.
# Then lets create x_train, y_train, x_test, y_test arrays
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.15,random_state=42)
number_of_train = X_train.shape[0]  # number of training samples (rows)
number_of_test = X_test.shape[0]    # number of test samples
X is 3-dimensional, so it must be flattened to 2-D before it can be fed into our first deep learning model.
The label array Y is already 2-D, so it needs no change.
X_train_flatten = X_train.reshape(number_of_train,X_train.shape[1]*X_train.shape[2])
X_test_flatten = X_test.reshape(number_of_test,X_test.shape[1]*X_test.shape[2])
print("X train flatten",X_train_flatten.shape)
print("X train flatten",X_test_flatten.shape)
x_train = X_train_flatten.T  # transpose so each column is one sample, matching the matrix product y = (w.T)x + b
x_test = X_test_flatten.T
y_train = Y_train.T
y_test = Y_test.T
print("x train: ",x_train.shape)
print("x test: ",x_test.shape)
print("y train: ",y_train.shape)
print("y test: ",y_test.shape)
The data is now prepared; next we move on to logistic regression.
First, define the functions used during forward and backward propagation.
# short description and example of a definition (def)
def dummy(parameter):
    dummy_parameter = parameter + 5
    return dummy_parameter
result = dummy(3)  # result = 8
# lets initialize the parameters: weight w and bias b (the initial values could be chosen more carefully)
# the dimension we need is 4096, the number of pixels, passed as a parameter to our initialize method (def)
def initialize_weights_and_bias(dimension):
    w = np.full((dimension, 1), 0.01)  # np.full(shape, fill_value) creates an array of the given shape filled with fill_value
    b = 0.0
    return w, b
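A quick usage check (illustrative only): with the 64x64 images flattened to 4096 pixels, the initial parameters look like this.
w, b = initialize_weights_and_bias(4096)
print(w.shape, b)       # (4096, 1) 0.0
print(w[:3].ravel())    # [0.01 0.01 0.01]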
# forward propagation
# calculation of z (define the sigmoid function)
def sigmoid(z):
    y_head = 1 / (1 + np.exp(-z))
    return y_head
# ReLU function (defined here for reference; the logistic regression below only uses sigmoid)
def relu(x):
    return np.maximum(0, x)
y_head = sigmoid(0)  # y_head is 0.5 here
# Forward propagation steps:
# find z = w.T*x + b
# y_head = sigmoid(z)
# loss(error) = loss(y, y_head)
# cost = sum(loss)
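Written out as formulas, with m the number of training images and \hat{y} = y_head (this matches the code below, where the summed loss is divided by m):

loss(y, \hat{y}) = -\left[\, y \log \hat{y} + (1-y)\log(1-\hat{y}) \,\right], \qquad
cost = \frac{1}{m}\sum_{i=1}^{m} loss\!\left(y^{(i)}, \hat{y}^{(i)}\right)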
# the helper functions are ready; now call them inside forward propagation
def forward_propagation(w, b, x_train, y_train):
    z = np.dot(w.T, x_train) + b
    y_head = sigmoid(z)  # probabilities between 0 and 1
    loss = -y_train*np.log(y_head) - (1-y_train)*np.log(1-y_head)  # every image has its own loss; the cost sums them
    cost = (np.sum(loss)) / x_train.shape[1]  # x_train.shape[1] is for scaling (averaging over the samples)
    return cost
Next, combine forward and backward propagation.
def forward_backward_propagation(w, b, x_train, y_train):
    # forward propagation, following the steps above
    z = np.dot(w.T, x_train) + b
    y_head = sigmoid(z)
    loss = -y_train*np.log(y_head) - (1-y_train)*np.log(1-y_head)
    cost = (np.sum(loss)) / x_train.shape[1]  # x_train.shape[1] is for scaling
    # backward propagation: gradients (partial derivatives) of the cost w.r.t. the weights and bias
    derivative_weight = (np.dot(x_train, ((y_head - y_train).T))) / x_train.shape[1]  # x_train.shape[1] is for scaling
    derivative_bias = np.sum(y_head - y_train) / x_train.shape[1]
    gradients = {"derivative_weight": derivative_weight, "derivative_bias": derivative_bias}
    return cost, gradients
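As a sanity check on the analytic gradients, the bias gradient can be compared against a finite-difference estimate on tiny made-up data (the arrays below are hypothetical, purely for illustration):
w_chk, b_chk = initialize_weights_and_bias(4)              # 4 fake "pixels"
x_chk = np.random.randn(4, 5)                              # 5 fake samples
y_chk = np.array([[0, 1, 0, 1, 1]], dtype=float)
cost0, grads_chk = forward_backward_propagation(w_chk, b_chk, x_chk, y_chk)
eps = 1e-5
cost_plus = forward_propagation(w_chk, b_chk + eps, x_chk, y_chk)
cost_minus = forward_propagation(w_chk, b_chk - eps, x_chk, y_chk)
numeric_db = (cost_plus - cost_minus) / (2 * eps)
print(numeric_db, grads_chk["derivative_bias"])            # the two values should be very close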
Update (learn) the parameters.
# Updating (learning) parameters
def update(w, b, x_train, y_train, learning_rate, number_of_iteration):
    cost_list = []    # cost at every iteration
    cost_list2 = []   # cost collected every 10 iterations, used for plotting
    index = []        # iteration indices collected every 10 iterations, used as the x-axis
    # updating (learning) parameters number_of_iteration times
    for i in range(number_of_iteration):
        # make forward and backward propagation and find cost and gradients
        cost, gradients = forward_backward_propagation(w, b, x_train, y_train)
        cost_list.append(cost)
        # lets update
        w = w - learning_rate * gradients['derivative_weight']
        b = b - learning_rate * gradients['derivative_bias']
        if i % 10 == 0:
            cost_list2.append(cost)
            index.append(i)
            print("Cost after iteration %i: %f" % (i, cost))
    # we update (learn) parameters weights and bias
    parameters = {"weight": w, "bias": b}  # store the learned weight and bias in a dictionary
    plt.plot(index, cost_list2)
    plt.xticks(index, rotation='vertical')  # rotate the x-axis tick labels
    plt.xlabel('number of iteration')
    plt.ylabel('cost')
    plt.show()
    return parameters, gradients, cost_list
# parameters, gradients, cost_list = update(w, b, x_train, y_train, learning_rate=0.009, number_of_iteration=200)
Define the prediction function.
# prediction
def predict(w, b, x_test):
    # x_test is an input for forward propagation
    # here we predict on the test set x_test
    z = sigmoid(np.dot(w.T, x_test) + b)  # forward-pass output
    Y_prediction = np.zeros((1, x_test.shape[1]))  # initialize the prediction array
    # if z is bigger than 0.5, our prediction is sign one (y_head = 1),
    # if z is smaller than 0.5, our prediction is sign zero (y_head = 0)
    for i in range(z.shape[1]):
        if z[0, i] <= 0.5:
            Y_prediction[0, i] = 0
        else:
            Y_prediction[0, i] = 1
    return Y_prediction  # return the predictions
# predict(parameters["weight"], parameters["bias"], x_test)
With the prediction function in place, we can now run the full logistic regression.
def logistic_regression(x_train, y_train, x_test, y_test, learning_rate, num_iterations):
    # initialize
    dimension = x_train.shape[0]  # that is 4096
    w, b = initialize_weights_and_bias(dimension)
    # do not change learning rate
    parameters, gradients, cost_list = update(w, b, x_train, y_train, learning_rate, num_iterations)
    y_prediction_test = predict(parameters["weight"], parameters["bias"], x_test)
    y_prediction_train = predict(parameters["weight"], parameters["bias"], x_train)
    # print train/test accuracy
    print("train accuracy: {} %".format(100 - np.mean(np.abs(y_prediction_train - y_train)) * 100))
    print("test accuracy: {} %".format(100 - np.mean(np.abs(y_prediction_test - y_test)) * 100))
logistic_regression(x_train, y_train, x_test, y_test, learning_rate=0.01, num_iterations=150)
As the plot shows, the cost decreases as the number of iterations grows, meaning the parameters are being trained better and better.
The network above has only one layer: just an input layer and an output layer (the input layer is not counted as a layer).
This code can easily be replaced with the sklearn library, and the result is the same.
from sklearn.linear_model import LogisticRegression as clf
logreg = clf(random_state=42, max_iter=150)
print("test accuracy: {}".format(logreg.fit(x_train.T, y_train.T).score(x_test.T, y_test.T)))  # fitting and scoring are combined into one step here
print("train accuracy: {}".format(logreg.fit(x_train.T, y_train.T).score(x_train.T, y_train.T)))
Next we build a 2-layer neural network. The idea is the same, except that there is now a hidden layer with its own weights Wi and biases Bi.
# 2-Layer Neural Network
# initialize parameters and layer sizes
def initialize_parameters_and_layer_sizes_NN(x_train, y_train):
    parameters = {"weight1": np.random.randn(3, x_train.shape[0]) * 0.1,
                  "bias1": np.zeros((3, 1)),
                  "weight2": np.random.randn(y_train.shape[0], 3) * 0.1,
                  "bias2": np.zeros((y_train.shape[0], 1))}
    return parameters
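A quick shape check (illustrative only): with x_train of shape (4096, m), y_train of shape (1, m), and a 3-node hidden layer, the parameters should come out as weight1 (3, 4096), bias1 (3, 1), weight2 (1, 3), bias2 (1, 1).
params_demo = initialize_parameters_and_layer_sizes_NN(x_train, y_train)
for name, value in params_demo.items():
    print(name, value.shape)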
# forward propagation
def forward_propagation_NN(x_train, parameters):
    Z1 = np.dot(parameters['weight1'], x_train) + parameters['bias1']
    A1 = np.tanh(Z1)
    Z2 = np.dot(parameters['weight2'], A1) + parameters['bias2']
    A2 = sigmoid(Z2)
    cache = {"Z1": Z1,
             "A1": A1,
             "Z2": Z2,
             "A2": A2}
    return A2, cache
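The forward pass above, written out as formulas (tanh activation in the hidden layer, the sigmoid defined earlier at the output):

Z_1 = W_1 x + b_1,\qquad A_1 = \tanh(Z_1),\qquad Z_2 = W_2 A_1 + b_2,\qquad A_2 = \sigma(Z_2) = \hat{y}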
# Loss function and cost function
# compute cost
def compute_cost_NN(A2, Y, parameters):
    # full binary cross-entropy, the same cost used in the logistic regression above
    logprobs = np.multiply(np.log(A2), Y) + np.multiply(np.log(1 - A2), 1 - Y)
    cost = -np.sum(logprobs) / Y.shape[1]
    return cost
# Backward propagation
def backward_propagation_NN(parameters, cache, X, Y):
    dZ2 = cache['A2'] - Y
    dW2 = np.dot(dZ2, cache['A1'].T) / X.shape[1]
    db2 = np.sum(dZ2, axis=1, keepdims=True) / X.shape[1]
    dZ1 = np.dot(parameters['weight2'].T, dZ2) * (1 - np.power(cache['A1'], 2))
    dW1 = np.dot(dZ1, X.T) / X.shape[1]
    db1 = np.sum(dZ1, axis=1, keepdims=True) / X.shape[1]
    grads = {"dweight1": dW1,
             "dbias1": db1,
             "dweight2": dW2,
             "dbias2": db2}
    return grads
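The (1 - A1**2) factor in dZ1 comes from the derivative of tanh: d tanh(z)/dz = 1 - tanh(z)^2. A quick numerical check (the value 0.7 is arbitrary, purely for illustration):
z_demo = 0.7
eps = 1e-6
numeric = (np.tanh(z_demo + eps) - np.tanh(z_demo - eps)) / (2 * eps)
analytic = 1 - np.tanh(z_demo) ** 2
print(numeric, analytic)  # the two values agree to several decimal places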
# Update parameters
def update_parameters_NN(parameters, grads, learning_rate=0.01):
    parameters = {"weight1": parameters["weight1"] - learning_rate * grads["dweight1"],
                  "bias1": parameters["bias1"] - learning_rate * grads["dbias1"],
                  "weight2": parameters["weight2"] - learning_rate * grads["dweight2"],
                  "bias2": parameters["bias2"] - learning_rate * grads["dbias2"]}
    return parameters
# Prediction with the learnt parameters (weights and biases)
def predict_NN(parameters, x_test):
    # x_test is an input for forward propagation
    A2, cache = forward_propagation_NN(x_test, parameters)
    Y_prediction = np.zeros((1, x_test.shape[1]))
    # if A2 is bigger than 0.5, our prediction is sign one (y_head = 1),
    # if A2 is smaller than 0.5, our prediction is sign zero (y_head = 0)
    for i in range(A2.shape[1]):
        if A2[0, i] <= 0.5:
            Y_prediction[0, i] = 0
        else:
            Y_prediction[0, i] = 1
    return Y_prediction
# build the 2-layer model
def two_layer_neural_network(x_train, y_train, x_test, y_test, num_iterations):
    cost_list = []
    index_list = []
    # initialize parameters and layer sizes
    parameters = initialize_parameters_and_layer_sizes_NN(x_train, y_train)
    for i in range(0, num_iterations):
        # forward propagation
        A2, cache = forward_propagation_NN(x_train, parameters)
        # compute cost
        cost = compute_cost_NN(A2, y_train, parameters)
        # backward propagation
        grads = backward_propagation_NN(parameters, cache, x_train, y_train)
        # update parameters
        parameters = update_parameters_NN(parameters, grads)
        if i % 100 == 0:
            cost_list.append(cost)
            index_list.append(i)
            print("Cost after iteration %i: %f" % (i, cost))
    plt.plot(index_list, cost_list)
    plt.xticks(index_list, rotation='vertical')
    plt.xlabel("Number of Iteration")
    plt.ylabel("Cost")
    plt.show()
    # predictions
    y_prediction_test = predict_NN(parameters, x_test)
    y_prediction_train = predict_NN(parameters, x_train)
    print("train accuracy: {} %".format(100 - np.mean(np.abs(y_prediction_train - y_train)) * 100))
    print("test accuracy: {} %".format(100 - np.mean(np.abs(y_prediction_test - y_test)) * 100))
    return parameters
parameters = two_layer_neural_network(x_train, y_train,x_test,y_test, num_iterations=2500)
Below is the implementation of an L-layer neural network; it is best run on a server or another machine with more compute.
#L Layer Neural Network
# reshaping: transpose back so that samples are rows again, as keras/sklearn expect
x_train, x_test, y_train, y_test = x_train.T, x_test.T, y_train.T, y_test.T
# Evaluating the ANN
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from keras.models import Sequential # initialize neural network library
from keras.layers import Dense # build our layers library
def build_classifier():
    classifier = Sequential()  # initialize neural network
    classifier.add(Dense(units=8, kernel_initializer='uniform', activation='relu', input_dim=x_train.shape[1]))
    classifier.add(Dense(units=4, kernel_initializer='uniform', activation='relu'))
    classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))
    classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return classifier
classifier = KerasClassifier(build_fn=build_classifier, epochs=100)
accuracies = cross_val_score(estimator=classifier, X=x_train, y=y_train, cv=3)
mean = accuracies.mean()
std = accuracies.std()
print("Accuracy mean: " + str(mean))
print("Accuracy standard deviation: " + str(std))