
Deep Learning: A First Hands-On Neural Network Implementation


Single-layer and multi-layer neural network implementation

Source: a Kaggle tutorial
Link: link.
These are my personal review notes.

# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load in

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory
# import warnings
import warnings
# filter warnings
warnings.filterwarnings('ignore')
from subprocess import check_output
# print(check_output(["ls", "../input"]).decode("utf8"))
# Any results you write to the current directory are saved as output.

Dataset: https://pan.baidu.com/s/15jq7JF3aO10tEO6_S3XF6Q
Extraction code: a0n4

# Load the dataset (sign-language digit images stored as .npy arrays)
x_1 = np.load('E:/Anaconda/envs/torch160/Python_Test/Data/NLP/deep_learning_npy/X.npy')
Y_1 = np.load('E:/Anaconda/envs/torch160/Python_Test/Data/NLP/deep_learning_npy/Y.npy')
# Display two sample images: index 260 (a "zero" sign) and index 900 (a "one" sign)
img_size = 64
plt.subplot(1,2,1)
plt.imshow(x_1[260].reshape(img_size,img_size))
plt.axis('off')
plt.subplot(1,2,2)
plt.imshow(x_1[900].reshape(img_size,img_size))
plt.axis('off')

[Figure: the two displayed sample images, a "zero" sign and a "one" sign]

#410 means that we have 410 images (zero and one signs)
#64 means that our image size is 64x64 (64x64 pixels)

X = np.concatenate((x_1[204:409],x_1[822:1027]),axis=0)  # in X, indices 0-204 are zero signs and 205-409 are one signs
z = np.zeros(205)
o = np.ones(205)
Y = np.concatenate((z,o),axis=0).reshape(X.shape[0],1)  # Y holds 205 zeros followed by 205 ones, reshaped to (410, 1): one label per image
print("X shapes:",X.shape)
print("Y shapes:",Y.shape)

[Output: the printed shapes of X and Y]

Split X and Y into a training set and a test set, holding out 15% for testing.

# Then lets create x_train, y_train, x_test, y_test arrays
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.15,random_state=42)
number_of_train = X_train.shape[0]  # number of training samples (rows)
number_of_test = X_test.shape[0]    # number of test samples

X is 3-dimensional, so it has to be flattened to 2D before it can be fed into our first deep learning model.
The label array Y is already 2-dimensional, so it stays as it is.

X_train_flatten = X_train.reshape(number_of_train,X_train.shape[1]*X_train.shape[2])
X_test_flatten = X_test.reshape(number_of_test,X_test.shape[1]*X_test.shape[2])
print("X train flatten",X_train_flatten.shape)
print("X train flatten",X_test_flatten.shape)
x_train = X_train_flatten.T#这是转置是为了矩阵乘积,如y = (w.T)x + b
x_test = X_test_flatten.T
y_train = Y_train.T
y_test = Y_test.T
print("x train: ",x_train.shape)
print("x test: ",x_test.shape)
print("y train: ",y_train.shape)
print("y test: ",y_test.shape)

[Output: the printed shapes of x_train, x_test, y_train and y_test after flattening and transposing]

The data is now prepared, so we can move on to logistic regression.
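
Written out, the model the next few functions implement is

$$z = w^{\top}x + b,\qquad \hat{y} = \sigma(z) = \frac{1}{1+e^{-z}}$$
$$\mathcal{L}(y,\hat{y}) = -y\log\hat{y} - (1-y)\log(1-\hat{y}),\qquad J = \frac{1}{m}\sum_{i=1}^{m}\mathcal{L}\big(y^{(i)},\hat{y}^{(i)}\big)$$

where m = x_train.shape[1] is the number of training samples.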

First, define the helper functions that forward and backward propagation will use.

# short description and example of a function definition (def)
def dummy(parameter):
    dummy_parameter = parameter + 5
    return dummy_parameter
result = dummy(3)  # result = 8
# lets initialize the parameters: weight w and bias b (the constant 0.01 used below could certainly be tuned further)
# So what we need is dimension 4096, that is the number of pixels, as a parameter for our initialize method (def)
def initialize_weights_and_bias(dimension):
    w = np.full((dimension,1),0.01)  # np.full(shape, fill_value) creates an array of the given shape filled with fill_value
    b = 0.0
    return w,b
# forward propagation
# calculation of z (define the sigmoid function)
def sigmoid(z):
    y_head = 1/(1+np.exp(-z))
    return y_head
# ReLU function (defined for reference; the NumPy models below use sigmoid and tanh)
def relu(x):
    return np.maximum(0,x)
y_head = sigmoid(0)  # y_head is 0.5 here
# Forward propagation steps:
# find z = w.T*x + b
# y_head = sigmoid(z)
# loss(error) = loss(y, y_head)
# cost = sum(loss)
# The helper functions are ready; now call them inside forward propagation
def forward_propagation(w,b,x_train,y_train):
    z = np.dot(w.T,x_train)+b
    y_head = sigmoid(z)  # probabilities between 0 and 1
    loss = -y_train*np.log(y_head)-(1-y_train)*np.log(1-y_head)  # each image has its own loss
    cost = (np.sum(loss))/x_train.shape[1]  # x_train.shape[1] is for scaling (average over the number of samples)
    return cost
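
As a quick sanity check (an illustrative addition, not part of the original tutorial), the helpers can be exercised with the x_train and y_train arrays prepared above:

# hypothetical sanity check of the helper functions defined above
w, b = initialize_weights_and_bias(4096)            # one weight per pixel of a 64x64 image
print(w.shape, b)                                   # (4096, 1) 0.0
print(sigmoid(0))                                   # 0.5
print(forward_propagation(w, b, x_train, y_train))  # cost of the untrained model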

Next, combine forward propagation and backward propagation in a single function.

def forward_backward_propagation(w,b,x_train,y_train):
    # forward propagation, following the steps listed above
    z = np.dot(w.T,x_train)+b
    y_head = sigmoid(z)
    loss = -y_train*np.log(y_head)-(1-y_train)*np.log(1-y_head)
    cost = (np.sum(loss))/x_train.shape[1]      # x_train.shape[1] is for scaling
    # backward propagation: gradients (partial derivatives) of the cost w.r.t. the weight and bias
    derivative_weight = (np.dot(x_train,((y_head-y_train).T)))/x_train.shape[1]  # x_train.shape[1] is for scaling
    derivative_bias = np.sum(y_head-y_train)/x_train.shape[1]
    gradients = {"derivative_weight":derivative_weight,"derivative_bias":derivative_bias}
    return cost,gradients
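
For completeness, the gradients used above follow from differentiating the cost J. Since $\partial J/\partial z = \hat{y} - y$ for the sigmoid cross-entropy, and with X = x_train of shape (4096, m):

$$\frac{\partial J}{\partial w} = \frac{1}{m}\,X\,(\hat{y}-y)^{\top},\qquad \frac{\partial J}{\partial b} = \frac{1}{m}\sum_{i=1}^{m}\big(\hat{y}^{(i)}-y^{(i)}\big)$$

which is exactly what derivative_weight and derivative_bias compute.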

Update (learn) the parameters with gradient descent.

# Updating(learning) parameters
def update(w,b,x_train,y_train,learning_rate,number_of_iteration):
    cost_list = []   # cost of every iteration
    cost_list2 = []  # cost sampled every 10 iterations, used for plotting
    index = []       # iteration indices sampled every 10 iterations, used as the x-axis
    ## update (learn) the parameters number_of_iteration times
    for i in range(number_of_iteration):
        #make forward and backward propagation and find cost and gradients
        cost,gradients = forward_backward_propagation(w,b,x_train,y_train)
        cost_list.append(cost)
        #lets update
        w = w - learning_rate * gradients['derivative_weight']
        b = b - learning_rate * gradients['derivative_bias']
        if i%10 == 0:
            cost_list2.append(cost)
            index.append(i)
            print("Cost after iteration %i:%f"%(i,cost))
    # we have updated (learned) the parameters: weights and bias
    parameters = {"weight":w,"bias":b}  # store the learned weight and bias in a dictionary
    plt.plot(index,cost_list2)
    plt.xticks(index,rotation='vertical')  # rotate the x-axis tick labels
    plt.xlabel('number of iteration')
    plt.ylabel('cost')
    plt.show()
    return parameters,gradients,cost_list
# parameters, gradients, cost_list = update(w, b, x_train, y_train, learning_rate = 0.009, number_of_iteration = 200)

Define the prediction function.

#prediction
def predict(w,b,x_test):
    # x_test is an input for forward propagation
    # predict the labels of the test set
    z = sigmoid(np.dot(w.T,x_test)+b)  # predicted probabilities
    Y_prediction = np.zeros((1,x_test.shape[1]))  # initialize the (1, number_of_test) prediction array
    # if z is bigger than 0.5, our prediction is sign one (y_head=1),
    # if z is smaller than 0.5, our prediction is sign zero (y_head=0),
    for i in range(z.shape[1]):
        if z[0,i]<=0.5:
            Y_prediction[0,i]=0
        else:
            Y_prediction[0,i]=1
    return Y_prediction  # return the predictions
# predict(parameters["weight"],parameters["bias"],x_test)

With the prediction function in place, we can now put everything together and run logistic regression.

def logistic_regression(x_train,y_train,x_test,y_test,learning_rate,num_iterations):
    #initialize
    dimension = x_train.shape[0] #that is 4096
    w,b = initialize_weights_and_bias(dimension)
    #do not change learning rate
    parameters,gradients,cost_list = update(w,b,x_train,y_train,learning_rate,num_iterations)
    y_prediction_test = predict(parameters['weight'],parameters['bias'],x_test)
    y_prediction_train = predict(parameters["weight"],parameters['bias'],x_train)
    
    #print train/test errors
    print("train accuracy: {} %".format(100 - np.mean(np.abs(y_prediction_train - y_train)) * 100))
    print("test accuracy: {} %".format(100 - np.mean(np.abs(y_prediction_test - y_test)) * 100))
logistic_regression(x_train,y_train,x_test,y_test,learning_rate=0.01,num_iterations=150)

[Figure: cost vs. number of iterations, followed by the printed train/test accuracy]

As the plot shows, the cost keeps decreasing as the number of iterations grows, which means the parameters are being trained better and better.

The neural network above has only one layer, i.e. just an input layer and an output layer (the input layer does not count towards the number of layers).

All of the code above can easily be replaced with the sklearn library, and the result is the same.

from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression(random_state=42,max_iter=150)
print("test accuracy: {}".format(logreg.fit(x_train.T,y_train.T.ravel()).score(x_test.T,y_test.T)))  # fitting the model and using it are combined into one step here
print("train accuracy: {}".format(logreg.fit(x_train.T,y_train.T.ravel()).score(x_train.T,y_train.T)))

[Output: the sklearn train and test accuracy]

Next we build a 2-layer neural network. The setup is similar, except there is now a hidden layer with its own weights Wi and biases bi.
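
Written out, the forward pass implemented below is

$$Z_1 = W_1 x + b_1,\qquad A_1 = \tanh(Z_1),\qquad Z_2 = W_2 A_1 + b_2,\qquad A_2 = \sigma(Z_2)$$

with a hidden layer of 3 tanh units. In the backward pass, $\partial J/\partial Z_2 = A_2 - y$ as in the single-layer case, and $\tanh'(z) = 1 - \tanh^2(z)$, which is where the (1 - A1**2) factor in backward_propagation_NN comes from.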

#2-Layer Neural Network
# initialize parameters and layer sizes
def initialize_parameters_and_layer_sizes_NN(x_train,y_train):
    parameters = {"weight1":np.random.randn(3,x_train.shape[0])*0.1,
                 "bias1":np.zeros((3,1)),
                 "weight2":np.random.randn(y_train.shape[0],3)*0.1,
                  "bias2":np.zeros((y_train.shape[0],1))}
    return parameters

#forward propagation
def forward_propagation_NN(x_train,parameters):
    Z1 = np.dot(parameters['weight1'],x_train)+parameters['bias1']
    A1 = np.tanh(Z1)
    Z2 = np.dot(parameters['weight2'],A1)+parameters['bias2']
    A2 = sigmoid(Z2)
    cache = {"Z1":Z1,
            "A1":A1,
            "Z2":Z2,
            "A2":A2}
    return A2,cache
#Loss function and Cost function
#compute cost (binary cross-entropy, matching the gradient dZ2 = A2 - Y used below)
def compute_cost_NN(A2,Y,parameters):
    logprobs = np.multiply(np.log(A2),Y) + np.multiply(np.log(1-A2),1-Y)
    cost = -np.sum(logprobs)/Y.shape[1]
    return cost
# Backward Propagation
def backward_propagation_NN(parameters,cache,X,Y):
    dZ2 = cache['A2']-Y
    dW2 = np.dot(dZ2,cache['A1'].T)/X.shape[1]
    db2 = np.sum(dZ2,axis=1,keepdims=True)/X.shape[1]
    dZ1 = np.dot(parameters['weight2'].T,dZ2)*(1-np.power(cache['A1'],2))
    dW1 = np.dot(dZ1,X.T)/X.shape[1]
    db1 = np.sum(dZ1,axis = 1,keepdims=True)/X.shape[1]
    grads = {'dweight1':dW1,
             "dbias1": db1,
             "dweight2": dW2,
             "dbias2": db2}
    return grads
#Update Parameters
# update parameters
def update_parameters_NN(parameters, grads, learning_rate = 0.01):
    parameters = {"weight1": parameters["weight1"]-learning_rate*grads["dweight1"],
                  "bias1": parameters["bias1"]-learning_rate*grads["dbias1"],
                  "weight2": parameters["weight2"]-learning_rate*grads["dweight2"],
                  "bias2": parameters["bias2"]-learning_rate*grads["dbias2"]}
    
    return parameters
#Prediction with learnt parameters weight and bias
# prediction
def predict_NN(parameters,x_test):
    # x_test is a input for forward propagation
    A2, cache = forward_propagation_NN(x_test,parameters)
    Y_prediction = np.zeros((1,x_test.shape[1]))
    # if z is bigger than 0.5, our prediction is sign one (y_head=1),
    # if z is smaller than 0.5, our prediction is sign zero (y_head=0),
    for i in range(A2.shape[1]):
        if A2[0,i]<= 0.5:
            Y_prediction[0,i] = 0
        else:
            Y_prediction[0,i] = 1

    return Y_prediction
#create the model
def two_layer_neural_network(x_train,y_train,x_test,y_test,num_iterations):
    cost_list = []
    index_list = []
    #initialize parameters and layer sizes
    parameters = initialize_parameters_and_layer_sizes_NN(x_train,y_train)
    
    for i in range(0,num_iterations):
        #forward propagation
        A2,cache = forward_propagation_NN(x_train,parameters)
        #compute cost
        cost = compute_cost_NN(A2,y_train,parameters)
        #back propagation
        grads = backward_propagation_NN(parameters,cache,x_train,y_train)
        #update parameters
        parameters = update_parameters_NN(parameters,grads)
        if i %100==0:
            cost_list.append(cost)
            index_list.append(i)
            print("cost after iteration %i:%f"%(i,cost))
    plt.plot(index_list,cost_list)
    plt.xticks(index_list,rotation='vertical')
    plt.xlabel("Number of Iterarion")
    plt.ylabel("Cost")
    plt.show()
    
    #make predictions
    y_prediction_test = predict_NN(parameters,x_test)
    y_prediction_train = predict_NN(parameters,x_train)
    print("train accuracy: {} %".format(100 - np.mean(np.abs(y_prediction_train - y_train)) * 100))
    print("test accuracy: {} %".format(100 - np.mean(np.abs(y_prediction_test - y_test)) * 100))
    return parameters

parameters = two_layer_neural_network(x_train, y_train,x_test,y_test, num_iterations=2500)

[Figure: cost vs. number of iterations for the 2-layer network, followed by the printed train/test accuracy]

Below is the code for an L-layer neural network built with Keras; it is best run on a server (or another machine with more compute).

#L Layer Neural Network
# reshaping: back to (samples, features), the layout expected by scikit-learn / Keras
x_train, x_test, y_train, y_test = x_train.T, x_test.T, y_train.T, y_test.T
# Evaluating the ANN
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from keras.models import Sequential # initialize neural network library
from keras.layers import Dense # build our layers library
def build_classifier():
    classifier = Sequential() # initialize neural network
    classifier.add(Dense(units = 8, kernel_initializer = 'uniform', activation = 'relu', input_dim = x_train.shape[1]))
    classifier.add(Dense(units = 4, kernel_initializer = 'uniform', activation = 'relu'))
    classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
    classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
    return classifier
classifier = KerasClassifier(build_fn = build_classifier, epochs = 100)
accuracies = cross_val_score(estimator = classifier, X = x_train, y = y_train, cv = 3)
mean = accuracies.mean()
std = accuracies.std()
print("Accuracy mean: " + str(mean))
print("Accuracy standard deviation: " + str(std))