欢迎您访问程序员文章站!本站旨在为大家提供、分享程序员计算机编程知识!
您现在的位置是: 首页

吴恩达作业8:三层神经网络实现手势数字的识别(基于tensorflow)

程序员文章站 2022-05-22 10:35:51
...

数据集的载入以及随机生成 mini-batch 的代码都放在 tf_utils.py 中,代码如下:

import h5py
import numpy as np
import tensorflow as tf
import math

def load_dataset():
    """
    Load the train and test sign datasets from their HDF5 files.

    Reads 'datasets/train_signs.h5' and 'datasets/test_signs.h5' (paths are
    relative to the current working directory).

    Returns:
    train_set_x_orig -- contents of "train_set_x" as a numpy array
    train_set_y_orig -- contents of "train_set_y", reshaped to (1, number of examples)
    test_set_x_orig -- contents of "test_set_x" as a numpy array
    test_set_y_orig -- contents of "test_set_y", reshaped to (1, number of examples)
    classes -- contents of "list_classes" from the test file as a numpy array
    """
    # Everything is copied into memory inside the ``with`` blocks so that the
    # file handles are released before the function returns.
    with h5py.File('datasets/train_signs.h5', "r") as train_dataset:
        train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # train set features
        train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # train set labels

    with h5py.File('datasets/test_signs.h5', "r") as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # test set features
        test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # test set labels
        classes = np.array(test_dataset["list_classes"][:])  # the list of classes

    # Labels are stored as a flat vector; expose them as a single row.
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes


def random_mini_batches(X, Y, mini_batch_size, seed = 0):
    """
    Shuffle the examples of (X, Y) and cut them into consecutive mini-batches.

    Arguments:
    X -- input data; examples are indexed along axis 1
    Y -- labels; examples are indexed along axis 1, in the same order as X
    mini_batch_size -- number of examples per mini-batch, integer
    seed -- value passed to np.random.seed before shuffling, so the same seed
            always yields the same mini-batches

    Returns:
    mini_batches -- list of (mini_batch_X, mini_batch_Y) tuples; the last one
                    is smaller when the number of examples is not a multiple
                    of mini_batch_size
    """
    num_examples = X.shape[1]
    np.random.seed(seed)

    # One shared permutation keeps every column of X paired with its label.
    order = np.random.permutation(num_examples).tolist()
    X_shuffled, Y_shuffled = X[:, order], Y[:, order]

    # Column ranges of the full-size batches ...
    full_count = math.floor(num_examples / mini_batch_size)
    spans = [(idx * mini_batch_size, idx * mini_batch_size + mini_batch_size)
             for idx in range(full_count)]
    # ... plus the leftover examples, if any.
    if num_examples % mini_batch_size != 0:
        spans.append((full_count * mini_batch_size, num_examples))

    return [(X_shuffled[:, lo:hi], Y_shuffled[:, lo:hi]) for lo, hi in spans]

def convert_to_one_hot(Y, C):
    """
    Turn integer labels into a one-hot matrix with one column per label.

    Arguments:
    Y -- numpy array of integer class indices (flattened before use)
    C -- number of classes

    Returns:
    Y -- array of shape (C, number of labels); column j has a 1 in row Y[j]
    """
    flat_labels = Y.reshape(-1)      # collapse to a single row of indices
    identity = np.eye(C)
    picked_rows = identity[flat_labels]  # row i of the identity is the one-hot code of class i
    return picked_rows.T


def predict(X, parameters):
    """
    Evaluate the three-layer network on a single flattened input.

    Arguments:
    X -- input fed to a float placeholder of shape (12288, 1)
    parameters -- dictionary with entries "W1", "b1", "W2", "b2", "W3", "b3";
                  each value is converted with tf.convert_to_tensor

    Returns:
    prediction -- evaluated result of tf.argmax (default axis) over the output
                  of forward_propagation_for_predict
    """
    params = {name: tf.convert_to_tensor(parameters[name])
              for name in ("W1", "b1", "W2", "b2", "W3", "b3")}

    x = tf.placeholder("float", [12288, 1])

    z3 = forward_propagation_for_predict(x, params)
    p = tf.argmax(z3)

    # The context manager closes the session (and frees its resources) once
    # the prediction has been computed.
    with tf.Session() as sess:
        prediction = sess.run(p, feed_dict = {x: X})

    return prediction

def forward_propagation_for_predict(X, parameters):
    """
    Build the forward pass LINEAR -> RELU -> LINEAR -> RELU -> LINEAR.

    No softmax is applied here: the value returned is the output of the last
    linear unit.

    Arguments:
    X -- input tensor; it is left-multiplied by W1
    parameters -- python dictionary containing "W1", "b1", "W2", "b2", "W3", "b3"

    Returns:
    Z3 -- the output of the last LINEAR unit
    """
    # Pull every parameter out up front, in a fixed order.
    W1, b1, W2, b2, W3, b3 = (parameters[key]
                              for key in ('W1', 'b1', 'W2', 'b2', 'W3', 'b3'))

    def affine(weights, inputs, bias):
        # Numpy equivalent: np.dot(weights, inputs) + bias
        return tf.add(tf.matmul(weights, inputs), bias)

    hidden1 = tf.nn.relu(affine(W1, X, b1))
    hidden2 = tf.nn.relu(affine(W2, hidden1, b2))
    return affine(W3, hidden2, b3)
    

首先看数据集:

import tf_utils
import cv2
# Load the raw arrays and print the shape of each split.
train_set_x_orig, train_set_Y, test_set_x_orig, test_set_Y, classes = tf_utils.load_dataset()
for template, array in (('训练样本={}', train_set_x_orig),
                        ('训练样本标签={}', train_set_Y),
                        ('测试样本={}', test_set_x_orig),
                        ('测试样本标签={}', test_set_Y)):
    print(template.format(array.shape))

# Show the label and the image of the training example at index 5.
print('第五个样本={}'.format(train_set_Y[0, 5]))
sample_image = train_set_x_orig[5, :, :, :]
cv2.imshow('1.jpg', sample_image)
cv2.waitKey()  # no delay argument is given; see the OpenCV docs for the wait semantics

打印结果:可以看出共有1080个训练样本,每个样本的size为(64,64,3);标签用手势对应的数字表示,因此后面需要把标签转换成one-hot编码,形状为(6,samples)。

吴恩达作业8:三层神经网络实现手势数字的识别(基于tensorflow)

利用三层神经网络,W1=(25,64*64*3),W2=(12,25),W3=(6,12),输入X=(64*64*3,samples),最终y_pred=(6,samples);计算损失时把y_pred和给定的真实标签y都转置成(samples,6),代码如下:

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import tf_utils
import cv2
"""
创建 placeholder
"""
def create_placeholder(n_x,n_y):
    """
    Create the float32 placeholders for the inputs and the labels.

    Arguments:
    n_x -- size of the first dimension of the input placeholder
    n_y -- size of the first dimension of the label placeholder

    Returns:
    X -- placeholder named 'X' with shape [n_x, None]
    Y -- placeholder named 'Y' with shape [n_y, None]
    """
    # The second dimension is left as None so its size is not fixed.
    input_shape = [n_x, None]
    label_shape = [n_y, None]
    inputs = tf.placeholder(tf.float32, shape=input_shape, name='X')
    labels = tf.placeholder(tf.float32, shape=label_shape, name='Y')
    return inputs, labels
"""
初始化权重
"""
def initialize_parameters():
    """
    Create the weight and bias variables of the three-layer network.

    Weights use the Xavier initializer with seed 1; biases start at zero.
    Shapes: W1 [25, 12288], b1 [25, 1], W2 [12, 25], b2 [12, 1],
    W3 [6, 12], b3 [6, 1].

    Returns:
    parameters -- dictionary mapping 'W1', 'b1', 'W2', 'b2', 'W3', 'b3' to
                  the created variables
    """
    tf.set_random_seed(1)

    # (rows, columns) of each weight matrix, first layer to last.
    layer_shapes = ((25, 12288), (12, 25), (6, 12))

    parameters = {}
    for layer, (n_out, n_in) in enumerate(layer_shapes, start=1):
        w_name = 'W{}'.format(layer)
        b_name = 'b{}'.format(layer)
        parameters[w_name] = tf.get_variable(
            name=w_name, shape=[n_out, n_in], dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer(seed=1))
        parameters[b_name] = tf.get_variable(
            name=b_name, shape=[n_out, 1], dtype=tf.float32,
            initializer=tf.zeros_initializer())
    return parameters
"""
one-hot编码
"""
def convert_one_hot(Y,C):
    """
    One-hot encode integer labels, one column per label.

    Arguments:
    Y -- numpy array of integer class indices (flattened before use)
    C -- number of classes

    Returns:
    one_hot -- array of shape (C, number of labels)
    """
    class_ids = Y.reshape(-1)
    # Row i of the identity matrix is the one-hot code of class i; transpose
    # so that each label ends up as a column.
    encoded_rows = np.eye(C)[class_ids]
    return encoded_rows.T
"""
前向传播
"""
def forward_propagation(X,parameters):
    """
    Build the forward pass LINEAR -> RELU -> LINEAR -> RELU -> LINEAR.

    Arguments:
    X -- input tensor; it is left-multiplied by W1
    parameters -- dictionary containing 'W1', 'b1', 'W2', 'b2', 'W3', 'b3'

    Returns:
    Z3 -- output of the last linear unit (no softmax applied)
    """
    W1, b1 = parameters['W1'], parameters['b1']
    W2, b2 = parameters['W2'], parameters['b2']
    W3, b3 = parameters['W3'], parameters['b3']

    # Two hidden layers with ReLU, then a final linear layer.
    first_hidden = tf.nn.relu(tf.add(tf.matmul(W1, X), b1))
    second_hidden = tf.nn.relu(tf.add(tf.matmul(W2, first_hidden), b2))
    output = tf.add(tf.matmul(W3, second_hidden), b3)
    return output
"""
计算损失值
"""
def compute_cost(Z3,Y):
    """
    Compute the mean softmax cross-entropy between Z3 and Y.

    Arguments:
    Z3 -- output of the last linear unit
    Y -- label tensor with the same layout as Z3

    Returns:
    cost -- scalar tensor, the mean of the per-example cross-entropy
    """
    # Both tensors are transposed because softmax_cross_entropy_with_logits_v2
    # expects the shape (number of examples, num_classes).
    logits = tf.transpose(Z3)
    labels = tf.transpose(Y)
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=labels)
    return tf.reduce_mean(per_example_loss)
"""
构建模型
"""
def model(train_X,train_Y,test_X,test_Y,learning_rate,num_pochs,minibatch_size):
    """
    Train the three-layer network with Adam and report train/test accuracy.

    Arguments:
    train_X -- training inputs, 2-D, examples along axis 1
    train_Y -- training labels, examples along axis 1
    test_X -- test inputs, same layout as train_X
    test_Y -- test labels, same layout as train_Y
    learning_rate -- learning rate passed to tf.train.AdamOptimizer
    num_pochs -- number of passes over the training set
    minibatch_size -- mini-batch size passed to tf_utils.random_mini_batches

    Returns:
    parameters -- dictionary of the evaluated 'W1', 'b1', 'W2', 'b2', 'W3', 'b3'

    Side effects: prints the averaged cost every 100 epochs, shows a
    matplotlib plot of those costs, and prints train and test accuracy.
    """
    tf.set_random_seed(1)
    seed = 3
    n_x, _ = train_X.shape
    n_y = train_Y.shape[0]
    costs = []

    X, Y = create_placeholder(n_x, n_y)
    parameters = initialize_parameters()
    Z3 = forward_propagation(X, parameters)
    cost = compute_cost(Z3, Y)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    # Axis 0 indexes the classes, so argmax over it gives one class per example.
    correct_prediction = tf.equal(tf.argmax(Z3, 0), tf.argmax(Y, 0))
    accuarcy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))

    init = tf.global_variables_initializer()
    # The context manager guarantees the session is closed, even on error.
    with tf.Session() as sess:
        sess.run(init)
        for i in range(num_pochs):
            # Advance the seed so each epoch gets a different shuffle; with a
            # fixed seed every epoch would replay the same mini-batches.
            seed += 1
            mini_batches = tf_utils.random_mini_batches(train_X, train_Y, minibatch_size, seed)
            # Count the batches actually produced (including a trailing
            # partial one) so that epoch_cost is a true average.
            num_minibatches = len(mini_batches)
            epoch_cost = 0
            for mini_batche_X, mini_batche_Y in mini_batches:
                _, temp_cost = sess.run([optimizer, cost],
                                        feed_dict={X: mini_batche_X, Y: mini_batche_Y})
                epoch_cost += temp_cost / num_minibatches
            if i % 100 == 0:
                print('after {} iterations minibatch_cost={}'.format(i, epoch_cost))
                costs.append(epoch_cost)

        # One point per recorded cost, i.e. one every 100 epochs.
        plt.plot(costs)
        plt.xlabel('iterations')
        plt.ylabel('cost')
        plt.title('learning_rate={}'.format(learning_rate))
        plt.show()

        # Evaluate the variables so that plain values are returned.
        parameters = sess.run(parameters)
        print('train accuarcy is', sess.run(accuarcy, feed_dict={X: train_X, Y: train_Y}))
        print('test accuarcy is ', sess.run(accuarcy, feed_dict={X: test_X, Y: test_Y}))
    return parameters
"""
测试模型
"""
def test_model():
    """
    Train the network on the sign dataset, then classify 'thumbs_up.jpg'.

    Loads the data with tf_utils.load_dataset, flattens every image into a
    column, scales pixel values by 1/255, one-hot encodes the labels into 6
    classes, trains with model(), and prints the class predicted for the
    image file 'thumbs_up.jpg'.

    Raises:
    FileNotFoundError -- if 'thumbs_up.jpg' cannot be read by cv2.imread
    """
    train_set_x_orig, train_set_Y, test_set_x_orig, test_set_Y, _ = tf_utils.load_dataset()

    # One row per example, then transpose so that examples become columns.
    train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
    test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T
    train_X = train_set_x_flatten / 255  # (12288,1080)
    test_X = test_set_x_flatten / 255
    train_Y = convert_one_hot(train_set_Y, 6)  # (6,1080)
    test_Y = convert_one_hot(test_set_Y, 6)
    parameters = model(train_X, train_Y, test_X, test_Y,
                       learning_rate=0.0001, num_pochs=1000, minibatch_size=32)

    image_path = 'thumbs_up.jpg'
    img = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise FileNotFoundError('cannot read image: {}'.format(image_path))
    # NOTE(review): cv2.imread returns channels in BGR order; confirm that
    # this matches the channel order of the training images.
    resized = cv2.resize(img, (64, 64), interpolation=cv2.INTER_CUBIC)

    # Display the 64x64 picture rather than the flattened column, and give
    # HighGUI a chance to draw the window.
    cv2.imshow('imgsize', resized)
    cv2.waitKey(1)

    # Apply the same flattening and 1/255 scaling as for the training data.
    image_X = resized.reshape(1, 64 * 64 * 3).T / 255
    image_predict = tf_utils.predict(image_X, parameters)
    print(image_predict)
# Run the train-and-predict demo only when this file is executed as a script.
if __name__ == '__main__':
    test_model()

打印结果:

吴恩达作业8:三层神经网络实现手势数字的识别(基于tensorflow)

吴恩达作业8:三层神经网络实现手势数字的识别(基于tensorflow)

下图的预测结果是1  符合

 

吴恩达作业8:三层神经网络实现手势数字的识别(基于tensorflow)