吴恩达作业8：三层神经网络实现手势数字的识别（基于tensorflow）

程序员文章站 2022-05-22 10:35:51

...

数据集的载入和随机生成 mini-batch 的函数都放在 tf_utils.py 中，代码如下：

import h5py
import numpy as np
import tensorflow as tf
import math

def load_dataset():
    """
    Load the training and test sign datasets from the HDF5 files under ``datasets/``.

    Returns:
    train_set_x_orig -- training features, as stored under "train_set_x"
    train_set_y_orig -- training labels reshaped to (1, number of training examples)
    test_set_x_orig -- test features, as stored under "test_set_x"
    test_set_y_orig -- test labels reshaped to (1, number of test examples)
    classes -- contents of "list_classes" read from the test file
    """
    # Every dataset is copied into memory with [:], so each file can be closed
    # as soon as its arrays have been read.
    with h5py.File('datasets/train_signs.h5', "r") as train_dataset:
        train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # train set features
        train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # train set labels

    with h5py.File('datasets/test_signs.h5', "r") as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # test set features
        test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # test set labels
        classes = np.array(test_dataset["list_classes"][:])  # the list of classes

    # Turn the 1-D label vectors into row vectors of shape (1, m)
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes


def random_mini_batches(X, Y, mini_batch_size, seed = 0):
    """
    Shuffle the columns of (X, Y) together and cut them into mini-batches.

    Arguments:
    X -- input data, one column per example
    Y -- label matrix, one column per example (same number of columns as X)
    mini_batch_size -- number of examples in every complete mini-batch, integer
    seed -- value passed to np.random.seed so the shuffle is reproducible

    Returns:
    mini_batches -- list of (mini_batch_X, mini_batch_Y) tuples whose columns stay aligned
    """
    m = X.shape[1]  # number of examples
    np.random.seed(seed)

    # The same column order is applied to X and Y so pairs stay together
    order = list(np.random.permutation(m))
    shuffled_X, shuffled_Y = X[:, order], Y[:, order]

    # Column ranges of the complete mini-batches
    full_count = math.floor(m / mini_batch_size)
    bounds = [(k * mini_batch_size, k * mini_batch_size + mini_batch_size)
              for k in range(0, full_count)]

    # A shorter last range when m is not a multiple of mini_batch_size
    if m % mini_batch_size != 0:
        bounds.append((full_count * mini_batch_size, m))

    return [(shuffled_X[:, lo:hi], shuffled_Y[:, lo:hi]) for lo, hi in bounds]

def convert_to_one_hot(Y, C):
    """
    Encode integer labels as one-hot columns.

    Arguments:
    Y -- array of integer class indices; it is flattened before encoding
    C -- number of classes

    Returns:
    Y -- array of shape (C, number of labels) with a single 1.0 per column
    """
    # Row i of the identity matrix is the one-hot vector for class i
    flat_labels = Y.reshape(-1)
    one_hot_rows = np.eye(C)[flat_labels]
    return np.transpose(one_hot_rows)


def predict(X, parameters):
    """
    Predict the class index of one flattened input using trained parameters.

    Arguments:
    X -- input column of shape (n_x, 1), where n_x is the number of columns of W1
    parameters -- dictionary with the trained values of "W1", "b1", "W2", "b2", "W3", "b3"

    Returns:
    prediction -- result of argmax over axis 0 of the last linear output Z3
    """
    # Convert in a fixed order so the graph is built the same way on every call
    params = {name: tf.convert_to_tensor(parameters[name])
              for name in ("W1", "b1", "W2", "b2", "W3", "b3")}

    # The input size follows W1 instead of being hard-coded to 12288
    n_x = params["W1"].shape.as_list()[1]
    x = tf.placeholder("float", [n_x, 1])

    z3 = forward_propagation_for_predict(x, params)
    p = tf.argmax(z3)

    # The context manager closes the session and frees its resources
    with tf.Session() as sess:
        prediction = sess.run(p, feed_dict = {x: X})

    return prediction

def forward_propagation_for_predict(X, parameters):
    """
    Build the forward pass LINEAR -> RELU -> LINEAR -> RELU -> LINEAR (no softmax is applied here).

    Arguments:
    X -- input tensor; it is left-multiplied by W1
    parameters -- python dictionary holding "W1", "b1", "W2", "b2", "W3", "b3"

    Returns:
    Z3 -- the output of the last LINEAR unit
    """
    # Look up every parameter first, in the same order as the layers use them
    W1, b1, W2, b2, W3, b3 = [
        parameters[key] for key in ('W1', 'b1', 'W2', 'b2', 'W3', 'b3')
    ]

    hidden_1 = tf.nn.relu(tf.add(tf.matmul(W1, X), b1))         # relu(W1 X + b1)
    hidden_2 = tf.nn.relu(tf.add(tf.matmul(W2, hidden_1), b2))  # relu(W2 A1 + b2)
    return tf.add(tf.matmul(W3, hidden_2), b3)                  # W3 A2 + b3

首先看数据集：

import tf_utils
import cv2
# Load all splits once, then print the shape of each one
splits = tf_utils.load_dataset()
train_set_x_orig, train_set_Y, test_set_x_orig, test_set_Y, classes = splits
for template, array in (('训练样本={}', train_set_x_orig),
                        ('训练样本标签={}', train_set_Y),
                        ('测试样本={}', test_set_x_orig),
                        ('测试样本标签={}', test_set_Y)):
    print(template.format(array.shape))

# Print the label stored at index 5 and display the image at the same index
sample_index = 5
print('第五个样本={}'.format(train_set_Y[0, sample_index]))
cv2.imshow('1.jpg', train_set_x_orig[sample_index, :, :, :])
cv2.waitKey()  # keep the window open until a key is pressed

打印结果：可以看出共有 1080 个训练样本，每个样本的 size 为（64，64，3）；标签用对应的数字表示手势，因此后面需要把标签转换成 one-hot 编码，形状为（6，samples）。

吴恩达作业8：三层神经网络实现手势数字的识别（基于tensorflow）

利用三层神经网络：W1=(25,64*64*3)，W2=(12,25)，W3=(6,12)，输入 X=(64*64*3,samples)，最终输出 y_pred=(6,samples)；计算损失时把 y_pred 和真实标签 y 都转置成 (samples,6)，代码如下：

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import tf_utils
import cv2
def create_placeholder(n_x,n_y):
    """
    Create the float32 placeholders for the inputs and the labels.

    Arguments:
    n_x -- number of rows of the input placeholder
    n_y -- number of rows of the label placeholder

    Returns:
    X -- placeholder named 'X' with shape [n_x, None]
    Y -- placeholder named 'Y' with shape [n_y, None]
    """
    # The second dimension is None so any number of examples can be fed
    features = tf.placeholder(dtype=tf.float32, shape=(n_x, None), name='X')
    labels = tf.placeholder(dtype=tf.float32, shape=(n_y, None), name='Y')
    return features, labels
def initialize_parameters(layer_dims=(12288, 25, 12, 6)):
    """
    Create the weight and bias variables of a fully connected network.

    Arguments:
    layer_dims -- size of the input layer followed by the size of every later layer;
                  the default (12288, 25, 12, 6) gives W1 [25, 12288], W2 [12, 25], W3 [6, 12]

    Returns:
    parameters -- dictionary mapping 'W1', 'b1', ..., 'WL', 'bL' to tf variables, where
                  Wl has shape [layer_dims[l], layer_dims[l-1]] and bl has shape [layer_dims[l], 1]

    Note: forward_propagation reads exactly 'W1'..'b3', so it needs four entries in layer_dims.
    """
    tf.set_random_seed(1)

    parameters = {}
    for layer in range(1, len(layer_dims)):
        n_in, n_out = layer_dims[layer - 1], layer_dims[layer]
        # Weights use Xavier initialization with a fixed seed; biases start at zero
        parameters['W%d' % layer] = tf.get_variable(
            name='W%d' % layer, shape=[n_out, n_in], dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer(seed=1))
        parameters['b%d' % layer] = tf.get_variable(
            name='b%d' % layer, shape=[n_out, 1], dtype=tf.float32,
            initializer=tf.zeros_initializer())
    return parameters
def convert_one_hot(Y,C):
    """
    One-hot encode integer labels.

    Arguments:
    Y -- array of integer class indices; it is flattened before encoding
    C -- number of classes

    Returns:
    one_hot -- array of shape (C, number of labels) with a single 1.0 per column
    """
    identity = np.eye(C)
    label_indices = Y.reshape(-1)
    # Selecting identity rows gives (labels, C); transpose to one column per label
    return identity[label_indices].T
def forward_propagation(X,parameters):
    """
    Build the forward pass LINEAR -> RELU -> LINEAR -> RELU -> LINEAR.

    Arguments:
    X -- input tensor; it is left-multiplied by W1
    parameters -- dictionary holding 'W1', 'b1', 'W2', 'b2', 'W3', 'b3'

    Returns:
    Z3 -- output of the last linear unit (no softmax is applied here)
    """
    def affine(weights, inputs, bias):
        # weights @ inputs + bias
        return tf.add(tf.matmul(weights, inputs), bias)

    # Look up every parameter before any op is created
    W1, b1, W2, b2, W3, b3 = [
        parameters[key] for key in ('W1', 'b1', 'W2', 'b2', 'W3', 'b3')
    ]

    activation = X
    for weights, bias in ((W1, b1), (W2, b2)):
        activation = tf.nn.relu(affine(weights, activation, bias))
    return affine(W3, activation, b3)
def compute_cost(Z3,Y):
    """
    Compute the mean softmax cross-entropy between the logits and the labels.

    Arguments:
    Z3 -- output of the last linear unit, one column per example
    Y -- labels with the same layout as Z3

    Returns:
    cost -- scalar tensor, the mean of the per-example cross-entropy
    """
    # Both tensors are transposed so examples run along the first axis,
    # i.e. (number of examples, num_class), before the loss is applied
    logits = tf.transpose(Z3)
    labels = tf.transpose(Y)
    per_example = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=labels)
    return tf.reduce_mean(per_example)
def model(train_X,train_Y,test_X,test_Y,learning_rate,num_pochs,minibatch_size):
    """
    Train the three-layer network with Adam on mini-batches and report accuracy.

    Arguments:
    train_X -- training inputs of shape (n_x, m), one column per example
    train_Y -- training labels of shape (n_y, m), one column per example
    test_X -- test inputs with the same number of rows as train_X
    test_Y -- test labels with the same number of rows as train_Y
    learning_rate -- learning rate passed to the Adam optimizer
    num_pochs -- number of passes over the training set
    minibatch_size -- number of examples per mini-batch

    Returns:
    parameters -- dictionary with the trained values of 'W1', 'b1', 'W2', 'b2', 'W3', 'b3'
    """
    tf.set_random_seed(1)
    seed=3
    (n_x,m)=train_X.shape
    costs=[]
    n_y=train_Y.shape[0]

    # Build the graph: placeholders, parameters, forward pass, loss, optimizer
    X, Y = create_placeholder(n_x, n_y)
    parameters = initialize_parameters()
    Z3 = forward_propagation(X, parameters)
    cost = compute_cost(Z3, Y)
    optimizer=tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    init = tf.global_variables_initializer()

    # argmax over axis 0 picks the class row for every example column
    correct_prediction=tf.equal(tf.argmax(Z3,0),tf.argmax(Y,0))
    accuarcy=tf.reduce_mean(tf.cast(correct_prediction,'float'))

    # The context manager closes the session once training and evaluation are done
    with tf.Session() as sess:
        sess.run(init)
        for i in range(num_pochs):
            epoch_cost=0
            # A new seed per epoch, otherwise every epoch reuses the same shuffle
            seed += 1
            mini_batches=tf_utils.random_mini_batches(train_X,train_Y,minibatch_size,seed)
            # Count the batches actually produced (including a partial last one)
            # so the average is correct and never divides by zero
            num_minibatches=len(mini_batches)
            for mini_batche in mini_batches:
                (mini_batche_X,mini_batche_Y)=mini_batche
                _,temp_cost=sess.run([optimizer,cost],feed_dict={X:mini_batche_X,Y:mini_batche_Y})
                epoch_cost += temp_cost / num_minibatches
            if i%100==0:
                print('after {} iterations minibatch_cost={}'.format(i,epoch_cost))
                costs.append(epoch_cost)

        # One point is plotted per 100 epochs
        plt.plot(costs)
        plt.xlabel('iterations')
        plt.ylabel('cost')
        plt.title('learning_rate={}'.format(learning_rate))
        plt.show()

        # Evaluate the variables so the caller gets their trained values
        parameters=sess.run(parameters)
        print('train accuarcy is',sess.run(accuarcy,feed_dict={X: train_X,Y: train_Y}))
        print('test accuarcy is ',sess.run(accuarcy,feed_dict={X: test_X, Y: test_Y}))
    return parameters
def test_model():
    """
    Train the network on the sign dataset, then classify the image 'thumbs_up.jpg'.

    Raises:
    FileNotFoundError -- if OpenCV cannot read 'thumbs_up.jpg'
    """
    # Read the custom image first so a missing file fails before the long training run
    img = cv2.imread('thumbs_up.jpg')
    if img is None:
        raise FileNotFoundError("could not read image 'thumbs_up.jpg'")
    resized = cv2.resize(img, (64, 64), interpolation=cv2.INTER_CUBIC)
    # NOTE(review): cv2.imread returns BGR channel order; confirm that this
    # matches the channel order of the images stored in the training set.
    # Same preprocessing as the training data: flatten to one column, scale by 1/255
    imgsize = resized.reshape(1,64*64*3).T / 255

    train_set_x_orig, train_set_Y, test_set_x_orig, test_set_Y, classes = tf_utils.load_dataset()
    # Flatten every image into one column: (m, h, w, c) -> (h*w*c, m)
    train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
    test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T
    train_X = train_set_x_flatten / 255
    test_X = test_set_x_flatten / 255
    train_Y = convert_one_hot(train_set_Y,6)
    test_Y = convert_one_hot(test_set_Y, 6)
    parameters=model(train_X, train_Y, test_X, test_Y, learning_rate=0.0001, num_pochs=1000, minibatch_size=32)

    # Show the resized picture itself rather than the flattened column vector
    cv2.imshow('imgsize', resized)
    image_predict=tf_utils.predict(imgsize,parameters)
    print(image_predict)
    # Without waitKey the window is never painted; wait for a key press
    cv2.waitKey()
# Run the train-and-predict demo only when this file is executed as a script
if __name__ == '__main__':
    test_model()

打印结果：

吴恩达作业8：三层神经网络实现手势数字的识别（基于tensorflow）

下图的预测结果是1 符合

吴恩达作业8：三层神经网络实现手势数字的识别（基于tensorflow）

上一篇： python用百度云接口实现通用文字识别

下一篇： Android手写签名