欢迎您访问程序员文章站!本站旨在为大家提供并分享程序员计算机编程知识!
您现在的位置是: 首页

深度学习tensorflow之softmax(二)手写数字识别底层实现

程序员文章站 2024-03-07 22:29:03
...

数据集

MNIST手写数字数据集

代码

'''
    Handwritten digit recognition. Structure: x (m*784) -> (784*512) hidden layer (512*10) -> y (m*10)
'''
import tensorflow as tf
import random
from matplotlib import pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data

# Fix the graph-level random seed so repeated runs give the same results.
# NOTE: op creation order below is kept unchanged, since reordering graph
# construction can change the values drawn for the initial parameters.
tf.set_random_seed(1)
# Load the data set, requesting one-hot encoded labels directly.
mnist = input_data.read_data_sets(r'MNIST_data', one_hot=True)

# Layer widths: flattened input pixels, hidden units, output classes.
N_INPUT, N_HIDDEN, N_CLASSES = 784, 512, 10

# Placeholders for a batch of inputs and the matching one-hot labels;
# the leading dimension is left open so any batch size can be fed.
X = tf.placeholder(tf.float32, [None, N_INPUT])
Y = tf.placeholder(tf.float32, [None, N_CLASSES])

# Trainable parameters, drawn from tf.random_normal with its default settings.
W1 = tf.Variable(tf.random_normal([N_INPUT, N_HIDDEN]))
b1 = tf.Variable(tf.random_normal([N_HIDDEN]))
W2 = tf.Variable(tf.random_normal([N_HIDDEN, N_CLASSES]))
b2 = tf.Variable(tf.random_normal([N_CLASSES]))

# Forward pass: affine map into the hidden layer followed by a sigmoid.
z1 = tf.matmul(X, W1) + b1
a1 = tf.sigmoid(z1)

# Output-layer logits.
z2 = tf.matmul(a1, W2) + b2
# Softmax turns the logits into per-class probabilities (multi-class output).
a2 = tf.nn.softmax(z2)

# Cross-entropy cost: summed over classes, then averaged over the batch.
# It is computed from log_softmax(z2) instead of log(softmax(z2)): when a
# probability underflows to 0 the latter yields log(0) = -inf, and the
# product 0 * -inf with a zero label entry makes the whole cost NaN.
cost = -tf.reduce_mean(tf.reduce_sum(Y * tf.nn.log_softmax(z2), axis=1), axis=0)

# Backpropagation: the chain rule written out by hand.
# Number of samples in the fed batch, as a float so it can divide tensors.
batch_count = tf.cast(tf.shape(X)[0], dtype=tf.float32)

# Gradient of the softmax + cross-entropy cost w.r.t. the output logits.
dz2 = a2 - Y
dw2 = tf.matmul(tf.transpose(a1), dz2) / batch_count
# Bias gradient: average over the batch axis only, giving one value per
# output unit. Summing over the class axis first collapses it to a single
# scalar (and for softmax that scalar is ~0, because every row of a2 and of
# the one-hot Y sums to 1), so the bias would never be trained.
db2 = tf.reduce_mean(dz2, axis=0)

# Propagate back through the output weights and the sigmoid derivative.
da1 = tf.matmul(dz2, tf.transpose(W2))
dz1 = da1 * a1 * (1 - a1)

dw1 = tf.matmul(tf.transpose(X), dz1) / batch_count
# Same per-unit batch average as db2, one value per hidden unit.
db1 = tf.reduce_mean(dz1, axis=0)

# Parameter update: one plain gradient-descent step.
learning_rate = 0.1
# Require every gradient to be fully computed before any variable is
# overwritten; otherwise da1 could read W2 while it is being assigned.
with tf.control_dependencies([dw1, db1, dw2, db2]):
    update = [
        tf.assign(W2, W2 - learning_rate * dw2),
        tf.assign(W1, W1 - learning_rate * dw1),
        tf.assign(b1, b1 - learning_rate * db1),
        tf.assign(b2, b2 - learning_rate * db2),
    ]

# Accuracy: fraction of samples whose highest-probability class equals the
# class marked in the one-hot label.
predicted_class = tf.argmax(a2, axis=1)
expected_class = tf.argmax(Y, axis=1)
is_correct = tf.cast(tf.equal(predicted_class, expected_class), dtype=tf.float32)
accuracy = tf.reduce_mean(is_correct, axis=0)

# Number of full passes (epochs) over the training set.
train_times = 15
# Number of samples fed per mini-batch step.
batch_size = 100

# Open a session to run the graph.
with tf.Session() as sess:
    # Initialise all variables.
    sess.run(tf.global_variables_initializer())

    # Mini-batch steps per epoch; constant across epochs, so computed once.
    n_batch = mnist.train.num_examples // batch_size

    # Train the model, one epoch per outer iteration.
    for times in range(train_times):
        avg_cost = 0
        for n in range(n_batch):
            # Fetch the next batch_size training samples and labels.
            x_train, y_train = mnist.train.next_batch(batch_size)
            # Evaluate the cost and apply one parameter update step.
            c, _ = sess.run([cost, update], feed_dict={X: x_train, Y: y_train})
            # Accumulate the mean cost over this epoch's batches.
            avg_cost += c / n_batch
        print('批次: ',times+1)
        print('代价: ',avg_cost)
    print('训练结束!')

    # Accuracy on the test set.
    print(sess.run(accuracy, feed_dict={X: mnist.test.images, Y: mnist.test.labels}))

    # Pick one random test sample and compare its label with the prediction.
    r = random.randint(0, mnist.test.num_examples - 1)
    # The labels are already a NumPy array, so take the argmax directly
    # instead of adding a new op to the graph just to read it back.
    print('label: ', mnist.test.labels[r:r+1].argmax(axis=1))
    print('prediction: ', sess.run(tf.argmax(a2, 1), feed_dict={X: mnist.test.images[r:r+1]}))

# Show the sampled image, reshaped from a flat vector to 28x28.
plt.imshow(
    mnist.test.images[r:r+1].reshape(28, 28),
    cmap="Greys",
    interpolation='nearest'
)
plt.show()


结果

深度学习tensorflow之softmax(二)手写数字识别底层实现

画图

深度学习tensorflow之softmax(二)手写数字识别底层实现