深度学习tensorflow之softmax(二)手写数字识别底层实现
程序员文章站
2024-03-07 22:29:03
...
数据集
MNIST手写数字数据集
代码
'''
Handwritten digit recognition. Architecture: x (m*784) -> (784*512) hidden layer (512*10) -> y (m*10)
'''
import tensorflow as tf
import random
from matplotlib import pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
# Fix the graph-level random seed so that every run produces the same result.
tf.set_random_seed(1)

# Load the dataset; one_hot=True asks for one-hot encoded labels.
mnist = input_data.read_data_sets(r'MNIST_data', one_hot=True)

# Layer widths: input features, hidden units, output classes.
n_input, n_hidden, n_classes = 784, 512, 10

# Placeholders for a batch of inputs and their labels (batch size left open).
X = tf.placeholder(tf.float32, [None, n_input])
Y = tf.placeholder(tf.float32, [None, n_classes])

# Trainable parameters, drawn from a normal distribution.
# NOTE: the creation order (W1, b1, W2, b2) is kept as-is; with a graph-level
# seed the values each variable receives depend on op creation order.
W1 = tf.Variable(tf.random_normal(shape=[n_input, n_hidden]))
b1 = tf.Variable(tf.random_normal(shape=[n_hidden]))
W2 = tf.Variable(tf.random_normal(shape=[n_hidden, n_classes]))
b2 = tf.Variable(tf.random_normal(shape=[n_classes]))
# Forward pass: affine -> sigmoid -> affine.
z1 = tf.matmul(X, W1) + b1
a1 = tf.sigmoid(z1)
z2 = tf.matmul(a1, W2) + b2
# Softmax turns the output logits into class probabilities (multi-class).
a2 = tf.nn.softmax(z2)
# Cross-entropy cost, averaged over the batch.
# log_softmax is applied to the logits directly instead of tf.log(a2):
# when a probability underflows to 0, tf.log gives -inf and 0 * -inf = NaN,
# which would turn the whole cost into NaN.
cost = -tf.reduce_mean(
    tf.reduce_sum(Y * tf.nn.log_softmax(z2), axis=1),
    axis=0,
)
# Backward pass: gradients derived by hand with the chain rule.
# Number of rows in the current batch, as a float so it can divide tensors.
batch_rows = tf.cast(tf.shape(X)[0], dtype=tf.float32)

# Output layer: for softmax + cross-entropy the gradient w.r.t. z2 is a2 - Y.
dz2 = a2 - Y
dw2 = tf.matmul(tf.transpose(a1), dz2) / batch_rows
# Bias gradient = mean over the batch axis, one value per output unit.
# (Summing over axis=1 instead would give sum(a2) - sum(Y) = 0 for every
# sample, so b2 would never be updated.)
db2 = tf.reduce_mean(dz2, axis=0)

# Hidden layer: propagate through W2, then through the sigmoid derivative.
da1 = tf.matmul(dz2, tf.transpose(W2))
dz1 = da1 * a1 * (1 - a1)
dw1 = tf.matmul(tf.transpose(X), dz1) / batch_rows
# Same as db2: average over the batch, keep one gradient per hidden unit.
db1 = tf.reduce_mean(dz1, axis=0)
# Parameter update: one plain gradient-descent step per run of `update`.
learning_rate = 0.1
# Force every gradient to be fully computed before any variable is written.
# Without this, the assignment to W2 is free to run before da1 has read W2,
# so the hidden-layer gradients could be computed from already-updated weights.
with tf.control_dependencies([dw1, db1, dw2, db2]):
    update = [
        tf.assign(W2, W2 - learning_rate * dw2),
        tf.assign(W1, W1 - learning_rate * dw1),
        tf.assign(b1, b1 - learning_rate * db1),
        tf.assign(b2, b2 - learning_rate * db2),
    ]
# Accuracy: fraction of rows whose most probable class matches the label.
predicted_class = tf.argmax(a2, axis=1)
true_class = tf.argmax(Y, axis=1)
is_correct = tf.cast(tf.equal(predicted_class, true_class), dtype=tf.float32)
accuracy = tf.reduce_mean(is_correct, axis=0)

# Number of full passes over the training set.
train_times = 15
# Number of samples fed per mini-batch step.
batch_size = 100
# Run training and evaluation inside a session.
with tf.Session() as sess:
    # Initialise all variables.
    sess.run(tf.global_variables_initializer())

    # Mini-batch steps per pass over the training set (constant across passes).
    n_batch = int(mnist.train.num_examples / batch_size)

    # Training: outer loop over passes, inner loop over mini-batches.
    for epoch in range(1, train_times + 1):
        avg_cost = 0
        for _ in range(n_batch):
            # Fetch the next batch_size training samples and labels.
            x_train, y_train = mnist.train.next_batch(batch_size)
            batch_feed = {X: x_train, Y: y_train}
            batch_cost, _unused = sess.run([cost, update], feed_dict=batch_feed)
            # Accumulate the running mean of the cost over this pass.
            avg_cost += batch_cost / n_batch
        print('批次: ', epoch)
        print('代价: ', avg_cost)
    print('训练结束!')

    # Accuracy on the test set.
    test_feed = {X: mnist.test.images, Y: mnist.test.labels}
    print(sess.run(accuracy, feed_dict=test_feed))

    # Predict one randomly chosen test sample and show its true label.
    r = random.randint(0, mnist.test.num_examples - 1)
    sample_image = mnist.test.images[r:r + 1]
    sample_label = mnist.test.labels[r:r + 1]
    print('label: ', sess.run(tf.argmax(sample_label, axis=1)))
    print('prediction: ', sess.run(tf.argmax(a2, 1), feed_dict={X: sample_image}))

    # Display the chosen sample as a 28x28 image.
    plt.imshow(
        sample_image.reshape(28, 28),
        cmap="Greys",
        interpolation='nearest',
    )
    plt.show()