[深度学习-实践]BP神经网络的Helloworld(手写体识别和Fashion_mnist)
程序员文章站
2024-03-14 12:15:40
...
前言
原理部分请看这里 [深度学习-原理]BP神经网络
Tensorflow2 实现一个简单的识别衣服的例子
数据集Fashion_mnist,
此数据集包含10种类型的衣服
(‘T-shirt/top’, ‘Trouser’, ‘Pullover’, ‘Dress’, ‘Coat’, ‘Sandal’, ‘Shirt’, ‘Sneaker’, ‘Bag’, ‘Ankle boot’)
训练数据是60000条,大小是28x28
测试数据是10000条, 大小是28x28
看看训练数据的前25张图片吧
import tensorflow as tf
import tensorflow.keras as keras
# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
import os
def show_DataSet():
    """Print the Fashion-MNIST split sizes and show the first 25 training images.

    The data set is loaded through ``keras.datasets.fashion_mnist``. The shape
    of each image array and the number of labels are printed for the training
    and the test split, then a 5x5 grid of training images is displayed with
    the class name under every picture.
    """
    label_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
                   'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

    train_split, test_split = keras.datasets.fashion_mnist.load_data()
    train_images, train_labels = train_split

    # Report "image array shape" then "label count" for each split in turn.
    for images, labels in (train_split, test_split):
        print(images.shape)
        print(len(labels))

    plt.figure(figsize=(10, 10))
    preview = zip(train_images[:25], train_labels[:25])
    for cell, (picture, label) in enumerate(preview, start=1):
        plt.subplot(5, 5, cell)
        # Hide ticks and grid so only the picture and its caption remain.
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(picture, cmap=plt.cm.binary)
        plt.xlabel(label_names[label])
    plt.show()
# Run the preview only when this file is executed as a script.
if __name__ == "__main__":
    show_DataSet()
执行结果
训练代码如下
使用了两层全连接层
# TensorFlow and tf.keras
import tensorflow as tf
import tensorflow.keras as keras
# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
import os
def fit_dataSet():
    """Train a small dense classifier on Fashion-MNIST and plot one prediction.

    The network is Flatten(28x28) -> Dense(128, relu) -> Dense(10) and is
    trained for 10 epochs with Adam and sparse categorical cross-entropy on
    the logits. After training, the test loss/accuracy are printed, a Softmax
    layer is stacked on the model to turn logits into probabilities, and the
    prediction for the first test image is printed and plotted.
    """
    fashion_mnist = keras.datasets.fashion_mnist
    (train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

    # Scale the pixels from [0, 255] to [0, 1]. Feeding the raw 0-255 values
    # into the dense layers makes optimisation noticeably harder; the same
    # scaling must be applied to the training and the test images.
    train_images = train_images / 255.0
    test_images = test_images / 255.0

    model = keras.Sequential([
        keras.layers.Flatten(input_shape=(28, 28)),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(10),  # raw logits, one per class
    ])
    model.compile(optimizer='adam',
                  loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])
    model.fit(train_images, train_labels, epochs=10)

    test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
    print('\nTest accuracy:', test_acc)

    # The model outputs logits; append Softmax to obtain class probabilities.
    probability_model = keras.Sequential([model, keras.layers.Softmax()])
    predictions = probability_model.predict(test_images)

    print(predictions[0])
    print(np.argmax(predictions[0]))
    print(test_labels[0])
    plot_image(0, predictions[0], test_labels, test_images)
    plot_value_array(0, predictions[0], test_labels)
def plot_image(i, predictions_array, true_label, img):
    """Show image ``img[i]`` captioned with the predicted and the true class.

    Args:
        i: index of the sample inside ``true_label`` and ``img``.
        predictions_array: per-class scores for that single sample.
        true_label: sequence of integer class labels.
        img: sequence of images.

    The caption reads "<predicted> <confidence>% (<true>)" and is drawn in
    blue when the prediction matches the true label, red otherwise.
    """
    label_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
                   'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
    actual = true_label[i]
    picture = img[i]
    guess = np.argmax(predictions_array)

    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    plt.imshow(picture, cmap=plt.cm.binary)

    caption_color = 'blue' if guess == actual else 'red'
    caption = "{} {:2.0f}% ({})".format(label_names[guess],
                                         100*np.max(predictions_array),
                                         label_names[actual])
    plt.xlabel(caption, color=caption_color)
    plt.show()
def plot_value_array(i, predictions_array, true_label):
    """Draw a bar chart of the ten class scores for one sample.

    Args:
        i: index of the sample inside ``true_label``.
        predictions_array: ten per-class scores for that sample.
        true_label: sequence of integer class labels.

    All bars are grey; the predicted class is then coloured red and the true
    class blue (blue is applied last, so a correct prediction ends up blue).
    """
    actual = true_label[i]
    class_ids = range(10)

    plt.grid(False)
    plt.xticks(class_ids)
    plt.yticks([])
    bars = plt.bar(class_ids, predictions_array, color="#777777")
    plt.ylim([0, 1])

    guess = np.argmax(predictions_array)
    bars[guess].set_color('red')
    bars[actual].set_color('blue')
    plt.show()
# Train and evaluate only when this file is executed as a script.
if __name__ == "__main__":
    fit_dataSet()
执行结果
训练集上0.8341, 测试集上0.8292
57632/60000 [===========================>..] - ETA: 0s - loss: 0.4788 - accuracy: 0.8346
59040/60000 [============================>.] - ETA: 0s - loss: 0.4808 - accuracy: 0.8341
60000/60000 [==============================] - 2s 37us/sample - loss: 0.4810 - accuracy: 0.8341
10000/10000 - 0s - loss: 0.5949 - accuracy: 0.8292
Test accuracy: 0.8292
[5.4345663e-26 2.4194021e-22 0.0000000e+00 1.8774234e-22 0.0000000e+00
1.9197876e-03 3.0303031e-32 4.0659446e-02 1.0808072e-21 9.5742083e-01]
9
9
用LeNet方法实现手写体识别(MNIST数据集)-Tensorflow1
MNIST数据集是一个手写体数据集,数据集中每一个样本都是一个0-9的手写数字
大小28X28
训练数据目录结构是这样的
0的目录里都是0的图片,其它数字目录也是一样的。
Lenet 模型
from skimage import io, transform
import os
import glob
import numpy as np
import tensorflow as tf
# Every image is resized to w x h pixels with c channels (32 x 32 x 1).
w, h, c = 32, 32, 1

# Where the MNIST training images, the test images and the saved model live.
# NOTE(review): "**" looks like a redacted user name and the folder is spelled
# "traning" in the path - adjust both to match the directories on disk.
train_path = "C:/Users/**/ML_worksbase/lenet5/traning/"
test_path = "C:/Users/**/ML_worksbase/lenet5/test/"
model_path = "C:/Users/**/ML_worksbase/lenet5/model_path/"
# Read every image together with its label and split them 70/30.
def read_image(path):
    """Load the ``*.jpg`` images below ``path`` and split them 70/30.

    Each sub-directory of ``path`` is one class. Sub-directories are visited
    in sorted name order and the position in that order is the label, so
    folders named "0".."9" get the labels 0..9 on every platform
    (``os.listdir`` alone returns entries in arbitrary order).

    Every image is resized to ``(w, h, c)``. The samples are shuffled with
    one shared permutation *before* the split: the images are collected class
    by class, so splitting first would put only the last classes into the
    held-out part and leave them out of the training part entirely.

    Args:
        path: directory that contains one sub-directory per class.

    Returns:
        ``(training_images, training_labels, test_images, test_labels)`` with
        float32 images and int32 labels; the first 70% of the shuffled
        samples form the training part.
    """
    label_dirs = sorted(
        os.path.join(path, entry)
        for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    )

    images = []
    labels = []
    for index, folder in enumerate(label_dirs):
        for img in sorted(glob.glob(os.path.join(folder, '*.jpg'))):
            image = io.imread(img)
            image = transform.resize(image, (w, h, c))
            images.append(image)
            labels.append(index)

    all_images = np.asarray(images, dtype=np.float32)
    all_labels = np.asarray(labels, dtype=np.int32)

    # One permutation for both arrays keeps every image paired with its label.
    order = np.random.permutation(len(all_labels))
    all_images = all_images[order]
    all_labels = all_labels[order]

    split_number = int(len(all_labels) * 0.7)
    traning_images = all_images[:split_number]
    traning_labels = all_labels[:split_number]
    test_images = all_images[split_number:]
    test_labels = all_labels[split_number:]
    return traning_images, traning_labels, test_images, test_labels
# Load the images; read_image returns a training part and a held-out part.
train_data, train_label, test_data, test_label = read_image(train_path)

# Shuffle the training part. The same index array is applied to the images
# and to the labels so that every image stays paired with its label.
train_image_num = len(train_data)
train_image_index = np.arange(train_image_num)
np.random.shuffle(train_image_index)
train_data, train_label = train_data[train_image_index], train_label[train_image_index]

# Shuffle the held-out part in the same way.
test_image_num = len(test_data)
test_image_index = np.arange(test_image_num)
np.random.shuffle(test_image_index)
test_data, test_label = test_data[test_image_index], test_label[test_image_index]

# Start from an empty default graph and use graph mode (placeholders/sessions).
tf.compat.v1.reset_default_graph()
tf.compat.v1.disable_eager_execution()

# Inputs of the CNN: a batch of w x h x c images and their integer labels.
x = tf.compat.v1.placeholder(tf.float32, shape=[None, w, h, c], name='x')
y_ = tf.compat.v1.placeholder(tf.int32, shape=[None], name='y_')
def inference(input_tensor, train, regularizer):
    """Build the LeNet-5 style forward graph and return the class logits.

    All variable/scope/collection calls go through ``tf.compat.v1`` so the
    function matches the ``tf.compat.v1`` placeholders and session used by
    the rest of this script.

    Args:
        input_tensor: float tensor of images in NHWC layout; the shape
            comments below are for the [batch, 32, 32, 1] placeholder.
        train: when truthy, dropout with rate 0.5 is applied after each of
            the two hidden fully connected layers.
        regularizer: callable mapping a weight tensor to a scalar penalty, or
            ``None``. The penalties of the three fully connected weight
            matrices are added to the ``'losses'`` collection.

    Returns:
        The [batch, 10] tensor of unnormalised logits.
    """
    v1 = tf.compat.v1

    # Layer 1: convolution, 5x5 filters, depth 6, no zero padding, stride 1.
    # Shape: 32x32x1 -> 28x28x6
    with v1.variable_scope('layer1-conv1'):
        conv1_weights = v1.get_variable(
            'weight', [5, 5, c, 6],
            initializer=v1.truncated_normal_initializer(stddev=0.1))
        conv1_biases = v1.get_variable(
            'bias', [6], initializer=v1.constant_initializer(0.0))
        conv1 = tf.nn.conv2d(input_tensor, conv1_weights,
                             strides=[1, 1, 1, 1], padding='VALID')
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))

    # Layer 2: max pooling, 2x2 window, stride 2, SAME padding.
    # Shape: 28x28x6 -> 14x14x6
    with v1.name_scope('layer2-pool1'):
        pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1], padding='SAME')

    # Layer 3: convolution, 5x5 filters, depth 16, no zero padding, stride 1.
    # Shape: 14x14x6 -> 10x10x16
    with v1.variable_scope('layer3-conv2'):
        conv2_weights = v1.get_variable(
            'weight', [5, 5, 6, 16],
            initializer=v1.truncated_normal_initializer(stddev=0.1))
        conv2_biases = v1.get_variable(
            'bias', [16], initializer=v1.constant_initializer(0.0))
        conv2 = tf.nn.conv2d(pool1, conv2_weights,
                             strides=[1, 1, 1, 1], padding='VALID')
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))

    # Layer 4: max pooling, 2x2 window, stride 2, SAME padding.
    # Shape: 10x10x16 -> 5x5x16
    with v1.variable_scope('layer4-pool2'):
        pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1], padding='SAME')

    # The fully connected layers need one vector per image, so the 5x5x16
    # feature map is flattened: a batch of 64 goes from (64, 5, 5, 16) to
    # (64, 400).
    pool_shape = pool2.get_shape().as_list()
    nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
    reshaped = tf.reshape(pool2, [-1, nodes])

    # Layer 5: fully connected, 400 -> 120 (e.g. 64x400 -> 64x120).
    # Dropout zeroes a random part of the activations during training to
    # reduce over-fitting; it is only added when `train` is truthy.
    with v1.variable_scope('layer5-fc1'):
        fc1_weights = v1.get_variable(
            'weight', [nodes, 120],
            initializer=v1.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None:
            v1.add_to_collection('losses', regularizer(fc1_weights))
        fc1_biases = v1.get_variable(
            'bias', [120], initializer=v1.constant_initializer(0.1))
        fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases)
        if train:
            fc1 = tf.nn.dropout(fc1, rate=0.5)

    # Layer 6: fully connected, 120 -> 84 (e.g. 64x120 -> 64x84).
    with v1.variable_scope('layer6-fc2'):
        fc2_weights = v1.get_variable(
            'weight', [120, 84],
            initializer=v1.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None:
            v1.add_to_collection('losses', regularizer(fc2_weights))
        fc2_biases = v1.get_variable(
            'bias', [84],
            initializer=v1.truncated_normal_initializer(stddev=0.1))
        fc2 = tf.nn.relu(tf.matmul(fc1, fc2_weights) + fc2_biases)
        if train:
            fc2 = tf.nn.dropout(fc2, rate=0.5)

    # Layer 7: fully connected, 84 -> 10 (e.g. 64x84 -> 64x10). The result is
    # the logits; softmax over them gives the per-digit probabilities.
    with v1.variable_scope('layer7-fc3'):
        fc3_weights = v1.get_variable(
            'weight', [84, 10],
            initializer=v1.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None:
            v1.add_to_collection('losses', regularizer(fc3_weights))
        fc3_biases = v1.get_variable(
            'bias', [10],
            initializer=v1.truncated_normal_initializer(stddev=0.1))
        logit = tf.matmul(fc2, fc3_weights) + fc3_biases
    return logit
# Loss and metric graph: L2 penalty, cross-entropy, its mean, total loss,
# the optimiser step and the accuracy.
# tf.equal yields one boolean per sample; casting to float turns True into 1.
# and False into 0., so the mean is the fraction of correct predictions.
# Example: [True False True False False] -> [1. 0. 1. 0. 0.] -> 2/5 = 40%.
def regularizer(weights):
    """Return the L2 penalty 0.001 * sum(weights ** 2) / 2 for one weight tensor.

    This is the formula of the removed ``tf.contrib.layers.l2_regularizer``
    (scale * ``tf.nn.l2_loss``), written with an op that still exists.
    """
    return 0.001 * tf.nn.l2_loss(weights)


# Dropout is disabled here (train=False) because the same graph is used for
# both training and evaluation.
y = inference(x, False, regularizer)
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=y_)
cross_entropy_mean = tf.reduce_mean(cross_entropy)
# Total loss = mean cross-entropy + the penalties collected by inference().
loss = cross_entropy_mean + tf.add_n(tf.compat.v1.get_collection('losses'))
train_op = tf.compat.v1.train.AdamOptimizer(0.001).minimize(loss)
correct_prediction = tf.equal(tf.cast(tf.argmax(y, 1), tf.int32), y_)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Yield consecutive mini-batches of batch_size samples for training or testing.
def get_batch(data, label, batch_size):
    """Yield ``(data_batch, label_batch)`` pairs of exactly ``batch_size`` items.

    Batches are consecutive, non-overlapping slices taken from the start of
    ``data`` and ``label``. Leftover samples that do not fill a whole batch
    are skipped, and nothing is yielded when there are fewer than
    ``batch_size`` samples.
    """
    last_start = len(data) - batch_size
    for lo in range(0, last_start + 1, batch_size):
        hi = lo + batch_size
        yield data[lo:hi], label[lo:hi]
# Saver for checkpoints; created through tf.compat.v1 like the session below.
m_saver = tf.compat.v1.train.Saver()

# Open the session that runs training and evaluation.
with tf.compat.v1.Session() as sess:
    # Initialise all variables (weights, biases, ...).
    sess.run(tf.compat.v1.global_variables_initializer())

    # Write the graph definition once to "logs/" and close the writer.
    writer = tf.compat.v1.summary.FileWriter("logs/", sess.graph)
    writer.flush()
    writer.close()

    # Go over the whole data set train_num times in mini-batches of
    # batch_size samples; train_num can be increased.
    train_num = 10
    batch_size = 64

    for i in range(train_num):
        # ---- one training pass ----
        train_loss, train_acc, batch_num = 0, 0, 0
        for train_data_batch, train_label_batch in get_batch(train_data, train_label, batch_size):
            # Only the loss and the accuracy are needed back; the logits are
            # not fetched because nothing here uses them.
            _, err, acc = sess.run([train_op, loss, accuracy],
                                   feed_dict={x: train_data_batch, y_: train_label_batch})
            train_loss += err
            train_acc += acc
            batch_num += 1
        # NOTE: batch_num is 0 (division by zero) when the data holds fewer
        # than batch_size samples, because get_batch yields only full batches.
        print("train loss:", train_loss / batch_num)
        print("train acc:", train_acc / batch_num)

        # Save a checkpoint tagged with the epoch index.
        m_saver.save(sess, model_path + "/model_name", global_step=i)

        # ---- evaluation on the held-out part (no train_op, so no update) ----
        test_loss, test_acc, batch_num = 0, 0, 0
        for test_data_batch, test_label_batch in get_batch(test_data, test_label, batch_size):
            err, acc = sess.run([loss, accuracy],
                                feed_dict={x: test_data_batch, y_: test_label_batch})
            test_loss += err
            test_acc += acc
            batch_num += 1
        print("test loss:", test_loss / batch_num)
        print("test acc:", test_acc / batch_num)
手写体数字识别用Tensorflow1准确率在99%以上的例子
import math
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Fix the graph-level random seed.
tf.set_random_seed(28)

# Load MNIST with one-hot encoded labels.
mnist = input_data.read_data_sets("data/mnist", one_hot=True)

# The data set has three parts: training (55k, mnist.train), test
# (10k, mnist.test) and validation (5k, mnist.validation).
# Each digit is a 28*28*1 grey-scale picture, i.e. 784 features per image.
train_img, train_label = mnist.train.images, mnist.train.labels
test_img, test_label = mnist.test.images, mnist.test.labels
train_sample_number = mnist.train.num_examples

# Print the data shapes, one per line, followed by the first test label.
print(train_img.shape, train_img[0].shape, train_label.shape,
      test_img.shape, test_label.shape, test_label[0], sep="\n")

# ---- parameters and hyper-parameters ----
learn_rate_base = 0.1   # base learning rate (learning rates are usually kept small)
batch_size = 64         # number of training samples per iteration
display_step = 1        # report progress every display_step epochs
input_dim = train_img.shape[1]      # number of input features per sample
n_classes = train_label.shape[1]    # number of output classes

# ---- model construction ----
# 1. Placeholders for the inputs, the one-hot targets and the learning rate.
x = tf.placeholder(dtype=tf.float32, shape=[None, input_dim], name='x')
y = tf.placeholder(dtype=tf.float32, shape=[None, n_classes], name='y')
learn_rate = tf.placeholder(dtype=tf.float32, name='learn_rate')
# Compute the learning rate to use for a given epoch.
def learn_rate_func(epoth):
    """Return the step-decayed learning rate for epoch ``epoth``.

    The base rate ``learn_rate_base`` is multiplied by 0.9 once for every ten
    epochs, and the result is never allowed to drop below 0.001.
    """
    decay_steps = int(epoth / 10)
    decayed = learn_rate_base * (0.9 ** decay_steps)
    return max(0.001, decayed)
# Create (or fetch) a variable such as a weight w or a bias b.
def get_variable(name, shape=None, dtpye=tf.float32, initializer=tf.random_normal_initializer(mean=0, stddev=0.1)):
    """Thin wrapper around ``tf.get_variable`` with this script's defaults.

    Args:
        name: variable name inside the current variable scope.
        shape: shape of the variable.
        dtpye: data type of the variable (float32 by default; the parameter
            name is kept as spelled so keyword callers keep working).
        initializer: initializer; by default a normal distribution with
            mean 0 and standard deviation 0.1.

    Returns:
        Whatever ``tf.get_variable`` returns for these arguments.
    """
    return tf.get_variable(name, shape=shape, dtype=dtpye, initializer=initializer)
# 2. Build the network.
def le_net(x, y):
    """Build the convolutional network and return its logits.

    Layout: reshape -> conv(5x5, 20) -> max-pool -> conv(5x5, 50) -> max-pool
    -> dense(500) -> dense(n_classes).

    Args:
        x: [None, input_dim] tensor of flattened images; it is reshaped to
            [-1, 28, 28, 1].
        y: not used by this function.

    Returns:
        The [None, n_classes] logits tensor.
    """
    unit_stride = [1, 1, 1, 1]
    pool_window = [1, 2, 2, 1]

    # 1. Input layer: [None, input_dim] -> [None, height, width, channels].
    with tf.variable_scope('input1'):
        net = tf.reshape(x, shape=[-1, 28, 28, 1])

    # 2. Convolution.
    # conv2d(input, filter, strides, padding, ..., data_format="NHWC"):
    #   input   - 4-D image batch; for NHWC it is
    #             [batch, height, width, channels].
    #   filter  - 4-D kernel [height, width, in_channels, out_channels].
    #   strides - one step per dimension of the data format; the batch and
    #             channel steps must be 1.
    #   padding - "SAME" pads the input (with stride 1 the output keeps the
    #             input size); "VALID" drops the positions that do not fit.
    with tf.variable_scope('conv2'):
        kernel = get_variable('w', [5, 5, 1, 20])
        feature_map = tf.nn.conv2d(input=net, filter=kernel, strides=unit_stride, padding='SAME')
        bias = get_variable('b', [20])
        # ReLU: max(features, 0)  (relu6 would be min(max(features, 0), 6)).
        net = tf.nn.relu(tf.nn.bias_add(feature_map, bias))

    # 3. Pooling. Like the convolution it takes a window size and a stride,
    # both given as [batch, height, width, channels] with batch and channels
    # equal to 1; padding has the same meaning as above.
    with tf.variable_scope('pool3'):
        net = tf.nn.max_pool(value=net, ksize=pool_window, strides=[1, 2, 2, 1], padding='SAME')

    # 4. Convolution.
    with tf.variable_scope('conv4'):
        kernel = get_variable('w', [5, 5, 20, 50])
        feature_map = tf.nn.conv2d(input=net, filter=kernel, strides=unit_stride, padding='SAME')
        bias = get_variable('b', [50])
        net = tf.nn.relu(tf.nn.bias_add(feature_map, bias))

    # 5. Pooling.
    with tf.variable_scope('pool5'):
        net = tf.nn.max_pool(value=net, ksize=pool_window, strides=[1, 2, 2, 1], padding='SAME')

    # 6. Fully connected.
    with tf.variable_scope('fc6'):
        # 28 -> 14 -> 7: only the two poolings shrink the picture, the SAME
        # convolutions keep its size.
        size = 7 * 7 * 50
        flat = tf.reshape(net, shape=[-1, size])
        weights = get_variable('w', [size, 500])
        projected = tf.matmul(flat, weights)
        bias = get_variable('b', [500])
        net = tf.nn.relu(tf.add(projected, bias))

    # 7. Fully connected output layer (logits, no activation).
    with tf.variable_scope('fc7'):
        weights = get_variable('w', [500, n_classes])
        projected = tf.matmul(net, weights)
        bias = get_variable('b', [n_classes])
        net = tf.add(projected, bias)

    return net
# Build the network.
act = le_net(x, y)

# Loss: softmax cross-entropy per sample (logits = prediction, labels =
# one-hot target), averaged over the batch.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=act, labels=y))

# Optimiser: Adadelta, with the learning rate fed through the `learn_rate`
# placeholder. Too large a rate may not converge; too small converges slowly.
train = tf.train.AdadeltaOptimizer(learning_rate=learn_rate).minimize(cost)

# A prediction is correct when the arg-max of the logits equals the arg-max
# of the one-hot label; tf.equal returns one boolean per sample.
pred = tf.equal(tf.argmax(act, axis=1), tf.argmax(y, axis=1))
# Accuracy: True -> 1, False -> 0, then the mean.
acc = tf.reduce_mean(tf.cast(pred, tf.float32))

# Variable initialisation op.
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    # Saver used to persist the model once it is good enough.
    saver = tf.train.Saver()

    # Number of full batches per epoch; it does not change between epochs.
    total_batch = int(train_sample_number / batch_size)

    epoch = 0
    # NOTE: the loop has no upper bound - it only ends when both accuracies
    # checked below reach 0.99.
    while True:
        avg_cost = 0
        # The learning rate depends only on the epoch, so compute it once.
        current_rate = learn_rate_func(epoch)

        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            feeds = {x: batch_xs, y: batch_ys, learn_rate: current_rate}
            # Run the update and read the loss in a single call instead of a
            # second forward pass; the loss is the one the update was
            # computed from.
            _, batch_cost = sess.run([train, cost], feed_dict=feeds)
            avg_cost += batch_cost

        # Mean loss over the batches of this epoch.
        avg_cost = avg_cost / total_batch

        # Report the loss plus the training and test accuracy.
        if (epoch + 1) % display_step == 0:
            print("批次: %03d 损失函数值: %.9f" % (epoch, avg_cost))
            # Only the first 1000 training samples are scored; the original
            # author notes that feeding all of train_img ran out of memory.
            train_acc = sess.run(acc, feed_dict={x: train_img[:1000], y: train_label[:1000]})
            print("训练集准确率: %.3f" % train_acc)
            test_acc = sess.run(acc, feed_dict={x: test_img, y: test_label})
            print("测试准确率: %.3f" % test_acc)

            # Stop and save the model once both accuracies reach 0.99.
            if train_acc >= 0.99 and test_acc >= 0.99:
                saver.save(sess, './mnist/model_{}_{}'.format(train_acc, test_acc), global_step=epoch)
                break
        epoch += 1

    # Write the graph to ./mnist/graph for visualisation.
    writer = tf.summary.FileWriter('./mnist/graph', tf.get_default_graph())
    writer.close()

print("end....")
模型训练可视化可以使用下面的命令:tensorboard --logdir=./mnist/graph
参考资料
[1] https://blog.csdn.net/Daycym/article/details/90267188