TensorFlow 入门:自整理的 MNIST 简单网络和复杂网络练习
2024-03-14 21:52:29
“当你背单词时,阿拉斯加的鳕鱼正跃出水面。当你解微分方程时,大洋彼岸的海鸥正拂过费城。当你晚自习时,极圈的夜空散满了五彩斑斓。当你为自己的未来踏踏实实努力时,那些你从未见过的风景,那些你以为不会遇到的人,你要的一切,正一步步向你走来。”
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import tensorflow as tf
import cv2
import numpy as np
import math
from scipy import ndimage
def getBestShift(img):
    """Compute the integer translation that centres the image's mass.

    Args:
        img: 2-D array of pixel intensities.

    Returns:
        Tuple ``(shiftx, shifty)``: the column and row offsets that move the
        centre of mass of ``img`` to the middle of the image. ``(0, 0)`` is
        returned for an all-zero image.
    """
    # An all-zero image has no centre of mass (0/0 -> NaN) and casting NaN to
    # int gives a meaningless shift, so such images are left where they are.
    if np.sum(img) == 0:
        return 0, 0
    # center_of_mass reports coordinates in axis order: (row, column).
    cy, cx = ndimage.center_of_mass(img)
    rows, cols = img.shape
    shiftx = np.round(cols / 2.0 - cx).astype(int)
    shifty = np.round(rows / 2.0 - cy).astype(int)
    return shiftx, shifty
def shift(img,sx,sy):
    """Translate a 2-D image by ``sx`` columns and ``sy`` rows.

    Args:
        img: 2-D image array.
        sx: horizontal offset placed in the affine matrix.
        sy: vertical offset placed in the affine matrix.

    Returns:
        The result of ``cv2.warpAffine``, with the same size as ``img``.
    """
    height, width = img.shape
    # 2x3 affine matrix: identity for the linear part, (sx, sy) as translation.
    translation = np.array([[1, 0, sx], [0, 1, sy]], dtype=np.float32)
    # The output size is passed as (cols, rows).
    return cv2.warpAffine(img, translation, (width, height))
import tensorflow.examples.tutorials.mnist.input_data as input_data

# Download the MNIST data set into the 'MNIST_data' folder and unpack it.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# Trainable parameters of the model; both start at zero.
weights = tf.Variable(tf.zeros([784, 10]))
biases = tf.Variable(tf.zeros([10]))

# Build the model: a single softmax layer over the 784 input values.
x = tf.placeholder("float", [None, 784])
y = tf.nn.softmax(tf.matmul(x, weights) + biases)  # predicted values
y_real = tf.placeholder("float", [None, 10])  # true values
# Cross entropy between prediction and truth. The prediction is clipped
# before the log so that a probability of exactly 0 cannot yield
# log(0) = -inf and turn the loss into NaN.
cross_entropy = -tf.reduce_sum(y_real * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
# Minimise the cross entropy with plain gradient descent.
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

# Evaluation ops are built once, before the loop; building them inside the
# loop would add new nodes to the graph on every evaluation.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_real, 1))  # prediction == truth?
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))  # mean of the matches

# Start training. The initialiser op must actually be run, otherwise the
# first training step reads uninitialised variables.
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)
# To persist the model, create a tf.train.Saver() here and call
# saver.save(sess, <path>) after an evaluation step.
for i in range(1000):
    # Take the next batch of 100 training samples (the author describes this
    # as stochastic gradient descent, SGD).
    batch_xs, batch_ys = mnist.train.next_batch(100)
    # Run one training step, feeding the batch into the placeholders.
    sess.run(train_step, feed_dict={x: batch_xs, y_real: batch_ys})
    if i % 100 == 0:
        # Evaluate on the test set every 100 training steps.
        print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_real: mnist.test.labels}))
# test
# Array that stores the pictures to classify, one flattened image per row.
images = np.zeros((1,784))
# Read the image as grayscale (flag 0).
gray = cv2.imread("test_data/"+"9.png", 0)
if gray is None:
    # imread signals an unreadable or missing file by returning None.
    raise IOError("cannot read image: test_data/9.png")
# Invert the intensities (255 - pixel) and rescale to 28x28.
gray = cv2.resize(255-gray, (28, 28))
# Better black and white version.
(thresh, gray) = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
# Crop away the all-zero rows and columns around the digit.
ink_rows = np.where(gray.any(axis=1))[0]
ink_cols = np.where(gray.any(axis=0))[0]
if ink_rows.size == 0:
    # Nothing but zeros: there is no digit to crop to.
    raise ValueError("image contains no digit pixels after thresholding")
gray = gray[ink_rows[0]:ink_rows[-1] + 1, ink_cols[0]:ink_cols[-1] + 1]
rows,cols = gray.shape
# Scale the longer side to 20 pixels, keeping the aspect ratio; the shorter
# side is kept at 1 pixel or more so that the resize target is never empty.
if rows > cols:
    factor = 20.0/rows
    rows = 20
    cols = max(1, int(round(cols*factor)))
else:
    factor = 20.0/cols
    cols = 20
    rows = max(1, int(round(rows*factor)))
# cv2.resize takes the size as (cols, rows): first cols, then rows.
gray = cv2.resize(gray, (cols, rows))
# Pad with zeros up to 28x28, splitting odd remainders as (ceil, floor).
colsPadding = (int(math.ceil((28-cols)/2.0)),int(math.floor((28-cols)/2.0)))
rowsPadding = (int(math.ceil((28-rows)/2.0)),int(math.floor((28-rows)/2.0)))
gray = np.pad(gray,(rowsPadding,colsPadding),'constant')
# Shift the digit so that its centre of mass sits in the image centre.
shiftx,shifty = getBestShift(gray)
shifted = shift(gray,shiftx,shifty)
gray = shifted
# Save the processed image.
cv2.imwrite("test_data/9_"+".png", gray)
# All images in the training set have a range from 0-1 and not from 0-255,
# so we divide our flattened image (a one-dimensional vector with our 784
# pixels) by 255 to use the same 0-1 based range.
flatten = gray.flatten() / 255.0
# Store the flattened image as the first (and only) row to classify.
images[0] = flatten
# The prediction is an array with one value per row of `images`,
# holding the predicted digit.
prediction = tf.argmax(y,1)
print('recognize result:')
print(sess.run(prediction, feed_dict={x: images}))
#!/usr/bin/env python
# -*- coding: utf-8 -*-
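# The MNIST data set contains 60000 training images and 10000 test images.
# Every digit lies within a 20*20 bounding box placed in the centre of a 28*28 image. This is important for the preprocessing.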
import tensorflow as tf
import cv2
import numpy as np
import math
from scipy import ndimage
def getBestShift(img):
    """Return the integer (x, y) offsets that centre the image's mass.

    Args:
        img: 2-D array of pixel intensities.

    Returns:
        Tuple ``(shiftx, shifty)``: how many columns and rows the image must
        be moved so that its centre of mass lands on the image centre.
        An all-zero image yields ``(0, 0)``.
    """
    # With no non-zero pixels the centre of mass is NaN, and NaN cast to int
    # is a meaningless offset; report "no shift" instead.
    if np.sum(img) == 0:
        return 0, 0
    # center_of_mass reports coordinates in axis order: (row, column).
    cy, cx = ndimage.center_of_mass(img)
    rows, cols = img.shape
    shiftx = np.round(cols / 2.0 - cx).astype(int)
    shifty = np.round(rows / 2.0 - cy).astype(int)
    return shiftx, shifty
def shift(img,sx,sy):
    """Move a 2-D image by ``sx`` columns and ``sy`` rows via an affine warp.

    Args:
        img: 2-D image array.
        sx: horizontal offset placed in the affine matrix.
        sy: vertical offset placed in the affine matrix.

    Returns:
        The warped image produced by ``cv2.warpAffine``; its size equals
        the size of ``img``.
    """
    n_rows, n_cols = img.shape
    # Translation-only affine transform: [[1, 0, sx], [0, 1, sy]].
    matrix = np.array([[1, 0, sx], [0, 1, sy]], dtype=np.float32)
    # The output size is given as (cols, rows).
    out_size = (n_cols, n_rows)
    return cv2.warpAffine(img, matrix, out_size)
def loadimage(path="test_data/9.png", save_path="test_data/99_.png"):
    """Load a digit picture and preprocess it into a flat 28x28 vector.

    The picture is read as grayscale, inverted, resized to 28x28 and
    thresholded. The digit is then cropped to its bounding box, scaled so
    that its longer side is 20 pixels, zero-padded back to 28x28 and shifted
    so that its centre of mass is in the image centre.

    Args:
        path: image file to read. Defaults to the picture the script
            originally hard-coded.
        save_path: where the preprocessed 28x28 image is written.

    Returns:
        1-D array of 784 values in the range 0-1.

    Raises:
        IOError: if the image cannot be read.
        ValueError: if the thresholded image contains no non-zero pixel.
    """
    gray = cv2.imread(path, 0)
    if gray is None:
        # imread signals an unreadable or missing file by returning None.
        raise IOError("cannot read image: %s" % path)
    gray = cv2.resize(255 - gray, (28, 28))
    (thresh, gray) = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    # First we want to fit the digit into a 20x20 bounding box, so the
    # black (all-zero) border rows and columns are removed.
    ink_rows = np.where(gray.any(axis=1))[0]
    ink_cols = np.where(gray.any(axis=0))[0]
    if ink_rows.size == 0:
        raise ValueError("image contains no digit pixels after thresholding")
    gray = gray[ink_rows[0]:ink_rows[-1] + 1, ink_cols[0]:ink_cols[-1] + 1]
    rows, cols = gray.shape

    # Then scale the longer side to 20 pixels, keeping the aspect ratio. The
    # shorter side is kept at 1 pixel or more so the resize target is valid.
    if rows > cols:
        factor = 20.0 / rows
        rows = 20
        cols = max(1, int(round(cols * factor)))
    else:
        factor = 20.0 / cols
        cols = 20
        rows = max(1, int(round(rows * factor)))
    # cv2.resize takes the size as (cols, rows): first cols, then rows.
    gray = cv2.resize(gray, (cols, rows))

    # In the end a 28x28 image is needed, so pad with zeros; odd remainders
    # are split as (ceil, floor).
    colsPadding = (int(math.ceil((28 - cols) / 2.0)), int(math.floor((28 - cols) / 2.0)))
    rowsPadding = (int(math.ceil((28 - rows) / 2.0)), int(math.floor((28 - rows) / 2.0)))
    gray = np.pad(gray, (rowsPadding, colsPadding), 'constant')

    # getBestShift finds the offset of the centre of mass from the image
    # centre; shift translates the image by that offset.
    shiftx, shifty = getBestShift(gray)
    gray = shift(gray, shiftx, shifty)

    # Save the preprocessed image.
    cv2.imwrite(save_path, gray)
    # Scale from 0-255 to 0-1.
    flatten = gray.flatten() / 255.0
    return flatten
import tensorflow.examples.tutorials.mnist.input_data as input_data
# Download the MNIST data set into the 'MNIST_data' folder and unpack it.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
# Session shared by the rest of the script; later code calls .eval()/.run()
# on tensors and ops without passing a session explicitly.
sess = tf.InteractiveSession()
# Input placeholders: x holds rows of 784 values (later reshaped to 28x28x1),
# y_ holds rows of 10 label values; the first dimension is left open.
x = tf.placeholder("float", shape=[None, 784])
y_ = tf.placeholder("float", shape=[None, 10])
def weight_variable(shape):
    """Create a weight variable of the given shape.

    The initial values are drawn with ``tf.truncated_normal`` using a
    standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Create a bias variable of the given shape, filled with the constant 0.1."""
    start_value = tf.constant(0.1, shape=shape)
    bias = tf.Variable(start_value)
    return bias
# (Translated from the original, truncated note:) "... the stride. padding
# selects how the data is padded. There are currently two padding modes; one
# is SAME, which means that after padding (zeros are added around the
# original image) ..."
def conv2d(x, W):
    """Convolve ``x`` with the filter ``W`` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    convolved = tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
    return convolved
# (Translated from the original, truncated note:) Function prototype:
# def max_pool(value, ksize, strides, padding, data_format="NHWC", name=None).
# How ksize and strides are read depends on data_format. The default, NHWC,
# means 4-D data laid out as [batch, height, width, channels].
def max_pool_2x2(x):
    """Max-pool ``x`` over 2x2 windows with stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
# The filter shape is [filter_height, filter_width, in_channels, out_channels].
# Here the kernel height and width are both 5, with 1 input channel and
# 32 output channels.
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
# Reshape the flat 784-value rows into 28x28 single-channel images.
x_image = tf.reshape(x, [-1,28,28,1])
# First layer: convolution + ReLU, then 2x2 max pooling.
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

# Second layer: 5x5 kernels mapping 32 channels to 64 channels.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# Fully connected layer: the two stride-2 poolings take 28x28 down to 7x7,
# so each image is flattened into 7*7*64 values and mapped to 1024 units.
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Dropout; keep_prob is fed at run time.
keep_prob = tf.placeholder("float")
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
# Finally add a softmax layer, as in single-layer softmax regression.
# softmax is a multi-class function (the original note, which is truncated,
# compares it to the two-valued sigmoid).
y_conv=tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
# Clip the probabilities before the log: a probability of exactly 0 would
# give log(0) = -inf and turn the loss (and then the weights) into NaN.
cross_entropy = -tf.reduce_sum(y_*tf.log(tf.clip_by_value(y_conv, 1e-10, 1.0)))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
# Accuracy: fraction of rows whose arg-max prediction equals the label.
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
saver = tf.train.Saver() # defaults to saving all variables
import os

# Directory holding the model checkpoint. The same location is used for
# saving and for restoring; change it to your own path if needed.
checkpoint_dir = './model_parameter/'
# Set to True to train the network and save a checkpoint before predicting;
# leave False to only restore an existing checkpoint.
processed = False
if processed:
    # Variables must be initialised before the first training step.
    sess.run(tf.initialize_all_variables())
    for i in range(20000):
        batch = mnist.train.next_batch(50)
        if i % 100 == 0:
            # Report accuracy on the current batch with dropout disabled.
            train_accuracy = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
            print("step %d, training accuracy %g" % (i, train_accuracy))
        train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
    # Save the model parameters once training has finished.
    if not os.path.isdir(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    saver.save(sess, os.path.join(checkpoint_dir, 'model.ckpt'))
    print("test accuracy %g" % accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

# Array that stores the pictures to classify, one flattened image per row.
images = np.zeros((1,784))
# Read the picture and preprocess it.
images[0] = loadimage()

ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
if ckpt and ckpt.model_checkpoint_path:
    saver.restore(sess, ckpt.model_checkpoint_path)
else:
    print('No checkpoint found!')
    if not processed:
        # Neither trained in this run nor restored: the variables hold no
        # values, so a prediction cannot be computed.
        raise SystemExit(1)

prediction = tf.argmax(y_conv,1)
print('recognize result:')
print(prediction.eval(feed_dict={x:images, keep_prob: 1.0},session=sess))
上一篇: Mac----CentOS安装jdk
下一篇: Keras loss调试记录