欢迎您访问程序员文章站,本站旨在为大家提供、分享程序员计算机编程知识!
您现在的位置是: 首页

TensorFlow第七步再试牛刀-自编BP代码解Mnist

程序员文章站 2022-03-17 14:49:39
...

与网上教程(见下方链接)的网络设计、训练数据、算法、参数设置完全一样,但从训练结果看,用 tf 自己动手编写的 BP 代码速度更快、精度更高:自己编写的代码经 10 次训练,训练数据精度达到 100%,测试数据精度达到 87.3%;而教程中经 70 次训练,训练数据精度才达到 100%,经 280 次训练,测试数据精度才达到 82%。原因不明!

http://neuralnetworksanddeeplearning.com/chap3.html

TensorFlow第七步再试牛刀-自编BP代码解MnistTensorFlow第七步再试牛刀-自编BP代码解Mnist

TensorFlow第七步再试牛刀-自编BP代码解Mnist

 

TensorFlow第七步再试牛刀-自编BP代码解Mnist

# coding=utf-8
import os  
# Configure TensorFlow's native (C++) logging before TensorFlow is imported
# further down. Level '2' filters out INFO and WARNING messages, so only
# errors are printed (level '1' would keep warnings as well).
os.environ["TF_CPP_MIN_LOG_LEVEL"]='2'

###data (50000,784),(1000,784),(1000,784):
import pickle
import gzip
import numpy as np

def load_data(path='../data/mnist.pkl.gz'):
    """Load the pickled MNIST splits from a gzip-compressed file.

    Args:
        path: Location of the gzip-compressed pickle. Defaults to the
            path this script has always read from.

    Returns:
        A ``(training_data, validation_data, test_data)`` tuple, exactly as
        stored in the pickle.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # only point this at a dataset file you trust.
    # The context manager closes the file even if unpickling raises.
    with gzip.open(path, 'rb') as f:
        # encoding='bytes' allows reading a pickle written by Python 2.
        training_data, validation_data, test_data = pickle.load(
            f, encoding='bytes')
    return (training_data, validation_data, test_data)

def vectorized_result(j, num_classes=10):
    """Return a one-hot vector with 1.0 at position ``j``.

    Args:
        j: Index of the class to mark.
        num_classes: Length of the returned vector. Defaults to 10, the
            number of digit classes used throughout this script.

    Returns:
        A 1-D float NumPy array of length ``num_classes`` that is zero
        everywhere except for a 1.0 at index ``j``.

    Raises:
        IndexError: If ``j`` is outside the vector.
    """
    e = np.zeros(num_classes)
    e[j] = 1.0
    return e

# Read the three dataset splits; element [0] of each split holds the inputs
# and element [1] the integer labels.
training_data, validation_data, test_data = load_data()

# Only the first 1000 training examples are used by this script.
trainData_in = training_data[0][:1000]
trainData_out = list(map(vectorized_result, training_data[1][:1000]))

# Validation and test splits are used in full; labels become one-hot rows.
validData_in = validation_data[0]
validData_out = list(map(vectorized_result, validation_data[1]))
testData_in = test_data[0]
testData_out = list(map(vectorized_result, test_data[1]))

###net 784X30X10: 
import tensorflow as tf
import random
#import matplotlib.pyplot as plt
 
# --- Hyper-parameters -------------------------------------------------------
logs_path = r'c:/temp/log_mnist_softmax'
# NOTE(review): the original comment here said "error is large when > 0.05",
# which conflicts with the value 5.0. The training loop divides the step by
# (batch rows * output columns), so with 10x10 batches the multiplier applied
# to the summed gradient is 5.0 / 100 = 0.05 -- confirm which was meant.
learning_rate = 5.0
training_epochs = 400
batch_size = 10

# --- Forward pass: 784 -> 30 (sigmoid) -> 10 (softmax) ----------------------
x_input = tf.placeholder(tf.float32, [None, 784], name='x_input')
y_desired = tf.placeholder(tf.float32, [None, 10], name='y_desired')

# Hidden layer.
w1 = tf.Variable(tf.truncated_normal([784, 30], stddev=0.1), name='w1')
b1 = tf.Variable(tf.zeros([30]), name='b1')
z1 = tf.matmul(x_input, w1) + b1
y1 = tf.nn.sigmoid(z1)

# Output layer.
w = tf.Variable(tf.truncated_normal([30, 10], stddev=0.1), name='w')
b = tf.Variable(tf.zeros([10]), name='b')
z = tf.matmul(y1, w) + b
y_output = tf.nn.softmax(z, name='y_output')

# Mean cross-entropy over every element of the batch. The probabilities are
# clipped away from zero so that a saturated softmax cannot produce log(0),
# which would turn the reported cost into NaN/inf.
lossFun_crossEntropy = -tf.reduce_mean(
    y_desired * tf.log(tf.clip_by_value(y_output, 1e-10, 1.0)))

# --- Hand-written back-propagation ------------------------------------------
# BP1: output-layer error for softmax combined with cross-entropy.
delta = tf.add(y_output, -y_desired)
# BP3: bias gradient = error summed over the batch axis.
nabla_b = tf.reduce_sum(delta, axis=0, name='nabla_b')
# BP4: weight gradient = (layer input)^T . error.
nabla_w = tf.matmul(y1, delta, transpose_a=True, name='nabla_w')
# Sigmoid derivative, reusing the activation already computed as y1.
dSigmod_z1 = y1 * (1 - y1)
# BP2: propagate the error back through w into the hidden layer.
delta = tf.matmul(delta, w, transpose_b=True) * dSigmod_z1
# BP3 / BP4 again, now for the hidden layer.
nabla_b1 = tf.reduce_sum(delta, axis=0, name='nabla_b1')
nabla_w1 = tf.matmul(x_input, delta, transpose_a=True, name='nabla_w1')

# --- Evaluation feeds and metrics -------------------------------------------
feed_dict_trainData = {x_input: trainData_in, y_desired: trainData_out}
feed_dict_testData = {x_input: testData_in, y_desired: testData_out}

# argmax along axis 1 yields one predicted / expected class index per row.
correct_prediction = tf.equal(tf.argmax(y_output, 1),
                              tf.argmax(y_desired, 1))
# Cast the booleans to floats so that their mean is the accuracy.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# For comparison, the built-in optimizer equivalent would be:
#   tf.train.GradientDescentOptimizer(learning_rate).minimize(lossFun_crossEntropy)

# --- TensorBoard summaries --------------------------------------------------
tf.summary.scalar('cost', lossFun_crossEntropy)
tf.summary.scalar('accuracy', accuracy)
summary_op = tf.summary.merge_all()

# Build the parameter-update op ONCE, before the session starts. Creating
# tf.assign ops inside the batch loop adds new nodes to the graph on every
# step, so memory use and step time keep growing over the run.
# The step is learning_rate / (batch rows * output columns), the same scaling
# the per-batch NumPy update used.
step_size = learning_rate / tf.cast(tf.size(y_desired), tf.float32)
scaled_grads = [step_size * grad
                for grad in (nabla_w1, nabla_b1, nabla_w, nabla_b)]
# All four gradients must be evaluated with the pre-update weights, so the
# assignments wait until every scaled gradient has been computed.
with tf.control_dependencies(scaled_grads):
    train_step = tf.group(*[
        tf.assign_sub(variable, grad)
        for variable, grad in zip((w1, b1, w, b), scaled_grads)
    ])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    logs_writer = tf.summary.FileWriter(logs_path,
                                        graph=tf.get_default_graph())
    try:
        for epoch in range(training_epochs):
            # SGD: reshuffle inputs and targets together every epoch.
            trainData = list(zip(trainData_in, trainData_out))
            random.shuffle(trainData)
            trainData_in, trainData_out = zip(*trainData)

            # Any remainder smaller than batch_size is dropped.
            batch_count = len(trainData_in) // batch_size
            for i in range(batch_count):
                batch_x = trainData_in[batch_size * i:batch_size * (i + 1)]
                batch_y = trainData_out[batch_size * i:batch_size * (i + 1)]
                feed_dict_batch = {x_input: batch_x, y_desired: batch_y}
                sess.run(train_step, feed_dict=feed_dict_batch)

            # Report once per epoch. Previously this ran after every
            # mini-batch while still tagging each summary with the epoch
            # number, re-evaluating both full data sets on each step.
            summary = sess.run(summary_op, feed_dict=feed_dict_trainData)
            logs_writer.add_summary(summary, epoch)
            print('Epoch', epoch)
            print('Accuracy_trainData:',
                  accuracy.eval(feed_dict=feed_dict_trainData))
            print('Accuracy_testData:',
                  accuracy.eval(feed_dict=feed_dict_testData))
            print('Done')
    finally:
        # Flush and release the event file even if training is interrupted.
        logs_writer.close()

    # Spot check: print the expected one-hot target and the network output
    # for the first test example.
    try_input = testData_in[0]
    try_desired = testData_out[0]
    print(try_desired)
    print(y_output.eval(feed_dict={x_input: [try_input]}))

 

相关标签: tensorflow