TensorFlow Neural Network Optimization

Learning Rate

# Exponentially decaying learning rate
global_step = tf.Variable(0, trainable=False)

# Computes roughly:
# decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)
# staircase: if True, global_step / decay_steps uses integer division,
# so the learning rate decays in discrete steps rather than continuously
# Arguments: learning_rate, global_step, decay_steps, decay_rate
learning_rate = tf.train.exponential_decay(
    0.1, global_step, 100, 0.96, staircase=True)

# Pass global_step to minimize() so it is incremented automatically on each step
learning_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
    loss, global_step=global_step)
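To see what staircase does, the decay formula can be checked by hand in plain Python. This is a sketch of ours, not TensorFlow code; decayed_lr is a hypothetical helper:

# Reproduce the decay formula by hand (illustrative helper, not a TF API)
def decayed_lr(lr, step, decay_steps, decay_rate, staircase):
    p = step // decay_steps if staircase else step / decay_steps
    return lr * decay_rate ** p

print(decayed_lr(0.1, 50, 100, 0.96, staircase=False))  # ~0.0980: decays continuously
print(decayed_lr(0.1, 50, 100, 0.96, staircase=True))   # 0.1: unchanged until step 100
print(decayed_lr(0.1, 100, 100, 0.96, staircase=True))  # 0.096: drops at step 100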

Regularization

The loss function becomes:

J(\theta) + \lambda R(w)

L1 regularization:

R(w) = \sum_i |w_i|

L2 regularization:

R(w) = \sum_i w_i^2

The two can also be used together:

R(w) = \sum_i \alpha |w_i| + (1 - \alpha) w_i^2

Example:

loss = tf.reduce_mean(tf.square(y_ - y)) + \
    tf.contrib.layers.l2_regularizer(0.5)(weights)
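For the mixed L1/L2 form above, tf.contrib.layers also provides l1_regularizer and l1_l2_regularizer. A sketch; the scale values are arbitrary illustrative choices, with scale_l1/scale_l2 playing the roles of alpha and (1 - alpha) in the formula:

# Mixed L1 + L2 regularization (scale values chosen arbitrarily for illustration)
regularizer = tf.contrib.layers.l1_l2_regularizer(scale_l1=0.3, scale_l2=0.7)
loss = tf.reduce_mean(tf.square(y_ - y)) + regularizer(weights)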

Managing multiple losses with a collection

# Add each loss to the 'losses' collection
tf.add_to_collection('losses', loss1)
tf.add_to_collection('losses', loss2)
# Retrieve every loss in the collection and sum them
total_loss = tf.add_n(tf.get_collection('losses'))
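Combining the two ideas, a common pattern is to push the data loss and every layer's regularization penalty into the same collection and sum them at the end. A sketch with illustrative variable names (weights1, weights2 stand in for whatever weight variables you have):

# Collect the data loss and per-layer regularization penalties, then sum them
regularizer = tf.contrib.layers.l2_regularizer(0.001)

mse = tf.reduce_mean(tf.square(y_ - y))
tf.add_to_collection('losses', mse)
tf.add_to_collection('losses', regularizer(weights1))
tf.add_to_collection('losses', regularizer(weights2))

total_loss = tf.add_n(tf.get_collection('losses'))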

Moving Average

Update formula:

shadow_variable = decay * shadow_variable + (1 - decay) * variable

decay = min{decay, (1 + num_updates) / (10 + num_updates)}

v = tf.Variable(0, dtype=tf.float32)
step = tf.Variable(0, trainable=False)
# Arguments: decay, num_updates (here driven by step)
ema = tf.train.ExponentialMovingAverage(0.99, step)
ema_op = ema.apply([v])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    sess.run(tf.assign(v, 5))
    # Effective decay: min{0.99, (1 + 0) / (10 + 0)} = 0.1,
    # so the moving average becomes 0.1 * 0 + 0.9 * 5 = 4.5
    sess.run(ema_op)
    print(sess.run(ema.average(v)))

    sess.run(tf.assign(step, 10000))
    sess.run(tf.assign(v, 10))
    # Likewise: min{0.99, (1 + 10000) / (10 + 10000) ≈ 0.999} = 0.99,
    # so the moving average becomes 0.99 * 4.5 + 0.01 * 10 = 4.555
    sess.run(ema_op)
    print(sess.run(ema.average(v)))
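At test time it is usually the shadow values you want to load back into the variables. ema.variables_to_restore() builds the variable-name map a Saver needs; a minimal sketch, where the checkpoint path 'model.ckpt' is a placeholder of ours:

# Restore shadow (averaged) values into the original variables
# ('model.ckpt' is an illustrative placeholder path)
saver = tf.train.Saver(ema.variables_to_restore())
with tf.Session() as sess:
    saver.restore(sess, 'model.ckpt')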

Complete example

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

input_size = 784
output_size = 10
hidden_size = 500

batch_size = 100

learning_rate_base = 0.8
learning_rate_decay = 0.99

regularization_rate = 0.0001
training_steps = 30000
moving_average_decay = 0.99

def inference(input_tensor,ema,w1,b1,w2,b2):
    # With ema=None use the raw weights; otherwise use their shadow averages
    if ema is None:
        layer1 = tf.nn.relu(tf.matmul(input_tensor,w1)+b1)
        layer2 = tf.matmul(layer1,w2)+b2
        return layer2
    else:
        layer1 = tf.nn.relu(tf.matmul(input_tensor,ema.average(w1))+ema.average(b1))
        layer2 = tf.matmul(layer1, ema.average(w2)) + ema.average(b2)
        return layer2

def train(mnist):
    x = tf.placeholder(tf.float32,[None,input_size])
    y_ = tf.placeholder(tf.float32,[None,output_size])

    w1 = tf.Variable(tf.truncated_normal([input_size,hidden_size],stddev=0.1))
    b1 = tf.Variable(tf.constant(0.1,shape=[hidden_size]))
    w2 = tf.Variable(tf.truncated_normal([hidden_size,output_size],stddev=0.1))
    b2 = tf.Variable(tf.constant(0.1, shape=[output_size]))

    y = inference(x,None,w1,b1,w2,b2)

    global_step = tf.Variable(0,trainable=False)

    # Moving average over all trainable variables
    ema = tf.train.ExponentialMovingAverage(moving_average_decay,global_step)
    ema_op = ema.apply(tf.trainable_variables())

    average_y = inference(x,ema,w1,b1,w2,b2)

    # Cross entropy; sparse_softmax_cross_entropy_with_logits expects class
    # indices, so convert the one-hot labels with argmax
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # L2 regularization of the weights
    regularizer = tf.contrib.layers.l2_regularizer(regularization_rate)
    regularization = regularizer(w1)+regularizer(w2)
    # Total loss: cross entropy plus the regularization term
    loss = cross_entropy_mean + regularization

    # Exponentially decaying learning rate
    learning_rate = tf.train.exponential_decay(
        learning_rate_base,
        global_step,
        mnist.train.num_examples/batch_size,
        learning_rate_decay
    )

    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss,global_step=global_step)
    # Run the training step and the moving-average update together
    train_op = tf.group(train_step,ema_op)

    # Evaluate accuracy using the moving-average version of the model
    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        validate_feed = {x:mnist.validation.images,y_:mnist.validation.labels}
        test_feed = {x:mnist.test.images,y_:mnist.test.labels}

        for i in range(training_steps):
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy,feed_dict=validate_feed)
                print('{} steps, validation acc: {}'.format(i,validate_acc))

            xs,ys = mnist.train.next_batch(batch_size)
            sess.run(train_op,feed_dict={x:xs,y_:ys})

        test_acc = sess.run(accuracy,feed_dict=test_feed)
        print('final test acc: {}'.format(test_acc))

def main(argv=None):
    mnist = input_data.read_data_sets('MNIST_data',one_hot=True)
    train(mnist)

if __name__ == '__main__':
    # tf.app.run() parses command-line flags and invokes main()
    tf.app.run()
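tf.group above is one way to bundle the training step with the moving-average update; the same train_op is often written with tf.control_dependencies instead. Both are standard TF1 APIs; this is just an equivalent alternative sketch for that line:

# Equivalent to tf.group(train_step, ema_op): a no-op that forces both updates
with tf.control_dependencies([train_step, ema_op]):
    train_op = tf.no_op(name='train')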