TensorFlow Neural Network Optimization
Learning Rate
# Exponentially decayed learning rate
global_step = tf.Variable(0, trainable=False)
# Computes, roughly:
# decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)
# staircase: whether to truncate (global_step / decay_steps) to an integer,
# which makes the learning rate decay in discrete steps
# Arguments: learning_rate, global_step, decay_steps, decay_rate
learning_rate = tf.train.exponential_decay(
    0.1, global_step, 100, 0.96, staircase=True)
# Pass global_step to minimize() so it is incremented automatically
learning_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
    loss, global_step=global_step)
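To make the schedule concrete, here is a minimal pure-Python sketch (no TensorFlow; the function name decayed_lr is just for illustration) that reproduces the same formula:
def decayed_lr(learning_rate, global_step, decay_steps, decay_rate, staircase=True):
    # Same formula as tf.train.exponential_decay
    exponent = global_step / decay_steps
    if staircase:
        exponent = global_step // decay_steps  # integer division -> stepwise decay
    return learning_rate * decay_rate ** exponent

# With the settings above (base 0.1, decay by 0.96 every 100 steps):
print(decayed_lr(0.1, 99, 100, 0.96))   # 0.1 (still on the first stair)
print(decayed_lr(0.1, 100, 100, 0.96))  # 0.096
print(decayed_lr(0.1, 250, 100, 0.96))  # 0.1 * 0.96**2 = 0.09216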
Regularization
With regularization, the loss function becomes J(θ) + λR(w), where R(w) measures the complexity of the model and λ controls its weight.
L1 regularization: R(w) = Σᵢ |wᵢ|
L2 regularization: R(w) = ½ Σᵢ wᵢ²
The two can also be used together: R(w) = Σᵢ (α|wᵢ| + (1 − α)wᵢ²)
Example:
loss = tf.reduce_mean(tf.square(y_ - y)) + \
    tf.contrib.layers.l2_regularizer(0.5)(weights)
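What the regularizers return can be checked directly; the weight values below are made up for illustration (note that l2_regularizer includes the conventional factor of 1/2):
import tensorflow as tf

weights = tf.constant([[1.0, -2.0], [-3.0, 4.0]])
with tf.Session() as sess:
    # L1: 0.5 * (|1| + |-2| + |-3| + |4|) = 5.0
    print(sess.run(tf.contrib.layers.l1_regularizer(0.5)(weights)))
    # L2: 0.5 * (1 + 4 + 9 + 16) / 2 = 7.5
    print(sess.run(tf.contrib.layers.l2_regularizer(0.5)(weights)))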
Managing multiple losses with a collection
# Add each individual loss to the 'losses' collection
tf.add_to_collection('losses', loss1)
tf.add_to_collection('losses', loss2)
# Fetch every loss in the collection and sum them
total_loss = tf.add_n(tf.get_collection('losses'))
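This pattern pays off when weights are created in several places, for example in a loop over layers. A minimal sketch, with hypothetical layer shapes, that registers each layer's L2 penalty as it is created:
import tensorflow as tf

regularizer = tf.contrib.layers.l2_regularizer(0.001)

def get_weight(shape):
    # Create a weight variable and add its L2 penalty to the 'losses' collection
    w = tf.Variable(tf.truncated_normal(shape, stddev=0.1))
    tf.add_to_collection('losses', regularizer(w))
    return w

w1 = get_weight([784, 500])
w2 = get_weight([500, 10])
# After computing the data loss (e.g. mse_loss), add it to the same
# collection and sum everything:
# tf.add_to_collection('losses', mse_loss)
# total_loss = tf.add_n(tf.get_collection('losses'))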
Moving Average
The computation follows shadow_variable = decay × shadow_variable + (1 − decay) × variable. When a num_updates argument (e.g. the training step) is supplied, the effective decay is min{decay, (1 + num_updates) / (10 + num_updates)}, which keeps the average responsive early in training.
import tensorflow as tf

v = tf.Variable(0, dtype=tf.float32)
step = tf.Variable(0, trainable=False)
# Arguments: decay, num_updates (here the training step)
ema = tf.train.ExponentialMovingAverage(0.99, step)
ema_op = ema.apply([v])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.assign(v, 5))
    # Effective decay = min{0.99, (1 + 0) / (10 + 0) = 0.1} = 0.1
    # Shadow value = 0.1 * 0 + 0.9 * 5 = 4.5
    sess.run(ema_op)
    print(sess.run(ema.average(v)))  # 4.5
    sess.run(tf.assign(step, 10000))
    sess.run(tf.assign(v, 10))
    # Likewise, decay = min{0.99, (1 + 10000) / (10 + 10000) ≈ 0.999} = 0.99
    # Shadow value = 0.99 * 4.5 + 0.01 * 10 = 4.555
    sess.run(ema_op)
    print(sess.run(ema.average(v)))  # 4.555
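At evaluation time the shadow values are usually loaded back into the original variables via ema.variables_to_restore(). A minimal sketch continuing the example above; the checkpoint path 'model.ckpt' is hypothetical:
# variables_to_restore() maps shadow-variable names to the original
# variables, so restoring the checkpoint loads the averaged values into v
saver = tf.train.Saver(ema.variables_to_restore())
with tf.Session() as sess:
    saver.restore(sess, 'model.ckpt')  # hypothetical checkpoint path
    print(sess.run(v))  # v now holds its moving-average value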
Example
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

input_size = 784   # 28 * 28 pixels per MNIST image
output_size = 10   # 10 digit classes
hidden_size = 500
batch_size = 100
learning_rate_base = 0.8
learning_rate_decay = 0.99
regularization_rate = 0.0001
training_steps = 30000
moving_average_decay = 0.99
def inference(input_tensor, ema, w1, b1, w2, b2):
    if ema is None:
        # Forward pass with the current weights
        layer1 = tf.nn.relu(tf.matmul(input_tensor, w1) + b1)
        layer2 = tf.matmul(layer1, w2) + b2
        return layer2
    else:
        # Forward pass with the shadow (moving-average) weights
        layer1 = tf.nn.relu(tf.matmul(input_tensor, ema.average(w1)) + ema.average(b1))
        layer2 = tf.matmul(layer1, ema.average(w2)) + ema.average(b2)
        return layer2
def train(mnist):
    x = tf.placeholder(tf.float32, [None, input_size])
    y_ = tf.placeholder(tf.float32, [None, output_size])
    w1 = tf.Variable(tf.truncated_normal([input_size, hidden_size], stddev=0.1))
    b1 = tf.Variable(tf.constant(0.1, shape=[hidden_size]))
    w2 = tf.Variable(tf.truncated_normal([hidden_size, output_size], stddev=0.1))
    b2 = tf.Variable(tf.constant(0.1, shape=[output_size]))
    y = inference(x, None, w1, b1, w2, b2)
    global_step = tf.Variable(0, trainable=False)
    # Moving average over all trainable variables
    ema = tf.train.ExponentialMovingAverage(moving_average_decay, global_step)
    ema_op = ema.apply(tf.trainable_variables())
    average_y = inference(x, ema, w1, b1, w2, b2)
    # Cross entropy; the sparse version takes class indices, so convert
    # the one-hot labels with tf.argmax
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    # L2 regularization on the weights (biases are usually left out)
    regularizer = tf.contrib.layers.l2_regularizer(regularization_rate)
    regularization = regularizer(w1) + regularizer(w2)
    # Total loss
    loss = cross_entropy_mean + regularization
    # Exponentially decaying learning rate, decaying once per epoch
    learning_rate = tf.train.exponential_decay(
        learning_rate_base,
        global_step,
        mnist.train.num_examples / batch_size,
        learning_rate_decay
    )
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)
    # Run the gradient step and the moving-average update together
    train_op = tf.group(train_step, ema_op)
    # Compute accuracy with the moving-average model
    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
        test_feed = {x: mnist.test.images, y_: mnist.test.labels}
        for i in range(training_steps):
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print('{} steps, validation acc: {}'.format(i, validate_acc))
            xs, ys = mnist.train.next_batch(batch_size)
            sess.run(train_op, feed_dict={x: xs, y_: ys})
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print('finally, test acc: {}'.format(test_acc))

def main(argv=None):
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    train(mnist)

if __name__ == '__main__':
    # tf.app.run() parses flags and then calls main()
    tf.app.run()