欢迎您访问程序员文章站,本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

dropout解决overfitting问题

程序员文章站 2022-07-13 11:27:49
...
import tensorflow as tf
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
#from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import LabelBinarizer
# Load the scikit-learn digits dataset and binarize the integer class labels
# into one indicator column per class.
digits = load_digits()
X, y = digits.data, LabelBinarizer().fit_transform(digits.target)

# Hold out 30% of the samples for testing. No random_state is given, so the
# split differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

def add_layer(inputs, in_size, out_size, layer_name, activate_function=None):
    """Append one fully connected layer with dropout to the graph.

    The layer computes ``inputs @ W + b``, applies dropout to that affine
    result, and then applies the optional activation.

    Args:
        inputs: Tensor fed into the layer.
        in_size: Number of input features (rows of the weight matrix).
        out_size: Number of output units (columns of the weight matrix).
        layer_name: Label for the layer. Currently unused by this function.
        activate_function: Callable applied to the dropped-out affine
            output, or None to return that output unchanged.

    Returns:
        The layer's output tensor.

    Note:
        Dropout reads the module-level ``keep_prob`` placeholder, so that
        name must be defined before this function is called.
    """
    weight_matrix = tf.Variable(tf.random_normal([in_size, out_size]))
    # Biases start at 0.1 rather than 0.
    bias_row = tf.Variable(tf.zeros([1, out_size]) + 0.1)
    affine = tf.add(tf.matmul(inputs, weight_matrix), bias_row)
    # Dropout is applied before the activation.
    dropped = tf.nn.dropout(affine, keep_prob)
    if activate_function is None:
        return dropped
    return activate_function(dropped)

# Placeholders fed at run time: the dropout keep probability, a batch of
# 64-feature inputs, and the matching 10-column label rows.
keep_prob = tf.placeholder(tf.float32)
xs = tf.placeholder(tf.float32, [None, 64])
ys = tf.placeholder(tf.float32, [None, 10])

# Network: 64 -> 50 (tanh) -> 10 (softmax).
l1 = add_layer(xs, 64, 50, 'l1', activate_function=tf.nn.tanh)
prediction = add_layer(l1, 50, 10, 'l2', activate_function=tf.nn.softmax)

# Cross-entropy between prediction and labels, averaged over the batch.
# The probabilities are clipped away from 0 before the log: a softmax output
# that underflows to exactly 0 would give log(0) = -inf, and 0 * -inf = NaN
# would then poison the loss and every subsequent gradient step.
clipped_prediction = tf.clip_by_value(prediction, 1e-10, 1.0)
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(ys * tf.log(clipped_prediction), axis=1)
)  # loss
tf.summary.scalar('loss', cross_entropy)
train_step = tf.train.GradientDescentOptimizer(0.6).minimize(cross_entropy)

sess = tf.Session()
merged = tf.summary.merge_all()
# Separate writers so train and test loss can be compared in TensorBoard.
train_writer = tf.summary.FileWriter('logs/train', sess.graph)
test_writer = tf.summary.FileWriter('logs/test', sess.graph)

sess.run(tf.global_variables_initializer())

for i in range(500):
    # Train with dropout enabled (each unit kept with probability 0.5).
    sess.run(train_step, feed_dict={xs: X_train, ys: y_train, keep_prob: 0.5})
    if i % 50 == 0:
        # Record the loss with dropout disabled (keep_prob = 1) so the train
        # and test curves are measured on the full network.
        train_result = sess.run(
            merged, feed_dict={xs: X_train, ys: y_train, keep_prob: 1}
        )
        test_result = sess.run(
            merged, feed_dict={xs: X_test, ys: y_test, keep_prob: 1}
        )
        train_writer.add_summary(train_result, i)
        test_writer.add_summary(test_result, i)

# FileWriter buffers events and writes them asynchronously; flush explicitly
# so the last summaries are on disk even if the process exits right away.
train_writer.flush()
test_writer.flush()