07 selu函数
程序员文章站
2022-07-04 21:01:56
...
1.只将激活函数改为selu
'''分类问题 selu函数能缓解梯度消失'''
import tensorflow as tf
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
from tensorflow import keras
# Load Fashion-MNIST through the Keras dataset helper.
fashion_mnist = keras.datasets.fashion_mnist
(x_train_all, y_train_all), (x_test, y_test) = fashion_mnist.load_data()

# Split at index 5000: the first 5000 samples become the validation set,
# everything after them is the training set.
x_valid, x_train = np.split(x_train_all, [5000])
y_valid, y_train = np.split(y_train_all, [5000])
# Standardise the pixel values (zero mean, unit variance).
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

# Every image is flattened to a single column so that one global mean/std is
# estimated over all training pixels, then reshaped back to 28x28 images.
x_train_scaled = scaler.fit_transform(
    x_train.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28)

# Validation and test data must reuse the statistics learned from the
# training set: call transform(), not fit_transform(). Re-fitting here would
# scale each split with its own mean/std, so the model would be evaluated on
# inputs normalised differently from the ones it was trained on.
x_valid_scaled = scaler.transform(
    x_valid.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28)
x_test_scaled = scaler.transform(
    x_test.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28)
# Define the model: flatten each 28x28 input, pass it through 20 hidden
# Dense layers of 100 units, and finish with a 10-way softmax output.
# SELU is a self-normalising activation function.
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28,28]),
    *(keras.layers.Dense(100, activation="selu") for _ in range(20)),
    keras.layers.Dense(10, activation="softmax"),
])

# Original author's note on the optimizer: plain SGD can get stuck in a
# local minimum.
model.compile(
    optimizer="sgd",
    loss="sparse_categorical_crossentropy",
    metrics=['accuracy'],
)
model.summary()
# Callbacks: TensorBoard logging, best-model checkpointing, early stopping.
logdir = "./dnn-selu-callbacks"
# exist_ok=True replaces the separate exists()/mkdir() pair, which could race
# and fails on nested paths.
os.makedirs(logdir, exist_ok=True)
output_model_file = os.path.join(logdir, "fashion_mnist_model.h5")

# A list (not a set) so the callbacks keep a defined order, and a real
# boolean for save_best_only instead of the string "True".
callbacks = [
    keras.callbacks.TensorBoard(logdir),
    keras.callbacks.ModelCheckpoint(output_model_file,
                                    save_best_only=True),
    keras.callbacks.EarlyStopping(patience=5, min_delta=1e-3),
]

# Train. The callbacks must be handed to fit(); previously they were built
# but never used, so nothing was logged, checkpointed, or stopped early.
history = model.fit(
    x_train_scaled,
    y_train,
    epochs=10,
    validation_data=(x_valid_scaled, y_valid),
    callbacks=callbacks,
)
# Plot the training curves.
def plot_learning_curves(history):
    """Plot every series stored in ``history.history`` on one figure.

    Args:
        history: object exposing a ``history`` attribute that pandas can
            turn into a DataFrame (here, the value returned by ``model.fit``).

    The figure is 8x5 inches, has a grid, and its y-axis is fixed to [0, 3].
    The function shows the figure and returns nothing.
    """
    metrics_frame = pd.DataFrame(history.history)
    axes = metrics_frame.plot(figsize=(8, 5))
    axes.grid(True)
    axes.set_ylim(0, 3)
    plt.show()
plot_learning_curves(history=history)
# Evaluate the trained model on the test set.
model.evaluate(x=x_test_scaled, y=y_test)
运行结果:
2.在selu基础上添加dropout
'''分类问题 selu缓解梯度消失,一般只给最后几层添加dropout,防止过拟合'''
import tensorflow as tf
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
from tensorflow import keras
# Fetch the Fashion-MNIST train/test arrays from the Keras dataset helper.
fashion_mnist = keras.datasets.fashion_mnist
(x_train_all, y_train_all), (x_test, y_test) = fashion_mnist.load_data()

# Cut the full training data at index 5000: samples before the cut are used
# for validation, samples from the cut onwards for training.
x_valid, x_train = np.split(x_train_all, [5000])
y_valid, y_train = np.split(y_train_all, [5000])
# Standardise the pixel values (zero mean, unit variance).
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

# Flatten all pixels into one column so a single global mean/std is fitted
# on the training data, then restore the 28x28 image shape.
x_train_scaled = scaler.fit_transform(
    x_train.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28)

# Only transform() the validation and test splits so they are scaled with
# the training statistics. Calling fit_transform() again would re-estimate
# mean/std per split and feed the model differently normalised inputs at
# evaluation time.
x_valid_scaled = scaler.transform(
    x_valid.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28)
x_test_scaled = scaler.transform(
    x_test.astype(np.float32).reshape(-1, 1)).reshape(-1, 28, 28)
# Define the model: flatten each 28x28 input, then stack 20 hidden Dense
# layers of 100 units. SELU is a self-normalising activation function.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28,28]))
for _ in range(20):
    model.add(keras.layers.Dense(100, activation="selu"))

# ------------- add dropout ------------- #
# AlphaDropout (per the original author's notes): 1. keeps the mean and
# variance unchanged, 2. keeps the self-normalising property intact.
# Keras also provides the plain keras.layers.Dropout(rate=0.5).
# NOTE(review): the source lost its indentation, so it is ambiguous whether
# this layer belonged inside the loop. It is applied once after the hidden
# stack, following the script's docstring ("usually dropout is only added to
# the last few layers") - confirm against the intended architecture.
model.add(keras.layers.AlphaDropout(rate=0.5))

model.add(keras.layers.Dense(10, activation="softmax"))

# Original author's note on the optimizer: plain SGD can get stuck in a
# local minimum.
model.compile(loss="sparse_categorical_crossentropy",
              optimizer="sgd",
              metrics=['accuracy'])
model.summary()
# Callbacks: TensorBoard logging, best-model checkpointing, early stopping.
# NOTE(review): this is the same directory and checkpoint file name used by
# the plain-SELU script earlier in this file, so running both mixes their
# TensorBoard logs and overwrites the checkpoint - consider a distinct path.
logdir = "./dnn-selu-callbacks"
# exist_ok=True replaces the separate exists()/mkdir() pair, which could race
# and fails on nested paths.
os.makedirs(logdir, exist_ok=True)
output_model_file = os.path.join(logdir, "fashion_mnist_model.h5")

# A list (not a set) so the callbacks keep a defined order, and a real
# boolean for save_best_only instead of the string "True".
callbacks = [
    keras.callbacks.TensorBoard(logdir),
    keras.callbacks.ModelCheckpoint(output_model_file,
                                    save_best_only=True),
    keras.callbacks.EarlyStopping(patience=5, min_delta=1e-3),
]

# Train. The callbacks must be handed to fit(); previously they were built
# but never used, so nothing was logged, checkpointed, or stopped early.
history = model.fit(
    x_train_scaled,
    y_train,
    epochs=10,
    validation_data=(x_valid_scaled, y_valid),
    callbacks=callbacks,
)
# Plot the training curves.
def plot_learning_curves(history):
    """Draw the contents of ``history.history`` as line plots and show them.

    Args:
        history: object with a ``history`` attribute convertible to a
            pandas DataFrame (here, the value returned by ``model.fit``).

    Uses an 8x5 inch figure with the grid enabled and the y-axis limited to
    the range 0 to 3. Nothing is returned.
    """
    pd.DataFrame(history.history).plot(figsize=(8, 5))
    current_axes = plt.gca()
    current_axes.grid(True)
    current_axes.set_ylim(bottom=0, top=3)
    plt.show()
plot_learning_curves(history=history)
# Evaluate the trained model on the test set.
model.evaluate(x=x_test_scaled, y=y_test)
运行结果: