【TensorFlow 2.0】10. End-to-End Custom Model Training (custom training)
1. Import the required packages
import tensorflow as tf
import shutil,os,sys,io,copy,time,itertools,argparse,matplotlib
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
import pandas as pd
from functools import partial
from collections import namedtuple
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
%matplotlib inline
2. Configure the model parameters
class ObjectDict(dict):
    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    def __setattr__(self, key, value):
        self[key] = value

args = ObjectDict()
args.output_folder = '/tmp/out/model_out'
args.save_format = 'hdf5'
args.which_gpu = 0
args.batch_size = 200
args.epochs = 10
args.regularizer = 1e-4
args.num_classes = 10
args.initial_learning_rate = 1e-3
args.learning_rate_decay_factor = 0.9  # decay the learning rate periodically
args.num_epochs_per_decay = 1  # decay once every this many epochs
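Since ObjectDict exposes dictionary keys as attributes, the two access styles are interchangeable; a quick check using the values set above:
# Attribute access and key access read the same underlying storage.
assert args.batch_size == args['batch_size'] == 200
args['epochs'] = 12       # write through the dict interface ...
assert args.epochs == 12  # ... read through the attribute interface
args.epochs = 10          # restore the value used below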
3. Prepare for training
# 0 = all messages are logged (default behavior)
# 1 = INFO messages are not printed
# 2 = INFO and WARNING messages are not printed
# 3 = INFO, WARNING, and ERROR messages are not printed
# suppress INFO and WARNING messages
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
tf.keras.backend.clear_session()
3.1 Model configuration
History = namedtuple('History', ['train_epoch_acc', 'train_epoch_loss', 'val_epoch_acc', 'val_epoch_loss'])
def configs(args=None):
    t = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_folder = args.output_folder
    if os.path.exists(output_folder):
        inc = input("The model save path (%s) already exists. Do you want to delete and remake it? (y/n)" % output_folder)
        while inc.lower() not in ['y', 'n']:
            inc = input("The model save path already exists. Do you want to delete and remake it? (y/n)")
        if inc.lower() == 'y':
            shutil.rmtree(output_folder)
            os.makedirs(output_folder)
        else:
            print("Exit and check the path!")
            sys.exit(-1)
    else:
        print("The model save path (%s) does not exist; creating it!" % output_folder)
        os.makedirs(output_folder)
    if args.save_format == "hdf5":
        save_path_models = os.path.join(output_folder, "hdf5_models_{}".format(t))
        if not os.path.exists(save_path_models):
            os.makedirs(save_path_models)
        save_path = os.path.join(save_path_models, "ckpt_epoch{:02d}_val_acc{:.2f}.hdf5")
    elif args.save_format == "saved_model":
        save_path_models = os.path.join(output_folder, "saved_models_{}".format(t))
        if not os.path.exists(save_path_models):
            os.makedirs(save_path_models)
        save_path = os.path.join(save_path_models, "ckpt_epoch{:03d}_val_acc{:.4f}.ckpt")
    # directory for the logs
    log_dir = os.path.join(output_folder, 'logs_{}'.format(t))
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    physical_devices = tf.config.experimental.list_physical_devices('GPU')  # list all visible GPUs
    if physical_devices:
        gpu = physical_devices[args.which_gpu]  # pick the GPU given by args.which_gpu
        tf.config.experimental.set_memory_growth(gpu, True)  # grow GPU memory on demand
        tf.config.experimental.set_visible_devices(gpu, 'GPU')  # make only this GPU visible
    return output_folder, save_path, log_dir
output_folder, save_path, log_dir = configs(args)
The model save path (/tmp/out/model_out) already exists. Do you want to delete and remake it? (y/n)y
Use TensorBoard and log files to monitor the training process
train_log_dir = os.path.join(log_dir, 'train')
validation_log_dir = os.path.join(log_dir, 'validation')
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
validation_summary_writer = tf.summary.create_file_writer(validation_log_dir)
3.2 Prepare the dataset
fashion_mnist=tf.keras.datasets.fashion_mnist
(train_x,train_y),(validation_x,validation_y)=fashion_mnist.load_data()
train_x,validation_x = train_x[...,np.newaxis]/255.0,validation_x[...,np.newaxis]/255.0
train_x = np.array(train_x).astype(np.float32)
validation_x=np.array(validation_x).astype(np.float32)
total_train_sample = train_x.shape[0]
total_validation_sample=validation_x.shape[0]
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
train_ds = tf.data.Dataset.from_tensor_slices((train_x,train_y))
validation_ds = tf.data.Dataset.from_tensor_slices((validation_x,validation_y))
train_ds=train_ds.shuffle(buffer_size=args.batch_size*10).batch(args.batch_size).prefetch(buffer_size = tf.data.experimental.AUTOTUNE)
validation_ds = validation_ds.batch(args.batch_size).prefetch(buffer_size = tf.data.experimental.AUTOTUNE)  # no repeat() needed: one full pass per epoch is enough
train_steps_per_epoch = np.ceil(total_train_sample / args.batch_size).astype(np.int32)
validation_steps_per_epoch = np.ceil(total_validation_sample / args.batch_size).astype(np.int32)
print("train_steps_per_epoch:", train_steps_per_epoch)
print("validation_steps_per_epoch:", validation_steps_per_epoch)
train_steps_per_epoch: 300
validation_steps_per_epoch: 50
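Before training, it can be worth pulling a single batch from the pipeline to confirm shapes and dtypes match the model input; a small sanity check (the expected shapes assume the batch size of 200 configured above):
# Inspect one batch from the training pipeline.
for data, label in train_ds.take(1):
    print(data.shape, data.dtype)    # expected: (200, 28, 28, 1) float32
    print(label.shape, label.dtype)  # expected: (200,) uint8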
3.3 Define the model
def mymodel(num_classes, regularizer):
    l2 = tf.keras.regularizers.l2(regularizer)  # regularization applied to the weights
    ini = tf.keras.initializers.he_normal()  # weight initialization method
    conv2d = partial(tf.keras.layers.Conv2D, activation='relu', padding='same', kernel_regularizer=l2, bias_regularizer=l2)
    fc = partial(tf.keras.layers.Dense, activation='relu', kernel_regularizer=l2, bias_regularizer=l2)
    maxpool = tf.keras.layers.MaxPooling2D
    dropout = tf.keras.layers.Dropout
    x_input = tf.keras.layers.Input(shape=(28, 28, 1), name='input_node')
    x = conv2d(128, (5, 5))(x_input)
    x = maxpool((2, 2))(x)
    x = conv2d(256, (5, 5))(x)
    x = maxpool((2, 2))(x)
    x = tf.keras.layers.Flatten()(x)
    x = fc(128)(x)
    x_output = fc(num_classes, activation=None, name='output_node')(x)
    model = tf.keras.models.Model(inputs=x_input, outputs=x_output)
    return model
model = mymodel(args.num_classes,args.regularizer)
print(model.summary())
tf.keras.utils.plot_model(model, to_file = os.path.join(log_dir, 'model_arch.png'), show_shapes = True)
model_json = model.to_json()
with open(os.path.join(log_dir, 'model_json.json'), 'w') as json_file:
    json_file.write(model_json)
Model: "model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_node (InputLayer) [(None, 28, 28, 1)] 0
_________________________________________________________________
conv2d (Conv2D) (None, 28, 28, 128) 3328
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 128) 0
_________________________________________________________________
conv2d_1 (Conv2D) (None, 14, 14, 256) 819456
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 256) 0
_________________________________________________________________
flatten (Flatten) (None, 12544) 0
_________________________________________________________________
dense (Dense) (None, 128) 1605760
_________________________________________________________________
output_node (Dense) (None, 10) 1290
=================================================================
Total params: 2,429,834
Trainable params: 2,429,834
Non-trainable params: 0
_________________________________________________________________
None
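The JSON file written above stores only the architecture, not the weights. A minimal sketch of rebuilding the structure from it (weights would still have to be restored separately, e.g. with load_weights):
# Rebuild the architecture from the saved JSON description (no weights yet).
with open(os.path.join(log_dir, 'model_json.json')) as f:
    restored = tf.keras.models.model_from_json(f.read())
restored.summary()  # prints the same layer structure as above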
3.4 Set up the optimizer and training helper functions
# optimizer and learning-rate schedule
decay_steps = int(train_steps_per_epoch * args.num_epochs_per_decay)
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
args.initial_learning_rate,
decay_steps = decay_steps,
decay_rate = args.learning_rate_decay_factor,
staircase = True)
# optimizer = tf.keras.optimizers.SGD(learning_rate = lr_schedule, momentum = 0.9)
optimizer = tf.keras.optimizers.Adam(learning_rate = lr_schedule)
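The schedule object is a callable that maps a global step to a learning rate; with staircase=True the rate drops by the decay factor once every decay_steps steps. A quick check, assuming the settings above (initial rate 1e-3, decay 0.9, decay_steps 300):
# Evaluate the schedule at a few global steps.
for step in [0, decay_steps - 1, decay_steps, 5 * decay_steps]:
    print(step, float(lr_schedule(step)))
# roughly: 0.001, 0.001, 0.0009, 0.00059049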
# the parameter names indicate each argument's role; see the docstring below
@tf.function
def train_on_batch(model, optimizer, datas, labels,
                   train_batch_acc,
                   train_batch_total_loss,
                   train_batch_celoss,
                   train_batch_regloss,
                   train_epoch_acc,
                   train_epoch_ce_loss):
    """
    :param model: the model
    :param optimizer: the optimizer
    :param datas: one batch of data
    :param labels: the corresponding labels
    :param train_batch_acc: per-batch accuracy metric
    :param train_batch_total_loss: per-batch total-loss metric
    :param train_batch_celoss: per-batch cross-entropy metric
    :param train_batch_regloss: per-batch regularization-loss metric
    :param train_epoch_acc: accuracy accumulated over the epoch
    :param train_epoch_ce_loss: cross-entropy accumulated over the epoch
    :return: gradients: the gradient of every trainable variable
    """
    with tf.GradientTape() as tape:
        logits = model(datas, training=True)  # training=True so layers like dropout/BN behave correctly
        ce_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)(labels, logits)
        reg_loss = tf.add_n(model.losses)
        total_loss = ce_loss + reg_loss  # with regularization
        # total_loss = ce_loss  # without regularization
    gradients = tape.gradient(total_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_batch_acc(labels, logits)
    train_batch_total_loss(total_loss)
    train_batch_celoss(ce_loss)
    train_batch_regloss(reg_loss)
    train_epoch_acc(labels, logits)
    train_epoch_ce_loss(ce_loss)
    return gradients
@tf.function
def test_on_batch(model, datas, labels, val_epoch_acc, test_epoch_loss):
    """
    :param model: the model
    :param datas: one batch of data
    :param labels: the corresponding labels
    :param val_epoch_acc: accumulated accuracy metric
    :param test_epoch_loss: accumulated loss metric
    :return preds: predicted classes for the batch
    """
    logits = model(datas, training=False)  # inference mode
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)(labels, logits)
    val_epoch_acc(labels, logits)
    test_epoch_loss(loss)
    preds = tf.argmax(logits, axis=-1)
    return preds
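The training loop below contains commented-out calls to a moving_average helper that the post never defines. A minimal sketch of what such a helper could look like, assuming a standard exponential moving average over the weights (this is a guess at the intent, not the author's implementation):
def moving_average(model, decay, shadow_variables):
    # Hypothetical EMA update: var <- decay * shadow + (1 - decay) * var,
    # where shadow_variables holds copies of the weights taken before the step.
    for var, shadow in zip(model.trainable_variables, shadow_variables):
        var.assign(decay * shadow + (1 - decay) * var)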
3.5 Train and validate the model
# summarize all results once training is complete
def plot_acc_loss(history=None, log_dir=None):
    """
    :param history: per-epoch train/validation acc and loss to plot after training
    :param log_dir: where to save the resulting figure
    :return:
    """
    plt.figure(figsize=(8, 8))
    N = np.arange(len(history.train_epoch_acc))
    plt.subplot(2, 1, 1)
    plt.plot(N, history.train_epoch_acc, label='Training Accuracy')
    plt.scatter(N, history.train_epoch_acc)
    plt.plot(N, history.val_epoch_acc, label='Validation Accuracy')
    plt.scatter(N, history.val_epoch_acc)
    plt.legend(loc='lower right')
    plt.ylabel('Accuracy')
    plt.ylim([min(plt.ylim()), 1])
    plt.title('Training and Validation Accuracy')
    plt.subplot(2, 1, 2)
    plt.plot(N, history.train_epoch_loss, label='Training Loss')
    plt.scatter(N, history.train_epoch_loss)
    plt.plot(N, history.val_epoch_loss, label='Validation Loss')
    plt.scatter(N, history.val_epoch_loss)
    plt.legend(loc='upper right')
    plt.ylabel('Cross Entropy')
    # plt.ylim([0, 1.0])
    plt.title('Training and Validation Loss')
    plt.xlabel('epoch')
    plt.savefig(os.path.join(log_dir, 'training.png'))
# record and print the validation results after each epoch
def print_metrics(labels, predictions, target_names, save=False, save_path=None, epoch=None, train_time=None,
                  test_time=None):
    """
    :param labels: ground-truth labels
    :param predictions: predicted labels
    :param target_names: class names
    :param save: whether to save the results
    :param save_path: where to save them
    :param epoch: the epoch being evaluated
    :param train_time: training time of this epoch
    :param test_time: evaluation time of this epoch
    :return: confusion matrix
    """
    # compute the confusion matrix
    assert len(predictions) == len(labels)
    confusion_result = confusion_matrix(labels, predictions)
    pd.set_option('display.max_rows', 500)
    pd.set_option('display.max_columns', 500)
    pd.set_option('display.width', 1500)
    index = sorted(set(predictions) | set(labels))  # classes that actually appear
    target_names = [target_names[i] for i in index]
    confusion_result = pd.DataFrame(confusion_result, index=target_names, columns=target_names)
    # classification report
    report = classification_report(labels, predictions, target_names=target_names, digits=4)
    result_report = 'Epoch:{} with train_time:{:2f}min and test_time:{:2f}min\n' \
                    'Confuse_matrix:\n{}\n\nClassification_report:\n{} \n'.format(epoch,
                                                                                  train_time / 60,
                                                                                  test_time / 60,
                                                                                  confusion_result,
                                                                                  report)
    print(result_report)
    if save:
        savepath = os.path.join(save_path, "validation_result.txt")
        print('the result saved in %s' % savepath)  # with the same savepath, every epoch is appended to one file
        with open(savepath, 'a') as f:
            f.write(result_report)
    return confusion_result
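print_metrics can also be exercised on its own, independent of the training loop; a toy call with three hand-made samples (the inputs here are made up for illustration):
# Toy example: three samples, one of them misclassified.
_ = print_metrics(labels=[0, 1, 1], predictions=[0, 1, 0],
                  target_names=class_names, save=False,
                  epoch=0, train_time=0.0, test_time=0.0)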
Plot the confusion matrix, save it as an image, and write a copy of it to TensorBoard
def plot_to_image(figure, log_dir, epoch):
    """Converts the matplotlib plot specified by 'figure' to a PNG image and
    returns it. The supplied figure is closed and inaccessible after this call."""
    # Save the plot to a PNG in memory.
    buf = io.BytesIO()
    fig = figure
    plt.savefig(buf, format='png')
    fig.savefig(os.path.join(log_dir, 'confusion_matrix_epoch%d.png' % epoch))  # also save the image to disk
    # Closing the figure prevents it from being displayed directly inside the notebook.
    plt.close(figure)
    buf.seek(0)
    # Convert the PNG buffer to a TF image.
    image = tf.image.decode_png(buf.getvalue(), channels=4)
    # Add the batch dimension.
    image = tf.expand_dims(image, 0)
    return image
def plot_confusion_matrix(cm, class_names):
    """
    Returns a matplotlib figure containing the plotted confusion matrix.
    Args:
        cm (array, shape = [n, n]): a confusion matrix of integer classes
        class_names (array, shape = [n]): String names of the integer classes
    """
    cm = np.asarray(cm)  # accept either a numpy array or the DataFrame returned by print_metrics
    figure = plt.figure(figsize=(8, 8))
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title("Confusion matrix")
    plt.colorbar()
    tick_marks = np.arange(len(class_names))
    plt.xticks(tick_marks, class_names, rotation=45)
    plt.yticks(tick_marks, class_names)
    # Normalize the confusion matrix.
    cm = np.around(cm.astype('float') / cm.sum(axis=1)[:, np.newaxis], decimals=2)
    # Use white text if squares are dark; otherwise black.
    threshold = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        color = "white" if cm[i, j] > threshold else "black"
        plt.text(j, i, cm[i, j], horizontalalignment="center", color=color)
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    return figure
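These two helpers can be smoke-tested together on a toy matrix before wiring them into the training loop; the class names below are made up for illustration:
# Plot a small confusion matrix and convert it to a TF image tensor.
toy_cm = np.array([[8, 2],
                   [1, 9]])
fig = plot_confusion_matrix(toy_cm, class_names=['cat', 'dog'])
img = plot_to_image(fig, log_dir, epoch=0)  # also writes confusion_matrix_epoch0.png
print(img.shape)  # (1, height, width, 4)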
train_batch_acc = tf.keras.metrics.SparseCategoricalAccuracy()
train_batch_total_loss = tf.keras.metrics.Mean()  # sum of the cross-entropy loss and the regularization loss
train_batch_celoss = tf.keras.metrics.Mean()  # cross-entropy loss
train_batch_regloss = tf.keras.metrics.Mean()  # regularization loss
train_epoch_acc = tf.keras.metrics.SparseCategoricalAccuracy()
train_epoch_ce_loss = tf.keras.metrics.Mean()
val_epoch_acc = tf.keras.metrics.SparseCategoricalAccuracy()
val_epoch_loss = tf.keras.metrics.Mean()
train_acc = []
train_loss = []
val_acc = []
val_loss = []
format_str = (
'%s: step:%-6d epoch:%-6.3f/%d celoss:%-5.3f regloss:%-7.4f total_loss:%-6.3f '
'batch_acc:%-5.2f%% epoch_acc:%-5.2f%% epoch_loss:%-6.3f (%.1f examples/sec; %-3.2f sec/batch)')
for epoch in range(args.epochs):
    print("Do training Epoch=%d/%d on train dataset>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>:" % (epoch + 1, args.epochs))
    start = time.time()
    for i, (data, label) in enumerate(train_ds):
        if epoch == 0 and i == 1:  # profile only the second batch
            # turn on tracing (optional); graph=True also writes the graph structure to the log
            tf.summary.trace_on(graph=True, profiler=True)
        start_time = time.time()
        # prepare moving-average parameters
        # num_updates = i + epoch * train_steps_per_epoch
        # moving_average_decay = min(args.moving_average_decay_factor, (1 + num_updates) / (10 + num_updates))
        # shadow_variables = copy.deepcopy(model.trainable_variables)
        # update the variables
        grads = train_on_batch(model, optimizer, data, label, train_batch_acc,
                               train_batch_total_loss,
                               train_batch_celoss,
                               train_batch_regloss,
                               train_epoch_acc,
                               train_epoch_ce_loss)
        # apply the moving average
        # moving_average(model, moving_average_decay, shadow_variables)
        duration = time.time() - start_time
        if epoch == 0 and i == 1:
            with train_summary_writer.as_default():
                # export the trace to a file (optional)
                tf.summary.trace_export(name="model_trace", step=1,
                                        profiler_outdir=train_log_dir)
            tf.summary.trace_off()  # turn tracing off again
        if (i + 1) % 100 == 0:
            examples_per_sec = args.batch_size / duration
            current_epoch = (i + 1) / ((epoch + 1) * train_steps_per_epoch) + epoch
            print(format_str % (datetime.now().strftime("%Y-%m-%d %H:%M:%S"), i + 1,
                                current_epoch, args.epochs,
                                train_batch_celoss.result(),
                                train_batch_regloss.result(),
                                train_batch_total_loss.result(),
                                100 * train_batch_acc.result(),
                                100 * train_epoch_acc.result(),
                                train_epoch_ce_loss.result(),
                                examples_per_sec, duration))
            step = tf.constant(epoch * train_steps_per_epoch + i + 1)
            with train_summary_writer.as_default():  # log every 100 steps; logging too often slows training down
                tf.summary.scalar('train_batch_accuracy', train_batch_acc.result(), step=step)
                tf.summary.scalar('train_batch_celoss', train_batch_celoss.result(), step=step)
                tf.summary.scalar('train_batch_regloss', train_batch_regloss.result(), step=step)
                tf.summary.scalar('train_batch_total_loss', train_batch_total_loss.result(), step=step)
                tf.summary.scalar('train_epoch_acc', train_epoch_acc.result(), step=step)
                tf.summary.scalar('train_epoch_total_loss', train_epoch_ce_loss.result(), step=step)
                train_summary_writer.flush()
        if (i + 1) % int(train_steps_per_epoch * 0.1) == 0:
            step = tf.constant(epoch * train_steps_per_epoch + i + 1)
            # every 0.1 epoch, record histograms of the layer variables and their gradients;
            # recording more often makes the log files very large
            with train_summary_writer.as_default():
                for grad, variable in zip(grads, model.trainable_variables):
                    v_name = variable.name.replace(':', '_')
                    # histogram of the variable itself
                    tf.summary.histogram(v_name, variable, step=step)
                    # histogram of its gradient
                    tf.summary.histogram('{}_grad'.format(v_name), grad, step=step)
                train_summary_writer.flush()
        train_batch_acc.reset_states()
        train_batch_celoss.reset_states()
        train_batch_regloss.reset_states()
        train_batch_total_loss.reset_states()
    end = time.time() - start
    print("Training Epoch:{}/{} loss:{:.2f} acc:{:.2f} finished, usetime:{:.1f} sec".format(epoch + 1,
                                                                                            args.epochs,
                                                                                            train_epoch_ce_loss.result().numpy(),
                                                                                            train_epoch_acc.result().numpy(),
                                                                                            end))
    train_acc.append(train_epoch_acc.result().numpy())
    train_loss.append(train_epoch_ce_loss.result().numpy())
    train_epoch_acc.reset_states()
    train_epoch_ce_loss.reset_states()
    # one training epoch finished
    train_summary_writer.flush()
    # evaluate on the validation set
    print("Do testing on validation dataset>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>:")
    all_labels = []
    all_preds = []
    start_time = time.time()
    # for i, (data, label) in enumerate(validation_ds.take(1000)):
    for i, (data, label) in enumerate(validation_ds):
        preds = test_on_batch(model, data, label, val_epoch_acc, val_epoch_loss)
        all_preds.extend(preds.numpy().flatten().tolist())
        all_labels.extend(label.numpy().flatten().tolist())
        sys.stdout.write('\r %d / %d finished !' % (i + 1, validation_steps_per_epoch))
    duration = time.time() - start_time
    print("Epoch %d: test_acc:%.3f test_loss:%.3f total_time:%d sec " % ((epoch + 1),
                                                                         val_epoch_acc.result(),
                                                                         val_epoch_loss.result(),
                                                                         int(duration)))
    acc = val_epoch_acc.result().numpy()
    val_acc.append(val_epoch_acc.result().numpy())
    val_loss.append(val_epoch_loss.result().numpy())
    cm = print_metrics(all_labels, all_preds, class_names, True, validation_log_dir, (epoch + 1), train_time=end,
                       test_time=duration)
    figure = plot_confusion_matrix(cm, class_names=class_names)
    cm_image = plot_to_image(figure, validation_log_dir, epoch + 1)  # also saves the image to the log folder
    with validation_summary_writer.as_default():
        tf.summary.scalar('val_loss', val_epoch_loss.result(), step=epoch + 1)
        tf.summary.scalar('val_accuracy', val_epoch_acc.result(), step=epoch + 1)
        tf.summary.image("Confusion Matrix", cm_image, step=epoch + 1)  # write the confusion matrix to TensorBoard
        validation_summary_writer.flush()
    # save the model at the end of the epoch
    print("Model saved at Epoch %d end ." % (epoch + 1,))
    model.save(save_path.format((epoch + 1), acc))
    val_epoch_acc.reset_states()
    val_epoch_loss.reset_states()
    # validation finished
    validation_summary_writer.flush()
history = History(train_epoch_acc=train_acc, train_epoch_loss=train_loss, val_epoch_acc=val_acc,
                  val_epoch_loss=val_loss)
plot_acc_loss(history=history, log_dir=log_dir)
train_summary_writer.flush()
validation_summary_writer.flush()
train_summary_writer.close()
validation_summary_writer.close()
Do training Epoch=1/10 on train dataset>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>:
2019-12-19 19:09:27: step:100 epoch:0.333 /10 celoss:0.372 regloss:0.0437 total_loss:0.416 batch_acc:88.00% epoch_acc:75.10% epoch_loss:0.664 (38154.3 examples/sec; 0.01 sec/batch)
2019-12-19 19:09:29: step:200 epoch:0.667 /10 celoss:0.333 regloss:0.0458 total_loss:0.379 batch_acc:89.50% epoch_acc:80.93% epoch_loss:0.521 (35259.6 examples/sec; 0.01 sec/batch)
2019-12-19 19:09:31: step:300 epoch:1.000 /10 celoss:0.360 regloss:0.0473 total_loss:0.407 batch_acc:88.00% epoch_acc:83.34% epoch_loss:0.457 (33926.3 examples/sec; 0.01 sec/batch)
Training Epoch:1/10 loss:0.46 acc:0.83 finished, usetime:8.0 sec
Do testing on validation dataset>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>:
50 / 50 finished !Epoch 1: test_acc:0.877 test_loss:0.335 total_time:0 sec
Epoch:1 with train_time:0.133957min and test_time:0.005964min
Confuse_matrix:
T-shirt/top Trouser Pullover Dress Coat Sandal Shirt Sneaker Bag Ankle boot
T-shirt/top 746 0 19 28 12 2 184 0 9 0
Trouser 0 970 0 18 7 0 3 0 2 0
Pullover 6 1 698 4 191 0 99 0 1 0
Dress 12 1 7 874 59 0 47 0 0 0
Coat 1 1 17 20 898 0 62 0 1 0
Sandal 0 0 0 1 0 980 0 8 0 11
Shirt 82 1 43 27 101 0 736 0 10 0
Sneaker 0 0 0 0 0 35 0 952 0 13
Bag 1 1 8 3 7 2 6 4 968 0
Ankle boot 0 0 0 0 0 4 1 48 0 947
Classification_report:
precision recall f1-score support
T-shirt/top 0.8797 0.7460 0.8074 1000
Trouser 0.9949 0.9700 0.9823 1000
Pullover 0.8813 0.6980 0.7790 1000
Dress 0.8964 0.8740 0.8851 1000
Coat 0.7043 0.8980 0.7895 1000
Sandal 0.9580 0.9800 0.9689 1000
Shirt 0.6467 0.7360 0.6885 1000
Sneaker 0.9407 0.9520 0.9463 1000
Bag 0.9768 0.9680 0.9724 1000
Ankle boot 0.9753 0.9470 0.9609 1000
accuracy 0.8769 10000
macro avg 0.8854 0.8769 0.8780 10000
weighted avg 0.8854 0.8769 0.8780 10000
the result saved in /tmp/out/model_out/logs_20191219_190839/validation/validation_result.txt
Model saved at Epoch 1 end .
Do training Epoch=2/10 on train dataset>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>:
2019-12-19 19:09:34: step:100 epoch:1.167 /10 celoss:0.257 regloss:0.0484 total_loss:0.305 batch_acc:91.50% epoch_acc:89.25% epoch_loss:0.293 (33302.7 examples/sec; 0.01 sec/batch)
2019-12-19 19:09:36: step:200 epoch:1.333 /10 celoss:0.229 regloss:0.0490 total_loss:0.278 batch_acc:93.00% epoch_acc:89.62% epoch_loss:0.288 (33259.1 examples/sec; 0.01 sec/batch)
2019-12-19 19:09:37: step:300 epoch:1.500 /10 celoss:0.210 regloss:0.0499 total_loss:0.260 batch_acc:92.50% epoch_acc:89.82% epoch_loss:0.281 (34271.4 examples/sec; 0.01 sec/batch)
Training Epoch:2/10 loss:0.28 acc:0.90 finished, usetime:5.6 sec
Do testing on validation dataset>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>:
50 / 50 finished !Epoch 2: test_acc:0.899 test_loss:0.274 total_time:0 sec
Epoch:2 with train_time:0.093418min and test_time:0.003458min
Confuse_matrix:
T-shirt/top Trouser Pullover Dress Coat Sandal Shirt Sneaker Bag Ankle boot
T-shirt/top 875 0 29 12 6 1 69 0 8 0
Trouser 4 973 0 16 3 0 2 0 2 0
Pullover 13 0 839 8 85 0 54 0 1 0
Dress 32 0 10 913 22 0 20 0 3 0
Coat 1 1 56 38 848 0 56 0 0 0
Sandal 0 0 0 0 0 972 0 16 0 12
Shirt 157 0 68 23 78 0 662 0 12 0
Sneaker 0 0 0 0 0 7 0 979 0 14
Bag 3 1 2 2 4 4 1 5 978 0
Ankle boot 0 0 0 0 0 3 1 43 0 953
Classification_report:
precision recall f1-score support
T-shirt/top 0.8065 0.8750 0.8393 1000
Trouser 0.9979 0.9730 0.9853 1000
Pullover 0.8357 0.8390 0.8373 1000
Dress 0.9022 0.9130 0.9076 1000
Coat 0.8107 0.8480 0.8289 1000
Sandal 0.9848 0.9720 0.9784 1000
Shirt 0.7653 0.6620 0.7099 1000
Sneaker 0.9386 0.9790 0.9584 1000
Bag 0.9741 0.9780 0.9760 1000
Ankle boot 0.9734 0.9530 0.9631 1000
accuracy 0.8992 10000
macro avg 0.8989 0.8992 0.8984 10000
weighted avg 0.8989 0.8992 0.8984 10000
the result saved in /tmp/out/model_out/logs_20191219_190839/validation/validation_result.txt
Model saved at Epoch 2 end .
Do training Epoch=3/10 on train dataset>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>:
2019-12-19 19:09:40: step:100 epoch:2.111 /10 celoss:0.238 regloss:0.0506 total_loss:0.288 batch_acc:91.50% epoch_acc:90.84% epoch_loss:0.252 (32027.4 examples/sec; 0.01 sec/batch)
2019-12-19 19:09:42: step:200 epoch:2.222 /10 celoss:0.239 regloss:0.0512 total_loss:0.290 batch_acc:89.50% epoch_acc:90.93% epoch_loss:0.248 (33647.3 examples/sec; 0.01 sec/batch)
2019-12-19 19:09:44: step:300 epoch:2.333 /10 celoss:0.297 regloss:0.0522 total_loss:0.349 batch_acc:89.00% epoch_acc:91.04% epoch_loss:0.244 (32278.8 examples/sec; 0.01 sec/batch)
Training Epoch:3/10 loss:0.24 acc:0.91 finished, usetime:5.6 sec
Do testing on validation dataset>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>:
50 / 50 finished !Epoch 3: test_acc:0.908 test_loss:0.255 total_time:0 sec
Epoch:3 with train_time:0.093433min and test_time:0.003803min
Confuse_matrix:
T-shirt/top Trouser Pullover Dress Coat Sandal Shirt Sneaker Bag Ankle boot
T-shirt/top 892 0 11 13 1 2 78 0 3 0
Trouser 3 973 0 20 0 0 2 0 2 0
Pullover 13 1 869 11 45 0 60 0 1 0
Dress 22 0 8 931 6 0 32 0 1 0
Coat 2 1 84 44 787 0 82 0 0 0
Sandal 0 0 0 0 0 989 0 7 0 4
Shirt 124 1 57 25 47 0 740 0 6 0
Sneaker 0 0 0 0 0 20 0 959 0 21
Bag 4 1 3 4 2 3 3 3 977 0
Ankle boot 0 0 0 0 0 6 1 28 0 965
Classification_report:
precision recall f1-score support
T-shirt/top 0.8415 0.8920 0.8660 1000
Trouser 0.9959 0.9730 0.9843 1000
Pullover 0.8421 0.8690 0.8553 1000
Dress 0.8884 0.9310 0.9092 1000
Coat 0.8863 0.7870 0.8337 1000
Sandal 0.9696 0.9890 0.9792 1000
Shirt 0.7415 0.7400 0.7407 1000
Sneaker 0.9619 0.9590 0.9604 1000
Bag 0.9869 0.9770 0.9819 1000
Ankle boot 0.9747 0.9650 0.9698 1000
accuracy 0.9082 10000
macro avg 0.9089 0.9082 0.9081 10000
weighted avg 0.9089 0.9082 0.9081 10000
the result saved in /tmp/out/model_out/logs_20191219_190839/validation/validation_result.txt
Model saved at Epoch 3 end .
Do training Epoch=4/10 on train dataset>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>:
2019-12-19 19:09:46: step:100 epoch:3.083 /10 celoss:0.159 regloss:0.0525 total_loss:0.212 batch_acc:94.00% epoch_acc:92.10% epoch_loss:0.214 (35415.9 examples/sec; 0.01 sec/batch)
2019-12-19 19:09:48: step:200 epoch:3.167 /10 celoss:0.220 regloss:0.0530 total_loss:0.273 batch_acc:92.00% epoch_acc:92.02% epoch_loss:0.216 (31278.6 examples/sec; 0.01 sec/batch)
2019-12-19 19:09:50: step:300 epoch:3.250 /10 celoss:0.315 regloss:0.0539 total_loss:0.369 batch_acc:88.50% epoch_acc:92.11% epoch_loss:0.215 (33379.5 examples/sec; 0.01 sec/batch)
Training Epoch:4/10 loss:0.21 acc:0.92 finished, usetime:5.6 sec
Do testing on validation dataset>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>:
50 / 50 finished !Epoch 4: test_acc:0.899 test_loss:0.279 total_time:0 sec
Epoch:4 with train_time:0.093070min and test_time:0.003471min
Confuse_matrix:
T-shirt/top Trouser Pullover Dress Coat Sandal Shirt Sneaker Bag Ankle boot
T-shirt/top 938 0 17 8 2 2 30 0 3 0
Trouser 6 976 0 14 0 0 2 0 2 0
Pullover 21 1 922 8 31 0 17 0 0 0
Dress 50 0 11 909 14 0 16 0 0 0
Coat 2 1 135 39 789 0 33 0 1 0
Sandal 0 0 0 0 0 966 0 24 0 10
Shirt 228 0 117 16 61 0 572 0 6 0
Sneaker 0 0 0 0 0 3 0 987 0 10
Bag 7 1 6 3 2 2 0 5 974 0
Ankle boot 0 0 0 0 0 4 1 38 0 957
Classification_report:
precision recall f1-score support
T-shirt/top 0.7492 0.9380 0.8330 1000
Trouser 0.9969 0.9760 0.9864 1000
Pullover 0.7632 0.9220 0.8351 1000
Dress 0.9117 0.9090 0.9104 1000
Coat 0.8776 0.7890 0.8310 1000
Sandal 0.9887 0.9660 0.9772 1000
Shirt 0.8525 0.5720 0.6846 1000
Sneaker 0.9364 0.9870 0.9611 1000
Bag 0.9878 0.9740 0.9809 1000
Ankle boot 0.9795 0.9570 0.9681 1000
accuracy 0.8990 10000
macro avg 0.9044 0.8990 0.8968 10000
weighted avg 0.9044 0.8990 0.8968 10000
the result saved in /tmp/out/model_out/logs_20191219_190839/validation/validation_result.txt
Model saved at Epoch 4 end .
Do training Epoch=5/10 on train dataset>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>:
2019-12-19 19:09:53: step:100 epoch:4.067 /10 celoss:0.194 regloss:0.0543 total_loss:0.248 batch_acc:93.00% epoch_acc:92.46% epoch_loss:0.201 (35447.3 examples/sec; 0.01 sec/batch)
2019-12-19 19:09:54: step:200 epoch:4.133 /10 celoss:0.170 regloss:0.0546 total_loss:0.224 batch_acc:95.50% epoch_acc:92.81% epoch_loss:0.197 (31472.2 examples/sec; 0.01 sec/batch)
2019-12-19 19:09:56: step:300 epoch:4.200 /10 celoss:0.204 regloss:0.0552 total_loss:0.259 batch_acc:93.50% epoch_acc:92.96% epoch_loss:0.193 (32910.7 examples/sec; 0.01 sec/batch)
Training Epoch:5/10 loss:0.19 acc:0.93 finished, usetime:5.6 sec
Do testing on validation dataset>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>:
50 / 50 finished !Epoch 5: test_acc:0.910 test_loss:0.249 total_time:0 sec
Epoch:5 with train_time:0.093259min and test_time:0.003784min
Confuse_matrix:
T-shirt/top Trouser Pullover Dress Coat Sandal Shirt Sneaker Bag Ankle boot
T-shirt/top 947 0 9 6 3 1 31 0 3 0
Trouser 5 980 0 11 1 0 1 0 2 0
Pullover 27 2 877 5 47 0 41 0 1 0
Dress 53 1 9 891 32 0 14 0 0 0
Coat 4 1 60 16 855 0 63 0 1 0
Sandal 0 0 0 0 0 982 0 12 0 6
Shirt 212 0 67 13 51 0 652 0 5 0
Sneaker 0 0 0 0 0 4 0 989 0 7
Bag 8 1 3 3 3 3 0 4 975 0
Ankle boot 0 0 0 0 0 7 1 44 0 948
Classification_report:
precision recall f1-score support
T-shirt/top 0.7540 0.9470 0.8395 1000
Trouser 0.9949 0.9800 0.9874 1000
Pullover 0.8556 0.8770 0.8662 1000
Dress 0.9429 0.8910 0.9162 1000
Coat 0.8619 0.8550 0.8584 1000
Sandal 0.9850 0.9820 0.9835 1000
Shirt 0.8120 0.6520 0.7232 1000
Sneaker 0.9428 0.9890 0.9653 1000
Bag 0.9878 0.9750 0.9814 1000
Ankle boot 0.9865 0.9480 0.9669 1000
accuracy 0.9096 10000
macro avg 0.9123 0.9096 0.9088 10000
weighted avg 0.9123 0.9096 0.9088 10000
the result saved in /tmp/out/model_out/logs_20191219_190839/validation/validation_result.txt
Model saved at Epoch 5 end .
Do training Epoch=6/10 on train dataset>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>:
2019-12-19 19:09:59: step:100 epoch:5.056 /10 celoss:0.152 regloss:0.0560 total_loss:0.209 batch_acc:93.50% epoch_acc:93.50% epoch_loss:0.178 (35207.8 examples/sec; 0.01 sec/batch)
2019-12-19 19:10:01: step:200 epoch:5.111 /10 celoss:0.148 regloss:0.0563 total_loss:0.204 batch_acc:95.00% epoch_acc:93.56% epoch_loss:0.179 (33584.0 examples/sec; 0.01 sec/batch)
2019-12-19 19:10:02: step:300 epoch:5.167 /10 celoss:0.224 regloss:0.0569 total_loss:0.281 batch_acc:92.00% epoch_acc:93.69% epoch_loss:0.175 (33723.0 examples/sec; 0.01 sec/batch)
Training Epoch:6/10 loss:0.17 acc:0.94 finished, usetime:5.6 sec
Do testing on validation dataset>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>:
50 / 50 finished !Epoch 6: test_acc:0.910 test_loss:0.247 total_time:0 sec
Epoch:6 with train_time:0.094112min and test_time:0.003806min
Confuse_matrix:
T-shirt/top Trouser Pullover Dress Coat Sandal Shirt Sneaker Bag Ankle boot
T-shirt/top 949 0 8 8 1 1 27 0 6 0
Trouser 3 988 0 6 0 0 1 0 2 0
Pullover 23 2 914 8 32 0 20 0 1 0
Dress 50 2 11 913 16 0 8 0 0 0
Coat 3 1 102 32 820 0 40 0 2 0
Sandal 0 0 0 0 0 989 0 6 0 5
Shirt 220 1 90 16 57 0 604 0 12 0
Sneaker 0 0 0 0 0 12 0 974 0 14
Bag 4 1 1 4 0 2 0 1 987 0
Ankle boot 0 0 0 0 0 8 1 26 0 965
Classification_report:
precision recall f1-score support
T-shirt/top 0.7580 0.9490 0.8428 1000
Trouser 0.9930 0.9880 0.9905 1000
Pullover 0.8117 0.9140 0.8598 1000
Dress 0.9250 0.9130 0.9190 1000
Coat 0.8855 0.8200 0.8515 1000
Sandal 0.9773 0.9890 0.9831 1000
Shirt 0.8616 0.6040 0.7102 1000
Sneaker 0.9672 0.9740 0.9706 1000
Bag 0.9772 0.9870 0.9821 1000
Ankle boot 0.9807 0.9650 0.9728 1000
accuracy 0.9103 10000
macro avg 0.9137 0.9103 0.9082 10000
weighted avg 0.9137 0.9103 0.9082 10000
the result saved in /tmp/out/model_out/logs_20191219_190839/validation/validation_result.txt
Model saved at Epoch 6 end .
Do training Epoch=7/10 on train dataset>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>:
2019-12-19 19:10:05: step:100 epoch:6.048 /10 celoss:0.157 regloss:0.0577 total_loss:0.214 batch_acc:93.50% epoch_acc:93.94% epoch_loss:0.165 (34723.9 examples/sec; 0.01 sec/batch)
2019-12-19 19:10:07: step:200 epoch:6.095 /10 celoss:0.105 regloss:0.0578 total_loss:0.163 batch_acc:95.00% epoch_acc:93.98% epoch_loss:0.165 (33531.6 examples/sec; 0.01 sec/batch)
2019-12-19 19:10:09: step:300 epoch:6.143 /10 celoss:0.181 regloss:0.0587 total_loss:0.240 batch_acc:92.50% epoch_acc:94.07% epoch_loss:0.162 (37954.1 examples/sec; 0.01 sec/batch)
Training Epoch:7/10 loss:0.16 acc:0.94 finished, usetime:5.6 sec
Do testing on validation dataset>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>:
50 / 50 finished !Epoch 7: test_acc:0.916 test_loss:0.230 total_time:0 sec
Epoch:7 with train_time:0.093442min and test_time:0.003851min
Confuse_matrix:
T-shirt/top Trouser Pullover Dress Coat Sandal Shirt Sneaker Bag Ankle boot
T-shirt/top 914 0 16 7 2 1 55 0 5 0
Trouser 2 991 0 4 1 0 1 0 1 0
Pullover 17 1 887 7 33 0 54 0 1 0
Dress 33 1 8 926 21 0 11 0 0 0
Coat 2 1 72 23 827 0 75 0 0 0
Sandal 0 0 0 0 0 964 0 23 0 13
Shirt 149 1 56 22 53 0 713 0 6 0
Sneaker 0 0 0 0 0 1 0 986 1 12
Bag 5 2 2 4 1 1 1 0 984 0
Ankle boot 0 0 0 0 0 3 1 30 0 966
Classification_report:
precision recall f1-score support
T-shirt/top 0.8146 0.9140 0.8615 1000
Trouser 0.9940 0.9910 0.9925 1000
Pullover 0.8521 0.8870 0.8692 1000
Dress 0.9325 0.9260 0.9293 1000
Coat 0.8817 0.8270 0.8535 1000
Sandal 0.9938 0.9640 0.9787 1000
Shirt 0.7827 0.7130 0.7462 1000
Sneaker 0.9490 0.9860 0.9671 1000
Bag 0.9860 0.9840 0.9850 1000
Ankle boot 0.9748 0.9660 0.9704 1000
accuracy 0.9158 10000
macro avg 0.9161 0.9158 0.9153 10000
weighted avg 0.9161 0.9158 0.9153 10000
the result saved in /tmp/out/model_out/logs_20191219_190839/validation/validation_result.txt
Model saved at Epoch 7 end .
Do training Epoch=8/10 on train dataset>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>:
2019-12-19 19:10:11: step:100 epoch:7.042 /10 celoss:0.140 regloss:0.0589 total_loss:0.199 batch_acc:93.50% epoch_acc:94.71% epoch_loss:0.146 (32723.3 examples/sec; 0.01 sec/batch)
2019-12-19 19:10:13: step:200 epoch:7.083 /10 celoss:0.171 regloss:0.0591 total_loss:0.230 batch_acc:96.00% epoch_acc:94.65% epoch_loss:0.148 (33557.1 examples/sec; 0.01 sec/batch)
2019-12-19 19:10:15: step:300 epoch:7.125 /10 celoss:0.123 regloss:0.0597 total_loss:0.183 batch_acc:96.00% epoch_acc:94.78% epoch_loss:0.145 (34612.2 examples/sec; 0.01 sec/batch)
Training Epoch:8/10 loss:0.14 acc:0.95 finished, usetime:5.6 sec
Do testing on validation dataset>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>:
50 / 50 finished !Epoch 8: test_acc:0.921 test_loss:0.225 total_time:0 sec
Epoch:8 with train_time:0.092996min and test_time:0.003475min
Confuse_matrix:
T-shirt/top Trouser Pullover Dress Coat Sandal Shirt Sneaker Bag Ankle boot
T-shirt/top 870 0 16 12 5 1 90 0 6 0
Trouser 0 984 1 10 2 0 1 0 2 0
Pullover 14 1 862 7 78 0 37 0 1 0
Dress 14 1 9 933 28 0 15 0 0 0
Coat 2 0 21 24 928 0 25 0 0 0
Sandal 1 0 0 0 0 966 0 22 0 11
Shirt 85 1 53 21 95 0 739 0 6 0
Sneaker 0 0 0 0 0 3 0 978 0 19
Bag 3 2 2 3 2 1 1 2 984 0
Ankle boot 0 0 0 0 0 4 2 24 0 970
Classification_report:
precision recall f1-score support
T-shirt/top 0.8797 0.8700 0.8748 1000
Trouser 0.9949 0.9840 0.9894 1000
Pullover 0.8942 0.8620 0.8778 1000
Dress 0.9238 0.9330 0.9284 1000
Coat 0.8155 0.9280 0.8681 1000
Sandal 0.9908 0.9660 0.9782 1000
Shirt 0.8121 0.7390 0.7738 1000
Sneaker 0.9532 0.9780 0.9654 1000
Bag 0.9850 0.9840 0.9845 1000
Ankle boot 0.9700 0.9700 0.9700 1000
accuracy 0.9214 10000
macro avg 0.9219 0.9214 0.9211 10000
weighted avg 0.9219 0.9214 0.9211 10000
the result saved in /tmp/out/model_out/logs_20191219_190839/validation/validation_result.txt
Model saved at Epoch 8 end .
Do training Epoch=9/10 on train dataset>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>:
2019-12-19 19:10:17: step:100 epoch:8.037 /10 celoss:0.122 regloss:0.0600 total_loss:0.182 batch_acc:95.50% epoch_acc:95.35% epoch_loss:0.130 (33997.8 examples/sec; 0.01 sec/batch)
2019-12-19 19:10:19: step:200 epoch:8.074 /10 celoss:0.142 regloss:0.0602 total_loss:0.203 batch_acc:95.00% epoch_acc:95.23% epoch_loss:0.132 (33875.6 examples/sec; 0.01 sec/batch)
2019-12-19 19:10:21: step:300 epoch:8.111 /10 celoss:0.152 regloss:0.0607 total_loss:0.212 batch_acc:94.50% epoch_acc:95.32% epoch_loss:0.130 (34940.9 examples/sec; 0.01 sec/batch)
Training Epoch:9/10 loss:0.13 acc:0.95 finished, usetime:5.6 sec
Do testing on validation dataset>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>:
50 / 50 finished !Epoch 9: test_acc:0.917 test_loss:0.236 total_time:0 sec
Epoch:9 with train_time:0.093057min and test_time:0.003536min
Confuse_matrix:
T-shirt/top Trouser Pullover Dress Coat Sandal Shirt Sneaker Bag Ankle boot
T-shirt/top 896 0 18 5 3 1 68 0 9 0
Trouser 4 982 1 7 3 0 1 0 2 0
Pullover 12 1 887 4 66 0 29 0 1 0
Dress 33 2 13 870 54 0 27 0 1 0
Coat 2 0 32 10 938 0 18 0 0 0
Sandal 1 0 0 0 0 981 0 11 0 7
Shirt 123 0 62 13 112 0 682 0 8 0
Sneaker 0 0 0 0 0 4 0 986 0 10
Bag 2 0 1 1 3 1 1 1 990 0
Ankle boot 0 0 0 0 0 4 2 33 0 961
Classification_report:
precision recall f1-score support
T-shirt/top 0.8350 0.8960 0.8644 1000
Trouser 0.9970 0.9820 0.9894 1000
Pullover 0.8748 0.8870 0.8808 1000
Dress 0.9560 0.8700 0.9110 1000
Coat 0.7956 0.9380 0.8609 1000
Sandal 0.9899 0.9810 0.9854 1000
Shirt 0.8237 0.6820 0.7462 1000
Sneaker 0.9564 0.9860 0.9710 1000
Bag 0.9792 0.9900 0.9846 1000
Ankle boot 0.9826 0.9610 0.9717 1000
accuracy 0.9173 10000
macro avg 0.9190 0.9173 0.9165 10000
weighted avg 0.9190 0.9173 0.9165 10000
the result saved in /tmp/out/model_out/logs_20191219_190839/validation/validation_result.txt
Model saved at Epoch 9 end .
Do training Epoch=10/10 on train dataset>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>:
2019-12-19 19:10:24: step:100 epoch:9.033 /10 celoss:0.120 regloss:0.0610 total_loss:0.181 batch_acc:95.50% epoch_acc:95.62% epoch_loss:0.121 (32010.3 examples/sec; 0.01 sec/batch)
2019-12-19 19:10:26: step:200 epoch:9.067 /10 celoss:0.123 regloss:0.0611 total_loss:0.185 batch_acc:93.50% epoch_acc:95.70% epoch_loss:0.120 (35502.8 examples/sec; 0.01 sec/batch)
2019-12-19 19:10:27: step:300 epoch:9.100 /10 celoss:0.094 regloss:0.0615 total_loss:0.156 batch_acc:97.50% epoch_acc:95.79% epoch_loss:0.118 (31961.5 examples/sec; 0.01 sec/batch)
Training Epoch:10/10 loss:0.12 acc:0.96 finished, usetime:5.6 sec
Do testing on validation dataset>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>:
50 / 50 finished !Epoch 10: test_acc:0.921 test_loss:0.221 total_time:0 sec
Epoch:10 with train_time:0.093466min and test_time:0.003928min
Confuse_matrix:
T-shirt/top Trouser Pullover Dress Coat Sandal Shirt Sneaker Bag Ankle boot
T-shirt/top 880 0 21 12 4 1 76 0 6 0
Trouser 1 981 1 12 2 0 1 0 2 0
Pullover 11 1 917 7 34 0 29 0 1 0
Dress 13 0 9 939 25 0 14 0 0 0
Coat 2 0 70 20 873 0 35 0 0 0
Sandal 1 0 0 0 0 968 0 19 0 12
Shirt 100 1 73 25 71 0 725 0 5 0
Sneaker 0 0 0 0 0 4 0 973 0 23
Bag 3 1 2 3 3 1 2 3 982 0
Ankle boot 0 0 0 0 0 4 1 22 0 973
Classification_report:
precision recall f1-score support
T-shirt/top 0.8704 0.8800 0.8752 1000
Trouser 0.9970 0.9810 0.9889 1000
Pullover 0.8390 0.9170 0.8763 1000
Dress 0.9224 0.9390 0.9306 1000
Coat 0.8626 0.8730 0.8678 1000
Sandal 0.9898 0.9680 0.9788 1000
Shirt 0.8211 0.7250 0.7700 1000
Sneaker 0.9567 0.9730 0.9648 1000
Bag 0.9859 0.9820 0.9840 1000
Ankle boot 0.9653 0.9730 0.9691 1000
accuracy 0.9211 10000
macro avg 0.9210 0.9211 0.9205 10000
weighted avg 0.9210 0.9211 0.9205 10000
the result saved in /tmp/out/model_out/logs_20191219_190839/validation/validation_result.txt
Model saved at Epoch 10 end .
File structure
!tree /tmp/out/model_out
/tmp/out/model_out
├── hdf5_models_20191219_190839
│   ├── ckpt_epoch01_val_acc0.88.hdf5
│   ├── ckpt_epoch02_val_acc0.90.hdf5
│   ├── ckpt_epoch03_val_acc0.91.hdf5
│   ├── ckpt_epoch04_val_acc0.90.hdf5
│   ├── ckpt_epoch05_val_acc0.91.hdf5
│   ├── ckpt_epoch06_val_acc0.91.hdf5
│   ├── ckpt_epoch07_val_acc0.92.hdf5
│   ├── ckpt_epoch08_val_acc0.92.hdf5
│   ├── ckpt_epoch09_val_acc0.92.hdf5
│   └── ckpt_epoch10_val_acc0.92.hdf5
└── logs_20191219_190839
    ├── model_arch.png
    ├── model_json.json
    ├── train
    │   ├── events.out.tfevents.1576753723.cuda10.6152.8.v2
    │   ├── events.out.tfevents.1576753766.cuda10.profile-empty
    │   └── plugins
    │       └── profile
    │           └── 2019-12-19_19-09-26
    │               └── local.trace
    ├── training.png
    └── validation
        ├── confusion_matrix_epoch10.png
        ├── confusion_matrix_epoch1.png
        ├── confusion_matrix_epoch2.png
        ├── confusion_matrix_epoch3.png
        ├── confusion_matrix_epoch4.png
        ├── confusion_matrix_epoch5.png
        ├── confusion_matrix_epoch6.png
        ├── confusion_matrix_epoch7.png
        ├── confusion_matrix_epoch8.png
        ├── confusion_matrix_epoch9.png
        ├── events.out.tfevents.1576753723.cuda10.6152.16.v2
        └── validation_result.txt

7 directories, 28 files
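Each .hdf5 file above is a complete model (architecture plus weights), so any epoch can be restored directly; a minimal sketch of reloading the last checkpoint and scoring one validation batch (the file name is taken from the listing above):
# Reload a saved checkpoint and evaluate it on a single validation batch.
best = tf.keras.models.load_model(
    '/tmp/out/model_out/hdf5_models_20191219_190839/ckpt_epoch10_val_acc0.92.hdf5')
for data, label in validation_ds.take(1):
    logits = best(data, training=False)
    preds = tf.argmax(logits, axis=-1, output_type=tf.int32)
    acc = tf.reduce_mean(tf.cast(tf.equal(preds, tf.cast(label, tf.int32)), tf.float32))
    print('batch accuracy:', float(acc))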
Summary
To view TensorBoard, simply run:
tensorboard --logdir=/tmp/out/model_out/logs_20191219_190839 --bind_all
Note that, since this was run inside Jupyter (possibly because of eager mode), the graph was not recorded in TensorBoard, and I have not yet found a way around this.
Code showing how to display TensorBoard inside Jupyter will be added later.
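For displaying TensorBoard inside Jupyter, one option is the notebook extension that ships with TensorBoard; a minimal sketch, assuming a TensorBoard version with notebook support (1.14 or newer):
%load_ext tensorboard
%tensorboard --logdir /tmp/out/model_out/logs_20191219_190839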