paddle深度学习基础之模型加载及恢复训练
程序员文章站
2022-03-04 20:30:10
...
前言
前面几节,我们从各个方面对模型进行了优化,也实现了将模型保存下来。但是在日常训练工作中我们会遇到一些突发情况,导致训练过程主动或被动的中断。如果训练一个模型需要花费几天的训练时间,中断后从初始状态重新训练是不可接受的。别着急,这一节咱们就是讨论这个事情。
保存模型
看过前面几篇博客的同学,肯定已经知道如何保存模型。这里还需要强调一下,我们不仅可以保存模型的参数,还可以保存优化器的参数。比如我们这次测试代码使用的是动态学习率的优化器,训练的次数不同,学习率也不一样,所以,我们也需要把这个信息给存储下来。
model_save_path = "model/mnist-model/dygraph-mnist"
# Both state dicts are written under the same path prefix;
# fluid.load_dygraph(model_save_path) later returns them as a pair.
fluid.save_dygraph(model.state_dict(), model_save_path)  # save the model parameters
fluid.save_dygraph(optimizer.state_dict(), model_save_path)  # save the optimizer state
加载模型
# Read back the two state dicts stored under model_save_path:
# params_dict holds the model parameters, opt_dict the optimizer parameters.
params_dict, opt_dict = fluid.load_dygraph(model_save_path)
model = MNIST()
# Copy the saved parameters into the freshly constructed network.
model.load_dict(params_dict)
optimizer = fluid.optimizer.AdamOptimizer(learning_rate=0.001,parameter_list=model.parameters())
# Load the saved optimizer parameters into the new optimizer.
optimizer.set_dict(opt_dict)
- params_dict:模型的参数
- opt_dict:优化器的参数
完整代码
import paddle
import numpy as np
import matplotlib.pyplot as plt
import gzip
import json
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Linear
from paddle.fluid.dygraph.nn import Conv2D,Pool2D
from tb_paddle import SummaryWriter
from PIL import Image
import os
'''
此项目主要是在网络结构层面上优化模型
1.经典的全连接神经网络
'''
# Decompress and parse the MNIST archive. The payload is stored as JSON, so it
# is read with json.load (pandas would work as well). The context manager
# closes the gzip handle as soon as parsing is finished instead of leaving it
# open for the lifetime of the process.
with gzip.open('data/mnist.json.gz') as mnistdata:
    data = json.load(mnistdata)
# The file holds three splits, in order: training, validation and test.
train_data, val_data, test_data = data
# Image height and width used when reshaping each sample.
IMG_ROWS = 28
IMG_COLS = 28
##数据乱序,生成批次数据
def data_loader(dataname='train', batch_size=20):
    """Build a batch generator over one split of the loaded MNIST data.

    Args:
        dataname: Which split to read: 'train', 'test' or 'val'.
        batch_size: Number of samples per yielded batch. The last batch is
            smaller when the split size is not a multiple of ``batch_size``.

    Returns:
        A zero-argument generator function. Iterating its result yields
        ``(images, labels)`` numpy arrays, where every image is float32
        reshaped to ``[1, IMG_ROWS, IMG_COLS]`` and every label is int64
        reshaped to ``[1]``.

    Raises:
        ValueError: If ``dataname`` is not one of the three split names.
        AssertionError: If the split has a different number of images and
            labels.
    """
    if dataname == 'train':
        img, label = train_data[0], train_data[1]
    elif dataname == 'test':
        img, label = test_data[0], test_data[1]
    elif dataname == 'val':
        img, label = val_data[0], val_data[1]
    else:
        raise ValueError("data only can be one of ['train','test','val']")

    # Sanity-check the split before building batches from it.
    assert len(img)==len(label),'the lenth of img must be the same as the length of label'

    # The visiting order is shuffled once here, so every pass made by the
    # returned generator walks the samples in this same shuffled order.
    indices = list(range(len(img)))
    np.random.shuffle(indices)

    def data_generator():
        batch_imgs = []
        batch_labels = []
        for i in indices:
            # Convert each raw sample to the layout/dtype the network expects.
            batch_imgs.append(
                np.reshape(img[i], [1, IMG_ROWS, IMG_COLS]).astype('float32'))
            batch_labels.append(np.reshape(label[i], [1]).astype('int64'))
            if len(batch_imgs) % batch_size == 0:
                yield np.array(batch_imgs), np.array(batch_labels)
                batch_imgs = []
                batch_labels = []
        # Emit whatever is left over as a final, smaller batch.
        if len(batch_imgs) > 0:
            yield np.array(batch_imgs), np.array(batch_labels)

    return data_generator
#定义类
class MNIST(fluid.dygraph.Layer):
    """Digit classifier: two conv + max-pool stages followed by a softmax layer."""

    def __init__(self):
        super(MNIST, self).__init__()
        self.conv1 = Conv2D(num_channels=1, num_filters=20, filter_size=5, stride=1, padding=2, act='relu')
        self.pool1 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        self.conv2 = Conv2D(num_channels=20, num_filters=20, filter_size=5, stride=1, padding=2, act='relu')
        self.pool2 = Pool2D(pool_size=2, pool_stride=2, pool_type='max')
        # 980 = 20 channels * 7 * 7: a 28x28 input is halved by each of the
        # two stride-2 pools while the padded 5x5 convolutions keep the size.
        self.linear = Linear(input_dim=980, output_dim=10, act='softmax')

    def forward(self, inputs, label=None, check_shape=False, check_content=False):
        """Run the network and optionally report accuracy and debug output.

        Args:
            inputs: Batch of images fed to the first convolution.
            label: Optional ground-truth labels. When given, accuracy is
                computed against the predictions.
            check_shape: Print layer hyper-parameters and the shape of every
                intermediate tensor.
            check_content: Print a sample of kernel weights and activations.

        Returns:
            ``(outputs, acc)`` when ``label`` is not None, otherwise just
            ``outputs`` (the softmax predictions).
        """
        conv1 = self.conv1(inputs)
        pool1 = self.pool1(conv1)
        conv2 = self.conv2(pool1)
        pool2 = self.pool2(conv2)
        # Flatten everything except the batch dimension for the linear layer.
        pool21 = fluid.layers.reshape(pool2, [pool2.shape[0], -1])
        outputs = self.linear(pool21)

        if check_shape:
            print("\n------------打印各个层设置的网络超参数的尺寸 -------------")
            print("conv1-- kernel_size:{}, padding:{}, stride:{}".format(self.conv1.weight.shape, self.conv1._padding, self.conv1._stride))
            print("conv2-- kernel_size:{}, padding:{}, stride:{}".format(self.conv2.weight.shape, self.conv2._padding, self.conv2._stride))
            print("pool1-- pool_type:{}, pool_size:{}, pool_stride:{}".format(self.pool1._pool_type, self.pool1._pool_size, self.pool1._pool_stride))
            print("pool2-- pool_type:{}, poo2_size:{}, pool_stride:{}".format(self.pool2._pool_type, self.pool2._pool_size, self.pool2._pool_stride))
            print("liner-- weight_size:{}, bias_size_{}, activation:{}".format(self.linear.weight.shape, self.linear.bias.shape, self.linear._act))
            print("\n------------打印各个层的形状 -------------")
            print("inputs_shape: {}".format(inputs.shape))
            print("outputs1_shape: {}".format(conv1.shape))
            print("outputs2_shape: {}".format(pool1.shape))
            print("outputs3_shape: {}".format(conv2.shape))
            print("outputs4_shape: {}".format(pool2.shape))
            print("outputs5_shape: {}".format(outputs.shape))

        if check_content:
            # Kernels have many weights; print only the first slice of each.
            print("\n########## print convolution layer's kernel ###############")
            print("conv1 params -- kernel weights:", self.conv1.weight[0][0])
            print("conv2 params -- kernel weights:", self.conv2.weight[0][0])
            # Pick one random channel per convolution to inspect.
            idx1 = np.random.randint(0, conv1.shape[1])
            idx2 = np.random.randint(0, conv2.shape[1])
            # Show activations for the first image of the batch only. Each
            # line reads from the tensor its label names (conv1, conv2, and
            # the final softmax output).
            print("\nThe {}th channel of conv1 layer: ".format(idx1), conv1[0][idx1])
            print("The {}th channel of conv2 layer: ".format(idx2), conv2[0][idx2])
            print("The output of last layer:", outputs[0], '\n')

        if label is not None:
            acc = fluid.layers.accuracy(input=outputs, label=label)
            return outputs, acc
        return outputs
#训练
with fluid.dygraph.guard():
    model = MNIST()
    model.train()
    train_loader = data_loader()
    optimizer = fluid.optimizer.AdamOptimizer(learning_rate=0.001, parameter_list=model.parameters())
    place = fluid.CPUPlace()
    traindata_loader = fluid.io.DataLoader.from_generator(capacity=5, return_list=True)
    traindata_loader.set_batch_generator(train_loader, places=place)
    EPOCH_NUM = 3
    # Scalar logs for TensorBoard.
    data_writer = SummaryWriter(logdir="log/data")
    model_save_path = "model/mnist-model/dygraph-mnist"
    # batch_id restarts at 0 every epoch, so it cannot serve as the x-axis of
    # the logged curves; count optimisation steps across all epochs instead.
    global_step = 0
    try:
        for epoch_id in range(EPOCH_NUM):
            for batch_id, data in enumerate(traindata_loader()):
                image_data, label_data = data
                image = fluid.dygraph.to_variable(image_data)
                label = fluid.dygraph.to_variable(label_data)
                # Pass check_shape=True / check_content=True here to dump
                # layer shapes or activations for a batch while debugging.
                predict, acc = model(image, label)
                loss = fluid.layers.cross_entropy(predict, label)
                avg_loss = fluid.layers.mean(loss)
                if batch_id != 0 and batch_id % 100 == 0:
                    data_writer.add_scalar("train/loss", avg_loss.numpy(), global_step)
                    data_writer.add_scalar("train/accuracy", acc.numpy(), global_step)
                    print("epoch:{},batch:{},loss is:{},acc is :{}".format(epoch_id,batch_id,avg_loss.numpy(),acc.numpy()))
                avg_loss.backward()
                optimizer.minimize(avg_loss)
                model.clear_gradients()
                global_step += 1
            # Checkpoint after every epoch; the epoch index is appended to
            # the path prefix so each epoch gets its own pair of files.
            print("保存模型")
            fluid.save_dygraph(model.state_dict(), model_save_path+""+str(epoch_id))
            fluid.save_dygraph(optimizer.state_dict(), model_save_path+""+str(epoch_id))
    finally:
        # Flush and release the log writer even if training is interrupted.
        data_writer.close()
#再训练
print("接着训练")
with fluid.dygraph.guard():
    model = MNIST()
    model_save_path = "model/mnist-model/dygraph-mnist"
    # Index of the epoch whose checkpoint is restored. Both the checkpoint
    # path and the first epoch of the resumed loop derive from this value so
    # they cannot drift apart.
    resume_from_epoch = 0
    params_dict, opt_dict = fluid.load_dygraph(model_save_path + str(resume_from_epoch))
    model.load_dict(params_dict)
    # Put the model in training mode explicitly, as the initial run does.
    model.train()
    train_loader = data_loader()
    optimizer = fluid.optimizer.AdamOptimizer(learning_rate=0.001, parameter_list=model.parameters())
    # Restore the saved optimizer parameters alongside the model parameters.
    optimizer.set_dict(opt_dict)
    place = fluid.CPUPlace()
    traindata_loader = fluid.io.DataLoader.from_generator(capacity=5, return_list=True)
    traindata_loader.set_batch_generator(train_loader, places=place)
    EPOCH_NUM = 3
    # Continue with the epoch after the restored one.
    for epoch_id in range(resume_from_epoch + 1, EPOCH_NUM):
        for batch_id, data in enumerate(traindata_loader()):
            image_data, label_data = data
            image = fluid.dygraph.to_variable(image_data)
            label = fluid.dygraph.to_variable(label_data)
            predict, acc = model(image, label)
            loss = fluid.layers.cross_entropy(predict, label)
            avg_loss = fluid.layers.mean(loss)
            if batch_id != 0 and batch_id % 100 == 0:
                print("epoch:{},batch:{},loss is:{},acc is :{}".format(epoch_id,batch_id,avg_loss.numpy(),acc.numpy()))
            avg_loss.backward()
            optimizer.minimize(avg_loss)
            model.clear_gradients()
总结
截至这篇博客,整个基础系列就总结完了。这些资源都是百度AI Studio提供的免费课程,全程听完并实践之后,真的收获很多。也很感谢制作这些课程的工作人员,同样希望这一系列基础课程能够给大家带来一些帮助。