A First Look at PaddlePaddle's Static and Dynamic Graphs
I have recently been studying Paddle's segmentation framework, which uses Paddle's dynamic-graph (dygraph) mode. After reimplementing a model myself, I found the structure very similar to PyTorch's; doing the same thing in static-graph mode, it unsurprisingly resembled TensorFlow. Below are both implementations on MNIST, which also gave me a chance to learn Paddle's optimizers and learning-rate scheduling. Paddle additionally supports converting between dynamic and static graphs, which I'll try next time.
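Both scripts below combine linear warmup with a decay schedule. Based on my reading of the documented behavior of fluid.layers.linear_lr_warmup and fluid.layers.piecewise_decay (an assumption, not taken from the original code), the effective rate at a given global step can be checked in plain Python:

# Pure-Python reference for the schedule used in the static script below:
# linear warmup over the first warmup_steps, then piecewise-constant decay.
# Note the boundaries are global step counts, not epochs; with 60000 training
# images and batch size 64 (drop_last=True), one epoch is 937 steps.
def lr_at(step, boundaries, values, warmup_steps=500, start_lr=0.0001, end_lr=0.001):
    if step < warmup_steps:
        # linear ramp from start_lr to end_lr
        return start_lr + (end_lr - start_lr) * step / warmup_steps
    for boundary, value in zip(boundaries, values):
        if step < boundary:
            return value
    return values[-1]  # piecewise_decay takes len(boundaries) + 1 values

print(lr_at(4685, [4685, 6246, 7808], [0.001, 0.0005, 0.00001, 0.00005]))  # 0.0005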
Static graph:
import numpy as np
import paddle
import paddle.fluid as fluid

epoch_num = 10
BATCH_SIZE = 64

train_reader = paddle.batch(paddle.dataset.mnist.train(), batch_size=BATCH_SIZE, drop_last=True)
test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128, drop_last=False)

class Mnist(fluid.Layer):
    def __init__(self):
        super(Mnist, self).__init__()
        # in static mode the inputs are declared up front as feed placeholders
        self.x = fluid.data(name="img", shape=[None, 1, 28, 28], dtype="float32")
        self.y = fluid.data(name="label", shape=[None, 1], dtype="int64")

    def forward(self):
        self.conv_bn1 = fluid.layers.batch_norm(
            fluid.layers.conv2d(input=self.x, num_filters=8, filter_size=(3, 3),
                                stride=2, padding="SAME", act=None),
            act="leaky_relu")
        self.conv_bn2 = fluid.layers.batch_norm(
            fluid.layers.conv2d(input=self.conv_bn1, num_filters=16, filter_size=(3, 3),
                                stride=2, padding="SAME", act=None),
            act="leaky_relu")
        print(self.conv_bn2.shape)  # 7*7
        self.conv_bn_pool = fluid.nets.img_conv_group(
            input=self.conv_bn2, conv_num_filter=(32, 32), conv_padding=1,
            conv_act="leaky_relu", conv_filter_size=3, conv_with_batchnorm=True,
            pool_size=3, pool_stride=2)
        print(self.conv_bn_pool.shape)  # 3*3
        self.feat = fluid.layers.reshape(self.conv_bn_pool, shape=(-1, 3 * 3 * 32))
        self.fc = fluid.layers.fc(input=self.feat, size=10, act="softmax")
        # self.output = np.argmax(self.fc, axis=1)
        # Calling np.argmax inside the network raises a dimension error; my guess
        # is that self.fc only has the symbolic shape (-1, 10) here, so it cannot
        # be manipulated directly along that axis.
        return self.fc

    def backward(self):
        loss = fluid.layers.cross_entropy(self.fc, self.y)
        avg_loss = fluid.layers.mean(loss)
        return avg_loss
if __name__ == '__main__':
    use_cuda = True
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    net = Mnist()
    out = net.forward()
    avg_loss = net.backward()
    boundaries = [4685, 6246, 7808]  # global step counts, not epochs
    lr_steps = [0.001, 0.0005, 0.00001, 0.00005]
    learning_rate = fluid.layers.piecewise_decay(boundaries, lr_steps)
    lr = fluid.layers.linear_lr_warmup(learning_rate=learning_rate, warmup_steps=500,
                                       start_lr=0.0001, end_lr=0.001)
    opt = fluid.optimizer.MomentumOptimizer(learning_rate=lr, momentum=0.9)
    # opt.minimize() cannot go inside the training loop after the loss is
    # computed -- it raises an error there; in TF it goes after.
    opt.minimize(avg_loss)
    exe = fluid.Executor(place)
    # default_startup_program is the default/global startup program: it runs only
    # once to initialize parameters, while default_main_program runs on every
    # mini-batch and updates the weights.
    exe.run(fluid.default_startup_program())
    # the default main program stores the op and variable descriptions
    main_program = fluid.default_main_program()
    test_program = fluid.default_main_program().clone(for_test=True)
    for epoch in range(epoch_num):
        for batch_id, data in enumerate(train_reader()):
            x = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32')
            y = np.array([x[1] for x in data]).astype('int64').reshape(-1, 1)
            _, loss, _lr = exe.run(main_program, feed={"img": x, "label": y},
                                   fetch_list=[out, avg_loss, lr])
            if batch_id % 100 == 0:
                print("epoch {} step {} lr {} Loss {}".format(epoch, batch_id, _lr, loss))
        test_num = 0
        right_num = 0
        for _, data in enumerate(test_reader()):
            x = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32')
            y = np.array([x[1] for x in data]).astype('int64').reshape(-1, 1)
            output, loss = exe.run(test_program, feed={"img": x, "label": y},
                                   fetch_list=[out, avg_loss])
            pred = np.argmax(output, axis=1)
            label = y.T[0]
            right_num += (pred == label).sum()
            test_num += pred.shape[0]
            # print("pred: ", pred[:10])
            # print("label:", label[:10])
        acc = right_num / test_num
        print("test_acc:", acc)
        print('-' * 40)
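A note on the commented-out np.argmax in forward(): at graph-build time self.fc is a symbolic Variable, so host-side numpy cannot operate on it; the argmax has to be a graph op. A minimal sketch of how that could be wired in, using fluid.layers.argmax and fluid.layers.accuracy (my own addition, not in the original code):

import paddle.fluid as fluid

def add_eval_ops(logits, label):
    # argmax as a graph op, operating on the symbolic (-1, 10) logits
    pred = fluid.layers.argmax(logits, axis=1)
    # fluid also ships a ready-made top-1 accuracy op
    acc = fluid.layers.accuracy(input=logits, label=label)
    return pred, acc

# hypothetical usage in the script above:
# pred, acc = add_eval_ops(net.fc, net.y)
# then fetch pred and acc in exe.run() instead of running np.argmax on the host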
Dynamic graph:
import numpy as np
import paddle
import paddle.fluid as fluid

epoch_num = 10
BATCH_SIZE = 64

train_reader = paddle.batch(paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=60000),
                            batch_size=BATCH_SIZE, drop_last=True)
test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128, drop_last=False)

class Conv_Mnist(fluid.dygraph.Layer):
    def __init__(self):
        super(Conv_Mnist, self).__init__()
        self.conv1 = fluid.dygraph.Conv2D(num_channels=1, num_filters=8, filter_size=(3, 3), stride=2, padding=1)
        self.bn1 = fluid.dygraph.BatchNorm(num_channels=8, act="leaky_relu")
        self.conv2 = fluid.dygraph.Conv2D(num_channels=8, num_filters=16, filter_size=(3, 3), stride=2, padding=1)
        self.bn2 = fluid.dygraph.BatchNorm(num_channels=16, act="leaky_relu")
        self.conv3 = fluid.dygraph.Conv2D(num_channels=16, num_filters=32, filter_size=(3, 3), stride=2, padding=1)
        self.bn3 = fluid.dygraph.BatchNorm(num_channels=32, act="leaky_relu")
        self.fc = fluid.dygraph.Linear(input_dim=4 * 4 * 32, output_dim=10, act="softmax")

    def forward(self, x):
        conv1 = self.conv1(x)
        bn1 = self.bn1(conv1)
        conv2 = self.conv2(bn1)
        bn2 = self.bn2(conv2)
        conv3 = self.conv3(bn2)
        bn3 = self.bn3(conv3)
        bn3 = fluid.layers.reshape(bn3, shape=(-1, 4 * 4 * 32))
        out = self.fc(bn3)
        return out
if __name__ == '__main__':
    with fluid.dygraph.guard():
        net = Conv_Mnist()
        learning_rate = fluid.layers.exponential_decay(learning_rate=0.001, decay_steps=1000, decay_rate=0.8)
        lr = fluid.layers.linear_lr_warmup(learning_rate=learning_rate, warmup_steps=500,
                                           start_lr=0.0001, end_lr=0.001)
        opt = fluid.optimizer.MomentumOptimizer(learning_rate=lr, momentum=0.9,
                                                parameter_list=net.parameters())
        # nll_loss = fluid.dygraph.NLLLoss()  # gives a negative loss, and loss comes
        # out equal to avg_loss; fluid.dygraph does not seem to have a cross-entropy layer
        for epoch in range(epoch_num):
            for batch_id, data in enumerate(train_reader()):
                x = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32')
                y = np.array([x[1] for x in data]).astype('int64').reshape(-1, 1)
                img = fluid.dygraph.to_variable(x)
                label = fluid.dygraph.to_variable(y)
                out = net(img)
                loss = fluid.layers.cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)
                _lr = opt.current_step_lr()  # learning rate at the current step
                avg_loss.backward()
                opt.minimize(avg_loss)
                net.clear_gradients()
                if batch_id % 100 == 0:
                    print("epoch {} step {} lr {} Loss {}".format(epoch, batch_id, _lr, avg_loss.numpy()))
            test_num = 0
            right_num = 0
            net.eval()  # switch BatchNorm to inference mode once per evaluation pass
            for _, data in enumerate(test_reader()):
                x = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32')
                y = np.array([x[1] for x in data]).astype('int64').reshape(-1, 1)
                img = fluid.dygraph.to_variable(x)
                label = fluid.dygraph.to_variable(y)
                out = net(img)
                output = np.argmax(out.numpy(), axis=1)
                _label = y.T[0]
                right_num += (output == _label).sum()
                test_num += output.shape[0]
                # print("output:", output[:10])
                # print("label: ", _label[:10])
            acc = right_num / test_num
            print("test_acc:", acc)
            print('-' * 60)
            net.train()
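As for the dynamic-to-static conversion mentioned at the top, one route I plan to try is fluid's TracedLayer. The sketch below follows my reading of the fluid 1.x docs, so treat the exact signatures as assumptions rather than verified code:

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import TracedLayer

with fluid.dygraph.guard():
    net = Conv_Mnist()  # the dygraph model defined above
    img = fluid.dygraph.to_variable(np.random.rand(1, 1, 28, 28).astype('float32'))
    # trace() runs the layer once, records the executed ops, and returns both
    # the dygraph output and a static-graph TracedLayer
    out, static_layer = TracedLayer.trace(net, inputs=[img])
    # the traced program can then be saved as a static inference model
    static_layer.save_inference_model('./mnist_infer', feed=[0], fetch=[0])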