【PyTorch】14:Dropout 正则化
程序员文章站
2024-03-15 11:35:23
...
文章目录
在相对较小的数据集上训练的大型神经网络可能会导致过拟合,使模型学习训练数据中的噪声数据,当在新数据(例如测试数据集)上评估模型时,会导致性能不佳。由于过拟合,泛化误差增加。
减少过拟合的一种方法是在数据集上拟合所有可能的不同神经网络模型,并对每个模型的预测求平均。在实践中这是不可行的,可以使用集成少量模型的融合模型来近似。但即使采用这种集成近似法也存在一个问题,即需要拟合和存储多个模型;如果模型很大,可能需要很长时间进行训练。
Dropout 是一种正则化方法,它类似并行地训练大量具有不同架构的神经网络。来自论文:《Dropout: A simple way to prevent neural networks from overfitting》
Dropout:随机失活。随机:dropout probability;失活:weight=0。
- 数据尺度变化:测试时,所有权重乘以 1 - drop_prob;例如 drop_prob = 0.3,需要乘以 1 - drop_prob = 0.7。
Dropout 具有使训练过程变得嘈杂的作用,迫使层中节点的输出连接随机地断开。因为这种断开连接,可以使得网络减少对来自先前层的噪声共同适应的可能性,从而使模型更加健壮。
实例:
# -*- coding:utf-8 -*-
"""
@file name : dropout_regularization.py
# @author : TingsongYu https://github.com/TingsongYu
@date : 2019-10-31
@brief : dropout 使用实验
"""
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from torch.utils.tensorboard import SummaryWriter
# Hyperparameters shared by the dropout experiment below.
n_hidden: int = 200       # hidden-layer width passed to MLP as neural_num
max_iter: int = 2_000     # number of iterations of the training loop
disp_interval: int = 400  # log histograms and plot every disp_interval iterations
lr_init: float = 1e-2     # learning rate given to both SGD optimizers
# ============================ step 1/5: data ============================
def gen_data(num_data=10, x_range=(-1, 1)):
    """Generate noisy samples of the line ``y = 1.5 * x`` for train and test.

    Args:
        num_data: Number of evenly spaced x positions in each split.
        x_range: ``(low, high)`` endpoints forwarded to ``torch.linspace``.

    Returns:
        A tuple ``(train_x, train_y, test_x, test_y)`` of tensors, each of
        shape ``(num_data, 1)``. Both splits use the same x grid; the train
        targets carry Gaussian noise with std 0.5, the test targets std 0.3.
    """
    w = 1.5
    train_x = torch.linspace(*x_range, num_data).unsqueeze_(1)
    train_y = w * train_x + torch.normal(0, 0.5, size=train_x.size())
    test_x = torch.linspace(*x_range, num_data).unsqueeze_(1)
    test_y = w * test_x + torch.normal(0, 0.3, size=test_x.size())
    return train_x, train_y, test_x, test_y


train_x, train_y, test_x, test_y = gen_data(x_range=(-1, 1))
# ============================ step 2/5: model ============================
class MLP(nn.Module):
    """Perceptron with three hidden layers, one input feature and one output.

    Each hidden ``nn.Linear`` is followed by an in-place ``nn.ReLU`` and an
    ``nn.Dropout``; the final ``nn.Linear`` produces a single value.

    Args:
        neural_num: Width of every hidden layer.
        d_prob: Drop probability passed to every ``nn.Dropout`` layer.
    """

    def __init__(self, neural_num, d_prob=0.5):
        super(MLP, self).__init__()
        # Layer order is kept as-is so the state_dict keys
        # (linears.0, linears.3, ...) stay the same.
        self.linears = nn.Sequential(
            nn.Linear(1, neural_num),
            nn.ReLU(inplace=True),
            nn.Dropout(d_prob),

            nn.Linear(neural_num, neural_num),
            nn.ReLU(inplace=True),
            nn.Dropout(d_prob),

            nn.Linear(neural_num, neural_num),
            nn.ReLU(inplace=True),
            nn.Dropout(d_prob),

            nn.Linear(neural_num, 1),
        )

    def forward(self, x):
        """Apply the layer stack to ``x``, whose last dimension must be 1."""
        return self.linears(x)
# Two identical networks: one without dropout, one with drop probability 0.5.
net_prob_0 = MLP(neural_num=n_hidden, d_prob=0.)
net_prob_05 = MLP(neural_num=n_hidden, d_prob=0.5)

# ============================ step 3/5: optimizers ============================
optim_normal = torch.optim.SGD(net_prob_0.parameters(), lr=lr_init, momentum=0.9)
optim_reglar = torch.optim.SGD(net_prob_05.parameters(), lr=lr_init, momentum=0.9)

# ============================ step 4/5: loss function ============================
loss_func = torch.nn.MSELoss()

# ============================ step 5/5: training loop ============================
writer = SummaryWriter(comment='_test_tensorboard', filename_suffix="12345678")
for epoch in range(max_iter):

    # NOTE: the "*_wdecay" names hold the values of the dropout model
    # (net_prob_05); the names are kept unchanged for compatibility.
    pred_normal, pred_wdecay = net_prob_0(train_x), net_prob_05(train_x)
    loss_normal, loss_wdecay = loss_func(pred_normal, train_y), loss_func(pred_wdecay, train_y)

    optim_normal.zero_grad()
    optim_reglar.zero_grad()

    loss_normal.backward()
    loss_wdecay.backward()

    optim_normal.step()
    optim_reglar.step()

    if (epoch+1) % disp_interval == 0:

        # Switch to eval mode so dropout is disabled for the test predictions.
        net_prob_0.eval()
        net_prob_05.eval()

        # Visualization: log parameter values and their gradients to TensorBoard.
        for name, layer in net_prob_0.named_parameters():
            writer.add_histogram(name + '_grad_normal', layer.grad, epoch)
            writer.add_histogram(name + '_data_normal', layer, epoch)

        for name, layer in net_prob_05.named_parameters():
            writer.add_histogram(name + '_grad_regularization', layer.grad, epoch)
            writer.add_histogram(name + '_data_regularization', layer, epoch)

        # Predictions are only plotted, so no autograd graph is needed.
        with torch.no_grad():
            test_pred_prob_0, test_pred_prob_05 = net_prob_0(test_x), net_prob_05(test_x)

        # Plotting
        plt.clf()
        plt.scatter(train_x.numpy(), train_y.numpy(), c='blue', s=50, alpha=0.3, label='train')
        plt.scatter(test_x.numpy(), test_y.numpy(), c='red', s=50, alpha=0.3, label='test')
        plt.plot(test_x.numpy(), test_pred_prob_0.numpy(), 'r-', lw=3, label='d_prob_0')
        plt.plot(test_x.numpy(), test_pred_prob_05.numpy(), 'b--', lw=3, label='d_prob_05')
        plt.text(-0.25, -1.5, 'd_prob_0 loss={:.8f}'.format(loss_normal.item()), fontdict={'size': 15, 'color': 'red'})
        plt.text(-0.25, -2, 'd_prob_05 loss={:.6f}'.format(loss_wdecay.item()), fontdict={'size': 15, 'color': 'red'})

        plt.ylim((-2.5, 2.5))
        plt.legend(loc='upper left')
        plt.title("Epoch: {}".format(epoch+1))
        plt.show()
        plt.close()

        # Back to training mode for the following iterations.
        net_prob_0.train()
        net_prob_05.train()

# Flush pending events and release the event file.
writer.close()
Dropout 层:
torch.nn.Dropout(p=0.5, inplace=False)
参数:
- p:被舍弃概率,即失活概率。
class Net(nn.Module):
    """Dropout, then a bias-free linear layer to one output, then ReLU.

    Args:
        neural_num: Number of input features of the linear layer.
        d_prob: Drop probability of the leading ``nn.Dropout``.
    """

    def __init__(self, neural_num, d_prob=0.5):
        super(Net, self).__init__()
        self.linears = nn.Sequential(
            nn.Dropout(d_prob),
            nn.Linear(neural_num, 1, bias=False),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        """Apply dropout, the linear layer and ReLU to ``x``."""
        return self.linears(x)
input_num = 10000
x = torch.ones(input_num, dtype=torch.float32)

net = Net(input_num, d_prob=0.5)
# Set every weight of the bias-free linear layer to 1, so the output is the
# sum of the inputs as they leave the dropout layer.
fc = net.linears[1]
fc.weight.detach().fill_(1.)

# Run the same input once in training mode and once in eval mode.
for banner, enter_mode in (
    ("output in training mode", net.train),
    ("output in eval mode", net.eval),
):
    enter_mode()
    y = net(x)
    print(banner, y)
输出:
output in training mode tensor([10036.], grad_fn=<ReluBackward1>)
output in eval mode tensor([10000.], grad_fn=<ReluBackward1>)
由以上可知,PyTorch 中在训练时的数据尺度会自动乘以 $\frac{1}{1-p}$(其中 $p$ 为失活概率;上例中 $p=0.5$,即乘以 2),这样在验证时就无需再变化,从而加快模型训练速度。