[Deep Learning] Implementing Dropout (PyTorch)
3.13 Tackling Overfitting with Dropout
3.13.1 Method
The dropout method discussed in this section refers specifically to inverted dropout.
Let the dropout probability be p. Then with probability p a hidden unit h_i is cleared to zero, and with probability 1−p it is divided by 1−p (stretched) so that the expected value of the input to the next layer is unchanged.
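To see why this rescaling leaves the expectation unchanged, write the dropped-out unit h_i' (notation follows the sentence above) and take its expectation over the random mask:

\[
h_i' =
\begin{cases}
0, & \text{with probability } p,\\
\dfrac{h_i}{1-p}, & \text{with probability } 1-p,
\end{cases}
\qquad
\mathbb{E}[h_i'] = p \cdot 0 + (1-p) \cdot \frac{h_i}{1-p} = h_i .
\]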
When testing the model, we generally do not use dropout, so as to obtain more deterministic results.
3.13.2 Implementation from Scratch
%matplotlib inline
import torch
import torch.nn as nn
import numpy as np
import sys
sys.path.append('..')
import d2lzh_pytorch as d2l
def dropout(X, drop_prob):
    X = X.float()
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    # When keep_prob is 0, every element is dropped
    if keep_prob == 0:
        return torch.zeros_like(X)
    # torch.rand draws uniformly from [0, 1), so each element is kept with probability keep_prob
    mask = (torch.rand(X.shape) < keep_prob).float()
    # Rescale the surviving elements to keep the expectation unchanged
    return mask * X / keep_prob
X = torch.arange(16).view(2,8)
dropout(X, 0)
tensor([[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11., 12., 13., 14., 15.]])
dropout(X, 0.5)
tensor([[ 0., 2., 4., 0., 0., 10., 12., 0.],
[ 0., 0., 0., 22., 24., 26., 28., 30.]])
dropout(X, 1)
tensor([[0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0.]])
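As a quick sanity check (not part of the original text), one can confirm on a larger tensor that inverted dropout approximately preserves the mean of its input, because the zeroed elements are offset by the 1/(1−p) rescaling of the survivors:

Y = torch.ones(1000, 1000)
print(Y.mean().item())                # 1.0
print(dropout(Y, 0.5).mean().item())  # close to 1.0, e.g. ~0.999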
# Define model parameters
num_inputs, num_outputs, num_hiddens1, num_hiddens2 = 784, 10, 256, 256
W1 = torch.tensor(np.random.normal(0, 0.01, size=(num_inputs, num_hiddens1)), dtype=torch.float, requires_grad=True)
b1 = torch.zeros(num_hiddens1, requires_grad=True)
W2 = torch.tensor(np.random.normal(0, 0.01, size=(num_hiddens1, num_hiddens2)), dtype=torch.float, requires_grad=True)
b2 = torch.zeros(num_hiddens2, requires_grad=True)
W3 = torch.tensor(np.random.normal(0, 0.01, size=(num_hiddens2, num_outputs)), dtype=torch.float, requires_grad=True)
b3 = torch.zeros(num_outputs, requires_grad=True)
params = [W1, b1, W2, b2, W3, b3]
# Define the model
drop_prob1, drop_prob2 = 0.2, 0.5

def net(X, is_training=True):
    X = X.view(-1, num_inputs)
    H1 = (torch.matmul(X, W1) + b1).relu()
    if is_training:                    # apply dropout only during training
        H1 = dropout(H1, drop_prob1)
    H2 = (torch.matmul(H1, W2) + b2).relu()
    if is_training:
        H2 = dropout(H2, drop_prob2)
    return torch.matmul(H2, W3) + b3
num_epochs, lr, batch_size = 5, 100.0, 256
loss = torch.nn.CrossEntropyLoss()
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
params=params, lr=lr, optimizer=None)
epoch 1, loss 0.0042, train acc 0.583, test acc 0.780
epoch 2, loss 0.0022, train acc 0.796, test acc 0.790
epoch 3, loss 0.0018, train acc 0.826, test acc 0.821
epoch 4, loss 0.0017, train acc 0.844, test acc 0.811
epoch 5, loss 0.0016, train acc 0.853, test acc 0.826
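d2l.train_ch3 lives in the companion d2lzh_pytorch package and its body is not shown here. For readers without that package, the following is a minimal sketch of an equivalent loop for the from-scratch model above. It is an approximation rather than the helper's exact code: it assumes the net, dropout, loss, params and data iterators already defined, and it divides the SGD step by batch_size, which is the assumption under which a seemingly large lr of 100.0 is sensible. Its loss averaging convention may differ from the helper's, so the absolute loss values need not match the output above.

def train_scratch(net, train_iter, test_iter, loss, num_epochs, params, lr, batch_size):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, n_batches = 0.0, 0.0, 0, 0
        for X, y in train_iter:
            y_hat = net(X)                       # is_training defaults to True, so dropout is applied
            l = loss(y_hat, y)                   # CrossEntropyLoss averages over the batch
            for param in params:                 # clear gradients from the previous step
                if param.grad is not None:
                    param.grad.data.zero_()
            l.backward()
            with torch.no_grad():
                for param in params:             # plain SGD step, scaled by batch_size
                    param -= lr * param.grad / batch_size
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
            n_batches += 1
        # A matching evaluation pass would call net(X, is_training=False) on test_iter
        print('epoch %d, loss %.4f, train acc %.3f'
              % (epoch + 1, train_l_sum / n_batches, train_acc_sum / n))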
# Concise implementation
net = nn.Sequential(
    d2l.FlattenLayer(),
    nn.Linear(num_inputs, num_hiddens1),
    nn.ReLU(),
    nn.Dropout(drop_prob1),
    nn.Linear(num_hiddens1, num_hiddens2),
    nn.ReLU(),
    nn.Dropout(drop_prob2),
    nn.Linear(num_hiddens2, num_outputs)
)

for param in net.parameters():
    nn.init.normal_(param, mean=0, std=0.01)
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              None, None, optimizer=optimizer)
epoch 1, loss 0.0047, train acc 0.538, test acc 0.726
epoch 2, loss 0.0023, train acc 0.782, test acc 0.806
epoch 3, loss 0.0019, train acc 0.820, test acc 0.813
epoch 4, loss 0.0018, train acc 0.838, test acc 0.827
epoch 5, loss 0.0017, train acc 0.845, test acc 0.835
Hints:
Dropout can be used to cope with overfitting. Remember that dropout is only applied while training the model.
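With the nn.Dropout layers in the concise model, this train/test distinction is controlled by the module's mode rather than by an is_training flag. A short illustration with a dummy batch (the Fashion-MNIST-like input shape here is just an assumption for demonstration):

net.eval()                                   # evaluation mode: nn.Dropout passes inputs through unchanged
with torch.no_grad():
    logits = net(torch.rand(4, 1, 28, 28))   # dummy batch, for illustration only
net.train()                                  # back to training mode: dropout is active again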