Pytorch 自动微分
程序员文章站
2022-07-12 23:03:07
...
- `Tensor.requires_grad = True`:记录对该 Tensor 的所有操作,后续调用 `.backward()` 时会自动计算所有梯度,并累积到 `.grad` 属性
import torch
x = torch.ones(2,2, requires_grad=True) # 默认是False
print(x)
tensor([[1., 1.],
[1., 1.]], requires_grad=True)
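- 停止记录调用:`.detach()` 返回一个与计算历史分离的新张量;带下划线的 `.detach_()` 则原地修改张量本身
x.detach_()
print(x.requires_grad) # False
# 注意:x 已被原地分离,运行后面的示例前需要重新创建 x = torch.ones(2, 2, requires_grad=True)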
- `.grad_fn`:保存了创建该张量的 Function 的引用
y = x + 2
print(y)
print(y.grad_fn)
tensor([[3., 3.],
[3., 3.]], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x0000015716529D68>
z = y*y*3
out = z.mean()
print(z, out)
tensor([[27., 27.],
[27., 27.]], grad_fn=<MulBackward0>)
tensor(27., grad_fn=<MeanBackward0>)
# requires_grad 默认为 False
a = torch.randn(2, 2)
a = ((a*3)/(a-1))
print(a.requires_grad) # False
b = (a*a).sum()
print(b.grad_fn) # None
a.requires_grad_(True) # 设置为 True
print(a.requires_grad) # True
b = (a*a).sum()
print(b.grad_fn)
# <SumBackward0 object at 0x0000015717DC69E8>
- `backward()`:反向传播
z = y*y*3
y = x+2
计算 d(out)/dx:
$$out = \frac{1}{4}\sum_i 3(x_i+2)^2 \quad\rightarrow\quad \frac{\partial\, out}{\partial x_i} = \frac{3}{2}(x_i+2)$$
当 $x_i = 1$ 时,$\frac{\partial\, out}{\partial x_i} = 4.5$
out.backward()
print(y.grad) # None:y 不是叶子张量,默认不保留 .grad;如需查看,应在 backward() 之前调用 y.retain_grad()
print(x.grad)
tensor([[4.5000, 4.5000],
[4.5000, 4.5000]])
$$J=\left(\begin{array}{ccc}\frac{\partial y_{1}}{\partial x_{1}} & \cdots & \frac{\partial y_{m}}{\partial x_{1}} \\ \vdots & \ddots & \vdots \\ \frac{\partial y_{1}}{\partial x_{n}} & \cdots & \frac{\partial y_{m}}{\partial x_{n}}\end{array}\right)$$
- 当又使用了一个函数 $l = g(y)$ 时,$v$ 是 $l$ 对 $y$ 的导数;按链式法则将 $J$ 与 $v$ 相乘,即可得到 $l$ 对 $x$ 的导数:
$$J \cdot v=\left(\begin{array}{ccc}\frac{\partial y_{1}}{\partial x_{1}} & \cdots & \frac{\partial y_{m}}{\partial x_{1}} \\ \vdots & \ddots & \vdots \\ \frac{\partial y_{1}}{\partial x_{n}} & \cdots & \frac{\partial y_{m}}{\partial x_{n}}\end{array}\right)\left(\begin{array}{c}\frac{\partial l}{\partial y_{1}} \\ \vdots \\ \frac{\partial l}{\partial y_{m}}\end{array}\right)=\left(\begin{array}{c}\frac{\partial l}{\partial x_{1}} \\ \vdots \\ \frac{\partial l}{\partial x_{n}}\end{array}\right)$$
将上面代码中的 out.backward() 改为传入向量 v 的形式:
v = torch.tensor(2, dtype=torch.float)
out.backward(v)
print(x.grad)
# 梯度乘以了 2
tensor([[9., 9.],
[9., 9.]])
- 评估阶段可以使用 `with torch.no_grad():`,在该代码块内不记录计算历史,因此不需要进行梯度计算和更新
print(x.requires_grad) # True
print((x ** 2).requires_grad) # True
# 取消梯度记录
with torch.no_grad():
print((x ** 2).requires_grad) # False