Double Backward with Custom Functions
It is sometimes useful to run backward twice through the backward graph, for example to compute higher-order gradients. Supporting double backward, however, takes an understanding of autograd and some care: functions that support a single backward pass are not necessarily equipped to support double backward. In this tutorial we show how to write a custom autograd function that supports double backward, and point out some things to look out for.
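As a quick illustration of what running backward twice means, here is a minimal sketch (not part of the original example code) that computes a second derivative of a plain tensor expression by passing create_graph=True to the first torch.autograd.grad call and then differentiating the result again:

import torch

x = torch.tensor(2., requires_grad=True)
y = x ** 3
# First derivative: dy/dx = 3x^2; create_graph=True records this backward pass
dy_dx, = torch.autograd.grad(y, x, create_graph=True)
# Second derivative: d2y/dx2 = 6x, obtained by differentiating dy_dx again
d2y_dx2, = torch.autograd.grad(dy_dx, x)
print(dy_dx, d2y_dx2)  # both equal 12 at x = 2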
When writing a custom autograd function that will be backwarded through twice, it is important to know when the operations performed inside the function are recorded by autograd, when they are not, and, most importantly, how save_for_backward works with all of this.
Custom functions implicitly affect grad mode in two ways:
- During the forward pass, autograd does not record the graph for any operations performed inside the forward function. When forward completes, the backward function of the custom function becomes the grad_fn of each of the forward's outputs.
- During the backward pass, autograd records the computation graph used to compute the backward pass if create_graph is specified.
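As a minimal sketch of the first point (the toy Double function below is added here purely for illustration and is not part of the tutorial), the multiplication performed inside forward is not recorded by autograd; instead, the output's grad_fn is a node that calls the custom backward:

class Double(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x):
        # This multiplication is not recorded in the autograd graph
        return 2 * x

    @staticmethod
    def backward(ctx, grad_out):
        return 2 * grad_out

x = torch.tensor(1., requires_grad=True)
out = Double.apply(x)
print(out.grad_fn)  # a DoubleBackward node rather than a MulBackward node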
Next, to understand how save_for_backward interacts with the above, let's explore a couple of examples.
Consider this simple squaring function. It saves an input tensor for backward. Double backward works automatically when autograd is able to record the operations in the backward pass, so there is usually nothing to worry about when we save an input for backward: if the input is a function of any tensor that requires grad, it will have a grad_fn, and this allows gradients to be properly propagated.
import torch
class Square(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x):
        # Because we are saving one of the inputs use `save_for_backward`
        # Save non-tensors and non-inputs/non-outputs directly on ctx
        ctx.save_for_backward(x)
        return x**2

    @staticmethod
    def backward(ctx, grad_out):
        # A function supports double backward automatically if autograd
        # is able to record the computations performed in backward
        x, = ctx.saved_tensors
        return grad_out * 2 * x
# Use double precision because finite differencing method magnifies errors
x = torch.rand(3, 3, requires_grad=True, dtype=torch.double)
torch.autograd.gradcheck(Square.apply, x)
# Use gradcheck to verify second-order derivatives
torch.autograd.gradgradcheck(Square.apply, x)

We can use torchviz to visualize the graph to see why this works:
import torchviz
x = torch.tensor(1., requires_grad=True).clone()
out = Square.apply(x)
grad_x, = torch.autograd.grad(out, x, create_graph=True)
torchviz.make_dot((grad_x, x, out), {"grad_x": grad_x, "x": x, "out": out})

We can see that the gradient with respect to x is itself a function of x (dout/dx = 2x), and that the graph for this function has been properly constructed.
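As a further check, grad_x can itself be differentiated, which runs backward a second time. This is a small sketch added here for illustration (not part of the original example code); it recovers the constant second derivative d2out/dx2 = 2:

x = torch.tensor(3., requires_grad=True, dtype=torch.double)
out = Square.apply(x)
grad_x, = torch.autograd.grad(out, x, create_graph=True)  # dout/dx = 2x = 6
grad2_x, = torch.autograd.grad(grad_x, x)                 # d2out/dx2 = 2
print(grad_x, grad2_x)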
A slight variation on the previous example is to save the output instead of the input. The mechanics are similar because outputs are also associated with a grad_fn.
class Exp(torch.autograd.Function):
    # Simple case where everything goes well
    @staticmethod
    def forward(ctx, x):
        # This time we save the output
        result = torch.exp(x)
        # Note that we should use `save_for_backward` here when
        # the tensor saved is an output (or an input).
        ctx.save_for_backward(result)
        return result

    @staticmethod
    def backward(ctx, grad_out):
        result, = ctx.saved_tensors
        return result * grad_out
x = torch.tensor(1., requires_grad=True, dtype=torch.double).clone()
# Validate our gradients using gradcheck
torch.autograd.gradcheck(Exp.apply, x)
torch.autograd.gradgradcheck(Exp.apply, x)

Use torchviz to visualize the graph:
out = Exp.apply(x)
grad_x, = torch.autograd.grad(out, x, create_graph=True)
torchviz.make_dot((grad_x, x, out), {"grad_x": grad_x, "x": x, "out": out})

A trickier case is when we need to save an intermediate result. We demonstrate this case by implementing:
sinh(x) := \frac{e^x - e^{-x}}{2}
Since the derivative of sinh is cosh, it would be efficient to reuse exp(x) and exp(-x), the two intermediate results from the forward pass, in the backward computation.
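Written out, the derivative indeed reuses both exponentials:

\frac{d}{dx} sinh(x) = \frac{e^x + e^{-x}}{2} = cosh(x)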
Intermediate results should not be saved directly and used in backward, though. Because forward is performed in no-grad mode, if an intermediate result of the forward pass is used to compute gradients in the backward pass, the backward graph of those gradients will not include the operations that computed the intermediate result. This leads to incorrect gradients.
class Sinh(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x):
        expx = torch.exp(x)
        expnegx = torch.exp(-x)
        ctx.save_for_backward(expx, expnegx)
        # In order to be able to save the intermediate results, a trick is to
        # include them as our outputs, so that the backward graph is constructed
        return (expx - expnegx) / 2, expx, expnegx

    @staticmethod
    def backward(ctx, grad_out, _grad_out_exp, _grad_out_negexp):
        expx, expnegx = ctx.saved_tensors
        grad_input = grad_out * (expx + expnegx) / 2
        # We cannot skip accumulating these even though we won't use the outputs
        # directly. They will be used later in the second backward.
        grad_input += _grad_out_exp * expx
        grad_input -= _grad_out_negexp * expnegx
        return grad_input

def sinh(x):
    # Create a wrapper that only returns the first output
    return Sinh.apply(x)[0]
x = torch.rand(3, 3, requires_grad=True, dtype=torch.double)
torch.autograd.gradcheck(sinh, x)
torch.autograd.gradgradcheck(sinh, x)

Use torchviz to visualize the graph:
out = sinh(x)
grad_x, = torch.autograd.grad(out.sum(), x, create_graph=True)
torchviz.make_dot((grad_x, x, out), params={"grad_x": grad_x, "x": x, "out": out})

Let's see what happens if we don't return the intermediate results as outputs: grad_x would not even have a backward graph, because it is purely a function of expx and expnegx, which don't require grad.
class SinhBad(torch.autograd.Function):
    # This is an example of what NOT to do!
    @staticmethod
    def forward(ctx, x):
        expx = torch.exp(x)
        expnegx = torch.exp(-x)
        ctx.expx = expx
        ctx.expnegx = expnegx
        return (expx - expnegx) / 2

    @staticmethod
    def backward(ctx, grad_out):
        expx = ctx.expx
        expnegx = ctx.expnegx
        grad_input = grad_out * (expx + expnegx) / 2
        return grad_input

Use torchviz to visualize the graph. Notice that grad_x is not part of the graph!
out = SinhBad.apply(x)
grad_x, = torch.autograd.grad(out.sum(), x, create_graph=True)
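# (A small added check, not part of the original code:) because this backward is a
# pure function of expx and expnegx, which do not require grad, grad_x carries no
# backward graph and cannot be differentiated a second time.
print(grad_x.requires_grad)  # False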
torchviz.make_dot((grad_x, x, out), params={"grad_x": grad_x, "x": x, "out": out})

Finally, let's consider an example where autograd may not be able to track gradients for a function's backward at all. Imagine that cube_backward requires a non-PyTorch library like SciPy or NumPy, or is written as a C++ extension. The workaround demonstrated here is to create another custom function, CubeBackward, where you also manually specify the backward of cube_backward!
def cube_forward(x):
    return x**3

def cube_backward(grad_out, x):
    return grad_out * 3 * x**2

def cube_backward_backward(grad_out, sav_grad_out, x):
    return grad_out * sav_grad_out * 6 * x

def cube_backward_backward_grad_out(grad_out, x):
    return grad_out * 3 * x**2
class Cube(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x):
        ctx.save_for_backward(x)
        return cube_forward(x)

    @staticmethod
    def backward(ctx, grad_out):
        x, = ctx.saved_tensors
        return CubeBackward.apply(grad_out, x)
class CubeBackward(torch.autograd.Function):
    @staticmethod
    def forward(ctx, grad_out, x):
        ctx.save_for_backward(x, grad_out)
        return cube_backward(grad_out, x)

    @staticmethod
    def backward(ctx, grad_out):
        x, sav_grad_out = ctx.saved_tensors
        dx = cube_backward_backward(grad_out, sav_grad_out, x)
        dgrad_out = cube_backward_backward_grad_out(grad_out, x)
        return dgrad_out, dx
x = torch.tensor(2., requires_grad=True, dtype=torch.double)
torch.autograd.gradcheck(Cube.apply, x)
torch.autograd.gradgradcheck(Cube.apply, x)

Use torchviz to visualize the graph:
out = Cube.apply(x)
grad_x, = torch.autograd.grad(out, x, create_graph=True)
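# (A small added check, not part of the original code:) differentiating grad_x once
# more goes through CubeBackward and reproduces d2(x^3)/dx2 = 6x = 12 at x = 2.
grad2_x, = torch.autograd.grad(grad_x, x, retain_graph=True)
print(grad2_x)  # tensor(12., dtype=torch.float64)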
torchviz.make_dot((grad_x, x, out), params={"grad_x": grad_x, "x": x, "out": out})

To conclude, whether double backward works for your custom function simply depends on whether the backward pass can be tracked by autograd. The first two examples show situations where double backward works out of the box, while the third and fourth examples demonstrate techniques that make a backward function trackable when it otherwise would not be.




