A two-layer neural network implemented by hand in PyTorch
import torch
N, D_in, H, D_out = 64, 1000, 100, 10
# create random training data
x = torch.randn(N, D_in).to("cuda:0")
y = torch.randn(N, D_out).to("cuda:0")
w1 = torch.randn(D_in, H).to("cuda:0")
w2 = torch.randn(H, D_out).to("cuda:0")
learning_rate = 1e-6
for it in range(5000):
    # Forward pass
    h = x.mm(w1)              # N * H
    h_relu = h.clamp(min=0)   # N * H
    y_pred = h_relu.mm(w2)    # N * D_out
    # compute loss
    loss = (y_pred - y).pow(2).sum().item()
    print(it, loss)
    # backward pass
    # compute the gradients
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_h = grad_h_relu.clone()
    grad_h[h < 0] = 0
    grad_w1 = x.t().mm(grad_h)
    # update weights of w1 and w2
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
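As a sanity check (a sketch I added, not part of the original code), the hand-derived gradients above can be compared against autograd on a small instance of the same forward pass; the tiny dimensions here are my own choice:
import torch
N, D_in, H, D_out = 4, 6, 5, 3
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)
w1 = torch.randn(D_in, H, requires_grad=True)
w2 = torch.randn(H, D_out, requires_grad=True)
# same forward pass as above, but letting autograd track it
h = x.mm(w1)
h_relu = h.clamp(min=0)
y_pred = h_relu.mm(w2)
loss = (y_pred - y).pow(2).sum()
loss.backward()
# recompute the gradients by hand with the formulas from the loop above
with torch.no_grad():
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_h = grad_h_relu.clone()
    grad_h[h < 0] = 0
    grad_w1 = x.t().mm(grad_h)
print(torch.allclose(grad_w1, w1.grad), torch.allclose(grad_w2, w2.grad))  # should print: True True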
Gradient descent with PyTorch autograd
import torch
N, D_in, H, D_out = 64, 1000, 100, 10
# create random training data
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)
w1 = torch.randn(D_in, H, requires_grad=True)
w2 = torch.randn(H, D_out, requires_grad=True)
learning_rate = 1e-6
for it in range(5000):
    # Forward pass
    y_pred = x.mm(w1).clamp(min=0).mm(w2)
    # compute loss
    loss = (y_pred - y).pow(2).sum()  # builds the computation graph
    print(it, loss.item())
    # backward pass
    loss.backward()
    # update weights of w1 and w2
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad
        w1.grad.zero_()
        w2.grad.zero_()
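The manual weight update and gradient zeroing can also be handed off to torch.optim. A minimal sketch (my own variation, not in the original text): the same loop with torch.optim.SGD, whose step performs exactly the w -= learning_rate * grad update.
import torch
N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)
w1 = torch.randn(D_in, H, requires_grad=True)
w2 = torch.randn(H, D_out, requires_grad=True)
optimizer = torch.optim.SGD([w1, w2], lr=1e-6)
for it in range(5000):
    y_pred = x.mm(w1).clamp(min=0).mm(w2)
    loss = (y_pred - y).pow(2).sum()
    print(it, loss.item())
    optimizer.zero_grad()  # replaces w1.grad.zero_() / w2.grad.zero_()
    loss.backward()
    optimizer.step()       # replaces the manual w -= learning_rate * grad updates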
Switching to torch.nn and a built-in optimizer
import torch
N, D_in, H, D_out = 64, 1000, 100, 10
# create random training data
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)
loss_fn = torch.nn.MSELoss(reduction="sum")
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
for it in range(5000):
    # Forward pass
    y_pred = model(x)
    # compute loss
    loss = loss_fn(y_pred, y)  # builds the computation graph
    print(it, loss.item())
    optimizer.zero_grad()
    # backward pass
    loss.backward()
    # update model parameters
    optimizer.step()
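Because the model's parameters are handed to an optimizer object, swapping the optimizer is a one-line change. For example, plain SGD with momentum (the learning rate and momentum values here are illustrative guesses, not tuned):
# only this line changes; lr and momentum chosen for illustration
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)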
Defining the model as a torch.nn.Module subclass
import torch
N, D_in, H, D_out = 64, 1000, 100, 10
# create random training data
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)
class TwoLayerNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        super(TwoLayerNet, self).__init__()
        # define the model architecture
        self.linear1 = torch.nn.Linear(D_in, H)
        self.linear2 = torch.nn.Linear(H, D_out)

    def forward(self, x):
        y_pred = self.linear2(self.linear1(x).clamp(min=0))
        return y_pred
model = TwoLayerNet(D_in, H, D_out)
loss_fn = torch.nn.MSELoss(reduction="sum")
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
for it in range(5000):
    # Forward pass
    y_pred = model(x)
    # compute loss
    loss = loss_fn(y_pred, y)  # builds the computation graph
    print(it, loss.item())
    optimizer.zero_grad()
    # backward pass
    loss.backward()
    # update model parameters
    optimizer.step()
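Once trained, the module can be used for inference and its weights saved. A short sketch I added (the filename is an arbitrary example):
model.eval()                   # switch to evaluation mode
with torch.no_grad():          # no computation graph needed for inference
    x_new = torch.randn(1, D_in)
    print(model(x_new).shape)  # torch.Size([1, 10])
torch.save(model.state_dict(), "two_layer_net.pt")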
To summarize, every version above follows the same recipe:
- define the input and output
- define the model
- define the loss function
- define the optimizer
- use the model to predict the output from the input
- compute the loss
- backward pass
- update the model parameters
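The same recipe also runs on GPU, as the first block did by hand. A hedged sketch, assuming a CUDA device is available and reusing TwoLayerNet and the data from above:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = TwoLayerNet(D_in, H, D_out).to(device)  # move parameters to the device
x, y = x.to(device), y.to(device)               # move data to the same device
# the training loop itself is unchanged: predict, compute loss, backward, step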