# Seed the RNG so the random weight/bias initialisation below is reproducible.
torch$manual_seed(0)
#> <torch._C.Generator>
device <- torch$device('cpu')
# Training data: one row per observation.
# Input features (temp, rainfall, humidity)
inputs <- np$array(list(list(73, 67, 43),
                        list(91, 88, 64),
                        list(87, 134, 58),
                        list(102, 43, 37),
                        list(69, 96, 70)), dtype='float32')
# Targets (apples, oranges)
targets <- np$array(list(list(56, 70),
                         list(81, 101),
                         list(119, 133),
                         list(22, 37),
                         list(103, 119)), dtype='float32')
# Convert the numpy arrays to torch tensors
inputs <- torch$from_numpy(inputs)
targets <- torch$from_numpy(targets)
print(inputs)
#> tensor([[ 73., 67., 43.],
#> [ 91., 88., 64.],
#> [ 87., 134., 58.],
#> [102., 43., 37.],
#> [ 69., 96., 70.]], dtype=torch.float64)
print(targets)
#> tensor([[ 56., 70.],
#> [ 81., 101.],
#> [119., 133.],
#> [ 22., 37.],
#> [103., 119.]], dtype=torch.float64)
# Random initial weights and biases. Default dtype is set to double first,
# so the tensors created below are float64.
torch$set_default_dtype(torch$double)
w <- torch$randn(2L, 3L, requires_grad=TRUE) #$double()
b <- torch$randn(2L, requires_grad=TRUE) #$double()
print(w)
#> tensor([[ 1.5410, -0.2934, -2.1788],
#> [ 0.5684, -1.0845, -1.3986]], requires_grad=True)
print(b)
#> tensor([0.4033, 0.8380], requires_grad=True)
# Linear model: computes x %*% t(w) + b using the global parameter
# tensors `w` (2x3 weights) and `b` (length-2 bias).
model <- function(x) {
  torch$add(torch$mm(x, w$t()), b)
}
# Generate predictions with the randomly initialised parameters
preds <- model(inputs)
print(preds)
#> tensor([[ -0.4516, -90.4691],
#> [ -24.6303, -132.3828],
#> [ -31.2192, -176.1530],
#> [ 64.3523, -39.5645],
#> [ -73.9524, -161.9560]], grad_fn=<AddBackward0>)
# Mean squared error between two tensors: mean of the squared
# elementwise differences, i.e. sum((t1 - t2)^2) / numel.
mse <- function(t1, t2) {
  err <- torch$sub(t1, t2)
  torch$div(torch$sum(torch$mul(err, err)), err$numel())
}
# Compute the loss for the initial parameters
loss <- mse(preds, targets)
print(loss)
#> tensor(33060.8053, grad_fn=<DivBackward0>)
# 46194
# 33060.8070
# Back-propagate: this fills in w$grad and b$grad
loss$backward()
# backward() leaves the weights themselves unchanged; only grads are set
print(w)
#> tensor([[ 1.5410, -0.2934, -2.1788],
#> [ 0.5684, -1.0845, -1.3986]], requires_grad=True)
print(w$grad)
#> tensor([[ -6938.4351, -9674.6757, -5744.0206],
#> [-17408.7861, -20595.9333, -12453.4702]])
# Reset the gradients in place — otherwise the next backward() call
# would accumulate on top of these values
w$grad$zero_()
#> tensor([[0., 0., 0.],
#> [0., 0., 0.]])
b$grad$zero_()
#> tensor([0., 0.])
print(w$grad)
#> tensor([[0., 0., 0.],
#> [0., 0., 0.]])
print(b$grad)
#> tensor([0., 0.])
# Generate predictions again (parameters are unchanged, so same values)
preds <- model(inputs)
print(preds)
#> tensor([[ -0.4516, -90.4691],
#> [ -24.6303, -132.3828],
#> [ -31.2192, -176.1530],
#> [ 64.3523, -39.5645],
#> [ -73.9524, -161.9560]], grad_fn=<AddBackward0>)
# Calculate the loss
loss <- mse(preds, targets)
print(loss)
#> tensor(33060.8053, grad_fn=<DivBackward0>)
# Compute gradients
loss$backward()
# print(w); print(b)
print(w$grad)
#> tensor([[ -6938.4351, -9674.6757, -5744.0206],
#> [-17408.7861, -20595.9333, -12453.4702]])
print(b$grad)
#> tensor([ -89.3802, -212.1051])
# One gradient-descent step: update parameters, then clear the gradients.
# torch$no_grad() keeps autograd from tracking the in-place updates.
with(torch$no_grad(), {
  print(w); print(b) # requires_grad attribute remains
  # param <- param - lr * grad, with learning rate 1e-5
  w$data <- torch$sub(w$data, torch$mul(w$grad$data, torch$scalar_tensor(1e-5)))
  b$data <- torch$sub(b$data, torch$mul(b$grad$data, torch$scalar_tensor(1e-5)))
  print(w$grad$data$zero_())
  print(b$grad$data$zero_())
})
#> tensor([[ 1.5410, -0.2934, -2.1788],
#> [ 0.5684, -1.0845, -1.3986]], requires_grad=True)
#> tensor([0.4033, 0.8380], requires_grad=True)
#> tensor([[0., 0., 0.],
#> [0., 0., 0.]])
#> tensor([0., 0.])
print(w)
#> tensor([[ 1.6104, -0.1967, -2.1213],
#> [ 0.7425, -0.8786, -1.2741]], requires_grad=True)
print(b)
#> tensor([0.4042, 0.8401], requires_grad=True)
# Loss after one update step — should be lower than before
preds <- model(inputs)
loss <- mse(preds, targets)
print(loss)
#> tensor(23432.4894, grad_fn=<DivBackward0>)