PyTorch Crash Course
1. Tensors
Everything in PyTorch is based on Tensor operations. A Tensor is a multi-dimensional matrix
containing elements of a single data type:
import torch
# torch.empty(size): uninitialized
x = torch.empty(1) # scalar
print("empty(1):", x)
x = torch.empty(3) # vector
print("empty(3):",x)
x = torch.empty(2, 3) # matrix
print("empty(2,3):",x)
x = torch.empty(2, 2, 3) # tensor, 3 dimensions
#x = torch.empty(2,2,2,3) # tensor, 4 dimensions
print("empty(2, 2, 3):",x)
empty(1): tensor([3.3631e-44])
empty(3): tensor([-3.1612e-12, 3.0753e-41, 6.4666e-37])
empty(2,3): tensor([[-7.5699e+33, 3.0760e-41, -7.5689e+33],
[ 3.0760e-41, 1.4013e-45, 1.3873e-43]])
empty(2, 2, 3): tensor([[[-3.1650e-12, 3.0753e-41, 0.0000e+00],
[ 1.4013e-45, 0.0000e+00, 0.0000e+00]],
# check type
print(x.dtype)
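Besides torch.empty, tensors are usually created with explicit values or an explicit dtype; a brief sketch of a few common constructors (added for illustration, not from the original):
x = torch.rand(2, 2)                     # uniform random values in [0, 1)
x = torch.zeros(2, 2)                    # all zeros
x = torch.ones(2, 2, dtype=torch.int32)  # all ones, with an explicit dtype
x = torch.tensor([2.5, 0.1])             # built directly from a Python list
print(x.size(), x.dtype)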
# requires_grad argument
# This tells PyTorch that it will need to calculate the gradients for this
# tensor later in your optimization steps,
# i.e. this is a variable in your model that you want to optimize
x = torch.tensor([5.5, 3], requires_grad=True)
print(x)
# create two random tensors for the basic operations below
x = torch.rand(2, 2)
y = torch.rand(2, 2)

# elementwise addition
z = x + y
# torch.add(x,y)
print(x)
print(y)
print(z)
# subtraction
z = x - y
z = torch.sub(x, y)
# multiplication
z = x * y
z = torch.mul(x,y)
# division
z = x / y
z = torch.div(x,y)
# Slicing
x = torch.rand(5,3)
print(x)
print("x[:, 0]", x[:, 0]) # all rows, column 0
print("x[1, :]", x[1, :]) # row 1, all columns
print("x[1, 1]", x[1,1]) # element at 1, 1
NumPy
Converting a Torch Tensor to a NumPy array and vice versa is very easy. On the CPU, torch.from_numpy() and .numpy() share the underlying memory, so modifying one also modifies the other, while torch.tensor() makes a copy.
a = torch.ones(5)
print(a)
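The code that produced the printed values below is not shown; they match the usual NumPy-to-Torch demonstration, so a minimal sketch is given here (assuming torch.from_numpy for the shared-memory case and torch.tensor for the copy):
import numpy as np

a = np.ones(5)
b = torch.from_numpy(a)  # shares memory with a (on the CPU)
c = torch.tensor(a)      # makes a copy
print(a)
print(b)
print(c)

# modifying the array in place also changes b, but not the copy c
a += 1
print(a)
print(b)
print(c)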
[1. 1. 1. 1. 1.]
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)
[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)
GPU Support
By default all tensors are created on the CPU, but we can also move them to the GPU (if one is available), or create them directly on the GPU.
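A minimal sketch of both options (the tensor names here are illustrative, not from the original):
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

x = torch.rand(2, 2)
x = x.to(device)                     # move an existing tensor to the GPU
y = torch.rand(2, 2, device=device)  # or create it directly on the GPU

z = x + y        # the operation runs on the device both tensors live on
z = z.to('cpu')  # move back to the CPU, e.g. before calling .numpy()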
2. Autograd
The autograd package provides automatic differentiation for all operations on Tensors.
Generally speaking, torch.autograd is an engine for computing the vector-Jacobian product. It
computes partial derivatives while applying the chain rule.
import torch
# requires_grad = True -> tracks all operations on the tensor.
x = torch.randn(3, requires_grad=True)
y = x + 2
# Do more operations on y
z = y * y * 3
print(z)
z = z.mean()
print(z)
print(x.grad)
z.backward()
print(x.grad) # dz/dx
None
tensor([1.8299, 4.6817, 2.3913])
To stop a tensor from tracking history (for example during a weight update, or during evaluation), there are three options:
• x.requires_grad_(False)
• x.detach()
• wrap the operations in with torch.no_grad(): (sketched after the .detach() example below)
# .requires_grad_(...) changes an existing flag in-place.
a = torch.randn(2, 2)
b = (a * a).sum()
print(a.requires_grad)
print(b.grad_fn)
a.requires_grad_(True)
b = (a * a).sum()
print(a.requires_grad)
print(b.grad_fn)
False
None
True
<SumBackward0 object at 0x793f3830efb0>
# .detach(): get a new Tensor with the same content but no gradient computation:
a = torch.randn(2, 2, requires_grad=True)
b = a.detach()
print(a.requires_grad)
print(b.requires_grad)
True
False
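The third option, wrapping operations in torch.no_grad(), is not shown above; the second True/False pair below presumably came from a cell along these lines (a sketch):
a = torch.randn(2, 2, requires_grad=True)
print(a.requires_grad)
with torch.no_grad():
    # operations inside this block are not tracked by autograd
    b = a ** 2
    print(b.requires_grad)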
True
False
3. Linear Regression
f(x) = w * x + b
here: f(x) = 2 * x
import torch

# Linear regression (weights updated manually with autograd)
# f = w * x
# here : f = 2 * x

X = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8], dtype=torch.float32)
Y = torch.tensor([2, 4, 6, 8, 10, 12, 14, 16], dtype=torch.float32)

# the trainable weight, initialized to zero
w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

# model output
def forward(x):
    return w * x

# loss = MSE
def loss(y, y_pred):
    return ((y_pred - y)**2).mean()

X_test = 5.0

# Training
learning_rate = 0.01
n_epochs = 100

for epoch in range(n_epochs):
    # prediction = forward pass
    y_pred = forward(X)

    # loss
    l = loss(Y, y_pred)

    # gradients = backward pass (fills w.grad)
    l.backward()

    # update weights
    #w.data = w.data - learning_rate * w.grad
    with torch.no_grad():
        w -= learning_rate * w.grad

    # zero the gradients after updating
    w.grad.zero_()

    if (epoch+1) % 10 == 0:
        print(f'epoch {epoch+1}: w = {w.item():.3f}, loss = {l.item():.3f}')
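X_test is defined above but never used in the remaining code; presumably the trained model was evaluated on it, roughly like this (a sketch):
# after training, the prediction should be close to 2 * 5 = 10
print(f'Prediction after training: f({X_test}) = {forward(X_test).item():.3f}')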
import torch.nn as nn
# Linear regression with nn.Module
# f = w * x
# here : f = 2 * x
X = torch.tensor([[1], [2], [3], [4], [5], [6], [7], [8]], dtype=torch.float32)
Y = torch.tensor([[2], [4], [6], [8], [10], [12], [14], [16]], dtype=torch.float32)
n_samples, n_features = X.shape
print(f'n_samples = {n_samples}, n_features = {n_features}')
n_samples = 8, n_features = 1

class LinearRegression(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LinearRegression, self).__init__()
        # define different layers
        self.lin = nn.Linear(input_dim, output_dim)
    def forward(self, x):
        return self.lin(x)

model = LinearRegression(n_features, n_features)
learning_rate = 0.01
n_epochs = 100
loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# 3) Training loop
for epoch in range(n_epochs):
    # predict = forward pass with our model
    y_predicted = model(X)
    # loss
    l = loss(Y, y_predicted)
    # calculate gradients = backward pass
    l.backward()
    # update weights
    optimizer.step()
    # zero the gradients after updating
    optimizer.zero_grad()
    if (epoch+1) % 10 == 0:
        w, b = model.parameters() # unpack parameters
        print('epoch ', epoch+1, ': w = ', w[0][0].item(), ' loss = ', l.item())
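As in the manual version, the trained nn.Module model can be checked on a test input; a sketch (the shape [1, 1] test tensor is an assumption, matching the nn.Linear input shape):
X_test = torch.tensor([[5.0]], dtype=torch.float32)
with torch.no_grad():
    # should approach 2 * 5 = 10 after training
    print(f'Prediction after training: f(5) = {model(X_test).item():.3f}')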
4. Neural Network (MNIST)
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Hyper-parameters
input_size = 784 # 28x28
hidden_size = 500
num_classes = 10
num_epochs = 2
batch_size = 100
learning_rate = 0.001
# MNIST dataset
train_dataset = torchvision.datasets.MNIST(root='./data',
train=True,
transform=transforms.ToTensor(),
download=True)
test_dataset = torchvision.datasets.MNIST(root='./data',
train=False,
transform=transforms.ToTensor())
# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
batch_size=batch_size,
shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
batch_size=batch_size,
shuffle=False)
examples = iter(test_loader)
example_data, example_targets = next(examples)
for i in range(6):
plt.subplot(2,3,i+1)
plt.imshow(example_data[i][0], cmap='gray')
plt.show()
# Fully connected neural network with one hidden layer
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, num_classes)
    def forward(self, x):
        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        # no softmax at the end: nn.CrossEntropyLoss applies it internally
        return out
# fragments of the training and test loops (the full loops are sketched below)
        if (i+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')
outputs = model(images)
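Only the two fragments above survive from the MNIST training and evaluation code; a minimal sketch of the loops they belong to, using the hyper-parameters and loaders defined earlier (the exact original prints and variable names are assumed):
model = NeuralNet(input_size, hidden_size, num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # flatten the images: [100, 1, 28, 28] -> [100, 784]
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)

        # forward pass and loss
        outputs = model(images)
        loss = criterion(outputs, labels)

        # backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

# Evaluation: no gradients needed
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_loader:
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        outputs = model(images)
        # the class with the highest score is the prediction
        _, predicted = torch.max(outputs, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network on the test images: {acc} %')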
5. Convolutional Neural Network (CIFAR-10)
• Convolutional Layers
• MaxPooling
• Save/Load model
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Hyper-parameters
num_epochs = 10
batch_size = 32
learning_rate = 0.001
# normalization matches the "unnormalize" step in imshow below
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                             download=True, transform=transform)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                            download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset,
batch_size=batch_size,
shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset,
batch_size=batch_size,
shuffle=False)
classes = ('plane', 'car', 'bird', 'cat',
'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
def imshow(imgs):
imgs = imgs / 2 + 0.5 # unnormalize
npimgs = imgs.numpy()
plt.imshow(np.transpose(npimgs, (1, 2, 0)))
plt.show()
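imshow is not called anywhere in the remaining code; presumably it displayed a grid of training images, for example (a sketch using torchvision.utils.make_grid):
# show the first few images of one training batch as a grid
dataiter = iter(train_loader)
images, labels = next(dataiter)
imshow(torchvision.utils.make_grid(images[:8], nrow=4))
print(' '.join(classes[labels[j]] for j in range(8)))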
class ConvNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.conv3 = nn.Conv2d(64, 64, 3)
        self.fc1 = nn.Linear(64*4*4, 64)
        self.fc2 = nn.Linear(64, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # [n, 3, 32, 32] -> [n, 32, 15, 15]
        x = self.pool(F.relu(self.conv2(x)))  # -> [n, 64, 6, 6]
        x = F.relu(self.conv3(x))             # -> [n, 64, 4, 4]
        x = torch.flatten(x, 1)               # -> [n, 1024]
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
model = ConvNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f'[{epoch + 1}] loss: {running_loss / n_total_steps:.3f}')
print('Finished Training')
PATH = './cnn.pth'
torch.save(model.state_dict(), PATH)
loaded_model = ConvNet()
loaded_model.load_state_dict(torch.load(PATH)) # it takes the loaded dictionary, not the file path itself
loaded_model.to(device)
loaded_model.eval()
with torch.no_grad():
    n_correct = 0
    n_correct2 = 0
    n_samples = len(test_loader.dataset)
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)  # class with the highest score
        n_correct += (predicted == labels).sum().item()
        outputs2 = loaded_model(images)
        _, predicted2 = torch.max(outputs2, 1)
        n_correct2 += (predicted2 == labels).sum().item()
    print(f'Accuracy of the model: {100.0 * n_correct / n_samples} %')
    print(f'Accuracy of the loaded model: {100.0 * n_correct2 / n_samples} %')