CGAN_FULL_CODE
CGAN_FULL_CODE
CGAN_FULL_CODE
You will be started off with a pre-trained generator and classifier, so that
you can focus on the controllability aspects. However, in case you would like to
train your own classifier, the code for that has been provided as well.
import torch
from torch import nn
from tqdm.auto import tqdm
from torchvision import transforms
from torchvision.utils import make_grid
from torchvision.datasets import CelebA
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
torch.manual_seed(0) # Set for our testing purposes, please do not change!
class Generator(nn.Module):
    '''
    Generator Class
    Values:
        z_dim: the dimension of the noise vector, a scalar
        im_chan: the number of channels in the images, fitted for the dataset used,
              a scalar
              (CelebA is rgb, so 3 is our default)
        hidden_dim: the inner dimension, a scalar
    '''
    def __init__(self, z_dim=10, im_chan=3, hidden_dim=64):
        super(Generator, self).__init__()
        self.z_dim = z_dim
        # Build the neural network
        self.gen = nn.Sequential(
            self.make_gen_block(z_dim, hidden_dim * 8),
            self.make_gen_block(hidden_dim * 8, hidden_dim * 4),
            self.make_gen_block(hidden_dim * 4, hidden_dim * 2),
            self.make_gen_block(hidden_dim * 2, hidden_dim),
            self.make_gen_block(hidden_dim, im_chan, kernel_size=4,
                                final_layer=True),
        )

    def make_gen_block(self, input_channels, output_channels, kernel_size=3,
                       stride=2, final_layer=False):
        '''
        Function to return a sequence of operations corresponding to a generator
        block of DCGAN;
        a transposed convolution, a batchnorm (except in the final layer), and an
        activation.
        Parameters:
            input_channels: how many channels the input feature representation has
            output_channels: how many channels the output feature representation
                  should have
            kernel_size: the size of each convolutional filter, equivalent to
                  (kernel_size, kernel_size)
            stride: the stride of the convolution
            final_layer: a boolean, true if it is the final layer and false
                  otherwise
                  (affects activation and batchnorm)
        Returns:
            an nn.Sequential holding the layers of the block
        '''
        layers = [
            nn.ConvTranspose2d(input_channels, output_channels, kernel_size,
                               stride),
        ]
        if final_layer:
            # The last block squashes the output with tanh and skips batchnorm.
            layers.append(nn.Tanh())
        else:
            layers.append(nn.BatchNorm2d(output_channels))
            layers.append(nn.ReLU(inplace=True))
        return nn.Sequential(*layers)

    def forward(self, noise):
        '''
        Function for completing a forward pass of the generator: given a noise
        tensor, returns generated images.
        Parameters:
            noise: a noise tensor with dimensions (n_samples, z_dim)
        '''
        # The transposed convolutions need a 4-D input, so each noise vector is
        # viewed as a 1x1 feature map with z_dim channels.
        x = noise.view(len(noise), self.z_dim, 1, 1)
        return self.gen(x)
class Classifier(nn.Module):
    '''
    Classifier Class
    Values:
        im_chan: the number of channels in the images, fitted for the dataset used,
              a scalar
              (CelebA is rgb, so 3 is our default)
        n_classes: the total number of classes in the dataset, an integer scalar
        hidden_dim: the inner dimension, a scalar
    '''
    def __init__(self, im_chan=3, n_classes=2, hidden_dim=64):
        super(Classifier, self).__init__()
        self.classifier = nn.Sequential(
            self.make_classifier_block(im_chan, hidden_dim),
            self.make_classifier_block(hidden_dim, hidden_dim * 2),
            self.make_classifier_block(hidden_dim * 2, hidden_dim * 4, stride=3),
            self.make_classifier_block(hidden_dim * 4, n_classes,
                                       final_layer=True),
        )

    def make_classifier_block(self, input_channels, output_channels,
                              kernel_size=4, stride=2, final_layer=False):
        '''
        Function to return a sequence of operations corresponding to a
        classifier block; a convolution, a batchnorm (except in the final
        layer), and an activation (except in the final layer).
        Parameters:
            input_channels: how many channels the input feature representation has
            output_channels: how many channels the output feature representation
                  should have
            kernel_size: the size of each convolutional filter, equivalent to
                  (kernel_size, kernel_size)
            stride: the stride of the convolution
            final_layer: a boolean, true if it is the final layer and false
                  otherwise
                  (affects activation and batchnorm)
        '''
        # NOTE(review): this method was called by __init__ but was missing from
        # the file. kernel_size=4 / stride=2 reduce a 64x64 image to 1x1 through
        # the four blocks built in __init__; the LeakyReLU(0.2) activation and
        # the batchnorm are assumed -- confirm that the layer layout matches
        # the pretrained checkpoint before loading it.
        layers = [nn.Conv2d(input_channels, output_channels, kernel_size, stride)]
        if not final_layer:
            layers.append(nn.BatchNorm2d(output_channels))
            layers.append(nn.LeakyReLU(0.2, inplace=True))
        # The final block emits raw logits (no activation).
        return nn.Sequential(*layers)

    def forward(self, image):
        '''
        Function for completing a forward pass of the classifier: given an
        image tensor, returns an n_classes-dimension tensor of logits per image.
        Parameters:
            image: a flattened image tensor with im_chan channels
        '''
        class_pred = self.classifier(image)
        # Collapse the remaining spatial dimensions into the class dimension.
        return class_pred.view(len(class_pred), -1)
# Size of the generator's noise vector.
z_dim = 64
# Mini-batch size used when training the classifier.
batch_size = 128
# Use the GPU when one is present; otherwise fall back to the CPU so that the
# `.to(device)` calls do not fail on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
## Train a Classifier (Optional)
You're welcome to train your own classifier with this code, but you are provided
with a pretrained one later in the code. Feel free to skip this code block, and if
you do want to train your own classifier, it is recommended that you initially go
through the assignment with the provided classifier!
def train_classifier(filename):
    '''
    Train a Classifier on the CelebA attribute labels and save its weights.
    Downloads the CelebA training split into the current directory, trains for
    n_epochs epochs with Adam and BCEWithLogitsLoss, plots the running loss
    every display_step steps and writes {"classifier": state_dict} to filename.
    Parameters:
        filename: the path the classifier state dict is saved to
    '''
    import seaborn as sns
    import matplotlib.pyplot as plt

    # You can run this code to train your own classifier, but there is a provided
    # pretrained one.
    # If you'd like to use this, just run "train_classifier(filename)"
    # to train and save a classifier on the label indices to that filename.

    # Target all the classes, so that's how many the classifier will learn
    label_indices = range(40)

    n_epochs = 3
    display_step = 500
    lr = 0.001
    beta_1 = 0.5
    beta_2 = 0.999
    image_size = 64

    transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.CenterCrop(image_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    dataloader = DataLoader(
        CelebA(".", split='train', download=True, transform=transform),
        batch_size=batch_size,
        shuffle=True)

    classifier = Classifier(n_classes=len(label_indices)).to(device)
    class_opt = torch.optim.Adam(classifier.parameters(), lr=lr,
                                 betas=(beta_1, beta_2))
    criterion = nn.BCEWithLogitsLoss()

    cur_step = 0
    classifier_losses = []
    for epoch in range(n_epochs):
        # Dataloader returns the batches
        for real, labels in tqdm(dataloader):
            real = real.to(device)
            labels = labels[:, label_indices].to(device).float()

            class_opt.zero_grad()
            class_pred = classifier(real)
            class_loss = criterion(class_pred, labels)
            class_loss.backward()  # Calculate the gradients
            class_opt.step()  # Update the weights
            # Keep track of the average classifier loss
            classifier_losses.append(class_loss.item())

            ## Visualization code ##
            if cur_step % display_step == 0 and cur_step > 0:
                class_mean = sum(classifier_losses[-display_step:]) / display_step
                print(f"Epoch {epoch}, step {cur_step}: Classifier loss: {class_mean}")
                step_bins = 20
                # Every loss inside a bin shares the bin's starting step as its
                # x value, so several losses are plotted per x position.
                x_axis = sorted(
                    [i * step_bins
                     for i in range(len(classifier_losses) // step_bins)]
                    * step_bins)
                # x and y are passed by keyword: current seaborn releases no
                # longer accept them positionally.
                sns.lineplot(x=x_axis, y=classifier_losses[:len(x_axis)],
                             label="Classifier Loss")
                plt.legend()
                plt.show()
                torch.save({"classifier": classifier.state_dict()}, filename)
            cur_step += 1

    # Save once more after the last batch so the updates made since the most
    # recent display_step checkpoint are not lost.
    torch.save({"classifier": classifier.state_dict()}, filename)
# Uncomment the last line to train your own classifier - this line will not work
# in Coursera.
# If you'd like to do this, you'll have to download it and run it, ideally using
# a GPU.
# NOTE(review): this call is not commented out, so classifier training runs as
# soon as this line executes and writes "trained_classifier.pth" -- comment it
# out to rely on the provided pretrained classifier instead.
train_classifier("trained_classifier.pth")
You will then load the pretrained generator and classifier using the following
code. (If you trained your own classifier, you can load that one here instead.)
import torch

# Both checkpoints are mapped onto the configured device while loading.
_map_location = torch.device(device)

# Generator: build it, restore the weights stored under the "gen" key, and
# switch to evaluation mode.
gen = Generator(z_dim).to(device)
gen_dict = torch.load("trained_gen.pth", map_location=_map_location)["gen"]
gen.load_state_dict(gen_dict)
gen.eval()

# Classifier: one output per attribute, weights stored under "classifier".
n_classes = 40
classifier = Classifier(n_classes=n_classes).to(device)
class_dict = torch.load(
    "trained_classifier.pth",
    map_location=_map_location,
)["classifier"]
classifier.load_state_dict(class_dict)
classifier.eval()

print("Loaded the models!")
Given the noise with its gradient already calculated through the classifier, you
want to return the new noise vector.
<details>
<summary>Optional hint for <code>calculate_updated_noise</code></summary>

1. Remember the equation for gradient ascent: `new = old + (∇ old * weight)`.
</details>
# UNIT TEST
# Check that the basic function works
opt.zero_grad()
noise = torch.ones(20, 20) * 2
noise.requires_grad_()
fake_classes = (noise ** 2).mean()
fake_classes.backward()
new_noise = calculate_updated_noise(noise, 0.1)
assert isinstance(new_noise, torch.Tensor)
assert tuple(new_noise.shape) == (20, 20)
# d/dnoise of mean(noise ** 2) is 2 * noise / 400 = 0.01 per element here, so a
# weight of 0.1 should move every element from 2 to 2.001. The values are
# compared with a tolerance rather than exact float equality.
expected_value = torch.tensor(2.0010)
assert torch.isclose(new_noise.max(), expected_value)
assert torch.isclose(new_noise.min(), expected_value)
assert torch.isclose(new_noise.sum(), torch.tensor(0.4) + 20 * 20 * 2)
print("Success!")
The list you have here contains the features labeled in CelebA, which you used to train
your classifier. If you wanted to control another feature, you would need to get
data that is labeled with that feature and train a classifier on that feature.
Here, you'll have to implement the score function: the higher, the better. The
score is calculated by adding the target score and a penalty -- note that the
penalty is meant to lower the score, so it should have a negative value.
For every non-target class, take the difference between the current classifications
and the original classifications. The greater this value is, the more features
outside the target have changed. You will calculate the magnitude of the change
(the L2 norm across the non-target classes for each image), take the mean over the
batch, scale it by the penalty weight, and negate it. Finally, add this penalty to
the target score. The target score is the mean of the target class in the current
classifications.
# UNIT TEST
# All-ones against all-zeros classifications: the test expects
# 1 - sqrt(2) * 0.2 for target class 0 and other classes 1 and 2.
simple_score = get_score(torch.ones(4, 3), torch.zeros(4, 3), [0], [1, 2], 0.2)
assert torch.isclose(simple_score, 1 - torch.sqrt(torch.tensor(2.)) * 0.2)

# Identical current and original classifications: every row is [1, 2, 3, 4].
rows = 10
current_class = torch.tensor([[1., 2., 3., 4.]] * rows)
original_class = torch.tensor([[1., 2., 3., 4.]] * rows)

# Must be 3
unchanged_score = get_score(current_class, original_class, [1, 3], [0, 2], 0.2)
assert unchanged_score.item() == 3

print("Success!")
In the following block of code, you will run the gradient ascent with this new
score function. You might notice a few things after running it:
1. It may fail more often at producing the target feature when compared to the
original approach. This suggests that the model may not be able to generate an
image that has the target feature without changing the other features. This makes
sense! For example, it may not be able to generate a face that's smiling but whose
mouth is NOT slightly open. This may also expose a limitation of the generator.
Alternatively, even if the generator can produce an image with the intended
features, it might require many intermediate changes to get there and may get stuck
in a local minimum.
2. This process may change features which the classifier was not trained to
recognize since there is no way to penalize them with this method. Whether it's
possible to train models to avoid changing unsupervised features is an open
question.
# Generated images collected at every gradient step.
fake_image_history = []
### Change me! ###
# Feel free to change this value to any string from feature_names from earlier!
target_indices = feature_names.index("Smiling")
# Boolean mask over feature_names: True for every feature except the target.
other_indices = [cur_idx != target_indices for cur_idx, _ in
                 enumerate(feature_names)]
noise = get_noise(n_images, z_dim).to(device).requires_grad_()
# Classifications of the starting images, detached so they act as a fixed
# reference for the penalty term.
original_classifications = classifier(gen(noise)).detach()
for i in range(grad_steps):
    opt.zero_grad()
    fake = gen(noise)
    fake_image_history += [fake]
    fake_score = get_score(
        classifier(fake),
        original_classifications,
        target_indices,
        other_indices,
        penalty_weight=0.1
    )
    fake_score.backward()
    # Step the noise using the gradient just computed by backward().
    noise.data = calculate_updated_noise(noise, 1 / grad_steps)