I'm new with deep learning and I was making VGG 16(cifar-100) with pytorch.
I thought every thing was fine but when I test it, the test loss(accuracy) is very bad.
red=test, blue=train, left=loss, right=acc
I think this is overfit problem, so I searched to solve it.
I tried changing epochs, batch size, learning rate, weight decay, image preprocessing, and adding dropout, batch normalization.
Also tried to change the architecture of VGG.
But almost nothing improved the performance(rather the performance was worse)
My code is below and is there any solution to increase the performance? Thank you
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
transform = transforms.Compose(
[transforms.ToTensor(),
transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))])
train = torchvision.datasets.CIFAR100(root='CIFAR-100/',
train=True, transform=transform,
download=True)
test = torchvision.datasets.CIFAR100(root='CIFAR-100/',
train=False, transform=transform,
download=True)
train_loader = torch.utils.data.DataLoader(dataset=train, batch_size=256, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test, batch_size=256, shuffle=True)
cuda = torch.device('cuda')
class VGG(nn.Module):
def __init__(self):
super(VGG, self).__init__()
# 16
self.layer_1 = nn.Sequential(nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3,
stride=1, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3,
stride=1, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2)
)
self.layer_2 = nn.Sequential(nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3,
stride=1, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3,
stride=1, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2)
)
# 8
self.layer_3 = nn.Sequential(nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3,
stride=1, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3,
stride=1, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3,
stride=1, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2)
)
self.layer_4 = nn.Sequential(nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3,
stride=1, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3,
stride=1, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3,
stride=1, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2)
)
# 4
self.layer_5 = nn.Sequential(nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3,
stride=1, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3,
stride=1, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3,
stride=1, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2)
)
self.layer_6 = nn.Sequential(nn.Flatten(),
nn.Linear(512, 100, bias=True)
)
def forward(self, x):
x = self.layer_1(x)
x = self.layer_2(x)
x = self.layer_3(x)
x = self.layer_4(x)
x = self.layer_5(x)
x = self.layer_6(x)
return x
model = VGG()
model = model.cuda()
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001, weight_decay=5e-4)
cost = 0
iterations = []
train_losses = []
test_losses = []
train_acc = []
test_acc = []
for epoch in range(100):
model.train()
correct = 0
for X, Y in train_loader:
X = X.to(cuda)
Y = Y.to(cuda)
optimizer.zero_grad()
hypo = model(X)
cost = loss(hypo, Y)
cost.backward()
optimizer.step()
prediction = hypo.data.max(1)[1]
correct += prediction.eq(Y.data).sum()
model.eval()
correct2 = 0
for data, target in test_loader:
data = data.to(cuda)
target = target.to(cuda)
output = model(data)
cost2 = loss(output, target)
prediction = output.data.max(1)[1]
correct2 += prediction.eq(target.data).sum()
print("Epoch : {:>4} / cost : {:>.9}".format(epoch + 1, cost))
iterations.append(epoch)
train_losses.append(cost.tolist())
test_losses.append(cost2.tolist())
train_acc.append((100*correct/len(train_loader.dataset)).tolist())
test_acc.append((100*correct2/len(test_loader.dataset)).tolist())
# del train_loader
# torch.cuda.empty_cache()
model.eval()
correct = 0
for data, target in test_loader:
data = data.to(cuda)
target = target.to(cuda)
output = model(data)
prediction = output.data.max(1)[1]
correct += prediction.eq(target.data).sum()
print('Test set: Accuracy: {:.2f}%'.format(100. * correct / len(test_loader.dataset)))
plt.subplot(121)
plt.plot(range(1, len(iterations)+1), train_losses, 'b--')
plt.plot(range(1, len(iterations)+1), test_losses, 'r--')
plt.subplot(122)
plt.plot(range(1, len(iterations)+1), train_acc, 'b-')
plt.plot(range(1, len(iterations)+1), test_acc, 'r-')
plt.title('loss and accuracy')
plt.show()