引入
AlexNet$\color{red}^{[1]}$与LeNet的设计理念很类似,但也有显著的区别$\color{red}^{[2]}$:
1)包含$\mathbf{8}$层变换,其中有$5$层卷积和$2$层全连接隐藏层,以及一个全连接输出层:
1.1)第一层卷积窗口形状为$\mathbf{11 \times 11}$,能够适用于尺寸更大的图像;
1.2)第二层卷积窗口形状为$\mathbf{5 \times 5}$,其他为$3 \times 3$;
1.3)第1、2、5个卷积层之后都使用了窗口形状为$\mathbf{3 \times 3}$、步幅为$2$的最大池化层;
1.4)使用更多的卷积通道数;
1.5)紧接着最后一个卷积层的是两个输出个数为4096的全连接层。
2)使用更简单的ReLU激活函数:
2.1)ReLU函数计算简单,例如无需求幂运算;
2.2)ReLU函数在不同的参数初始化方法下,都能使模型更容易训练。
3)通过丢弃法来控制全连接层的模型复杂度。
4)引入了大量的图像增广,如翻转、裁剪和颜色变化等,从而进一步扩大数据集,并缓解过拟合。
html
1 模型构建
""" @author: Inki @contact: inki.yinji@qq.com @version: Created in 2020 1218, last modified in 2020 1218. """ import time import torch import torchvision from torch import nn, optim from util.SimpleTool import load_data_fashion_mnist class AlexNet(nn.Module): def __init__(self): super(AlexNet, self).__init__() self.conv = nn.Sequential( nn.Conv2d(in_channels=1, out_channels=96, kernel_size=11, stride=4), nn.ReLU(), nn.MaxPool2d(kernel_size=3, stride=2), nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2), nn.ReLU(), nn.MaxPool2d(3, 2), nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1), nn.ReLU(), nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1), nn.ReLU(), nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1), nn.ReLU(), nn.MaxPool2d(3, 2) ) self.fc = nn.Sequential( nn.Linear(256 * 5 * 5, 4096), nn.ReLU(), nn.Dropout(0.5), nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5), nn.Linear(4096, 10), ) def forward(self, img): """ The forward function. """ feature = self.conv(img) output = self.fc(feature.view(img.shape[0], -1)) return output if __name__ == '__main__': temp_net = AlexNet() print(temp_net)
输出如下:
AlexNet( (conv): Sequential( (0): Conv2d(1, 96, kernel_size=(11, 11), stride=(4, 4)) (1): ReLU() (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False) (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2)) (4): ReLU() (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False) (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (7): ReLU() (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (9): ReLU() (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (11): ReLU() (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False) ) (fc): Sequential( (0): Linear(in_features=6400, out_features=4096, bias=True) (1): ReLU() (2): Dropout(p=0.5, inplace=False) (3): Linear(in_features=4096, out_features=4096, bias=True) (4): ReLU() (5): Dropout(p=0.5, inplace=False) (6): Linear(in_features=4096, out_features=10, bias=True) ) )
2 读取数据
AlexNet使用的是ImageNet数据集,但是该数据集训练时间较长,这里仍然使用Fashion-MNIST数据集来测试。
读取数据时,额外将图像扩大到AlexNet中的$224 \times 224$:
if __name__ == '__main__':
    # The mini-batch size, and the size passed to the loader's resize option.
    temp_batch_size, temp_resize = 128, 224
    # Build the training and test iterators of Fashion-MNIST.
    temp_tr_iter, temp_te_iter = load_data_fashion_mnist(
        batch_size=temp_batch_size,
        resize=temp_resize,
    )
3 模型训练
训练函数与LeNet一致:
def train(net, tr_iter, te_iter, batch_size, optimizer,
          loss=nn.CrossEntropyLoss(),
          device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
          num_epochs=100):
    """
    The train function.

    Train the network and, after every epoch, print the mean training loss per batch,
    the training accuracy, the test accuracy and the elapsed time of the epoch.

    :param net: The network to train; it is moved to the given device.
    :param tr_iter: The iterable yielding (x, y) training batches.
    :param te_iter: The iterable yielding (x, y) test batches.
    :param batch_size: Not used by this function; kept so that existing callers keep working.
    :param optimizer: The optimizer that updates the parameters of the network.
    :param loss: The loss function called as loss(prediction, y).
    :param device: The device on which training and evaluation run.
    :param num_epochs: The number of passes over tr_iter.
    """
    net = net.to(device)
    print("Training on", device)
    for epoch in range(num_epochs):
        # All statistics, including the batch counter, restart with every epoch;
        # otherwise the printed loss would be divided by the batches of all previous epochs.
        temp_tr_loss_sum, temp_tr_acc_sum, temp_num, temp_batch_count = 0., 0., 0, 0
        temp_start_time = time.time()
        # Make sure layers such as dropout are in training mode for this epoch.
        net.train()
        for x, y in tr_iter:
            x = x.to(device)
            y = y.to(device)
            temp_y_pred = net(x)
            temp_loss = loss(temp_y_pred, y)
            optimizer.zero_grad()
            temp_loss.backward()
            optimizer.step()
            temp_tr_loss_sum += temp_loss.cpu().item()
            temp_tr_acc_sum += (temp_y_pred.argmax(dim=1) == y).sum().cpu().item()
            temp_num += y.shape[0]
            temp_batch_count += 1
        # Evaluate on the same device the network was moved to.
        test_acc = evaluate_accuracy(te_iter, net, device)
        print("Epoch %d, loss %.4f, training acc %.3f, test ass %.3f, time %.1f s" %
              (epoch + 1, temp_tr_loss_sum / temp_batch_count,
               temp_tr_acc_sum / temp_num, test_acc, time.time() - temp_start_time))


def evaluate_accuracy(data_iter, net, device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')):
    """
    The evaluate function, and the performance measure is accuracy.

    :param data_iter: The iterable yielding (x, y) batches.
    :param net: The network to evaluate; it must already be on the given device.
    :param device: The device to which every batch is moved.
    :return: The fraction of samples whose arg-max prediction equals the label.
    """
    ret_acc, temp_num = 0., 0
    # Remember the current mode so that the caller gets the network back unchanged.
    temp_was_training = net.training
    # The evaluate mode, and the dropout is closed.
    net.eval()
    try:
        with torch.no_grad():
            for x, y in data_iter:
                ret_acc += (net(x.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
                temp_num += y.shape[0]
    finally:
        net.train(temp_was_training)
    return ret_acc / temp_num
内存用了我8个G,CPU拉满,难顶。。
Training on cpu Epoch 1, loss 0.6334, training acc 0.756, test ass 0.846, time 1856.7 s
完整代码
""" @author: Inki @contact: inki.yinji@qq.com @version: Created in 2020 1218, last modified in 2020 1218. """ import time import torch import torchvision from torch import nn, optim from util.SimpleTool import load_data_fashion_mnist class AlexNet(nn.Module): def __init__(self): super(AlexNet, self).__init__() self.conv = nn.Sequential( nn.Conv2d(in_channels=1, out_channels=96, kernel_size=11, stride=4), nn.ReLU(), nn.MaxPool2d(kernel_size=3, stride=2), nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2), nn.ReLU(), nn.MaxPool2d(3, 2), nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1), nn.ReLU(), nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1), nn.ReLU(), nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1), nn.ReLU(), nn.MaxPool2d(3, 2) ) self.fc = nn.Sequential( nn.Linear(256 * 5 * 5, 4096), nn.ReLU(), nn.Dropout(0.5), nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5), nn.Linear(4096, 10), ) def forward(self, img): """ The forward function. """ feature = self.conv(img) output = self.fc(feature.view(img.shape[0], -1)) return output def train(net, tr_iter, te_iter, batch_size, optimizer, loss=nn.CrossEntropyLoss(), device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'), num_epochs=100): """ The train function. 
""" net = net.to(device) temp_batch_count = 0 print("Training on", device) for epoch in range(num_epochs): temp_tr_loss_sum, temp_tr_acc_sum, temp_num, temp_start_time = 0., 0., 0, time.time() for x, y in tr_iter: x = x.to(device) y = y.to(device) temp_y_pred = net(x) temp_loss = loss(temp_y_pred, y) optimizer.zero_grad() temp_loss.backward() optimizer.step() temp_tr_loss_sum += temp_loss.cpu().item() temp_tr_acc_sum += (temp_y_pred.argmax(dim=1) == y).sum().cpu().item() temp_num += y.shape[0] temp_batch_count += 1 test_acc = evaluate_accuracy(te_iter, net) print("Epoch %d, loss %.4f, training acc %.3f, test ass %.3f, time %.1f s" % (epoch + 1, temp_tr_loss_sum / temp_batch_count, temp_tr_acc_sum / temp_num, test_acc, time.time() - temp_start_time)) def evaluate_accuracy(data_iter, net, device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')): """ The evaluate function, and the performance measure is accuracy. """ ret_acc, temp_num = 0., 0 with torch.no_grad(): for x, y in data_iter: net.eval() # The evaluate mode, and the dropout is closed. ret_acc += (net(x.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item() net.train() temp_num += y.shape[0] return ret_acc / temp_num if __name__ == '__main__': temp_batch_size = 128 temp_resize = 224 temp_lr = 0.001 temp_num_epochs = 5 temp_tr_iter, temp_te_iter = load_data_fashion_mnist(temp_batch_size, resize=temp_resize) temp_net = AlexNet() temp_optimizer = optim.Adam(temp_net.parameters(), lr=temp_lr) train(temp_net, temp_tr_iter, temp_te_iter, temp_batch_size, temp_optimizer, num_epochs=temp_num_epochs)
支持代码
util.SimpleTool
def load_data_fashion_mnist(batch_size=10, root='D:/Data/Datasets/FashionMNIST', resize=None):
    """
    Download the fashion mnist dataset and then build the training and test data loaders.

    :param batch_size: The number of samples in every batch of both loaders.
    :param root: The directory handed to torchvision as the dataset root.
    :param resize: If truthy, the size handed to transforms.Resize before the tensor conversion.
    :return: The training loader (shuffled) and the test loader (not shuffled).
    """
    # Optional resize first, then the conversion to tensors.
    steps = [transforms.Resize(size=resize)] if resize else []
    steps.append(transforms.ToTensor())
    pipeline = transforms.Compose(steps)

    # The training split is requested first, the test split second.
    train_set, test_set = (
        torchvision.datasets.FashionMNIST(root=root, train=is_train, download=True, transform=pipeline)
        for is_train in (True, False)
    )

    # No worker processes on Windows; otherwise as many as cpu_count() reports.
    workers = 0 if sys.platform.startswith('win') else cpu_count()

    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=batch_size, shuffle=True, num_workers=workers)
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=batch_size, shuffle=False, num_workers=workers)
    return train_loader, test_loader
注:
[1] Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). Imagenet classification with deep convolutional neural networks. In Advances in neural information processing systems (pp. 1097-1105).
[2] 李沐、Aston Zhang等老师的《动手学深度学习》一书。
本文同步分享在 博客“因吉”(CSDN)。
若有侵权,请联系 support@oschina.cn 删除。
本文参与“OSC源创计划”,欢迎正在阅读的你也加入,一起分享。