文章目录

引入

AlexNet $\color{red}^{[1]}$ 与LeNet的设计理念非常类似，但也有显著的区别 $\color{red}^{[2]}$ ：
1）包含 $\mathbf{8}$ 层变换，其中有 $5$ 层卷积和 $2$ 层全连接隐藏层，以及一个全连接输出层：
1.1）第一层卷积窗口形状为 $\mathbf{11 \times 11}$ ，能够适用于尺寸更大的图像；
1.2）第二层卷积窗口形状为 $\mathbf{5 \times 5}$ ，其余为 $\mathbf{3 \times 3}$ ；
1.3）第1、2、5个卷积层之后都使用了窗口形状为 $\mathbf{3 \times 3}$ 、步幅为 $2$ 的最大池化层；
1.4）使用更多的卷积通道数；
1.5）紧接着最后一个卷积层的是两个输出个数为4096的全连接层。
2）使用更简单的ReLU激活函数：
2.1）ReLU函数计算简单，例如无需求幂运算；
2.2）ReLU函数在不同的参数初始化方法下，模型都更容易训练。
3）通过丢弃法来控制全连接层的模型复杂度。
4）引入了大量的图像增广，如翻转、裁剪和颜色变化等，从而进一步扩大数据集，并缓解过拟合。

1 模型构建

""" @author: Inki @contact: inki.yinji@qq.com @version: Created in 2020 1218, last modified in 2020 1218. """

import time
import torch
import torchvision
from torch import nn, optim
from util.SimpleTool import load_data_fashion_mnist


class AlexNet(nn.Module):
    """AlexNet-style network: five convolutional layers followed by three
    fully connected layers.

    The layer sizes are fixed for single-channel input and 10 output classes.
    The first fully connected layer expects a 256 x 5 x 5 feature map, which
    is what the convolutional stack yields for 224 x 224 input images.
    """

    def __init__(self):
        super(AlexNet, self).__init__()
        # Feature extractor. The spatial sizes in the comments assume a
        # 224 x 224 input.
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=96, kernel_size=11, stride=4),  # -> 96 x 54 x 54
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),  # -> 96 x 26 x 26
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(3, 2),  # -> 256 x 12 x 12
            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(3, 2)  # -> 256 x 5 x 5
        )
        # Classifier: two 4096-unit hidden layers regularised with dropout,
        # followed by the 10-way output layer.
        self.fc = nn.Sequential(
            nn.Linear(256 * 5 * 5, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 10),
        )

    def forward(self, img):
        """ The forward function.

        :param img: Image batch whose first dimension is the batch size.
        :return: The output of the last linear layer, one row of 10 scores
            per image.
        """
        feature = self.conv(img)
        # Flatten every feature map into one vector per sample before the
        # fully connected layers.
        output = self.fc(feature.view(img.shape[0], -1))
        return output


if __name__ == '__main__':
    # Build the network and show its layer-by-layer structure.
    print(AlexNet())

输出如下：

AlexNet(
  (conv): Sequential(
    (0): Conv2d(1, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=6400, out_features=4096, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=4096, out_features=10, bias=True)
  )
)

2 读取数据

AlexNet中使用的是ImageNet数据集，但该数据集训练时间较长，这里仍然使用Fashion-MNIST数据集来测试。
读取数据时，额外将图像扩大到AlexNet中的 $224 \times 224$ ：

if __name__ == '__main__':
    # Images are resized to 224 when loaded; batches hold 128 samples.
    temp_batch_size, temp_resize = 128, 224
    temp_tr_iter, temp_te_iter = load_data_fashion_mnist(batch_size=temp_batch_size,
                                                         resize=temp_resize)

3 模型训练

训练函数与LeNet一致：

def train(net, tr_iter, te_iter, batch_size, optimizer,
          loss=nn.CrossEntropyLoss(),
          device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
          num_epochs=100):
    """ The train function.

    Trains ``net`` for ``num_epochs`` epochs and prints, after every epoch,
    the mean batch loss, the training accuracy, the test accuracy and the
    elapsed time of that epoch.

    :param net: The model to train; it is moved to ``device``.
    :param tr_iter: Iterable of ``(x, y)`` training batches.
    :param te_iter: Iterable of ``(x, y)`` test batches.
    :param batch_size: Not used in the body; kept so existing calls still work.
    :param optimizer: Optimizer that updates the parameters of ``net``.
    :param loss: Callable applied as ``loss(prediction, y)``.
    :param device: Device used for both training and evaluation.
    :param num_epochs: Number of passes over ``tr_iter``.
    """
    net = net.to(device)
    print("Training on", device)
    for epoch in range(num_epochs):
        # Every running statistic, including the batch counter, is reset per
        # epoch. Otherwise the loss sum of one epoch would be divided by the
        # number of batches seen in all epochs so far.
        temp_tr_loss_sum, temp_tr_acc_sum, temp_num, temp_batch_count = 0., 0., 0, 0
        temp_start_time = time.time()
        # Make sure dropout is active no matter which mode the caller left
        # the model in.
        net.train()
        for x, y in tr_iter:
            x = x.to(device)
            y = y.to(device)
            temp_y_pred = net(x)
            temp_loss = loss(temp_y_pred, y)
            optimizer.zero_grad()
            temp_loss.backward()
            optimizer.step()
            temp_tr_loss_sum += temp_loss.cpu().item()
            temp_tr_acc_sum += (temp_y_pred.argmax(dim=1) == y).sum().cpu().item()
            temp_num += y.shape[0]
            temp_batch_count += 1
        # Evaluate on the same device the model lives on.
        test_acc = evaluate_accuracy(te_iter, net, device)
        # max(..., 1) avoids a ZeroDivisionError when tr_iter is empty.
        print("Epoch %d, loss %.4f, training acc %.3f, test ass %.3f, time %.1f s" %
              (epoch + 1, temp_tr_loss_sum / max(temp_batch_count, 1), temp_tr_acc_sum / max(temp_num, 1),
               test_acc, time.time() - temp_start_time))


def evaluate_accuracy(data_iter, net, device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')):
    """ The evaluate function, and the performance measure is accuracy.

    :param data_iter: Iterable of ``(x, y)`` batches.
    :param net: The model to evaluate. It is not moved here, so it must
        already live on ``device``.
    :param device: Device the batches are moved to before the forward pass.
    :return: Fraction of correctly classified samples, or ``0.0`` when
        ``data_iter`` yields no samples.
    """
    ret_acc, temp_num = 0., 0
    # Remember the caller's mode so that evaluation does not silently switch
    # a model that was in evaluate mode back to training mode.
    was_training = net.training
    net.eval()  # The evaluate mode, and the dropout is closed.
    try:
        with torch.no_grad():
            for x, y in data_iter:
                ret_acc += (net(x.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
                temp_num += y.shape[0]
    finally:
        net.train(was_training)

    return ret_acc / temp_num if temp_num else 0.0

内存用了我8个G，CPU拉满，难顶。。

Training on cpu
Epoch 1, loss 0.6334, training acc 0.756, test ass 0.846, time 1856.7 s

完整代码

""" @author: Inki @contact: inki.yinji@qq.com @version: Created in 2020 1218, last modified in 2020 1218. """

import time
import torch
import torchvision
from torch import nn, optim
from util.SimpleTool import load_data_fashion_mnist


class AlexNet(nn.Module):
    """Convolutional classifier with five convolutions and three linear layers.

    ``conv`` holds the feature extractor and ``fc`` the classifier head; the
    head consumes the flattened 256 * 5 * 5 output of ``conv`` and emits 10
    scores per sample.
    """

    def __init__(self):
        super().__init__()

        # Convolutional stage: each convolution is followed by a ReLU, and
        # max pooling (window 3, stride 2) follows convolutions 1, 2 and 5.
        feature_layers = [
            nn.Conv2d(1, 96, kernel_size=11, stride=4),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.conv = nn.Sequential(*feature_layers)

        # Classifier stage: two hidden layers of 4096 units with dropout,
        # then the output layer.
        hidden_units = 4096
        head_layers = [
            nn.Linear(256 * 5 * 5, hidden_units),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(hidden_units, 10),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, img):
        """Run ``img`` through the convolutional stage and the classifier."""
        batch = img.size(0)
        flat = self.conv(img).view(batch, -1)
        return self.fc(flat)


def train(net, tr_iter, te_iter, batch_size, optimizer,
          loss=nn.CrossEntropyLoss(),
          device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
          num_epochs=100):
    """ The train function.

    Trains ``net`` for ``num_epochs`` epochs and prints, after every epoch,
    the mean batch loss, the training accuracy, the test accuracy and the
    elapsed time of that epoch.

    :param net: The model to train; it is moved to ``device``.
    :param tr_iter: Iterable of ``(x, y)`` training batches.
    :param te_iter: Iterable of ``(x, y)`` test batches.
    :param batch_size: Not used in the body; kept so existing calls still work.
    :param optimizer: Optimizer that updates the parameters of ``net``.
    :param loss: Callable applied as ``loss(prediction, y)``.
    :param device: Device used for both training and evaluation.
    :param num_epochs: Number of passes over ``tr_iter``.
    """
    net = net.to(device)
    print("Training on", device)
    for epoch in range(num_epochs):
        # Every running statistic, including the batch counter, is reset per
        # epoch. Otherwise the loss sum of one epoch would be divided by the
        # number of batches seen in all epochs so far.
        temp_tr_loss_sum, temp_tr_acc_sum, temp_num, temp_batch_count = 0., 0., 0, 0
        temp_start_time = time.time()
        # Make sure dropout is active no matter which mode the caller left
        # the model in.
        net.train()
        for x, y in tr_iter:
            x = x.to(device)
            y = y.to(device)
            temp_y_pred = net(x)
            temp_loss = loss(temp_y_pred, y)
            optimizer.zero_grad()
            temp_loss.backward()
            optimizer.step()
            temp_tr_loss_sum += temp_loss.cpu().item()
            temp_tr_acc_sum += (temp_y_pred.argmax(dim=1) == y).sum().cpu().item()
            temp_num += y.shape[0]
            temp_batch_count += 1
        # Evaluate on the same device the model lives on.
        test_acc = evaluate_accuracy(te_iter, net, device)
        # max(..., 1) avoids a ZeroDivisionError when tr_iter is empty.
        print("Epoch %d, loss %.4f, training acc %.3f, test ass %.3f, time %.1f s" %
              (epoch + 1, temp_tr_loss_sum / max(temp_batch_count, 1), temp_tr_acc_sum / max(temp_num, 1),
               test_acc, time.time() - temp_start_time))


def evaluate_accuracy(data_iter, net, device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')):
    """ The evaluate function, and the performance measure is accuracy.

    :param data_iter: Iterable of ``(x, y)`` batches.
    :param net: The model to evaluate. It is not moved here, so it must
        already live on ``device``.
    :param device: Device the batches are moved to before the forward pass.
    :return: Fraction of correctly classified samples, or ``0.0`` when
        ``data_iter`` yields no samples.
    """
    ret_acc, temp_num = 0., 0
    # Remember the caller's mode so that evaluation does not silently switch
    # a model that was in evaluate mode back to training mode.
    was_training = net.training
    net.eval()  # The evaluate mode, and the dropout is closed.
    try:
        with torch.no_grad():
            for x, y in data_iter:
                ret_acc += (net(x.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
                temp_num += y.shape[0]
    finally:
        net.train(was_training)

    return ret_acc / temp_num if temp_num else 0.0


if __name__ == '__main__':
    # Hyper-parameters of the experiment.
    temp_batch_size, temp_resize = 128, 224
    temp_lr, temp_num_epochs = 0.001, 5

    # Data first, then the model and its Adam optimizer, then training.
    temp_tr_iter, temp_te_iter = load_data_fashion_mnist(batch_size=temp_batch_size,
                                                         resize=temp_resize)
    temp_net = AlexNet()
    temp_optimizer = optim.Adam(temp_net.parameters(), lr=temp_lr)
    train(net=temp_net, tr_iter=temp_tr_iter, te_iter=temp_te_iter, batch_size=temp_batch_size,
          optimizer=temp_optimizer, num_epochs=temp_num_epochs)

支持代码

util.SimpleTool

def load_data_fashion_mnist(batch_size=10, root='D:/Data/Datasets/FashionMNIST', resize=None):
    """ Build the training and test data loaders of Fashion-MNIST.

    :param batch_size: Number of samples per batch in both loaders.
    :param root: Directory handed to ``torchvision.datasets.FashionMNIST``;
        ``download=True`` is passed for both splits.
    :param resize: When truthy, a ``transforms.Resize`` step with this size
        is applied before the conversion to a tensor.
    :return: ``(train_iter, test_iter)``; only the training loader shuffles.
    """
    # Optional resize first, tensor conversion always last.
    steps = [transforms.Resize(size=resize)] if resize else []
    steps.append(transforms.ToTensor())
    pipeline = transforms.Compose(steps)

    # The training split is created before the test split.
    train_set, test_set = (
        torchvision.datasets.FashionMNIST(root=root, train=is_train, download=True, transform=pipeline)
        for is_train in (True, False)
    )

    # No worker processes on Windows; one per value of cpu_count() elsewhere.
    workers = 0 if sys.platform.startswith('win') else cpu_count()
    shared = dict(batch_size=batch_size, num_workers=workers)

    train_loader = torch.utils.data.DataLoader(train_set, shuffle=True, **shared)
    test_loader = torch.utils.data.DataLoader(test_set, shuffle=False, **shared)
    return train_loader, test_loader

注：
[1] Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). Imagenet classification with deep convolutional neural networks. In Advances in neural information processing systems (pp. 1097-1105).
[2] 李沐、Aston Zhang等老师的《动手学深度学习》一书。

本文同步分享在博客“因吉”（CSDN）。
若有侵权，请联系 support@oschina.cn 删除。
本文参与“OSC源创计划”，欢迎正在阅读的你也加入，一起分享。

深度学习 (二十一)：卷积神经网络之AlexNet模型