PyTorch is an open-source third-party library from Facebook; its purpose is to let you quickly build deep learning models and validate ideas.
First, import the required third-party libraries:
```python
import torch
from torch import nn, optim
from matplotlib import pyplot as plt
from torch.utils.data import DataLoader
from torch.autograd import Variable
import numpy as np
from torchvision import datasets, transforms
```
```python
# Load the MNIST training and test sets (downloaded on first run)
train_dataset = datasets.MNIST(root='./', train=True,
                               transform=transforms.ToTensor(), download=True)
test_dataset = datasets.MNIST(root='./', train=False,
                              transform=transforms.ToTensor(), download=True)
```
With older versions of PyTorch this code may raise an error; after upgrading to 1.7.1 it ran successfully for me.
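To confirm the download succeeded, you can check the dataset sizes (MNIST contains 60,000 training images and 10,000 test images):

```python
print(len(train_dataset))  # 60000
print(len(test_dataset))   # 10000
```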
```python
# Batch size
batch_size = 64
# Wrap the training set in a DataLoader
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
# Wrap the test set in a DataLoader
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)
```
Since each batch from the DataLoader has shape $$[batch\_size, 1, 28, 28]$$, we need to flatten it into a two-dimensional tensor before the fully connected layer during training and inference.
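A quick sanity check of the shape, as a minimal sketch using the train_loader defined above:

```python
# Grab one batch and inspect its shape before and after flattening
images, labels = next(iter(train_loader))
print(images.shape)                               # torch.Size([64, 1, 28, 28])
print(images.reshape(images.shape[0], -1).shape)  # torch.Size([64, 784])
```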
```python
# Define the network structure
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 10)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        # Flatten [batch_size, 1, 28, 28] to [batch_size, 784]
        inputs = x.reshape((x.shape[0], -1))
        x = self.fc1(inputs)
        out = self.softmax(x)
        return out
```
```python
def train():
    for i, data in enumerate(train_loader):
        # Get one batch of inputs and labels
        inputs, labels = data
        inputs = inputs.cuda()
        labels = labels.cuda()
        # Forward pass
        out = model(inputs)
        # Convert labels to one-hot.
        # scatter_(dim, index, src) -> Tensor: writes the values from src into
        # this tensor at the indices given by index along dimension dim,
        # following the indexing rules described for gather().
        labels = labels.reshape((-1, 1))
        one_hot = torch.zeros(inputs.shape[0], 10).cuda().scatter(1, labels, 1)
        # Compute the loss; mse_loss requires output and target to have the same shape
        loss = MSE_loss(out, one_hot)
        # Clear accumulated gradients
        optimizer.zero_grad()
        # Backpropagate
        loss.backward()
        # Update the parameters
        optimizer.step()
```
```python
def test():
    correct = 0
    for i, data in enumerate(test_loader):
        # Get one batch of inputs and labels
        inputs, labels = data
        inputs = inputs.cuda()
        labels = labels.cuda()
        # Forward pass
        out = model(inputs)
        # The predicted class is the index of the largest output
        _, predicted = torch.max(out, 1)
        correct += (predicted == labels).sum()
    print("Test acc:{0}".format(correct.item() / len(test_dataset)))
```
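The post never shows the code that actually runs these functions; a minimal driver might look like the following sketch (the nn.MSELoss choice matches the one-hot targets built in train(), and the epoch count of 10 is an arbitrary assumption):

```python
# Hypothetical driver: build the model on the GPU, define loss and optimizer,
# then alternate one training pass with one evaluation pass per epoch
model = Net().cuda()
MSE_loss = nn.MSELoss()  # assumed; shapes match the one-hot targets in train()
optimizer = optim.SGD(model.parameters(), lr=0.1)

for epoch in range(10):
    print("epoch:", epoch)
    train()
    test()
```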
Dropout randomly deactivates neurons during training; the parameter p is the probability that a unit is dropped.
```python
# Define the network structure, now with Dropout
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.layer1 = nn.Sequential(nn.Linear(784, 500), nn.Dropout(p=0.5), nn.Tanh())
        self.layer2 = nn.Sequential(nn.Linear(500, 200), nn.Dropout(p=0.5), nn.Tanh())
        self.layer3 = nn.Sequential(nn.Linear(200, 10), nn.Softmax(dim=1))

    def forward(self, x):
        inputs = x.reshape((x.shape[0], -1))
        x = self.layer1(inputs)
        x = self.layer2(x)
        out = self.layer3(x)
        return out
```
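One caveat: Dropout only drops units in training mode, and the train()/test() functions above never switch modes, so it is worth adding model.train() at the top of train() and model.eval() at the top of test(). The self-contained sketch below shows the difference on a bare nn.Dropout layer:

```python
import torch
from torch import nn

drop = nn.Dropout(p=0.5)
x = torch.ones(8)

drop.train()    # training mode: each unit is zeroed with probability p=0.5,
print(drop(x))  # and the surviving units are scaled by 1/(1-p) = 2.0

drop.eval()     # evaluation mode: Dropout is the identity
print(drop(x))  # tensor of ones
```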
```python
# Define the loss function: the variable name is kept from the MSE version
# above so train() still runs, but the loss is now cross-entropy
MSE_loss = nn.CrossEntropyLoss()
```
Besides cross-entropy, PyTorch defines many other loss functions; they are not all listed here.
The cross-entropy loss expects its first argument to have shape [batch_size, C] (where C is the number of classes) and its second argument to have shape [batch_size] (a one-dimensional vector of class indices), so the one-hot conversion in train() is no longer needed.
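Concretely, switching to nn.CrossEntropyLoss means dropping the one-hot step in train() and passing the raw labels directly; note also that nn.CrossEntropyLoss applies LogSoftmax internally, so strictly speaking the network should output raw logits (i.e. the final nn.Softmax layer can be removed). A sketch of the adjusted loss computation inside the training loop:

```python
# With cross-entropy the one-hot conversion disappears
out = model(inputs)           # shape [batch_size, 10]
loss = MSE_loss(out, labels)  # labels: shape [batch_size], dtype torch.long
optimizer.zero_grad()
loss.backward()
optimizer.step()
```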
```python
optimizer = optim.SGD(model.parameters(), lr=0.1)
# Or switch to Adam (which usually works better with a smaller lr, e.g. 0.001):
# optimizer = optim.Adam(model.parameters(), lr=0.1)
```
```python
optimizer = optim.SGD(model.parameters(), lr=0.1, weight_decay=0.001)
```
This adds L2 regularization; weight_decay is the penalty coefficient.
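For plain SGD, setting weight_decay to $$\lambda$$ (here 0.001) adds $$\lambda \theta$$ to each parameter's gradient, so the update becomes

$$\theta \leftarrow \theta - lr \cdot (\nabla_\theta L + \lambda \theta)$$

which is equivalent to minimizing $$L + \frac{\lambda}{2} \lVert \theta \rVert_2^2$$.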