```python
import torch
from torch import nn
from torch.nn import init
import numpy as np
import sys
sys.path.append('..')
import d2lzh_pytorch as d2l
import torchvision
import torchvision.transforms as transforms
```
```python
# Same dataset and batch size as in the previous section
batch_size = 256
mnist_train = torchvision.datasets.FashionMNIST(root='~/Datasets/FashionMNIST', download=True, train=True, transform=transforms.ToTensor())
mnist_test = torchvision.datasets.FashionMNIST(root='~/Datasets/FashionMNIST', download=True, train=False, transform=transforms.ToTensor())
if sys.platform.startswith('win'):
    num_worker = 0  # do not use extra worker processes to speed up data loading
else:
    num_worker = 4
train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_worker)
test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_worker)
```
The output layer of softmax regression is a fully connected layer, so a single linear module is all we need. Since each batch of samples X returned by the data loader has shape (batch_size, 1, 28, 28), we first call view() to reshape X into (batch_size, 784) before passing it to the fully connected layer.
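As a quick sanity check of that reshape (a minimal sketch using a dummy batch; the tensor name `X_dummy` is just for illustration and not part of the model code):

```python
X_dummy = torch.randn(256, 1, 28, 28)            # fake batch with the data loader's output shape
print(X_dummy.view(X_dummy.shape[0], -1).shape)  # torch.Size([256, 784]), since 1 * 28 * 28 = 784
```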
```python
num_inputs = 784
num_outputs = 10

class LinearNet(nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super(LinearNet, self).__init__()
        self.linear = nn.Linear(num_inputs, num_outputs)

    def forward(self, x):
        y = self.linear(x.view(x.shape[0], -1))
        return y

net = LinearNet(num_inputs, num_outputs)
```
```python
# Wrap the reshaping step into its own module, FlattenLayer
class FlattenLayer(nn.Module):
    def __init__(self):
        super(FlattenLayer, self).__init__()

    def forward(self, x):
        return x.view(x.shape[0], -1)
```
```python
from collections import OrderedDict

net = nn.Sequential(
    OrderedDict([
        ('flatten', FlattenLayer()),
        ('linear', nn.Linear(num_inputs, num_outputs))
    ])
)  # in the earlier linear regression example, num_outputs was 1
```
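As a side note, recent PyTorch releases (1.2 and later) ship a built-in `nn.Flatten` module, so the custom `FlattenLayer` is not strictly necessary there. The sketch below is only an equivalent alternative under that version assumption; the rest of this section keeps using `FlattenLayer`:

```python
# Equivalent model using the built-in nn.Flatten (assumes PyTorch >= 1.2);
# by default nn.Flatten keeps dim 0 (the batch dimension) and flattens the rest.
net_alt = nn.Sequential(
    OrderedDict([
        ('flatten', nn.Flatten()),
        ('linear', nn.Linear(num_inputs, num_outputs))
    ])
)
```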
```python
init.normal_(net.linear.weight, mean=0, std=0.01)
init.constant_(net.linear.bias, val=0)
```
```
Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)
```
```python
print(net)
```
```
Sequential(
  (flatten): FlattenLayer()
  (linear): Linear(in_features=784, out_features=10, bias=True)
)
```
```python
# PyTorch provides a single function that combines the softmax operation and the cross-entropy loss computation
loss = nn.CrossEntropyLoss()
```
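Computing softmax and then taking the log separately can overflow or underflow for large scores, which is why the fused loss is preferred. As a rough sanity check (the tensor names below are made up for illustration), `nn.CrossEntropyLoss` matches applying `log_softmax` followed by the negative log-likelihood loss:

```python
# Check that CrossEntropyLoss == LogSoftmax + NLLLoss on some fake data
logits = torch.randn(4, 10)           # fake scores for 4 samples, 10 classes
labels = torch.tensor([1, 0, 4, 9])   # fake class indices
fused = nn.CrossEntropyLoss()(logits, labels)
manual = nn.NLLLoss()(torch.log_softmax(logits, dim=1), labels)
print(torch.allclose(fused, manual))  # True
```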
```python
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)
```
```python
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        n += y.shape[0]
    return acc_sum / n
```
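For example, evaluating the freshly initialized model before training should give an accuracy close to 0.1, since random guessing over 10 classes is right about 10% of the time:

```python
print(evaluate_accuracy(test_iter, net))  # roughly 0.1 before training (random guessing over 10 classes)
```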
```python
num_epochs, lr = 5, 0.1

def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, optimizer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()

            # clear the gradients
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()

            l.backward()
            if optimizer is None:
                # no optimizer passed: fall back to the handwritten SGD from the previous section
                d2l.sgd(params, lr, batch_size)
            else:
                # optimizer is not None: the branch used in this "concise implementation of softmax regression" section
                optimizer.step()

            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
```
```python
train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, optimizer)
```
```
epoch 1, loss 0.0031, train acc 0.749, test acc 0.765
epoch 2, loss 0.0022, train acc 0.813, test acc 0.808
epoch 3, loss 0.0021, train acc 0.826, test acc 0.818
epoch 4, loss 0.0020, train acc 0.832, test acc 0.816
epoch 5, loss 0.0019, train acc 0.837, test acc 0.821
```