数据集来自 http://yann.lecun.com/exdb/mnist/
尚未理解透彻CNN的结构和功能、原理blabla,先参考已有模型写一个看看
实现方法:Keras 2.3.1 + Tensorflow 2.1 + CUDA 10.1
参考了AlexNet的卷积神经网络结构,根据训练样本的特性修改获得一个新的CNN。
因为训练样本全部是黑白图像,只需要读取单通道的灰度信息即可,输入数据维数是\(28\times 28\times 1\).
网络的结构如下,激活函数全部使用relu,optimizer使用Adam
1:卷积层1,采用\(5\times 5\)卷积核,步长\((2,2)\),64层filter,padding方式采取边界补零,该层输出为\(14\times14\times64\)
2:池化层1,\(2\times 2\)平均值池化,步长\((2,2)\).padding方式去除多余边界,输出\(7\times 7\times64\)
3:正则化1
4:卷积层2:使用\(3\times3\)大小的卷积核,步长\((1,1)\),192层filter,padding方式边界补零,输出\(7\times 7\times 192\)
5:池化层2:\(2\times 2\)平均值池化,步长\((1,1)\).padding方式去除多余边界,输出\(6\times6\times 192\)
6:正则化2
7:全连接 + dropout
# CNNmod.py
"""AlexNet-inspired CNN for 28x28 single-channel MNIST digit images.

Two conv / average-pool / batch-norm stages followed by three fully
connected layers (4096 -> 4096 -> 10-way softmax). All activations are
relu except the final softmax.
"""
from tensorflow import keras


def myNetwork(img_rows, img_cols):
    """Build and return an uncompiled classification model.

    Parameters
    ----------
    img_rows, img_cols : int
        Spatial size of the grayscale input images; the input tensor
        has shape (img_rows, img_cols, 1).

    Returns
    -------
    keras.Model
        Functional model mapping images to a 10-class softmax output.
    """
    inputs = keras.Input(shape=[img_rows, img_cols, 1])

    # Conv stage 1: 5x5 kernel, stride 2, 64 filters, zero padding.
    # For a 28x28 input this yields 14x14x64.
    conv1 = keras.layers.Conv2D(filters=64,
                                kernel_size=[5, 5],
                                strides=[2, 2],
                                activation=keras.activations.relu,
                                use_bias=True,
                                padding='same')(inputs)
    # 2x2 average pooling, stride 2, no padding -> 7x7x64.
    pooling1 = keras.layers.AveragePooling2D(pool_size=[2, 2],
                                             strides=[2, 2],
                                             padding='valid')(conv1)
    # Fix: with channels_last data the normalized axis must be the
    # channel axis (-1); the original axis=1 normalized over image
    # rows instead of over the 64 filters.
    stand1 = keras.layers.BatchNormalization(axis=-1)(pooling1)

    # Conv stage 2: 3x3 kernel, stride 1, 192 filters, zero padding -> 7x7x192.
    conv2 = keras.layers.Conv2D(filters=192,
                                kernel_size=[3, 3],
                                strides=[1, 1],
                                activation=keras.activations.relu,
                                use_bias=True,
                                padding='same')(stand1)
    # 2x2 average pooling, stride 1, no padding -> 6x6x192.
    pooling2 = keras.layers.AveragePooling2D(pool_size=[2, 2],
                                             strides=[1, 1],
                                             padding='valid')(conv2)
    stand2 = keras.layers.BatchNormalization(axis=-1)(pooling2)

    # Classifier head: flatten -> 4096 -> 4096 -> 10, with dropout 0.5
    # after each hidden dense layer (AlexNet-style).
    flatten = keras.layers.Flatten()(stand2)
    fc1 = keras.layers.Dense(4096,
                             activation=keras.activations.relu,
                             use_bias=True)(flatten)
    drop1 = keras.layers.Dropout(0.5)(fc1)
    fc2 = keras.layers.Dense(4096,
                             activation=keras.activations.relu,
                             use_bias=True)(drop1)
    drop2 = keras.layers.Dropout(0.5)(fc2)
    fc3 = keras.layers.Dense(10,
                             activation=keras.activations.softmax,
                             use_bias=True)(drop2)

    return keras.Model(inputs=inputs, outputs=fc3)
训练主程序:
# train.py
"""Train the CNN from model/CNNmod.py on the pre-processed MNIST jpg sets.

Reads the 'filename;label' index files produced by preprocess.py, loads
each image as a grayscale float array in [0, 1], trains for `epochs`
epochs and saves the final weights under ./logs/.
"""
from tensorflow import keras
import cv2
import numpy as np
import tensorflow as tf

import model.CNNmod as mod

batch_size = 128
num_classes = 10
epochs = 10
img_rows, img_cols = 28, 28
log_dir = "./logs/"


def _load_split(index_file, image_dir):
    """Return (images, labels) for one 'filename;label'-per-line index.

    Images are grayscale arrays scaled to [0, 1]; labels are ints.
    Generalized from the original hard-coded 60000/10000 loop counts:
    every line of the index file is consumed.
    """
    images, labels = [], []
    with open(index_file, "r") as f:
        for line in f:
            name, label = line.strip().split(";")[:2]
            img = cv2.imread(image_dir + name)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # keep one channel only
            images.append(img / 255)
            # Fix: strip + int() — the original appended the raw field with
            # its trailing newline and relied on numpy's string coercion.
            labels.append(int(label))
    return images, labels


x_train, y_train = _load_split(r"./preprocess_train.txt", r"./train_set/")
print("训练数据集读取完成")
x_test, y_test = _load_split(r"./preprocess_test.txt", r"./test_set/")
print("测试数据集读取完成")

# To arrays with an explicit single channel axis: (N, rows, cols, 1).
x_train = np.array(x_train).reshape(-1, img_rows, img_cols, 1)
x_test = np.array(x_test).reshape(-1, img_rows, img_cols, 1)
y_train = keras.utils.to_categorical(np.array(y_train), num_classes)
y_test = keras.utils.to_categorical(np.array(y_test), num_classes)

# Build and compile the model (categorical cross-entropy + Adam).
model = mod.myNetwork(img_rows, img_cols)
model.compile(optimizer=tf.optimizers.Adam(0.001),
              loss=keras.losses.categorical_crossentropy,
              metrics=['accuracy'])
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(x_test, y_test))
model.save_weights(log_dir + 'latest.h5')
数据集预处理:使用opencv将mnist数据集转换成.jpg格式存储,标签存在文件名里,另外保存了一个.txt索引文件以便于读取。
# preprocess.py
"""Convert raw MNIST idx files into per-sample .jpg images plus index files.

Each image is written as '{train|test}_{index}_{label}.jpg', so the label
is recoverable from the file name; a 'filename;label' index .txt is then
written for each split.
"""
import os
import struct
import numpy as np
import cv2


def save_mnist_to_jpg(mnist_image_file, mnist_label_file, save_dir):
    """Decode one idx image/label file pair and write one jpg per sample.

    idx format is big-endian: labels start with (magic, count), images
    with (magic, count, rows, cols), followed by the raw uint8 payload.
    """
    prefix = 'train' if 'train' in os.path.basename(mnist_image_file) else 'test'

    # Fix: the original used open(...).read() and leaked both handles.
    with open(mnist_label_file, 'rb') as f:
        lbdata = f.read()
    with open(mnist_image_file, 'rb') as f:
        imgdata = f.read()

    magic, nums = struct.unpack_from('>II', lbdata, 0)
    label_offset = struct.calcsize('>II')
    magic, nums, numRows, numColumns = struct.unpack_from('>IIII', imgdata, 0)
    image_offset = struct.calcsize('>IIII')

    # Generalized from the hard-coded '>784B': use the header's geometry.
    img_fmt = '>{}B'.format(numRows * numColumns)
    img_size = struct.calcsize(img_fmt)
    for i in range(nums):
        label = struct.unpack_from('>B', lbdata, label_offset)[0]
        label_offset += struct.calcsize('>B')
        im = struct.unpack_from(img_fmt, imgdata, image_offset)
        image_offset += img_size
        img = np.array(im, dtype='uint8').reshape(numRows, numColumns)
        save_name = os.path.join(
            save_dir, '{}_{}_{}.jpg'.format(prefix, i, label))
        cv2.imwrite(save_name, img)


def _write_index(image_dir, index_file):
    """Write one 'filename;label' line per jpg found in image_dir."""
    with open(index_file, "w") as f:
        for photo in os.listdir(image_dir):
            # File names are '{prefix}_{index}_{label}.jpg' -> take the label.
            num = photo.split("_")[2].split(".")[0]
            f.write(photo + ";" + num + "\n")


if __name__ == '__main__':
    train_images = './dataset/train-images.idx3-ubyte'  # 训练集图像的文件名
    train_labels = './dataset/train-labels.idx1-ubyte'  # 训练集label的文件名
    test_images = './dataset/t10k-images.idx3-ubyte'  # 测试集图像的文件名
    test_labels = './dataset/t10k-labels.idx1-ubyte'  # 测试集label的文件名
    save_train_dir = './train_set'
    save_test_dir = './test_set'
    if not os.path.exists(save_train_dir):
        os.makedirs(save_train_dir)
    if not os.path.exists(save_test_dir):
        os.makedirs(save_test_dir)

    save_mnist_to_jpg(test_images, test_labels, save_test_dir)
    save_mnist_to_jpg(train_images, train_labels, save_train_dir)

    _write_index("./train_set", "./preprocess_train.txt")
    _write_index("./test_set", "./preprocess_test.txt")
使用以上代码和全部60000张样本训练10个epochs,batch_size为128,测试集使用全部10000张图片。使用CUDA10.1 + RTX2070进行训练,总时间约60秒训练完毕,测试集的错误率约0.9%。
接下来对上述代码进行修改,每次相对于原模型只修改一个变量:
(1).更改池化层的池化方式,将两个池化层由平均值池化改成最大值池化,验证测试集的错误率约0.86%
(2).更改池化层padding方式:由'valid'改成'same',验证测试集错误率无明显变化。
(3).更改卷积层1的激活函数:由relu改成sigmoid函数,观察到测试集错误率收敛速度加快,当训练完第二个Epoch时,验证测试集的错误率已经下降到约1.78%(使用relu时,同阶段错误率约5%);但使用sigmoid时,模型最终的错误率基本不再降低,约1.45%(使用relu时错误率约0.9%)。
参考:https://blog.csdn.net/weixin_41055137/article/details/81071226