MNIST with TensorFlow 2.0
This post follows https://geektutu.com/post/tensorflow2-mnist-cnn.html, but that article has a small problem, which is corrected here.
MNIST is handled here with a LeNet-5-style network. The input is a matrix (an image); in this post it is a 28*28 grayscale image.
Not counting the input layer, the network has 3 convolutional layers, 2 downsampling (pooling) layers, 1 fully connected layer, and 1 output layer.
LeNet paper: (paper link)
Import packages
import os
import tensorflow as tf
import numpy as np
from tensorflow.keras import datasets, layers, models
from PIL import Image
import matplotlib.pyplot as plt
np.set_printoptions(suppress=True)  # print without scientific notation
Define the model
class LeNet(object):
    def __init__(self):
        model = models.Sequential()
        # Conv layer 1: 32 filters of size 3*3; 28*28 is the size of the training images
        model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
        model.add(layers.MaxPooling2D((2, 2)))
        # Conv layer 2: 64 filters of size 3*3
        model.add(layers.Conv2D(64, (3, 3), activation='relu'))
        model.add(layers.MaxPooling2D((2, 2)))
        # Conv layer 3: 64 filters of size 3*3
        model.add(layers.Conv2D(64, (3, 3), activation='relu'))
        # Flatten turns the multi-dimensional feature maps into a 1-D vector; it is the usual
        # bridge from the convolutional layers to the fully connected layers and does not
        # affect the batch dimension.
        model.add(layers.Flatten())
        # Fully connected layers
        model.add(layers.Dense(64, activation='relu'))
        model.add(layers.Dense(10, activation='softmax'))
        # model.summary() prints the structure and parameter counts of every layer
        model.summary()
        self.model = model
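As a quick sanity check on the architecture (my own addition, not part of the original post): with 'valid' padding, each 3*3 convolution trims 2 pixels from each spatial dimension and each 2*2 pooling halves it (rounding down), so model.summary() should report 28x28x1 -> 26x26x32 -> 13x13x32 -> 11x11x64 -> 5x5x64 -> 3x3x64, then 576 after Flatten, then 64 and 10.

net = LeNet()
assert net.model.layers[-3].output_shape == (None, 3 * 3 * 64)  # Flatten output: 576 features
assert net.model.output_shape == (None, 10)                     # one probability per digit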
Data preparation
class DataSource(object):
    def __init__(self):
        # Where the MNIST data is cached; it is downloaded automatically if not present
        data_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'dataset_tf2', 'mnist.npz')
        (train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data(path=data_path)
        # 60,000 training images and 10,000 test images
        train_images = train_images.reshape((60000, 28, 28, 1))
        test_images = test_images.reshape((10000, 28, 28, 1))
        # Scale pixel values into the range 0 - 1
        train_images, test_images = train_images / 255.0, test_images / 255.0
        # print(train_images[0], train_labels[0])
        self.train_images, self.train_labels = train_images, train_labels
        self.test_images, self.test_labels = test_images, test_labels
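A small check that the data is loaded and scaled as expected (again my addition, not in the original post):

data = DataSource()
print(data.train_images.shape, data.test_images.shape)   # (60000, 28, 28, 1) (10000, 28, 28, 1)
print(data.train_images.min(), data.train_images.max())  # 0.0 1.0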
Training
class Train:
    def __init__(self):
        self.cnn = LeNet()
        self.data = DataSource()

    def train(self):
        check_path = './ckpt/cp-{epoch:04d}.ckpt'
        # period=5: save the weights every 5 epochs
        save_model_cb = tf.keras.callbacks.ModelCheckpoint(check_path, save_weights_only=True, verbose=1, period=5)
        self.cnn.model.compile(optimizer='adam',
                               loss='sparse_categorical_crossentropy',
                               metrics=['accuracy'])
        self.cnn.model.fit(self.data.train_images, self.data.train_labels, epochs=5, callbacks=[save_model_cb])
        test_loss, test_acc = self.cnn.model.evaluate(self.data.test_images, self.data.test_labels)
        print("Accuracy: %.4f, evaluated on %d test images" % (test_acc, len(self.data.test_labels)))
        # Debug print: compare a normalized test image and its label with the prediction input below
        print(self.data.test_images[0], self.data.test_labels[0])


app = Train()
app.train()
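Note that the period argument of ModelCheckpoint is deprecated in newer TensorFlow releases in favour of save_freq, which counts batches rather than epochs. If you see a deprecation warning, a roughly equivalent callback (a sketch, assuming Keras's default batch size of 32) would be:

steps_per_epoch = 60000 // 32  # 1875 batches per epoch at the default batch size
save_model_cb = tf.keras.callbacks.ModelCheckpoint(
    check_path, save_weights_only=True, verbose=1,
    save_freq=5 * steps_per_epoch)  # "every 5 epochs", expressed in batches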
Prediction
The original post uses x = np.array([1 - flatten_img]), but after several training runs the predictions were still wrong. Printing the data showed that flatten_img had not been normalized; changing the line to x = np.array([1 - (flatten_img / 255.0)]) fixes the problem.
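The effect is easy to see by printing the value ranges (a minimal illustration using the imports at the top of the post; it is not part of the original code):

raw = np.asarray(Image.open('./test_images/test.png').convert('L').resize((28, 28)), dtype=np.float32)
print((1 - raw).min(), (1 - raw).max())                  # roughly -254 .. 1: far outside the training range
print((1 - raw / 255.0).min(), (1 - raw / 255.0).max())  # 0.0 .. 1.0: same range as the normalized training images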
class Predict(object):
    def __init__(self):
        latest = tf.train.latest_checkpoint('./ckpt')
        self.cnn = LeNet()
        # Restore the network weights from the latest checkpoint
        self.cnn.model.load_weights(latest)

    def predict(self, image_path):
        # Read the image as grayscale
        img = Image.open(image_path).convert('L')
        print(img.size)
        img = img.resize((28, 28))
        print(img.size)
        plt.imshow(img)
        flatten_img = np.reshape(img, (28, 28, 1))
        # Invert and normalize so the input matches the training data:
        # values in [0, 1], white strokes on a black background
        x = np.array([1 - (flatten_img / 255.0)])
        # API reference: https://keras.io/models/model/
        y = self.cnn.model.predict(x)
        # Only one image was passed in, so take y[0];
        # np.argmax() returns the index of the largest probability, i.e. the predicted digit
        print(image_path)
        print(y[0])
        print(' -> Predicted digit:', np.argmax(y[0]))


app = Predict()
app.predict('./test_images/test.png')
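If no handwritten sample is at hand, one way to exercise Predict end to end is to export a digit from the MNIST test set itself (a sketch of my own, not from the original post). The image is inverted to a black digit on a white background because predict() inverts it back with 1 - flatten_img / 255.0:

from PIL import Image
from tensorflow.keras import datasets
import os

(_, _), (test_images, test_labels) = datasets.mnist.load_data()
os.makedirs('./test_images', exist_ok=True)
Image.fromarray(255 - test_images[0]).save('./test_images/test.png')  # black digit on white background
print('saved a test image of the digit', test_labels[0])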
Source: CSDN
Author: ahbcwin
Link: https://blog.csdn.net/weixin_43939952/article/details/104029631