Original source: https://nbviewer.jupyter.org/github/fchollet/deep-learning-with-python-not…
Using convnets with small datasets
Training a convnet from scratch on a small dataset
The relevance of deep learning for small-data problems
But more importantly, deep learning models are by nature highly repurposable: you can take, say, an image classification or speech-to-text model trained on a large-scale dataset and reuse it on a significantly different problem with only minor changes. In particular, in the case of computer vision, many pre-trained models (usually trained on the ImageNet dataset) are now publicly available for download and can be used to bootstrap powerful vision models out of very little data. That is what we will do in the next section.
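As a preview of that idea, here is a minimal sketch of loading such a pre-trained convolutional base in Keras. This is only an illustration of the reuse approach, not part of this section's workflow; it assumes the ImageNet weights for VGG16 can be downloaded via keras.applications:

from keras.applications import VGG16

# Load the VGG16 convolutional base pre-trained on ImageNet,
# without its densely connected classifier, sized for 150x150 RGB inputs.
conv_base = VGG16(weights='imagenet',
                  include_top=False,
                  input_shape=(150, 150, 3))
conv_base.summary()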
Downloading the data
The pictures can be downloaded from www.kaggle.com/c/dogs-vs-cats/data (you will need to create a Kaggle account if you don't already have one - don't worry, the process is painless).
import os, shutil

# The path to the directory where the original
# dataset was uncompressed
original_dataset_dir = '/Users/fchollet/Downloads/kaggle_original_data'

# The directory where we will
# store our smaller dataset
base_dir = '/Users/fchollet/Downloads/cats_and_dogs_small'
os.mkdir(base_dir)

# Directories for our training,
# validation and test splits
train_dir = os.path.join(base_dir, 'train')
os.mkdir(train_dir)
validation_dir = os.path.join(base_dir, 'validation')
os.mkdir(validation_dir)
test_dir = os.path.join(base_dir, 'test')
os.mkdir(test_dir)

# Directory with our training cat pictures
train_cats_dir = os.path.join(train_dir, 'cats')
os.mkdir(train_cats_dir)

# Directory with our training dog pictures
train_dogs_dir = os.path.join(train_dir, 'dogs')
os.mkdir(train_dogs_dir)

# Directory with our validation cat pictures
validation_cats_dir = os.path.join(validation_dir, 'cats')
os.mkdir(validation_cats_dir)

# Directory with our validation dog pictures
validation_dogs_dir = os.path.join(validation_dir, 'dogs')
os.mkdir(validation_dogs_dir)

# Directory with our test cat pictures
test_cats_dir = os.path.join(test_dir, 'cats')
os.mkdir(test_cats_dir)

# Directory with our test dog pictures
test_dogs_dir = os.path.join(test_dir, 'dogs')
os.mkdir(test_dogs_dir)

# Copy first 1000 cat images to train_cats_dir
fnames = ['cat.{}.jpg'.format(i) for i in range(1000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(train_cats_dir, fname)
    shutil.copyfile(src, dst)

# Copy next 500 cat images to validation_cats_dir
fnames = ['cat.{}.jpg'.format(i) for i in range(1000, 1500)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(validation_cats_dir, fname)
    shutil.copyfile(src, dst)

# Copy next 500 cat images to test_cats_dir
fnames = ['cat.{}.jpg'.format(i) for i in range(1500, 2000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(test_cats_dir, fname)
    shutil.copyfile(src, dst)

# Copy first 1000 dog images to train_dogs_dir
fnames = ['dog.{}.jpg'.format(i) for i in range(1000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(train_dogs_dir, fname)
    shutil.copyfile(src, dst)

# Copy next 500 dog images to validation_dogs_dir
fnames = ['dog.{}.jpg'.format(i) for i in range(1000, 1500)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(validation_dogs_dir, fname)
    shutil.copyfile(src, dst)

# Copy next 500 dog images to test_dogs_dir
fnames = ['dog.{}.jpg'.format(i) for i in range(1500, 2000)]
for fname in fnames:
    src = os.path.join(original_dataset_dir, fname)
    dst = os.path.join(test_dogs_dir, fname)
    shutil.copyfile(src, dst)
print('total training cat images:', len(os.listdir(train_cats_dir)))

total training cat images: 1000

print('total training dog images:', len(os.listdir(train_dogs_dir)))

total training dog images: 1000

print('total validation cat images:', len(os.listdir(validation_cats_dir)))

total validation cat images: 500

print('total validation dog images:', len(os.listdir(validation_dogs_dir)))

total validation dog images: 500

print('total test cat images:', len(os.listdir(test_cats_dir)))

total test cat images: 500

print('total test dog images:', len(os.listdir(test_dogs_dir)))

total test dog images: 500
Building our network
from keras import layers
from keras import models

model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu',
                        input_shape=(150, 150, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.summary()
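As a quick sanity check on the feature-map sizes that model.summary() reports (my own back-of-the-envelope arithmetic, not output copied from the notebook): each 3x3 'valid' convolution trims 2 pixels off the spatial size and each 2x2 max-pooling halves it, so the 150x150 input shrinks to 7x7 before Flatten():

size = 150
for _ in range(4):
    size = (size - 2) // 2   # 3x3 'valid' conv (-2), then 2x2 max-pooling (halve)
print(size)                  # 7, so Flatten() yields 7 * 7 * 128 = 6272 features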
from keras import optimizers

model.compile(loss='binary_crossentropy',
              optimizer=optimizers.RMSprop(lr=1e-4),
              metrics=['acc'])
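Because the network ends in a single sigmoid unit, binary_crossentropy is the natural loss for this two-class problem. A minimal sketch of what that loss computes for one prediction (plain Python, my own illustration, not code from the original notebook):

import math

def binary_crossentropy(y_true, y_pred):
    # Loss for a single example: y_true is 0 or 1, y_pred is the sigmoid output in (0, 1).
    return -(y_true * math.log(y_pred) + (1 - y_true) * math.log(1 - y_pred))

print(binary_crossentropy(1, 0.9))  # ~0.105: a confident, correct prediction costs little
print(binary_crossentropy(1, 0.1))  # ~2.303: a confident, wrong prediction costs a lot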
Data preprocessing
from keras.preprocessing.image import ImageDataGenerator

# All images will be rescaled by 1./255
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
        # This is the target directory
        train_dir,
        # All images will be resized to 150x150
        target_size=(150, 150),
        batch_size=20,
        # Since we use binary_crossentropy loss, we need binary labels
        class_mode='binary')

validation_generator = test_datagen.flow_from_directory(
        validation_dir,
        target_size=(150, 150),
        batch_size=20,
        class_mode='binary')
Found 2000 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.
for data_batch, labels_batch in train_generator:
    print('data batch shape:', data_batch.shape)
    print('labels batch shape:', labels_batch.shape)
    break
data batch shape: (20, 150, 150, 3)
labels batch shape: (20,)
history = model.fit_generator(
      train_generator,
      steps_per_epoch=100,
      epochs=30,
      validation_data=validation_generator,
      validation_steps=50)
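The steps_per_epoch and validation_steps values follow from the generator batch size of 20 (my own bookkeeping, spelled out for clarity): one epoch should draw each of the 2,000 training images once, and each validation pass should cover all 1,000 validation images:

train_images, validation_images, batch_size = 2000, 1000, 20
steps_per_epoch = train_images // batch_size        # 100 batches per epoch
validation_steps = validation_images // batch_size  # 50 batches per validation pass
print(steps_per_epoch, validation_steps)             # 100 50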
model.save('cats_and_dogs_small_1.h5')
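Saving the model after training means the HDF5 file can later be reloaded without rebuilding or retraining the network. A minimal sketch using the standard Keras API (the file name simply matches the one saved above):

from keras.models import load_model

# Reload the trained model from disk; this restores the architecture,
# the weights and the compiled optimizer state.
model = load_model('cats_and_dogs_small_1.h5')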
import matplotlib.pyplot as plt

acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

plt.figure()

plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()
Article source: Keras Learning Tutorial, Part 7