I got a memory error because of the huge number of images; it happens when I load all the images at once from the paths given in a dataframe.
dataframe(df_train
`ImageDataGenerator` creates a `tf.data.Dataset` object, so you can use that directly for more flexibility. You can pass a list of filenames, and it will only load them iteratively instead of reading everything into memory at once.
import pandas as pd
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
import tensorflow as tf
# Read the table currently on the clipboard. The ``uu`` and ``vv`` columns
# are later passed to load() as image file paths, and ``class_id`` is carried
# along unchanged as the third dataset component.
df = pd.read_clipboard()
x, y, z = df.uu, df.vv, df.class_id
def load(file_path, *, size=(100, 100)):
    """Read one PNG file and return it as a resized float32 image tensor.

    Args:
        file_path: Path of the PNG file, as a Python string or a scalar
            string tensor (which is what ``Dataset.map`` supplies).
        size: Target ``(height, width)`` forwarded to ``tf.image.resize``.
            Keyword-only so ``Dataset.map`` can never fill it positionally
            with a dataset component. Defaults to ``(100, 100)``, the value
            that was previously hard-coded.

    Returns:
        The decoded image as a float32 tensor with three channels, resized
        to ``size``.
    """
    raw_bytes = tf.io.read_file(file_path)
    # channels=3 asks the decoder for a three-channel image.
    img = tf.image.decode_png(raw_bytes, channels=3)
    # Per the TensorFlow docs, converting an integer image to float32 also
    # rescales the pixel values into [0, 1].
    img = tf.image.convert_image_dtype(img, tf.float32)
    return tf.image.resize(img, size=size)
# Slice the three columns row by row, decode both images of each row with
# load(), keep the class id untouched, and group the rows into batches of 4.
ds = (
    tf.data.Dataset.from_tensor_slices((x, y, z))
    .map(lambda first_path, second_path, class_id: (
        load(first_path),
        load(second_path),
        class_id,
    ))
    .batch(4)
)
# Pull the first batch to check that the pipeline runs end to end.
next(iter(ds))
Here's a complete example that starts from a list of files (which is easy to obtain when you have a dataframe) and goes all the way to model training.
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
import numpy as np
import cv2
from skimage import data
import tensorflow as tf
# Build a tiny on-disk dataset: five PNG copies of each scikit-image sample.
coffee = data.coffee()
cat = data.chelsea()
for image, name in zip([coffee, cat], ['coffee', 'cat']):
    # scikit-image sample images are RGB, while cv2.imwrite interprets the
    # array as BGR. Convert first so the saved files keep the right colours.
    bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    for i in range(5):
        cv2.imwrite(f'{name}_{i}.png', bgr)

# os.listdir() returns entries in arbitrary order and may contain unrelated
# files sharing the prefix. Keep only the PNGs and sort them so the pairing
# of cat and coffee files is deterministic.
cat_files = sorted(
    f for f in os.listdir() if f.startswith('cat') and f.endswith('.png')
)
coffee_files = sorted(
    f for f in os.listdir() if f.startswith('coffee') and f.endswith('.png')
)
def load(file_path, *, size=(100, 100)):
    """Load a PNG from disk as a float32 image tensor resized to ``size``.

    Args:
        file_path: Location of the PNG file (Python string or scalar string
            tensor, e.g. an element handed over by ``Dataset.map``).
        size: ``(height, width)`` given to ``tf.image.resize``. It is
            keyword-only, so a multi-component dataset mapped straight onto
            this function cannot bind a component to it by accident. The
            default ``(100, 100)`` is the formerly hard-coded value.

    Returns:
        A three-channel float32 image tensor resized to ``size``.
    """
    contents = tf.io.read_file(file_path)
    # Request three channels from the decoder.
    image = tf.image.decode_png(contents, channels=3)
    # According to the TensorFlow docs, integer -> float32 conversion also
    # rescales pixel values into the [0, 1] range.
    image = tf.image.convert_image_dtype(image, tf.float32)
    return tf.image.resize(image, size=size)
def label(string):
    """Encode a class name as an int32 flag: 1 for 'abnormal', 0 otherwise."""
    is_abnormal = tf.equal(string, 'abnormal')
    return tf.cast(is_abnormal, tf.int32)
x = cat_files
y = coffee_files
# One random class name per image pair. The count is derived from the file
# list rather than a literal 5: Dataset.zip stops at the shortest input, so
# a mismatch would silently drop samples.
z = np.random.choice(['normal', 'abnormal'], len(x))

# Image pairs are decoded lazily, one element at a time, by load().
inputs = tf.data.Dataset.from_tensor_slices((x, y)).map(
    lambda cat_path, coffee_path: (load(cat_path), load(coffee_path))
)
# Class names become 0/1 integer targets.
labels = tf.data.Dataset.from_tensor_slices(z).map(label)

# Elements are ((image1, image2), target), grouped into batches of 4.
ds = tf.data.Dataset.zip((inputs, labels)).batch(4)
# Pull the first batch to check that the pipeline runs end to end.
next(iter(ds))
# Two-input binary classifier: each 100x100x3 image is flattened, the two
# vectors are concatenated, and a small dense head emits one sigmoid score.
layers = tf.keras.layers

inputs1 = layers.Input(name='input1', shape=(100, 100, 3))
inputs2 = layers.Input(name='input2', shape=(100, 100, 3))

xx = layers.Flatten()(inputs1)
yy = layers.Flatten()(inputs2)

x = layers.Concatenate()([xx, yy])
x = layers.Dense(32, activation='relu')(x)
output = layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.Model(inputs=[inputs1, inputs2], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy')

# Train on the batched ((image1, image2), target) dataset.
history = model.fit(ds)
Train for 2 steps
1/2 [==============>...............] - ETA: 0s - loss: 0.7527
2/2 [==============================] - 1s 251ms/step - loss: 5.5188
Then you can also predict:
# Run inference on the same dataset that was used for training; the result
# holds one sigmoid score per image pair.
model.predict(ds)
array([[4.7391814e-26],
[4.7391814e-26],
[4.7391814e-26],
[4.7391814e-26],
[4.7390730e-26]], dtype=float32)