Question
I'm attempting to train a regression model to predict attributes of music such as BPM. The model takes in spectrograms of audio snippets that are 256x128px PNG files and outputs a couple of continuous values. I have the following code so far, which I developed based on this guide on the TensorFlow website:
import tensorflow as tf
import os
import random
import pathlib

AUTOTUNE = tf.data.experimental.AUTOTUNE
TRAINING_DATA_DIR = r'specgrams'


def gen_model():
    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(256, 128, 3)),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(2)
    ])

    model.compile(optimizer=tf.keras.optimizers.RMSprop(0.001),
                  loss='mse',
                  metrics=['mse', 'mae'])

    return model


def fetch_batch(batch_size=1000):
    all_image_paths = []
    all_image_labels = []

    data_root = pathlib.Path(TRAINING_DATA_DIR)
    files = data_root.iterdir()

    for file in files:
        file = str(file)
        all_image_paths.append(os.path.abspath(file))
        label = file[:-4].split('-')[2:]
        label = float(label[0]) / 200, int(label[1]) / 1000.0
        all_image_labels.append(label)

    def preprocess_image(path):
        img_raw = tf.io.read_file(path)
        image = tf.image.decode_png(img_raw, channels=3)
        image = tf.image.resize(image, [256, 128])
        image /= 255.0
        return image

    def preprocess(path, label):
        return preprocess_image(path), label

    path_ds = tf.data.Dataset.from_tensor_slices(all_image_paths)
    image_ds = path_ds.map(preprocess_image, num_parallel_calls=AUTOTUNE)
    label_ds = tf.data.Dataset.from_tensor_slices(all_image_labels)
    ds = tf.data.Dataset.zip((image_ds, label_ds))
    ds = ds.shuffle(buffer_size=len(os.listdir(TRAINING_DATA_DIR)))
    ds = ds.repeat()
    ds = ds.batch(batch_size)
    ds = ds.prefetch(buffer_size=AUTOTUNE)

    return ds


ds = fetch_batch()

model = gen_model()

model.fit(ds, epochs=1, steps_per_epoch=10)
However, I believe I have made a mistake with the structure of my model or with how I am preprocessing the training data, because I get an error about incorrect dimensions, but I'm struggling to narrow down exactly where the issue is. I understand that the guide I followed was for a classification problem as opposed to regression, and my "labels" are an array of 2 values, which is what is causing the problem, but I'm not sure how to resolve this.
For context, the filenames are in the format xxx-xxx-A-B.png, where A and B are the two desired output values of the model. A is a floating-point value somewhere between 70 and 180 and B is an integer value between 0 and 1000. As such, the label variable for each image looks something like this: (0.64, 0.319).
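To make the label construction concrete, here is a minimal standalone sketch of the parsing done in fetch_batch above (the filename is a made-up example; only the last two fields, A and B, matter):

# Hypothetical example path in the xxx-xxx-A-B.png format (A = 128.5, B = 319)
file = 'specgrams/001-002-128.5-319.png'

# Same parsing as in fetch_batch: drop '.png', split on '-', keep the A and B fields
label = file[:-4].split('-')[2:]                       # ['128.5', '319']
label = float(label[0]) / 200, int(label[1]) / 1000.0
print(label)                                           # (0.6425, 0.319)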
This is the error I am seeing when I attempt to execute the above script:
Traceback (most recent call last):
  File "C:\Users\cainy\Desktop\BeatNet\training.py", line 60, in <module>
    model.fit(ds, epochs=1, steps_per_epoch=3)
  File "C:\Users\cainy\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\keras\engine\training.py", line 791, in fit
    initial_epoch=initial_epoch)
  File "C:\Users\cainy\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\keras\engine\training.py", line 1515, in fit_generator
    steps_name='steps_per_epoch')
  File "C:\Users\cainy\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\keras\engine\training_generator.py", line 257, in model_iteration
    batch_outs = batch_function(*batch_data)
  File "C:\Users\cainy\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\keras\engine\training.py", line 1259, in train_on_batch
    outputs = self._fit_function(ins)  # pylint: disable=not-callable
  File "C:\Users\cainy\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\keras\backend.py", line 3217, in __call__
    outputs = self._graph_fn(*converted_inputs)
  File "C:\Users\cainy\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\eager\function.py", line 558, in __call__
    return self._call_flat(args)
  File "C:\Users\cainy\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\eager\function.py", line 627, in _call_flat
    outputs = self._inference_function.call(ctx, args)
  File "C:\Users\cainy\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\eager\function.py", line 415, in call
    ctx=ctx)
  File "C:\Users\cainy\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\eager\execute.py", line 66, in quick_execute
    six.raise_from(core._status_to_exception(e.code, message), None)
  File "<string>", line 3, in raise_from
tensorflow.python.framework.errors_impl.InvalidArgumentError: Can not squeeze dim[1], expected a dimension of 1, got 2
     [[{{node metrics/accuracy/Squeeze}}]] [Op:__inference_keras_scratch_graph_734]
Edit: I have uploaded the source code to GitHub here.
Answer 1:
You currently only have one output: a tensor of length 2 (per batch element). If you want to use/monitor separate losses you'll need to unstack it in both the model output and the labels.
I'm not sure if models.Sequential will be suitable, but you can definitely use the functional API:
def gen_model():
    inputs = tf.keras.layers.Input(shape=(256, 128, 3), dtype=tf.float32)
    x = inputs
    # Flatten as in the question's original model, so each example becomes one vector
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(256, activation='relu')(x)
    x = tf.keras.layers.Dense(2)(x)
    # Split the length-2 output into two separate output tensors
    a, b = tf.keras.layers.Lambda(tf.unstack, arguments=dict(axis=-1))(x)
    model = tf.keras.models.Model(inputs=inputs, outputs=[a, b])
    model.compile(optimizer=tf.keras.optimizers.RMSprop(0.001),
                  loss=['mse', 'mae'],
                  metrics=[['mse'], ['mae']])
    return model
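Note that with two outputs, Keras matches the loss list to the outputs by position, so 'mse' is applied to a and 'mae' to b, and the labels need to be provided as a matching pair of tensors rather than a single length-2 tensor.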
And in your preprocessing:
def preprocess(path, label):
    return preprocess_image(path), tf.unstack(label, axis=-1)
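The answer only shows the changed preprocess function. One way to wire it into the fetch_batch pipeline from the question (a sketch, assuming the rest of fetch_batch stays as above) is to zip the paths and labels first and map preprocess over the pairs, so each element becomes an image plus two separate label tensors:

# Sketch: replaces the image_ds / label_ds construction inside fetch_batch
path_ds = tf.data.Dataset.from_tensor_slices(all_image_paths)
label_ds = tf.data.Dataset.from_tensor_slices(all_image_labels)
ds = tf.data.Dataset.zip((path_ds, label_ds))
ds = ds.map(preprocess, num_parallel_calls=AUTOTUNE)  # yields (image, (a, b)) pairs

ds = ds.shuffle(buffer_size=len(os.listdir(TRAINING_DATA_DIR)))
ds = ds.repeat()
ds = ds.batch(batch_size)
ds = ds.prefetch(buffer_size=AUTOTUNE)

Batched this way, each dataset element lines up with the model's two outputs, so model.fit(ds, ...) can be called as before.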
Source: https://stackoverflow.com/questions/55884849/how-do-i-create-a-regression-model-with-multiple-outputs-in-tf-keras