Question
I'm using Talos and a Google Colab TPU to run hyperparameter tuning of a Keras model. Note that I'm using TensorFlow 1.15.0 and Keras 2.2.4-tf.
import os
import tensorflow as tf
import talos as ta
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split

def iris_model(x_train, y_train, x_val, y_val, params):
    # Specify a distributed strategy to use TPU
    resolver = tf.contrib.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
    tf.contrib.distribute.initialize_tpu_system(resolver)
    strategy = tf.contrib.distribute.TPUStrategy(resolver)

    # Use the strategy to create and compile a Keras model
    with strategy.scope():
        model = Sequential()
        model.add(Dense(32, input_shape=(4,), activation=tf.nn.relu, name="relu"))
        model.add(Dense(3, activation=tf.nn.softmax, name="softmax"))
        model.compile(optimizer=Adam(learning_rate=0.1), loss=params['losses'])

    # Convert data type to use TPU
    x_train = x_train.astype('float32')
    x_val = x_val.astype('float32')

    dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    dataset = dataset.cache()
    dataset = dataset.shuffle(1000, reshuffle_each_iteration=True).repeat()
    dataset = dataset.batch(params['batch_size'], drop_remainder=True)

    # Fit the Keras model on the dataset
    out = model.fit(dataset, batch_size=params['batch_size'], epochs=params['epochs'], validation_data=[x_val, y_val], verbose=0, steps_per_epoch=2)

    return out, model

# Load dataset
X, y = ta.templates.datasets.iris()

# Train and test set
x_train, x_val, y_train, y_val = train_test_split(X, y, test_size=0.30, shuffle=False)
# Create the hyperparameter distributions
p = {'losses': ['logcosh'], 'batch_size': [128, 256, 384, 512, 1024], 'epochs': [10, 20]}

# Use Talos to scan for the best hyperparameters of the Keras model
scan_object = ta.Scan(x_train, y_train, params=p, model=iris_model, experiment_name='test', x_val=x_val, y_val=y_val, fraction_limit=0.1)
After converting the train set to a Dataset using tf.data.Dataset, I get the following error when fitting the model with out = model.fit:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-3-c812209b95d0> in <module>()
8
9 # Use Talos to scan the best hyperparameters of the Keras model
---> 10 scan_object = ta.Scan(x_train, y_train, params=p, model=iris_model, experiment_name='test', x_val=x_val, y_val=y_val, fraction_limit=0.1)
8 frames
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training.py in _validate_or_infer_batch_size(self, batch_size, steps, x)
1813 'The `batch_size` argument must not be specified for the given '
1814 'input type. Received input: {}, batch_size: {}'.format(
-> 1815 x, batch_size))
1816 return
1817
ValueError: The `batch_size` argument must not be specified for the given input type. Received input: <DatasetV1Adapter shapes: ((512, 4), (512, 3)), types: (tf.float32, tf.float32)>, batch_size: 512
Then, if I follow those instructions and don't set the batch_size argument in model.fit, I get another error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-3-c812209b95d0> in <module>()
8
9 # Use Talos to scan the best hyperparameters of the Keras model
---> 10 scan_object = ta.Scan(x_train, y_train, params=p, model=iris_model, experiment_name='test', x_val=x_val, y_val=y_val, fraction_limit=0.1)
8 frames
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training.py in _distribution_standardize_user_data(self, x, y, sample_weight, class_weight, batch_size, validation_split, shuffle, epochs, allow_partial_batch)
2307 strategy) and not drop_remainder:
2308 dataset_size = first_x_value.shape[0]
-> 2309 if dataset_size % batch_size == 0:
2310 drop_remainder = True
2311
TypeError: unsupported operand type(s) for %: 'int' and 'NoneType'
Answer 1:
From the GitHub code:

ValueError will be raised if x is a generator or Sequence instance and batch_size is specified, as we expect users to provide batched datasets.

Try using batch_size = None
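In other words, when x is already a tf.data.Dataset, the batching has to live on the dataset itself. A minimal sketch along those lines (variable names are taken from the question's code; this is illustrative, not the full fix):

# Batch on the dataset, not in fit(); drop_remainder=True keeps the
# per-batch shape static, which the TPU requires.
dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
dataset = dataset.batch(params['batch_size'], drop_remainder=True)

# Omit batch_size (or pass batch_size=None) so Keras takes the batching
# from the dataset instead of raising the ValueError above.
out = model.fit(dataset, epochs=params['epochs'], verbose=0)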
Answer 2:
Not sure if the following fits your bill, but it's something to try. All I have done is take the repeat() off the dataset and remove batch_size=params['batch_size'] from model.fit. If that is more than you are ready to sacrifice, please ignore this post.
import os
import tensorflow as tf
import talos as ta
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

def iris_model(x_train, y_train, x_val, y_val, params):
    # Specify a distributed strategy to use TPU
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
    tf.config.experimental_connect_to_host(resolver.master())
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.experimental.TPUStrategy(resolver)

    with strategy.scope():
        model = Sequential()
        model.add(Dense(32, input_dim=4, activation=params['activation']))
        model.add(Dense(3, activation='softmax'))
        model.compile(optimizer=params['optimizer'], loss=params['losses'])

    # Convert the train set to a Dataset to use TPU
    dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    dataset = dataset.cache().shuffle(1000, reshuffle_each_iteration=True).batch(params['batch_size'], drop_remainder=True)

    out = model.fit(dataset, epochs=params['epochs'], validation_data=[x_val, y_val], verbose=0)

    return out, model

x, y = ta.templates.datasets.iris()

p = {'activation': ['relu', 'elu'],
     'optimizer': ['Nadam', 'Adam'],
     'losses': ['logcosh'],
     'batch_size': (20, 50, 5),
     'epochs': [10, 20]}

scan_object = ta.Scan(x, y, model=iris_model, params=p, fraction_limit=0.1, experiment_name='first_test')
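Dropping repeat() is what lets Keras infer the epoch length from the now-finite dataset. If you would rather keep repeat(), a hedged alternative is to bound each epoch yourself via fit's steps_per_epoch argument (the length computation below assumes x_train is array-like):

# Alternative sketch: keep the infinite repeat() but tell fit how many
# batches make up one epoch.
steps = len(x_train) // params['batch_size']  # assumes x_train supports len()
dataset = dataset.repeat()
out = model.fit(dataset, epochs=params['epochs'], steps_per_epoch=steps, verbose=0)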
Answer 3:
That second error is raised in _distribution_standardize_user_data when you don't pass batch_size to fit.
The code you're running for that function is here:

https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/keras/engine/training.py#L2192

You didn't post a traceback, but I bet it's failing on line 2294, since that's the only place where batch_size is multiplied by something.
if shuffle:
  # We want a buffer size that is larger than the batch size provided by
  # the user and provides sufficient randomness. Note that larger
  # numbers introduce more memory usage based on the size of each
  # sample.
  ds = ds.shuffle(max(1024, batch_size * 8))
It looks like you can shut it off by setting shuffle=False:

fit(ds, shuffle=False,...)

Does that work?
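Applied to the iris_model from the question, the call would look roughly like this (a sketch; the dataset already shuffles via ds.shuffle, so skipping Keras' internal shuffle loses nothing):

out = model.fit(dataset,
                epochs=params['epochs'],
                steps_per_epoch=2,              # kept from the original code
                validation_data=(x_val, y_val),
                shuffle=False,                  # avoid the ds.shuffle(max(1024, batch_size * 8)) path
                verbose=0)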
Answer 4:
Could you remove these lines from your code:
dataset = dataset.cache()
dataset = dataset.shuffle(1000, reshuffle_each_iteration=True).repeat()
dataset = dataset.batch(params['batch_size'], drop_remainder=True)
and replace them with these:
dataset = dataset.repeat()
dataset = dataset.batch(128, drop_remainder=True)
dataset = dataset.prefetch(1)
Otherwise, whatever you pass to tf.data.Dataset.from_tensor_slices has something to do with the error.
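For reference, the resulting pipeline would read as follows (a sketch; because repeat() makes the dataset infinite, fit still needs the steps_per_epoch argument from the original code):

dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
dataset = dataset.repeat()                         # loop indefinitely; fit() bounds each epoch
dataset = dataset.batch(128, drop_remainder=True)  # fixed batch size -> static shapes for the TPU
dataset = dataset.prefetch(1)                      # overlap input prep with TPU compute

out = model.fit(dataset, epochs=params['epochs'], steps_per_epoch=2, verbose=0)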
Source: https://stackoverflow.com/questions/58958437/tf-data-dataset-the-batch-size-argument-must-not-be-specified-for-the-given-i