问题
I'm trying to build a custom Keras model with the subclassing API, but I get errors when I load a previously saved instance of my model and try to train it:
Here is my model class; it has 3 inputs and 1 output:
import tensorflow as tf
# Signature of one input tensor of the model: fixed shape (1, 40, 5, 1).
spec1 = tf.TensorSpec(shape=(1,40,5,1))
# Signature of the training target passed to train_step: fixed shape (1, 3).
spec2 = tf.TensorSpec(shape=(1,3))
class Conv_Rnn_model(tf.keras.Model):
    """Three-branch Conv2D + stacked-LSTM network with a 3-unit softmax head.

    Each of the three inputs ("M15", "H1", "H4") is processed by its own
    convolution followed by its own pair of LSTM layers. The three resulting
    feature vectors are concatenated and fed through two dense layers.

    The batch dimension is hard-coded to 1, both in the traced input
    signatures and in the reshapes performed by ``call``.
    """

    def __init__(self):
        # super() calls the parent-class constructor from the child class;
        # it also avoids the repeated-method-call problem that shows up in
        # diamond-shaped class hierarchies.
        super().__init__()

        self.loss_object = tf.keras.losses.MeanSquaredError()
        self.optimizer = tf.keras.optimizers.Adam()

        # Convolutions:
        #   input : [batch_size, rows, cols, channels]
        #   return: [batch_size, new_rows, new_cols, filters]
        shared_conv_args = {"input_shape": (40, 5, 1), "activation": "relu"}
        self.conv1 = tf.keras.layers.Conv2D(
            filters=32, kernel_size=(8, 2), name="conv1", **shared_conv_args
        )
        self.conv2 = tf.keras.layers.Conv2D(
            filters=64, kernel_size=(6, 1), name="conv2", **shared_conv_args
        )
        self.conv3 = tf.keras.layers.Conv2D(
            filters=128, kernel_size=(6, 1), name="conv3", **shared_conv_args
        )

        # Recurrent cells:
        #   input : [batch_size, time_steps, features]
        #   return: [batch_size, time_steps, units] (if return_sequences=True)
        lstm_units = 64
        self.lstm1A = tf.keras.layers.LSTM(
            lstm_units, return_sequences=True, name="lstm1A"
        )
        self.lstm1B = tf.keras.layers.LSTM(lstm_units, name="lstm1B")
        self.lstm2A = tf.keras.layers.LSTM(
            lstm_units, return_sequences=True, name="lstm2A"
        )
        self.lstm2B = tf.keras.layers.LSTM(lstm_units, name="lstm2B")
        self.lstm3A = tf.keras.layers.LSTM(
            lstm_units, return_sequences=True, name="lstm3A"
        )
        self.lstm3B = tf.keras.layers.LSTM(lstm_units, name="lstm3B")

        # Concatenation layer: joins the three branch outputs along axis 1.
        self.concat = tf.keras.layers.Concatenate(axis=1)

        # Fully connected layers:
        #   input : [batch_size, ..., input_dim]
        #   return: [batch_size, ..., units]
        self.dense = tf.keras.layers.Dense(32)
        self.out = tf.keras.layers.Dense(3, activation='softmax')

    @tf.function(input_signature=[[
        tf.TensorSpec(shape=(1, 40, 5, 1), name="M15"),
        tf.TensorSpec(shape=(1, 40, 5, 1), name="H1"),
        tf.TensorSpec(shape=(1, 40, 5, 1), name="H4"),
    ]])
    def call(self, data):
        """Forward pass over the three inputs.

        TODO: work out how the Conv2D output sizes are computed, so that the
        ``.shape`` lookups below can be replaced by constants.

        Args:
            data: list of three tensors, each of shape (1, 40, 5, 1), in the
                order given by the input signature ("M15", "H1", "H4").

        Returns:
            The output of the final 3-unit softmax layer.
        """
        maps_m15 = self.conv1(data[0])
        maps_h1 = self.conv2(data[1])
        maps_h4 = self.conv3(data[2])

        # Merge the (cols, filters) axes into a single feature axis so that
        # every remaining row is one time step for the LSTM layers.
        seq_shape_m15 = (1, maps_m15.shape[1], maps_m15.shape[2] * maps_m15.shape[3])
        seq_shape_h1 = (1, maps_h1.shape[1], maps_h1.shape[2] * maps_h1.shape[3])
        seq_shape_h4 = (1, maps_h4.shape[1], maps_h4.shape[2] * maps_h4.shape[3])

        seq_m15 = tf.reshape(maps_m15, seq_shape_m15)
        features_m15 = self.lstm1B(self.lstm1A(seq_m15))

        seq_h1 = tf.reshape(maps_h1, seq_shape_h1)
        features_h1 = self.lstm2B(self.lstm2A(seq_h1))

        seq_h4 = tf.reshape(maps_h4, seq_shape_h4)
        features_h4 = self.lstm3B(self.lstm3A(seq_h4))

        # Fully connected head on top of the concatenated branch features.
        merged = self.concat([features_m15, features_h1, features_h4])
        hidden = self.dense(merged)
        return self.out(hidden)

    @tf.function(input_signature=[[spec1, spec1, spec1], spec2])
    def train_step(self, X_train, y_train):
        """Run one optimisation step on a single sample.

        Args:
            X_train: list of three input tensors, each matching ``spec1``.
            y_train: target tensor matching ``spec2``.

        Returns:
            None. The model weights are updated in place by the optimizer.
        """
        with tf.GradientTape() as tape:
            # Forward pass, recorded on the tape.
            predictions = self(X_train)
            # Mean squared error between target and prediction.
            loss_value = self.loss_object(y_train, predictions)

        # Back-propagate and apply the update to every trainable variable.
        variables = self.trainable_variables
        grads = tape.gradient(loss_value, variables)
        self.optimizer.apply_gradients(zip(grads, variables))
When I train my model and then save it, everything works fine:
from Conv_Rnn_model import Conv_Rnn_model
from tensorflow.keras.models import load_model
from numpy.random import rand
from functions import getDataset_3period
# Train a fresh model for a few steps on random targets, then save it.
model_1 = Conv_Rnn_model()
model_1.compile(loss='mse', optimizer='adam')

iterations = 5
data_environment = getDataset_3period(window_size=40)  # get dataframe of EURUSD

for i in range(iterations):
    state = data_environment[i]
    # Random target of shape (1, 3).
    target = rand(1, 3)
    # One length-1 slice per model input, taken from the first three
    # entries of the current state.
    X_train = [state[k:k + 1] for k in range(3)]
    model_1.train_step(X_train, target)
    print("epoch", i)

model_1.save("models/model_test1")
But when I try to reload my trained model and train it again, I get errors:
# Reload the model saved under "models/model_test1", without compiling it.
model_2 = load_model("models/model_test1", compile=False)
2020-08-08 18:17:27.277841: W tensorflow/core/common_runtime/graph_constructor.cc:808] Node 'while' has 11 outputs but the _output_shapes attribute specifies shapes for 20 outputs. Output shapes may be inaccurate.
2020-08-08 18:17:28.048269: W tensorflow/core/common_runtime/graph_constructor.cc:808] Node 'while' has 11 outputs but the _output_shapes attribute specifies shapes for 20 outputs. Output shapes may be inaccurate.
2020-08-08 18:17:28.651946: W tensorflow/core/common_runtime/graph_constructor.cc:808] Node 'while' has 11 outputs but the _output_shapes attribute specifies shapes for 20 outputs. Output shapes may be inaccurate.
2020-08-08 18:17:28.946418: W tensorflow/core/common_runtime/graph_constructor.cc:808] Node 'while' has 11 outputs but the _output_shapes attribute specifies shapes for 20 outputs. Output shapes may be inaccurate.
2020-08-08 18:17:32.857832: W tensorflow/core/common_runtime/graph_constructor.cc:808] Node 'while' has 11 outputs but the _output_shapes attribute specifies shapes for 20 outputs. Output shapes may be inaccurate.
2020-08-08 18:17:32.872207: W tensorflow/core/common_runtime/graph_constructor.cc:808] Node 'while' has 11 outputs but the _output_shapes attribute specifies shapes for 20 outputs. Output shapes may be inaccurate.
2020-08-08 18:17:32.891483: W tensorflow/core/common_runtime/graph_constructor.cc:808] Node 'lstm1A/PartitionedCall' has 5 outputs but the _output_shapes attribute specifies shapes for 22 outputs. Output shapes may be inaccurate.
2020-08-08 18:17:32.892203: W tensorflow/core/common_runtime/graph_constructor.cc:808] Node 'lstm1B/PartitionedCall' has 5 outputs but the _output_shapes attribute specifies shapes for 22 outputs. Output shapes may be inaccurate.
2020-08-08 18:17:32.892926: W tensorflow/core/common_runtime/graph_constructor.cc:808] Node 'lstm2A/PartitionedCall' has 5 outputs but the _output_shapes attribute specifies shapes for 22 outputs. Output shapes may be inaccurate.
2020-08-08 18:17:32.893593: W tensorflow/core/common_runtime/graph_constructor.cc:808] Node 'lstm2B/PartitionedCall' has 5 outputs but the _output_shapes attribute specifies shapes for 22 outputs. Output shapes may be inaccurate.
2020-08-08 18:17:32.894289: W tensorflow/core/common_runtime/graph_constructor.cc:808] Node 'lstm3A/PartitionedCall' has 5 outputs but the _output_shapes attribute specifies shapes for 22 outputs. Output shapes may be inaccurate.
2020-08-08 18:17:32.894950: W tensorflow/core/common_runtime/graph_constructor.cc:808] Node 'lstm3B/PartitionedCall' has 5 outputs but the _output_shapes attribute specifies shapes for 22 outputs. Output shapes may be inaccurate.
# Forward pass on the reloaded model.
model_2(X_train)
Out[3]: <tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[0.2654996 , 0.40409103, 0.33040944]], dtype=float32)>
With the train_step function:
# Call the custom training step on the reloaded model.
model_2.train_step(X_train, target)
Traceback (most recent call last):
File "<ipython-input-4-27db33666dda>", line 1, in <module>
model_2.train_step(X_train, target)
TypeError: train_step() takes 2 positional arguments but 3 were given
Or with the fit function:
# Compile the reloaded model, then train it through the built-in fit loop.
model_2.compile(loss='mse', optimizer='adam')
model_2.fit(X_train, target)
Traceback (most recent call last):
File "<ipython-input-6-d013b7a5a810>", line 2, in <module>
model_2.fit(X_train, target)
File "/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py", line 103, in _method_wrapper
return method(self, *args, **kwargs)
File "/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py", line 1102, in fit
tmp_logs = self.train_function(iterator)
File "/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 787, in __call__
result = self._call(*args, **kwds)
File "/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 830, in _call
self._initialize(args, kwds, add_initializers_to=initializers)
File "/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 702, in _initialize
self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
File "/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 2948, in _get_concrete_function_internal_garbage_collected
graph_function, _, _ = self._maybe_define_function(args, kwargs)
File "/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3319, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3171, in _create_graph_function
func_graph_module.func_graph_from_py_func(
File "/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 987, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 613, in wrapped_fn
return weak_wrapped_fn().__wrapped__(*args, **kwds)
File "/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 974, in wrapper
raise e.ag_error_metadata.to_exception(e)
ValueError: in user code:
/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:809 train_function *
return step_function(self, iterator)
/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:799 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1261 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2794 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3217 _call_for_each_replica
return fn(*args, **kwargs)
/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:792 run_step **
outputs = model.train_step(data)
/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:750 train_step
y_pred = self(x, training=True)
/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:990 __call__
outputs = call_fn(inputs, *args, **kwargs)
/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/saving/saved_model/utils.py:71 return_outputs_and_add_losses
outputs, losses = fn(inputs, *args, **kwargs)
/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/saving/saved_model/utils.py:167 wrap_with_training_arg
return control_flow_util.smart_cond(
/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/utils/control_flow_util.py:112 smart_cond
return smart_module.smart_cond(
/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/framework/smart_cond.py:54 smart_cond
return true_fn()
/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/saving/saved_model/utils.py:168 <lambda>
training, lambda: replace_training_and_call(True),
/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/saving/saved_model/utils.py:165 replace_training_and_call
return wrapped_call(*args, **kwargs)
/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py:787 __call__
result = self._call(*args, **kwds)
/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py:821 _call
results = self._stateful_fn(*args, **kwds)
/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/function.py:2921 __call__
graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/function.py:3319 _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/function.py:3171 _create_graph_function
func_graph_module.func_graph_from_py_func(
/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py:987 func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py:613 wrapped_fn
return weak_wrapped_fn().__wrapped__(*args, **kwds)
/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/saved_model/function_deserialization.py:251 restored_function_body
raise ValueError(
ValueError: Could not find matching function to call loaded from the SavedModel. Got:
Positional arguments (2 total):
* (<tf.Tensor 'data:0' shape=(None, 40, 5, 1) dtype=float32>, <tf.Tensor 'data_1:0' shape=(None, 40, 5, 1) dtype=float32>, <tf.Tensor 'data_2:0' shape=(None, 40, 5, 1) dtype=float32>)
* True
Keyword arguments: {}
Expected these arguments to match one of the following 4 option(s):
Option 1:
Positional arguments (2 total):
* [TensorSpec(shape=(1, 40, 5, 1), dtype=tf.float32, name='data/0'), TensorSpec(shape=(1, 40, 5, 1), dtype=tf.float32, name='data/1'), TensorSpec(shape=(1, 40, 5, 1), dtype=tf.float32, name='data/2')]
* False
Keyword arguments: {}
Option 2:
Positional arguments (2 total):
* [TensorSpec(shape=(1, 40, 5, 1), dtype=tf.float32, name='M15'), TensorSpec(shape=(1, 40, 5, 1), dtype=tf.float32, name='H1'), TensorSpec(shape=(1, 40, 5, 1), dtype=tf.float32, name='H4')]
* False
Keyword arguments: {}
Option 3:
Positional arguments (2 total):
* [TensorSpec(shape=(1, 40, 5, 1), dtype=tf.float32, name='M15'), TensorSpec(shape=(1, 40, 5, 1), dtype=tf.float32, name='H1'), TensorSpec(shape=(1, 40, 5, 1), dtype=tf.float32, name='H4')]
* True
Keyword arguments: {}
Option 4:
Positional arguments (2 total):
* [TensorSpec(shape=(1, 40, 5, 1), dtype=tf.float32, name='data/0'), TensorSpec(shape=(1, 40, 5, 1), dtype=tf.float32, name='data/1'), TensorSpec(shape=(1, 40, 5, 1), dtype=tf.float32, name='data/2')]
* True
Keyword arguments: {}
I have been trying to resolve this bug for a week, and I have already read the TensorFlow guide many times.
回答1:
Try re-creating the model and copying the weights from the saved model to the new one. There were some issues with continuing the training of a saved model, and I used this workaround. You have to ensure that the layer names are the same in both models, or you may access the layers by index. Below is a code example for copying the weights:
# Copy the weights of every parameterised layer of the saved model into the
# layer of the new model that carries the same name.
for layer in modelSaved.layers:
    print(f"name = {layer.name}, params = {layer.count_params():d}")

    # Layers without parameters have nothing to copy.
    if layer.count_params() <= 0:
        continue

    destLayers = [x for x in model.layers if x.name == layer.name]

    # Only copy when the name matches exactly one destination layer...
    if len(destLayers) != 1:
        continue
    # ...and that layer holds the same number of parameters.
    if destLayers[0].count_params() != layer.count_params():
        continue

    destLayers[0].set_weights(layer.get_weights())
    print(f'Weights copied for layer {destLayers[0].name}')
来源:https://stackoverflow.com/questions/63320771/errors-when-training-my-saved-tensorflow-model