问题
This is the two methods for creating a keras model, but the output shapes of the summary results of the two methods are different. Obviously, the former prints more information and makes it easier to check the correctness of the network.
import tensorflow as tf
from tensorflow.keras import Input, layers, Model
class subclass(Model):
def __init__(self):
super(subclass, self).__init__()
self.conv = layers.Conv2D(28, 3, strides=1)
def call(self, x):
return self.conv(x)
def func_api():
x = Input(shape=(24, 24, 3))
y = layers.Conv2D(28, 3, strides=1)(x)
return Model(inputs=[x], outputs=[y])
if __name__ == '__main__':
func = func_api()
func.summary()
sub = subclass()
sub.build(input_shape=(None, 24, 24, 3))
sub.summary()
output:
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) (None, 24, 24, 3) 0
_________________________________________________________________
conv2d (Conv2D) (None, 22, 22, 28) 784
=================================================================
Total params: 784
Trainable params: 784
Non-trainable params: 0
_________________________________________________________________
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_1 (Conv2D) multiple 784
=================================================================
Total params: 784
Trainable params: 784
Non-trainable params: 0
_________________________________________________________________
So, how should I use the subclass method to get the output shape at the summary()?
回答1:
I have used this method to solve this problem, I don't know if there is an easier way.
class subclass(Model):
def __init__(self):
...
def call(self, x):
...
def model(self):
x = Input(shape=(24, 24, 3))
return Model(inputs=[x], outputs=self.call(x))
if __name__ == '__main__':
sub = subclass()
sub.model().summary()
回答2:
I guess that key point is the _init_graph_network method in the class Network, which is the parent class of Model. _init_graph_network will be called if you specify the inputs and outputs arguments when calling __init__ method.
So there will be two possible methods:
- Manually calling the
_init_graph_networkmethod to build the graph of the model. - Reinitialize with the input layer and output.
and both methods need the input layer and output (required from self.call).
Now calling summary will give the exact output shape. However it would show the Input layer, which isn't a part of subclassing Model.
from tensorflow import keras
from tensorflow.keras import layers as klayers
class MLP(keras.Model):
def __init__(self, input_shape=(32), **kwargs):
super(MLP, self).__init__(**kwargs)
# Add input layer
self.input_layer = klayers.Input(input_shape)
self.dense_1 = klayers.Dense(64, activation='relu')
self.dense_2 = klayers.Dense(10)
# Get output layer with `call` method
self.out = self.call(self.input_layer)
# Reinitial
super(MLP, self).__init__(
inputs=self.input_layer,
outputs=self.out,
**kwargs)
def build(self):
# Initialize the graph
self._is_graph_network = True
self._init_graph_network(
inputs=self.input_layer,
outputs=self.out
)
def call(self, inputs):
x = self.dense_1(inputs)
return self.dense_2(x)
if __name__ == '__main__':
mlp = MLP(16)
mlp.summary()
The output will be:
Model: "mlp_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 16)] 0
_________________________________________________________________
dense (Dense) (None, 64) 1088
_________________________________________________________________
dense_1 (Dense) (None, 10) 650
=================================================================
Total params: 1,738
Trainable params: 1,738
Non-trainable params: 0
_________________________________________________________________
来源:https://stackoverflow.com/questions/55235212/model-summary-cant-print-output-shape-while-using-subclass-model