Question
I have built an autoencoder (1 encoder 8:5, 1 decoder 5:8) that takes the Pima-Indians-Diabetes dataset (https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv) and reduces its dimension from 8 to 5. I would now like to use these reduced features to classify the data with an MLP. Here I have some problems with my basic understanding of the architecture: how do I use the weights of the autoencoder and feed them into the MLP? I have checked these threads - https://github.com/keras-team/keras/issues/91 and https://www.codementor.io/nitinsurya/how-to-re-initialize-keras-model-weights-et41zre2g. The question here is which weight matrix should I consider: the one for the encoder part or the decoder part? When I add the layers for the MLP, how do I initialise the weights with these saved weights? I cannot work out the exact syntax. Also, should my MLP start with 5 neurons since my reduced dimension is 5? What are the possible dimensions of the MLP for this binary classification problem? Could anyone please elaborate?
The deep autoencoder code is as follows:
# from keras.models import Sequential
from keras.layers import Input, Dense
from keras.models import Model
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import numpy
# Data pre-processing...
# load pima indians dataset
dataset = numpy.loadtxt("C:/Users/dibsa/Python Codes/pima.csv", delimiter=",")
# split into input (X) and output (Y) variables
X = dataset[:, 0:8]
Y = dataset[:, 8]
# Split data into training and testing datasets
x_train, x_test, y_train, y_test = train_test_split(
X, Y, test_size=0.2, random_state=42)
# scale the data within [0-1] range
scalar = MinMaxScaler()
x_train = scalar.fit_transform(x_train)
x_test = scalar.transform(x_test)  # reuse the scaler fitted on the training data
# Autoencoder code begins here...
encoding_dim1 = 5 # size of encoded representations
encoding_dim2 = 3 # size of encoded representations in the bottleneck layer
# this is our input placeholder
input_data = Input(shape=(8,))
# "encoded" is the first encoded representation of the input
encoded = Dense(encoding_dim1, activation='relu', name='encoder1')(input_data)
# "enc" is the second encoded representation of the input
enc = Dense(encoding_dim2, activation='relu', name='encoder2')(encoded)
# "dec" is the lossy reconstruction of the input
dec = Dense(encoding_dim1, activation='sigmoid', name='decoder1')(enc)
# "decoded" is the final lossy reconstruction of the input
decoded = Dense(8, activation='sigmoid', name='decoder2')(dec)
# this model maps an input to its reconstruction
autoencoder = Model(inputs=input_data, outputs=decoded)
autoencoder.compile(optimizer='sgd', loss='mse')
# training
autoencoder.fit(x_train, x_train,
epochs=300,
batch_size=10,
shuffle=True,
validation_data=(x_test, x_test)) # need more tuning
# test the autoencoder by encoding and decoding the test dataset
reconstructions = autoencoder.predict(x_test)
print('Original test data')
print(x_test)
print('Reconstructed test data')
print(reconstructions)
The stacked autoencoder code is as follows:
# from keras.models import Sequential
from keras.layers import Input, Dense
from keras.models import Model
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import numpy
# Data pre-processing...
# load pima indians dataset
dataset = numpy.loadtxt("C:/Users/dibsa/Python Codes/pima.csv", delimiter=",")
# split into input (X) and output (Y) variables
X = dataset[:, 0:8]
Y = dataset[:, 8]
# Split data into training and testing datasets
x_train, x_test, y_train, y_test = train_test_split(
X, Y, test_size=0.2, random_state=42)
# scale the data within [0-1] range
scalar = MinMaxScaler()
x_train = scalar.fit_transform(x_train)
x_test = scalar.transform(x_test)  # reuse the scaler fitted on the training data
# Autoencoder code goes here...
encoding_dim1 = 5 # size of encoded representations
encoding_dim2 = 3 # size of encoded representations in the bottleneck layer
# this is our input placeholder
input_data1 = Input(shape=(8,))
# the first encoded representation of the input
encoded1 = Dense(encoding_dim1, activation='relu',
name='encoder1')(input_data1)
# the first lossy reconstruction of the input
decoded1 = Dense(8, activation='sigmoid', name='decoder1')(encoded1)
# this model maps an input to its first layer of reconstructions
autoencoder1 = Model(inputs=input_data1, outputs=decoded1)
# this is the first encoder model
enc1 = Model(inputs=input_data1, outputs=encoded1)
autoencoder1.compile(optimizer='sgd', loss='mse')
# training
autoencoder1.fit(x_train, x_train, epochs=300,
batch_size=10, shuffle=True,
validation_data=(x_test, x_test))
# feed the second autoencoder with the 5-dim codes from the first encoder
FirstAEoutput = enc1.predict(x_train)
input_data2 = Input(shape=(encoding_dim1,))
# the second encoded representations of the input
encoded2 = Dense(encoding_dim2, activation='relu',
name='encoder2')(input_data2)
# the final lossy reconstruction of the input
decoded2 = Dense(encoding_dim1, activation='sigmoid',
name='decoder2')(encoded2)
# this model maps an input to its second layer of reconstructions
autoencoder2 = Model(inputs=input_data2, outputs=decoded2)
# this is the second encoder
enc2 = Model(inputs=input_data2, outputs=encoded2)
autoencoder2.compile(optimizer='sgd', loss='mse')
# training
autoencoder2.fit(FirstAEoutput, FirstAEoutput, epochs=300,
batch_size=10, shuffle=True)
# this is the overall autoencoder mapping an input to its final reconstruction;
# the second autoencoder's layers are re-applied to the first encoder's output
# so that everything is connected to input_data1 (encoded2 belongs to a
# separate graph and cannot be used as an output here directly)
stack = autoencoder2.get_layer('encoder2')(encoded1)
stack = autoencoder2.get_layer('decoder2')(stack)
stack = autoencoder1.get_layer('decoder1')(stack)
autoencoder = Model(inputs=input_data1, outputs=stack)
# test the autoencoder by encoding and decoding the test dataset
reconstructions = autoencoder.predict(x_test)
print('Original test data')
print(x_test)
print('Reconstructed test data')
print(reconstructions)
Answer 1:
So many questions. What have you tried so far? Code snippets?
If your decoder is trying to reconstruct the input, then it doesn't really make sense to me to attach your classifier to its output. I mean, why not just attach it to the input in the first place? So if you are set on using an auto-encoder, I'd say it's pretty clear that you should attach your classifier to the output of the encoder pipe.
I'm not quite sure what you mean by "use the weights of the autoencoder and feed them into the mlp". You don't feed a layer with another layer's weights, but with its output signal. This is pretty easy to do in Keras. Let's say you defined your auto-encoder and trained it like this:
import keras as K
from keras.layers import Input, Dense
from keras.models import Model

x = Input(shape=(8,))
y = Dense(5, activation='sigmoid', name='encoder')(x)
y = Dense(8, name='decoder')(y)

ae = Model(inputs=x, outputs=y)
ae.compile(loss='mse', ...)
ae.fit(x_train, x_train, ...)

K.models.save_model(ae, './autoencoder.h5')
Then you can attach a classifying layer to the encoder's output and create a classifier model with the following code:
# load the model from the disk if you
# are in a different execution.
ae = K.models.load_model('./autoencoder.h5')
y = ae.get_layer('encoder').output
y = Dense(1, activation='sigmoid', name='predictions')(y)
classifier = Model(inputs=ae.inputs, outputs=y)
classifier.compile(loss='binary_crossentropy', ...)
classifier.fit(x_train, y_train, ...)
That's it, really. The classifier model will now have the first embedding layer (encoder) of the ae model as its first layer, followed by a sigmoid decision layer (predictions).
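If you want to convince yourself that the encoder's trained weights really are carried over into the classifier (rather than re-initialised), a quick sanity check along these lines should work, using the ae and classifier models defined above:
import numpy as np
# the 'encoder' layer is shared between ae and classifier, so its trained
# kernel and bias should be identical in both models
w_ae, b_ae = ae.get_layer('encoder').get_weights()
w_clf, b_clf = classifier.get_layer('encoder').get_weights()
assert np.allclose(w_ae, w_clf) and np.allclose(b_ae, b_clf)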
If what you are really trying to do is to use the weights learned by the auto-encoder to initialise the classifier's weights (I'm not positive I recommend this approach): you can take the weight matrices with layer.get_weights(), prune them (because the encoder has 5 units and the classifier only has 1) and finally set the classifier weights. Something along the following lines:
w, b = ae.get_layer('encoder').get_weights()
# keep only one of the encoder's five units
neuron_to_keep = 2
w = w[:, neuron_to_keep:neuron_to_keep + 1]
b = b[neuron_to_keep:neuron_to_keep + 1]
classifier.get_layer('predictions').set_weights([w, b])
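A small caveat worth adding here (my note, not part of the original answer): set_weights only accepts arrays whose shapes match the layer's current weights, so it helps to inspect those shapes before overwriting them:
# the predictions layer defined above sits on top of the 5-unit encoder, so it
# expects a (5, 1) kernel and a (1,) bias; weights pruned from the (8, 5)
# encoder kernel have shape (8, 1) and would only fit a first layer that takes
# the 8 raw inputs directly
for arr in classifier.get_layer('predictions').get_weights():
    print(arr.shape)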
Answer 2:
Idavid, this is for your reference - MLP using autoencoder-reduced features. I need to understand which figure is the correct one. Sorry, I had to upload the picture as an answer, as there was no option to upload an image via a comment. I think you are saying figure B is the correct one. Here is the code snippet for the same. Please let me know if I am going in the right direction.
# This is a mlp classification code with features reduced by an Autoencoder
# from keras.models import Sequential
from keras.layers import Input, Dense
from keras.models import Model
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import numpy
# Data pre-processing...
# load pima indians dataset
dataset = numpy.loadtxt("C:/Users/dibsa/Python Codes/pima.csv", delimiter=",")
# split into input (X) and output (Y) variables
X = dataset[:, 0:8]
Y = dataset[:, 8]
# Split data into training and testing datasets
x_train, x_test, y_train, y_test = train_test_split(
X, Y, test_size=0.2, random_state=42)
# scale the data within [0-1] range
scalar = MinMaxScaler()
x_train = scalar.fit_transform(x_train)
x_test = scalar.transform(x_test)  # reuse the scaler fitted on the training data
# Autoencoder code goes here...
encoding_dim = 5 # size of our encoded representations
# this is our input placeholder
input_data = Input(shape=(8,))
# "encoded" is the encoded representation of the input
encoded = Dense(encoding_dim, activation='relu', name='encoder')(input_data)
# "decoded" is the lossy reconstruction of the input
decoded = Dense(8, activation='sigmoid', name='decoder')(encoded)
# this model maps an input to its reconstruction
autoencoder = Model(inputs=input_data, outputs=decoded)
autoencoder.compile(optimizer='sgd', loss='mse')
# training
autoencoder.fit(x_train, x_train,
epochs=300,
batch_size=10,
shuffle=True,
validation_data=(x_test, x_test)) # need more tuning
# test the autoencoder by encoding and decoding the test dataset
reconstructions = autoencoder.predict(x_test)
print('Original test data')
print(x_test)
print('Reconstructed test data')
print(reconstructions)
# MLP code goes here...
# create model
x = autoencoder.get_layer('encoder').output
# h = Dense(3, activation='relu', name='hidden')(x)
y = Dense(1, activation='sigmoid', name='predictions')(x)
classifier = Model(inputs=autoencoder.inputs, outputs=y)
# Compile model
classifier.compile(loss='binary_crossentropy', optimizer='adam',
metrics=['accuracy'])
# Fit the model
classifier.fit(x_train, y_train, epochs=250, batch_size=10)
print('Now making predictions')
predictions = classifier.predict(x_test)
# round predictions
rounded_predicted_classes = [round(p[0]) for p in predictions]
temp = sum(y_test == rounded_predicted_classes)
acc = temp/len(y_test)
print(acc)
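For comparison, here is a rough, untested sketch of the figure-A variant, where the trained encoder is used purely as a feature extractor and a separate MLP with a 5-unit input layer is trained on the encoded data (reusing input_data, encoded, x_train and y_train from above):
# figure-A style: extract the 5-dim codes first, then train a separate MLP on them
encoder = Model(inputs=input_data, outputs=encoded)
x_train_encoded = encoder.predict(x_train)
x_test_encoded = encoder.predict(x_test)
# the MLP now starts with 5 inputs, since the reduced dimension is 5
mlp_input = Input(shape=(5,))
h = Dense(3, activation='relu', name='hidden')(mlp_input)
out = Dense(1, activation='sigmoid', name='predictions')(h)
mlp = Model(inputs=mlp_input, outputs=out)
mlp.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
mlp.fit(x_train_encoded, y_train, epochs=250, batch_size=10)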
Source: https://stackoverflow.com/questions/49565638/how-to-initialise-weights-of-a-mlp-using-an-autoencoder-2nd-part-deep-autoenc