LSTM 3D input 3D output dimension mismatch - keras

I have a simple LSTM network in Keras:
def create_model(x):
    """Build and compile a one-layer LSTM model sized from ``x``.

    The number of time steps is read from ``x.shape[1]`` and the number of
    features from ``x.shape[2]``. ``return_sequences`` is not passed to the
    LSTM, so it stays at the library default.

    Returns the compiled ``Sequential`` model (MSE loss, Adam optimizer).
    """
    n_steps, n_features = x.shape[1], x.shape[2]
    lstm_model = Sequential()
    for layer in (
        LSTM(100, input_dim=n_features, input_length=n_steps),
        Dense(1, activation='sigmoid'),
    ):
        lstm_model.add(layer)
    lstm_model.compile(loss='mean_squared_error', optimizer='adam')
    return lstm_model
and I am trying to train it on the data that has the following shapes:
training data input: (100, 2784, 6), training data output: (100, 2784, 1)
validation data input: (50, 27, 6), validation data output: (50, 27, 1)
test data input: (50, 27, 6), test data output: (50, 27, 1)
model.fit(train_x, train_y, validation_data=(validation_x, validation_y), epochs=EPOCHS, batch_size=BATCH_SIZE, shuffle=False, callbacks=[...])
I keep getting dimension errors, either because a) the validation data does not have the same shape as the training data, or b) the y shapes are not right.
What am I doing wrong?
P.S. Standalone code for convenience
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import LSTM, Dense
# Synthetic integer tensors with the same shapes as the real data:
# inputs are (samples, time steps, 6 features), targets are (samples, time steps, 1).
x_train = np.arange(1670400).reshape((100, 2784, 6))
y_train = np.arange(278400).reshape((100, 2784, 1))

# Validation and test sequences are much shorter (27 steps) than training ones.
x_val = np.arange(8100).reshape((50, 27, 6))
y_val = np.arange(1350).reshape((50, 27, 1))

x_test = np.arange(8100).reshape((50, 27, 6))
y_test = np.arange(1350).reshape((50, 27, 1))
def create_model(x):
    """Return a compiled LSTM(100) -> Dense(1, sigmoid) model.

    ``x`` is only inspected for its shape: ``x.shape[1]`` becomes the fixed
    input length and ``x.shape[2]`` the input dimension of the LSTM layer.
    """
    layers = [
        LSTM(100, input_dim=x.shape[2], input_length=x.shape[1]),
        Dense(1, activation='sigmoid'),
    ]
    lstm_model = Sequential()
    for layer in layers:
        lstm_model.add(layer)
    lstm_model.compile(loss='mean_squared_error', optimizer='adam')
    return lstm_model


# Build the network from the training tensor's shape and fit it; this is the
# call that produces the dimension error described in the question.
model = create_model(x_train)
model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    epochs=10,
    batch_size=32,
    shuffle=False,
)

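This modification worked:
changing to batch_input_shape and adding return_sequences=True to the LSTM layer. With return_sequences=True the LSTM emits one output per time step, so the model output is 3D like the targets; leaving the time dimension as None in batch_input_shape lets the training sequences (2784 steps) and the validation sequences (27 steps) have different lengths.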
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import LSTM, Dense
# Same synthetic data as in the question. Each pair is (input with 6 features,
# target with 1 value per step); the element count is derived from the shape.
x_train, y_train = (
    np.arange(100 * 2784 * width).reshape((100, 2784, width)) for width in (6, 1)
)
x_val, y_val = (
    np.arange(50 * 27 * width).reshape((50, 27, width)) for width in (6, 1)
)
x_test, y_test = (
    np.arange(50 * 27 * width).reshape((50, 27, width)) for width in (6, 1)
)
def create_model(n_features=6, units=100):
    """Build and compile a sequence-to-sequence LSTM model.

    Args:
        n_features: Size of the last input axis. Defaults to 6, the value
            that was previously hard-coded.
        units: Number of LSTM units. Defaults to 100, as before.

    Returns:
        The compiled ``Sequential`` model (MSE loss, Adam optimizer).
    """
    lstm_model = Sequential()
    # Batch size and sequence length are left as None so that training and
    # validation data with different numbers of time steps are both accepted.
    # return_sequences=True keeps one output per time step, matching 3D targets.
    lstm_model.add(
        LSTM(units, batch_input_shape=(None, None, n_features), return_sequences=True)
    )
    lstm_model.add(Dense(1, activation='sigmoid'))
    lstm_model.compile(loss='mean_squared_error', optimizer='adam')
    return lstm_model


model = create_model()
model.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=10, batch_size=32, shuffle=False)

Related

Q: ValueError Keras expected conv2d_14_input to have shape (3, 12, 1) but got array with shape (3, 12, 6500)?

I am building a CNN for non-image data in Keras 2.1.0 on Windows 10.
My input feature is a 3x12 matrix of non-negative numbers and my output is a binary multi-label vector of length 6 (6x1).
I am running into this error: expected conv2d_14_input to have shape (3, 12, 1) but got array with shape (3, 12, 6500)
Here is my code below
import tensorflow as tf
from scipy.io import loadmat
import numpy as np
from tensorflow.keras.layers import BatchNormalization
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten
reshape_channel_train = loadmat('reshape_channel_train')
reshape_channel_test = loadmat('reshape_channel_test.mat')
reshape_label_train = loadmat('reshape_label_train')
reshape_label_test = loadmat('reshape_label_test')
X_train = reshape_channel_train['store_train']
X_test = reshape_channel_test['store_test']
X_train = np.expand_dims(X_train,axis = 0)
X_test = np.expand_dims(X_test, axis = 0)
Y_train = reshape_label_train['label_train']
Y_test = reshape_label_test['label_test']
classifier = Sequential()
classifier.add(Conv2D(8, kernel_size=(3,3) , input_shape=(3, 12, 1), padding="same"))
classifier.add(BatchNormalization())
classifier.add(Activation('relu'))
classifier.add(Conv2D(8, kernel_size=(3,3), input_shape=(3, 12, 1), padding="same"))
classifier.add(BatchNormalization())
classifier.add(Activation('relu'))
classifier.add(Flatten())
classifier.add(Dense(8, activation='relu'))
classifier.add(Dense(6, activation='sigmoid'))
classifier.compile(optimizer='nadam', loss='binary_crossentropy', metrics=['accuracy'])
history = classifier.fit(X_train, Y_train, batch_size = 32, epochs=100,
validation_data=(X_test, Y_test), verbose=2)
After some searching, I tried the dimension-expanding trick, but it does not seem to work:
X_train = np.expand_dims(X_train,axis = 0)
X_test = np.expand_dims(X_test, axis = 0)
The X_train variable containing 6500 training instances is loaded from a Matlab .mat file with dimension 3x12x6500.
Where each training instance is a 3x12 matrix.
Before using the expand_dims trick, the k-th training sample could be accessed with X_train[:,:,k], and X_train[:,:,k].shape would return (3,12). Also, X_train.shape would return (3, 12, 6500).
After using the expand_dims trick, X_train[:,:,k].shape returns (1, 3, 6500).
Please help me with this !
Thank you
You are arranging your data incorrectly. A Conv2D layer accepts data in the format (n_sample, height, width, channels), which in your case (for your X_train) is (6500,3,12,1). You simply need to bring your data into this layout:
# create data as in your matlab data
n_class = 6
n_sample = 6500
X_train = np.random.uniform(0,1, (3,12,n_sample)) # (3,12,n_sample)
Y_train = tf.keras.utils.to_categorical(np.random.randint(0,n_class, n_sample)) # (n_sample, n_classes)
# reshape your data for conv2d
X_train = X_train.transpose(2,0,1) # (n_sample,3,12)
X_train = np.expand_dims(X_train, -1) # (n_sample,3,12,1)
classifier = Sequential()
classifier.add(Conv2D(8, kernel_size=(3,3) , input_shape=(3, 12, 1), padding="same"))
classifier.add(BatchNormalization())
classifier.add(Activation('relu'))
classifier.add(Conv2D(8, kernel_size=(3,3), padding="same"))
classifier.add(BatchNormalization())
classifier.add(Activation('relu'))
classifier.add(Flatten())
classifier.add(Dense(8, activation='relu'))
classifier.add(Dense(n_class, activation='softmax'))
classifier.compile(optimizer='nadam', loss='categorical_crossentropy', metrics=['accuracy'])
history = classifier.fit(X_train, Y_train, batch_size = 32, epochs=2, verbose=2)
# get predictions
pred = np.argmax(classifier.predict(X_train), 1)
I also use a softmax activation with categorical_crossentropy, which is better suited to a multiclass problem, but you can change this. Remember to apply the same data manipulation to your test data as well.
You need to pass the data_format="channels_last" argument, because your channels are in the last position.
Try this:
# reshape() alone keeps the memory order, so applying it directly to a
# (3, 12, n_samples) array would mix values from different samples. Move the
# sample axis to the front first, then append the channel axis.
# NOTE(review): assumes the arrays are still in the raw (3, 12, n_samples)
# layout, i.e. the earlier expand_dims(axis=0) has not been applied - confirm.
x_train = np.transpose(x_train, (2, 0, 1)).reshape((-1, 3, 12, 1))
x_test = np.transpose(x_test, (2, 0, 1)).reshape((-1, 3, 12, 1))
and in each Conv2D layer: Conv2D(<other args>, data_format="channels_last")

ValueError: Dimension 1 in both shapes must be equal, but are 10 and 1

I was trying to implement shap DeepExplainer (DeepSHAP) module with the help of MNIST Dataset. But i am getting the following error:
ValueError: Dimension 1 in both shapes must be equal, but are 10 and 1. Shapes are [?,10] and [?,1]. for gradients_7/dense_2_1/Softmax_grad/gradients/gradients_7/dense_2_1/Softmax_grad/truediv_grad/Select_1 (op: 'Select') with input shapes: [?,1], [?,10], [?,10].
The error is in the line:
shap_values = e.shap_values(x_test[1:5])
Full code:
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
batch_size = 128
num_classes = 10
epochs = 12
# input image dimensions
img_rows, img_cols = 28, 28
# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()
if K.image_data_format() == 'channels_first':
x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
input_shape = (1, img_rows, img_cols)
else:
x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
input_shape = (img_rows, img_cols, 1)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
activation='relu',
input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss=keras.losses.categorical_crossentropy,
optimizer=keras.optimizers.Adadelta(),
metrics=['accuracy'])
model.fit(x_train, y_train,
batch_size=batch_size,
epochs=epochs,
verbose=1,
validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
import shap
import numpy as np
# select a set of background examples to take an expectation over
background = x_train[np.random.choice(x_train.shape[0], 100, replace=False)]
# explain predictions of the model on three images
e = shap.DeepExplainer(model, background)
shap_values = e.shap_values(x_test[1:5])
shap.image_plot(shap_values, -x_test[1:5])
Which Keras version you are using? You can print from your code keras.__version__
When I imported keras under Tensorflow, your code worked as expected. One of the major changes is using the Adam optimizer, which works better in your case as it is able to improve accuracy within a few epochs. I tried Adadelta, but it was optimizing slowly. Check the performance yourself.
Check the following code, which has only small modifications. All I did was import the modules under Tensorflow, as keras 2.3.1 was throwing some other error.
from __future__ import print_function
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras import backend as K
batch_size = 256
num_classes = 10
epochs = 12
# input image dimensions
img_rows, img_cols = 28, 28
# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()
if K.image_data_format() == 'channels_first':
x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
input_shape = (1, img_rows, img_cols)
else:
x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
input_shape = (img_rows, img_cols, 1)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),activation='relu', input_shape=input_shape,))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss=tf.keras.losses.categorical_crossentropy,
optimizer=tf.keras.optimizers.Adam(0.001),#tf.keras.optimizers.Adadelta(),
metrics=['accuracy'])
model.fit(x_train, y_train,
steps_per_epoch = x_train.shape[0]//batch_size,
batch_size=batch_size,
epochs=epochs,
verbose=1,
validation_data=(x_test, y_test),validation_steps=x_test.shape[0]//batch_size)
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Error fitting the model - expected conv2d_3_input to have 4 dimensions

I am trying to build a model to predict handwritten characters using the dataset given here (https://www.kaggle.com/sachinpatel21/az-handwritten-alphabets-in-csv-format)
EDIT: ( after making the changes suggested in the comments )
Error I get now : ValueError: Error when checking input: expected conv2d_4_input to have shape (28, 28, 1) but got array with shape (249542, 784, 1)
Find below the code for the CNN :
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras import backend as K
from keras.utils import np_utils
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
seed = 785
np.random.seed(seed)
dataset = np.loadtxt('../input/A_Z Handwritten Data/A_Z Handwritten Data.csv', delimiter=',')
print(dataset.shape) # (372451, 785)
X = dataset[:,1:785]
Y = dataset[:,0]
(X_train, X_test, Y_train, Y_test) = train_test_split(X, Y, test_size=0.33, random_state=seed)
X_train = X_train / 255
X_test = X_test / 255
X_train = X_train.reshape((-1, X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((-1, X_test.shape[0], X_test.shape[1], 1))
print(X_train.shape) # (1, 249542, 784, 1)
Y_train = np_utils.to_categorical(Y_train)
Y_test = np_utils.to_categorical(Y_test)
print(Y_test.shape) # (122909, 26)
num_classes = Y_test.shape[1] # 26
model = Sequential()
model.add(Conv2D(32, (5, 5), input_shape=(28, 28, 1), activation='relu', data_format="channels_last"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print("DONE")
model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=10, batch_size=256, verbose=2)
# Final evaluation of the model
scores = model.evaluate(X_test,Y_test, verbose=0)
print("CNN Error: %.2f%%" % (100-scores[1]*100))
model.save('weights.model')
So the problem is that your data isn't structured properly. Look at the solution below:
Read the data with pandas:
data = pd.read_csv('/users/vpolimenov/Downloads/A_Z Handwritten Data.csv')
data.shape
# shape: (372450, 785)
Get your X and y:
data.rename(columns={'0':'label'}, inplace=True)
X = data.drop('label',axis = 1)
y = data['label']
Split and scale:
# MinMaxScaler is not imported anywhere else in this snippet.
from sklearn.preprocessing import MinMaxScaler

X_train, X_test, y_train, y_test = train_test_split(X,y)
# Fit the scaler on the training split only, then apply the same scaling to
# both splits so the test data does not influence the scaling parameters.
standard_scaler = MinMaxScaler()
standard_scaler.fit(X_train)
X_train = standard_scaler.transform(X_train)
X_test = standard_scaler.transform(X_test)
Here is the magic:
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1).astype('float32')
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1).astype('float32')
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
X_train.shape
# (279337, 28, 28, 1)
Here is your model:
# Same architecture as in the question; the one-hot label width sets the
# size of the output layer.
num_classes = y_test.shape[1] # 26
model = Sequential()
# The first layer declares 28x28 single-channel inputs, channels last.
model.add(Conv2D(32, (5, 5), input_shape=(28, 28, 1), activation='relu', data_format="channels_last"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print("DONE")
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=256, verbose=2) # no shape error once the inputs are reshaped to (n, 28, 28, 1)
Summary of your model:
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_25 (Conv2D) (None, 24, 24, 32) 832
_________________________________________________________________
max_pooling2d_25 (MaxPooling (None, 12, 12, 32) 0
_________________________________________________________________
dropout_1 (Dropout) (None, 12, 12, 32) 0
_________________________________________________________________
flatten_25 (Flatten) (None, 4608) 0
_________________________________________________________________
dense_42 (Dense) (None, 128) 589952
_________________________________________________________________
dense_43 (Dense) (None, 26) 3354
=================================================================
Total params: 594,138
Trainable params: 594,138
Non-trainable params: 0
I've stopped it after the second epoch, but you can see it working:
Train on 279337 samples, validate on 93113 samples
Epoch 1/10
- 80s - loss: 0.2478 - acc: 0.9308 - val_loss: 0.1021 - val_acc: 0.9720
Epoch 2/10
- 273s - loss: 0.0890 - acc: 0.9751 - val_loss: 0.0716 - val_acc: 0.9803
Epoch 3/10
Note:
It takes so long to fit due to the huge number of parameters in your network. You can try to reduce those and get a much faster/efficient network.

How to change the Keras sample code model.fit to generator manner?

I'm trying to implement a CNN+RNN+LSTM structure(1) with Keras.
And I found a related Keras sample code.
How can I convert the model.fit to model.fit_generator correctly?
Original code:
from keras.models import Sequential
from keras.layers import Activation, MaxPooling2D, Dropout, LSTM, Flatten, Merge, TimeDistributed
import numpy as np
from keras.layers import Concatenate
from keras.layers.convolutional import Conv2D
# Generate fake data
# Assumed to be 1730 grayscale video frames
x_data = np.random.random((1730, 1, 8, 10))
sequence_lengths = None
Izda=Sequential()
Izda.add(TimeDistributed(Conv2D(40,(3,3),padding='same'), input_shape=(sequence_lengths, 1,8,10)))
Izda.add(Activation('relu'))
Izda.add(TimeDistributed(MaxPooling2D(data_format="channels_first", pool_size=(2, 2))))
Izda.add(Dropout(0.2))
Dcha=Sequential()
Dcha.add(TimeDistributed(Conv2D(40,(3,3),padding='same'), input_shape=(sequence_lengths, 1,8,10)))
Dcha.add(Activation('relu'))
Dcha.add(TimeDistributed(MaxPooling2D(data_format="channels_first", pool_size=(2, 2))))
Dcha.add(Dropout(0.2))
Frt=Sequential()
Frt.add(TimeDistributed(Conv2D(40,(3,3),padding='same'), input_shape=(sequence_lengths, 1,8,10)))
Frt.add(Activation('relu'))
Frt.add(TimeDistributed(MaxPooling2D(data_format="channels_first", pool_size=(2, 2))))
Frt.add(Dropout(0.2))
merged=Merge([Izda, Dcha,Frt], mode='concat', concat_axis=2)
#merged=Concatenate()([Izda, Dcha, Frt], axis=2)
# Output from merge is (batch_size, sequence_length, 120, 4, 5)
# We want to get this down to (batch_size, sequence_length, 120*4*5)
model=Sequential()
model.add(merged)
model.add(TimeDistributed(Flatten()))
model.add(LSTM(240, return_sequences=True))
model.compile(loss='mse', optimizer='adam')
model.summary()
After my modification:
from keras.models import Sequential
from keras.layers import Activation, MaxPooling2D, Dropout, LSTM, Flatten, Merge, TimeDistributed
import numpy as np
from keras.layers import Concatenate
from keras.layers.convolutional import Conv2D
# Generate fake data
# Assumed to be 1730 grayscale video frames
x_data = np.random.random((1730, 1, 8, 10))
sequence_lengths = None
def defModel():
    """Assemble, compile and summarize the per-frame CNN followed by an LSTM.

    Every frame-level layer is wrapped in ``TimeDistributed``; the declared
    input shape is ``(sequence_lengths, 1, 8, 10)`` using the module-level
    ``sequence_lengths``. Returns the compiled ``Sequential`` model.
    """
    stack = [
        TimeDistributed(
            Conv2D(40, (3, 3), padding='same'),
            input_shape=(sequence_lengths, 1, 8, 10),
        ),
        Activation('relu'),
        TimeDistributed(MaxPooling2D(data_format="channels_first", pool_size=(2, 2))),
        Dropout(0.2),
        # Flatten each frame's feature maps so the LSTM receives one vector per step.
        TimeDistributed(Flatten()),
        LSTM(240, return_sequences=True),
    ]
    model = Sequential()
    for layer in stack:
        model.add(layer)
    model.compile(loss='mse', optimizer='adam')
    model.summary()
    return model
def gen():
    """Yield 1730 ``(x, y)`` pairs, then stop.

    Each ``x`` is a fresh random array of shape (1, 8, 10) and each ``y`` is
    an all-ones array of shape (15, 240).
    """
    remaining = 1730
    while remaining > 0:
        yield np.random.random((1, 8, 10)), np.ones((15, 240))
        remaining -= 1
def main():
    """Build the model, print the shape of a sliced demo tensor, and train.

    The sliced ``x_train`` array is only printed; the training call draws its
    batches from ``gen()``.
    """
    model = defModel()

    # Cut the single long recording into 50 clips of 15 frames each, with
    # consecutive clips starting 5 frames apart.
    seq_len = 15
    clips = [x_data[5 * k:5 * k + seq_len, :, :, :] for k in range(50)]
    x_train = np.asarray(clips, dtype='float32')
    print(x_train.shape)
    # >> (50, 15, 1, 8, 10)

    model.fit_generator(generator=gen(), steps_per_epoch=1, epochs=2)


if __name__ == "__main__":
    main()
How can I resolve this error produced by my modification?
ValueError: Error when checking input: expected
time_distributed_1_input to have 5 dimensions, but got array with
shape (1, 8, 10)
(1) Wang, S., Clark, R., Wen, H., & Trigoni, N. (2017). DeepVO: Towards end-to-end visual odometry with deep Recurrent Convolutional Neural Networks. Proceedings - IEEE International Conference on Robotics and Automation, 2043–2050.
Update: Concatenate CNN and LSTM as sample code
# Per-frame CNN feature extractor followed by an LSTM over the frame sequence.
model.add(TimeDistributed(Conv2D(16, (7, 7), padding='same'), input_shape=(None, 540, 960, 1)))
model.add(Activation('relu'))
# The next two statements were fused onto a single line, which is a syntax error.
model.add(TimeDistributed(Conv2D(32, (5, 5), padding='same')))
model.add(Activation('relu'))
model.add(TimeDistributed(Flatten()))
model.add(LSTM(num_classes, return_sequences=True))
Got error
ValueError: Error when checking target: expected lstm_1 to have 3 dimensions, but got array with shape (4, 3)
Update2
The goal is to extract image feature by CNN, then combine 3 feature from 3 images and feed into LSTM.
Goal
#Input image
(540, 960, 1) ==> (x,y,ch) ==> CNN ==> (m,n,k)┐
(540, 960, 1) ==> (x,y,ch) ==> CNN ==> (m,n,k)---> (3, m,n,k) --flatten--> (3, mnk)
(540, 960, 1) ==> (x,y,ch) ==> CNN ==> (m,n,k)」
(3, mnk) => LSTM => predict three regression value
Model
model = Sequential()
model.add(TimeDistributed(Conv2D(16, (7, 7), padding='same'),input_shape=(None, 540, 960, 1)))
model.add(Activation('relu'))
model.add(TimeDistributed(Conv2D(32, (5, 5), padding='same')))
model.add(Activation('relu'))
model.add(TimeDistributed(Flatten()))
model.add(LSTM(num_classes, return_sequences=True))
model.compile(loss='mean_squared_error', optimizer='adam')
The generator
a = readIMG(filenames[start]) # (540, 960, 1)
b = readIMG(filenames[start + 1]) # (540, 960, 1)
c = readIMG(filenames[start + 2]) # (540, 960, 1)
x_train = np.array([[a, b, c]]) # (1, 3, 540, 960, 1)
Then I still got the error:
ValueError: Error when checking target: expected lstm_1 to have 3 dimensions, but got array with shape (1, 3)
The problem is a plain shape mismatch problem.
You defined input_shape=(sequence_lengths, 1,8,10), so your model is expecting five dimensions as input: (batch_size, sequence_lengths, 1, 8, 10)
All you need is to make your generator output the correct shapes with 5 dimensions.
def gen(number_of_videos=None, video_length=None):
    """Endlessly yield one-video batches of random frames and all-ones targets.

    Keras' ``fit_generator`` expects a generator that never runs out, so the
    videos are cycled forever instead of being yielded once.

    Args:
        number_of_videos: How many videos to create. When omitted, the
            module-level ``numberOfVideos`` is used, as before.
        video_length: Frames per video. When omitted, the module-level
            ``videoLength`` is used, as before.

    Yields:
        ``(x, y)`` with ``x`` of shape (1, video_length, 1, 8, 10) and ``y``
        of shape (1, video_length, 240).

    Raises:
        ValueError: If ``number_of_videos`` is smaller than 1, since the
            endless loop would otherwise spin without yielding anything.
    """
    if number_of_videos is None:
        number_of_videos = numberOfVideos
    if video_length is None:
        video_length = videoLength
    if number_of_videos < 1:
        raise ValueError("number_of_videos must be at least 1")

    x_data = np.random.random((number_of_videos, video_length, 1, 8, 10))
    y_data = np.ones((number_of_videos, video_length, 240))
    while True:
        for video in range(number_of_videos):
            # Slice with video:video+1 to keep the leading batch axis.
            yield x_data[video:video + 1], y_data[video:video + 1]
Here is the working example of CNNLSTM using generator: https://gist.github.com/HTLife/25c0cd362faa91477b8f28f6033adb45

Autoencoder for sound data in Keras

I have a 2d array of log-scaled mel-spectrograms of sound samples for 5 different categories.
For training I have used convolutional and dense neural network in Keras. Here the code:
model = Sequential()
model.add(Conv1D(80, 8, activation='relu', padding='same',input_shape=(60,108)))
model.add(MaxPooling1D(2,padding='same',strides=None))
model.add(Flatten())
initializer=initializers.TruncatedNormal()
model.add(Dense(200, activation='relu', kernel_initializer=initializer,bias_initializer=initializer))
model.add(BatchNormalization())
model.add(Dropout(0.8))
model.add(Dense(50, activation='relu', kernel_initializer=initializer,bias_initializer=initializer))
model.add(Dropout(0.8))
model.add(Dense(5, activation='softmax', kernel_initializer=initializer,bias_initializer=initializer))
model.compile(loss='categorical_crossentropy',
optimizer='adam',lr=0.01,
metrics=['accuracy'])
What kind of autoencoder can I apply to this type of data input? What model? Any suggestion or also code example would be helpful. :)
Since I don't have answers to my questions about the nature of the data, I will assume that we have a set of 2-dimensional data with a shape like (NSamples, 60, 108). I also assume that the answer to my suggestion to use Convolutional2D instead of Convolutional1D is yes.
Here is sample of models for convolutional auto encoder, model, which can use a trained auto encoder and how to use weights from an auto encoder for the final model:
from keras.layers.core import Dense, Dropout, Flatten, Reshape
from keras.layers import Conv1D, Conv2D, Deconv2D, MaxPooling1D, MaxPooling2D, UpSampling2D, Conv2DTranspose, Flatten, BatchNormalization, Dropout
from keras.callbacks import ModelCheckpoint
import keras.models as models
import keras.initializers as initializers
from sklearn.model_selection import train_test_split
# Local import: the learning rate has to be set on the optimizer object.
from keras.optimizers import Adam

# Convolutional autoencoder whose encoder mirrors the classifier's layers so
# that its trained weights can later be copied into the classifier.
ae = models.Sequential()
# Replaces the original 1D layer:
# model.add(Conv1D(80, 8, activation='relu', padding='same',input_shape=(60,108)))
# encoder
c = Conv2D(80, 3, activation='relu', padding='same', input_shape=(60, 108, 1))
ae.add(c)
ae.add(MaxPooling2D(pool_size=(2, 2), padding='same', strides=None))
ae.add(Flatten())
initializer = initializers.TruncatedNormal()
d1 = Dense(200, activation='relu', kernel_initializer=initializer, bias_initializer=initializer)
ae.add(d1)
ae.add(BatchNormalization())
ae.add(Dropout(0.8))
d2 = Dense(50, activation='relu', kernel_initializer=initializer, bias_initializer=initializer)
ae.add(d2)
ae.add(Dropout(0.8))
# decoder: widen back through the encoder's Dense input sizes in reverse order
ae.add(Dense(d2.input_shape[1], activation='sigmoid'))
ae.add(Dense(d1.input_shape[1], activation='sigmoid'))
# Restore the feature-map layout that the encoder flattened, then undo the pooling.
ae.add(Reshape((30, 54, 80)))
ae.add(UpSampling2D((2, 2)))
ae.add(Deconv2D(filters=c.filters, kernel_size=c.kernel_size, strides=c.strides, activation=c.activation, padding=c.padding))
# A single output filter brings the result back to one channel.
ae.add(Deconv2D(filters=1, kernel_size=c.kernel_size, strides=c.strides, activation=c.activation, padding=c.padding))
# `lr` is not a compile() option; pass it to the optimizer instead.
ae.compile(loss='binary_crossentropy',
           optimizer=Adam(lr=0.001),
           metrics=['accuracy'])
ae.summary()
#now train your convolutional autoencoder to reconstruct your input data
#reshape your data to (NSamples, 60, 108, 1)
#Then train your autoencoder. it can be something like that:
#X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=43)
#pre_mcp = ModelCheckpoint("CAE.hdf5", monitor='val_accuracy', verbose=2, save_best_only=True, mode='max')
#pre_history = ae.fit(X_train, X_train, epochs=100, validation_data=(X_val, X_val), batch_size=22, verbose=2, callbacks=[pre_mcp])
#model
# Local import: the learning rate has to be set on the optimizer object.
from keras.optimizers import Adam

# Classifier whose first seven layers match the autoencoder's encoder.
model = models.Sequential()
# Replaces the original 1D layer:
# model.add(Conv1D(80, 8, activation='relu', padding='same',input_shape=(60,108)))
model.add(Conv2D(80, 3, activation='relu', padding='same', input_shape=(60, 108, 1)))
model.add(MaxPooling2D(pool_size=(2, 2), padding='same', strides=None))
model.add(Flatten())
initializer = initializers.TruncatedNormal()
model.add(Dense(200, activation='relu', kernel_initializer=initializer, bias_initializer=initializer))
model.add(BatchNormalization())
model.add(Dropout(0.8))
model.add(Dense(50, activation='relu', kernel_initializer=initializer, bias_initializer=initializer))
model.add(Dropout(0.8))
model.add(Dense(5, activation='softmax', kernel_initializer=initializer, bias_initializer=initializer))
# `lr` is not a compile() option; pass it to the optimizer instead.
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=0.001),
              metrics=['accuracy'])
# Set weights: copy the autoencoder's weights for the layers that carry them
# (Conv2D, Dense(200), BatchNormalization, Dense(50)).
for layer_index in (0, 3, 4, 6):
    model.layers[layer_index].set_weights(ae.layers[layer_index].get_weights())
model.summary()
#Now you can train your model with pre-trained weights from autoencoder
A model like this was useful for me with the MNIST dataset: initializing the model with weights from the autoencoder improved its accuracy compared with random initialization.
However, I would recommend using several convolutional/deconvolutional layers, probably 3 or more, since in my experience convolutional autoencoders with 3 or more convolutional layers are more effective than those with 1 convolutional layer. In fact, with one convolutional layer I sometimes can't see any accuracy improvement at all.
Update:
I checked the autoencoder with the data provided by Emanuela, and I also tried different autoencoder architectures, without any success.
My hypothesis is that the data doesn't contain any significant features that can be distinguished by an autoencoder or even a CAE.
However, it looks like my assumption about the 2-dimensional nature of the data was confirmed by reaching almost 99.99% validation accuracy:
Nevertheless, at the same time, the 97.31% accuracy on the training data may indicate potential issues with the dataset, so it looks like a good idea to revise it.
In addition, I would suggest using ensembles of networks. You could train, for example, 10 networks with different validation data and assign each item the category that receives the most votes.
Here is my code:
from keras.layers.core import Dense, Dropout, Flatten
from keras.layers import Conv2D, BatchNormalization
from keras.callbacks import ModelCheckpoint
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
import keras.models as models
import keras.initializers as initializers
import msgpack
import numpy as np
with open('SoundDataX.msg', "rb") as fx,open('SoundDataY.msg', "rb") as fy:
dataX=msgpack.load(fx)
dataY=msgpack.load(fy)
num_samples = len(dataX)
x = np.empty((num_samples, 60, 108, 1), dtype = np.float32)
y = np.empty((num_samples, 4), dtype = np.float32)
for i in range(0, num_samples):
x[i] = np.asanyarray(dataX[i]).reshape(60, 108, 1)
y[i] = np.asanyarray(dataY[i])
X_train, X_val, y_train, y_val = train_test_split(x, y, test_size=0.2, random_state=43)
#model
model = models.Sequential()
model.add(Conv2D(128, 3, activation='relu', padding='same',input_shape=(60, 108, 1)))
model.add(Conv2D(128, 5, activation='relu', padding='same',input_shape=(60, 108, 1)))
model.add(Conv2D(128, 7, activation='relu', padding='same',input_shape=(60, 108, 1)))
model.add(Flatten())
initializer=initializers.TruncatedNormal()
model.add(Dense(200, activation='relu', kernel_initializer=initializer,bias_initializer=initializer))
model.add(BatchNormalization())
model.add(Dropout(0.8))
model.add(Dense(50, activation='relu', kernel_initializer=initializer,bias_initializer=initializer))
model.add(Dropout(0.8))
model.add(Dense(4, activation='softmax', kernel_initializer=initializer,bias_initializer=initializer))
model.compile(loss='categorical_crossentropy',
optimizer=Adam(lr=0.0001),
metrics=['accuracy'])
model.summary()
filepath="weights-{epoch:02d}-{val_acc:.7f}-{acc:.7f}.hdf5"
mcp = ModelCheckpoint(filepath, monitor='val_acc', verbose=2, save_best_only=True, mode='max')
history = model.fit(X_train, y_train, epochs=100, validation_data=(X_val, y_val), batch_size=64, verbose=2, callbacks=[mcp])

Resources