How to do sequence classification using LSTM? - keras

I'm working on a human activity recognition problem using depth stream.
Each sample is a Matlab file of size (20,3,no_of_frames), i.e., there are 20 rows and 3 cols in every frame and the number of frames can vary for different samples.
I have padded all the samples with 0's so that all of them contains same no of frames (say 100).
So now all samples are of size (20,3,100).
Also let the total no of samples are 400 and no of classes be 10.
How can I arrange my dataset so as to use LSTM in Keras. Would you also suggest some basic LSTM model for the classification purpose?

Here is my code to create dataset:
clc;
clear all;
ctr=0;
longest=326; % length of longest sequence
gyroData = zeros(431,longest*3);
gyroLabel=zeros(431,1);
for a=1:27
for s=1:2:7
for t=1:4
fname = strcat('a', int2str(a), '_s', int2str(s), '_t', int2str(t),'_inertial.mat');
if exist(fname)
load(fname);
d_iner = d_iner(:,1:3);
r = size(d_iner,1);
d_iner = cat(1,d_iner,zeros(longest-r,3)); % do zero padding to longest sequence
ctr = ctr+1;
d_iner = d_iner';
gyroData(ctr,:,:) = d_iner(:);
gyroLabel(ctr,1)=a-1;
end
end
end
end
n1 = randperm(ctr);
for i=1:ctr
if i==1
x1=gyroData(n1(i),:);
y1=gyroLabel(n1(i),1);
else
x1=cat(1,x1,gyroData(n1(i),:));
y1=cat(1,y1,gyroLabel(n1(i),1));
end
end
%%
ctr=0;
gyroData = zeros(430,longest*3);
gyroLabel=zeros(430,1);
for a=1:27
for s=2:2:8
for t=1:4
fname = strcat('a', int2str(a), '_s', int2str(s), '_t', int2str(t),'_inertial.mat');
if exist(fname)
load(fname);
d_iner = d_iner(:,1:3);
r = size(d_iner,1);
d_iner = cat(1,d_iner,zeros(longest-r,3)); % do zero padding to longest sequence
ctr = ctr+1;
d_iner = d_iner';
gyroData(ctr,:,:) = d_iner(:);
gyroLabel(ctr,1)=a-1;
end
end
end
end
n1 = randperm(ctr);
for i=1:ctr
if i==1
x2=gyroData(n1(i),:);
y2=gyroLabel(n1(i),1);
else
x2=cat(1,x2,gyroData(n1(i),:));
y2=cat(1,y2,gyroLabel(n1(i),1));
end
end
save('inertial_padded.mat', 'x1', 'y1', 'x2', 'y2');
**And this is my LSTM code in Keras**
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
import scipy.io
# fix random seed for reproducibility
np.random.seed(7)
mat = scipy.io.loadmat('inertial_padded.mat')
x_train = mat['x1']
y_train = mat['y1']
x_test = mat['x2']
y_test = mat['y2']
data_dim = 3
timesteps = 326
num_classes = 27
x_train = x_train.reshape(x_train.shape[0], 326,3)
x_test = x_test.reshape(x_test.shape[0], 326, 3)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
model = Sequential()
model.add(LSTM(32, input_shape=(timesteps, data_dim), activation='sigmoid'))
#model.add(LSTM(32, activation='tanh')) # returns a sequence of vectors of dimension 32
#model.add(LSTM(32)) # return a single vector of dimension 32
model.add(Dense(27, activation='softmax'))
model.compile(loss='categorical_crossentropy',
optimizer='rmsprop',
metrics=['accuracy'])
print(model.summary())
model.fit(x_train, y_train,
batch_size=16, epochs=25,
validation_data=(x_test, y_test))
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

Related

Keras: Multiclass CNN Text Classifier predicts the same class for all input data

I am trying to predict one out of 8 classes for some short text data with word embeddings and CNN. The occuring problem is that the CNN Classifier predicts everything in one class (the biggest one of the training data, distribution ~40%). The text data should be preprocesed quite well, because the classification works fine with other classifiers like SVM or NB.
'''
#first, build index mapping words in the embeddings set to their embedding vector
import os
embeddings_index = {}
f = open(os.path.join('', 'embedding_word2vec.txt'), encoding='utf-8')
for line in f:
values = line.split()
words = values[0]
coefs = np.asarray(values[1:])
embeddings_index[word] = coefs
f.close()
#vectorize text samples in 2D Integer Tensor
from tensorflow.python.keras.preprocessing.text import Tokenizer
tokenizer_obj = Tokenizer()
tokenizer_obj.fit_on_texts(stemmed_text)
sequences = tokenizer_obj.texts_to_sequences(stemmed_text)
#pad sequences
from tensorflow.python.keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
max_length = max([len(s.split(',')) for s in total_texts])
print('MaxLength :', max_length)
word_index = tokenizer_obj.word_index
print('Found %s unique Tokens.' % len(word_index))
text_pad = pad_sequences(sequences)
labels = to_categorical(np.asarray(labels))
print('Shape of label tensor:', labels.shape)
print('Shape of Text Tensor: ', text_pad.shape)
embedding_dim = 100
num_words = len(word_index) + 1
#prepare embedding matrix
embedding_matrix = np.zeros((vocab_size, embedding_dim))
for index, word in enumerate(vocab):
embedding_vector = w2v.wv[word]
if embedding_vector is not None:
embedding_matrix[index] = embedding_vector
#train test split
validation_split = 0.2
indices = np.arange(text_pad.shape[0])
np.random.shuffle(indices)
text_pad = text_pad[indices]
labels = labels[indices]
num_validation_samples = int(validation_split * text_pad.shape[0])
x_train = text_pad[:-num_validation_samples]
y_train = labels[:-num_validation_samples]
x_test = text_pad[-num_validation_samples:]
y_test = labels[-num_validation_samples:]
from keras.models import Sequential
from keras.layers.core import Dense, Flatten, Dropout
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.layers.embeddings import Embedding
model = Sequential()
model.add(Embedding(vocab_size, embedding_dim, input_length=max_length,
#embeddings_initializer = Constant(embedding_matrix),
weights=[embedding_matrix],
trainable=False))
model.add(Dropout(0.2))
model.add(Conv1D(filters=64, kernel_size=5, activation='relu'))
model.add(MaxPooling1D(pool_size=4))
#model.add(Conv1D(filters=64, kernel_size=5, activation='relu'))
#model.add(MaxPooling1D(pool_size=4))
model.add(Flatten())
model.add(Dense(8, activation='sigmoid'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(x_train, y_train,
epochs=25,
validation_data=(x_test, y_test))
print(model.summary())
'''
I am grateful for every hint.
Thank you.

Model fit after each "fold" in LOOCV? Multilabel LOOCV by hand

I am wondering if it is an issue (possible data leakage?) when implementing leave one out cross validation by hand if the model is fit each iteration after testing on each fold? It seems like if the model is trained on all data except for "X" and after testing on "X" the model is trained on all data other than "Y" and tested on "Y" it has seen "Y" on the first iteration. Is this actually a problem, and does my implementation of LOOCV by hand appear to be correct?
Thanks for your time!
i = 0
j = 0
for i in range(0, 41):
X_copy = X_orig[(i):(i+1)] #Slice the ith element from the numpy array
y_copy = y_orig[(i):(i+1)]
X_model = X_orig
y_model = y_orig
X_model = np.delete(X_model, i, axis = 0)
y_model = np.delete(y_model, i, axis = 0)
model.fit(X_model, y_model, epochs=115, batch_size=28, verbose = 0) #verbose = 0 removes learning info
prediction = model.predict(X_copy)
prediction[prediction>=0.5] = 1
prediction[prediction<0.5] = 0
print(prediction, y_copy)
if np.array_equal(y_copy, prediction):
j = j + 1
#print(y_copy, prediction)
if np.not_equal:
#print(y_copy, prediction)
pass
print(j/41) #For 41 samples in dataset
Why don't you use this?
from sklearn.model_selection import LeaveOneOut
loo = LeaveOneOut()
model =...
test_fold_predictions = []
for train_index, test_index in loo.split(X):
X_train, X_test = X[train_index], X[test_index]
y_train, y_test = y[train_index], y[test_index]
model.fit(X_train, y_train)
test_fold_predictions.append(model.predict(X_test))
EDIT
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD
model = Sequential()
model.add(Dense(5000, activation='relu', input_dim=X_train.shape[1]))
model.add(Dropout(0.1))
model.add(Dense(600, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(y_train.shape[1], activation='sigmoid'))
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='binary_crossentropy',
optimizer=sgd)
from sklearn.model_selection import LeaveOneOut
loo = LeaveOneOut()
test_fold_predictions = []
for train_index, test_index in loo.split(X):
X_train, X_test = X[train_index], X[test_index]
y_train, y_test = y[train_index], y[test_index]
model.fit(X_train, y_train, epochs=5, batch_size=2000)
test_fold_predictions.append(model.predict(X_test))

Writing my own FGSM Adversarial attack always gives zero perturbation

I'm trying to write a simple FGSM attack on MNIST. I tried the foolbox library and it seems to work but the FGSM is very slow (perhaps because it searches for a minimum eps/perturbation which gives a different target label). I started writing my own and my code always gives me zero perturbation. I.e., if I plot the x_adversarial it is the same as x_input. I checked that the gradient computation leads all zero matrix. The computed loss function is small, but I imagine there is some gradient to this loss function. Can somebody please help me? I have been racking my head for a week now without any progress. Thanks again!
import tensorflow as tf
import numpy as np
import foolbox
import tensorflow.keras.backend as K
import matplotlib.pyplot as plt
# Importing the required Keras modules containing model and layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
num_digits_to_classify = 10
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
input_shape = (28, 28, 1)
# Making sure that the values are float so that we can get decimal points after division
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
# Normalizing the RGB codes by dividing it to the max RGB value.
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print('Number of images in x_train', x_train.shape[0])
print('Number of images in x_test', x_test.shape[0])
def create_model_deep():
model = Sequential()
model.add(Conv2D(32, kernel_size=(5,5), activation='relu', input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))
model.add(Conv2D(64,kernel_size=(5,5),activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))
model.add(Flatten()) # Flattening the 2D arrays for fully connected layers
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(num_digits_to_classify,activation='softmax'))
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
return model
model = create_model_deep()
model.summary()
model.fit(x=x_train,y=y_train, epochs=10)
model.evaluate(x_test, y_test)
########################## foolbox FGSM attack ###############################################
from keras.backend import set_learning_phase
set_learning_phase(0)
from foolbox.criteria import Misclassification
fmodel = foolbox.models.TensorFlowModel.from_keras(model, bounds=(0,1))
attack = foolbox.attacks.FGSM(fmodel, criterion=Misclassification())
fgsm_error = 0.0
for i in range(x_test.shape[0]):
if i%1000 == 0:
print(i)
adversarial = attack(x_test[i],y_test[i])
if adversarial is not None:
adversarial = adversarial.reshape(1,28,28,1)
model_predictions = model.predict(adversarial)
label = np.argmax(model_predictions)
if label != y_test[i]:
fgsm_error = fgsm_error + 1.0
fgsm_error = fgsm_error/x_test.shape[0]
########################## My own FGSM attack ###############################################
sess = K.get_session()
eps = 0.3
x_adv = tf.placeholder(tf.float32,shape=(1,28,28,1),name="adv_example")
x_noise = tf.placeholder(tf.float32,shape=(1,28,28,1),name="adv_noise")
x_input = x_test[0].reshape(1,28,28,1)
y_input = y_test[0]
def loss_fn(y_true, y_pred):
return K.sparse_categorical_crossentropy(y_true, y_pred)
grad = K.gradients(loss_fn(y_input,model.output), model.input)
delta = K.sign(grad[0])
x_noise = x_noise + delta
x_adv = x_adv + eps*delta
x_adv = K.clip(x_adv,0.0,1.0)
x_adv, x_noise, grad = sess.run([x_adv, x_noise, grad], feed_dict={model.input:x_input, x_adv:x_input, x_noise:np.zeros_like(x_input)})
pred = model.predict(x_adv)
The following code seems to work now. Please look at the comment I made below.
sess = K.get_session()
eps = 0.3
i = 100
x_input = x_test[i].reshape(1,28,28,1)
y_input = y_test[i]
x_adv = x_input
# Added noise
x_noise = np.zeros_like(x_input)
def loss_fn(y_true, y_pred):
target = K.one_hot(y_true,10)
loss = K.categorical_crossentropy(target, y_pred)
return loss
#loss = K.print_tensor(loss,message='loss = ')
#return K.sparse_categorical_crossentropy(y_true, y_pred)
def loss_fn_sparse(y_true, y_pred):
loss = K.sparse_categorical_crossentropy(y_true, y_pred)
return loss
image = K.cast(x_input,dtype='float32')
y_pred = model(image)
loss = loss_fn_sparse(y_input, y_pred)
grad = K.gradients(loss, image)
delta = K.sign(grad[0])
x_noise = x_noise + delta
x_adv = x_adv + eps*delta
x_adv = K.clip(x_adv,0.0,1.0)
x_adv, x_noise = sess.run([x_adv, x_noise], feed_dict={model.input:x_input})
pred = model.predict(x_adv)

Predicting Future values with Keras LSTM

I have created an LSTM sales prediction model that works really well on the train and test sets. I would now like to predict beyond the dates in the entire dataset.
I have tried following this answer how to use the Keras model to forecast for future dates or events? but I really can't figure out how to adjust my code to do future predictions.
Also, I changed my code from
X_train, y_train = train_set_scaled[:, 1:], train_set_scaled[:, 0:1]
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test, y_test = test_set_scaled[:, 1:], test_set_scaled[:, 0:1]
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
to
X_train, y_train = train_set_scaled[:, 1:], train_set_scaled[:, 1:8]
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test, y_test = test_set_scaled[:, 1:], test_set_scaled[:, 1:8]
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
after trying the solution in Keras time series can I predict next 6 month in one time
Here is the code where training and modelling happens:
# changed to initial
for df in m:
train_set, test_set = m[df][0:-6].values, m[df][-6:].values
#apply Min Max Scaler
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler = scaler.fit(train_set)
# reshape training set
train_set = train_set.reshape(train_set.shape[0], train_set.shape[1])
train_set_scaled = scaler.transform(train_set)
# reshape test set
test_set = test_set.reshape(test_set.shape[0], test_set.shape[1])
test_set_scaled = scaler.transform(test_set)
#build the LSTM Model
X_train, y_train = train_set_scaled[:, 1:], train_set_scaled[:, 0:1]
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test, y_test = test_set_scaled[:, 1:], test_set_scaled[:, 0:1]
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
print('Fitting model for: {}'.format(df))
#fit our LSTM Model
model = Sequential()
model.add(LSTM(4, batch_input_shape=(1, X_train.shape[1], X_train.shape[2]), stateful=True))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X_train, y_train, nb_epoch=500, batch_size=1, verbose=1, shuffle=False)
# model.save('lstm_model.h5')
print('Predictions for: {}'.format(df))
#check prediction
y_pred = model.predict(X_test,batch_size=1)
print('Inverse Transform for: {}'.format(df))
#inverse transformation to see actual sales
#reshape y_pred
y_pred = y_pred.reshape(y_pred.shape[0], 1, y_pred.shape[1])
#rebuild test set for inverse transform
pred_test_set = []
for index in range(0,len(y_pred)):
print (np.concatenate([y_pred[index],X_test[index]],axis=1))
pred_test_set.append(np.concatenate([y_pred[index],X_test[index]],axis=1))
#reshape pred_test_set
pred_test_set = np.array(pred_test_set)
pred_test_set = pred_test_set.reshape(pred_test_set.shape[0], pred_test_set.shape[2])
#inverse transform
pred_test_set_inverted = scaler.inverse_transform(pred_test_set)
I would like the predictions to go beyond the data in the dataset.
UPDATE: I trained the model and took its predictions on the test set. Use these as input for another LSTM model to fit and predict for 12 months. It worked for me. Also changed my last Dense layer (above) to predict 1 point at a time instead of 7 as I had before.
Below is the code:
from numpy import array
for df in d:
if df in list_df:
# df_ADIDAS DYN PUL DEO 150 FCA5421
#KEEP
result_list = []
sales_dates = list(d["{}".format(df)][-7:].Month)
act_sales = list(d["{}".format(df)][-7:].Sale)
for index in range(0,len(pred_test_set_inverted)):
result_dict = {}
result_dict['pred_value'] = int(pred_test_set_inverted[index][0] + act_sales[index]) #change to 0 ffrom act_sales[index]
result_dict['date'] = sales_dates[index] #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>REVIEW
result_list.append(result_dict)
df_result = pd.DataFrame(result_list)
predictions = list(df_result['pred_value'])
forecasts = []
result_list
for i in range(len(result_list)):
forecasts.append(result_list[i]['pred_value'])
def split_sequence(sequence, n_steps):
X, y = list(), list()
for i in range(len(sequence)):
# find the end of this pattern
end_ix = i + n_steps
# check if we are beyond the sequence
if end_ix > len(sequence)-1:
break
# gather input and output parts of the pattern
seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
X.append(seq_x)
y.append(seq_y)
return array(X), array(y)
# choose a number of time steps
n_steps = 4
# split into samples
X, y = split_sequence(forecasts, n_steps)
# summarize the data
# for i in range(len(X)):
# print(X[i], y[i])
n_features = 1
X = X.reshape((X.shape[0], X.shape[1], n_features))
# define model
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(n_steps, n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
# fit model
model.fit(X, y, epochs=200, verbose=0)
# demonstrate prediction
x_input = array(predictions[-4:])
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
#print(yhat)
currentStep = yhat[:, -1:]
print('Twelve Month Prediction for {}'.format(df))
for i in range(12):
if i == 0:
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(yhat)
else:
x0_input = np.append(x_input, [currentStep[i-1]])
x0_input = x0_input.reshape((1, n_steps+1, n_features))
x_input = x0_input[:,1:]
yhat = model.predict(x_input)
currentStep = np.append(currentStep, yhat[:,-1:])
print(yhat)
Your last Dense layer says that you are predicting 7 points at a time. Save those predictions and feed them to the model again to predict next 7. That makes it 14 predictions simultaneously. And so on. Or change the number of nodes and shape of y from 7 to corresponding number and train again.

Keras fit_generator() not working due to shape error

I am running MNIST prediction using Keras, with tensorflow backend.
I have code that runs with batches , using Keras fit() as
(X_train, y_train), (X_test, y_test) = mnist.load_data()
N1 = X_train.shape[0]
N2 = X_test.shape[0]
h = X_train.shape[1]
w = X_train.shape[2]
num_pixels = h*w
# reshape N1 samples to num_pixels
x_train = X_train.reshape(N1, num_pixels).astype('float32') # shape is now (60000,784)
x_test = X_test.reshape(N2, num_pixels).astype('float32') # shape is now (10000,784)
x_train = x_train / 255
x_test = x_test / 255
y_train = np_utils.to_categorical(y_train) #(60000,10)
y_test = np_utils.to_categorical(y_test) # (10000,10):
num_classes = y_test.shape[1]
def baseline_model():
# create model
model = Sequential()
model.add(Dense(num_pixels, input_dim=num_pixels, kernel_initializer='normal', activation='relu'))
model.add(Dense(num_classes, kernel_initializer='normal', activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
return model
model = baseline_model()
batch_size = 200
epochs = 20
max_batches = 2 * len(x_train) / batch_size # 2*60000/200
# reshape to be [samples][width][height][ channel] for ImageDataGenerator
x_t = X_train.reshape(N1, w, h, 1).astype('float32')
datagen = ImageDataGenerator(rescale= 1./255)
train_gen = datagen.flow(x_t, y_train, batch_size=batch_size)
for e in range(epochs):
batches = 0
for x_batch, y_batch in train_gen:
# x_batch is of size [batch_sz,w,h,ch]: resize to [bth_sz,pixel_sz]: (200,28,28,1)-> (200,784)
# for model.fit
x_batch = np.reshape(x_batch, [-1, num_pixels])
model.fit(x_batch, y_batch,validation_split=0.15,verbose=0)
batches += 1
print("Epoch %d/%d, Batch %d/%d" % (e+1, epochs, batches, max_batches))
if batches >= max_batches:
break
scores = model.evaluate(x_test, y_test, verbose=0)
However, when I try to implement similar code using fit_generator(), I get an error.
the code is as below:
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# separate data into train and validation
from sklearn.model_selection import train_test_split
# Split the data
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.15, shuffle= True)
# number of training samples
N1 = X_train.shape[0] # training size
N2 = X_test.shape[0] # test size
N3 = X_valid.shape[0] # valid size
h = X_train.shape[1]
w = X_train.shape[2]
num_pixels = h*w
y_train = np_utils.to_categorical(y_train)
y_valid = np_utils.to_categorical(y_valid)
y_test = np_utils.to_categorical(y_test)
num_classes = y_test.shape[1]
def baseline_model():
# create model
model = Sequential()
model.add(Dense(num_pixels, input_dim=num_pixels, kernel_initializer='normal', activation='relu'))
model.add(Dense(num_classes, kernel_initializer='normal', activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
return model
model = baseline_model()
batch_size = 200
epochs = 20
steps_per_epoch_tr = int(N1/ batch_size) # 51000/200
steps_per_epoch_val = int(N3/batch_size)
# reshape to be [samples][width][height][ channel] for ImageData Gnerator->datagen.flow
x_t = X_train.reshape(N1, w, h, 1).astype('float32')
x_v = X_valid.reshape(N3, w, h, 1).astype('float32')
# define data preparation
datagen = ImageDataGenerator(rescale=1./255) # scales x_t/x_v
train_gen = datagen.flow(x_t, y_train, batch_size=batch_size)
valid_gen = datagen.flow(x_v,y_valid, batch_size=batch_size)
model.fit_generator(train_gen,steps_per_epoch = steps_per_epoch_tr,validation_data = valid_gen,
validation_steps = steps_per_epoch_val,epochs=epochs)
This gives an error:
This is due to expected image dimension error, but I am not sure where/how to fix this. any help is greatly appreciated.
Thanks
sedy
In the model.fit() case, this line flattened the input before feeding it for training.
x_batch = np.reshape(x_batch, [-1, num_pixels])
But in the generator case, there is nothing to flatten the input before feeding it to the Dense layer. The Dense layer cannot process 2D input (28 x 28). Adding, a Flatten() layer to the model should do the trick as shown below.
def baseline_model():
# create model
model = Sequential()
model.add(Flatten(input_shape=(28,28,1)))
model.add(Dense(num_pixels, input_dim=num_pixels, kernel_initializer='normal', activation='relu'))
model.add(Dense(num_classes, kernel_initializer='normal', activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
return model

Resources