Keras, convolution neural network val_loss always the same for the same data - conv-neural-network

I am trying to build a neural network that learn over DNA sequence data. The neural network has to be a classifier that say yes or no. The first thing that I have done is to transform a sequence e.g. ATTTGGCAAT with the oneHot encoding using the following function :
class hot_dna:
def init(self,fasta):
#check for and grab sequence name
if re.search(">",fasta):
name = re.split("\n",fasta)[0]
sequence = re.split("\n",fasta)[1]
else :
name = 'unknown_sequence'
sequence = fasta
#get sequence into an array
seq_array = array(list(sequence))
#integer encode the sequence
label_encoder = LabelEncoder()
integer_encoded_seq = label_encoder.fit_transform(seq_array)
#one hot the sequence
onehot_encoder = OneHotEncoder(sparse=False)
#reshape because that's what OneHotEncoder likes
integer_encoded_seq = integer_encoded_seq.reshape(len(integer_encoded_seq), 1)
onehot_encoded_seq = onehot_encoder.fit_transform(integer_encoded_seq)
#add the attributes to self
self.name = name
self.sequence = fasta
self.integer = integer_encoded_seq
self.onehot = onehot_encoded_seq
I already prepared the data in the following categories :
trainNegative used for the training and has to be assigned to no
TrainPositive used for the training and has to be assigned to yes
TrainTTNegative used for the test and has to be assigned to no
TrainTTPositive used for the test and has to be assigned to yes
TestNegative this will never be seen from the neural network and will be used to evaluate true positive, false positive, and has to be assigned to no 6) testPositive -> this will never be seen from the neural network and will be used to evaluate true positive, false positive, and has to be assigned to yes.
In the following step i will just read the fasta file (file containing the sequence for each sample) ans stack them together according to Train, TrainT and test. Also during this loading matrix i will create a vector that has 1 or 0 according to the positive or negative class (yes or no). There is also a control on the shape since all the sequence has to have same dimension.
a=list()
trainT=[]
fasta_sequences = SeqIO.parse(open("/scratch/trainTNegative.fa"),'fasta')
for fasta in fasta_sequences:
f=hot_dna(str(fasta.seq))
if (f.sequence).count("N") == 0:
if f.onehot.shape == (5000,4):
a.append(f.onehot)
trainT.append(0)
fasta_sequences = SeqIO.parse(open("/scratch/trainTPositive.fa"),'fasta')
for fasta in fasta_sequences:
f=hot_dna(str(fasta.seq))
if (f.sequence).count("N") == 0:
if f.onehot.shape == (5000,4):
a.append(f.onehot)
trainT.append(1)
train=np.stack(a)
#train=train.reshape(train.shape[0], 1,train.shape[1],train.shape[2] )
train=train.reshape(train.shape[0],train.shape[1],train.shape[2],1 )
trainT=to_categorical(trainT)
#TESTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
a=list()
testT=[]
fasta_sequences = SeqIO.parse(open("/scratch/trainTTNegative.fa"),'fasta')
for fasta in fasta_sequences:
f=hot_dna(str(fasta.seq))
if (f.sequence).count("N") == 0:
if f.onehot.shape == (5000,4):
a.append(f.onehot)
testT.append(0)
fasta_sequences = SeqIO.parse(open("/scratch/trainTTPositive.fa"),'fasta')
for fasta in fasta_sequences:
f=hot_dna(str(fasta.seq))
if (f.sequence).count("N") == 0:
if f.onehot.shape == (5000,4):
a.append(f.onehot)
testT.append(1)
test=np.stack(a)
#test=test.reshape(test.shape[0], 1,test.shape[1],test.shape[2] )
test=test.reshape(test.shape[0],test.shape[1],test.shape[2],1 )
testT=to_categorical(testT)
#NEVERSEEEEEEEEEEEEEEEEEEEEEEEEENN
a=list()
neverT=[]
fasta_sequences = SeqIO.parse(open("/scratch/testNegative.fa"),'fasta')
for fasta in fasta_sequences:
f=hot_dna(str(fasta.seq))
if (f.sequence).count("N") == 0:
if f.onehot.shape == (5000,4):
a.append(f.onehot)
neverT.append(0)
fasta_sequences = SeqIO.parse(open("/scratch/testPositive.fa"),'fasta')
for fasta in fasta_sequences:
f=hot_dna(str(fasta.seq))
if (f.sequence).count("N") == 0:
if f.onehot.shape == (5000,4):
a.append(f.onehot)
neverT.append(1)
never=np.stack(a)
never=never.reshape(never.shape[0],never.shape[1],never.shape[2],1 )
neverT=to_categorical(neverT)
Now I set up the neural network, the idea is to have 3 convolutional neural network with a window (4,10), since the columns of the one-Hot encoding are 4 and i would like a sliding window of ten.
model = Sequential()
model.add(Conv2D(32, (4, 10), padding="same", activation="relu",input_shape= [test.shape[1],test.shape[2],test.shape[3]]))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(64, (4, 10), padding="same", activation="relu"))
model.add(MaxPooling2D(pool_size=(4, 2)))
model.add(Conv2D(128, (4, 10), padding="same", activation="relu"))
model.add(MaxPooling2D(pool_size=(8, 1)))
model.add(Flatten())
model.add(Dense(32, activation='relu'))
model.add(keras.layers.Dropout(rate=0.5, noise_shape=None, seed=None))
model.add(Dense(16, activation='relu'))
model.add(keras.layers.Dropout(rate=0.5, noise_shape=None, seed=None))
model.add(Dense(8, activation='relu'))
model.add(keras.layers.Dropout(rate=0.5, noise_shape=None, seed=None))
model.add(Dense(2, activation='sigmoid'))
model.compile(optimizer=keras.optimizers.Adam(lr=0.001), loss="categorical_crossentropy",metrics=['accuracy'])
print(model.summary())
Model: "sequential_4"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_10 (Conv2D) (None, 5000, 4, 32) 3232
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 2500, 2, 32) 0
_________________________________________________________________
conv2d_11 (Conv2D) (None, 2500, 2, 64) 81984
_________________________________________________________________
max_pooling2d_11 (MaxPooling (None, 625, 1, 64) 0
_________________________________________________________________
conv2d_12 (Conv2D) (None, 625, 1, 128) 327808
_________________________________________________________________
max_pooling2d_12 (MaxPooling (None, 78, 1, 128) 0
_________________________________________________________________
flatten_4 (Flatten) (None, 9984) 0
_________________________________________________________________
dense_13 (Dense) (None, 32) 319520
_________________________________________________________________
dropout_10 (Dropout) (None, 32) 0
_________________________________________________________________
dense_14 (Dense) (None, 16) 528
_________________________________________________________________
dropout_11 (Dropout) (None, 16) 0
_________________________________________________________________
dense_15 (Dense) (None, 8) 136
_________________________________________________________________
dropout_12 (Dropout) (None, 8) 0
_________________________________________________________________
dense_16 (Dense) (None, 2) 18
=================================================================
Total params: 733,226
Trainable params: 733,226
Non-trainable params: 0
_________________________________________________________________
None
And here are the shapes of my data
train.shape
(1023, 5000, 4, 1)
trainT.shape
(1023, 2)
where 1023 is the number of samples, 5000 is the length of the sequence, so how many letter i have and 4 is the number of columns once the sequence is transformed by the one-Hot encoding. Now i create the checkpoint and I train my model
checkpoint_name = "/scratch/"+"test"+"/BW/"+'BW.hdf5'
try:
os.makedirs("/scratch/"+"test"+"/BW/")
except:
print("folder already exists")
checkpoint = keras.callbacks.ModelCheckpoint(checkpoint_name, monitor='val_accuracy', verbose = 1, save_best_only = True, mode ='max')
callbacks_list = [checkpoint]
NNResult=model.fit(train,trainT, epochs=1000, batch_size=64,validation_data=(test,testT),callbacks=callbacks_list)
Main problem val_loss does not go down but worst part is that even if I restart the training, setting up the same model over again, the val_loss value is exactly the same. If I change seed and restart also the python session the val_loss is always the same. Does anyone have any Idea of the reason? Also I'm wondering if the flatten layer that i have added is doing something bad to my training.
I also just tried to modify the network like this
model = Sequential()
model.add(Conv1D(32, (10), padding="same", activation="relu",input_shape= [test.shape[1],test.shape[2]]))
model.add(GlobalAveragePooling1D())
model.add(Dense(2, activation='relu'))
and is crazy i have the same effect as before. Do you think might be the data then?
Thank you

Related

ValueError: expected dense_22 to have shape (None, 37) but got array with shape (1000, 2)

I am currently working on a question answering system. I create a synthetic dataset that contains multiple words in the answers. But, the answers are not a span of the given context.
Initially, I am planning to test it using a deep learning-based model. But I have some problems building the model.
This is how I vectorized data.
def vectorize(data, word2idx, story_maxlen, question_maxlen, answer_maxlen):
""" Create the story and question vectors and the label """
Xs, Xq, Y = [], [], []
for story, question, answer in data:
xs = [word2idx[word] for word in story]
xq = [word2idx[word] for word in question]
y = [word2idx[word] for word in answer]
#y = np.zeros(len(word2idx) + 1)
#y[word2idx[answer]] = 1
Xs.append(xs)
Xq.append(xq)
Y.append(y)
return (pad_sequences(Xs, maxlen=story_maxlen),
pad_sequences(Xq, maxlen=question_maxlen),
pad_sequences(Y, maxlen=answer_maxlen))
#np.array(Y))
below is how I create the model.
# story encoder. Output dim: (None, story_maxlen, EMBED_HIDDEN_SIZE)
story_encoder = Sequential()
story_encoder.add(Embedding(input_dim=vocab_size,
output_dim=EMBED_HIDDEN_SIZE,
input_length=story_maxlen))
story_encoder.add(Dropout(0.3))
# question encoder. Output dim: (None, question_maxlen, EMBED_HIDDEN_SIZE)
question_encoder = Sequential()
question_encoder.add(Embedding(input_dim=vocab_size,
output_dim=EMBED_HIDDEN_SIZE,
input_length=question_maxlen))
question_encoder.add(Dropout(0.3))
# episodic memory (facts): story * question
# Output dim: (None, question_maxlen, story_maxlen)
facts_encoder = Sequential()
facts_encoder.add(Merge([story_encoder, question_encoder],
mode="dot", dot_axes=[2, 2]))
facts_encoder.add(Permute((2, 1)))
## combine response and question vectors and do logistic regression
answer = Sequential()
answer.add(Merge([facts_encoder, question_encoder],
mode="concat", concat_axis=-1))
answer.add(LSTM(LSTM_OUTPUT_SIZE, return_sequences=True))
answer.add(Dropout(0.3))
answer.add(Flatten())
answer.add(Dense(vocab_size,activation= "softmax"))
answer.compile(optimizer="rmsprop", loss="categorical_crossentropy",
metrics=["accuracy"])
answer.fit([Xs_train, Xq_train], Y_train,
batch_size=BATCH_SIZE, nb_epoch=NBR_EPOCHS,
validation_data=([Xs_test, Xq_test], Y_test))
and this is the summary of the model
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
merge_46 (Merge) (None, 5, 616) 0
_________________________________________________________________
lstm_23 (LSTM) (None, 5, 32) 83072
_________________________________________________________________
dropout_69 (Dropout) (None, 5, 32) 0
_________________________________________________________________
flatten_9 (Flatten) (None, 160) 0
_________________________________________________________________
dense_22 (Dense) (None, 37) 5957
=================================================================
Total params: 93,765.0
Trainable params: 93,765.0
Non-trainable params: 0.0
_________________________________________________________________
It gives the following error.
ValueError: Error when checking model target: expected dense_22 to have shape (None, 37) but got array with shape (1000, 2)
I think the error is related to Y_train, Y_test. I should encode them to categorical values and the answers are not spans of text, but sequential. I don't know what/how to do it.
how can I fix it? any ideas?
EDIT:
When I use sparse_categorical_crossentropy in the loss, and Reshape(2,-1);
answer.summary()
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
merge_94 (Merge) (None, 5, 616) 0
_________________________________________________________________
lstm_65 (LSTM) (None, 5, 32) 83072
_________________________________________________________________
dropout_139 (Dropout) (None, 5, 32) 0
_________________________________________________________________
reshape_22 (Reshape) (None, 2, 80) 0
_________________________________________________________________
dense_44 (Dense) (None, 2, 37) 2997
=================================================================
Total params: 90,805.0
Trainable params: 90,805.0
Non-trainable params: 0.0
_________________________________________________________________
EDIT2:
The model after modifications
# story encoder. Output dim: (None, story_maxlen, EMBED_HIDDEN_SIZE)
story_encoder = Sequential()
story_encoder.add(Embedding(input_dim=vocab_size,
output_dim=EMBED_HIDDEN_SIZE,
input_length=story_maxlen))
story_encoder.add(Dropout(0.3))
# question encoder. Output dim: (None, question_maxlen, EMBED_HIDDEN_SIZE)
question_encoder = Sequential()
question_encoder.add(Embedding(input_dim=vocab_size,
output_dim=EMBED_HIDDEN_SIZE,
input_length=question_maxlen))
question_encoder.add(Dropout(0.3))
# episodic memory (facts): story * question
# Output dim: (None, question_maxlen, story_maxlen)
facts_encoder = Sequential()
facts_encoder.add(Merge([story_encoder, question_encoder],
mode="dot", dot_axes=[2, 2]))
facts_encoder.add(Permute((2, 1)))
## combine response and question vectors and do logistic regression
## combine response and question vectors and do logistic regression
answer = Sequential()
answer.add(Merge([facts_encoder, question_encoder],
mode="concat", concat_axis=-1))
answer.add(LSTM(LSTM_OUTPUT_SIZE, return_sequences=True))
answer.add(Dropout(0.3))
#answer.add(Flatten())
answer.add(keras.layers.Reshape((2, -1)))
answer.add(Dense(vocab_size,activation= "softmax"))
answer.compile(optimizer="rmsprop", loss="sparse_categorical_crossentropy",
metrics=["accuracy"])
answer.fit([Xs_train, Xq_train], Y_train,
batch_size=BATCH_SIZE, nb_epoch=NBR_EPOCHS,
validation_data=([Xs_test, Xq_test], Y_test))
It still gives
ValueError: Error when checking model target: expected dense_46 to have 3 dimensions, but got array with shape (1000, 2)
As far as I understand - Y_train, Y_test comprise of indexes (not one-hot vectors). If so - change loss to sparse_categorical_entropy:
answer.compile(optimizer="rmsprop", loss="sparse_categorical_crossentropy",
metrics=["accuracy"])
As far as I understand - Y_train, Y_test have a sequence dimension. And the length of questions (5) doesn't equal to the length of the answers (2). This dimension is removed by Flatten(). Try to replace Flatten() by Reshape():
# answer.add(Flatten())
answer.add(tf.keras.layers.Reshape((2, -1)))

Keras with Hierarchical LSTM

I had a problem about hierarchical lstm in keras. It works well when the data is 2 dimensions. When I changed it to three dimensions, it does not work. My data is (25,10,2)
I want to build a hierarchical lstm, the first layer lstm will convert each data with shape (10,2) into a vector, there are 25 vectors feed into the second layer lstm. The input data in the first layer lstm is (10,2). I used two embeddings and multiply them. I appreciate if anyone can help.
def H_LSTM():
single_input = Input(shape=(10,2),dtype='int32')
in_sentence = Lambda(lambda x: single_input[:,:, 0:1], output_shape=(maxlen,))(single_input)
in_sentence = Reshape((maxlen,), input_shape = (maxlen,1))(in_sentence)
in_drug = Lambda(lambda x: single_input[:, :, 1:1], output_shape=(maxlen,))(single_input)
in_drug = Reshape((maxlen,), input_shape = (maxlen,1))(in_drug)
embedded_sentence = Embedding(len(word_index) + 1, embedding_dim, weights=[embedding_matrix],
input_length=maxlen, trainable=True, mask_zero=False)(in_sentence)
embedded_drug = Embedding(len(word_index) + 1, embedding_dim, weights=[embedding_matrix],
input_length=maxlen, trainable=True, mask_zero=False)(in_drug)
embedded_sequences = Multiply()([embedded_sentence, embedded_drug])
lstm_sentence = LSTM(100)(embedded_sequences)
encoded_model = Model(inputs = single_input, outputs = lstm_sentence)
sequence_input = Input(shape=(25,10,2),dtype='int32')
seq_encoded = TimeDistributed(encoded_model)(sequence_input)
seq_encoded = Dropout(0.2)(seq_encoded)
# Encode entire sentence
seq_encoded = LSTM(100)(seq_encoded)
# Prediction
prediction = Dense(2, activation='softmax')(seq_encoded)
model = Model(inputs = sequence_input, outputs = prediction)
model.compile(loss='categorical_crossentropy',
optimizer='rmsprop',
metrics=['acc'])
return model
Model Summary:
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_3 (InputLayer) (None, 10, 2) 0
__________________________________________________________________________________________________
lambda_3 (Lambda) (None, 10) 0 input_3[0][0]
__________________________________________________________________________________________________
lambda_4 (Lambda) (None, 10) 0 input_3[0][0]
__________________________________________________________________________________________________
reshape_3 (Reshape) (None, 10) 0 lambda_3[0][0]
__________________________________________________________________________________________________
reshape_4 (Reshape) (None, 10) 0 lambda_4[0][0]
__________________________________________________________________________________________________
embedding_3 (Embedding) (None, 10, 128) 4895744 reshape_3[0][0]
__________________________________________________________________________________________________
embedding_4 (Embedding) (None, 10, 128) 4895744 reshape_4[0][0]
__________________________________________________________________________________________________
multiply_2 (Multiply) (None, 10, 128) 0 embedding_3[0][0]
embedding_4[0][0]
__________________________________________________________________________________________________
lstm_3 (LSTM) (None, 100) 91600 multiply_2[0][0]
==================================================================================================
Total params: 9,883,088
Trainable params: 9,883,088
Non-trainable params: 0
__________________________________________________________________________________________________
None
Model: "model_4"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_4 (InputLayer) (None, 25, 10, 2) 0
_________________________________________________________________
time_distributed_2 (TimeDist (None, 25, 100) 9883088
_________________________________________________________________
dropout_2 (Dropout) (None, 25, 100) 0
_________________________________________________________________
lstm_4 (LSTM) (None, 100) 80400
_________________________________________________________________
dense_2 (Dense) (None, 2) 202
=================================================================
Total params: 9,963,690
Trainable params: 9,963,690
Non-trainable params: 0
Error Message:
InvalidArgumentError: You must feed a value for placeholder tensor 'input_3' with dtype int32 and shape [?,10,2]
[[node input_3 (defined at D:\Users\Jinhe.Shi\AppData\Local\Continuum\anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:3009) ]] [Op:__inference_keras_scratch_graph_6214]
Function call stack:
keras_scratch_graph
Update: the framework is shown in the following, the difference is no attention layer and I added two embeddings in the lower layer lstm.
enter image description here
Model fit:
The error happens during the model fitting.
model2 = H_LSTM();
print("model fitting - Hierachical network")
model2.fit(X_train, Y_train, nb_epoch=3, batch_size=100, validation_data=(X_test, Y_test))
The input data likes:
enter image description here

Keras LSTM and Embedding Concatenate Array Dimensions Issue

I am building an LSTM network for multivariate time series classification using 2 categorical features which I have created Embedding layers for in Keras. The model compiles and the architecture is displayed below with code. I am getting a ValueError: all the input array dimensions except for the concatenation axis must match exactly. This is strange to me because of model compiling and the output shapes seem to match (3D alignment concatenated along axis = -1). The model fit X parameters are a list of 3 inputs (first categorical variable array, second categorical variable array, and multivariate time series input 3-D for LSTM)
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_4 (InputLayer) (None, 1) 0
__________________________________________________________________________________________________
input_5 (InputLayer) (None, 1) 0
__________________________________________________________________________________________________
VAR_1 (Embedding) (None, 46, 5) 50 input_4[0][0]
__________________________________________________________________________________________________
VAR_2 (Embedding) (None, 46, 13) 338 input_5[0][0]
__________________________________________________________________________________________________
time_series (InputLayer) (None, 46, 11) 0
__________________________________________________________________________________________________
concatenate_3 (Concatenate) (None, 46, 18) 0 VAR_1[0][0]
VAR_2[0][0]
__________________________________________________________________________________________________
concatenate_4 (Concatenate) (None, 46, 29) 0 time_series[0][0]
concatenate_3[0][0]
__________________________________________________________________________________________________
lstm_2 (LSTM) (None, 46, 100) 52000 concatenate_4[0][0]
__________________________________________________________________________________________________
attention_2 (Attention) (None, 100) 146 lstm_2[0][0]
__________________________________________________________________________________________________
dense_2 (Dense) (None, 1) 101 attention_2[0][0]
==================================================================================================
Total params: 52,635
Trainable params: 52,635
Non-trainable params: 0
n_timesteps = 46
n_features = 11
def EmbeddingNet(cat_vars,n_timesteps,n_features,embedding_sizes):
inputs = []
embed_layers = []
for (c, (in_size, out_size)) in zip(cat_vars, embedding_sizes):
i = Input(shape=(1,))
o = Embedding(in_size, out_size, input_length=n_timesteps, name=c)(i)
inputs.append(i)
embed_layers.append(o)
embed = Concatenate()(embed_layers)
time_series_input = Input(batch_shape=(None,n_timesteps,n_features ), name='time_series')
inputs.append(time_series_input)
concatenated_inputs = Concatenate(axis=-1)([time_series_input, embed])
lstm_layer1 = LSTM(units=100,return_sequences=True)(concatenated_inputs)
attention = Attention()(lstm_layer1)
output_layer = Dense(1, activation="sigmoid")(attention)
opt = Adam(lr=0.001)
model = Model(inputs=inputs, outputs=output_layer)
model.compile(loss='binary_crossentropy',optimizer=opt,metrics=['accuracy'])
model.summary()
return model
model = EmbeddingNet(cat_vars,n_timesteps,n_features,embedding_sizes)
history = model.fit(x=[x_train_cat_array[0],x_train_cat_array[1],x_train_input], y=y_train_input, batch_size=8, epochs=1, verbose=1, validation_data=([x_val_cat_array[0],x_val_cat_array[1],x_val_input], y_val_input),shuffle=False)
I'm trying to the the very same thing. You should concatenate over axis 2. Please check HERE
Let me know if this works in your dataset, because categorical features are not giving any benefit to me.

Issue Merging Two Layers in an LSTM Seq2Seq Model for Q&A Use Case

I'm trying to build a Q&A model based off of the bAbI Task 8 example and I am having trouble merging two of my input layers into one layer. Here is my current model architecture:
story_input = Input(shape=(story_maxlen,vocab_size), name='story_input')
story_input_proc = Embedding(vocab_size, latent_dim, name='story_input_embed', input_length=story_maxlen)(story_input)
story_input_proc = Reshape((latent_dim,story_maxlen), name='story_input_reshape')(story_input_proc)
query_input = Input(shape=(query_maxlen,vocab_size), name='query_input')
query_input_proc = Embedding(vocab_size, latent_dim, name='query_input_embed', input_length=query_maxlen)(query_input)
query_input_proc = Reshape((latent_dim,query_maxlen), name='query_input_reshape')(query_input_proc)
story_query = dot([story_input_proc, query_input_proc], axes=(1, 1), name='story_query_merge')
encoder = LSTM(latent_dim, return_state=True, name='encoder')
encoder_output, state_h, state_c = encoder(story_query)
encoder_output = RepeatVector(3, name='encoder_3dim')(encoder_output)
encoder_states = [state_h, state_c]
decoder = LSTM(latent_dim, return_sequences=True, name='decoder')(encoder_output, initial_state=encoder_states)
answer_output = Dense(vocab_size, activation='softmax', name='answer_output')(decoder)
model = Model([story_input, query_input], answer_output)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
and here is the output of model.summary()
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
story_input (InputLayer) (None, 358, 38) 0
__________________________________________________________________________________________________
query_input (InputLayer) (None, 5, 38) 0
__________________________________________________________________________________________________
story_input_embed (Embedding) (None, 358, 64) 2432 story_input[0][0]
__________________________________________________________________________________________________
query_input_embed (Embedding) (None, 5, 64) 2432 query_input[0][0]
__________________________________________________________________________________________________
story_input_reshape (Reshape) (None, 64, 358) 0 story_input_embed[0][0]
__________________________________________________________________________________________________
query_input_reshape (Reshape) (None, 64, 5) 0 query_input_embed[0][0]
__________________________________________________________________________________________________
story_query_merge (Dot) (None, 358, 5) 0 story_input_reshape[0][0]
query_input_reshape[0][0]
__________________________________________________________________________________________________
encoder (LSTM) [(None, 64), (None, 17920 story_query_merge[0][0]
__________________________________________________________________________________________________
encoder_3dim (RepeatVector) (None, 3, 64) 0 encoder[0][0]
__________________________________________________________________________________________________
decoder (LSTM) (None, 3, 64) 33024 encoder_3dim[0][0]
encoder[0][1]
encoder[0][2]
__________________________________________________________________________________________________
answer_output (Dense) (None, 3, 38) 2470 decoder[0][0]
==================================================================================================
Total params: 58,278
Trainable params: 58,278
Non-trainable params: 0
__________________________________________________________________________________________________
where vocab_size = 38, story_maxlen = 358, query_maxlen = 5, latent_dim = 64, and the batch size = 64.
When I try to train this model I get the error:
Input to reshape is a tensor with 778240 values, but the requested shape has 20480
Here is the formula for those two values:
input_to_reshape = batch_size * latent_dim * query_maxlen * vocab_size
requested_shape = batch_size * latent_dim * query_maxlen
Where I'm At
I believe the error message is saying the shape of the tensor inputted into the query_input_reshape layer is (?, 5, 38, 64) but it is expecting a tensor of shape (?, 5, 64) (see formulas above), but I could be wrong on that.
When I change the target_shape input of Reshape to be 3D (i.e. Reshape((latent_dim,query_maxlen,vocab_size)) I get the error total size of new array must be unchanged, which doesn't make any sense to me because the input is 3D. You would think that Reshape((latent_dim,query_maxlen)) would give me that error because it'd be changing a 3D tensor into a 2D tensor, but it compiles fine, so I've no clue what's going on there.
The only reason I'm using Reshape is because I need to merge the two tensors as an input into the LSTM encoder. When I try to get rid of the Reshape layers I just get dimension mismatch errors when I try to compile the model. The model architecture above at least compiles but I can't train it.
Can someone please help me figure out how I can merge the story_input and query_input layers? Thanks!

Ensemble of CNN and RNN model in keras

trying to implement the model from paper Ensemble Application of Convolutional and Recurrent Neural Networks for Multi-label Text Categorization in keras
The model looks like the following (taken from the paper)
I have the code as
document_input = Input(shape=(None,), dtype='int32')
embedding_layer = Embedding(vocab_size, WORD_EMB_SIZE, weights=[initial_embeddings],
input_length=DOC_SEQ_LEN, trainable=True)
convs = []
filter_sizes = [2,3,4,5]
doc_embedding = embedding_layer(document_input)
for filter_size in filter_sizes:
l_conv = Conv1D(filters=256, kernel_size=filter_size, padding='same', activation='relu')(doc_embedding)
l_pool = MaxPooling1D(filter_size)(l_conv)
convs.append(l_pool)
l_merge = Concatenate(axis=1)(convs)
l_flat = Flatten()(l_merge)
l_dense = Dense(100, activation='relu')(l_flat)
l_dense_3d = Reshape((1,int(l_dense.shape[1])))(l_dense)
gene_variation_input = Input(shape=(None,), dtype='int32')
gene_variation_embedding = embedding_layer(gene_variation_input)
rnn_layer = LSTM(100, return_sequences=False, stateful=True)(gene_variation_embedding,initial_state=[l_dense_3d])
l_flat = Flatten()(rnn_layer)
output_layer = Dense(9, activation='softmax')(l_flat)
model = Model(inputs=[document_input,gene_variation_input], outputs=[output_layer])
I dont know whether I am setting up the Text feature vector right in the above diagram right ! I tried and I get the error as
ValueError: Layer lstm_9 expects 3 inputs, but it received 2 input tensors. Input received: [<tf.Tensor 'embedding_10_1/Gather:0' shape=(?, ?, 200) dtype=float32>, <tf.Tensor 'reshape_9/Reshape:0' shape=(?, 1, 100) dtype=float32>]
I did follow the section on Note on specifying the initial state of RNNs in keras documentation and code
Any help appreciated.
update:
The suggestion and some more reading into the code the model looks like this
embedding_layer = Embedding(vocab_size, WORD_EMB_SIZE, weights=[initial_embeddings], trainable=True)
document_input = Input(shape=(DOC_SEQ_LEN,), batch_shape=(BATCH_SIZE, DOC_SEQ_LEN),dtype='int32')
doc_embedding = embedding_layer(document_input)
convs = []
filter_sizes = [2,3,4,5]
for filter_size in filter_sizes:
l_conv = Conv1D(filters=256, kernel_size=filter_size, padding='same', activation='relu')(doc_embedding)
l_pool = MaxPooling1D(filter_size)(l_conv)
convs.append(l_pool)
l_merge = Concatenate(axis=1)(convs)
l_flat = Flatten()(l_merge)
l_dense = Dense(100, activation='relu')(l_flat)
gene_variation_input = Input(shape=(GENE_VARIATION_SEQ_LEN,), batch_shape=(BATCH_SIZE, GENE_VARIATION_SEQ_LEN),dtype='int32')
gene_variation_embedding = embedding_layer(gene_variation_input)
rnn_layer = LSTM(100, return_sequences=False,
batch_input_shape=(BATCH_SIZE, GENE_VARIATION_SEQ_LEN, WORD_EMB_SIZE),
stateful=False)(gene_variation_embedding, initial_state=[l_dense, l_dense])
output_layer = Dense(9, activation='softmax')(rnn_layer)
model = Model(inputs=[document_input,gene_variation_input], outputs=[output_layer])
model summary
____________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
====================================================================================================
input_8 (InputLayer) (32, 9) 0
____________________________________________________________________________________________________
input_7 (InputLayer) (32, 4000) 0
____________________________________________________________________________________________________
embedding_6 (Embedding) multiple 73764400 input_7[0][0]
input_8[0][0]
____________________________________________________________________________________________________
conv1d_13 (Conv1D) (32, 4000, 256) 102656 embedding_6[0][0]
____________________________________________________________________________________________________
conv1d_14 (Conv1D) (32, 4000, 256) 153856 embedding_6[0][0]
____________________________________________________________________________________________________
conv1d_15 (Conv1D) (32, 4000, 256) 205056 embedding_6[0][0]
____________________________________________________________________________________________________
conv1d_16 (Conv1D) (32, 4000, 256) 256256 embedding_6[0][0]
____________________________________________________________________________________________________
max_pooling1d_13 (MaxPooling1D) (32, 2000, 256) 0 conv1d_13[0][0]
____________________________________________________________________________________________________
max_pooling1d_14 (MaxPooling1D) (32, 1333, 256) 0 conv1d_14[0][0]
____________________________________________________________________________________________________
max_pooling1d_15 (MaxPooling1D) (32, 1000, 256) 0 conv1d_15[0][0]
____________________________________________________________________________________________________
max_pooling1d_16 (MaxPooling1D) (32, 800, 256) 0 conv1d_16[0][0]
____________________________________________________________________________________________________
concatenate_4 (Concatenate) (32, 5133, 256) 0 max_pooling1d_13[0][0]
max_pooling1d_14[0][0]
max_pooling1d_15[0][0]
max_pooling1d_16[0][0]
____________________________________________________________________________________________________
flatten_4 (Flatten) (32, 1314048) 0 concatenate_4[0][0]
____________________________________________________________________________________________________
dense_6 (Dense) (32, 100) 131404900 flatten_4[0][0]
____________________________________________________________________________________________________
lstm_4 (LSTM) (32, 100) 120400 embedding_6[1][0]
dense_6[0][0]
dense_6[0][0]
____________________________________________________________________________________________________
dense_7 (Dense) (32, 9) 909 lstm_4[0][0]
====================================================================================================
Total params: 206,008,433
Trainable params: 206,008,433
Non-trainable params: 0
____________________________________________________________________________________________________
An LSTM has 2 hidden states, but you are providing only 1 initial state. You could do one of the following:
Replace LSTM with an RNN which has only 1 hidden state, such as GRU:
rnn_layer = GRU(100, return_sequences=False, stateful=True)
(gene_variation_embedding,initial_state=[l_dense_3d])
Or pass zeros as initial state for the second hidden state of LSTM:
zeros = Lambda(lambda x: K.zeros_like(x), output_shape=lambda s: s)(l_dense_3d)
rnn_layer = LSTM(100, return_sequences=False, stateful=True)
(gene_variation_embedding,initial_state=[l_dense_3d, zeros])

Resources