Combining CNN with attention network - keras

Here is my attention layer:
class Attention(Layer):
    def __init__(self, **kwargs):
        self.init = initializers.get('normal')
        self.supports_masking = True
        self.attention_dim = 50
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        assert len(input_shape) == 3
        # project to attention_dim so the shapes of W, b and u line up in call()
        self.W = K.variable(self.init((input_shape[-1], self.attention_dim)))
        self.b = K.variable(self.init((self.attention_dim, )))
        self.u = K.variable(self.init((self.attention_dim, 1)))
        self.trainable_weights = [self.W, self.b, self.u]
        super(Attention, self).build(input_shape)

    def compute_mask(self, inputs, mask=None):
        return mask

    def call(self, x, mask=None):
        uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
        ait = K.dot(uit, self.u)
        ait = K.squeeze(ait, -1)
        ait = K.exp(ait)
        if mask is not None:
            ait *= K.cast(mask, K.floatx())
        ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        ait = K.expand_dims(ait)
        weighted_input = x * ait
        output = K.sum(weighted_input, axis=1)
        return output

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1])
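For orientation, the assert in build means the layer expects a 3D input of shape (batch, timesteps, features), e.g. the output of a recurrent layer with return_sequences=True, and compute_output_shape shows it collapses the time axis to (batch, features). A minimal usage sketch, assuming the class above together with the older-Keras imports it relies on (from keras.layers import Layer, from keras import backend as K, initializers); all names and sizes here are illustrative:

from keras.layers import Input, GRU, Bidirectional
from keras.models import Model

# Hypothetical shapes: 30 timesteps, 64 features per timestep
x_in = Input(shape=(30, 64))
h = Bidirectional(GRU(50, return_sequences=True))(x_in)  # (batch, 30, 100): 3D, as the assert requires
summary_vec = Attention(name="demo_attention")(h)        # (batch, 100): weighted sum over timesteps
demo = Model(x_in, summary_vec)
demo.summary()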
I am trying to combine a CNN with an attention network for text classification. Following is my code in Keras:
def inputs_and_embeddings(features, config):
    inputs, embeddings = [], []
    for f in features:
        E = Embedding if not config.fixed_embedding else FixedEmbedding
        i = Input(shape=(config.doc_size,), dtype='int32', name=f.name)
        e = E(f.input_dim, f.output_dim, weights=[f.weights],
              input_length=config.doc_size)(i)
        inputs.append(i)
        embeddings.append(e)
    return inputs, embeddings

inputs, embeddings = inputs_and_embeddings(features, config)

# calculating the size of documents and all features
seq = concat(embeddings)
cshape = (config.doc_size, sum(f.output_dim for f in features))
seq = Reshape((1,) + cshape)(seq)
# seq = Reshape((1, config.doc_size, w2v.output_dim))(embeddings)  # old way of doing the above
# seq = Bidirectional()

# Convolution(s)
convLayers = []
for filter_size, filter_num in zip(config.filter_sizes, config.filter_nums):
    seq2 = Convolution2D(
        filter_num,
        filter_size,
        cshape[1],
        border_mode='valid',
        activation='relu',
        dim_ordering='th'
    )(seq)
    seq2 = MaxPooling2D(
        pool_size=(config.doc_size - filter_size + 1, 1),
        dim_ordering='th'
    )(seq2)
    # seq2 = Flatten()(seq2)
    convLayers.append(seq2)
seq = Concatenate(axis=1)(convLayers)

if config.drop_prob:
    seq = Dropout(config.drop_prob)(seq)
for s in config.hidden_sizes:
    seq = Dense(s, activation='relu')(seq)

# need reshaping here
seq = Reshape((200, 3))(seq)

word_encoder = Bidirectional(GRU(50, return_sequences=True))(seq)
rnn_type = 'GRU'
dense_transform_word = Dense(
    100,
    activation='relu', kernel_regularizer=l2_reg,
    name='dense_transform_word')(word_encoder)

# word attention
attention_weighted_sentence = Model(
    inputs, Attention(name="word_attention")(dense_transform_word))
word_attention_model = attention_weighted_sentence
attention_weighted_sentence.summary()

# sentence-attention-weighted document scores
texts_in = Input(shape=(MAX_SEQ_LEN, config.doc_size), dtype='int32', name="input_2")
attention_weighted_sentences = TimeDistributed(attention_weighted_sentence)(texts_in)
if rnn_type == 'GRU':
    # sentence_encoder = Bidirectional(GRU(50, return_sequences=True, dropout=0.1, recurrent_dropout=0.2))(attention_weighted_sentences)
    dropout = Dropout(0.1)(attention_weighted_sentences)
    sentence_encoder = Bidirectional(GRU(50, return_sequences=True))(dropout)
else:
    sentence_encoder = Bidirectional(LSTM(50, return_sequences=True, dropout=0.1, recurrent_dropout=0.2))(attention_weighted_sentences)

dense_transform_sentence = Dense(
    100,
    activation='relu',
    name='dense_transform_sentence',
    kernel_regularizer=l2_reg)(sentence_encoder)

# sentence attention
attention_weighted_text = Attention(name="sentence_attention")(dense_transform_sentence)
prediction = Dense(19, activation='sigmoid')(attention_weighted_text)
model = Model(inputs, prediction)
model.summary()
I am getting a Graph disconnected error when I initialize the model with inputs and prediction, as shown in the code. From researching this, I found that the error occurs when there is no connection between the inputs and the outputs. However, I can't figure out what the inputs of my model should be. Can anyone please help me with this?
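For reference, here is a minimal sketch (deliberately unrelated to the model above, with made-up names) of how the Graph disconnected error arises: every tensor on the path to the outputs must trace back to one of the tensors passed as inputs, otherwise Model() raises it.

from keras.layers import Input, Dense
from keras.models import Model

a = Input(shape=(10,), name="a")
b = Input(shape=(10,), name="b")
h = Dense(5)(b)          # the path to h starts at b, not a

# Model(inputs=a, outputs=h) raises "Graph disconnected":
# h cannot be traced back to the declared input a.

model = Model(inputs=b, outputs=h)   # connected: h traces back to b
model.summary()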

Here is the updated version of my code:
def inputs_and_embeddings(features, config):
    inputs, embeddings = [], []
    for f in features:
        E = Embedding if not config.fixed_embedding else FixedEmbedding
        i = Input(shape=(config.doc_size,), dtype='int32', name=f.name)
        e = E(f.input_dim,
              f.output_dim,
              weights=[f.weights],
              input_length=config.doc_size)(i)
        inputs.append(i)
        embeddings.append(e)
    return inputs, embeddings

inputs, embeddings = inputs_and_embeddings(features, config)

# calculating the size of documents and all features
seq = concat(embeddings)
cshape = (config.doc_size, sum(f.output_dim for f in features))
seq = Reshape((1,) + cshape)(seq)
# seq = Reshape((1, config.doc_size, w2v.output_dim))(embeddings)  # old way of doing the above
# seq = Bidirectional()

# Convolution(s)
convLayers = []
for filter_size, filter_num in zip(config.filter_sizes, config.filter_nums):
    seq2 = Convolution2D(
        filter_num,
        filter_size,
        cshape[1],
        border_mode='valid',
        activation='relu',
        dim_ordering='th'
    )(seq)
    seq2 = MaxPooling2D(
        pool_size=(config.doc_size - filter_size + 1, 1),
        dim_ordering='th'
    )(seq2)
    # seq2 = Flatten()(seq2)
    convLayers.append(seq2)
seq = Concatenate(axis=1)(convLayers)

if config.drop_prob:
    seq = Dropout(config.drop_prob)(seq)
for s in config.hidden_sizes:
    seq = Dense(s, activation='relu')(seq)

# need reshaping here
seq = Reshape((200, 3))(seq)

word_encoder = Bidirectional(GRU(50, return_sequences=True))(seq)
rnn_type = 'GRU'
dense_transform_word = Dense(
    100,
    activation='relu', kernel_regularizer=l2_reg,
    name='dense_transform_word')(word_encoder)
outputs = Attention(name="word_attention")(dense_transform_word)

# word attention
attention_weighted_sentence = Model(inputs, outputs)
word_attention_model = attention_weighted_sentence
attention_weighted_sentence.summary()

# sentence-attention-weighted document scores
texts_in = Input(shape=(MAX_SEQ_LEN, config.doc_size), dtype='int32', name="input_2")
attention_weighted_sentences = TimeDistributed(outputs)(texts_in)
if rnn_type == 'GRU':
    # sentence_encoder = Bidirectional(GRU(50, return_sequences=True, dropout=0.1, recurrent_dropout=0.2))(attention_weighted_sentences)
    dropout = Dropout(0.1)(attention_weighted_sentences)
    sentence_encoder = Bidirectional(GRU(50, return_sequences=True))(dropout)
else:
    sentence_encoder = Bidirectional(LSTM(50, return_sequences=True, dropout=0.1, recurrent_dropout=0.2))(attention_weighted_sentences)

dense_transform_sentence = Dense(
    100,
    activation='relu',
    name='dense_transform_sentence',
    kernel_regularizer=l2_reg)(sentence_encoder)

# sentence attention
attention_weighted_text = Attention(name="sentence_attention")(dense_transform_sentence)
prediction = Dense(19, activation='sigmoid')(attention_weighted_text)
model = Model([inputs, texts_in], prediction)
model.summary()

Related

How can I create a configuration file for BERT with a CNN for text analysis?

I need to save the configuration file of the BERT model with CNN. I will use this file to visualize BERT, so any suggestions on how to do that?
my model:
class MixModel(nn.Module):
    def __init__(self, pre_trained='bert-base-uncased'):
        super().__init__()
        config = BertConfig.from_pretrained('bert-base-uncased', output_hidden_states=True)
        self.bert = BertModel.from_pretrained('bert-base-uncased', config=config)
        self.hidden_size = self.bert.config.hidden_size
        self.conv = nn.Conv1d(in_channels=3072, out_channels=256, kernel_size=5, stride=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool1d(kernel_size=64 - 5 + 1)
        self.dropout = nn.Dropout(0.3)
        self.flat = nn.Flatten()
        self.clf1 = nn.Linear(256, 256)
        self.clf2 = nn.Linear(256, 6)

    def forward(self, inputs, mask, labels):
        inputs = torch.tensor(inputs)
        mask = torch.tensor(mask)
        labels = torch.tensor(labels)
        x = self.bert(input_ids=inputs, attention_mask=mask, return_dict=True)
        x = self.conv(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.dropout(x)
        x = self.flat(x)
        x = self.clf1(x)
        x = self.clf2(x)
        return x
Note that the save_pretrained() function does not work with a fine-tuned BERT model combined with a CNN.
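For what it's worth, save_pretrained() exists only on the Hugging Face model/config/tokenizer classes, not on a custom nn.Module wrapper like MixModel. One workaround (a sketch only; the directory name is made up) is to save the inner BERT config with save_pretrained() and the wrapper's weights with torch.save:

import os
import torch

model = MixModel()
save_dir = "mixmodel_checkpoint"   # hypothetical path
os.makedirs(save_dir, exist_ok=True)

# Writes config.json, which visualization tools can read
model.bert.config.save_pretrained(save_dir)

# Saves the weights of the whole BERT+CNN wrapper
torch.save(model.state_dict(), os.path.join(save_dir, "pytorch_model.bin"))

# Later: rebuild the wrapper and restore its weights
restored = MixModel()
restored.load_state_dict(torch.load(os.path.join(save_dir, "pytorch_model.bin")))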

Fluctuations and overfitting in first epochs

I am training a CNN on the DVS gesture dataset using PyTorch. However, training is not progressing smoothly: the training and validation accuracies both improve, but they fluctuate a lot and there is a large gap between them (5-6%, up to 10%), as if the model starts overfitting around epoch 3 or 4. I have tried L2 regularization as well as dropout with high rates; the gap disappears in the first iterations but reappears strongly afterward. I am sure the datasets are properly merged and split randomly. I changed the batch size several times with no impact, and normalization makes it worse.
PS: Could this be underfitting? How do I identify underfitting?
Thanks in advance!
Code (using the snntorch library):
spike_grad = surrogate.fast_sigmoid(slope=5.4)
beta = 0.72
num_epochs = 200

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        # Initialize layers
        self.conv1 = nn.Conv2d(2, 16, kernel_size=5, bias=False)
        self.pool1 = nn.AvgPool2d(2)
        self.lif1 = snn.Leaky(beta=beta, spike_grad=spike_grad, threshold=2.5)  # threshold_p=2.5, threshold_n=-2.5
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, bias=False)
        self.pool2 = nn.AvgPool2d(2)
        self.lif2 = snn.Leaky(beta=beta, spike_grad=spike_grad, threshold=2.5)  # threshold_p=2.5, threshold_n=-2.5
        self.fc1 = nn.Linear(800, 11)
        self.drop1 = nn.Dropout(0.93)
        self.lif3 = snn.Leaky(beta=beta, spike_grad=spike_grad, threshold=2.5)  # threshold_p=2.5, threshold_n=-2.5
        self.flatten = nn.Flatten()

    def forward(self, x):
        mem1 = self.lif1.init_leaky()
        mem2 = self.lif2.init_leaky()
        mem3 = self.lif3.init_leaky()
        spk_rec = []
        mem_rec = []
        for step in range(x.size(1)):
            cur1 = self.pool1(self.conv1((x.permute(1, 0, 2, 3, 4))[step]))
            spk1, mem1 = self.lif1(cur1, mem1)
            cur2 = self.pool1(self.conv2(spk1))
            spk2, mem2 = self.lif2(cur2, mem2)
            cur3 = self.drop1(self.fc1(self.flatten(spk2)))
            spk3, mem3 = self.lif3(cur3, mem3)
            spk_rec.append(spk3)
            mem_rec.append(mem3)
        return torch.stack(spk_rec), torch.stack(mem_rec)

net_9 = Net().to(device)
optimizer = torch.optim.Adam(net_9.parameters(), lr=7.5e-3, betas=(0.9, 0.999))  # weight_decay=1e-2
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=735, eta_min=0, last_epoch=-1)
loss = SF.mse_count_loss()  # spk mse
train_loss_hist_9 = []
valid_loss_hist_9 = []
train_acc_hist_9 = []
valid_acc_hist_9 = []
path_9 = r"1-DVS\net_9_"  # raw string so the backslash is not treated as an escape
for epoch in range(num_epochs):
    batch_train = batch_valid = 0
    # Minibatch training loop
    net_9.train()
    for data_train, targets_train in iter(train_loader):
        data_train = data_train.to(device)
        targets_train = targets_train.to(device)
        spk_train, mem_train = net_9.forward(data_train)
        loss_train = loss(spk_train, targets_train)
        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()
        scheduler.step()
        _, idx = spk_train.sum(dim=0).max(1)
        acc_train = np.mean((targets_train == idx).detach().cpu().numpy())
        train_acc_hist_9.append(acc_train.item())
        train_loss_hist_9.append(loss_train.item())
        batch_train += 1
    # Minibatch validation loop
    net_9.eval()
    with torch.no_grad():
        for data_valid, targets_valid in iter(valid_loader):
            data_valid = data_valid.to(device)
            targets_valid = targets_valid.to(device)
            spk_valid, mem_valid = net_9.forward(data_valid)
            loss_valid = loss(spk_valid, targets_valid)
            _, idx = spk_valid.sum(dim=0).max(1)
            acc_valid = np.mean((targets_valid == idx).detach().cpu().numpy())
            valid_acc_hist_9.append(acc_valid.item())
            valid_loss_hist_9.append(loss_valid.item())
            batch_valid += 1
    scheduler.step(loss_valid)
    torch.save({'model_state_dict': net_9.state_dict()}, path_9 + str(epoch))
    print("----------------------------------------------------------------------")
    print_epoch_accuracy(train_acc_hist_9, valid_acc_hist_9, batch_train, batch_valid)
    print("----------------------------------------------------------------------")
    print("\n")
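As a rough way to tell overfitting from underfitting with the histories collected above, plotting the per-batch training and validation curves side by side helps: both curves flattening out at poor values suggests underfitting, while training continuing to improve with a persistent gap to validation suggests overfitting. A sketch, assuming matplotlib is available (the train and valid lists have different lengths, so each is plotted against its own batch index):

import matplotlib.pyplot as plt

fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 4))

ax_loss.plot(train_loss_hist_9, label="train loss")
ax_loss.plot(valid_loss_hist_9, label="valid loss")
ax_loss.set_xlabel("batch")
ax_loss.legend()

ax_acc.plot(train_acc_hist_9, label="train acc")
ax_acc.plot(valid_acc_hist_9, label="valid acc")
ax_acc.set_xlabel("batch")
ax_acc.legend()

plt.tight_layout()
plt.show()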

Must the input height of a 1D CNN be constant?

I'm currently doing my honours research project on online/dynamic signature verification. I am using the SVC 2004 dataset (Task 2). I have done the following data processing:
def load_dataset_normalized(path):
    file_names = os.listdir(path)
    num_of_persons = len(file_names)
    initial_starting_point = np.zeros(np.shape([7]))
    x_dataset = []
    y_dataset = []
    for infile in file_names:
        full_file_name = os.path.join(path, infile)
        file = open(full_file_name, "r")
        file_lines = file.readlines()
        num_of_points = int(file_lines[0])
        x = []
        y = []
        time_stamp = []
        button_status = []
        azimuth_angles = []
        altitude = []
        pressure = []
        for idx, line in enumerate(file_lines[1:]):
            idx += 1
            nums = line.split(' ')
            if idx == 1:
                nums[2] = 0
                initial_starting_point = nums
                x.append(int(nums[0]))
                y.append(int(nums[1]))
                time_stamp.append(0)
                button_status.append(int(nums[3]))
                azimuth_angles.append(int(nums[4]))
                altitude.append(int(nums[5]))
                pressure.append(int(nums[6]))
            else:
                x.append(int(nums[0]))
                y.append(int(nums[1]))
                time_stamp.append(10)
                button_status.append(int(nums[3]))
                azimuth_angles.append(int(nums[4]))
                altitude.append(int(nums[5]))
                pressure.append(int(nums[6]))
        max_x = max(x)
        max_y = max(y)
        max_azimuth_angle = max(azimuth_angles)
        max_altitude = max(altitude)
        max_pressure = max(pressure)
        min_x = min(x)
        min_y = min(y)
        min_azimuth_angle = min(azimuth_angles)
        min_altitude = min(altitude)
        min_pressure = min(pressure)
        # Alignment normalization:
        for i in range(num_of_points):
            x[i] -= int(initial_starting_point[0])
            y[i] -= int(initial_starting_point[1])
            azimuth_angles[i] -= int(initial_starting_point[4])
            altitude[i] -= int(initial_starting_point[5])
            pressure[i] -= int(initial_starting_point[6])
        # Size normalization
        for i in range(num_of_points):
            x[i] = ((x[i] - max_x) / (min_x - max_x))
            y[i] = ((y[i] - max_y) / (min_y - max_y))
            azimuth_angles[i] = ((azimuth_angles[i] - max_azimuth_angle) / (min_azimuth_angle - max_azimuth_angle))
            altitude[i] = ((altitude[i] - max_altitude) / (min_altitude - max_altitude))
            pressure[i] = ((pressure[i] - max_pressure) / (min_pressure - max_pressure))
        # data points to dataset
        x_line = []
        for i in range(num_of_points):
            x_line.append([x[i], y[i], time_stamp[i], button_status[i], azimuth_angles[i], altitude[i], pressure[i]])
            if i == num_of_points - 1:
                x_dataset.append(x_line)
        infile_without_extension = infile.replace('.TXT', '')
        index_of_s = infile_without_extension.find("S")
        index_of_num = index_of_s + 1
        sig_ID = int(infile_without_extension[index_of_num:])
        if sig_ID < 21:
            y_dataset.append([1, 0])
        else:
            y_dataset.append([0, 1])
    x_dataset = np.asarray(x_dataset)
    y_dataset = np.asarray(y_dataset)
    return x_dataset, y_dataset
I also have another method that takes the values as they are in the text file and creates an "original" dataset.
Now, the aim of my research is to create a CRNN (convolutional recurrent neural network) that can identify if a signature is authentic or forged. Here is the code for the model:
class crnn_model:
    def __init__(self, trainX, trainy, testX, testy, optimizer_method):
        self.trainX = trainX
        self.trainy = trainy
        self.testX = testX
        self.testy = testy
        self.evaluate_model(optimizer_method)

    def evaluate_model(self, optimizer_method):
        verbose, epochs, batch_size = 0, 40, 10
        n_timesteps, n_features, n_outputs = len(self.trainX), 7, 2
        print(n_timesteps)
        model = keras.Sequential()
        model.add(keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps, n_features), use_bias=True))
        model.add(keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu'))
        model.add(keras.layers.Dropout(0.5))
        model.add(keras.layers.MaxPooling1D(pool_size=2))
        model.add(keras.layers.Flatten())
        model.add(keras.layers.LSTM(2, input_shape=[30592, 1], return_sequences=True))
        model.summary()
        # Compile the model
        model.compile(optimizer=optimizer_method, loss='categorical_crossentropy', metrics=['accuracy'])
        # fit model
        model.fit(self.trainX, self.trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)
        # evaluate model
        _, accuracy = model.evaluate(self.testX, self.testy, batch_size=batch_size, verbose=0)
        return accuracy
Here is the problem I am having: the number of points used to store each signature differs, which makes the height of the input matrix vary from one signature to the next. Must I now force the dataset to a uniform/constant number of points (for example by padding, as in the sketch below)?
Thank you for your time.
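For illustration, one common way to handle the varying number of points is to pad (or truncate) every signature to a fixed number of time steps, so each sample has shape (max_len, 7). A sketch using Keras' pad_sequences; MAX_POINTS is a made-up cap that would need to be chosen from the dataset's length distribution:

from keras.preprocessing.sequence import pad_sequences

MAX_POINTS = 512  # hypothetical; pick from the distribution of signature lengths

# x_dataset: list of per-signature sequences of shape (num_points_i, 7)
x_padded = pad_sequences(x_dataset, maxlen=MAX_POINTS,
                         dtype='float32', padding='post', truncating='post')
print(x_padded.shape)  # (num_signatures, MAX_POINTS, 7)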

How to generate sequences correctly with an encoder-decoder LSTM?

I am implementing some code to generate labeled data for Natural Language Understanding (NLU) from the article "Labeled Data Generation with Encoder-decoder LSTM for Semantic Slot Filling" (https://pdfs.semanticscholar.org/7ffe/83d7dd3a474e15ccc2aef412009f100a5802.pdf). My architecture is a simple encoder-decoder LSTM, but since my generated sentences (for words and labels) are not correct, I am first trying to generate exactly the same sentence (words only) that I give as input. Unfortunately, this is not working correctly.
I am using word2vec for the word embeddings, with the embedding dimension set to 64 (as suggested in the article). The encoder LSTM receives the sequence in reversed order and has a dropout rate of 0.5. The decoder LSTM also has a dropout rate of 0.5 and a softmax layer over each output of the sequence to pick the most probable word. The inputs are exactly the same as the targets (same sentences), since for now I just want to reproduce the input sentence.
For training I used the Adam optimizer with categorical_crossentropy as the loss. For inference I used beam search (B=3) when generating sequences.
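Since categorical_crossentropy is used with a per-timestep softmax over vocab_w_size, the targets need to be one-hot arrays of shape (num_sentences, maxlen, vocab_w_size). A sketch of how such targets might be built, assuming train_w holds padded word-index sequences (this layout is an assumption, not code from the question):

from keras.utils import to_categorical

# train_w: (num_sentences, maxlen) integer word indices
# One-hot encode each position for the per-timestep softmax.
train_lab_w = to_categorical(train_w, num_classes=vocab_w_size)
# train_lab_w: (num_sentences, maxlen, vocab_w_size)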
My training code:
def pretrained_embedding_layer(emb):
    vocab_len = len(emb)
    emb_dim = len(emb[0])
    emb_layer = Embedding(vocab_len, emb_dim, trainable=False)
    emb_layer.build((None,))
    emb_layer.set_weights([emb])
    return emb_layer

LSTM_encoder = LSTM(1024, dropout=0.5, return_state=True, go_backwards=True, name='lstm_encoder')
LSTM_decoder = LSTM(1024, dropout=0.5, return_sequences=True, return_state=True, name='lstm_decoder')
dense_w = Dense(vocab_w_size, activation='softmax', name="word_output")
K.set_learning_phase(1)

def model1_enc_dec(input_shape, w_emb):
    words_indices = Input(shape=input_shape, dtype='int32')
    wemb_layer = pretrained_embedding_layer(w_emb)
    wemb = wemb_layer(words_indices)
    enc_out, enc_state_h, enc_state_c = LSTM_encoder(wemb)
    encoder_states = [enc_state_h, enc_state_c]
    dec_out, dec_state_h, dec_state_c = LSTM_decoder(wemb, initial_state=encoder_states)
    dec_out = dense_w(dec_out)
    model1 = Model(inputs=[words_indices], outputs=[dec_out])
    return model1

model = model1_enc_dec((maxlen,), w_emb, s_emb)
model.summary()
model.compile(loss="categorical_crossentropy", optimizer='adam', metrics=['accuracy'])
model.fit(train_w, train_lab_w, validation_data=(val_w, val_lab_w), epochs=epochs, verbose=1, shuffle=True)
My inference code:
wemb_layer = Embedding(len(w_emb), len(w_emb[0]), trainable=False)
wemb_layer.build((None,))
LSTM_encoder = LSTM(1024, return_state=True, go_backwards=True, name='lstm_encoder')
LSTM_decoder = LSTM(1024, return_sequences=True, return_state=True, name='lstm_decoder')
dense_w = Dense(vocab_w_size, activation='softmax', name="word_output")

def target_model(input_shape):
    words_indices = Input(shape=input_shape, dtype='int32')
    wemb = wemb_layer(words_indices)
    enc_out, enc_state_h, enc_state_c = LSTM_encoder(wemb)
    encoder_states = [enc_state_h, enc_state_c]
    dec_out, dec_state_h, dec_state_c = LSTM_decoder(wemb, initial_state=encoder_states)
    dec_out = dense_w(dec_out)
    model = Model(inputs=[words_indices], outputs=[dec_out])
    return model

target_model = target_model((maxlen,))

wemb_layer.set_weights(model1.layers[1].get_weights())  # layer 0: input
LSTM_encoder.set_weights(model1.layers[2].get_weights())
LSTM_decoder.set_weights(model1.layers[3].get_weights())
dense_w.set_weights(model1.layers[4].get_weights())

def model1_enco_infe(input_shape):
    words_indices = Input(shape=input_shape, dtype='int32')
    wemb = wemb_layer(words_indices)
    enc_out, enc_state_h, enc_state_c = LSTM_encoder(wemb)
    encoder_model = Model(inputs=[words_indices], outputs=[enc_state_h, enc_state_c])
    return encoder_model

def model1_deco_infe(input_shape):
    dec_word_input = Input(shape=input_shape, dtype='int32')
    dec_state_input_h = Input(shape=(1024,))
    dec_state_input_c = Input(shape=(1024,))
    wemb = wemb_layer(dec_word_input)
    dec_states_input = [dec_state_input_h, dec_state_input_c]
    dec_out, dec_state_h, dec_state_c = LSTM_decoder(wemb, initial_state=dec_states_input)
    dec_states_output = [dec_state_h, dec_state_c]
    deco_out = dense_w(dec_out)
    decoder_model = Model(inputs=[dec_word_input] + dec_states_input, outputs=[deco_out] + dec_states_output)
    return decoder_model

encoder_model = model1_enco_infe((maxlen,))
decoder_model = model1_deco_infe((1,))

def beamsearch_B(deco_w_out, beam):
    words_index = []
    dw = deco_w_out.copy()
    for i in range(beam):
        word_index = np.argmax(dw, axis=-1)
        dw[0][0][word_index[0][0]] = 0
        words_index.append(word_index[0][0])
    return words_index

def generate_model1_add(word_seq, encoder_model, decoder_model, dec_word_input, id2word, beam):
    [enc_state_h, enc_state_c] = encoder_model.predict(word_seq)
    states = [enc_state_h, enc_state_c]
    word_sentence = ''
    probs_word = []
    word_sentences = []
    dec_word_inputs = []
    states_beam = []
    stop_condition = False
    [dec_w_out, dec_state_h, dec_state_c] = decoder_model.predict([dec_word_input] + states)
    words_index, _ = beamsearch_B(dec_w_out, [], beam)
    for i in range(beam):
        probs_word.append(-log(dec_w_out[0][0][words_index[i]]))
        word_sentences.append(id2word[words_index[i]])
        dec_word_inputs.append([words_index[i]])
        states_beam.append([dec_state_h, dec_state_c])
    n_words = 1
    endgame = []
    while not stop_condition:
        words_indexes, words_sentences, probs_words, states_b = [], [], [], []
        for k in range(beam):
            [dec_w_out, dec_state_h, dec_state_c] = decoder_model.predict([dec_word_inputs[k]] + states_beam[k])
            words_index, _ = beamsearch_B(dec_w_out, [], beam)
            states = [dec_state_h, dec_state_c]
            for j in range(beam):
                words_indexes.append(words_index[j])
                probs_words.append(probs_word[k] * -log(dec_w_out[0][0][words_index[j]]) + 1e-7)
                words_sentences.append(word_sentences[k] + ' ' + id2word[words_index[j]])
                states_b.append(states)
        probs = []
        for i in range(len(probs_words)):
            probs.append(1 / (probs_words[i]))
        indexes = []
        for i in range(beam):
            index = np.argmax(probs, axis=-1)
            probs[index] = 0
            indexes.append(index)
        for i in range(beam):
            probs_word[i] = probs_words[indexes[i]]
            word_sentences[i] = words_sentences[indexes[i]]
            dec_word_inputs[i] = [words_indexes[indexes[i]]]
            states_beam[i] = states_b[indexes[i]]
            if (id2word[words_indexes[indexes[i]]] == 'EOS'):
                endgame.append(i)
        if len(endgame) == 1:
            word_sentence = word_sentences[endgame]
            stop_condition = True
        elif len(endgame) > 1:
            word_sentence = word_sentences[np.min(endgame)]
            stop_condition = True
        n_words += 1
        if n_words > 50:
            word_sentence = word_sentences[0]
            stop_condition = True
    return word_sentence

word_sentence = generate_model1_add(np.reshape(train_w[i], (1, maxlen)),
                                    encoder_model, 0, decoder_model, [w2i['BOS']], i2w, 3)
An example of my generated sequences:
Input sentence: BOS i 'm fourth in flying from boston to atlanta EOS PAD PAD PAD ...
Generated sentence: BOS from from from from from from from from from from from from from from from from from from from ...
It seems that the trained weights are not right, yet during training I got loss: 0.0032 - acc: 0.9990 - val_loss: 0.0794 - val_acc: 0.9888.
What I want is simply to generate exactly the same sentence as the input. I hope you can help me. Thank you in advance!

ValueError: Index out of range using input dim 2; input has only 2 dims for 'crf_1/strided_slice

I'm trying to use a CRF layer rather than softmax after a BiLSTM, and I'm using keras_contrib for the CRF. I think I've made some mistake with the array dimensions, but I can't fix it.
Here is the code:
# preds = Dense(num_label, activation='softmax')(out)
# preds_binary = Dense(2, activation='softmax')(out)
'''
test 1
'''
preds = kcl.CRF(num_label, sparse_target=True)(out)
preds_binary = kcl.CRF(2, sparse_target=True)(out)
Here is the error message:
ValueError: Index out of range using input dim 2; input has only 2 dims for 'crf_1/strided_slice' (op: 'StridedSlice') with input shapes: [?,5], [3], [3], [3] and with computed input tensors: input[3] = <1 1 1>.
Anybody here can help me?
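For context on the dimension complaint (a generic sketch, not a fix for the exact code below): the keras_contrib CRF layer expects a 3D sequence input of shape (batch, timesteps, features), such as the output of an LSTM with return_sequences=True; feeding it a 2D (batch, features) tensor like the pooled `out` below produces exactly this kind of strided_slice error. Shapes and layer sizes here are made up:

from keras.layers import Input, Embedding, LSTM
from keras.models import Model
from keras_contrib.layers import CRF

num_label = 5
words = Input(shape=(50,), dtype='int32')       # (batch, timesteps)
emb = Embedding(1000, 64)(words)                # (batch, 50, 64)
seq = LSTM(32, return_sequences=True)(emb)      # (batch, 50, 32): 3D, as the CRF expects
tags = CRF(num_label, sparse_target=True)(seq)  # per-timestep tag distribution
model = Model(words, tags)
model.summary()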
@giser_yugang Here's my code:
num_labels = 5
train_array = [X_train, POS1_train, POS2_train]
test_array = [X_test, POS1_test, POS2_test]
train_label = [Y_train, binary_label_train]
test_label = [Y_test, binary_label_test ]
x_test_drug, x_test_med, y_test_drug, y_test_med = pd.splitDrug_Med(id_test, X_test, Y_test, POS1_test, POS2_test,
                                                                     binary_label_test)
print("\nthe shape of x_test_drug[0]: ", x_test_drug[0].shape, '\n')
print("\nthe shape of x_test_med[0] : ", x_test_med[0].shape, '\n')
print("load word2vec...")
len_dic, embedding_matrix = ld.load_word_matrix(GLOVE_DIR,
                                                MAX_NB_WORDS,
                                                word_index,
                                                EMBEDDING_DIM)
print("create word embedding layer...")
embedding_layer = Embedding(len_dic,
                            EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            input_length=MAX_SEQUENCE_LENGTH,
                            trainable=True)
print("create position embedding layer...")
position_em_dim = 10
pos_embedding_matrix = np.random.uniform(-0.1, 0.1, size=(400, position_em_dim))
print("the shape of pos_embedding_matrix", pos_embedding_matrix.shape)
pos_embedding_layer = Embedding(400,
                                position_em_dim,
                                weights=[pos_embedding_matrix],
                                input_length=MAX_SEQUENCE_LENGTH,
                                trainable=True)
print('create model...')
sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
word_embedded_sequences = embedding_layer(sequence_input)
pos1_sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
pos1_embedded_sequences = pos_embedding_layer(pos1_sequence_input)
pos2_sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
pos2_embedded_sequences = pos_embedding_layer(pos2_sequence_input)
# embedded_sequences = merge([word_embedded_sequences, pos1_embedded_sequences, pos2_embedded_sequences],
# mode='concat')
embedded_sequences = concatenate([word_embedded_sequences, pos1_embedded_sequences, pos2_embedded_sequences], axis=-1)
'''
#lstm_attention_add_pos_add_last_two_out
'''
embedded_sequences = Dropout(0.3)(embedded_sequences)
lstm_forward, lstm_backword_reverse = Bidirectional(LSTM(hidden_dim, dropout_W=0.3,
                                                         dropout_U=0.3,
                                                         return_sequences=True,
                                                         U_regularizer=regularizers.l2(0.0001)),
                                                    merge_mode=None)(embedded_sequences)
# lstm_forward = LSTM(150, dropout_W=0.2, dropout_U=0.2, return_sequences=True)(embedded_sequences)
# lstm_backword = LSTM(150, dropout_W=0.2, dropout_U=0.2, return_sequences=True, go_backwards=True)(embedded_sequences)
flip_layer = Lambda(lambda x: K.reverse(x, 1), output_shape=lambda x: (x[0], x[1], x[2]))
flip_layer.supports_masking = True
lstm_backword = flip_layer(lstm_backword_reverse)
# lstm_sequence = merge([lstm_forward, lstm_backword_reverse], mode='concat', concat_axis=-1)
lstm_sequence = concatenate([lstm_forward, lstm_backword_reverse], axis=-1)
# pos_featrue = merge([pos1_embedded_sequences, pos2_embedded_sequences], mode='concat', concat_axis=-1)
pos_featrue = concatenate([pos1_embedded_sequences, pos2_embedded_sequences], axis=-1)
pos_featrue = TimeDistributed(Dense(20, init='he_normal'))(pos_featrue)
h_feature = TimeDistributed(Dense(hidden_dim * 2))(lstm_sequence)
# att_feature = merge([h_feature, pos_featrue], mode='concat', concat_axis=-1)
att_feature = concatenate([h_feature, pos_featrue], axis=-1)
weights = AttentionWeight2(name='attention')(att_feature)
weights_repeat = RepeatVector(hidden_dim * 2)(weights)
weights_repeat_per = Permute((2, 1))(weights_repeat)
# mul = merge([lstm_sequence, weights_repeat_per], mode='mul')
mul = multiply([lstm_sequence, weights_repeat_per])
sumpool = Lambda(lambda x: K.sum(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]))
sumpool.supports_masking = True
att_out = sumpool(mul)
lastout = Lambda(slice, output_shape=lambda x: (x[0], x[2]), arguments={'index': -1})
lstm_last_forward = lastout(lstm_forward)
lstm_last_backward = lastout(lstm_backword)
# lstm_last = merge([lstm_last_forward, lstm_last_backward], mode='concat')
lstm_last = concatenate([lstm_last_forward, lstm_last_backward], axis=-1)
att_out = Dense(hidden_dim * 2)(att_out)
lstm_last = Dense(hidden_dim * 2)(lstm_last)
# out = merge([att_out, lstm_last], mode='sum')
out = add([att_out, lstm_last])
out = Dropout(0.5)(out)
out = Activation(activation='tanh')(out)
preds = Dense(num_label, activation='softmax')(out)
preds_binary = Dense(2, activation='softmax')(out)
'''
test 1
'''
preds = kcl.CRF(num_label, sparse_target=True)(out)
preds_binary = kcl.CRF(2, sparse_target=True)(out)
If it's not enough, I'll give you more.
