Must the input height of a 1D CNN be constant?

I'm currently doing my honours research project on online/dynamic signature verification. I am using the SVC 2004 dataset (Task 2). I have done the following data processing:
def load_dataset_normalized(path):
    file_names = os.listdir(path)
    num_of_persons = len(file_names)
    initial_starting_point = np.zeros(7)
    x_dataset = []
    y_dataset = []
    for infile in file_names:
        full_file_name = os.path.join(path, infile)
        with open(full_file_name, "r") as file:
            file_lines = file.readlines()
        num_of_points = int(file_lines[0])
        x = []
        y = []
        time_stamp = []
        button_status = []
        azimuth_angles = []
        altitude = []
        pressure = []
        for idx, line in enumerate(file_lines[1:], start=1):
            nums = line.split(' ')
            if idx == 1:
                nums[2] = 0
                initial_starting_point = nums
                x.append(int(nums[0]))
                y.append(int(nums[1]))
                time_stamp.append(0)
                button_status.append(int(nums[3]))
                azimuth_angles.append(int(nums[4]))
                altitude.append(int(nums[5]))
                pressure.append(int(nums[6]))
            else:
                x.append(int(nums[0]))
                y.append(int(nums[1]))
                time_stamp.append(10)
                button_status.append(int(nums[3]))
                azimuth_angles.append(int(nums[4]))
                altitude.append(int(nums[5]))
                pressure.append(int(nums[6]))
        max_x = max(x)
        max_y = max(y)
        max_azimuth_angle = max(azimuth_angles)
        max_altitude = max(altitude)
        max_pressure = max(pressure)
        min_x = min(x)
        min_y = min(y)
        min_azimuth_angle = min(azimuth_angles)
        min_altitude = min(altitude)
        min_pressure = min(pressure)
        # Alignment normalization: translate every point relative to the first point
        for i in range(num_of_points):
            x[i] -= int(initial_starting_point[0])
            y[i] -= int(initial_starting_point[1])
            azimuth_angles[i] -= int(initial_starting_point[4])
            altitude[i] -= int(initial_starting_point[5])
            pressure[i] -= int(initial_starting_point[6])
        # Size normalization: min-max scale each channel
        for i in range(num_of_points):
            x[i] = (x[i] - max_x) / (min_x - max_x)
            y[i] = (y[i] - max_y) / (min_y - max_y)
            azimuth_angles[i] = (azimuth_angles[i] - max_azimuth_angle) / (min_azimuth_angle - max_azimuth_angle)
            altitude[i] = (altitude[i] - max_altitude) / (min_altitude - max_altitude)
            pressure[i] = (pressure[i] - max_pressure) / (min_pressure - max_pressure)
        # Collect the per-point feature vectors for this signature
        x_line = []
        for i in range(num_of_points):
            x_line.append([x[i], y[i], time_stamp[i], button_status[i],
                           azimuth_angles[i], altitude[i], pressure[i]])
        x_dataset.append(x_line)
        # Signatures S1..S20 are genuine, S21..S40 are forgeries
        infile_without_extension = infile.replace('.TXT', '')
        index_of_s = infile_without_extension.find("S")
        sig_ID = int(infile_without_extension[index_of_s + 1:])
        if sig_ID < 21:
            y_dataset.append([1, 0])
        else:
            y_dataset.append([0, 1])
    x_dataset = np.asarray(x_dataset)
    y_dataset = np.asarray(y_dataset)
    return x_dataset, y_dataset
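For reference, here is how I call the loader and what exposes the problem (a minimal sketch; the path is just an example):
x_dataset, y_dataset = load_dataset_normalized("SVC2004/Task2")  # hypothetical path to the extracted Task 2 files
lengths = [len(signature) for signature in x_dataset]
print(min(lengths), max(lengths))  # per-signature point counts differ, so x_dataset is ragged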
I also have another method that takes the values exactly as they appear in the text file and creates an "original" (un-normalized) dataset.
The aim of my research is to create a CRNN (convolutional recurrent neural network) that can identify whether a signature is authentic or forged. Here is the code for the model:
class crnn_model:
    def __init__(self, trainX, trainy, testX, testy, optimizer_method):
        self.trainX = trainX
        self.trainy = trainy
        self.testX = testX
        self.testy = testy
        self.evaluate_model(optimizer_method)

    def evaluate_model(self, optimizer_method):
        verbose, epochs, batch_size = 0, 40, 10
        n_timesteps, n_features, n_outputs = len(self.trainX), 7, 2
        print(n_timesteps)
        model = keras.Sequential()
        model.add(keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu',
                                      input_shape=(n_timesteps, n_features), use_bias=True))
        model.add(keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu'))
        model.add(keras.layers.Dropout(0.5))
        model.add(keras.layers.MaxPooling1D(pool_size=2))
        model.add(keras.layers.Flatten())
        model.add(keras.layers.LSTM(2, input_shape=[30592, 1], return_sequences=True))
        model.summary()
        # Compile the model
        model.compile(optimizer=optimizer_method, loss='categorical_crossentropy', metrics=['accuracy'])
        # Fit the model
        model.fit(self.trainX, self.trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)
        # Evaluate the model
        _, accuracy = model.evaluate(self.testX, self.testy, batch_size=batch_size, verbose=0)
        return accuracy
Here is the problem I am having: the number of points used to store each signature differs, so the height of the input matrix varies from one signature to the next. Must I now force the dataset to some uniform/constant number of points?
Much appreciated for your time.
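For context, the workaround I am considering is padding/truncating every signature to a fixed length before stacking them into one array (a minimal sketch, assuming tf.keras's pad_sequences; MAX_POINTS is a hypothetical target length):
from tensorflow.keras.preprocessing.sequence import pad_sequences

MAX_POINTS = 400  # hypothetical; e.g. chosen from the distribution of signature lengths

# x_dataset is a list of (num_points, 7) sequences with varying num_points.
# Shorter signatures are zero-padded at the end, longer ones truncated,
# giving every sample the constant shape (MAX_POINTS, 7) that Conv1D expects.
x_fixed = pad_sequences(x_dataset, maxlen=MAX_POINTS, dtype='float32',
                        padding='post', truncating='post')
Alternatively, Conv1D itself accepts a variable time dimension (input_shape=(None, 7)) as long as every sample within a batch has the same length, so bucketing signatures by length or training with batch_size=1 would also avoid padding.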

Related

Fluctuations and overfitting in first epochs

I am training a CNN on the DVS gesture dataset using PyTorch, but training is not progressing smoothly: the training and validation accuracies both fluctuate a lot. Both improve, but a large gap between them (5-6%, up to 10%) opens around epoch 3 or 4, as if the model were overfitting. I have tried L2 regularization as well as dropout with high rates; the gap disappears in the first iterations but reappears strongly afterwards. I am sure the datasets are properly merged and randomly split, changing the batch size several times had no impact, and normalization made things worse.
PS: Could this be underfitting instead? How do I identify underfitting?
Thanks in advance!
Code (using the snntorch library):
spike_grad = surrogate.fast_sigmoid(slope=5.4)
beta = 0.72
num_epochs = 200

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        # Initialize layers
        self.conv1 = nn.Conv2d(2, 16, kernel_size=5, bias=False)
        self.pool1 = nn.AvgPool2d(2)
        self.lif1 = snn.Leaky(beta=beta, spike_grad=spike_grad, threshold=2.5)  # , threshold_p=2.5, threshold_n=-2.5
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, bias=False)
        self.pool2 = nn.AvgPool2d(2)
        self.lif2 = snn.Leaky(beta=beta, spike_grad=spike_grad, threshold=2.5)  # , threshold_p=2.5, threshold_n=-2.5
        self.fc1 = nn.Linear(800, 11)
        self.drop1 = nn.Dropout(0.93)
        self.lif3 = snn.Leaky(beta=beta, spike_grad=spike_grad, threshold=2.5)  # , threshold_p=2.5, threshold_n=-2.5
        self.flatten = nn.Flatten()

    def forward(self, x):
        mem1 = self.lif1.init_leaky()
        mem2 = self.lif2.init_leaky()
        mem3 = self.lif3.init_leaky()
        spk_rec = []
        mem_rec = []
        # Iterate over the time dimension of the event data
        for step in range(x.size(1)):
            cur1 = self.pool1(self.conv1((x.permute(1, 0, 2, 3, 4))[step]))
            spk1, mem1 = self.lif1(cur1, mem1)
            cur2 = self.pool2(self.conv2(spk1))
            spk2, mem2 = self.lif2(cur2, mem2)
            cur3 = self.drop1(self.fc1(self.flatten(spk2)))
            spk3, mem3 = self.lif3(cur3, mem3)
            spk_rec.append(spk3)
            mem_rec.append(mem3)
        return torch.stack(spk_rec), torch.stack(mem_rec)

net_9 = Net().to(device)
optimizer = torch.optim.Adam(net_9.parameters(), lr=7.5e-3, betas=(0.9, 0.999))  # , weight_decay=1e-2
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=735, eta_min=0, last_epoch=-1)
loss = SF.mse_count_loss()  # spike-count MSE

train_loss_hist_9 = []
valid_loss_hist_9 = []
train_acc_hist_9 = []
valid_acc_hist_9 = []
path_9 = "1-DVS\\net_9_"  # backslash escaped; the unescaped "\n" was a newline

for epoch in range(num_epochs):
    batch_train = batch_valid = 0
    # Minibatch training loop
    net_9.train()
    for data_train, targets_train in iter(train_loader):
        data_train = data_train.to(device)
        targets_train = targets_train.to(device)
        spk_train, mem_train = net_9.forward(data_train)
        loss_train = loss(spk_train, targets_train)
        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()
        scheduler.step()
        # Predicted class = neuron with the highest spike count over time
        _, idx = spk_train.sum(dim=0).max(1)
        acc_train = np.mean((targets_train == idx).detach().cpu().numpy())
        train_acc_hist_9.append(acc_train.item())
        train_loss_hist_9.append(loss_train.item())
        batch_train += 1
    # Minibatch validation loop
    net_9.eval()
    with torch.no_grad():
        for data_valid, targets_valid in iter(valid_loader):
            data_valid = data_valid.to(device)
            targets_valid = targets_valid.to(device)
            spk_valid, mem_valid = net_9.forward(data_valid)
            loss_valid = loss(spk_valid, targets_valid)
            _, idx = spk_valid.sum(dim=0).max(1)
            acc_valid = np.mean((targets_valid == idx).detach().cpu().numpy())
            valid_acc_hist_9.append(acc_valid.item())
            valid_loss_hist_9.append(loss_valid.item())
            batch_valid += 1
    # Note: CosineAnnealingLR.step() takes no metric, and the scheduler is already
    # stepped once per training batch above, so this second call is suspect.
    scheduler.step(loss_valid)
    torch.save({'model_state_dict': net_9.state_dict()}, path_9 + str(epoch))
    print("----------------------------------------------------------------------")
    print_epoch_accuracy(train_acc_hist_9, valid_acc_hist_9, batch_train, batch_valid)
    print("----------------------------------------------------------------------")
    print("\n")

Multivariate encoder-decoder model converges to a single value

I am creating my first multivariate, multistep encoder-decoder LSTM to forecast revenues.
As the plots below show, the predicted values move towards a single value and then stay there. The aim is to create a forecast for a longer period, but there is no deviation at all from this standard value after the first week.
What is wrong and what can I do? To me it doesn't look like it is working at all.
Code:
class ModelTrainer:
    def __init__(self, prediction_length=30, offset=1):
        self.prediction_length = prediction_length
        self.offset = offset
        self._setup_values()
        self.use_scaling = True
        self.__prepare_data()

    def _setup_values(self):
        # Model configuration
        self.additional_metrics = ['accuracy']
        self.embedding_output_dims = 15
        self.max_sequence_length = 300
        self.num_distinct_words = 5000
        self.verbosity_mode = 1
        # Data
        self.WINDOW_LENGTH = 70  # should be adjusted to the number of forecast days
        self.SAMPLING_RATE = 1
        self.BATCH_SIZE = 128
        # Model
        self.DROPOUT = 0.3
        self.NODES_PER_LAYER = 256
        self.NUMBER_OF_LAYERS = 3
        # Training
        self.LEARNING_RATE = 0.001
        self.OPTIMIZER = Adam(learning_rate=self.LEARNING_RATE)
        self.VALIDATION_SPLIT = 0.20
        self.NUMBER_OF_EPOCHS = 10
        self.TEST_SIZE = 0.1
        self.RANDOM_STATE = 123
        self.LOSS_FUNCTION = MeanSquaredError()

    def __import_data(self):
        self.series = DataOrganizer().df

    def __prepare_data(self):
        self.__import_data()
        self.scaler = preprocessing.MinMaxScaler()
        data_scaled = self.scaler.fit_transform(self.series)
        self.features, self.target = self._create_feature_target_values_window(data_scaled)

    def _create_feature_target_values_window(self, data):
        self.number_of_output_columns = 4
        feature_data = data
        target_data = data[:, :self.number_of_output_columns]
        features, target = list(), list()
        in_start = 0
        # Slide a window over the series: WINDOW_LENGTH input steps,
        # followed by prediction_length target steps
        for _ in range(len(data)):
            in_end = in_start + self.WINDOW_LENGTH
            out_end = in_end + self.prediction_length
            if out_end <= len(data):
                features.append(feature_data[in_start:in_end, :])
                target.append(target_data[in_end:out_end, 0:self.number_of_output_columns])
            in_start += 1
        return np.array(features), np.array(target)

    def __create_LSTM_model(self):
        num_feature_columns = self.features.shape[2]
        num_output_columns = self.target.shape[2]
        model = Sequential()
        model.add(LSTM(self.NODES_PER_LAYER,
                       input_shape=(self.WINDOW_LENGTH, num_feature_columns)))
        model.add(Dropout(self.DROPOUT))
        model.add(RepeatVector(self.prediction_length))
        model.add(LSTM(self.NODES_PER_LAYER, return_sequences=True))
        model.add(Dropout(self.DROPOUT))
        model.add(TimeDistributed(Dense(self.NODES_PER_LAYER)))
        model.add(Dropout(self.DROPOUT))
        model.add(TimeDistributed(Dense(num_output_columns)))
        model.summary()
        return model

    def train_model(self, callbacks=[]):
        model = self.__create_LSTM_model()
        model.compile(loss=self.LOSS_FUNCTION,
                      optimizer=self.OPTIMIZER,
                      metrics=['accuracy', MeanAbsoluteError()])
        model.fit(x=self.features,
                  y=self.target,
                  epochs=self.NUMBER_OF_EPOCHS,
                  validation_split=self.TEST_SIZE,
                  shuffle=False,
                  callbacks=callbacks)
        self.model = model

    def create_forecast(self):
        prediction = self.model.predict(self.features[-1:])
        # prediction = self.model.predict(self.features[-30:-29])  # show forecast from a month ago
        test_X = self.features.copy()
        test_X = test_X[:self.prediction_length, :1, self.number_of_output_columns:]
        test_X = test_X.reshape(self.prediction_length,
                                self.series.shape[1] - self.number_of_output_columns)
        prediction = prediction.reshape(self.prediction_length,
                                        self.number_of_output_columns)
        # Re-attach the non-target columns so the scaler can invert the full row
        inv_yhat = np.concatenate((prediction, test_X), axis=1)
        inv_yhat = self.scaler.inverse_transform(inv_yhat)
        prediction_df = pd.DataFrame(inv_yhat, columns=self.scaler.feature_names_in_)
        first_date = self.series.last_valid_index() + timedelta(days=1)
        last_date = first_date + timedelta(days=self.prediction_length - 1)
        days = pd.date_range(first_date, last_date, freq='D')
        prediction_df.set_index(days, inplace=True)
        prediction_df = prediction_df[self.series.columns[0:4]]
[Plots of the actual series and the forecast: the forecast flattens to a constant value after about a week. I know the x-axis description is incorrect; don't worry about it.]

ValueError: Index out of range using input dim 2; input has only 2 dims for 'crf_1/strided_slice'

I'm trying to use a CRF layer instead of softmax after a BiLSTM, and I'm using keras_contrib to get the CRF. I think I've made a mistake with the dimensions of an array, but I can't fix it.
Here is the code:
# preds = Dense(num_label, activation='softmax')(out)
# preds_binary = Dense(2, activation='softmax')(out)
'''
test 1
'''
preds = kcl.CRF(num_label, sparse_target=True)(out)
preds_binary = kcl.CRF(2, sparse_target=True)(out)
Here is the error message:
ValueError: Index out of range using input dim 2; input has only 2 dims for 'crf_1/strided_slice' (op: 'StridedSlice') with input shapes: [?,5], [3], [3], [3] and with computed input tensors: input[3] = <1 1 1>.
Can anybody here help me?
@giser_yugang Here's my code:
num_labels = 5
train_array = [X_train, POS1_train, POS2_train]
test_array = [X_test, POS1_test, POS2_test]
train_label = [Y_train, binary_label_train]
test_label = [Y_test, binary_label_test]
x_test_drug, x_test_med, y_test_drug, y_test_med = pd.splitDrug_Med(
    id_test, X_test, Y_test, POS1_test, POS2_test, binary_label_test)
print("\nthe shape of x_test_drug[0]: ", x_test_drug[0].shape, '\n')
print("\nthe shape of x_test_med[0] : ", x_test_med[0].shape, '\n')
print("load word2vec...")
len_dic, embedding_matrix = ld.load_word_matrix(GLOVE_DIR,
                                                MAX_NB_WORDS,
                                                word_index,
                                                EMBEDDING_DIM)
print("create word embedding layer...")
embedding_layer = Embedding(len_dic,
                            EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            input_length=MAX_SEQUENCE_LENGTH,
                            trainable=True)
print("create position embedding layer...")
position_em_dim = 10
pos_embedding_matrix = np.random.uniform(-0.1, 0.1, size=(400, position_em_dim))
print("the shape of pos_embedding_matrix", pos_embedding_matrix.shape)
pos_embedding_layer = Embedding(400,
                                position_em_dim,
                                weights=[pos_embedding_matrix],
                                input_length=MAX_SEQUENCE_LENGTH,
                                trainable=True)
print('create model...')
sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
word_embedded_sequences = embedding_layer(sequence_input)
pos1_sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
pos1_embedded_sequences = pos_embedding_layer(pos1_sequence_input)
pos2_sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
pos2_embedded_sequences = pos_embedding_layer(pos2_sequence_input)
embedded_sequences = concatenate([word_embedded_sequences,
                                  pos1_embedded_sequences,
                                  pos2_embedded_sequences], axis=-1)
'''
lstm_attention_add_pos_add_last_two_out
'''
embedded_sequences = Dropout(0.3)(embedded_sequences)
lstm_forward, lstm_backword_reverse = Bidirectional(
    LSTM(hidden_dim,
         dropout_W=0.3,
         dropout_U=0.3,
         return_sequences=True,
         U_regularizer=regularizers.l2(0.0001)),
    merge_mode=None)(embedded_sequences)
flip_layer = Lambda(lambda x: K.reverse(x, 1),
                    output_shape=lambda x: (x[0], x[1], x[2]))
flip_layer.supports_masking = True
lstm_backword = flip_layer(lstm_backword_reverse)
lstm_sequence = concatenate([lstm_forward, lstm_backword_reverse], axis=-1)
pos_featrue = concatenate([pos1_embedded_sequences, pos2_embedded_sequences], axis=-1)
pos_featrue = TimeDistributed(Dense(20, init='he_normal'))(pos_featrue)
h_feature = TimeDistributed(Dense(hidden_dim * 2))(lstm_sequence)
att_feature = concatenate([h_feature, pos_featrue], axis=-1)
weights = AttentionWeight2(name='attention')(att_feature)
weights_repeat = RepeatVector(hidden_dim * 2)(weights)
weights_repeat_per = Permute((2, 1))(weights_repeat)
mul = multiply([lstm_sequence, weights_repeat_per])
sumpool = Lambda(lambda x: K.sum(x, axis=1, keepdims=False),
                 output_shape=lambda x: (x[0], x[2]))
sumpool.supports_masking = True
att_out = sumpool(mul)
lastout = Lambda(slice, output_shape=lambda x: (x[0], x[2]), arguments={'index': -1})
lstm_last_forward = lastout(lstm_forward)
lstm_last_backward = lastout(lstm_backword)
lstm_last = concatenate([lstm_last_forward, lstm_last_backward], axis=-1)
att_out = Dense(hidden_dim * 2)(att_out)
lstm_last = Dense(hidden_dim * 2)(lstm_last)
out = add([att_out, lstm_last])
out = Dropout(0.5)(out)
out = Activation(activation='tanh')(out)
preds = Dense(num_label, activation='softmax')(out)
preds_binary = Dense(2, activation='softmax')(out)
'''
test 1
'''
preds = kcl.CRF(num_label, sparse_target=True)(out)
preds_binary = kcl.CRF(2, sparse_target=True)(out)
If it's not enough, I'll give you more.
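For reference, my understanding of the expected input shape (which may be where my dimension mistake is): keras_contrib's CRF is a sequence-labelling layer that expects a 3D input of shape (batch, timesteps, features), while my out tensor is 2D (batch, features) after the sum-pooling and Dense layers, which would match the [?,5] in the error. A minimal standalone sketch of that shape contract, with hypothetical sizes:
from keras.models import Sequential
from keras.layers import Embedding
from keras_contrib.layers import CRF

vocab_size, maxlen, num_label = 20000, 100, 5  # hypothetical sizes

model = Sequential()
model.add(Embedding(vocab_size, 128, input_length=maxlen))  # output: (batch, maxlen, 128)
crf = CRF(num_label, sparse_target=True)                    # consumes the 3D sequence tensor
model.add(crf)
model.compile(optimizer='adam', loss=crf.loss_function, metrics=[crf.accuracy])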

TensorFlow: ValueError: Can't load save_path when it is None

import os
import tarfile
from six.moves import urllib

URL = 'http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz'
PATH = 'aclImdb'

def fetch_data(url=URL, path=PATH):
    if not os.path.isdir(path):
        os.makedirs(path)
    file_path = os.path.join(path, "aclImdb_v1.tar.gz")
    urllib.request.urlretrieve(url, file_path)
    file_gz = tarfile.open(file_path)
    file_gz.extractall(path=path)
    file_gz.close()
import pyprind  # for progress visualisation
import pandas as pd

PATH = 'aclImdb'
labels = {'pos': 1, 'neg': 0}  # int class labels for 'positive' and 'negative'
pbar = pyprind.ProgBar(50000)  # initialise a progress bar with 50k iterations = no. of docs
df = pd.DataFrame()
# Use nested for loops to iterate over the 'train' and 'test' subdirectories
for s in ('test', 'train'):
    for l in ('pos', 'neg'):  # and read text files from the 'pos' and 'neg' subdirectories
        path = os.path.join(PATH, s, l)
        for file in os.listdir(path):
            # Append to the DataFrame with an int class label (pos = 1, neg = 0)
            with open(os.path.join(path, file), 'r', encoding='utf-8') as infile:
                txt = infile.read()
            df = df.append([[txt, labels[l]]], ignore_index=True)
            pbar.update()
df.columns = ['review', 'sentiment']

import numpy as np
np.random.seed(0)
df = df.reindex(np.random.permutation(df.index))
df.to_csv('movie_data.csv', index=False, encoding='utf-8')
# Note: word_to_int is only defined further down; this line must run after it is built
n_words = max(list(word_to_int.values())) + 1
df = pd.read_csv('movie_data.csv', encoding='utf-8')
df.head(3)

# Separate words and count each word's occurrence
import pyprind  # for progress visualisation
from collections import Counter
from string import punctuation
import re

counts = Counter()  # collects the counts of occurrence of each unique word
pbar = pyprind.ProgBar(len(df['review']),
                       title='Counting word occurrences...')  # progress bar
for i, review in enumerate(df['review']):
    text = ''.join([c if c not in punctuation else ' ' + c + ' '
                    for c in review]).lower()
    df.loc[i, 'review'] = text
    pbar.update()
    counts.update(text.split())

# Map each unique word to an int
word_counts = sorted(counts, key=counts.get, reverse=True)
print(word_counts[:5])
word_to_int = {word: ii for ii, word in enumerate(word_counts, 1)}

mapped_reviews = []
pbar = pyprind.ProgBar(len(df['review']),
                       title='Map movie reviews to integers...')

# Left-pad with zeros if the sequence length < 200;
# use the last 200 elements if the length > 200
# (note: mapped_reviews is filled by a loop that appears further down in this listing)
sequence_length = 200
sequences = np.zeros((len(mapped_reviews), sequence_length), dtype=int)
for i, row in enumerate(mapped_reviews):
    review_arr = np.array(row)
    sequences[i, -len(row):] = review_arr[-sequence_length:]
# Split the dataset into training and test sets
X_train = sequences[:25000, :]
y_train = df.loc[:25000, 'sentiment'].values
X_test = sequences[25000:, :]
y_test = df.loc[25000:, 'sentiment'].values

# Define the mini-batch generator
np.random.seed(123)

def batch_gen(x, y=None, batch_size=64):
    n_batches = len(x) // batch_size
    x = x[:n_batches * batch_size]
    if y is not None:
        y = y[:n_batches * batch_size]
    for ii in range(0, len(x), batch_size):
        if y is not None:
            yield x[ii:ii + batch_size], y[ii:ii + batch_size]
        else:
            yield x[ii:ii + batch_size]
import tensorflow as tf
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # suppress the 3.5 warning if using TF 1.4

class SentimentRNN(object):
    # Define __init__
    def __init__(self,
                 n_words,
                 seq_len=200,
                 lstm_size=256,
                 num_layers=1,
                 batch_size=64,
                 learning_rate=0.0001,
                 embed_size=200):
        self.n_words = n_words
        self.seq_len = seq_len
        self.lstm_size = lstm_size  # no. of hidden units
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.embed_size = embed_size
        self.g = tf.Graph()
        with self.g.as_default():
            tf.set_random_seed(123)
            self.build()
            self.saver = tf.train.Saver()
            self.init_op = tf.global_variables_initializer()

    # Define the build method
    def build(self):
        # Define the placeholders
        tf_x = tf.placeholder(tf.int32,
                              shape=(self.batch_size, self.seq_len),
                              name='tf_x')
        tf_y = tf.placeholder(tf.float32,
                              shape=(self.batch_size),
                              name='tf_y')
        tf_keepprob = tf.placeholder(tf.float32,
                                     name='tf_keepprob')
        # Create the embedding layer
        embedding = tf.Variable(
            tf.random_uniform(
                shape=(self.n_words, self.embed_size),
                minval=-1,
                maxval=1),
            name='embedding')
        embed_x = tf.nn.embedding_lookup(embedding,
                                         tf_x,
                                         name='embed_x')
        # Define LSTM cells and stack them
        cells = tf.contrib.rnn.MultiRNNCell(
            [tf.contrib.rnn.DropoutWrapper(
                tf.contrib.rnn.BasicLSTMCell(num_units=self.lstm_size),
                output_keep_prob=tf_keepprob)
             for i in range(self.num_layers)])
        # Define the initial state
        self.initial_state = cells.zero_state(
            self.batch_size, tf.float32)
        print(' << initial state >> ', self.initial_state)
        # Put together components with tf.nn.dynamic_rnn
        lstm_outputs, self.final_state = tf.nn.dynamic_rnn(
            cell=cells,
            inputs=embed_x,
            initial_state=self.initial_state)
        # lstm_outputs shape: [batch_size, max_time, cells.output_size]
        print('\n << lstm_output >> ', lstm_outputs)
        print('\n << final state >> ', self.final_state)
        # Apply a fully-connected layer on the RNN output
        logits = tf.layers.dense(
            inputs=lstm_outputs[:, -1],
            units=1,  # dimensionality of the output space
            activation=None,
            name='logits')
        # Remove dimensions of size 1 from the tensor shape
        logits = tf.squeeze(input=logits,
                            name='logits_squeezed')
        print('\n << logits >> ', logits)
        # If you want probabilities
        y_proba = tf.nn.sigmoid(logits, name='probabilities')
        predictions = {'probabilities': y_proba,
                       'labels': tf.cast(tf.round(y_proba),
                                         tf.int32,
                                         name='labels')}
        print('\n << predictions >> ', predictions)
        # Define the cost function
        cost = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf_y,
                logits=logits),
            name='cost')
        # Define the optimiser
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        train_op = optimizer.minimize(cost, name='train_op')

    # Define the train method
    def train(self, X_train, y_train, num_epochs):
        with tf.Session(graph=self.g) as sess:
            sess.run(self.init_op)
            iteration = 1
            for epoch in range(num_epochs):
                state = sess.run(self.initial_state)
                for batch_x, batch_y in batch_gen(
                        X_train,
                        y_train,
                        batch_size=self.batch_size):
                    feed = {'tf_x:0': batch_x,
                            'tf_y:0': batch_y,
                            'tf_keepprob:0': 0.5,
                            self.initial_state: state}
                    loss, _, state = sess.run(
                        ['cost:0',
                         'train_op',
                         self.final_state],
                        feed_dict=feed)
                    if iteration % 20 == 0:
                        print("Epoch: %d/%d Iteration: %d "
                              "| Train loss: %.5f" % (
                                  epoch + 1,
                                  num_epochs,
                                  iteration,
                                  loss))
                    iteration += 1
                # Note: a checkpoint is only written every 10th epoch
                if (epoch + 1) % 10 == 0:
                    self.saver.save(
                        sess,
                        "model/sentiment-%d.ckpt" % epoch)

    # Define the predict method
    def predict(self, X_data, return_proba=False):
        preds = []
        with tf.Session(graph=self.g) as sess:
            self.saver.restore(
                sess,
                tf.train.latest_checkpoint('model/'))
            test_state = sess.run(self.initial_state)
            for ii, batch_x in enumerate(batch_gen(
                    x=X_data,
                    y=None,
                    batch_size=self.batch_size), 1):
                feed = {'tf_x:0': batch_x,
                        'tf_keepprob:0': 1.0,
                        self.initial_state: test_state}
                if return_proba:
                    pred, test_state = sess.run(
                        ['probabilities:0', self.final_state],
                        feed_dict=feed)
                else:
                    pred, test_state = sess.run(
                        ['labels:0', self.final_state],
                        feed_dict=feed)
                preds.append(pred)
        return np.concatenate(preds)
# Map each review to ints (this loop must run before `sequences` is built above)
for review in df['review']:
    mapped_reviews.append([word_to_int[word] for word in review.split()])
    pbar.update()

rnn = SentimentRNN(n_words=n_words,
                   seq_len=sequence_length,
                   embed_size=256,
                   lstm_size=128,
                   num_layers=1,
                   batch_size=100,
                   learning_rate=0.001)
preds = rnn.predict(X_test)
y_true = y_test[:len(preds)]
print('Test accuracy... %.3f' % (np.sum(preds == y_true) / len(y_true)))
I create an object of the SentimentRNN class with the following parameters:
n_words = n_words, seq_len = sequence_length, embed_size = 256, lstm_size = 128, num_layers = 1, batch_size = 100, learning_rate = 0.001.
Since we have a relatively small dataset, a single layer (num_layers = 1) may generalise better.
Here is the traceback I get:
ValueError Traceback (most recent call last)
<ipython-input-23-a3cfe03a9a49> in <module>()
----> 1 preds = rnn.predict(X_test)
2 y_true = y_test[:len(preds)]
3 print('Test accuracy... %.3f' % (np.sum(preds == y_true) / len(y_true)))
<ipython-input-12-d83ee67c43b6> in predict(self, X_data, return_proba)
173 self.saver.restore(
174 sess,
--> 175 tf.train.latest_checkpoint('model/'))
176 test_state = sess.run(self.initial_state)
177
/usr/local/anaconda/lib/python3.6/site-packages/tensorflow/python/training/saver.py in restore(self, sess, save_path)
1680 return
1681 if save_path is None:
-> 1682 raise ValueError("Can't load save_path when it is None.")
1683 logging.info("Restoring parameters from %s", save_path)
1684 if context.in_graph_mode():
ValueError: Can't load save_path when it is None.
The error just means tf.train.latest_checkpoint didn't find anything. It returns None, and the Saver then complains because it was passed None. So there is no checkpoint in that directory.
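A minimal guard (assuming the same model/ directory the class saves into) makes the failure explicit instead of passing None to the Saver:
ckpt = tf.train.latest_checkpoint('model/')
if ckpt is None:
    raise FileNotFoundError("no checkpoint found in 'model/'")
self.saver.restore(sess, ckpt)
Note that train() above only saves a checkpoint every 10th epoch ((epoch + 1) % 10 == 0), so calling predict() after training for fewer than 10 epochs finds no checkpoint at all.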

How do I predict sentiment with an LSTM model using Python 3 in TensorFlow?

wordsList = np.load('training_data/wordsList.npy')
wordsList = wordsList.tolist()  # originally loaded as a numpy array
wordsList = [word.decode('UTF-8') for word in wordsList]  # decode words from UTF-8
wordVectors = np.load('training_data/wordVectors.npy')
I loaded some positive and negative example files into the variables positiveFiles and negativeFiles.
with tf.device('/gpu:0'):
    ids = np.zeros((numFiles, maxSeqLength), dtype='int32')
    fileCounter = 0
    for pf in positiveFiles:
        with open(pf, "r") as f:
            indexCounter = 0
            line = f.readline()
            cleanedLine = cleanSentences(line)
            split = cleanedLine.split()
            for word in split:
                try:
                    ids[fileCounter][indexCounter] = wordsList.index(word)
                except ValueError:
                    ids[fileCounter][indexCounter] = 399999  # vector for unknown words
                indexCounter = indexCounter + 1
                if indexCounter >= maxSeqLength:
                    break
            fileCounter = fileCounter + 1
    for nf in negativeFiles:
        with open(nf, "r") as f:
            indexCounter = 0
            line = f.readline()
            cleanedLine = cleanSentences(line)
            split = cleanedLine.split()
            for word in split:
                try:
                    ids[fileCounter][indexCounter] = wordsList.index(word)
                except ValueError:
                    ids[fileCounter][indexCounter] = 399999  # vector for unknown words
                indexCounter = indexCounter + 1
                if indexCounter >= maxSeqLength:
                    break
            fileCounter = fileCounter + 1
    # Pass into the embedding function and see if it evaluates
    np.save('idsMatrix', ids)
batchSize = 24
Training and testing methods
def getTrainBatch():
    labels = []
    arr = np.zeros([batchSize, maxSeqLength])
    for i in range(batchSize):
        if (i % 2 == 0):
            num = randint(1, 11499)
            labels.append([1, 0])
        else:
            num = randint(13499, 24999)
            labels.append([0, 1])
        arr[i] = ids[num-1:num]
    return arr, labels

def getTestBatch():
    labels = []
    arr = np.zeros([batchSize, maxSeqLength])
    for i in range(batchSize):
        num = randint(11499, 13499)
        if (num <= 12499):
            labels.append([1, 0])
        else:
            labels.append([0, 1])
        arr[i] = ids[num-1:num]
    return arr, labels
with tf.device('/gpu:0'):
    batchSize = 24
    lstmUnits = 64
    numClasses = 2
    iterations = 100000
    tf.reset_default_graph()
    labels = tf.placeholder(tf.float32, [batchSize, numClasses])
    input_data = tf.placeholder(tf.int32, [batchSize, maxSeqLength])
    data = tf.Variable(tf.zeros([batchSize, maxSeqLength, numDimensions]), dtype=tf.float32)
    data = tf.nn.embedding_lookup(wordVectors, input_data)
    lstmCell = tf.contrib.rnn.BasicLSTMCell(lstmUnits)
    lstmCell = tf.contrib.rnn.DropoutWrapper(cell=lstmCell, output_keep_prob=0.75)
    value, _ = tf.nn.dynamic_rnn(lstmCell, data, dtype=tf.float32)

with tf.device('/gpu:0'):
    weight = tf.Variable(tf.truncated_normal([lstmUnits, numClasses]))
    bias = tf.Variable(tf.constant(0.1, shape=[numClasses]))
    value = tf.transpose(value, [1, 0, 2])
    last = tf.gather(value, int(value.get_shape()[0]) - 1)
    prediction = (tf.matmul(last, weight) + bias)
    correctPred = tf.equal(tf.argmax(prediction, 1), tf.argmax(labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correctPred, tf.float32))
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=labels))
    optimizer = tf.train.AdamOptimizer().minimize(loss)

sess = tf.InteractiveSession()
saver = tf.train.Saver()
sess.run(tf.global_variables_initializer())
with tf.device('/gpu:0'):
    for i in range(iterations):
        nextBatch, nextBatchLabels = getTrainBatch()
        sess.run(optimizer, {input_data: nextBatch, labels: nextBatchLabels})

iterations = 10
for i in range(iterations):
    nextBatch, nextBatchLabels = getTestBatch()
    sess.run(accuracy, {input_data: nextBatch, labels: nextBatchLabels})
Here I am trying to predict the output (1 or 0) for a given sentence.
After loading the model from the checkpoint like this, how am I supposed to test whether a sentence is positive (1) or negative (0)?
new_saver = tf.train.import_meta_graph('models/pretrained....')
new_saver.restore(sess, tf.train.latest_checkpoint('models/./'))
Please help.
Name the input and output tensors, then retrieve them from the graph to do the prediction. I have suggested a few required changes and additional code to get prediction going:
...
input_data = tf.placeholder(tf.int32, [batchSize, maxSeqLength], name='inputs')
...
prediction = (tf.matmul(last, weight) + bias)
# You may use softmax if you want probabilities for prediction, but not for calculating the loss
# prediction = tf.nn.softmax(prediction)
prediction = tf.identity(prediction, name='prediction')
...
with tf.device('/gpu:0'):
    for i in range(iterations):
        nextBatch, nextBatchLabels = getTrainBatch()
        sess.run(optimizer, {input_data: nextBatch, labels: nextBatchLabels})
    saver.save(sess, 'model')
Code for restoring (use the relative/absolute path to model.meta and model):
with tf.Session() as sess:
    new_saver = tf.train.import_meta_graph('/path/to/model.meta')
    new_saver.restore(sess, '/path/to/model')
    g = tf.get_default_graph()
    inputs = g.get_tensor_by_name('inputs:0')
    prediction = g.get_tensor_by_name('prediction:0')
    prediction_ = sess.run(prediction, {inputs: your_inputs})
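To score a raw sentence, it first has to be mapped to ids exactly as the training data was. A minimal sketch reusing cleanSentences, wordsList, batchSize and maxSeqLength from the question (the array is padded to batchSize rows because the input placeholder has a fixed batch dimension):
def sentence_to_ids(sentence):
    # Same preprocessing as training: clean, split, map words to vocabulary indices
    arr = np.zeros([batchSize, maxSeqLength], dtype='int32')
    for idx, word in enumerate(cleanSentences(sentence).split()[:maxSeqLength]):
        try:
            arr[0][idx] = wordsList.index(word)
        except ValueError:
            arr[0][idx] = 399999  # unknown word
    return arr

your_inputs = sentence_to_ids("this movie was great")
prediction_ = sess.run(prediction, {inputs: your_inputs})
# prediction_ has shape [batchSize, numClasses]; row 0 is our sentence.
# With the training labels above, argmax 0 means positive and argmax 1 means negative.
label = np.argmax(prediction_[0])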
