I am training a neural network with three different output predictions. To compute the loss of one output I need one of the inputs that is passed into the network, but I am not able to access it because the training data is fed into the network by a Keras data generator object. Is there any workaround for this problem?
This is the generator class that feeds data into the model:
class DataGenerator(tf.keras.utils.Sequence):
    def __init__(self, list_ID, centers, sizes, batch_size=2, dims=(512, 512),
                 n_channels=3, n_classes=10, shuffle=True) -> None:
        assert len(list_ID) == len(centers)
        self.dims = dims
        self.batch_size = batch_size
        self.list_ID = list_ID
        self.centers = centers
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.sizes = sizes
        self.on_epoch_end()
        self.mask = None

    def __len__(self):
        return int(np.floor(len(self.list_ID) / self.batch_size))

    def on_epoch_end(self):
        self.indexes = np.arange(len(self.list_ID))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __getitem__(self, index):
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        list_ID_temp = [self.list_ID[k] for k in indexes]
        centers_temp = [self.centers[k] for k in indexes]
        sizes_temp = [self.sizes[k] for k in indexes]
        X, y = self.__datageneration(list_ID_temp, centers_temp, sizes_temp)
        return X, y

    def __datageneration(self, list_ID_temp, centers_temp, sizes_temp):
        X = np.empty((self.batch_size, *self.dims, self.n_channels))
        Y_center = np.empty((self.batch_size, 128, 128, 1))
        Y_dimension = np.empty((self.batch_size, 128, 128, 2))
        Y_offset = np.empty((self.batch_size, 128, 128, 2))
        self.mask = np.empty((self.batch_size, 128, 128, 1))
        for i, ID in enumerate(list_ID_temp):
            image = cv2.imread(path + '/' + ID) / 255.0
            heat_center, self.mask[i,] = gaussian_2d(centers_temp[i], image.shape)
            '''Here I tried to save the mask, which is what I need, as an attribute
            of the data generator, but when it is accessed by the loss function the
            value is just None, which is what I initialized it to in __init__.'''
            heat_size, heat_off = size_off_heatmap(sizes_temp[i], centers_temp[i], image.shape)
            image = cv2.resize(image, (512, 512))
            X[i,] = image
            Y_center[i,] = heat_center
            Y_dimension[i,] = heat_size
            Y_offset[i,] = heat_off
        return (X, {'center_output': Y_center, 'size_output': Y_dimension, 'offset_output': Y_offset})
This is the generator class I implemented. I needed the mask, which I tried to save as an attribute of the data generator object (see the comment in the code above). For reference I will also include the function that returns the mask and the loss function that requires it.
Function returning the mask:
def gaussian_2d(centers, img_shape):
    heatmap = []
    y_index = np.tile(np.arange(128), (128, 1))
    mask = np.zeros((128, 128, 1))
    width = img_shape[1]
    height = img_shape[0]
    for x_o, y_o in centers:
        x = int(x_o / width * 128)
        y = int(y_o / height * 128)
        mask[y, x] = 1
        gauss = np.exp(-((y_index.T - y) ** 2 + (y_index - x) ** 2) / 2 * 0.2 ** 2)
        heatmap.append(gauss)
    if len(heatmap) > 1:
        heatmap = np.stack(heatmap)
        heatmap = np.max(heatmap, axis=0)
    else:
        heatmap = np.array(heatmap)
    heatmap = heatmap.reshape((128, 128, 1))
    return heatmap, mask
Loss function
def final_loss(mask):
    def l1_loss(y_true, y_pred):
        y_true = tf.cast(y_true, tf.float32)
        y_pred = tf.cast(y_pred, tf.float32)
        n = tf.reduce_sum(tf.cast(tf.equal(mask, 1.0), dtype=tf.float32))
        tot_loss = tf.reduce_sum(tf.abs(y_pred - y_true))
        if tf.greater(n, 0):
            loss = tot_loss / n
        else:
            loss = tot_loss
        return loss
    return l1_loss
The error shown is as below:
Epoch 1/10
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-27-74a28b075f52> in <module>()
----> 1 model.fit(gen,epochs=10,verbose=1,callbacks=Callback(patience=4))
9 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
975 except Exception as e: # pylint:disable=broad-except
976 if hasattr(e, "ag_error_metadata"):
--> 977 raise e.ag_error_metadata.to_exception(e)
978 else:
979 raise
ValueError: in user code:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:805 train_function *
return step_function(self, iterator)
<ipython-input-24-c45fe131feb7>:5 l1_loss *
n = tf.reduce_sum(tf.cast(tf.equal(mask, 1.0),dtype=tf.float32))
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper **
return target(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py:1679 equal
return gen_math_ops.equal(x, y, name=name)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_math_ops.py:3179 equal
name=name)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py:540 _apply_op_helper
(input_name, err))
ValueError: Tried to convert 'x' to a tensor and failed. Error: None values not supported.
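A common workaround (a sketch with illustrative names, not the asker's exact code): have the generator stack the mask into the target array of the head that needs it, and split the two apart again inside the loss. Keras hands whatever array the generator returns as y_true straight to a custom loss, so per-sample data can travel that way; saving it as a generator attribute fails, presumably because final_loss(...) was given self.mask at compile time, when it was still None.

# inside __datageneration (illustrative): stack the mask onto the target of that head
#   Y_center = np.concatenate([Y_center, self.mask], axis=-1)

def masked_l1_loss(y_true, y_pred):
    y_true, mask = y_true[..., :1], y_true[..., 1:]   # split target and mask apart again
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)
    n = tf.reduce_sum(tf.cast(tf.equal(mask, 1.0), tf.float32))
    tot_loss = tf.reduce_sum(tf.abs(y_pred - y_true))
    return tot_loss / tf.maximum(n, 1.0)              # same as dividing by n when n > 0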
Related
I have found the code below, which defines a supervised contrastive loss for a classification task.
class SupConLoss(nn.Module):
    def __init__(self, temperature=0.07, contrast_mode='all',
                 base_temperature=0.07):
        super(SupConLoss, self).__init__()
        self.temperature = temperature
        self.contrast_mode = contrast_mode
        self.base_temperature = base_temperature

    def forward(self, features, labels=None, mask=None):
        """Args:
            features: hidden vector of shape [bsz, n_views, ...].
            labels: ground truth of shape [bsz].
            mask: contrastive mask of shape [bsz, bsz], mask_{i,j}=1 if sample j
                has the same class as sample i. Can be asymmetric.
        Returns:
            A loss scalar.
        """
        device = (torch.device('cuda')
                  if features.is_cuda
                  else torch.device('cpu'))
        if len(features.shape) < 3:
            raise ValueError('`features` needs to be [bsz, n_views, ...],'
                             'at least 3 dimensions are required')
        if len(features.shape) > 3:
            features = features.view(features.shape[0], features.shape[1], -1)
        batch_size = features.shape[0]
        if labels is not None and mask is not None:
            raise ValueError('Cannot define both `labels` and `mask`')
        elif labels is None and mask is None:
            mask = torch.eye(batch_size, dtype=torch.float32).to(device)
        elif labels is not None:
            labels = labels.contiguous().view(-1, 1)
            if labels.shape[0] != batch_size:
                raise ValueError('Num of labels does not match num of features')
            mask = torch.eq(labels, labels.T).float().to(device)
        else:
            mask = mask.float().to(device)
        contrast_count = features.shape[1]
        contrast_feature = torch.cat(torch.unbind(features, dim=1), dim=0)
        if self.contrast_mode == 'one':
            anchor_feature = features[:, 0]
            anchor_count = 1
        elif self.contrast_mode == 'all':
            anchor_feature = contrast_feature
            anchor_count = contrast_count
        else:
            raise ValueError('Unknown mode: {}'.format(self.contrast_mode))
        # compute logits
        anchor_dot_contrast = torch.div(
            torch.matmul(anchor_feature, contrast_feature.T),
            self.temperature)
        # for numerical stability
        logits_max, _ = torch.max(anchor_dot_contrast, dim=1, keepdim=True)
        logits = anchor_dot_contrast - logits_max.detach()
        # tile mask
        mask = mask.repeat(anchor_count, contrast_count)
        # mask-out self-contrast cases
        logits_mask = torch.scatter(
            torch.ones_like(mask),
            1,
            torch.arange(batch_size * anchor_count).view(-1, 1).to(device),
            0
        )
        mask = mask * logits_mask
        # compute log_prob
        exp_logits = torch.exp(logits) * logits_mask
        log_prob = logits - torch.log(exp_logits.sum(1, keepdim=True))
        # compute mean of log-likelihood over positive
        mean_log_prob_pos = (mask * log_prob).sum(1) / mask.sum(1)
        # loss
        loss = - (self.temperature / self.base_temperature) * mean_log_prob_pos
        loss = loss.view(anchor_count, batch_size).mean()
        return loss
My question is how I can use this loss for a semantic segmentation task on a pixel-wise level, where the input of the model is of size (batch, channels, height, width) and the labels are masks of size (batch, height, width).
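One direction (a sketch only, with made-up tensor names; adapt to your own model): treat every pixel as its own sample, so the per-pixel embeddings become the [bsz, n_views, ...] features and the flattened label mask becomes the labels. Because the similarity matrix grows quadratically with the number of pixels, it is common to subsample pixels per batch.

import torch
import torch.nn.functional as F

criterion = SupConLoss(temperature=0.07)

# features: (batch, channels, H, W) per-pixel embeddings, labels: (batch, H, W) class mask
def pixelwise_supcon_loss(features, labels, n_samples=1024):
    B, C, H, W = features.shape
    pixel_feats = features.permute(0, 2, 3, 1).reshape(-1, C)    # (B*H*W, C)
    pixel_feats = F.normalize(pixel_feats, dim=1)                # SupCon assumes normalized features
    pixel_labels = labels.reshape(-1)                            # (B*H*W,)
    idx = torch.randperm(pixel_feats.shape[0], device=pixel_feats.device)[:n_samples]
    return criterion(pixel_feats[idx].unsqueeze(1),              # (N, 1, C): one "view" per pixel
                     pixel_labels[idx])

Note that SupConLoss divides by mask.sum(1), so a sampled pixel whose class appears only once in the sample produces NaNs; in practice such pixels are filtered out or sampling is done per class.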
I have a custom dataset loader for my dataset. I want to split the dataset into 70% train data, 20% validation data, and 10% test data. I have 16,488 samples, so my train split is supposed to be 11,542, but it's coming out as 770 train, 220 validation, and 110 test. I've tried but couldn't figure out the problem.
class Dataset(Dataset):
    def __init__(self, directory, transform, preload=False, device: torch.device = torch.device('cpu'), **kwargs):
        self.device = device
        self.directory = directory
        self.transform = transform
        self.labels = []
        self.images = []
        self.preload = preload
        for i, file in enumerate(os.listdir(self.directory)):
            file_labels = parse('{}_{}_{age}_{gender}.jpg', file)
            if file_labels is None:
                continue
            if self.preload:
                image = Image.open(os.path.join(self.directory, file)).convert('RGB')
                if self.transform is not None:
                    image = self.transform(image).to(self.device)
            else:
                image = os.path.join(self.directory, file)
            self.images.append(image)
            gender_to_class_id = {
                'm': 0,
                'f': 1
            }
            gender = gender_to_class_id[file_labels['gender']]
            age = int(file_labels['age'])
            self.labels.append({
                'age': age,
                'gender': gender
            })

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        image = self.images[idx]
        if not self.preload:
            image = Image.open(image).convert('RGB')
            if self.transform is not None:
                image = self.transform(image).to(self.device)
        labels = {
            'age': self.labels[idx]['age'],
            'gender': self.labels[idx]['gender'],
        }
        return image.to(self.device), labels

    def get_loaders(self, transform, train_size=0.7, validate_size=0.2, test_size=0.1, batch_size=15, **kwargs):
        if round(train_size + validate_size + test_size, 1) > 1.0:
            sys.exit("Sum of the percentages should be less than 1. it's " + str(
                train_size + validate_size + test_size) + " now!")
        train_len = int(len(self) * train_size)
        validate_len = int(len(self) * validate_size)
        test_len = int(len(self) * test_size)
        others_len = len(self) - train_len - validate_len - test_len
        self.trainDataset, self.validateDataset, self.testDataset, _ = torch.utils.data.random_split(
            self, [train_len, validate_len, test_len, others_len]
        )
        train_loader = DataLoader(self.trainDataset, batch_size=batch_size)
        validate_loader = DataLoader(self.validateDataset, batch_size=batch_size)
        test_loader = DataLoader(self.testDataset, batch_size=batch_size)
        return train_loader, validate_loader, test_loader
It seems that you are passing
batch_size=15
and then taking len() of the DataLoader. A DataLoader is an iterable over batches, so its length is the number of batches, not the number of samples.
That also explains why you are getting train data = 770 where it is supposed to be 11,542:
16488 / 15 * 0.7 = 769.44 ≈ 770
Assigning batch_size = 1 should do the trick:
16488 / 1 * 0.7 = 11541.6 ≈ 11542
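A quick way to see this (a sketch, assuming the loaders come from get_loaders above): compare the two lengths directly; len(loader.dataset) reports the number of samples regardless of batch size.

train_loader, validate_loader, test_loader = dataset.get_loaders(transform)
print(len(train_loader))           # number of batches (770 with batch_size=15)
print(len(train_loader.dataset))   # number of samples in the train split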
I have encountered the following error. This is a custom RNN structure that I implemented to use with graph convolution. The problem is that the hidden state is updated by an in-place operation, but I need to update its value on each forward call. How can I do that? Thanks in advance.
RuntimeError Traceback (most recent call last)
<ipython-input-110-b4425651d544> in <module>()
8 out = model(x[i])
9 loss = mael(out, x[i+1])
---> 10 loss.backward(retain_graph=True)
11 optimizer.step()
12 print(loss.item())
1 frames
/usr/local/lib/python3.6/dist-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables)
130 Variable._execution_engine.run_backward(
131 tensors, grad_tensors_, retain_graph, create_graph,
--> 132 allow_unreachable=True) # allow_unreachable flag
133
134
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [1, 100]], which is output 0 of SelectBackward, is at version 1; expected version 0 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
Here is the RNN implementation and the training loop:
class RNN(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(RNN, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.weight = Parameter(torch.rand(10, input_dim, hidden_dim, requires_grad=True))
        self.weight_h = Parameter(torch.rand(10, input_dim, hidden_dim, requires_grad=True))
        self.bias = Parameter(torch.rand(10, input_dim, hidden_dim, requires_grad=True))
        self.hidden = torch.rand(10, input_dim, hidden_dim)
        self.weight_2 = Parameter(torch.rand(10, input_dim, hidden_dim, requires_grad=True))
        self.weight_h_2 = Parameter(torch.rand(10, hidden_dim, hidden_dim, requires_grad=True))
        self.bias_2 = Parameter(torch.rand(10, input_dim, hidden_dim, requires_grad=True))
        self.tanh = Tanh()
        self.iteration = 0
        self.next_layer = False
        self.hidden_init = torch.rand(1, 1)

    def set_hidden(self, x):
        y = self.tanh(mm(x, self.weight[self.iteration]) + mm(self.hidden_init, self.weight_h[self.iteration]) + self.bias[self.iteration])
        return y

    def set_hidden_state_layer_2(self, x, hidden):
        y = self.tanh(mm(x, self.weight_2[self.iteration]) + mm(hidden, self.weight_h_2[self.iteration]) + self.bias_2[self.iteration])
        return y

    def forward(self, x):
        try:
            dim_1, dim_2, dim_3 = x.shape
        except:
            x = torch.unsqueeze(x, 0)
        if self.iteration == 10:
            self.next_layer = True
            self.iteration = 0
        if self.next_layer:
            self.hidden[self.iteration] = self.set_hidden_state_layer_2(x, self.hidden[self.iteration].clone())
            self.iteration = self.iteration + 1
            return self.hidden[self.iteration - 1]
        else:
            hidden_init = torch.rand(1, 1)
            self.hidden[self.iteration] = self.tanh(mm(x, self.weight[self.iteration]) + mm(self.hidden_init, self.weight_h[self.iteration]) + self.bias[self.iteration])
            self.iteration = self.iteration + 1
            return self.hidden[self.iteration - 1]

model = RNN(1, 100)
mael = nn.L1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
x = torch.rand(11, 1)
x_2 = torch.rand(11, 1)
for i in range(10):
    optimizer.zero_grad()
    out = model(x[i])
    loss = mael(out, x[i + 1])
    loss.backward(retain_graph=True)
    optimizer.step()
    print(loss.item())
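One possible way around the in-place update (a sketch of RNN.forward, not a drop-in guarantee; it truncates backpropagation at the stored hidden state): compute the new state into a fresh tensor, return that tensor so the loss has a gradient path, and write only a detached copy back into the buffer. The training loop can then call loss.backward() without retain_graph=True.

    def forward(self, x):
        if x.dim() < 3:
            x = torch.unsqueeze(x, 0)
        if self.iteration == 10:
            self.next_layer = True
            self.iteration = 0
        # clone so the later in-place write never touches a tensor saved for backward
        prev = self.hidden[self.iteration].clone()
        if self.next_layer:
            new_state = self.set_hidden_state_layer_2(x, prev)
        else:
            new_state = self.tanh(mm(x, self.weight[self.iteration])
                                  + mm(self.hidden_init, self.weight_h[self.iteration])
                                  + self.bias[self.iteration])
        # store only a detached copy, so the buffer stays out of the autograd graph
        self.hidden[self.iteration] = new_state.detach()
        self.iteration += 1
        return new_state   # gradients flow through this freshly computed tensor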
class loss(Function):
    @staticmethod
    def forward(ctx, x, INPUT):
        batch_size = x.shape[0]
        X = x.detach().numpy()
        input = INPUT.detach().numpy()
        Loss = 0
        for i in range(batch_size):
            t_R_r = input[i, 0:4]
            R_r = t_R_r[np.newaxis, :]
            t_R_i = input[i, 4:8]
            R_i = t_R_i[np.newaxis, :]
            t_H_r = input[i, 8:12]
            H_r = t_H_r[np.newaxis, :]
            t_H_i = input[i, 12:16]
            H_i = t_H_i[np.newaxis, :]
            t_T_r = input[i, 16:32]
            T_r = t_T_r.reshape(4, 4)
            t_T_i = input[i, 32:48]
            T_i = t_T_i.reshape(4, 4)
            R = np.concatenate((R_r, R_i), axis=1)
            H = np.concatenate((H_r, H_i), axis=1)
            temp_t1 = np.concatenate((T_r, T_i), axis=1)
            temp_t2 = np.concatenate((-T_i, T_r), axis=1)
            T = np.concatenate((temp_t1, temp_t2), axis=0)
            phi_r = np.zeros((4, 4))
            row, col = np.diag_indices(4)
            phi_r[row, col] = X[i, 0:4]
            phi_i = np.zeros((4, 4))
            row, col = np.diag_indices(4)
            phi_i[row, col] = 1 - np.power(X[i, 0:4], 2)
            temp_phi1 = np.concatenate((phi_r, phi_i), axis=1)
            temp_phi2 = np.concatenate((-phi_i, phi_r), axis=1)
            phi = np.concatenate((temp_phi1, temp_phi2), axis=0)
            temp1 = np.matmul(R, phi)
            temp2 = np.matmul(temp1, T)  # error
            H_hat = H + temp2
            t_Q_r = np.zeros((4, 4))
            t_Q_r[np.triu_indices(4, 1)] = X[i, 4:10]
            Q_r = t_Q_r + t_Q_r.T
            row, col = np.diag_indices(4)
            Q_r[row, col] = X[i, 10:14]
            Q_i = np.zeros((4, 4))
            Q_i[np.triu_indices(4, 1)] = X[i, 14:20]
            Q_i = Q_i - Q_i.T
            temp_Q1 = np.concatenate((Q_r, Q_i), axis=1)
            temp_Q2 = np.concatenate((-Q_i, Q_r), axis=1)
            Q = np.concatenate((temp_Q1, temp_Q2), axis=0)
            t_H_hat_r = H_hat[0, 0:4]
            H_hat_r = t_H_hat_r[np.newaxis, :]
            t_H_hat_i = H_hat[0, 4:8]
            H_hat_i = t_H_hat_i[np.newaxis, :]
            temp_H1 = np.concatenate((-H_hat_i.T, H_hat_r.T), axis=0)
            H_hat_H = np.concatenate((H_hat.T, temp_H1), axis=1)
            temp_result1 = np.matmul(H_hat, Q)
            temp_result2 = np.matmul(temp_result1, H_hat_H)
            Loss += np.log10(1 + temp_result2[0][0])
        Loss = t.from_numpy(np.array(Loss / batch_size))
        return Loss

    @staticmethod
    def backward(ctx, grad_output):
        print('gradient')
        return grad_output

def criterion(output, input):
    return loss.apply(output, input)
This is my loss function, but it produces the error below:
Traceback (most recent call last):
  File "/Users/mrfang/channel_capacity/training.py", line 24, in <module>
    loss.backward()
  File "/Users/mrfang/anaconda3/lib/python3.6/site-packages/torch/tensor.py", line 150, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "/Users/mrfang/anaconda3/lib/python3.6/site-packages/torch/autograd/__init__.py", line 99, in backward
    allow_unreachable=True)  # allow_unreachable flag
RuntimeError: function lossBackward returned an incorrect number of gradients (expected 2, got 1)
How could I fix it? Thanks very much.
Your forward(ctx,x,INPUT) takes two inputs, x and INPUT, thus backward should output two gradients as well, grad_x and grad_INPUT.
In addition, in your snippet you're not really computing a custom gradient, so you could compute the loss with PyTorch's autograd directly, without having to define a special Function.
If this is working code and you're going to define the custom loss, here's a quick boilerplate of what backward should comprise:
@staticmethod
def forward(ctx, x, INPUT):
    # this is required so they're available during the backwards call
    ctx.save_for_backward(x, INPUT)
    # custom forward

@staticmethod
def backward(ctx, grad_output):
    x, INPUT = ctx.saved_tensors
    grad_x = grad_INPUT = None
    # compute grad here
    return grad_x, grad_INPUT
You don't need to return gradients for inputs that don't require it, thus you can return None for them.
More info here and here.
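To illustrate the second point (a sketch with a made-up toy formula, not the original channel-capacity loss): if the forward pass is written entirely with torch operations instead of numpy, autograd derives the backward pass automatically and no custom Function is needed.

import torch

def criterion(output, INPUT):
    # hypothetical toy loss: any composition of torch ops is differentiable end to end
    diff = output - INPUT[:, :output.shape[1]]
    return torch.log10(1 + (diff ** 2).sum(dim=1)).mean()

x = torch.rand(8, 20, requires_grad=True)
inp = torch.rand(8, 48)
criterion(x, inp).backward()   # no hand-written backward needed
print(x.grad.shape)            # torch.Size([8, 20])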
import os
import tarfile
from six.moves import urllib

URL = 'http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz'
PATH = 'aclImdb'

def fetch_data(url=URL, path=PATH):
    if not os.path.isdir(path):
        os.makedirs(path)
    file_path = os.path.join(path, "aclImdb_v1.tar.gz")
    urllib.request.urlretrieve(url, file_path)
    file_gz = tarfile.open(file_path)
    file_gz.extractall(path=path)
    file_gz.close()
import pyprind  # for progress visualisation
import pandas as pd

PATH = 'aclImdb'
labels = {'pos': 1, 'neg': 0}  # int class labels for 'positive' and 'negative'
pbar = pyprind.ProgBar(50000)  # initialise a progress bar with 50k iterations = no. of docs
df = pd.DataFrame()
# use nested for loops to iterate over the 'train' & 'test' subdirectories
for s in ('test', 'train'):
    for l in ('pos', 'neg'):  # and read text files from the 'pos' and 'neg' subdirectories
        path = os.path.join(PATH, s, l)
        for file in os.listdir(path):
            # append to the df pandas DataFrame with an int class label (pos = 1, neg = 0)
            with open(os.path.join(path, file), 'r', encoding='utf-8') as infile:
                txt = infile.read()
            df = df.append([[txt, labels[l]]], ignore_index=True)
            pbar.update()
df.columns = ['review', 'sentiment']

import numpy as np
np.random.seed(0)
df = df.reindex(np.random.permutation(df.index))
df.to_csv('movie_data.csv', index=False, encoding='utf-8')
n_words = max(list(word_to_int.values())) + 1
df = pd.read_csv('movie_data.csv', encoding = 'utf-8')
df.head(3)
# Separate words and count each word's occurrence
import pyprind  # for progress visualisation
from collections import Counter
from string import punctuation
import re

counts = Counter()  # collects the counts of occurrence of each unique word
pbar = pyprind.ProgBar(len(df['review']),
                       title='Counting word occurrences...')  # progress bar
for i, review in enumerate(df['review']):
    text = ''.join([c if c not in punctuation else ' ' + c + ' '
                    for c in review]).lower()
    df.loc[i, 'review'] = text
    pbar.update()
    counts.update(text.split())
# Mapping each unique word to an int
word_counts = sorted(counts, key=counts.get, reverse=True)
print(word_counts[:5])
word_to_int = {word: ii for ii, word in enumerate(word_counts, 1)}

mapped_reviews = []
pbar = pyprind.ProgBar(len(df['review']),
                       title='Map movie reviews to integers...')

# Left-pad with zeros if the sequence length < 200
# Use 200 elements if the length > 200
sequence_length = 200
sequences = np.zeros((len(mapped_reviews), sequence_length), dtype=int)
for i, row in enumerate(mapped_reviews):
    review_arr = np.array(row)
    sequences[i, -len(row):] = review_arr[-sequence_length:]
# Split the dataset into training and test sets
X_train = sequences[:25000, :]
y_train = df.loc[:25000, 'sentiment'].values
X_test = sequences[25000:, :]
y_test = df.loc[25000:, 'sentiment'].values

# Define the mini-batches generator
np.random.seed(123)

def batch_gen(x, y=None, batch_size=64):
    n_batches = len(x) // batch_size
    x = x[:n_batches * batch_size]
    if y is not None:
        y = y[:n_batches * batch_size]
    for ii in range(0, len(x), batch_size):
        if y is not None:
            yield x[ii:ii + batch_size], y[ii:ii + batch_size]
        else:
            yield x[ii:ii + batch_size]
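A quick sanity check of the generator (a sketch): samples beyond the last full batch are dropped, so every yielded pair has exactly batch_size rows.

for batch_x, batch_y in batch_gen(X_train, y_train, batch_size=64):
    print(batch_x.shape, batch_y.shape)   # (64, 200) (64,)
    break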
import tensorflow as tf
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' ## suppress the 3.5 warning if using TF 1.4
class SentimentRNN(object):
    # Define __init__
    def __init__(self,
                 n_words,
                 seq_len=200,
                 lstm_size=256,
                 num_layers=1,
                 batch_size=64,
                 learning_rate=0.0001,
                 embed_size=200):
        self.n_words = n_words
        self.seq_len = seq_len
        self.lstm_size = lstm_size  # no. of hidden units
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.embed_size = embed_size
        self.g = tf.Graph()
        with self.g.as_default():
            tf.set_random_seed(123)
            self.build()
            self.saver = tf.train.Saver()
            self.init_op = tf.global_variables_initializer()

    # Define the build method
    def build(self):
        # Define the placeholders
        tf_x = tf.placeholder(tf.int32,
                              shape=(self.batch_size, self.seq_len),
                              name='tf_x')
        tf_y = tf.placeholder(tf.float32,
                              shape=(self.batch_size),
                              name='tf_y')
        tf_keepprob = tf.placeholder(tf.float32,
                                     name='tf_keepprob')
        # Create the embedding layer
        embedding = tf.Variable(
            tf.random_uniform(
                shape=(self.n_words, self.embed_size),
                minval=-1,
                maxval=1),
            name='embedding')
        embed_x = tf.nn.embedding_lookup(embedding,
                                         tf_x,
                                         name='embed_x')
        # Define LSTM cells and stack them
        cells = tf.contrib.rnn.MultiRNNCell(
            [tf.contrib.rnn.DropoutWrapper(
                tf.contrib.rnn.BasicLSTMCell(num_units=self.lstm_size),
                output_keep_prob=tf_keepprob)
             for i in range(self.num_layers)])
        # Define the initial state
        self.initial_state = cells.zero_state(
            self.batch_size, tf.float32)
        print(' << initial state >> ', self.initial_state)
        # Put together the components with tf.nn.dynamic_rnn
        lstm_outputs, self.final_state = tf.nn.dynamic_rnn(
            cell=cells,
            inputs=embed_x,
            initial_state=self.initial_state)
        ## lstm_outputs shape: [batch_size, max_time, cells.output_size]
        print('\n << lstm_output >> ', lstm_outputs)
        print('\n << final state >> ', self.final_state)
        # Apply a fully-connected layer on the RNN output
        logits = tf.layers.dense(
            inputs=lstm_outputs[:, -1],
            units=1,  # dimensionality of the output space
            activation=None,
            name='logits')
        # Remove dimensions of size 1 from the tensor shape
        logits = tf.squeeze(input=logits,
                            name='logits_squeezed')
        print('\n << logits >> ', logits)
        # If you want probabilities
        y_proba = tf.nn.sigmoid(logits, name='probabilities')
        predictions = {'probabilities': y_proba,
                       'labels': tf.cast(tf.round(y_proba),
                                         tf.int32,
                                         name='labels')}
        print('\n << predictions >> ', predictions)
        # Define the cost function
        cost = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf_y,
                logits=logits),
            name='cost')
        # Define the optimiser
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        train_op = optimizer.minimize(cost, name='train_op')

    # Define the train method
    def train(self, X_train, y_train, num_epochs):
        with tf.Session(graph=self.g) as sess:
            sess.run(self.init_op)
            iteration = 1
            for epoch in range(num_epochs):
                state = sess.run(self.initial_state)
                for batch_x, batch_y in batch_gen(
                        X_train,
                        y_train,
                        batch_size=self.batch_size):
                    feed = {'tf_x:0': batch_x,
                            'tf_y:0': batch_y,
                            'tf_keepprob:0': 0.5,
                            self.initial_state: state}
                    loss, _, state = sess.run(
                        ['cost:0',
                         'train_op',
                         self.final_state],
                        feed_dict=feed)
                    if iteration % 20 == 0:
                        print("Epoch: %d/%d Iteration: %d "
                              "| Train loss: %.5f" % (
                                  epoch + 1,
                                  num_epochs,
                                  iteration,
                                  loss))
                    iteration += 1
                if (epoch + 1) % 10 == 0:
                    self.saver.save(
                        sess,
                        "model/sentiment-%d.ckpt" % epoch)

    # Define the predict method
    def predict(self, X_data, return_proba=False):
        preds = []
        with tf.Session(graph=self.g) as sess:
            self.saver.restore(
                sess,
                tf.train.latest_checkpoint('model/'))
            test_state = sess.run(self.initial_state)
            for ii, batch_x in enumerate(batch_gen(
                    x=X_data,
                    y=None,
                    batch_size=self.batch_size), 1):
                feed = {'tf_x:0': batch_x,
                        'tf_keepprob:0': 1.0,
                        self.initial_state: test_state}
                if return_proba:
                    pred, test_state = sess.run(
                        ['probabilities:0', self.final_state],
                        feed_dict=feed)
                else:
                    pred, test_state = sess.run(
                        ['labels:0', self.final_state],
                        feed_dict=feed)
                preds.append(pred)
        return np.concatenate(preds)
for review in df['review']:
    mapped_reviews.append([word_to_int[word] for word in review.split()])
    pbar.update()
rnn = SentimentRNN(n_words=n_words,
                   seq_len=sequence_length,
                   embed_size=256,
                   lstm_size=128,
                   num_layers=1,
                   batch_size=100,
                   learning_rate=0.001)

preds = rnn.predict(X_test)
y_true = y_test[:len(preds)]
print('Test accuracy... %.3f' % (np.sum(preds == y_true) / len(y_true)))
Create an object of the SentimentRNN class with the following parameters:
n_words = n_words, seq_len = sequence_length, embed_size = 256, lstm_size = 128, num_layers = 1, batch_size = 100, learning_rate = 0.001.
Since we have a relatively small dataset, a single layer (num_layers = 1) may generalise better.
ValueError Traceback (most recent call last)
<ipython-input-23-a3cfe03a9a49> in <module>()
----> 1 preds = rnn.predict(X_test)
2 y_true = y_test[:len(preds)]
3 print('Test accuracy... %.3f' % (np.sum(preds == y_true) / len(y_true)))
<ipython-input-12-d83ee67c43b6> in predict(self, X_data, return_proba)
173 self.saver.restore(
174 sess,
--> 175 tf.train.latest_checkpoint('model/'))
176 test_state = sess.run(self.initial_state)
177
/usr/local/anaconda/lib/python3.6/site-packages/tensorflow/python/training/saver.py in restore(self, sess, save_path)
1680 return
1681 if save_path is None:
-> 1682 raise ValueError("Can't load save_path when it is None.")
1683 logging.info("Restoring parameters from %s", save_path)
1684 if context.in_graph_mode():
ValueError: Can't load save_path when it is None.
The error just means that tf.train.latest_checkpoint didn't find anything: it returns None, and the Saver then complains because it was passed None. So there is no checkpoint in that directory. Note that in your train() method the saver only runs when (epoch + 1) % 10 == 0, so training for fewer than 10 epochs never writes a checkpoint at all.
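A quick way to confirm (a sketch, assuming the same model/ directory): list the directory and ask tf.train.latest_checkpoint directly; None means nothing has been saved there yet.

import os
import tensorflow as tf

print(os.listdir('model/') if os.path.isdir('model/') else 'model/ does not exist')
print(tf.train.latest_checkpoint('model/'))   # None -> nothing to restore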