random_split() not working on Google Colab - pytorch

I am trying to perform a random_split() on my custom Dataset, but it keeps saying that I have given it a torch._C.Generator even though I did not pass in any generator.
Below shows the code for the class that I have used:
class CustomDataset(Dataset):
def __init__(self, filepath):
self.imgs_path = filepath
file_list = sorted(os.listdir(self.imgs_path))
#print(file_list) = []
for group in file_list:
number_group = group.split('/')[-1]
for classes in sorted(os.listdir(self.imgs_path + number_group + '/')):
class_name = classes.split('/')[-1]
for img_name in sorted(os.listdir(self.imgs_path + number_group + '/' + class_name + '/')):[img_name, class_name, number_group])
self.class_map = {file_list[0]: 0,
file_list[1]: 1,
file_list[2]: 2,
file_list[3]: 3,
file_list[4]: 4,
file_list[5]: 5,
file_list[6]: 6,
file_list[7]: 7,
file_list[8]: 8,
file_list[9]: 9}
self.img_dim = (227, 227)
def __len__(self):
return len(
def __getitem__(self, idx):
img_name, class_name, number_group =[idx]
input_image = + number_group + "/" + class_name + "/" + img_name)
input_image = input_image.convert('RGB')
class_id = self.class_map[class_name]
class_id = np.asarray(class_id).squeeze()
class_id = torch.from_numpy(class_id)
# Preprocess Data
preprocess = transforms.Compose([
#transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],std=[0.2023, 0.1994, 0.2010]),
input_tensor = preprocess(input_image)
return input_tensor, class_id
Below is the code for the random_split() function:
dataset = CustomDataset('/content/drive/MyDrive/ColabNotebooks/leapGestRecog/')
train_size = int(0.8*len(dataset))
validation_test_size = len(dataset)-train_size
print("train_size: ", train_size)
print("validation_test_size: ", validation_test_size)
train_dataset, validation_dataset, test_dataset = random_split(dataset,[train_size,validation_test_size/2,validation_test_size/2])
Finally, these are the output messages including the error message:
['frame_00_01_0001.png', '01_palm', '00']
train_size: 16000
validation_test_size: 4000
TypeError Traceback (most recent call last)
<ipython-input-20-71dee9977e06> in <module>
4 print("train_size: ", train_size)
5 print("validation_test_size: ", validation_test_size)
----> 6 train_dataset, validation_dataset, test_dataset = random_split(dataset,[train_size,validation_test_size/2,validation_test_size/2])
/usr/local/lib/python3.7/dist-packages/torch/utils/data/ in random_split(dataset, lengths, generator)
311 raise ValueError("Sum of input lengths does not equal the length of the input dataset!")
--> 313 indices = randperm(sum(lengths)).tolist()
314 return [Subset(dataset, indices[offset - length : offset]) for offset, length in zip(_accumulate(lengths), lengths)]
TypeError: randperm() received an invalid combination of arguments - got (float, generator=torch._C.Generator), but expected one of:
* (int n, *, torch.Generator generator, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
* (int n, *, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
What exactly is a torch._C.Generator and how do I get rid of it?

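Try updating the PyTorch version on Google Colab, because it may be a version compatibility issue here. Note also that the traceback says randperm() received a float: validation_test_size/2 is a float in Python 3, so sum(lengths) is a float as well. Pass integer lengths to random_split() instead, for example validation_test_size // 2. The torch._C.Generator in the message is only the default generator argument that random_split() forwards to randperm() itself; it is not something you need to get rid of.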


How to re-use a single weight per group across all channels in pytorch?

Let's suppose I have the following 2D convolution layer:
nn.Conv2d(kernel_size=(1,20), stride=1, groups=5, out_channels=30, in_channels=30, bias=False),
What it does is that it creates a weight of 30x6x1x20 dimension, and in my model it results in overfitting.
Since the data is similar for every group, I want to reuse a single weight per group across all output channels associated with that group.
Ie. I would like my weight to be of only 5x1x1x20 dimension, where 5 corresponds to groups, and then repeat it 6 times for every input and 6 times for every output channel of that group.
How do I do this in pytorch?
Well, I defined a custom dimension weight and then repeated it before the convolution.
import torch
from typing import Optional, List, Tuple, Union
from torch import Tensor
from torch.nn.parameter import Parameter
from torch.nn.common_types import _size_2_t
from torch.nn.modules.utils import _single, _pair, _triple, _reverse_repeat_tuple
from torch.nn import init
import math
from torch.nn import functional as F
class _ConvNd(torch.nn.Module):
__constants__ = ['stride', 'padding', 'dilation', 'groups',
'padding_mode', 'output_padding', 'in_channels',
'out_channels', 'kernel_size']
__annotations__ = {'bias': Optional[torch.Tensor]}
def _conv_forward(self, input: Tensor, weight: Tensor, bias: Optional[Tensor]) -> Tensor:
_in_channels: int
_reversed_padding_repeated_twice: List[int]
out_channels: int
kernel_size: Tuple[int, ...]
stride: Tuple[int, ...]
padding: Union[str, Tuple[int, ...]]
dilation: Tuple[int, ...]
transposed: bool
output_padding: Tuple[int, ...]
groups: int
padding_mode: str
weight: Tensor
bias: Optional[Tensor]
def __init__(self,
in_channels: int,
out_channels: int,
kernel_size: Tuple[int, ...],
stride: Tuple[int, ...],
padding: Tuple[int, ...],
dilation: Tuple[int, ...],
transposed: bool,
output_padding: Tuple[int, ...],
groups: int,
bias: bool,
padding_mode: str,
dtype=None) -> None:
factory_kwargs = {'device': device, 'dtype': dtype}
super(_ConvNd, self).__init__()
if in_channels % groups != 0:
raise ValueError('in_channels must be divisible by groups')
if out_channels % groups != 0:
raise ValueError('out_channels must be divisible by groups')
valid_padding_strings = {'same', 'valid'}
if isinstance(padding, str):
if padding not in valid_padding_strings:
raise ValueError(
"Invalid padding string {!r}, should be one of {}".format(
padding, valid_padding_strings))
if padding == 'same' and any(s != 1 for s in stride):
raise ValueError("padding='same' is not supported for strided convolutions")
valid_padding_modes = {'zeros', 'reflect', 'replicate', 'circular'}
if padding_mode not in valid_padding_modes:
raise ValueError("padding_mode must be one of {}, but got padding_mode='{}'".format(
valid_padding_modes, padding_mode))
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = kernel_size
self.stride = stride
self.padding = padding
self.dilation = dilation
self.transposed = transposed
self.output_padding = output_padding
self.groups = groups
self.padding_mode = padding_mode
# `_reversed_padding_repeated_twice` is the padding to be passed to
# `F.pad` if needed (e.g., for non-zero padding types that are
# implemented as two ops: padding + conv). `F.pad` accepts paddings in
# reverse order than the dimension.
if isinstance(self.padding, str):
self._reversed_padding_repeated_twice = [0, 0] * len(kernel_size)
if padding == 'same':
for d, k, i in zip(dilation, kernel_size,
range(len(kernel_size) - 1, -1, -1)):
total_padding = d * (k - 1)
left_pad = total_padding // 2
self._reversed_padding_repeated_twice[2 * i] = left_pad
self._reversed_padding_repeated_twice[2 * i + 1] = (
total_padding - left_pad)
self._reversed_padding_repeated_twice = _reverse_repeat_tuple(self.padding, 2)
if transposed:
self.weight = Parameter(torch.empty(
(in_channels, out_channels // groups, *kernel_size), **factory_kwargs))
self.weight = Parameter(torch.empty(
(groups, 1, *kernel_size), **factory_kwargs))
if bias:
self.bias = Parameter(torch.empty(out_channels, **factory_kwargs))
self.register_parameter('bias', None)
def reset_parameters(self) -> None:
# Setting a=sqrt(5) in kaiming_uniform is the same as initializing with
# uniform(-1/sqrt(k), 1/sqrt(k)), where k = weight.size(1) * prod(*kernel_size)
# For more details see:
init.kaiming_uniform_(self.weight, a=math.sqrt(5))
if self.bias is not None:
fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
if fan_in != 0:
bound = 1 / math.sqrt(fan_in)
init.uniform_(self.bias, -bound, bound)
def extra_repr(self):
s = ('{in_channels}, {out_channels}, kernel_size={kernel_size}'
', stride={stride}')
if self.padding != (0,) * len(self.padding):
s += ', padding={padding}'
if self.dilation != (1,) * len(self.dilation):
s += ', dilation={dilation}'
if self.output_padding != (0,) * len(self.output_padding):
s += ', output_padding={output_padding}'
if self.groups != 1:
s += ', groups={groups}'
if self.bias is None:
s += ', bias=False'
if self.padding_mode != 'zeros':
s += ', padding_mode={padding_mode}'
return s.format(**self.__dict__)
def __setstate__(self, state):
super(_ConvNd, self).__setstate__(state)
if not hasattr(self, 'padding_mode'):
self.padding_mode = 'zeros'
class SharedConv2d(_ConvNd):
def __init__(
in_channels: int,
out_channels: int,
kernel_size: _size_2_t,
stride: _size_2_t = 1,
padding: Union[str, _size_2_t] = 0,
dilation: _size_2_t = 1,
groups: int = 1,
bias: bool = True,
padding_mode: str = 'zeros', # TODO: refine this type
) -> None:
factory_kwargs = {'device': device, 'dtype': dtype}
kernel_size_ = _pair(kernel_size)
stride_ = _pair(stride)
padding_ = padding if isinstance(padding, str) else _pair(padding)
dilation_ = _pair(dilation)
super(SharedConv2d, self).__init__(
in_channels, out_channels, kernel_size_, stride_, padding_, dilation_,
False, _pair(0), groups, bias, padding_mode, **factory_kwargs)
def _conv_forward(self, input: Tensor, weight: Tensor, bias: Optional[Tensor]):
if self.padding_mode != 'zeros':
return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode),
weight.repeat(self.out_channels//self.groups, self.in_channels//self.groups, 1, 1), bias, self.stride,
_pair(0), self.dilation, self.groups)
return F.conv2d(input, weight.repeat(self.out_channels//self.groups, self.in_channels//self.groups, 1, 1), bias, self.stride,
self.padding, self.dilation, self.groups)
def forward(self, input: Tensor) -> Tensor:
return self._conv_forward(input, self.weight, self.bias)
Then we can use it like this:
SharedConv2d(kernel_size=(1,20), stride=1, groups=5, out_channels=30, in_channels=30, bias=False)
The question is, are gradients going to backpropagate properly with repeat used on the weight?

PyTorch random_split() is returning wrong sized loader

I have a custom dataset loader for my dataset. I want to split the dataset into 70% train data, 20% validation data, and 10% test data. I have 16,488 samples, so my train set is supposed to contain 11,542 of them. Instead I am getting 770 train samples, 220 validation samples, and 110 test samples. I've tried but couldn't figure out the problem.
class Dataset(Dataset):
def __init__(self, directory, transform, preload=False, device: torch.device = torch.device('cpu'), **kwargs):
self.device = device = directory
self.transform = transform
self.labels = []
self.images = []
self.preload = preload
for i, file in enumerate(os.listdir(
file_labels = parse('{}_{}_{age}_{gender}.jpg', file)
if file_labels is None:
if self.preload:
image =, file)).convert('RGB')
if self.transform is not None:
image = self.transform(image).to(self.device)
image = os.path.join(, file)
gender_to_class_id = {
'm': 0,
'f': 1
gender = gender_to_class_id[file_labels['gender']]
age = int(file_labels['age'])
'age': age,
'gender': gender
def __len__(self):
return len(self.labels)
def __getitem__(self, idx):
if torch.is_tensor(idx):
idx = idx.tolist()
image = self.images[idx]
if not self.preload:
image ='RGB')
if self.transform is not None:
image = self.transform(image).to(self.device)
labels = {
'age': self.labels[idx]['age'],
'gender': self.labels[idx]['gender'],
return, labels
def get_loaders(self, transform, train_size=0.7, validate_size=0.2, test_size=0.1, batch_size=15, **kwargs):
if round(train_size + validate_size + test_size, 1) > 1.0:
sys.exit("Sum of the percentages should be less than 1. it's " + str(
train_size + validate_size + test_size) + " now!")
train_len = int(len(self) * train_size)
validate_len = int(len(self) * validate_size)
test_len = int(len(self) * test_size)
others_len = len(self) - train_len - validate_len - test_len
self.trainDataset, self.validateDataset, self.testDataset, _ =
self, [train_len, validate_len, test_len, others_len]
train_loader = DataLoader(self.trainDataset, batch_size=batch_size)
validate_loader = DataLoader(self.validateDataset, batch_size=batch_size)
test_loader = DataLoader(self.testDataset, batch_size=batch_size)
return train_loader, validate_loader, test_loader
It seems that you are giving batch_size = 15 to the DataLoader.
As a DataLoader iterates over batches, len() on it gives you the number of batches, not the number of samples.
It also explains why you are getting train data = 770, where it is supposed to be 11,542. Because,
16488 / 15 * 0.7 = 769.44 ≈ 770
Assigning batch_size = 1 should do the trick.
16488 / 1 * 0.7 = 11541.6 ≈ 11542

too many values to unpack (expected 2) Pytorch

datasets = {
train_transform if x == 'train' else val_test_transform
) for x in ["train", "test", "val"]
dataloaders = {x:[x], batch_size=batch_size) for x in ["train", "test", "val"]}
idx = 0
phase = "train"
origin, mask = datasets[phase][idx]
class LungDataset(
def __init__(self, origin_mask_list, origins_folder, masks_folder, transforms=None):
self.origin_mask_list = origin_mask_list
self.origins_folder = origins_folder
self.masks_folder = masks_folder
self.transforms = transforms
def __getitem__(self, idx):
origin_name, mask_name = self.origin_mask_list[idx]
origin = / (origin_name + ".png")).convert("P")
mask = / (mask_name + ".png"))
if self.transforms is not None:
origin, mask = self.transforms((origin, mask))
origin = torchvision.transforms.functional.to_tensor(origin) - 0.5
mask = np.array(mask)
mask = (torch.tensor(mask) > 128).long()
return origin, mask
def __len__(self):
return len(self.origin_mask_list)
ValueError Traceback (most recent call last)
<ipython-input-111-6b5b08e37e6d> in <module>
4 plt.figure(figsize=(20, 10))
----> 5 origin, mask = datasets[phase][idx]
6 pil_origin = torchvision.transforms.functional.to_pil_image(origin + 0.5).convert("RGB")
7 pil_mask = torchvision.transforms.functional.to_pil_image(mask.float())
D:\python\care\Lung-segmentation\src\ in __getitem__(self, idx)
17 def __getitem__(self, idx):
---> 18 origin_name, mask_name = self.origin_mask_list[idx]
19 origin = / (origin_name + ".png")).convert("P")
20 mask = / (mask_name + ".png"))
ValueError: too many values to unpack (expected 2)
I'm trying to load the data, but it is showing me the error above.
It's a lung segmentation project; you can go to the repo via the GitHub link,
and the datasets are on Drive. If you want more detail, leave me a comment and I'll provide it.
It's lung segmentation code.
For more details, take a look at GitHub.

How to pass model input to loss function in tensorflow keras?

I am training a neural network with three different output predictions. To compute the loss of one output, I need one of the inputs that is passed into the network. I am not able to access it because the training data is fed into the network by a Keras data generator object. Is there any workaround for this problem?
This is the generator class that feeds data into the model:
class DataGenerator(tf.keras.utils.Sequence):
def __init__(self,list_ID,centers,sizes,batch_size=2,dims=(512,512),n_channels=3,n_classes=10,shuffle=True) -> None:
assert len(list_ID) == len(centers)
self.dims = dims
self.batch_size = batch_size
self.list_ID = list_ID
self.centers = centers
self.n_channels = n_channels
self.n_classes = n_classes
self.shuffle = shuffle
self.sizes = sizes
self.mask = None
def __len__(self):
return int(np.floor(len(self.list_ID) / self.batch_size))
def on_epoch_end(self):
self.indexes = np.arange(len(self.list_ID))
if self.shuffle:
def __getitem__(self, index):
indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
list_ID_temp = [self.list_ID[k] for k in indexes]
centers_temp = [self.centers[k] for k in indexes]
sizes_temp = [self.sizes[k] for k in indexes]
X, y = self.__datageneration(list_ID_temp, centers_temp,sizes_temp)
return X, y
def __datageneration(self, list_ID_temp,centers_temp,sizes_temp):
X = np.empty((self.batch_size,*self.dims,self.n_channels))
Y_center = np.empty((self.batch_size,128,128,1))
Y_dimension = np.empty((self.batch_size,128,128,2))
Y_offset = np.empty((self.batch_size,128,128,2))
self.mask = np.empty((self.batch_size,128,128,1))
for i,ID in enumerate(list_ID_temp):
image = cv2.imread(path+'/'+ID) / 255.0
heat_center, self.mask[i,] = gaussian_2d(centers_temp[i],image.shape)
'''Here I tried to save mask which is what I need,
as an attribute to data generator but when accessed by loss function
the value is just None which is what I initialized it as in init method'''
heat_size,heat_off = size_off_heatmap(sizes_temp[i], centers_temp[i],image.shape)
image = cv2.resize(image,(512,512))
X[i,] = image
Y_center[i,] = heat_center
Y_dimension[i,] = heat_size
Y_offset[i,] = heat_off
return (X,{'center_output':Y_center,'size_output':Y_dimension,'offset_output':Y_offset})
This is the generator class I implemented. I need the mask, which I tried to store as an attribute of the data generator object (see the comment in the code). For reference, I will also include the function that returns the mask and the loss function that requires the mask.
Function returning mask
def gaussian_2d(centers, img_shape):
    """Build a 128x128 centre heatmap and a matching binary centre mask.

    Each centre is rescaled from image coordinates to the 128x128 grid,
    marked in the mask, and rendered as a Gaussian bump. Bumps from
    several centres are merged with an element-wise maximum.

    Args:
        centers: Iterable of ``(x, y)`` pairs, presumably in pixel
            coordinates of the original image (they are divided by the
            image width/height below).
        img_shape: Shape of the original image; index 0 is read as the
            height and index 1 as the width.

    Returns:
        ``(heatmap, mask)``, both arrays of shape ``(128, 128, 1)``. With
        no centres, both are all zeros.
    """
    # y_index[r, c] == c (column index); its transpose gives the row index.
    y_index = np.tile(np.arange(128), (128, 1))
    mask = np.zeros((128, 128, 1))
    width = img_shape[1]
    height = img_shape[0]
    heatmap = []
    for x_o, y_o in centers:
        x = int(x_o / width * 128)
        y = int(y_o / height * 128)
        mask[y, x] = 1
        # NOTE(review): by operator precedence this evaluates as
        # -(d**2) / 2 * (0.2 ** 2), not -(d**2) / (2 * 0.2 ** 2).
        # Kept as written; confirm which form was intended.
        gauss = np.exp(-((y_index.T - y) ** 2 + (y_index - x) ** 2) / 2 * 0.2 ** 2)
        heatmap.append(gauss)
    if heatmap:
        # Element-wise maximum over all per-centre bumps (also correct for
        # a single centre).
        heatmap = np.max(np.stack(heatmap), axis=0)
    else:
        # No centres: return an empty heatmap instead of failing on reshape.
        heatmap = np.zeros((128, 128))
    heatmap = heatmap.reshape((128, 128, 1))
    return heatmap, mask
Loss function
def final_loss(mask):
    """Return an L1 loss normalised by the number of cells set in ``mask``.

    Args:
        mask: Tensor-like whose entries equal to 1.0 are counted. It is
            captured when ``final_loss`` is called, so it must already
            hold real values at that point; passing ``None`` makes
            ``tf.equal`` fail with "None values not supported".

    Returns:
        A ``l1_loss(y_true, y_pred)`` callable usable as a Keras loss.
    """
    def l1_loss(y_true, y_pred):
        y_true = tf.cast(y_true, tf.float32)
        y_pred = tf.cast(y_pred, tf.float32)
        # Number of mask entries equal to 1.0.
        n = tf.reduce_sum(tf.cast(tf.equal(mask, 1.0), dtype=tf.float32))
        tot_loss = tf.reduce_sum(tf.abs(y_pred - y_true))
        if tf.greater(n, 0):
            loss = tot_loss / n
        else:
            # Nothing marked in the mask: fall back to the raw sum so the
            # loss is not divided by zero.
            loss = tot_loss
        return loss
    return l1_loss
The error shown is as below:
Epoch 1/10
ValueError Traceback (most recent call last)
<ipython-input-27-74a28b075f52> in <module>()
----> 1,epochs=10,verbose=1,callbacks=Callback(patience=4))
9 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ in wrapper(*args, **kwargs)
975 except Exception as e: # pylint:disable=broad-except
976 if hasattr(e, "ag_error_metadata"):
--> 977 raise e.ag_error_metadata.to_exception(e)
978 else:
979 raise
ValueError: in user code:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/ train_function *
return step_function(self, iterator)
<ipython-input-24-c45fe131feb7>:5 l1_loss *
n = tf.reduce_sum(tf.cast(tf.equal(mask, 1.0),dtype=tf.float32))
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/ wrapper **
return target(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/ equal
return gen_math_ops.equal(x, y, name=name)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/ equal
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ _apply_op_helper
(input_name, err))
ValueError: Tried to convert 'x' to a tensor and failed. Error: None values not supported.

Tensorflow : ValueError: Can't load save_path when it is None

import os
import tarfile
from six.moves import urllib
URL = ''
PATH = 'aclImdb'
def fetch_data(url = URL, path = PATH):
    """Download the review archive from ``url`` and unpack it into ``path``.

    Args:
        url: Location of the ``aclImdb_v1.tar.gz`` archive.
        path: Directory that receives both the downloaded archive and its
            extracted contents; created if it does not exist.
    """
    # The target directory must exist before urlretrieve() can write to it.
    os.makedirs(path, exist_ok=True)
    file_path = os.path.join(path, "aclImdb_v1.tar.gz")
    urllib.request.urlretrieve(url, file_path)
    # The context manager closes the archive even if extraction fails.
    with tarfile.open(file_path) as file_gz:
        file_gz.extractall(path = path)
import pyprind # for progress visualisation
import pandas as pd
PATH = 'aclImdb'
labels = {'pos': 1, 'neg': 0} # int class labels for 'positive' and 'negative'
pbar = pyprind.ProgBar(50000) # initialise a progress bar with 50k iterations = no. of docs
df = pd.DataFrame()
# use nested for loops to iterate over 'train' & 'test' subdir
for s in ('test', 'train'):
for l in ('pos', 'neg'): # and read text files from 'pos' and 'neg' subdir
path = os.path.join(PATH, s, l)
for file in os.listdir(path):
# append to the df pandas DataFrame with an int class (post = 1, neg = 0)
with open(os.path.join(path, file), 'r', encoding = 'utf-8') as infile:
txt =
df = df.append([[txt, labels[l]]], ignore_index = True)
df.columns = ['review', 'sentiment']
import numpy as np
np. random.seed(0)
df = df.reindex(np.random.permutation(df.index))
df.to_csv('movie_data.csv', index = False, encoding = 'utf-8')
n_words = max(list(word_to_int.values())) + 1
df = pd.read_csv('movie_data.csv', encoding = 'utf-8')
# Separate words and count each word's occurence
import pyprind # for progress visualisation
from collections import Counter
from string import punctuation
import re
counts = Counter() # collects the counts of occurence of each unique word
pbar = pyprind.ProgBar(len(df['review']),
title = 'Counting word occurences...') # progress bar
for i, review in enumerate(df['review']):
text = ''.join([c if c not in punctuation else ' '+c+' '
for c in review]).lower()
df.loc[i, 'review'] = text
# Mapping each unique word to an int
word_counts = sorted(counts, key = counts.get, reverse = True)
word_to_int = {word: ii for ii, word in enumerate(word_counts, 1)}
mapped_reviews = []
pbar = pyprind.ProgBar(len(df['review']),
title = 'Map movie reviews to integers...')
# Left-pad with zeros if the sequence length < 200
# Use 200 elements if the length > 200
sequence_length = 200
sequences = np.zeros((len(mapped_reviews), sequence_length), dtype = int)
for i, row in enumerate(mapped_reviews):
review_arr = np.array(row)
sequences[i, -len(row):] = review_arr[-sequence_length:]
# Split the dataset into training and test sets.
# `DataFrame.loc` slices are label-based and include the end label, so the
# training labels stop at 24999 to give exactly 25,000 values, matching the
# 25,000 rows taken for X_train.
X_train = sequences[:25000, :]
y_train = df.loc[:24999, 'sentiment'].values
X_test = sequences[25000:, :]
y_test = df.loc[25000:, 'sentiment'].values
# Define the mini-batches generator
def batch_gen(x, y = None, batch_size = 64):
    """Yield consecutive mini-batches of ``x`` (and ``y`` when given).

    Only full batches are produced: trailing samples that do not fill a
    complete batch of ``batch_size`` items are dropped.

    Args:
        x: Sliceable sequence of inputs (list, NumPy array, ...).
        y: Optional sliceable sequence of labels aligned with ``x``.
        batch_size: Number of samples per batch.

    Yields:
        ``(x_batch, y_batch)`` tuples when ``y`` is given, otherwise
        ``x_batch`` alone.
    """
    n_batches = len(x) // batch_size
    usable = n_batches * batch_size  # samples that fit into full batches
    for start in range(0, usable, batch_size):
        stop = start + batch_size
        if y is not None:
            yield x[start:stop], y[start:stop]
        else:
            yield x[start:stop]
import tensorflow as tf
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' ## suppress the 3.5 warning if using TF 1.4
class SentimentRNN(object):
# Define __init__
def __init__(self,
seq_len = 200,
lstm_size = 256,
num_layers = 1,
batch_size = 64,
learning_rate = 0.0001,
embed_size = 200):
self.n_words = n_words
self.seq_len = seq_len
self.lstm_size = lstm_size # no. of hidden units
self.num_layers = num_layers
self.batch_size = batch_size
self.learning_rate = learning_rate
self.embed_size = embed_size
self.g = tf.Graph()
with self.g.as_default():
self.saver = tf.train.Saver()
self.init_op = tf.global_variables_initializer()
# Define the build method
def build(self):
# Define the placeholders
tf_x = tf.placeholder(tf.int32,
shape = (self.batch_size, self.seq_len),
name = 'tf_x')
tf_y = tf.placeholder(tf.float32,
shape = (self.batch_size),
name = 'tf_y')
tf_keepprob = tf.placeholder(tf.float32,
name = 'tf_keepprob')
# Create the embedding layer
embedding = tf.Variable(
shape = (self.n_words, self.embed_size),
minval = -1,
maxval = 1),
name = 'embedding')
embed_x = tf.nn.embedding_lookup(embedding,
name = 'embed_x')
# Define LSTM cells and stack them
cells = tf.contrib.rnn.MultiRNNCell(
tf.contrib.rnn.BasicLSTMCell(num_units = self.lstm_size),
output_keep_prob = tf_keepprob)
for i in range(self.num_layers)])
# Define the initial state:
self.initial_state = cells.zero_state(
self.batch_size, tf.float32)
print(' << initial state >> ', self.initial_state)
# Put together components with tf.nn.dynamic_rnn
lstm_outputs, self.final_state = tf.nn.dynamic_rnn(
cell = cells,
inputs = embed_x,
initial_state = self.initial_state)
## lstm_outputs shape: [batch_size, max_time, cells.output_size]
print('\n << lstm_output >> ', lstm_outputs)
print('\n << final state >> ', self.final_state)
# Apply a full-connected layer on the RNN output
logits = tf.layers.dense(
inputs = lstm_outputs[:, -1],
units = 1, # dimensionality of the output space
activation = None,
name = 'logits')
# Remove dimensions of size 1 from the tensor shape
logits = tf.squeeze(input = logits,
name = 'logits_squeezed')
print ('\n << logits >> ', logits)
# If you want prob's
y_proba = tf.nn.sigmoid(logits, name = 'probabilities')
predictions = {'probabilities' : y_proba,
'labels' : tf.cast(tf.round(y_proba),
name = 'labels')}
print('\n << predictions >> ', predictions)
# Define the cost function
cost = tf.reduce_mean(
labels = tf_y,
logits = logits),
name = 'cost')
# Define the optimiser
optimizer = tf.train.AdamOptimizer(self.learning_rate)
train_op = optimizer.minimize(cost, name = 'train_op')
# Define the train method
def train(self, X_train, y_train, num_epochs):
with tf.Session(graph = self.g) as sess:
iteration = 1
for epoch in range(num_epochs):
state =
for batch_x, batch_y in batch_gen(
batch_size = self.batch_size):
feed = {'tf_x:0' : batch_x,
'tf_y:0' : batch_y,
'tf_keepprob:0' : 0.5,
self.initial_state : state}
loss, _, state =
if iteration % 20 == 0:
print("Epoch: %d/%d Iteration: %d "
"| Train loss: %.5f" % (
epoch + 1,
iteration += 1
if (epoch + 1) % 10 == 0:
"model/sentiment-%d.ckpt" % epoch)
# Define the predict method
def predict(self, X_data, return_proba=False):
preds = []
with tf.Session(graph = self.g) as sess:
test_state =
for ii, batch_x in enumerate(batch_gen(
x = X_data,
y = None,
batch_size = self.batch_size), 1):
feed = {'tf_x:0' : batch_x,
'tf_keepprob:0' : 1.0,
self.initial_state : test_state}
if return_proba:
pred, test_state =
['probabilities:0', self.final_state],
pred, test_state =
['labels:0', self.final_state],
return np.concatenate(preds)
for review in df['review']:
mapped_reviews.append([word_to_int[word] for word in review.split()])
rnn = SentimentRNN(n_words = n_words,
seq_len = sequence_length,
embed_size = 256,
lstm_size = 128,
num_layers = 1,
batch_size = 100,
learning_rate = 0.001)
# Predict labels for the held-out reviews. batch_gen() only yields full
# batches, so `preds` can be shorter than `y_test`; the labels are trimmed
# to the same length before scoring.
preds = rnn.predict(X_test)
y_true = y_test[:len(preds)]
print('Test accuracy... %.3f' % (np.sum(preds == y_true) / len(y_true)))
Create an object of the SentimentRNN class with the following parameters:
n_words = n_words, seq_len = sequence_length, embed_size = 256, lstm_size = 128, num_layers = 1, batch_size = 100, learning_rate = 0.001.
Since we have a relatively small dataset, the number of layers = 1 may generalise better
enter image description here
ValueError Traceback (most recent call last)
<ipython-input-23-a3cfe03a9a49> in <module>()
----> 1 preds = rnn.predict(X_test)
2 y_true = y_test[:len(preds)]
3 print('Test accuracy... %.3f' % (np.sum(preds == y_true) / len(y_true)))
<ipython-input-12-d83ee67c43b6> in predict(self, X_data, return_proba)
173 self.saver.restore(
174 sess,
--> 175 tf.train.latest_checkpoint('model/'))
176 test_state =
/usr/local/anaconda/lib/python3.6/site-packages/tensorflow/python/training/ in restore(self, sess, save_path)
1680 return
1681 if save_path is None:
-> 1682 raise ValueError("Can't load save_path when it is None.")
1683"Restoring parameters from %s", save_path)
1684 if context.in_graph_mode():
ValueError: Can't load save_path when it is None.
The error just means tf.train.latest_checkpoint didn't find anything. It returns None, then the Saver complains because it was passed None. So there's no checkpoint in that directory.
