PyTorch Dataloader bucket by tensor length - pytorch

I've been trying to create a custom Dataloader that can serve batches of data that are all same-sized to feed into a Conv2d layer for classification purposes.
Here's some test data
X is a NUMBER OF POINTS x CHOICES x NUM_FEATURES, while y is the label (that can be any integer CHOICES-1)
I'm having trouble writing the Sampler and Dataloader.
import random
import torch
from collections import defaultdict
from sklearn.utils import shuffle
from torch.utils.data import Dataset, DataLoader
from typing import Sequence, Iterator
import numpy as np
sample_probs = np.array([2.04302017e-03, 6.84249612e-03, 3.18776004e-02, 6.69332322e-01,
1.79056125, 1.63388916, 1.31819391, 1.43798623,
2.44057406, 5.51664089e-01, 9.66624185e-02, 1.67495225e-02,
3.59960696e-03, 2.43216687e-05])
X = []
y = []
train_datasets = []
i_dict = {0: 19,
1: 63,
2: 30,
3: 6192,
4: 16564,
5: 15115,
6: 12195,
7: 13303,
8: 22578,
9: 5103,
10: 894,
11: 155,
12: 33,
13: 2}
for i in range(2,16):
temp_x = []
temp_y = []
for j in range(i_dict[i-2]):
temp_x.append(torch.rand(i, 4, 1))
temp_y.append(torch.tensor(random.randint(0,i-1)))
X = torch.stack(temp_x)
y = torch.stack(temp_y)
train_datasets.append((X.clone(),y.clone()))
class WeightedBucketSampler(torch.utils.data.Sampler):
def __init__(self, data, weights: Sequence[float], num_samples: int,
replacement: bool = True, generator=None, shuffle=True, drop_last=False):
super().__init__(data)
self.shuffle = shuffle
self.drop_last = drop_last
self.weights = torch.as_tensor(weights, dtype=torch.double)
self.num_samples = num_samples
self.replacement = replacement
self.generator = generator
self.buckets = defaultdict(list)
'''data is a CustomDataset containing a tensor of COUNT x NUM_ROUTES x FEATURES x 1 and a tensor with the corresponding labels'''
counter = 0
for i in range(len(data)):
self.buckets[i+2] += [data[i][0],data[i][1]]
counter += len(data[i][0])
self.length = counter
def __iter__(self) -> Iterator[int]:
# Choose a bucket depending on the weighted sample
rand_bucket = torch.multinomial(self.weights, self.num_samples, self.replacement, generator=self.generator).tolist()[0]
shifter = sum([len(self.buckets[i+2][0]) for i in range(rand_bucket)])
# Generate random indices from the bucket
rand_tensor = torch.randperm(len(self.buckets[rand_bucket+2][0]), generator=self.generator)
yield from torch.add(rand_tensor, shifter).tolist()
def __len__(self):
return self.length
class CustomDataset(Dataset):
def __init__(self, data):
self.routes = dict()
self.choice = dict()
counter = 0
for i in range(len(data)):
for j in range(len(data[i][0])):
self.routes[counter] = data[i][0][j]
self.choice[counter] = data[i][1][j]
counter += 1
def __len__(self):
return len(self.choice)
def __getitem__(self, idx):
choice = self.choice[idx]
routes = self.routes[idx]
return routes, choice
train_datasets_ds = CustomDataset(train_datasets)
bucket_sampler = WeightedBucketSampler(train_datasets, sample_probs,len(sample_probs), shuffle=True, drop_last=False)
loader = DataLoader(train_datasets_ds, sampler=bucket_sampler, batch_size=32, pin_memory=True)
for X,y in loader:
print(X.size(),y.size())
This code is a combination of WeightedRandomSampler and Bucket sampling code
I'm essentially sampling via the sample weights of each classification to choose a bucket, and from that bucket choose randomly to form a batch up to batch_size.
However, when going through loader, I get the output:
...
torch.Size([32, 10, 4, 1]) torch.Size([32])
torch.Size([32, 10, 4, 1]) torch.Size([32])
torch.Size([32, 10, 4, 1]) torch.Size([32])
torch.Size([18, 10, 4, 1]) torch.Size([18])
The sum of all these batches add up to the elements in bucket 10. So it's right, but it's not jumping to another bucket. Rerunning the code
for X,y in loader:
print(X.size(),y.size())
will produce another bucket's batches.
I'm still learning PyTorch, so some of the code might be inefficient. Would love some advice as well!

Thanks to some help on the unofficial PyTorch Discord channel (sudomaze), I've fixed my problem. There's a need to iterate through all the data in the sampler.
The __len__ function in the sampler also needed fixing.
class WeightedBucketSampler(Sampler[List[int]]):
def __init__(self, data, weights: Sequence[float], num_samples: int,
replacement: bool = True, generator=None, shuffle=True, batch_size=32, drop_last=False):
super().__init__(data)
self.shuffle = shuffle
self.drop_last = drop_last
self.weights = torch.as_tensor(weights, dtype=torch.double)
self.num_samples = num_samples
self.replacement = replacement
self.generator = generator
self.batch_size = batch_size
self.buckets = defaultdict(list)
'''data is a CustomDataset containing a tensor of COUNT x NUM_ROUTES x FEATURES x 1 and a tensor with the corresponding labels'''
counter = 0
for i in range(len(data)):
self.buckets[i+2] += [data[i][0],data[i][1]]
counter += len(data[i][0])
self.length = counter
def __iter__(self) -> Iterator[int]:
# Choose a bucket depending on the weighted sample
rand_bucket = torch.multinomial(self.weights, self.num_samples, self.replacement, generator=self.generator)
batch = [0] * self.batch_size
idx_in_batch = 0
for bucket_idx in rand_bucket.tolist():
bucketsample_count = 0
shifter = sum([len(self.buckets[i+2][0]) for i in range(bucket_idx)])
# Generate random indices from the bucket and shift them
rand_tensor = torch.randperm(len(self.buckets[bucket_idx+2][0]), generator=self.generator)
# print(len(self.buckets[bucket_idx+2][0]), len(rand_tensor.tolist()))
for idx in rand_tensor.tolist():
batch[idx_in_batch] = idx+shifter
idx_in_batch += 1
if idx_in_batch == self.batch_size:
bucketsample_count += self.batch_size
yield batch
idx_in_batch = 0
batch = [0] * self.batch_size
if idx_in_batch > 0:
bucketsample_count += idx_in_batch
yield batch[:idx_in_batch]
# The last remaining tensors are added into one batch. Terminate batch and move to next bucket
idx_in_batch = 0
batch = [0] * self.batch_size
continue
def __len__(self):
return (self.length + (self.batch_size - 1)) // self.batch_size
class CustomDataset(Dataset):
def __init__(self, data):
self.routes = dict()
self.choice = dict()
counter = 0
for i in range(len(data)):
for j in range(len(data[i][0])):
self.routes[counter] = data[i][0][j]
self.choice[counter] = data[i][1][j]
counter += 1
def __len__(self):
return len(self.choice)
def __getitem__(self, idx):
choice = self.choice[idx]
routes = self.routes[idx]
return routes, choice
w = np.array([len(i[0]) for i in train_datasets])
sample_probs = 1/sample_probs*w
train_datasets_ds = CustomDataset(train_datasets)
bucket_sampler = WeightedBucketSampler(train_datasets, sample_probs,len(sample_probs), shuffle=True, batch_size=batch_size, drop_last=False)
train_loader = DataLoader(train_datasets_ds, batch_sampler=bucket_sampler)

Related

Pytorch sequential data loader

I have looked through the documentation with for ex class IterableDataset and Start / End but I'm just not good enough to solve this one at the moment.
Training my model with random batches is fine, but using it for predictions I need it to start from min(index) up to max(index). So I wanted to re-use below and change to fit that.
Now it will take random items from the range so I can get duplicate predictions of the same index number. ex range(5) in index 1,2,3,4,5 might give 4,2,2,3,4 not desired 1,2,3,4,5.
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=True)
DataLoader shuffle = False, then it just takes len / max of index.
I probably need to change the sampler.
class CompanyDataset(Dataset):
def __init__(self, csv_name, root_dir, training_length, forecast_window):
"""
Args:
csv_file (string): Path to the csv file.
root_dir (string): Directory
"""
# load raw data file
csv_file = os.path.join(root_dir, csv_name)
self.df = pd.read_csv(csv_file)
self.root_dir = root_dir
self.transform = MinMaxScaler()
self.T = training_length
self.S = forecast_window
def __len__(self):
# return number of sensors
return len(self.df.groupby(by=["index"]))
# Will pull an index between 0 and __len__.
def __getitem__(self, idx):
# Sensors are indexed from 1
idx = idx + 1
# np.random.seed(0)
start = np.random.randint(0, len(self.df[self.df["index"] == idx]) - self.T - self.S)
Company = str(self.df[self.df["index"] == idx][["station"]][start:start + 1].values.item())
index_in = torch.tensor([i for i in range(start, start + self.T)])
index_tar = torch.tensor([i for i in range(start + self.T, start + self.T + self.S)])
_input = torch.tensor(self.df[self.df["index"] == idx][
["A1","A2","A3","A4","A5","A6","A7","A8", "A9", "A10", "A11"]][
start: start + self.T].values)
target = torch.tensor(self.df[self.df["index"] == idx][
[["A1","A2","A3","A4","A5","A6","A7","A8", "A9", "A10", "A11"]][
start + self.T: start + self.T + self.S].values)
scaler = self.transform
scaler.fit(_input[:, 0].unsqueeze(-1))
_input[:, 0] = torch.tensor(scaler.transform(_input[:, 0].unsqueeze(-1)).squeeze(-1))
target[:, 0] = torch.tensor(scaler.transform(target[:, 0].unsqueeze(-1)).squeeze(-1))
dump(scaler, 'scalar_item.joblib')
return index_in, index_tar, _input, target, station

keras BatchGenerator(keras.utils.Sequence) is too slow

I'm using a custom batch generator with large dataframe. but the Generator takes too much time to generate a batch, it takes 127s to generate a batch of 1024. I've tried Dask but still, the processing is slow. is there any way to integrate multiprocessing with inside the generator. knowing that I've tried use_multiprocessing=True with workers=12
import keras
from random import randint
import glob
import warnings
import numpy as np
import math
import pandas as pd
import dask.dataframe as dd
class BatchGenerator(keras.utils.Sequence):
'Generates data for Keras'
def __init__(self, labels=None, batch_size=8, n_classes=4, shuffle=True,
seq_len=6, data_path=None, meta_path=None,list_IDs=None):
'Initialization'
self.batch_size = batch_size
self.labels = labels
self.n_classes = n_classes
self.shuffle = shuffle
self.seq_len = seq_len
self.meta_df = meta_path
self.data_df = data_path
self.data_df = self.data_df.astype({"mjd": int})
self.list_IDs = list_IDs
if self.list_IDs==None:
self.list_IDs = list(self.meta_df['object_id'].unique())
self.on_epoch_end()
def __len__(self):
'Denotes the number of batches per epoch'
return int(np.floor(len(self.list_IDs) / self.batch_size))
def __getitem__(self, index):
'Generate one batch of data'
# Generate indexes of the batch
indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
# Find list of IDs
list_IDs_temp = [self.list_IDs[k] for k in indexes]
# Generate data
X, y = self.__data_generation(list_IDs_temp)
return X, y
def on_epoch_end(self):
'Updates indexes after each epoch'
self.indexes = np.arange(len(self.list_IDs))
if self.shuffle == True:
np.random.shuffle(self.indexes)
def __data_generation(self, list_IDs_temp):
X_dat = np.zeros((self.batch_size, self.seq_len,6,1))
Y_mask = np.zeros((self.batch_size, self.seq_len,6,1))
# Y_dat = np.empty((self.batch_size,1), dtype=int)
X_length= np.empty((self.batch_size,1), dtype=int)
for i, trans_id in enumerate(list_IDs_temp):
curve = self.data_df[self.data_df.object_id==trans_id]
mjdlist = list(curve['mjd'].unique())
ts_length = len(mjdlist)
if ts_length <= self.seq_len :
start_ind = 0
else :
start_ind = randint(0, ts_length - self.seq_len)
ts_length = self.seq_len
for j in range(ts_length):
if j+start_ind < len(mjdlist):
step = curve[curve.mjd==mjdlist[j+start_ind]]
for k in range(len(step.mjd)):
obs = step[step.passband==k]
if len(obs) == 0 :
# print('here is one')
continue
else:
if k == 0:
X_dat[i,j,0,0] =obs.flux.iloc[0]
Y_mask[i,j,0,0] = 1
if k == 1:
X_dat[i,j,1,0] = obs.flux.iloc[0]
Y_mask[i,j,1,0] = 1
if k == 2:
X_dat[i,j,2,0] = obs.flux.iloc[0]
Y_mask[i,j,2,0] = 1
if k == 3:
X_dat[i,j,3,0] = obs.flux.iloc[0]
Y_mask[i,j,3,0] = 1
if k == 4:
X_dat[i,j,4,0] = obs.flux.iloc[0]
Y_mask[i,j,4,0] = 1
if k == 5:
X_dat[i,j,5,0] = obs.flux.iloc[0]
Y_mask[i,j,5,0] = 1
# meta = self.meta_df[self.meta_df['object_id'] == trans_id]
# Y_dat[i] = self.labels[int(meta['target'])]
X_length[i,0] = ts_length
flux_max = np.max(X_dat[i])
flux_min = np.min(X_dat[i])
flux_pow = math.log2(flux_max - flux_min)
X_dat[i] /= flux_pow
X_noised = X_dat + np.random.uniform(low=0, high=0.5, size=X_dat.shape)
return [X_noised, X_length, np.reshape(Y_mask,(self.batch_size, self.seq_len*6))], np.reshape(X_dat,(self.batch_size, self.seq_len*6))
To make it faster, the for loop in the function __data_generation should be parallelized. Using the joblib package may help.

Building a dataset with dataloader pytorch getting error cannot import name 'read_data_sets'

Loading data into dataset using pytorch dataloader.
Getting error cannot import name 'read_data_sets'
Tried searaching for results from similar issues.
If there is confusion about file instead of module and it can't find read_data_sets in your file How do i change to fix?
class MRDataset(data.Dataset):
def __init__(self, root_dir, task, plane, train=True, transform=None, weights=None):
super().__init__()
self.task = task
self.plane = plane
self.root_dir = root_dir
self.train = train
if self.train:
self.folder_path = self.root_dir + 'train/{0}/'.format(plane)
self.records = pd.read_csv(
self.root_dir + 'train-{0}.csv'.format(task), header=None, names=['id', 'label'])
else:
transform = None
self.folder_path = self.root_dir + 'valid/{0}/'.format(plane)
self.records = pd.read_csv(
self.root_dir + 'valid-{0}.csv'.format(task), header=None, names=['id', 'label'])
self.records['id'] = self.records['id'].map(
lambda i: '0' * (4 - len(str(i))) + str(i))
self.paths = [self.folder_path + filename +
'.npy' for filename in self.records['id'].tolist()]
self.labels = self.records['label'].tolist()
self.transform = transform
if weights is None:
pos = np.sum(self.labels)
neg = len(self.labels) - pos
self.weights = torch.FloatTensor([1, neg / pos])
else:
self.weights = torch.FloatTensor(weights)
def __len__(self):
return len(self.paths)
def __getitem__(self, index):
array = np.load(self.paths[index])
label = self.labels[index]
if label == 1:
label = torch.FloatTensor([[0, 1]])
elif label == 0:
label = torch.FloatTensor([[1, 0]])
if self.transform:
array = self.transform(array)
else:
array = np.stack((array,)*3, axis=1)
array = torch.FloatTensor(array)
# if label.item() == 1:
# weight = np.array([self.weights[1]])
# weight = torch.FloatTensor(weight)
# else:
# weight = np.array([self.weights[0]])
# weight = torch.FloatTensor(weight)
return array, label, self.weights
There is a model and train class to run this. Arguments specified in train.
Running the train should load data and run through model

Tensorflow : ValueError: Can't load save_path when it is None

import os
import tarfile
from six.moves import urllib
URL = 'http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz'
PATH = 'aclImdb'
def fetch_data(url = URL, path = PATH):
if not os.path.isdir(path):
os.makedirs(path)
file_path = os.path.join(oath, "aclImdb_v1.tar.gz")
urllib.request.urlretrieve(url, file_path)
file_gz = tarfile.open(file_path)
file_gz.extractall(path = path)
file_gz.close()
import pyprind # for progress visualisation
import pandas as pd
PATH = 'aclImdb'
labels = {'pos': 1, 'neg': 0} # int class labels for 'positive' and 'negative'
pbar = pyprind.ProgBar(50000) # initialise a progress bar with 50k iterations = no. of docs
df = pd.DataFrame()
# use nested for loops to iterate over 'train' & 'test' subdir
for s in ('test', 'train'):
for l in ('pos', 'neg'): # and read text files from 'pos' and 'neg' subdir
path = os.path.join(PATH, s, l)
for file in os.listdir(path):
# append to the df pandas DataFrame with an int class (post = 1, neg = 0)
with open(os.path.join(path, file), 'r', encoding = 'utf-8') as infile:
txt = infile.read()
df = df.append([[txt, labels[l]]], ignore_index = True)
pbar.update()
df.columns = ['review', 'sentiment']
import numpy as np
np. random.seed(0)
df = df.reindex(np.random.permutation(df.index))
df.to_csv('movie_data.csv', index = False, encoding = 'utf-8')
n_words = max(list(word_to_int.values())) + 1
df = pd.read_csv('movie_data.csv', encoding = 'utf-8')
df.head(3)
# Separate words and count each word's occurence
import pyprind # for progress visualisation
from collections import Counter
from string import punctuation
import re
counts = Counter() # collects the counts of occurence of each unique word
pbar = pyprind.ProgBar(len(df['review']),
title = 'Counting word occurences...') # progress bar
for i, review in enumerate(df['review']):
text = ''.join([c if c not in punctuation else ' '+c+' '
for c in review]).lower()
df.loc[i, 'review'] = text
pbar.update()
counts.update(text.split())
# Mapping each unique word to an int
word_counts = sorted(counts, key = counts.get, reverse = True)
print(word_counts[:5])
word_to_int = {word: ii for ii, word in enumerate(word_counts, 1)}
mapped_reviews = []
pbar = pyprind.ProgBar(len(df['review']),
title = 'Map movie reviews to integers...')
# Left-pad with zeros if the sequence length < 200
# Use 200 elements if the length > 200
sequence_length = 200
sequences = np.zeros((len(mapped_reviews), sequence_length), dtype = int)
for i, row in enumerate(mapped_reviews):
review_arr = np.array(row)
sequences[i, -len(row):] = review_arr[-sequence_length:]
# Split the dataset into training and test sets
X_train = sequences[:25000, :]
y_train = df.loc[:25000, 'sentiment'].values
X_test = sequences[25000:, :]
y_test = df.loc[25000:, 'sentiment'].values
# Define the mini-batches generator
np.random.seed(123)
def batch_gen(x, y = None, batch_size = 64):
n_batches = len(x) // batch_size
x = x[:n_batches * batch_size]
if y is not None:
y = y[:n_batches * batch_size]
for ii in range(0, len(x), batch_size):
if y is not None:
yield x[ii : ii + batch_size], y[ii : ii + batch_size]
else:
yield x[ii : ii + batch_size]
import tensorflow as tf
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' ## suppress the 3.5 warning if using TF 1.4
class SentimentRNN(object):
# Define __init__
def __init__(self,
n_words,
seq_len = 200,
lstm_size = 256,
num_layers = 1,
batch_size = 64,
learning_rate = 0.0001,
embed_size = 200):
self.n_words = n_words
self.seq_len = seq_len
self.lstm_size = lstm_size # no. of hidden units
self.num_layers = num_layers
self.batch_size = batch_size
self.learning_rate = learning_rate
self.embed_size = embed_size
self.g = tf.Graph()
with self.g.as_default():
tf.set_random_seed(123)
self.build()
self.saver = tf.train.Saver()
self.init_op = tf.global_variables_initializer()
# Define the build method
def build(self):
# Define the placeholders
tf_x = tf.placeholder(tf.int32,
shape = (self.batch_size, self.seq_len),
name = 'tf_x')
tf_y = tf.placeholder(tf.float32,
shape = (self.batch_size),
name = 'tf_y')
tf_keepprob = tf.placeholder(tf.float32,
name = 'tf_keepprob')
# Create the embedding layer
embedding = tf.Variable(
tf.random_uniform(
shape = (self.n_words, self.embed_size),
minval = -1,
maxval = 1),
name = 'embedding')
embed_x = tf.nn.embedding_lookup(embedding,
tf_x,
name = 'embed_x')
# Define LSTM cells and stack them
cells = tf.contrib.rnn.MultiRNNCell(
[tf.contrib.rnn.DropoutWrapper(
tf.contrib.rnn.BasicLSTMCell(num_units = self.lstm_size),
output_keep_prob = tf_keepprob)
for i in range(self.num_layers)])
# Define the initial state:
self.initial_state = cells.zero_state(
self.batch_size, tf.float32)
print(' << initial state >> ', self.initial_state)
# Put together components with tf.nn.dynamic_rnn
lstm_outputs, self.final_state = tf.nn.dynamic_rnn(
cell = cells,
inputs = embed_x,
initial_state = self.initial_state)
## lstm_outputs shape: [batch_size, max_time, cells.output_size]
print('\n << lstm_output >> ', lstm_outputs)
print('\n << final state >> ', self.final_state)
# Apply a full-connected layer on the RNN output
logits = tf.layers.dense(
inputs = lstm_outputs[:, -1],
units = 1, # dimensionality of the output space
activation = None,
name = 'logits')
# Remove dimensions of size 1 from the tensor shape
logits = tf.squeeze(input = logits,
name = 'logits_squeezed')
print ('\n << logits >> ', logits)
# If you want prob's
y_proba = tf.nn.sigmoid(logits, name = 'probabilities')
predictions = {'probabilities' : y_proba,
'labels' : tf.cast(tf.round(y_proba),
tf.int32,
name = 'labels')}
print('\n << predictions >> ', predictions)
# Define the cost function
cost = tf.reduce_mean(
tf.nn.sigmoid_cross_entropy_with_logits(
labels = tf_y,
logits = logits),
name = 'cost')
# Define the optimiser
optimizer = tf.train.AdamOptimizer(self.learning_rate)
train_op = optimizer.minimize(cost, name = 'train_op')
# Define the train method
def train(self, X_train, y_train, num_epochs):
with tf.Session(graph = self.g) as sess:
sess.run(self.init_op)
iteration = 1
for epoch in range(num_epochs):
state = sess.run(self.initial_state)
for batch_x, batch_y in batch_gen(
X_train,
y_train,
batch_size = self.batch_size):
feed = {'tf_x:0' : batch_x,
'tf_y:0' : batch_y,
'tf_keepprob:0' : 0.5,
self.initial_state : state}
loss, _, state = sess.run(
['cost:0',
'train_op',
self.final_state],
feed_dict=feed)
if iteration % 20 == 0:
print("Epoch: %d/%d Iteration: %d "
"| Train loss: %.5f" % (
epoch + 1,
num_epochs,
iteration,
loss))
iteration += 1
if (epoch + 1) % 10 == 0:
self.saver.save(
sess,
"model/sentiment-%d.ckpt" % epoch)
# Define the predict method
def predict(self, X_data, return_proba=False):
preds = []
with tf.Session(graph = self.g) as sess:
self.saver.restore(
sess,
tf.train.latest_checkpoint('model/'))
test_state = sess.run(self.initial_state)
for ii, batch_x in enumerate(batch_gen(
x = X_data,
y = None,
batch_size = self.batch_size), 1):
feed = {'tf_x:0' : batch_x,
'tf_keepprob:0' : 1.0,
self.initial_state : test_state}
if return_proba:
pred, test_state = sess.run(
['probabilities:0', self.final_state],
feed_dict=feed)
else:
pred, test_state = sess.run(
['labels:0', self.final_state],
feed_dict=feed)
preds.append(pred)
return np.concatenate(preds)
for review in df['review']:
mapped_reviews.append([word_to_int[word] for word in review.split()])
pbar.update()
rnn = SentimentRNN(n_words = n_words,
seq_len = sequence_length,
embed_size = 256,
lstm_size = 128,
num_layers = 1,
batch_size = 100,
learning_rate = 0.001)
preds = rnn.predict(X_test)
y_true = y_test\[:len(preds)\]
print('Test accuracy... %.3f' % (np.sum(preds == y_true) / len(y_true)))][1]
Create an object of the SentimentRNN class with the following parameters:
n_words = n_words, seq_len = sequence_length, embed_size = 256, lstm_size = 128, num_layers = 1, batch_size = 100, learning_rate = 0.001.
Since we have a relatively small dataset, the number of layers = 1 may generalise better
enter image description here
ValueError Traceback (most recent call last)
<ipython-input-23-a3cfe03a9a49> in <module>()
----> 1 preds = rnn.predict(X_test)
2 y_true = y_test[:len(preds)]
3 print('Test accuracy... %.3f' % (np.sum(preds == y_true) / len(y_true)))
<ipython-input-12-d83ee67c43b6> in predict(self, X_data, return_proba)
173 self.saver.restore(
174 sess,
--> 175 tf.train.latest_checkpoint('model/'))
176 test_state = sess.run(self.initial_state)
177
/usr/local/anaconda/lib/python3.6/site-packages/tensorflow/python/training/saver.py in restore(self, sess, save_path)
1680 return
1681 if save_path is None:
-> 1682 raise ValueError("Can't load save_path when it is None.")
1683 logging.info("Restoring parameters from %s", save_path)
1684 if context.in_graph_mode():
ValueError: Can't load save_path when it is None.
The error just means tf.train.latest_checkpoint didn't find anything. It returns None, then the Saver complains because it was passed None. So there's no checkpoint in that directory.

TensorFlow, losses after training the model are different than losses printed during the last Epoch of Stochastic Gradient Descent.

I'm trying to do binary classification on two spirals. For testing, I am feeding my neural network the exact spiral data with no noise, and the model seems to work as the losses near 0 during SGD. However, after using my model to infer the exact same data points after SGD has completed, I get completely different losses than what was printed during the last epoch of SGD.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
np.set_printoptions(threshold=np.nan)
# get the spiral points
t_p = np.linspace(0, 4, 1000)
x1_p = t_p * np.cos(t_p*2*np.pi)
y1_p = t_p * np.sin(t_p*2*np.pi)
x2_p = t_p * np.cos(t_p*2*np.pi + np.pi)
y2_p = t_p * np.sin(t_p*2*np.pi + np.pi)
plt.plot(x1_p, y1_p, x2_p, y2_p)
# generate data points
x1_dat = x1_p
y1_dat = y1_p
x2_dat = x2_p
y2_dat = y2_p
def model_variable(shape, name, initializer):
variable = tf.get_variable(name=name,
dtype=tf.float32,
shape=shape,
initializer=initializer
)
tf.add_to_collection('model_variables', variable)
return variable
class Model():
#layer specifications includes bias nodes
def __init__(self, sess, data, nEpochs, learning_rate, layer_specifications):
self.sess = sess
self.data = data
self.nEpochs = nEpochs
self.learning_rate = learning_rate
if layer_specifications[0] != 2 or layer_specifications[-1] != 1:
raise ValueError('First layer only two nodes, last layer only 1 node')
else:
self.layer_specifications = layer_specifications
self.build_model()
def build_model(self):
# x is the two nodes that will be layer one, will input an x, y coordinate
# and need to classify which spiral is it on, the non phase shifted or the phase
# shifted one.
# y is the output of the model
self.x = tf.placeholder(tf.float32, shape=[2, 1])
self.y = tf.placeholder(tf.float32, shape=[])
self.thetas = []
self.biases = []
for i in range(1, len(self.layer_specifications)):
self.thetas.append(model_variable([self.layer_specifications[i], self.layer_specifications[i-1]], 'theta'+str(i), tf.random_normal_initializer(stddev=0.1)))
self.biases.append(model_variable([self.layer_specifications[i], 1], 'bias'+str(i), tf.constant_initializer()))
#forward propagation
intermediate = self.x
for i in range(0, len(self.layer_specifications)-1):
if i != (len(self.layer_specifications) - 2):
intermediate = tf.nn.elu(tf.add(tf.matmul(self.thetas[i], intermediate), self.biases[i]))
else:
intermediate = tf.add(tf.matmul(self.thetas[i], intermediate), self.biases[i])
self.yhat = tf.squeeze(intermediate)
self.loss = tf.nn.sigmoid_cross_entropy_with_logits(self.yhat, self.y);
def train_init(self):
model_variables = tf.get_collection('model_variables')
self.optim = (
tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate)
.minimize(self.loss, var_list=model_variables)
)
self.check = tf.add_check_numerics_ops()
self.sess.run(tf.initialize_all_variables())
# here is where x and y combine to get just x in tf with shape [2, 1] and where label becomes y in tf
def train_iter(self, x, y):
loss, _, _ = sess.run([self.loss, self.optim, self.check],
feed_dict = {self.x: x, self.y: y})
print('loss: {0} on:{1}'.format(loss, x))
# here x and y are still x and y coordinates, label is separate
def train(self):
for _ in range(self.nEpochs):
for x, y, label in self.data():
print(label)
self.train_iter([[x], [y]], label)
print("NEW ONE:\n")
# here x and y are still x and y coordinates, label is separate
def infer(self, x, y, label):
return self.sess.run((tf.sigmoid(self.yhat), self.loss), feed_dict={self.x : [[x], [y]], self.y : label})
def data():
#so first spiral is label 0, second is label 1
for _ in range(len(x1_dat)-1, -1, -1):
for dat in range(2):
if dat == 0:
yield x1_dat[_], y1_dat[_], 0
else:
yield x2_dat[_], y2_dat[_], 1
layer_specifications = [2, 100, 100, 100, 1]
sess = tf.Session()
model = Model(sess, data, nEpochs=10, learning_rate=1.1e-2, layer_specifications=layer_specifications)
model.train_init()
model.train()
inferrences_1 = []
inferrences_2 = []
losses = 0
for i in range(len(t_p)-1, -1, -1):
infer, loss = model.infer(x1_p[i], y1_p[i], 0)
if infer >= 0.5:
print('loss: {0} on point {1}, {2}'.format(loss, x1_p[i], y1_p[i]))
losses = losses + 1
inferrences_1.append('r')
else:
inferrences_1.append('g')
for i in range(len(t_p)-1, -1, -1):
infer, loss = model.infer(x2_p[i], y2_p[i], 1)
if infer >= 0.5:
inferrences_2.append('r')
else:
print('loss: {0} on point {1}, {2}'.format(loss, x2_p[i], y2_p[i]))
losses = losses + 1
inferrences_2.append('g')
print('total losses: {}'.format(losses))
plt.scatter(x1_p, y1_p, c=inferrences_1)
plt.scatter(x2_p, y2_p, c=inferrences_2)
plt.show()

Resources