Object Detection - RuntimeError: stack expects each tensor to be equal size - pytorch

I created a custom dataset for object detection named ReceiptDataset as below.
from torch.nn.utils.rnn import pad_sequence
import torch.nn.functional as F
class ReceiptDataset(torch.utils.data.Dataset):
def __init__(self, train_dir,width,height,labels,transforms=None):
self.images = os.listdir(train_dir)
self.width = width
self.height = height
self.train_dir = train_dir
self.labels = labels
self.transforms = transforms
def __getitem__(self,idx):
img_name = self.images[idx]
img_path = os.path.join(self.train_dir,img_name)
#print(f"img_name: {img_name}")
img = cv2.imread(img_path)
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
img_res = cv2.resize(img_rgb,(self.width,self.height), cv2.INTER_AREA)
img_res /= 255.0
annot = self.labels[str(img_name)]
lbls = []
boxes = []
target = {}
ht, wt, _ = img.shape
#print(f"img_res shape: {img_res.shape}, orig shape: {wt}, {ht}")
for item in annot:
x,y,box_wt,box_ht,lbl = item
x_min = x
x_max = x + box_wt
y_min = y
y_max = y + box_ht
x_min_corr = (x_min / wt) * self.width
x_max_corr = (x_max /wt ) * self.width
y_min_corr = (y_min / ht) * self.height
y_max_corr = (y_max / ht) * self.height
boxes.append([x_min_corr, y_min_corr, x_max_corr, y_max_corr])
lbls.append( classes.index(str(lbl)) )
#print(f"dls_lbls: {lbls}, {len(lbls)}")
#lbls += [-1] * (NUM_CLASSES - len(lbls))
boxes = torch.as_tensor(boxes, dtype=torch.float32)
lbls = torch.as_tensor(lbls, dtype=torch.int64)
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)
target["boxes"] = boxes
target["labels"] = lbls
target["image_id"] = torch.as_tensor(idx)
target["area"] = area
target["iscrowd"] = iscrowd
#print(f"dls_lbls -- 2: {target['labels']}, { target['labels'].shape }")
if self.transforms:
trans = self.transforms(image=img_res,
bboxes = target["boxes"],
labels=lbls
)
img_res = trans["image"]
target["boxes"] = torch.Tensor(trans["bboxes"])
return img_res, target
def __len__(self):
return len(self.images)
and I created an instance with:
train_dataset = ReceiptDataset("label-detector/images",width,height,plabels)
and my training snippet is :
from engine import train_one_epoch, evaluate
for epoch in range(num_epochs):
train_one_epoch(model,optim,train_loader,device,epoch,print_freq=2)
lr_scheduler.step()
evaluate(model,test_loader,device)
but anytime I run the training loop, I’m getting a runtime error:
RuntimeError: stack expects each tensor to be equal size, but got [11,4] at entry 0 and [9,4] at entry 1
There are 17 classes in total and each image has a minimum of 4 annotations.
I noticed the problem seems to be coming from my labels list/tensor in the dataset class, the size of the labels list/tensor varies based on the number of annotated items in an image, but I can’t seem to figure out a way to fix this.
Thank you!

I solved it by implementing a custom collate function for the dataloader that returns a batch of my dataset as needed by my model.
def collate_fn_seq(batch):
images = [ item[0] for item in batch ]
targets = [ item[1] for item in batch ]
imgs = []
for image in images:
img = torch.from_numpy(image).permute(2, 0, 1)
imgs.append(img)
boxes = [target["boxes"] for target in targets]
labels = [target["labels"] for target in targets]
image_ids = [ target["image_id"] for target in targets ]
areas = [target["area"] for target in targets]
iscrowds = [target["iscrowd"] for target in targets]
tars = []
for i in range(len(batch)):
box = boxes[i]
label = labels[i]
image_id = image_ids[i]
area = areas[i]
iscrowd = iscrowds[i]
target = {"boxes": box, "labels": label, "image_id": image_id, "area": area, "iscrowd": iscrowd}
tars.append(target)
return imgs, tars
and included it in my dataloaders using:
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=collate_fn_seq)

Related

Evaluation gives 0 score

I am working on a instance segmentation problem on mask rcnn with pytorch. Training part is working with below code but evaluation gives 0 score at every mAP. What's the problem in the code?
More info:
I use Albumentations for transforms and some files from pytorch vision for training.
Some problems I've been through:
When I use coco for bbox format instead of pascal voc it gives following error.
RuntimeError: Boolean value of Tensor with more than one value is ambiguous
When put labels out of the convert_seg_boolMask function, it gives following error.
RuntimeError: Boolean value of Tensor with more than one value is ambiguous
def get_transforms(train=False):
if train:
transform = A.Compose([
ToTensorV2()
],bbox_params=A.BboxParams("pascal_voc",label_fields=["labels","iscrowd"]))
else:
transform = A.Compose([
ToTensorV2()
],bbox_params=A.BboxParams("pascal_voc",label_fields=["labels","iscrowd"]))
return transform
class Dataset(datasets.VisionDataset):
def __init__(self, coco_, data_dir, transform=None, target_transform=None, transforms=None):
super().__init__(data_dir, transforms, transform, target_transform)
self.coco_info = coco_
self.data_dir = data_dir
self.transforms = transforms
if isinstance(self.coco_info,dict):
self.ids = [x["id"] for x in self.coco_info["images"] if len(self._load_target(x["id"]))>0]
def _load_image(self, id: int):
name = loadImgs(self.coco_info["images"],id)[0]['file_name']
image = cv2.imread(os.path.join(self.data_dir, name))
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)/255
return image
def _load_target(self, id):
return loadAnns(self.coco_info["annotations"],id)
def n_classes(self):
category_names = list(set(sorted([a["name"] for a in self.coco_info["categories"]])))
self.classes = ["__background__"]+[i for i in category_names]
return self.classes
def __getitem__(self,idx):
id = self.ids[idx]
image = self._load_image(id)
target = copy.deepcopy(self._load_target(id))
image_shape = (image.shape[0],image.shape[1])
img_info = {
"img_shape":image_shape,
"image_id":id,
"labels":[t["category_id"]for t in target],
"segmentation":[t["segmentation"][0] for t in target],
"id": [t["id"] for t in target]
}
mask, labels = self.convert_seg_to_boolMask(img_info)
obj_ids = np.unique(mask)
obj_ids = obj_ids[1:]
masks = torch.tensor(mask == obj_ids[:, None, None])
boxes = []
bbox = np.array([t["bbox"] for t in target])
for xmin,ymin,width,height in bbox:
xmax = xmin+width
ymax = ymin+height
boxes.append([xmin, ymin, xmax, ymax])
boxes = torch.tensor(boxes)
labels = torch.tensor(labels)
image_id = torch.tensor([id])
iscrowd = torch.tensor([t["iscrowd"] for t in target])
transformed = self.transforms(image=image, masks=masks, bboxes=boxes, labels=labels, iscrowd=iscrowd)
image = transformed['image']
masks = torch.tensor(transformed["masks"])
boxes = torch.tensor(transformed['bboxes'])
labels = torch.tensor(transformed["labels"])
iscrowd = torch.tensor(transformed["iscrowd"])
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
image_id = torch.tensor(image_id)
target = {}
target["boxes"] = boxes
target["labels"] = labels
target["masks"] = masks
target["image_id"] = image_id
target["area"] = area
target["iscrowd"] = iscrowd
return image, target
def __len__(self):
return len(self.ids)
def convert_seg_to_boolMask(self,img_info):
mask = np.zeros(img_info["img_shape"], dtype=np.uint8)
mask = Image.fromarray(mask)
draw = ImageDraw.Draw(mask)
for seg, i in zip(img_info["segmentation"],img_info["id"]):
points = [tuple([k,l]) for k,l in zip(seg[0::2],seg[1::2])]
draw.polygon(xy=points,
outline=tuple([i]),
fill=tuple([i]))
mask = np.array(mask)
labels = img_info["labels"]
return mask, labels

Tensorflow: How to use a generator for fit() which runs in parallel with multiple processes

I am trying to train a model on a data set which does not fit in my RAM.
Therefore I am using a data generator which inherits from tensorflow.keras.utils.Sequence as shown below.
This is working. However because I am doing processing on the images my training is CPU bound. When looking in GPU-Z my GPU is only at 10-20% but one of my CPU Cores is at its max.
To solve this I am trying to run the generator in parallel on all my 16 cores. However when I set use_multiprocessing=True in the fit() function the program freezes. And using workers=8 does not speed up the process just produces batches in uneven intervals.
ex.:
batch 1-8 is processed immediately than there is some delay and than batch 9-16 is processed.
The code below shows what I am trying to do.
#read the dataset
x, o_y = reader.read_dataset_whole(ETLCharacterGroups.kanji)
#split data into 90/10 percent parts
percentage = round(len(x) / 100 * 80)
x_train = x[:percentage]
x_test = x[percentage:]
y_train = o_y[:percentage]
y_test = o_y[percentage:]
def distort_sample(img : Image) -> (Image, [int], [int]):
"""
Distort the given image randomly.
Randomly applies the transformations:
- rotation
- shear
- scale
- translate
- sharpen
- blur
Returns the distorted image.
"""
offset, scale = (0, 0), (64, 64)
t = random.choice(["sine"]) # "rotate", "shear", "scale",
f = random.choice(["blur", "sharpen", "smooth"])
# randomly apply transformations...
# rotate image
if("rotate" in t):
img = img.rotate(random.uniform(-30, 30))
# shear image
if("shear" in t):
y_shear = random.uniform(-0.2, 0.2)
x_shear = random.uniform(-0.2, 0.2)
img = img.transform(img.size, PImage.AFFINE, (1, x_shear, 0, y_shear, 1, 0))
# scale and translate image
if("scale" in t):
#scale the image
size_x = random.randrange(20, 63)
size_y = random.randrange(20, 63)
scale = (size_x, size_y)
offset = (math.ceil((64 - size_x) / 2), math.ceil((64 - size_y) / 2))
img = img.resize(scale)
# put it again on a black background (translated)
background = PImage.new('L', (64, 64))
trans_x = random.randrange(0, math.floor((64 - size_x)))
trans_y = random.randrange(0, math.floor((64 - size_y)))
offset = (trans_x, trans_y)
background.paste(img, offset)
img = background
if("sine" in t):
t_img = np.array(img)
A = t_img.shape[0] / 3.0
w = 2.0 / t_img.shape[1]
shift = lambda x: random.uniform(0.15, 0.2) * A * np.sin(-2*np.pi*x * w)
for i in range(t_img.shape[0]):
t_img[:,i] = np.roll(t_img[:,i], int(shift(i)))
img = PImage.fromarray(t_img)
# blur
if("blur" in f):
img = img.filter(ImageFilter.GaussianBlur(radius=random.uniform(0.5, 1.2)))
# sharpen
if("sharpen" in f):
img = img.filter(ImageFilter.SHARPEN)
# smooth
if("smooth" in f):
img = img.filter(ImageFilter.SMOOTH)
return img, offset, scale
class DataGenerator(tf.keras.utils.Sequence):
def __init__(self, x_col, y_col, batch_size, mode="training", shuffle=True):
self.batch_size = batch_size
self.undistorted_images = batch_size // 2
self.shuffle = shuffle
self.indices = len(x_col)
self.x_col = x_col
self.y_col = y_col
def __len__(self):
return self.indices // self.batch_size
def on_epoch_end(self):
if(False):
rng_state = np.random.get_state()
np.random.shuffle(x)
np.random.set_state(rng_state)
np.random.shuffle(o_y)
def __getitem__(self, index):
X, Y = [], []
for i in range(index * self.undistorted_images, (index+1) * self.undistorted_images):
base_img = self.x_col[i]
img = PImage.fromarray(np.uint8(base_img.reshape(64, 64) * 255))
# distort_sample() creates random variations of an image
img, *unused = distort_sample(img)
# add transformed image
X.append(np.array(img).reshape(64, 64, 1))
Y.append(self.y_col[i])
# add base image
X.append(base_img)
Y.append(self.y_col[i])
return np.array(X), np.array(Y)
#instantiate generators
training_generator = DataGenerator(x_col = x_train, y_col = y_train, batch_size = 256)
validation_generator = DataGenerator(x_col = x_test, y_col = y_test, batch_size = 256)
#train the model
hist = model.fit(
x=training_generator,
epochs=100,
validation_data=training_generator,
max_queue_size=50,
workers=8,
#use_multiprocessing=True <- this freezes the program
)
In the end I needed to make the Data generator use multi processing. To do this, the arrays needed to be stored in shared memory and than used in the sub processes.
import multiprocessing as mp
import numpy as np
from PIL import Image as PImage
from PIL import ImageFilter
import random
import math
import tensorflow as tf
shared_dict = {}
def distort_sample(img : PImage) -> (PImage, [int], [int]):
"""
Distort the given image randomly.
Randomly applies the transformations:
rotation, shear, scale, translate,
Randomly applies the filter:
sharpen, blur, smooth
Returns the distorted image.
"""
offset, scale = (0, 0), (64, 64)
t = random.choice(["sine", "rotate", "shear", "scale"])
f = random.choice(["blur", "sharpen", "smooth"])
# randomly apply transformations...
# rotate image
if("rotate" in t):
img = img.rotate(random.uniform(-15, 15))
# shear image
if("shear" in t):
y_shear = random.uniform(-0.2, 0.2)
x_shear = random.uniform(-0.2, 0.2)
img = img.transform(img.size, PImage.AFFINE, (1, x_shear, 0, y_shear, 1, 0))
# scale and translate image
if("scale" in t):
#scale the image
size_x = random.randrange(25, 63)
size_y = random.randrange(25, 63)
scale = (size_x, size_y)
offset = (math.ceil((64 - size_x) / 2), math.ceil((64 - size_y) / 2))
img = img.resize(scale)
# put it again on a black background (translated)
background = PImage.new('L', (64, 64))
trans_x = random.randrange(0, math.floor((64 - size_x)))
trans_y = random.randrange(0, math.floor((64 - size_y)))
offset = (trans_x, trans_y)
background.paste(img, offset)
img = background
if("sine" in t):
t_img = np.array(img)
A = t_img.shape[0] / 3.0
w = 2.0 / t_img.shape[1]
shift_factor = random.choice([-1, 1]) * random.uniform(0.15, 0.2)
shift = lambda x: shift_factor * A * np.sin(-2*np.pi*x * w)
for i in range(t_img.shape[0]):
t_img[:,i] = np.roll(t_img[:,i], int(shift(i)))
img = PImage.fromarray(t_img)
# blur
if("blur" in f):
img = img.filter(ImageFilter.GaussianBlur(radius=random.uniform(0.5, 1.2)))
# sharpen
if("sharpen" in f):
img = img.filter(ImageFilter.SHARPEN)
# smooth
if("smooth" in f):
img = img.filter(ImageFilter.SMOOTH)
return img, offset, scale
def generator_func(start_index, end_index, x_shape, y_shape):
X, Y = [], []
x_loc = np.frombuffer(shared_dict["x"], dtype="float16").reshape(x_shape)
y_loc = np.frombuffer(shared_dict["y"], dtype="b").reshape(y_shape)
for i in range(start_index, end_index):
base_img = x_loc[i]
img = PImage.fromarray(np.uint8(base_img.reshape(64, 64) * 255))
img, *unused = distort_sample(img)
# add transformed image
X.append(np.array(img).reshape(64, 64, 1))
Y.append(y_loc[i])
X.append(np.array(img).reshape(64, 64, 1))
Y.append(y_loc[i])
# add base image
#X.append(base_img)
#Y.append(y_loc[i])
return X, Y
def generator_initializer(_x_shared, _y_shared):
shared_dict["x"] = _x_shared
shared_dict["y"] = _y_shared
def generator_func(start_index, end_index, x_shape, y_shape):
X, Y = [], []
x_loc = np.frombuffer(shared_dict["x"], dtype="float16").reshape(x_shape)
y_loc = np.frombuffer(shared_dict["y"], dtype="b").reshape(y_shape)
for i in range(start_index, end_index):
base_img = x_loc[i]
img = PImage.fromarray(np.uint8(base_img.reshape(64, 64) * 255))
img, *unused = distort_sample(img)
# add transformed image
X.append(np.array(img).reshape(64, 64, 1))
Y.append(y_loc[i])
X.append(np.array(img).reshape(64, 64, 1))
Y.append(y_loc[i])
# add base image
#X.append(base_img)
#Y.append(y_loc[i])
return X, Y
class DataGenerator(tf.keras.utils.Sequence):
def __init__(self, num_samples, batch_size,
percentage, mode,
x_shared, y_shared,
x_np_shape, y_np_shape,
processes, shuffle=True):
self.num_samples = num_samples
# 50% original images + 50% augmented images
self.batch_size = batch_size // 2
self.percentage = percentage
# an offset to devide the data set into test and train
self.start_index = 0
if(mode == "testing"):
self.start_index = num_samples - (num_samples // 100 * percentage)
# is this a train or a test generator
self.mode = mode
# how many processes should be used for this generator
self.processes = processes
# should the arrays be shuffled after each epoch
self.shuffle = shuffle
self.x_np_shape = x_np_shape
self.y_np_shape = y_np_shape
# a pool of processes for generating augmented data
self.pool = mp.Pool(processes=self.processes,
initializer=generator_initializer,
initargs=(x_shared, y_shared))
def __len__(self):
return (self.num_samples // 100 * self.percentage) // self.batch_size
def on_epoch_end(self):
if(False):
rng_state = np.random.get_state()
np.random.shuffle(x_np)
np.random.set_state(rng_state)
np.random.shuffle(y_np)
def __getitem__(self, index):
arguments = []
slice_size = self.batch_size // self.processes
current_batch = index * self.batch_size
for i in range(self.processes):
slice_start = self.start_index + (current_batch + i * slice_size)
slice_end = self.start_index + (current_batch + (i+1) * slice_size)
arguments.append([slice_start, slice_end, self.x_np_shape, self.y_np_shape])
return_values = self.pool.starmap(generator_func, arguments)
X, Y = [], []
for imgs, labels in return_values:
X.append(imgs)
Y.append(labels)
return np.concatenate(X).astype(np.float16), np.concatenate(Y).astype(np.float16)

Tensorflow : ValueError: Can't load save_path when it is None

import os
import tarfile
from six.moves import urllib
URL = 'http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz'
PATH = 'aclImdb'
def fetch_data(url = URL, path = PATH):
if not os.path.isdir(path):
os.makedirs(path)
file_path = os.path.join(oath, "aclImdb_v1.tar.gz")
urllib.request.urlretrieve(url, file_path)
file_gz = tarfile.open(file_path)
file_gz.extractall(path = path)
file_gz.close()
import pyprind # for progress visualisation
import pandas as pd
PATH = 'aclImdb'
labels = {'pos': 1, 'neg': 0} # int class labels for 'positive' and 'negative'
pbar = pyprind.ProgBar(50000) # initialise a progress bar with 50k iterations = no. of docs
df = pd.DataFrame()
# use nested for loops to iterate over 'train' & 'test' subdir
for s in ('test', 'train'):
for l in ('pos', 'neg'): # and read text files from 'pos' and 'neg' subdir
path = os.path.join(PATH, s, l)
for file in os.listdir(path):
# append to the df pandas DataFrame with an int class (post = 1, neg = 0)
with open(os.path.join(path, file), 'r', encoding = 'utf-8') as infile:
txt = infile.read()
df = df.append([[txt, labels[l]]], ignore_index = True)
pbar.update()
df.columns = ['review', 'sentiment']
import numpy as np
np. random.seed(0)
df = df.reindex(np.random.permutation(df.index))
df.to_csv('movie_data.csv', index = False, encoding = 'utf-8')
n_words = max(list(word_to_int.values())) + 1
df = pd.read_csv('movie_data.csv', encoding = 'utf-8')
df.head(3)
# Separate words and count each word's occurence
import pyprind # for progress visualisation
from collections import Counter
from string import punctuation
import re
counts = Counter() # collects the counts of occurence of each unique word
pbar = pyprind.ProgBar(len(df['review']),
title = 'Counting word occurences...') # progress bar
for i, review in enumerate(df['review']):
text = ''.join([c if c not in punctuation else ' '+c+' '
for c in review]).lower()
df.loc[i, 'review'] = text
pbar.update()
counts.update(text.split())
# Mapping each unique word to an int
word_counts = sorted(counts, key = counts.get, reverse = True)
print(word_counts[:5])
word_to_int = {word: ii for ii, word in enumerate(word_counts, 1)}
mapped_reviews = []
pbar = pyprind.ProgBar(len(df['review']),
title = 'Map movie reviews to integers...')
# Left-pad with zeros if the sequence length < 200
# Use 200 elements if the length > 200
sequence_length = 200
sequences = np.zeros((len(mapped_reviews), sequence_length), dtype = int)
for i, row in enumerate(mapped_reviews):
review_arr = np.array(row)
sequences[i, -len(row):] = review_arr[-sequence_length:]
# Split the dataset into training and test sets
X_train = sequences[:25000, :]
y_train = df.loc[:25000, 'sentiment'].values
X_test = sequences[25000:, :]
y_test = df.loc[25000:, 'sentiment'].values
# Define the mini-batches generator
np.random.seed(123)
def batch_gen(x, y = None, batch_size = 64):
n_batches = len(x) // batch_size
x = x[:n_batches * batch_size]
if y is not None:
y = y[:n_batches * batch_size]
for ii in range(0, len(x), batch_size):
if y is not None:
yield x[ii : ii + batch_size], y[ii : ii + batch_size]
else:
yield x[ii : ii + batch_size]
import tensorflow as tf
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' ## suppress the 3.5 warning if using TF 1.4
class SentimentRNN(object):
# Define __init__
def __init__(self,
n_words,
seq_len = 200,
lstm_size = 256,
num_layers = 1,
batch_size = 64,
learning_rate = 0.0001,
embed_size = 200):
self.n_words = n_words
self.seq_len = seq_len
self.lstm_size = lstm_size # no. of hidden units
self.num_layers = num_layers
self.batch_size = batch_size
self.learning_rate = learning_rate
self.embed_size = embed_size
self.g = tf.Graph()
with self.g.as_default():
tf.set_random_seed(123)
self.build()
self.saver = tf.train.Saver()
self.init_op = tf.global_variables_initializer()
# Define the build method
def build(self):
# Define the placeholders
tf_x = tf.placeholder(tf.int32,
shape = (self.batch_size, self.seq_len),
name = 'tf_x')
tf_y = tf.placeholder(tf.float32,
shape = (self.batch_size),
name = 'tf_y')
tf_keepprob = tf.placeholder(tf.float32,
name = 'tf_keepprob')
# Create the embedding layer
embedding = tf.Variable(
tf.random_uniform(
shape = (self.n_words, self.embed_size),
minval = -1,
maxval = 1),
name = 'embedding')
embed_x = tf.nn.embedding_lookup(embedding,
tf_x,
name = 'embed_x')
# Define LSTM cells and stack them
cells = tf.contrib.rnn.MultiRNNCell(
[tf.contrib.rnn.DropoutWrapper(
tf.contrib.rnn.BasicLSTMCell(num_units = self.lstm_size),
output_keep_prob = tf_keepprob)
for i in range(self.num_layers)])
# Define the initial state:
self.initial_state = cells.zero_state(
self.batch_size, tf.float32)
print(' << initial state >> ', self.initial_state)
# Put together components with tf.nn.dynamic_rnn
lstm_outputs, self.final_state = tf.nn.dynamic_rnn(
cell = cells,
inputs = embed_x,
initial_state = self.initial_state)
## lstm_outputs shape: [batch_size, max_time, cells.output_size]
print('\n << lstm_output >> ', lstm_outputs)
print('\n << final state >> ', self.final_state)
# Apply a full-connected layer on the RNN output
logits = tf.layers.dense(
inputs = lstm_outputs[:, -1],
units = 1, # dimensionality of the output space
activation = None,
name = 'logits')
# Remove dimensions of size 1 from the tensor shape
logits = tf.squeeze(input = logits,
name = 'logits_squeezed')
print ('\n << logits >> ', logits)
# If you want prob's
y_proba = tf.nn.sigmoid(logits, name = 'probabilities')
predictions = {'probabilities' : y_proba,
'labels' : tf.cast(tf.round(y_proba),
tf.int32,
name = 'labels')}
print('\n << predictions >> ', predictions)
# Define the cost function
cost = tf.reduce_mean(
tf.nn.sigmoid_cross_entropy_with_logits(
labels = tf_y,
logits = logits),
name = 'cost')
# Define the optimiser
optimizer = tf.train.AdamOptimizer(self.learning_rate)
train_op = optimizer.minimize(cost, name = 'train_op')
# Define the train method
def train(self, X_train, y_train, num_epochs):
with tf.Session(graph = self.g) as sess:
sess.run(self.init_op)
iteration = 1
for epoch in range(num_epochs):
state = sess.run(self.initial_state)
for batch_x, batch_y in batch_gen(
X_train,
y_train,
batch_size = self.batch_size):
feed = {'tf_x:0' : batch_x,
'tf_y:0' : batch_y,
'tf_keepprob:0' : 0.5,
self.initial_state : state}
loss, _, state = sess.run(
['cost:0',
'train_op',
self.final_state],
feed_dict=feed)
if iteration % 20 == 0:
print("Epoch: %d/%d Iteration: %d "
"| Train loss: %.5f" % (
epoch + 1,
num_epochs,
iteration,
loss))
iteration += 1
if (epoch + 1) % 10 == 0:
self.saver.save(
sess,
"model/sentiment-%d.ckpt" % epoch)
# Define the predict method
def predict(self, X_data, return_proba=False):
preds = []
with tf.Session(graph = self.g) as sess:
self.saver.restore(
sess,
tf.train.latest_checkpoint('model/'))
test_state = sess.run(self.initial_state)
for ii, batch_x in enumerate(batch_gen(
x = X_data,
y = None,
batch_size = self.batch_size), 1):
feed = {'tf_x:0' : batch_x,
'tf_keepprob:0' : 1.0,
self.initial_state : test_state}
if return_proba:
pred, test_state = sess.run(
['probabilities:0', self.final_state],
feed_dict=feed)
else:
pred, test_state = sess.run(
['labels:0', self.final_state],
feed_dict=feed)
preds.append(pred)
return np.concatenate(preds)
for review in df['review']:
mapped_reviews.append([word_to_int[word] for word in review.split()])
pbar.update()
rnn = SentimentRNN(n_words = n_words,
seq_len = sequence_length,
embed_size = 256,
lstm_size = 128,
num_layers = 1,
batch_size = 100,
learning_rate = 0.001)
preds = rnn.predict(X_test)
y_true = y_test\[:len(preds)\]
print('Test accuracy... %.3f' % (np.sum(preds == y_true) / len(y_true)))][1]
Create an object of the SentimentRNN class with the following parameters:
n_words = n_words, seq_len = sequence_length, embed_size = 256, lstm_size = 128, num_layers = 1, batch_size = 100, learning_rate = 0.001.
Since we have a relatively small dataset, the number of layers = 1 may generalise better
enter image description here
ValueError Traceback (most recent call last)
<ipython-input-23-a3cfe03a9a49> in <module>()
----> 1 preds = rnn.predict(X_test)
2 y_true = y_test[:len(preds)]
3 print('Test accuracy... %.3f' % (np.sum(preds == y_true) / len(y_true)))
<ipython-input-12-d83ee67c43b6> in predict(self, X_data, return_proba)
173 self.saver.restore(
174 sess,
--> 175 tf.train.latest_checkpoint('model/'))
176 test_state = sess.run(self.initial_state)
177
/usr/local/anaconda/lib/python3.6/site-packages/tensorflow/python/training/saver.py in restore(self, sess, save_path)
1680 return
1681 if save_path is None:
-> 1682 raise ValueError("Can't load save_path when it is None.")
1683 logging.info("Restoring parameters from %s", save_path)
1684 if context.in_graph_mode():
ValueError: Can't load save_path when it is None.
The error just means tf.train.latest_checkpoint didn't find anything. It returns None, then the Saver complains because it was passed None. So there's no checkpoint in that directory.

Keras Neural Style Transfer: backend.gradients returns None

I am trying to implement Neural Style Transfer using Keras and trying to keep it as simple as possible. While trying to find gradient using backend.gradients() function of keras, it returns [None]. My code is as follows:
content_image = cv2.imread("C:/Users/Max/Desktop/IMG_20170331_103755.jpg")
content_image = cv2.resize(content_image, (512,512))
style_image = cv2.imread("C:/Users/Max/Desktop/starry.jpg")
style_image = cv2.resize(style_image, (512,512))
content_array = np.asarray(content_image, dtype=np.float32)
content_array = np.expand_dims(content_array, axis=0)
style_array = np.asarray(style_image, dtype=np.float32)
style_array = np.expand_dims(style_array, axis=0)
# Constants:
epochs = 1
height = 512
width = 512
num_channels = 3
step_size = 10
content_layer = ['block2_conv2']
style_layer = ['block1_conv2', 'block2_conv2', 'block3_conv3','block4_conv3', 'block5_conv3']
loss_total = backend.variable(0.0)
# VGG16 Model:
model = VGG16(input_shape = [height, width, num_channels],weights='imagenet', include_top=False)
# Defining losses:
def content_loss(Content, Mixed):
content_loss = backend.mean(backend.square(Mixed - Content))
return content_loss
def gram(layer):
flat = backend.reshape(layer, shape=[1, -1])
gram = backend.dot(flat, backend.transpose(flat))
return gram
def style_loss(Style, Mixed):
S_G = gram(Style)
M_G = gram(Mixed)
size = height*width
return backend.sum(backend.square(S_G - M_G)) / (4. * (num_channels ** 2) * (size ** 2))
'''
def denoise(Image):
loss = backend.mean(backend.abs(Image[:,1:,:,:] - Image[:,:-1,:,:]) + backend.abs(Image[:,:,1:,:] - Image[:,:,:-1,:]))
return loss
'''
# Backend Functions:
output_c = backend.function(inputs = [model.layers[0].input] , outputs = [model.get_layer(content_layer[0]).output])
output_s = backend.function(inputs = [model.layers[0].input] , outputs = [model.get_layer(layer).output for layer in style_layer])
content_output = output_c([content_array])
style_output = output_s([style_array])
# Randomly generated image:
Mixed = np.random.uniform(0, 255, [1, height, width, 3]) - 128
# Loop:
for i in range(epochs):
mixed_c = output_c([Mixed])
mixed_c = mixed_c[0]
loss_c = content_loss(content_output[0], mixed_c)
total = []
mixed_s = output_s([Mixed])
for i in range(len(style_layer)):
style = style_loss(style_output[i], mixed_s[i])
total.append(style)
loss_s = backend.sum(total)
#loss_d = denoise(Mixed)
loss_total = w_c * loss_c + w_s * loss_s #+ w_d * loss_d
gradient = backend.gradients(loss_total, Mixed)
gradient = np.squeeze(gradient)
step_size = step_size / (np.std(gradient) + 1e-8)
Mixed -= gradient * step_size
What changes should i make to get the gradients working properly. I am clueless as to what went wrong.
Thanks!
You're taking gradient of Mixed which is a numpy array and not a variable. You need to define a tensor which will then have value of Mixed.
From Keras documentation:
gradients
keras.backend.gradients(loss, variables)
Returns the gradients of variables w.r.t. loss.
Arguments
loss: Scalar tensor to minimize.
variables: List of variables.

How to create layer0 input for input images with 3 channels

Hi I am following the http://deeplearning.net/tutorial/code/convolutional_mlp.py code to implement a conv neural net. I have input images where the channel is important and hence I want to have 3 channel feature map as layer 0 input.
So I need something like this
layer0_input = x.reshape((batch_size, 3, 240, 135)) # width 240, height 135, 3 channels
instead of
layer0_input = x.reshape((batch_size, 1, 28, 28)) # 28*28 normalized MNIST gray scale images
which will be used here
layer0 = LeNetConvPoolLayer(
rng,
input=layer0_input,
image_shape=(batch_size, 3, 240, 135),
filter_shape=(nkerns[0], 1, 5, 5),
poolsize=(2, 2)
)
where that x is provided to theano as
train_model = theano.function(
[index],
cost,
updates=updates,
givens={
x: train_set_x[index * batch_size: (index + 1) * batch_size],
y: train_set_y[index * batch_size: (index + 1) * batch_size]
}
)
So - my question is - how should I create (shape) that train_set_x ?
With (gray scale intensity - i.e single channel) train_set_x is created as
shared_x = theano.shared(numpy.asarray(data_x,
dtype=theano.config.floatX),
where data_x is a flattened numpy array of length 784 (for 28*28 pixels)
Thanks a lot for advice
I was able to get it working. I am pasting some code here which might help some one. Not very elegant - but works.
def shuffle_in_unison(a, b):
#courtsey http://stackoverflow.com/users/190280/josh-bleecher-snyder
assert len(a) == len(b)
shuffled_a = np.empty(a.shape, dtype=a.dtype)
shuffled_b = np.empty(b.shape, dtype=b.dtype)
permutation = np.random.permutation(len(a))
for old_index, new_index in enumerate(permutation):
shuffled_a[new_index] = a[old_index]
shuffled_b[new_index] = b[old_index]
return shuffled_a, shuffled_b
def createDataSet(imagefolder):
os.chdir(imagefolder)
# total number of files
number_of_files = len([item for item in os.listdir('.') if os.path.isfile(os.path.join('.', item))])
# get a shuffled list : I needed this because my image names were of the format n_x_<some details>.jpg
# where n was my target and x was a number from 0 to m-1 where m was the number of samples
# of the target value n. So I needed so shuffle and iterate while putting images in train
# test and validate arrays
image_index_array = range(0,number_of_files)
random.seed(12)
random.shuffle(image_index_array)
# split 80/10/10 - train/test/val
trainsize = int(number_of_files*.8)
testsize = int(number_of_files*.1)
valsize = number_of_files - trainsize - testsize
# create the random value arrays of train/test/val by slicing the total image index array
train_index_array = image_index_array[0:trainsize]
test_index_array = image_index_array[trainsize:trainsize+testsize]
validate_index_array = image_index_array[trainsize+testsize:]
# initialize the data structures
dataset = {'train':[[],[]],'test':[[],[]],'validate':[[],[]]}
i_counter = 0
train_X = []
train_y = []
test_X = []
test_y = []
val_X = []
val_y = []
for item in os.listdir('.'):
if not os.path.isfile(os.path.join('.', item)):
continue
if item.endswith('.pkl'):
continue
print 'Processing item ' + item
item_y = item.split('_')[0]
item_x = cv2.imread(item)
height, width = item_x.shape[:2]
# this was my requirement - skip it if you do not need it
if(height != 135 or width != 240):
continue
# get 3 channels
b,g,r = cv2.split(item_x)
item_x = [b,g,r]
item_x = np.array(item_x)
item_x = item_x.reshape(3,135*240)
if i_counter in test_index_array:
test_X.append(item_x)
test_y.append(item_y)
elif i_counter in validate_index_array:
val_X.append(item_x)
val_y.append(item_y)
else:
train_X.append(item_x)
train_y.append(item_y)
i_counter = i_counter + 1
# fix the dimensions. Flatten out the channel and intensity dimensions
train_X = np.array(train_X)
train_X = train_X.reshape(train_X.shape[0],train_X.shape[1]*train_X.shape[2])
test_X = np.array(test_X)
test_X = test_X.reshape(test_X.shape[0],test_X.shape[1]*test_X.shape[2])
val_X = np.array(val_X)
val_X = val_X.reshape(val_X.shape[0],val_X.shape[1]*val_X.shape[2])
train_y = np.array(train_y)
test_y = np.array(test_y)
val_y = np.array(val_y)
# shuffle the train and test arrays in unison
train_X,train_y = shuffle_in_unison(train_X,train_y)
test_X,test_y = shuffle_in_unison(test_X,test_y)
# pickle them
dataset['train'] = [train_X,train_y]
dataset['test'] = [test_X,test_y]
dataset['validate'] = [val_X,val_y]
output = open('pcount.pkl', 'wb')
cPickle.dump(dataset, output)
output.close`
Once you have this pickle file
You can use it in convolutional_mlp.py like this.
layer0_input = x.reshape((batch_size, 3, 135, 240))
# Construct the first convolutional pooling layer:
# filtering reduces the image size to (135-8+1 , 240-5+1) = (128, 236)
# maxpooling reduces this further to (128/2, 236/2) = (64, 118)
# 4D output tensor is thus of shape (batch_size, nkerns[0], 64, 118)
layer0 = LeNetConvPoolLayer(
rng,
input=layer0_input,
image_shape=(batch_size, 3, 135, 240),
filter_shape=(nkerns[0], 3, 8, 5),
poolsize=(2, 2)
)
The load_data function in logistic_sgd.py will need a small change as below
f = open(dataset, 'rb')
dump = cPickle.load(f)
train_set = dump['train']
valid_set = dump['validate']
test_set = dump['test']
f.close()
Hope this helps

Resources