I used Mask R-CNN to train on an image set (note: high-resolution images, e.g. 2400×1920) annotated with the VIA tool, following this reference article on Mask R-CNN usage. I have edited the training script, and the code is as follows:
import os
import sys
import json
import datetime
import numpy as np
import skimage.draw
# Root directory of the project
ROOT_DIR = os.path.abspath("../../")
# Import Mask RCNN
sys.path.append(ROOT_DIR) # To find local version of the library
from mrcnn.config import Config
from mrcnn import model as modellib, utils
# Path to trained weights file
COCO_WEIGHTS_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
print('weights not available')
print('weights available')
DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")
# Configurations
class NeuralCodeConfig(Config):
NAME = "screens"
# We use a GPU with 12GB memory, which can fit two images.
# Adjust down if you use a smaller GPU.
# Number of classes (including background)
NUM_CLASSES = 1 + 10 # Background + other region classes
# Number of training steps per epoch
# Skip detections with < 90% confidence
# Dataset
class NeuralCodeDataset(utils.Dataset):
def load_screen(self, dataset_dir, subset):
"""Load a subset of the screens dataset.
dataset_dir: Root directory of the dataset.
subset: Subset to load: train or val
# Add classes.
# Train or validation dataset?
assert subset in ["train", "val"]
dataset_dir = os.path.join(dataset_dir, subset)
# Load annotations
# VGG Image Annotator saves each image in the form:
# { 'filename': '28503151_5b5b7ec140_b.jpg',
# 'regions': {
# '0': {
# 'region_attributes': {},
# 'shape_attributes': {
# 'all_points_x': [...],
# 'all_points_y': [...],
# 'name': 'polygon'}},
# ... more regions ...
# },
# 'size': 100202
# }
# We mostly care about the x and y coordinates of each region
annotations = json.load(open(os.path.join(dataset_dir, "via_region_data.json")))
if annotations is None:
print ("region data json not loaded")
print("region data json loaded")
# print(annotations)
annotations = list(annotations.values()) # don't need the dict keys
# The VIA tool saves images in the JSON even if they don't have any
# annotations. Skip unannotated images.
annotations = [a for a in annotations if a['regions']]
# Add images
for a in annotations:
# Get the x, y coordinaets of points of the polygons that make up
# the outline of each object instance. There are stores in the
# shape_attributes and region_attributes (see json format above)
polygons = [r['shape_attributes'] for r in a['regions']]
screens = [r['region_attributes']for r in a['regions']]
#getting the filename by spliting
class_name = screens[0]['html']
file_name = a['filename'].split("/")
file_name = file_name[len(file_name)-1]
#getting class_ids with file_name
class_ids = class_name+"_"+file_name
# #getting width an height of the images
# height = [h['height'] for h in polygons]
# width = [w['width'] for w in polygons]
# print(height,'height')
# print('polygons',polygons)
# load_mask() needs the image size to convert polygons to masks.
# Unfortunately, VIA doesn't include it in JSON, so we must readpath
# the image. This is only managable since the dataset is tiny.
image_path = os.path.join(dataset_dir,file_name)
image =
#resizing images
# image = utils.resize_image(image, min_dim=800, max_dim=1000, min_scale=None, mode="square")
# print('image',image)
height,width = image.shape[:2]
# print('height',height)
# print('width',width)
# height = 800
# width = 800
image_id=file_name, # use file name as a unique image id
width=width, height=height,
def load_mask(self, image_id):
"""Generate instance masks for an image.
masks: A bool array of shape [height, width, instance count] with
one mask per instance.
class_ids: a 1D array of class IDs of the instance masks.
# If not a screens dataset image, delegate to parent class.
image_info = self.image_info[image_id]
if image_info["source"] != "screens":
return super(self.__class__, self).load_mask(image_id)
# Convert polygons to a bitmap mask of shape
# [height, width, instance_count]
info = self.image_info[image_id]
mask = np.zeros([info["height"], info["width"], len(info["polygons"])],
for i, p in enumerate(info["polygons"]):
# Get indexes of pixels inside the polygon and set them to 1
rr, cc = skimage.draw.polygon(p['y'], p['x'])
mask[rr, cc, i] = 1
# Return mask, and array of class IDs of each instance. Since we have
# one class ID only, we return an array of 1s
# return mask.astype(np.bool), np.ones([mask.shape[-1]], dtype=np.int32)
# class_ids = np.array(class_ids,dtype=np.int32)
return mask,class_ids
def image_reference(self, image_id):
    """Return the path of the image.

    image_id: index into ``self.image_info``.

    Returns the stored ``"path"`` entry for images whose ``"source"`` is
    ``"screens"``; for any other source the parent class implementation
    is asked and its answer is returned.
    """
    info = self.image_info[image_id]
    if info["source"] == "screens":
        return info["path"]
    # Return the parent's answer instead of discarding it; without the
    # ``return`` every non-"screens" image produced None.
    # NOTE(review): super(self.__class__, self) mirrors load_mask() in this
    # class, but it recurses forever if this class is ever subclassed --
    # consider naming the class explicitly in both methods.
    return super(self.__class__, self).image_reference(image_id)
def train(model):
# Train the model.
# Training dataset.
dataset_train = NeuralCodeDataset()
dataset_train.load_screen(args.dataset, "train")
# Validation dataset
dataset_val = NeuralCodeDataset()
dataset_val.load_screen(args.dataset, "val")
# *** This training schedule is an example. Update to your needs ***
# Since we're using a very small dataset, and starting from
# COCO trained weights, we don't need to train too long. Also,
# no need to train all layers, just the heads should do it.
print("Training network heads")
model.train(dataset_train, dataset_val,
# Training
if __name__ == '__main__':
import argparse
# Parse command line arguments
parser = argparse.ArgumentParser(
description='Train Mask R-CNN to detect screens.')
help="'train' or 'splash'")
parser.add_argument('--dataset', required='True',
help='Directory of the screens dataset')
parser.add_argument('--weights', required=True,
help="Path to weights .h5 file or 'coco'")
parser.add_argument('--logs', required=False,
help='Logs and checkpoints directory (default=logs/)')
parser.add_argument('--image', required=False,
metavar="path or URL to image",
help='Image to apply the color splash effect on')
parser.add_argument('--video', required=False,
metavar="path or URL to video",
help='Video to apply the color splash effect on')
args = parser.parse_args()
# Validate arguments
if args.command == "train":
assert args.dataset, "Argument --dataset is required for training"
elif args.command == "splash":
assert args.image or,\
"Provide --image or --video to apply color splash"
print("Weights: ", args.weights)
print("Dataset: ", args.dataset)
print("Logs: ", args.logs)
# Configurations
if args.command == "train":
config = NeuralCodeConfig()
class InferenceConfig(NeuralCodeConfig):
# Set batch size to 1 since we'll be running inference on
# one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU
config = InferenceConfig()
# Create model
if args.command == "train":
model = modellib.MaskRCNN(mode="training", config=config,
model = modellib.MaskRCNN(mode="inference", config=config,
# Select weights file to load
if args.weights.lower() == "coco":
weights_path = COCO_WEIGHTS_PATH
# Download weights file
if not os.path.exists(weights_path):
elif args.weights.lower() == "last":
# Find last trained weights
weights_path = model.find_last()
elif args.weights.lower() == "imagenet":
# Start from ImageNet trained weights
weights_path = model.get_imagenet_weights()
weights_path = args.weights
# Load weights
print("Loading weights ", weights_path)
if args.weights.lower() == "coco":
# Exclude the last layers because they require a matching
# number of classes
model.load_weights(weights_path, by_name=True, exclude=[
"mrcnn_class_logits", "mrcnn_bbox_fc",
"mrcnn_bbox", "mrcnn_mask"])
model.load_weights(weights_path, by_name=True)
# Train or evaluate
if args.command == "train":
# elif args.command == "splash":
# detect_and_color_splash(model, image_path=args.image,
print("'{}' is not recognized. "
"Use 'train' or 'splash'".format(args.command))
I get the following error when training on this dataset, starting from the pretrained COCO weights:
UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
"Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
2018-08-09 13:52:27.993239: W tensorflow/core/framework/] Allocation of 51380224 exceeds 10% of system memory.
2018-08-09 13:52:28.037704: W tensorflow/core/framework/] Allocation of 51380224 exceeds 10% of system memory.
/home/scit/anaconda3/lib/python3.6/site-packages/keras/engine/ UserWarning: Using a generator with use_multiprocessing=True` and multiple workers may duplicate your data. Please consider using the`keras.utils.Sequence class.
UserWarning('Using a generator with `use_multiprocessing=True`'
ERROR:root:Error processing image {'id': '487.jpg', 'source': 'screens', 'path': '../../datasets/screens/train/487.jpg', 'width': 1920, 'height': 7007, 'polygons': [{'name': 'rect', 'x': 384, 'y': 5, 'width': 116, 'height': 64}, {'name': 'rect', 'x': 989, 'y': 17, 'width': 516, 'height': 42}, {'name': 'rect', 'x': 984, 'y': 5933, 'width': 565, 'height': 273}, {'name': 'rect', 'x': 837, 'y': 6793, 'width': 238, 'height': 50}], 'class_ids': 'logo_487.jpg'}
Traceback (most recent call last):
File "/home/scit/Desktop/My_work/object_detection/mask_rcnn/mrcnn/", line 1717, in data_generator
File "/home/scit/Desktop/My_work/object_detection/mask_rcnn/mrcnn/", line 1219, in load_image_gt
mask, class_ids = dataset.load_mask(image_id)
File "", line 235, in load_mask
rr, cc = skimage.draw.polygon(p['y'], p['x'])
File "/home/scit/anaconda3/lib/python3.6/site-packages/skimage/draw/", line 441, in polygon
return _polygon(r, c, shape)
File "skimage/draw/_draw.pyx", line 217, in skimage.draw._draw._polygon (skimage/draw/_draw.c:4402)
OverflowError: Python int too large to convert to C ssize_t
My laptop graphics specs are follows:
Nvidia GeForce 830M (2 GB) with 250 CUDA cores
CPU specs:
Intel Core i5 (4th gen), 8 GB RAM
What may be the cause here? Is it the resolution of the images or the limitations of my GPU? Should I proceed with the CPU instead?

I am sharing my observations with Mask RCNN while training my custom dataset.
My dataset comprises images of various dimensions (i.e. the smallest image is approx. 1700 x 1600 pixels and the largest is approx. 8500 x 4600 pixels).
I am training on nVIDIA RTX 2080Ti, 32 GB DDR4 RAM and while training I get the below mentioned warnings; but the training process completes.
UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
"Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
2019-05-23 15:25:23.433774: W T:\src\github\tensorflow\tensorflow\core\common_runtime\] Allocator (GPU_0_bfc) ran out of memory trying to allocate 3.14GiB. The caller indicates that this is not a failure, but may mean that there could be performance gains if more memory were available.
Few months back, I tried the Matterport Splash of Color Example on my Laptop which has 12 GB RAM and nVIDIA 920M (2GB GPU); and have encountered similar Memory Errors.
So, we can suspect that size of the GPU Memory is a contributing factor in this error.
Additionally, batch size is another contributing factor, but I see that you have set IMAGES_PER_GPU = 1. If you search for BATCH_SIZE in the config file in the mrcnn folder, you will find that it is computed as GPU_COUNT * IMAGES_PER_GPU.
So, in your case the batch_size is 1.
In conclusion, I would suggest trying the same code on a more powerful GPU.


My training images are downscaled versions of their associated HR image. Thus, the input and the output images aren't the same dimension. For now, I'm using a hand-crafted sample of 13 images, but eventually I would like to be able to use my 500-ish HR (high-resolution) images dataset. This dataset, however, does not have images of the same dimension, so I'm guessing I'll have to crop them in order to obtain a uniform dimension.
I currently have this code set up: it takes a bunch of 512x512x3 images and applies a few transformations to augment the data (flips). I thus obtain a basic set of 39 images in their HR form, and then I downscale them by a factor of 4, thus obtaining my training set, which consists of 39 images of dimension 128x128x3.
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
import matplotlib.image as mpimg
import skimage
from skimage import transform
from constants import data_path
from constants import img_width
from constants import img_height
from model import setUpModel
def setUpImages():
    """Read the numbered sample JPEGs, augment both groups and pass them on.

    Images ``0.jpg`` .. ``10.jpg`` under ``data_path`` form the training
    sample and ``11.jpg`` .. ``12.jpg`` the final test sample. Each group is
    run through ``dataAugmentation`` and the results go to ``setUpData``.
    """
    sample_amnt = 11
    max_amnt = 13

    def read_numbered(index):
        # File names are simply "<index>.jpg" appended to data_path.
        return mpimg.imread(data_path + str(index) + '.jpg')

    # Extracting images (512x512)
    train = [read_numbered(index) for index in range(sample_amnt)]
    finalTest = [read_numbered(index) for index in range(sample_amnt, max_amnt)]

    # TODO: evaluate replacing the manual augmentation below with Keras'
    # ImageDataGenerator.

    # Augmenting data
    trainData = dataAugmentation(train)
    testData = dataAugmentation(finalTest)
    setUpData(trainData, testData)
def setUpData(trainData, testData):
    """Split each augmented set into targets/inputs and start model setup.

    Per the original comments, the first half of each set holds the
    unaltered images (used as ``Y``) and the second half the deteriorated
    ones (used as ``X``). Each half is rebuilt as a fresh NumPy array before
    being handed to ``setUpModel``.
    """
    # TODO: subtract mean of all images from all images
    train_half = len(trainData) // 2
    test_half = len(testData) // 2

    # First half: unaltered data (targets); second half: deteriorated (inputs).
    Y_train, X_train = trainData[:train_half], trainData[train_half:]
    Y_test, X_test = testData[:test_half], testData[test_half:]

    # Adjusting shapes for Keras input: rebuild every split element by element.
    X_train, Y_train, Y_test, X_test = (
        np.array(list(split)) for split in (X_train, Y_train, Y_test, X_test)
    )

    setUpModel(X_train, Y_train, X_test, Y_test)
# TODO: possibly remove once Keras Preprocessing is integrated?
def dataAugmentation(dataToAugment):
print("Starting to augment data")
arrayToFill = []
# faster computation with values between 0 and 1 ?
dataToAugment = np.divide(dataToAugment, 255.)
# TODO: switch from RGB channels to CbCrY
# # TODO: Try GrayScale
# trainingData = np.array(
# [(cv2.cvtColor(np.uint8(x * 255), cv2.COLOR_BGR2GRAY) / 255).reshape(350, 350, 1) for x in trainingData])
# validateData = np.array(
# [(cv2.cvtColor(np.uint8(x * 255), cv2.COLOR_BGR2GRAY) / 255).reshape(1400, 1400, 1) for x in validateData])
# adding the normal images (8)
for i in range(len(dataToAugment)):
# vertical axis flip (-> 16)
for i in range(len(arrayToFill)):
# horizontal axis flip (-> 32)
for i in range(len(arrayToFill)):
# downsizing by scale of 4 (-> 64 images of 128x128x3)
for i in range(len(arrayToFill)):
(img_width/4, img_height/4),
# # Sanity check: display the images
# plt.figure(figsize=(10, 10))
# for i in range(64):
# plt.subplot(8, 8, i + 1)
# plt.imshow(arrayToFill[i],
return np.array(arrayToFill)
My question is: in my case, can I use the preprocessing tool that Keras offers? I would ideally like to be able to input my high-quality images of varying sizes, crop them (not downsize them) to 512x512x3, and augment them through flips and whatnot. Subtracting the mean would also be part of what I'd like to achieve. That set would represent my validation set.
Reusing the validation set, I want to downscale by a factor of 4 all the images, and that would generate my training set.
Those two sets could then be split appropriately to obtain, ultimately, the famous X_train Y_train X_test Y_test.
I'm just hesitant about throwing out all the work I've done so far to preprocess my mini sample, but I'm thinking if it can all be done with a single built-in function, maybe I should give that a go.
This is my first ML project, hence me not understanding very well Keras, and the documentation isn't always the clearest. I'm thinking that the fact that I'm working with a X and Y that are different in size, maybe this function doesn't apply to my project.
Thank you! :)
Yes you can use keras preprocessing function. Below some snippets to help you...
def cropping_function(x):
return cropped_image
X_image_gen = ImageDataGenerator(preprocessing_function = cropping_function,
horizontal_flip = True,
X_train_flow = X_image_gen.flow(X_train, batch_size = 16, seed = 1)
Y_image_gen = ImageDataGenerator(horizontal_flip = True,
Y_train_flow = Y_image_gen.flow(y_train, batch_size = 16, seed = 1)
train_flow = zip(X_train_flow,Y_train_flow)
Christof Henkel's suggestion is very clean and nice. I would just like to offer another way to do it using imgaug, a convenient way to augment images in lots of different ways. It's useful if you want more ready-made augmentations or if you ever need to use an ML library other than Keras.
It unfortunately doesn't have a built-in way to make crops like that, but it allows implementing custom functions. Here is an example function for generating random crops of a set size from an image that's at least as big as the chosen crop size:
from imgaug import augmenters as iaa
def random_crop(images, random_state, parents, hooks):
    """Cut one random 128x128 crop out of every image.

    The four-argument signature is the one used for the ``func_images``
    callback passed to ``iaa.Lambda``; ``random_state``, ``parents`` and
    ``hooks`` are accepted but not used here.

    images: iterable of arrays indexed as [row, column, ...].

    Returns an array with one entry per input image: a 128x128 window for
    images that are at least 128 pixels in both dimensions, otherwise an
    all-zero 128x128x3 placeholder.
    """
    crop_h, crop_w = 128, 128
    new_images = []
    for img in images:
        height, width = img.shape[0], img.shape[1]
        if height >= crop_h and width >= crop_w:
            # randint's upper bound is exclusive: the +1 keeps the last valid
            # offset reachable and avoids a ValueError (low >= high) when the
            # image is exactly crop-sized.
            top = np.random.randint(0, height - crop_h + 1)
            left = np.random.randint(0, width - crop_w + 1)
            new_images.append(img[top:top + crop_h, left:left + crop_w])
        else:
            # Image too small to crop: emit a blank placeholder instead.
            new_images.append(np.zeros((crop_h, crop_w, 3)))
    return np.array(new_images)
def keypoints_dummy(keypoints_on_images, random_state, parents, hooks):
    """Hand the keypoints back untouched; the other arguments are ignored."""
    unchanged = keypoints_on_images
    return unchanged
cropper = iaa.Lambda(func_images=random_crop, func_keypoints=keypoints_dummy)
You can then combine this function with any other builtin imgaug function, for example the flip functions that you're already using like this:
seq = iaa.Sequential([cropper, iaa.Fliplr(0.5), iaa.Flipud(0.5)])
This function could then generate lots of different crops from each image. An example image with some possible results (note that it would result in actual (128, 128, 3) images, they are just merged into one image here for visualization):
Your image set could then be generated by:
crops_per_image = 10
images = [ for path in glob.glob('train_data/*.jpg')]
augs = np.array([seq.augment_image(img)/255 for img in images for _ in range(crops_per_image)])
It would also be simple to add new functions to be applied to the images, for example the remove mean functions you mentioned.
Here's another way performing random and center crop before resizing using native ImageDataGenerator and flow_from_directory. You can add it as module into your project.
It first resizes image preserving aspect ratio and then performs crop. Resized image size is based on crop_fraction which is hardcoded but can be changed. See crop_fraction = 0.875 line where 0.875 appears to be the most common, e.g. 224px crop from 256px image.
Note that the implementation is done by monkey patching the keras_preprocessing.image.utils.load_img function, as I couldn't find any other way to perform the crop before resizing without rewriting many of the classes above it.
Due to these limitations, the cropping method is enumerated into the interpolation field. Methods are delimited by : where the first part is interpolation and second is crop e.g. lanczos:random. Supported crop methods are none, center, random. When no crop method is specified, none is assumed.
How to use it
Just drop the preprocess_crop.py file into your project to enable cropping. The example below shows how you can use random cropping for training and center cropping for validation:
import preprocess_crop
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.inception_v3 import preprocess_input
# Training with random crop
train_datagen = ImageDataGenerator(
train_img_generator = train_datagen.flow_from_directory(
target_size = (IMG_SIZE, IMG_SIZE),
batch_size = BATCH_SIZE,
class_mode = 'categorical',
interpolation = 'lanczos:random', # <--------- random crop
shuffle = True
# Validation with center crop
validate_datagen = ImageDataGenerator(
validate_img_generator = validate_datagen.flow_from_directory(
target_size = (IMG_SIZE, IMG_SIZE),
batch_size = BATCH_SIZE,
class_mode = 'categorical',
interpolation = 'lanczos:center', # <--------- center crop
shuffle = False
Here's the preprocess_crop.py file to include with your project:
import random
import keras_preprocessing.image
def load_and_crop_img(path, grayscale=False, color_mode='rgb', target_size=None,
"""Wraps keras_preprocessing.image.utils.loag_img() and adds cropping.
Cropping method enumarated in interpolation
# Arguments
path: Path to image file.
color_mode: One of "grayscale", "rgb", "rgba". Default: "rgb".
The desired image format.
target_size: Either `None` (default to original size)
or tuple of ints `(img_height, img_width)`.
interpolation: Interpolation and crop methods used to resample and crop the image
if the target size is different from that of the loaded image.
Methods are delimited by ":" where first part is interpolation and second is crop
e.g. "lanczos:random".
Supported interpolation methods are "nearest", "bilinear", "bicubic", "lanczos",
"box", "hamming" By default, "nearest" is used.
Supported crop methods are "none", "center", "random".
# Returns
A PIL Image instance.
# Raises
ImportError: if PIL is not available.
ValueError: if interpolation method is not supported.
# Decode interpolation string. Allowed Crop methods: none, center, random
interpolation, crop = interpolation.split(":") if ":" in interpolation else (interpolation, "none")
if crop == "none":
return keras_preprocessing.image.utils.load_img(path,
# Load original size image using Keras
img = keras_preprocessing.image.utils.load_img(path,
# Crop fraction of total image
crop_fraction = 0.875
target_width = target_size[1]
target_height = target_size[0]
if target_size is not None:
if img.size != (target_width, target_height):
if crop not in ["center", "random"]:
raise ValueError('Invalid crop method {} specified.', crop)
if interpolation not in keras_preprocessing.image.utils._PIL_INTERPOLATION_METHODS:
raise ValueError(
'Invalid interpolation method {} specified. Supported '
'methods are {}'.format(interpolation,
", ".join(keras_preprocessing.image.utils._PIL_INTERPOLATION_METHODS.keys())))
resample = keras_preprocessing.image.utils._PIL_INTERPOLATION_METHODS[interpolation]
width, height = img.size
# Resize keeping aspect ratio
# result shold be no smaller than the targer size, include crop fraction overhead
target_size_before_crop = (target_width/crop_fraction, target_height/crop_fraction)
ratio = max(target_size_before_crop[0] / width, target_size_before_crop[1] / height)
target_size_before_crop_keep_ratio = int(width * ratio), int(height * ratio)
img = img.resize(target_size_before_crop_keep_ratio, resample=resample)
width, height = img.size
if crop == "center":
left_corner = int(round(width/2)) - int(round(target_width/2))
top_corner = int(round(height/2)) - int(round(target_height/2))
return img.crop((left_corner, top_corner, left_corner + target_width, top_corner + target_height))
elif crop == "random":
left_shift = random.randint(0, int((width - target_width)))
down_shift = random.randint(0, int((height - target_height)))
return img.crop((left_shift, down_shift, target_width + left_shift, target_height + down_shift))
return img
# Monkey patch
keras_preprocessing.image.iterator.load_img = load_and_crop_img

I would like to train my keras model on google cloud machine learning engine. I am currently using image augmentation and grabbing images from a local directory.
train_datagen = ImageDataGenerator(
train_generator = train_datagen.flow_from_directory(
target_size=(IM_WIDTH, IM_HEIGHT),
Is it possible to achieve this behavior with a Google Cloud Storage bucket? Could I first download the images to a local machine? I'm seeing a lot of people using pickle on ML Engine, but that doesn't quite make sense since the images are 'generated' at training time.
I have created a working version of flow_from_directory that uses the Google Storage API instead of os. It's not perfect and some functionality is missing.
import multiprocessing.pool
from functools import partial
from keras.preprocessing.image import Iterator
import warnings
import numpy as np
import keras.backend as K
import keras
from import storage
import os
# rewrite of flow_from_directory
def flow_from_google_storage(imageDataGen, project, bucket, directory,
target_size=(256, 256), color_mode='rgb',
classes=None, class_mode='categorical',
batch_size=32, shuffle=True, seed=None,
"""Takes the path to a directory, and generates batches of augmented/normalized data.
# Arguments
directory: path to the target directory.
It should contain one subdirectory per class.
Any PNG, JPG, BMP, PPM or TIF images inside each of the subdirectories directory tree will be included in the generator.
See [this script]( for more details.
target_size: tuple of integers `(height, width)`, default: `(256, 256)`.
The dimensions to which all images found will be resized.
color_mode: one of "grayscale", "rbg". Default: "rgb".
Whether the images will be converted to have 1 or 3 color channels.
classes: optional list of class subdirectories (e.g. `['dogs', 'cats']`). Default: None.
If not provided, the list of classes will be automatically
inferred from the subdirectory names/structure under `directory`,
where each subdirectory will be treated as a different class
(and the order of the classes, which will map to the label indices, will be alphanumeric).
The dictionary containing the mapping from class names to class
indices can be obtained via the attribute `class_indices`.
class_mode: one of "categorical", "binary", "sparse", "input" or None. Default: "categorical".
Determines the type of label arrays that are returned: "categorical" will be 2D one-hot encoded labels,
"binary" will be 1D binary labels, "sparse" will be 1D integer labels, "input" will be images identical
to input images (mainly used to work with autoencoders).
If None, no labels are returned (the generator will only yield batches of image data, which is useful to use
`model.predict_generator()`, `model.evaluate_generator()`, etc.).
Please note that in case of class_mode None,
the data still needs to reside in a subdirectory of `directory` for it to work correctly.
batch_size: size of the batches of data (default: 32).
shuffle: whether to shuffle the data (default: True)
seed: optional random seed for shuffling and transformations.
save_to_dir: None or str (default: None). This allows you to optionally specify a directory to which to save
the augmented pictures being generated (useful for visualizing what you are doing).
save_prefix: str. Prefix to use for filenames of saved pictures (only relevant if `save_to_dir` is set).
save_format: one of "png", "jpeg" (only relevant if `save_to_dir` is set). Default: "png".
follow_links: whether to follow symlinks inside class subdirectories (default: False).
subset: Subset of data (`"training"` or `"validation"`) if
`validation_split` is set in `ImageDataGenerator`.
interpolation: Interpolation method used to resample the image if the
target size is different from that of the loaded image.
Supported methods are `"nearest"`, `"bilinear"`, and `"bicubic"`.
If PIL version 1.1.3 or newer is installed, `"lanczos"` is also
supported. If PIL version 3.4.0 or newer is installed, `"box"` and
`"hamming"` are also supported. By default, `"nearest"` is used.
# Returns
A DirectoryIterator yielding tuples of `(x, y)` where `x` is a numpy array containing a batch
of images with shape `(batch_size, *target_size, channels)` and `y` is a numpy array of corresponding labels.
return GoogleStorageIterator(project, bucket,
directory, imageDataGen,
target_size=target_size, color_mode=color_mode,
classes=classes, class_mode=class_mode,
batch_size=batch_size, shuffle=shuffle, seed=seed,
class GoogleStorageIterator(Iterator):
"""Iterator capable of reading images from a directory on disk.
# Arguments
directory: Path to the directory to read images from.
Each subdirectory in this directory will be
considered to contain images from one class,
or alternatively you could specify class subdirectories
via the `classes` argument.
image_data_generator: Instance of `ImageDataGenerator`
to use for random transformations and normalization.
target_size: tuple of integers, dimensions to resize input images to.
color_mode: One of `"rgb"`, `"grayscale"`. Color mode to read images.
classes: Optional list of strings, names of subdirectories
containing images from each class (e.g. `["dogs", "cats"]`).
It will be computed automatically if not set.
class_mode: Mode for yielding the targets:
`"binary"`: binary targets (if there are only two classes),
`"categorical"`: categorical targets,
`"sparse"`: integer targets,
`"input"`: targets are images identical to input images (mainly
used to work with autoencoders),
`None`: no targets get yielded (only input images are yielded).
batch_size: Integer, size of a batch.
shuffle: Boolean, whether to shuffle the data between epochs.
seed: Random seed for data shuffling.
data_format: String, one of `channels_first`, `channels_last`.
save_to_dir: Optional directory where to save the pictures
being yielded, in a viewable format. This is useful
for visualizing the random transformations being
applied, for debugging purposes.
save_prefix: String prefix to use for saving sample
images (if `save_to_dir` is set).
save_format: Format to use for saving sample images
(if `save_to_dir` is set).
subset: Subset of data (`"training"` or `"validation"`) if
validation_split is set in ImageDataGenerator.
interpolation: Interpolation method used to resample the image if the
target size is different from that of the loaded image.
Supported methods are "nearest", "bilinear", and "bicubic".
If PIL version 1.1.3 or newer is installed, "lanczos" is also
supported. If PIL version 3.4.0 or newer is installed, "box" and
"hamming" are also supported. By default, "nearest" is used.
def __init__(self, project, bucket, directory, image_data_generator,
target_size=(256, 256), color_mode='rgb',
classes=None, class_mode='categorical',
batch_size=32, shuffle=True, seed=None,
save_to_dir=None, save_prefix='', save_format='png',
if data_format is None:
data_format = K.image_data_format() = directory
self.image_data_generator = image_data_generator
self.target_size = tuple(target_size)
if color_mode not in {'rgb', 'grayscale'}:
raise ValueError('Invalid color mode:', color_mode,
'; expected "rgb" or "grayscale".')
self.color_mode = color_mode
self.data_format = data_format
if self.color_mode == 'rgb':
if self.data_format == 'channels_last':
self.image_shape = self.target_size + (3,)
self.image_shape = (3,) + self.target_size
if self.data_format == 'channels_last':
self.image_shape = self.target_size + (1,)
self.image_shape = (1,) + self.target_size
self.classes = classes
if class_mode not in {'categorical', 'binary', 'sparse',
'input', None}:
raise ValueError('Invalid class_mode:', class_mode,
'; expected one of "categorical", '
'"binary", "sparse", "input"'
' or None.')
self.class_mode = class_mode
self.save_to_dir = save_to_dir
self.save_prefix = save_prefix
self.save_format = save_format
self.interpolation = interpolation
if subset is not None:
validation_split = self.image_data_generator._validation_split
if subset == 'validation':
split = (0, validation_split)
elif subset == 'training':
split = (validation_split, 1)
raise ValueError('Invalid subset name: ', subset,
'; expected "training" or "validation"')
split = None
self.subset = subset
white_list_formats = {'png', 'jpg', 'jpeg', 'bmp', 'ppm', 'tif', 'tiff'}
# init gs
self.storage_client = storage.Client(project)
self.bucket = self.storage_client.get_bucket(bucket)
# first, count the number of samples and classes
self.samples = 0
if not classes:
labels_folder_iter = self.bucket.list_blobs(delimiter="/",
list(labels_folder_iter) # populate labels_folder_iter
classes = [p[len(] for p in sorted(labels_folder_iter.prefixes)]
self.num_classes = len(classes)
self.class_indices = dict(zip(classes, range(len(classes))))
pool = multiprocessing.pool.ThreadPool()
function_partial = partial(self._count_valid_files_in_directory,
self.samples = sum(,
(os.path.join(, subdir) for subdir in classes)))
print('Found %d images belonging to %d classes.' % (self.samples, self.num_classes))
# second, build an index of the images in the different class subfolders
results = []
self.filenames = []
self.classes = np.zeros((self.samples,), dtype='int32')
i = 0
for dirpath in (os.path.join(, subdir) for subdir in classes):
(dirpath, white_list_formats, split,
self.class_indices, follow_links)))
for res in results:
classes, filenames = res.get()
self.classes[i:i + len(classes)] = classes
self.filenames += filenames
i += len(classes)
super(GoogleStorageIterator, self).__init__(self.samples, batch_size, shuffle, seed)
def _get_batches_of_transformed_samples(self, index_array):
batch_x = np.zeros((len(index_array),) + self.image_shape, dtype=K.floatx())
grayscale = self.color_mode == 'grayscale'
# build batch of image data
for i, j in enumerate(index_array):
fname = self.filenames[j]
blob = self.bucket.get_blob(os.path.join(, fname), self.storage_client)
img = self.load_img_from_string(blob.download_as_string(self.storage_client),
x = keras.preprocessing.image.img_to_array(img, data_format=self.data_format)
x = self.image_data_generator.random_transform(x)
x = self.image_data_generator.standardize(x)
batch_x[i] = x
# TODO write save to gs
# optionally save augmented images to disk for debugging purposes
# if self.save_to_dir:
# for i, j in enumerate(index_array):
# img = keras.preprocessing.image.array_to_img(batch_x[i], self.data_format, scale=True)
# fname = '{prefix}_{index}_{hash}.{format}'.format(prefix=self.save_prefix,
# index=j,
# hash=np.random.randint(1e7),
# format=self.save_format)
#, fname))
# build batch of labels
if self.class_mode == 'input':
batch_y = batch_x.copy()
elif self.class_mode == 'sparse':
batch_y = self.classes[index_array]
elif self.class_mode == 'binary':
batch_y = self.classes[index_array].astype(K.floatx())
elif self.class_mode == 'categorical':
batch_y = np.zeros((len(batch_x), self.num_classes), dtype=K.floatx())
for i, label in enumerate(self.classes[index_array]):
batch_y[i, label] = 1.
return batch_x
return batch_x, batch_y
def next(self):
"""For python 2.x.
# Returns
The next batch.
with self.lock:
index_array = next(self.index_generator)
# The transformation of images is not under thread lock
# so it can be done in parallel
return self._get_batches_of_transformed_samples(index_array)
def _count_valid_files_in_directory(self, directory, white_list_formats, split, follow_links):
"""Count files with extension in `white_list_formats` contained in directory.
# Arguments
directory: absolute path to the directory
containing files to be counted
white_list_formats: set of strings containing allowed extensions for
the files to be counted.
split: tuple of floats (e.g. `(0.2, 0.6)`) to only take into
account a certain fraction of files in each directory.
E.g.: `segment=(0.6, 1.0)` would only account for last 40 percent
of images in each directory.
follow_links: boolean.
# Returns
the count of files with extension in `white_list_formats` contained in
the directory.
num_files = len(list(self._iter_valid_files(directory, white_list_formats, follow_links)))
if split:
start, stop = int(split[0] * num_files), int(split[1] * num_files)
start, stop = 0, num_files
return stop - start
def _iter_valid_files(self, directory, white_list_formats, follow_links):
"""Count files with extension in `white_list_formats` contained in directory.
# Arguments
directory: absolute path to the directory
containing files to be counted
white_list_formats: set of strings containing allowed extensions for
the files to be counted.
follow_links: boolean.
# Yields
tuple of (root, filename) with extension in `white_list_formats`.
def _recursive_list(subpath):
# TODO: should return all file paths relative to subpath, walking through any directory it finds
if subpath[-1] != '/':
subpath = subpath + '/'
iter_blobs = self.bucket.list_blobs(delimiter="/", prefix=subpath)
blobs = list(iter_blobs)
return sorted(map(lambda blob: (subpath,[len(subpath):]), blobs), key=lambda x: x[1])
for root, fname in _recursive_list(directory):
for extension in white_list_formats:
if fname.lower().endswith('.tiff'):
warnings.warn('Using \'.tiff\' files with multiple bands will cause distortion. '
'Please verify your output.')
if fname.lower().endswith('.' + extension):
yield root, fname
def _list_valid_filenames_in_directory(self, directory, white_list_formats, split,
class_indices, follow_links):
"""List paths of files in `subdir` with extensions in `white_list_formats`.
# Arguments
directory: absolute path to a directory containing the files to list.
The directory name is used as class label and must be a key of `class_indices`.
white_list_formats: set of strings containing allowed extensions for
the files to be counted.
split: tuple of floats (e.g. `(0.2, 0.6)`) to only take into
account a certain fraction of files in each directory.
E.g.: `segment=(0.6, 1.0)` would only account for last 40 percent
of images in each directory.
class_indices: dictionary mapping a class name to its index.
follow_links: boolean.
# Returns
classes: a list of class indices
filenames: the path of valid files in `directory`, relative from
`directory`'s parent (e.g., if `directory` is "dataset/class1",
the filenames will be ["class1/file1.jpg", "class1/file2.jpg", ...]).
dirname = os.path.basename(directory)
if split:
num_files = len(list(self._iter_valid_files(directory, white_list_formats, follow_links)))
start, stop = int(split[0] * num_files), int(split[1] * num_files)
valid_files = list(self._iter_valid_files(directory, white_list_formats, follow_links))[start: stop]
valid_files = self._iter_valid_files(directory, white_list_formats, follow_links)
classes = []
filenames = []
for root, fname in valid_files:
absolute_path = os.path.join(root, fname)
relative_path = os.path.join(dirname, os.path.relpath(absolute_path, directory))
return classes, filenames
def load_img_from_string(self, img_string, grayscale=False, target_size=None,
from PIL import Image as pil_image
import io
'nearest': pil_image.NEAREST,
'bilinear': pil_image.BILINEAR,
'bicubic': pil_image.BICUBIC,
"""Loads an image into PIL format.
# Arguments
path: Path to image file
grayscale: Boolean, whether to load the image as grayscale.
target_size: Either `None` (default to original size)
or tuple of ints `(img_height, img_width)`.
interpolation: Interpolation method used to resample the image if the
target size is different from that of the loaded image.
Supported methods are "nearest", "bilinear", and "bicubic".
If PIL version 1.1.3 or newer is installed, "lanczos" is also
supported. If PIL version 3.4.0 or newer is installed, "box" and
"hamming" are also supported. By default, "nearest" is used.
# Returns
A PIL Image instance.
# Raises
ImportError: if PIL is not available.
ValueError: if interpolation method is not supported.
if pil_image is None:
raise ImportError('Could not import PIL.Image. '
'The use of `array_to_img` requires PIL.')
img =
if grayscale:
if img.mode != 'L':
img = img.convert('L')
if img.mode != 'RGB':
img = img.convert('RGB')
if target_size is not None:
width_height_tuple = (target_size[1], target_size[0])
if img.size != width_height_tuple:
if interpolation not in _PIL_INTERPOLATION_METHODS:
raise ValueError(
'Invalid interpolation method {} specified. Supported '
'methods are {}'.format(
", ".join(_PIL_INTERPOLATION_METHODS.keys())))
resample = _PIL_INTERPOLATION_METHODS[interpolation]
img = img.resize(width_height_tuple, resample)
return img
Yes, you can first download the images from GCS to the VM using os.system('gsutil cp YOUR_IMAGES .').
Transform your images into TFRecords, store them in Google Cloud Storage. TFRecordDataset has support for Google Cloud Storage.
Using TFRecords has performance advantages; if you train on large datasets, I recommend using TFRecords.

I know that using multithreading is useful when training a DNN with TensorFlow.
But does it make any sense to use it for inference? For example, if you are using Google's Object Detection API for real-time object detection in video streams?
And if yes, how is it implemented?
I created a GitHub repo that allows easy real-time object detection, but I am not satisfied with the resulting FPS, so I thought about using multithreading to speed it up.
Does anybody have experience with this, or could someone help me implement it in my code?
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
Created on Thu Dec 21 12:01:40 2017
#author: GustavZ
import numpy as np
import os
import six.moves.urllib as urllib
import tarfile
import tensorflow as tf
import cv2
# Protobuf Compilation (once necessary)
os.system('protoc object_detection/protos/*.proto --python_out=.')
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
from stuff.helper import FPS2
# Define Video Input
# Must be OpenCV readable
# 0 = Default Camera
video_input = 0
width = 640
height = 480
fps_interval = 3
# Model preparation
# What model to download.
MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = 'models/' + MODEL_NAME + '/frozen_inference_graph.pb'
# List of the strings that is used to add correct label for each box.
LABEL_MAP = 'mscoco_label_map.pbtxt'
PATH_TO_LABELS = 'object_detection/data/' + LABEL_MAP
# Download Model
if not os.path.isfile(PATH_TO_CKPT):
print('Model not found. Downloading it now.')
opener = urllib.request.URLopener()
tar_file =
for file in tar_file.getmembers():
file_name = os.path.basename(
if 'frozen_inference_graph.pb' in file_name:
tar_file.extract(file, os.getcwd())
os.remove('../' + MODEL_FILE)
print('Model found. Proceed.')
# Load a (frozen) Tensorflow model into memory.
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
serialized_graph =
tf.import_graph_def(od_graph_def, name='')
# Loading label map
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
# Start Video Stream
video_stream = cv2.VideoCapture(video_input)
video_stream.set(cv2.CAP_PROP_FRAME_WIDTH, width)
video_stream.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
# Detection
print ("Press 'q' to Exit")
with detection_graph.as_default():
with tf.Session(graph=detection_graph) as sess: # config=tf.ConfigProto(log_device_placement=True)
# Definite input and output Tensors for detection_graph
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
# Each box represents a part of the image where a particular object was detected.
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
# Each score represent how level of confidence for each of the objects.
# Score is shown on the result image, together with the class label.
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
# fps calculation
fps = FPS2(fps_interval).start()
while video_stream.isOpened():
ret_val,image_np =
# Expand dimensions since the model expects images to have shape: [1, None, None, 3]
image_np_expanded = np.expand_dims(image_np, axis=0)
# Actual detection.
(boxes, scores, classes, num) =
[detection_boxes, detection_scores, detection_classes, num_detections],
feed_dict={image_tensor: image_np_expanded})
# Visualization of the results of a detection.
cv2.imshow('object_detection', image_np)
# Exit Option
if cv2.waitKey(1) & 0xFF == ord('q'):
# End everything
print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed()))
print('[INFO] approx. FPS: {:.2f}'.format(fps.fps()))
It makes sense only if you run it on a device where your computing capacities are limited.
Basically what you would do is to run in different threads the image processing and the inference.
The result would be a smooth video display, and your inference would lag behind without impacting your display framerate.
You can see on this file an example (just a draft, not tested yet) about how the multi threading would look like.
I am loading my model and starting my session, then looping over the video captured, feeding my prediction queue if I have capacities to infer it.

I am loading the CIFAR-10 data set. The loading method puts the data into tensors, so to access the data I used .eval() with a session. On a normal tf constant it returns the value, but on the labels and the training set, which are tensors produced by the input pipeline, it does not.
1- i am using docker tensorflow-jupyter
2- it uses python 3
3- the batch file must be added to data folder
i am using the first batch [data_batch_1.bin]from this file
As notebook:
The code [as on the TensorFlow site, but modified to read 1 batch] [check the last 7 lines for the data loading]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import urllib
import tensorflow as tf
from six.moves import xrange # pylint: disable=redefined-builtin
# Global constants describing the CIFAR-10 data set.
def _generate_image_and_label_batch(image, label, min_queue_examples,
batch_size, shuffle):
"""Construct a queued batch of images and labels.
image: 3-D Tensor of [height, width, 3] of type.float32.
label: 1-D Tensor of type.int32
min_queue_examples: int32, minimum number of samples to retain
in the queue that provides of batches of examples.
batch_size: Number of images per batch.
shuffle: boolean indicating whether to use a shuffling queue.
images: Images. 4D tensor of [batch_size, height, width, 3] size.
labels: Labels. 1D tensor of [batch_size] size.
# Create a queue that shuffles the examples, and then
# read 'batch_size' images + labels from the example queue.
num_preprocess_threads = 2
if shuffle:
images, label_batch = tf.train.shuffle_batch(
[image, label],
capacity=min_queue_examples + 3 * batch_size,
images, label_batch = tf.train.batch(
[image, label],
capacity=min_queue_examples + 3 * batch_size)
# Display the training images in the visualizer.
tf.image_summary('images', images)
return images, tf.reshape(label_batch, [batch_size])
def read_cifar10(filename_queue):
"""Reads and parses examples from CIFAR10 data files.
Recommendation: if you want N-way read parallelism, call this function
N times. This will give you N independent Readers reading different
files & positions within those files, which will give better mixing of
filename_queue: A queue of strings with the filenames to read from.
An object representing a single example, with the following fields:
height: number of rows in the result (32)
width: number of columns in the result (32)
depth: number of color channels in the result (3)
key: a scalar string Tensor describing the filename & record number
for this example.
label: an int32 Tensor with the label in the range 0..9.
uint8image: a [height, width, depth] uint8 Tensor with the image data
class CIFAR10Record(object):
result = CIFAR10Record()
# Dimensions of the images in the CIFAR-10 dataset.
# See for a description of the
# input format.
label_bytes = 1 # 2 for CIFAR-100
result.height = 32
result.width = 32
result.depth = 3
image_bytes = result.height * result.width * result.depth
# Every record consists of a label followed by the image, with a
# fixed number of bytes for each.
record_bytes = label_bytes + image_bytes
# Read a record, getting filenames from the filename_queue. No
# header or footer in the CIFAR-10 format, so we leave header_bytes
# and footer_bytes at their default of 0.
reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
result.key, value =
# Convert from a string to a vector of uint8 that is record_bytes long.
record_bytes = tf.decode_raw(value, tf.uint8)
# The first bytes represent the label, which we convert from uint8->int32.
result.label = tf.cast(
tf.slice(record_bytes, [0], [label_bytes]), tf.int32)
# The remaining bytes after the label represent the image, which we reshape
# from [depth * height * width] to [depth, height, width].
depth_major = tf.reshape(tf.slice(record_bytes, [label_bytes], [image_bytes]),
[result.depth, result.height, result.width])
# Convert from [depth, height, width] to [height, width, depth].
result.uint8image = tf.transpose(depth_major, [1, 2, 0])
return result
def inputs(eval_data, data_dir, batch_size):
"""Construct input for CIFAR evaluation using the Reader ops.
eval_data: bool, indicating if one should use the train or eval data set.
data_dir: Path to the CIFAR-10 data directory.
batch_size: Number of images per batch.
images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
labels: Labels. 1D tensor of [batch_size] size.
filenames.append(os.path.join(data_dir, 'data_batch_1.bin') )
num_examples_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN
# Create a queue that produces the filenames to read.
filename_queue = tf.train.string_input_producer(filenames)
# Read examples from files in the filename queue.
read_input = read_cifar10(filename_queue)
reshaped_image = tf.cast(read_input.uint8image, tf.float32)
height = IMAGE_SIZE
width = IMAGE_SIZE
# Image processing for evaluation.
# Crop the central [height, width] of the image.
resized_image = tf.image.resize_image_with_crop_or_pad(reshaped_image,
width, height)
# Subtract off the mean and divide by the variance of the pixels.
float_image = tf.image.per_image_whitening(resized_image)
# Ensure that the random shuffling has good mixing properties.
min_fraction_of_examples_in_queue = 0.4
min_queue_examples = int(num_examples_per_epoch *
# Generate a batch of images and labels by building up a queue of examples.
return _generate_image_and_label_batch(float_image, read_input.label,
min_queue_examples, batch_size,
sess = tf.InteractiveSession()
train_data,train_labels = inputs(False,"data",6000)
print (train_data,train_labels)
You must call tf.train.start_queue_runners(sess) before you call train_data.eval() or train_labels.eval().
This is a(n unfortunate) consequence of how TensorFlow input pipelines are implemented: the tf.train.string_input_producer(), tf.train.shuffle_batch(), and tf.train.batch() functions internally create queues that buffer records between different stages in the input pipeline. The tf.train.start_queue_runners() call tells TensorFlow to start fetching records into these buffers; without calling it the buffers remain empty and eval() hangs indefinitely.

So far I'm trying to implement fit_generator for sentiment analysis, as I only have a small GPU and a big dataset. But I keep getting this error:
Using Theano backend.
Can not use cuDNN on context None: cannot compile with cuDNN. We got this error:
b'In file included from C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v8.0\\include/driver_types.h:53:0,\r\n from C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v8.0\\include/cudnn.h:63,\r\n from C:\\Users\\Def\\AppData\\Local\\Temp\\try_flags_p2iwer2o.c:4:\r\nC:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v8.0\\include/host_defines.h:84:0: warning: "__cdecl" redefined\r\n #define __cdecl\r\n ^\r\n<built-in>: note: this is the location of the previous definition\r\nd000029.o:(.idata$5+0x0): multiple definition of `__imp___C_specific_handler\'\r\nd000026.o:(.idata$5+0x0): first defined here\r\nC:/Users/Def/Anaconda3/envs/Final/Library/mingw-w64/bin/../lib/gcc/x86_64-w64-mingw32/5.3.0/../../../../x86_64-w64-mingw32/lib/../lib/crt2.o: In function `__tmainCRTStartup\':\r\nC:/repo/mingw-w64-crt-git/src/mingw-w64/mingw-w64-crt/crt/crtexe.c:285: undefined reference to `_set_invalid_parameter_handler\'\r\ncollect2.exe: error: ld returned 1 exit status\r\n'
Mapped name None to device cuda: GeForce GTX 960M (0000:01:00.0)
Epoch 1/10
Traceback (most recent call last):
File "C:/Users/Def/PycharmProjects/KerasUkExpenditure/", line 136, in <module>
File "C:\Users\Def\Anaconda3\envs\Final\lib\site-packages\keras\legacy\", line 88, in wrapper
return func(*args, **kwargs)
File "C:\Users\Def\Anaconda3\envs\Final\lib\site-packages\keras\", line 1097, in fit_generator
File "C:\Users\Def\Anaconda3\envs\Final\lib\site-packages\keras\legacy\", line 88, in wrapper
return func(*args, **kwargs)
File "C:\Users\Def\Anaconda3\envs\Final\lib\site-packages\keras\engine\", line 1876, in fit_generator
File "C:\Users\Def\Anaconda3\envs\Final\lib\site-packages\keras\engine\", line 1614, in train_on_batch
File "C:\Users\Def\Anaconda3\envs\Final\lib\site-packages\keras\engine\", line 1307, in _standardize_user_data
_check_array_lengths(x, y, sample_weights)
File "C:\Users\Def\Anaconda3\envs\Final\lib\site-packages\keras\engine\", line 229, in _check_array_lengths
'and ' + str(list(set_y)[0]) + ' target samples.')
ValueError: Input arrays should have the same number of samples as target arrays. Found 1000 input samples and 1 target samples.
I have a matrix that is 1000 elements long since I only have a maximum corpus of 1000 words which is specified in the Tokenizer().
I then have the sentiment which is either a 0 for negative or a 1 for positive.
My question is why do I receive the error? I have tried to use the transform on both the data and labels and I still receive the same error. here is my code.
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.preprocessing.text import Tokenizer
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import re
the amount of samples out to the 1 million to use, my 960m 2GB can only handle
about 30,000ish at the moment depending on a number of neurons in the
deep layer and a number of layers.
# Number of leading rows used as the main data split below; the 1000 rows
# that follow it are sliced off separately as the test split.
maxSamples = 3000
#Load the CSV and get the correct columns
data = pd.read_csv("C:\\Users\\Def\\Desktop\\Sentiment Analysis Dataset1.csv")
# Build two single-column frames: dy holds the 'Sentiment' column and
# dx holds the 'SentimentText' column of the loaded CSV.
dx = pd.DataFrame()
dy = pd.DataFrame()
dy[['Sentiment']] = data[['Sentiment']]
dx[['SentimentText']] = data[['SentimentText']]
# Rows [0, maxSamples) of each frame.
dataY = dy.iloc[0:maxSamples]
dataX = dx.iloc[0:maxSamples]
# Rows [maxSamples, maxSamples + 1000) of each frame.
testY = dy.iloc[maxSamples: maxSamples + 1000]
testX = dx.iloc[maxSamples: maxSamples + 1000]
here I filter the data and clean it up by removing # tags, hyperlinks and
also any characters that are not alpha-numeric.
def removeTagsAndLinks(dataframe):
    """Strip hyperlinks, hashtags and non-word characters from the first column.

    Every value in the first column of ``dataframe`` is converted with
    ``str()`` and cleaned in three passes, in this order:

    1. http/ftp/https hyperlinks are removed,
    2. ``#hashtag`` tokens are removed,
    3. each run of non-word characters is collapsed to a single space.

    The column is written back with one positional assignment instead of
    mutating the row objects produced by ``iterrows()``: pandas does not
    guarantee that writes to those rows reach the original frame.

    # Arguments
        dataframe: pandas DataFrame whose first column holds the text.

    # Returns
        The same ``dataframe`` object, modified in place.
    """
    if dataframe.empty:
        # Nothing to clean (no rows or no columns).
        return dataframe

    # Raw strings keep the regex escapes (\w, \., \W) from being treated as
    # invalid string escapes; the patterns themselves are unchanged.
    hyperlink = re.compile(
        r"(http|ftp|https)://([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,#?^=%&:/~+#-]*[\w#?^=%&/~+#-])?")
    hashtag = re.compile(r"#\w+")
    non_word = re.compile(r"\W+")

    def _clean(value):
        text = hyperlink.sub("", str(value))
        text = hashtag.sub("", text)
        return non_word.sub(" ", text)

    dataframe.iloc[:, 0] = [_clean(value) for value in dataframe.iloc[:, 0]]
    return dataframe
xData = removeTagsAndLinks(dataX)
xTest = removeTagsAndLinks(testX)
This loop looks for any Tweets with characters shorter than 2 and once found write the
index of that Tweet to an array so I can remove from the Dataframe of sentiment and the
list of Tweets later
indexOfBlankStrings = []
for index, string in enumerate(xData):
if len(string) < 2:
for row in indexOfBlankStrings:
dataY.drop(row, axis=0, inplace=True)
This makes a BOW model out of all the tweets then creates a
vector for each of the tweets containing all the words from
the BOW model, each vector is the same size becuase the
network expects it
def vectorise(tokenizer, list):
    """Fit ``tokenizer`` on the given texts.

    # Arguments
        tokenizer: object exposing a ``fit_on_texts`` method.
        list: the texts, forwarded unchanged to ``tokenizer.fit_on_texts``.

    # Returns
        Whatever ``tokenizer.fit_on_texts`` returns.
        NOTE(review): for the Keras ``Tokenizer`` this is presumably None,
        with the fit happening as a side effect - confirm.
    """
    fit_result = tokenizer.fit_on_texts(list)
    return fit_result
#Make BOW model and vectorise it
t = Tokenizer(lower=False, num_words=1000)
Here im experimenting with multiple layers of the total
amount of words in the syllabus divided by ^2 - This
has given me quite accurate results compared to random guess's
of amount of neron's.
# Candidate layer widths: the number of rows in xData divided by
# 4, 8, 16, 32, 64 and 128 respectively, each truncated to an int.
# The original author flagged l1 and l2 as "To big for my GPU".
l1, l2, l3, l4, l5, l6 = (
    int(xData.shape[0] / divisor) for divisor in (4, 8, 16, 32, 64, 128)
)
#Make the model
# Feed-forward classifier: one hidden Dense layer of width l1 that takes
# one input per column of xData, followed by a single output unit.
model = Sequential()
model.add(Dense(l1, input_dim=xData.shape[1]))
# The output uses a sigmoid so predictions are probabilities in [0, 1],
# which is what binary_crossentropy expects. The previous 'relu' output
# is unbounded above and can be exactly 0, which is not a valid
# probability for the 0/1 sentiment target.
model.add(Dense(1, activation='sigmoid'))
#Compile the model
model.compile(optimizer='RMSProp', loss='binary_crossentropy', metrics=['acc'])
This here will use multiple batches to train the model.
This is the starting index of the array for which you want to
start training the network from.
The number of elements use to train the network in each batch so
since dataRange = 1000 this mean it goes from
startIndex...dataRange OR 0...1000
This is kinda self explanitory, the more Epochs the more it
is supposed to learn AKA updates the optimisation algo numbers
amountOfEpochs = 1
dataRange = 1000
startIndex = 0
def generator(tokenizer, data, labels, totalSize=maxSamples, startIndex=0):
    """Endlessly yield one ``(features, label)`` pair per row.

    Rows ``startIndex`` .. ``totalSize - 1`` are visited in order and the
    sequence restarts from ``startIndex`` after the last row, so the
    generator never finishes on its own.

    # Arguments
        tokenizer: object exposing ``texts_to_matrix``.
        data: pandas object holding the texts; row ``i`` is read with
            ``data.iloc[i]``.
        labels: pandas object holding the targets, aligned with ``data``
            by position.
        totalSize: exclusive upper bound of the row positions to visit.
        startIndex: first row position to visit.

    # Yields
        Tuple ``(batch_features, batch_labels)`` for a single row.

    # Raises
        ValueError: if ``startIndex >= totalSize``; the loop would
            otherwise spin forever without yielding anything.
    """
    if startIndex >= totalSize:
        raise ValueError('startIndex must be smaller than totalSize; got '
                         'startIndex=%r, totalSize=%r' % (startIndex, totalSize))
    # `.values` is the documented replacement for `as_matrix()`, which was
    # deprecated in pandas 0.23 and removed in 1.0.
    l = labels.values
    while True:
        for i in range(startIndex, totalSize):
            # Read from the `data` argument rather than the module-level
            # `xData`, so the generator works for any dataset passed in.
            batch_features = tokenizer.texts_to_matrix(data.iloc[i])
            batch_labels = l[i]
            yield batch_features, batch_labels
derp = generator(t, data=xData, labels=dataY)
##This runs the model for batch AKA load a little them process then load a little more
for amountOfData in range(1000, maxSamples, 1000):
#(loss, acc) = model.train_on_batch(x=dim[startIndex:amountOfData], y=np.asarray(dataY.iloc[startIndex:amountOfData]))
history = model.fit_generator(generator=generator(tokenizer=t,
The problem you are having is that the number of samples in your input array does not equal the number of samples in your target array. This means the number of rows in your matrices does not match. The problem stems from your generator function. You index the data as
batch_labels = l[i]
which is only returning one sample (row of matrix). When instead it should be something like...
batch_labels = l[i:i+1000]
However, there are other problems with your use of fit_generator. You should not be using it within a loop. I don't see how it is benefiting the program, and calling fit_generator in a loop defeats the purpose of using a generator. The function you would use to train on an individual batch of data would be
train_on_batch, as seen in the docs.
