Evaluation gives 0 score - pytorch

I am working on a instance segmentation problem on mask rcnn with pytorch. Training part is working with below code but evaluation gives 0 score at every mAP. What's the problem in the code?
More info:
I use Albumentations for transforms and some files from pytorch vision for training.
Some problems I've been through:
When I use coco for bbox format instead of pascal voc it gives following error.
RuntimeError: Boolean value of Tensor with more than one value is ambiguous
When put labels out of the convert_seg_boolMask function, it gives following error.
RuntimeError: Boolean value of Tensor with more than one value is ambiguous
def get_transforms(train=False):
if train:
transform = A.Compose([
ToTensorV2()
],bbox_params=A.BboxParams("pascal_voc",label_fields=["labels","iscrowd"]))
else:
transform = A.Compose([
ToTensorV2()
],bbox_params=A.BboxParams("pascal_voc",label_fields=["labels","iscrowd"]))
return transform
class Dataset(datasets.VisionDataset):
def __init__(self, coco_, data_dir, transform=None, target_transform=None, transforms=None):
super().__init__(data_dir, transforms, transform, target_transform)
self.coco_info = coco_
self.data_dir = data_dir
self.transforms = transforms
if isinstance(self.coco_info,dict):
self.ids = [x["id"] for x in self.coco_info["images"] if len(self._load_target(x["id"]))>0]
def _load_image(self, id: int):
name = loadImgs(self.coco_info["images"],id)[0]['file_name']
image = cv2.imread(os.path.join(self.data_dir, name))
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)/255
return image
def _load_target(self, id):
return loadAnns(self.coco_info["annotations"],id)
def n_classes(self):
category_names = list(set(sorted([a["name"] for a in self.coco_info["categories"]])))
self.classes = ["__background__"]+[i for i in category_names]
return self.classes
def __getitem__(self,idx):
id = self.ids[idx]
image = self._load_image(id)
target = copy.deepcopy(self._load_target(id))
image_shape = (image.shape[0],image.shape[1])
img_info = {
"img_shape":image_shape,
"image_id":id,
"labels":[t["category_id"]for t in target],
"segmentation":[t["segmentation"][0] for t in target],
"id": [t["id"] for t in target]
}
mask, labels = self.convert_seg_to_boolMask(img_info)
obj_ids = np.unique(mask)
obj_ids = obj_ids[1:]
masks = torch.tensor(mask == obj_ids[:, None, None])
boxes = []
bbox = np.array([t["bbox"] for t in target])
for xmin,ymin,width,height in bbox:
xmax = xmin+width
ymax = ymin+height
boxes.append([xmin, ymin, xmax, ymax])
boxes = torch.tensor(boxes)
labels = torch.tensor(labels)
image_id = torch.tensor([id])
iscrowd = torch.tensor([t["iscrowd"] for t in target])
transformed = self.transforms(image=image, masks=masks, bboxes=boxes, labels=labels, iscrowd=iscrowd)
image = transformed['image']
masks = torch.tensor(transformed["masks"])
boxes = torch.tensor(transformed['bboxes'])
labels = torch.tensor(transformed["labels"])
iscrowd = torch.tensor(transformed["iscrowd"])
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
image_id = torch.tensor(image_id)
target = {}
target["boxes"] = boxes
target["labels"] = labels
target["masks"] = masks
target["image_id"] = image_id
target["area"] = area
target["iscrowd"] = iscrowd
return image, target
def __len__(self):
return len(self.ids)
def convert_seg_to_boolMask(self,img_info):
mask = np.zeros(img_info["img_shape"], dtype=np.uint8)
mask = Image.fromarray(mask)
draw = ImageDraw.Draw(mask)
for seg, i in zip(img_info["segmentation"],img_info["id"]):
points = [tuple([k,l]) for k,l in zip(seg[0::2],seg[1::2])]
draw.polygon(xy=points,
outline=tuple([i]),
fill=tuple([i]))
mask = np.array(mask)
labels = img_info["labels"]
return mask, labels

Related

Object Detection - RuntimeError: stack expects each tensor to be equal size

I created a custom dataset for object detection named ReceiptDataset as below.
from torch.nn.utils.rnn import pad_sequence
import torch.nn.functional as F
class ReceiptDataset(torch.utils.data.Dataset):
def __init__(self, train_dir,width,height,labels,transforms=None):
self.images = os.listdir(train_dir)
self.width = width
self.height = height
self.train_dir = train_dir
self.labels = labels
self.transforms = transforms
def __getitem__(self,idx):
img_name = self.images[idx]
img_path = os.path.join(self.train_dir,img_name)
#print(f"img_name: {img_name}")
img = cv2.imread(img_path)
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
img_res = cv2.resize(img_rgb,(self.width,self.height), cv2.INTER_AREA)
img_res /= 255.0
annot = self.labels[str(img_name)]
lbls = []
boxes = []
target = {}
ht, wt, _ = img.shape
#print(f"img_res shape: {img_res.shape}, orig shape: {wt}, {ht}")
for item in annot:
x,y,box_wt,box_ht,lbl = item
x_min = x
x_max = x + box_wt
y_min = y
y_max = y + box_ht
x_min_corr = (x_min / wt) * self.width
x_max_corr = (x_max /wt ) * self.width
y_min_corr = (y_min / ht) * self.height
y_max_corr = (y_max / ht) * self.height
boxes.append([x_min_corr, y_min_corr, x_max_corr, y_max_corr])
lbls.append( classes.index(str(lbl)) )
#print(f"dls_lbls: {lbls}, {len(lbls)}")
#lbls += [-1] * (NUM_CLASSES - len(lbls))
boxes = torch.as_tensor(boxes, dtype=torch.float32)
lbls = torch.as_tensor(lbls, dtype=torch.int64)
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)
target["boxes"] = boxes
target["labels"] = lbls
target["image_id"] = torch.as_tensor(idx)
target["area"] = area
target["iscrowd"] = iscrowd
#print(f"dls_lbls -- 2: {target['labels']}, { target['labels'].shape }")
if self.transforms:
trans = self.transforms(image=img_res,
bboxes = target["boxes"],
labels=lbls
)
img_res = trans["image"]
target["boxes"] = torch.Tensor(trans["bboxes"])
return img_res, target
def __len__(self):
return len(self.images)
and I created an instance with:
train_dataset = ReceiptDataset("label-detector/images",width,height,plabels)
and my training snippet is :
from engine import train_one_epoch, evaluate
for epoch in range(num_epochs):
train_one_epoch(model,optim,train_loader,device,epoch,print_freq=2)
lr_scheduler.step()
evaluate(model,test_loader,device)
but anytime I run the training loop, I’m getting a runtime error:
RuntimeError: stack expects each tensor to be equal size, but got [11,4] at entry 0 and [9,4] at entry 1
There are 17 classes in total and each image has a minimum of 4 annotations.
I noticed the problem seems to be coming from my labels list/tensor in the dataset class, the size of the labels list/tensor varies based on the number of annotated items in an image, but I can’t seem to figure out a way to fix this.
Thank you!
I solved it by implementing a custom collate function for the dataloader that returns a batch of my dataset as needed by my model.
def collate_fn_seq(batch):
images = [ item[0] for item in batch ]
targets = [ item[1] for item in batch ]
imgs = []
for image in images:
img = torch.from_numpy(image).permute(2, 0, 1)
imgs.append(img)
boxes = [target["boxes"] for target in targets]
labels = [target["labels"] for target in targets]
image_ids = [ target["image_id"] for target in targets ]
areas = [target["area"] for target in targets]
iscrowds = [target["iscrowd"] for target in targets]
tars = []
for i in range(len(batch)):
box = boxes[i]
label = labels[i]
image_id = image_ids[i]
area = areas[i]
iscrowd = iscrowds[i]
target = {"boxes": box, "labels": label, "image_id": image_id, "area": area, "iscrowd": iscrowd}
tars.append(target)
return imgs, tars
and included it in my dataloaders using:
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=collate_fn_seq)

Tensorflow: How to use a generator for fit() which runs in parallel with multiple processes

I am trying to train a model on a data set which does not fit in my RAM.
Therefore I am using a data generator which inherits from tensorflow.keras.utils.Sequence as shown below.
This is working. However because I am doing processing on the images my training is CPU bound. When looking in GPU-Z my GPU is only at 10-20% but one of my CPU Cores is at its max.
To solve this I am trying to run the generator in parallel on all my 16 cores. However when I set use_multiprocessing=True in the fit() function the program freezes. And using workers=8 does not speed up the process just produces batches in uneven intervals.
ex.:
batch 1-8 is processed immediately than there is some delay and than batch 9-16 is processed.
The code below shows what I am trying to do.
#read the dataset
x, o_y = reader.read_dataset_whole(ETLCharacterGroups.kanji)
#split data into 90/10 percent parts
percentage = round(len(x) / 100 * 80)
x_train = x[:percentage]
x_test = x[percentage:]
y_train = o_y[:percentage]
y_test = o_y[percentage:]
def distort_sample(img : Image) -> (Image, [int], [int]):
"""
Distort the given image randomly.
Randomly applies the transformations:
- rotation
- shear
- scale
- translate
- sharpen
- blur
Returns the distorted image.
"""
offset, scale = (0, 0), (64, 64)
t = random.choice(["sine"]) # "rotate", "shear", "scale",
f = random.choice(["blur", "sharpen", "smooth"])
# randomly apply transformations...
# rotate image
if("rotate" in t):
img = img.rotate(random.uniform(-30, 30))
# shear image
if("shear" in t):
y_shear = random.uniform(-0.2, 0.2)
x_shear = random.uniform(-0.2, 0.2)
img = img.transform(img.size, PImage.AFFINE, (1, x_shear, 0, y_shear, 1, 0))
# scale and translate image
if("scale" in t):
#scale the image
size_x = random.randrange(20, 63)
size_y = random.randrange(20, 63)
scale = (size_x, size_y)
offset = (math.ceil((64 - size_x) / 2), math.ceil((64 - size_y) / 2))
img = img.resize(scale)
# put it again on a black background (translated)
background = PImage.new('L', (64, 64))
trans_x = random.randrange(0, math.floor((64 - size_x)))
trans_y = random.randrange(0, math.floor((64 - size_y)))
offset = (trans_x, trans_y)
background.paste(img, offset)
img = background
if("sine" in t):
t_img = np.array(img)
A = t_img.shape[0] / 3.0
w = 2.0 / t_img.shape[1]
shift = lambda x: random.uniform(0.15, 0.2) * A * np.sin(-2*np.pi*x * w)
for i in range(t_img.shape[0]):
t_img[:,i] = np.roll(t_img[:,i], int(shift(i)))
img = PImage.fromarray(t_img)
# blur
if("blur" in f):
img = img.filter(ImageFilter.GaussianBlur(radius=random.uniform(0.5, 1.2)))
# sharpen
if("sharpen" in f):
img = img.filter(ImageFilter.SHARPEN)
# smooth
if("smooth" in f):
img = img.filter(ImageFilter.SMOOTH)
return img, offset, scale
class DataGenerator(tf.keras.utils.Sequence):
def __init__(self, x_col, y_col, batch_size, mode="training", shuffle=True):
self.batch_size = batch_size
self.undistorted_images = batch_size // 2
self.shuffle = shuffle
self.indices = len(x_col)
self.x_col = x_col
self.y_col = y_col
def __len__(self):
return self.indices // self.batch_size
def on_epoch_end(self):
if(False):
rng_state = np.random.get_state()
np.random.shuffle(x)
np.random.set_state(rng_state)
np.random.shuffle(o_y)
def __getitem__(self, index):
X, Y = [], []
for i in range(index * self.undistorted_images, (index+1) * self.undistorted_images):
base_img = self.x_col[i]
img = PImage.fromarray(np.uint8(base_img.reshape(64, 64) * 255))
# distort_sample() creates random variations of an image
img, *unused = distort_sample(img)
# add transformed image
X.append(np.array(img).reshape(64, 64, 1))
Y.append(self.y_col[i])
# add base image
X.append(base_img)
Y.append(self.y_col[i])
return np.array(X), np.array(Y)
#instantiate generators
training_generator = DataGenerator(x_col = x_train, y_col = y_train, batch_size = 256)
validation_generator = DataGenerator(x_col = x_test, y_col = y_test, batch_size = 256)
#train the model
hist = model.fit(
x=training_generator,
epochs=100,
validation_data=training_generator,
max_queue_size=50,
workers=8,
#use_multiprocessing=True <- this freezes the program
)
In the end I needed to make the Data generator use multi processing. To do this, the arrays needed to be stored in shared memory and than used in the sub processes.
import multiprocessing as mp
import numpy as np
from PIL import Image as PImage
from PIL import ImageFilter
import random
import math
import tensorflow as tf
shared_dict = {}
def distort_sample(img : PImage) -> (PImage, [int], [int]):
"""
Distort the given image randomly.
Randomly applies the transformations:
rotation, shear, scale, translate,
Randomly applies the filter:
sharpen, blur, smooth
Returns the distorted image.
"""
offset, scale = (0, 0), (64, 64)
t = random.choice(["sine", "rotate", "shear", "scale"])
f = random.choice(["blur", "sharpen", "smooth"])
# randomly apply transformations...
# rotate image
if("rotate" in t):
img = img.rotate(random.uniform(-15, 15))
# shear image
if("shear" in t):
y_shear = random.uniform(-0.2, 0.2)
x_shear = random.uniform(-0.2, 0.2)
img = img.transform(img.size, PImage.AFFINE, (1, x_shear, 0, y_shear, 1, 0))
# scale and translate image
if("scale" in t):
#scale the image
size_x = random.randrange(25, 63)
size_y = random.randrange(25, 63)
scale = (size_x, size_y)
offset = (math.ceil((64 - size_x) / 2), math.ceil((64 - size_y) / 2))
img = img.resize(scale)
# put it again on a black background (translated)
background = PImage.new('L', (64, 64))
trans_x = random.randrange(0, math.floor((64 - size_x)))
trans_y = random.randrange(0, math.floor((64 - size_y)))
offset = (trans_x, trans_y)
background.paste(img, offset)
img = background
if("sine" in t):
t_img = np.array(img)
A = t_img.shape[0] / 3.0
w = 2.0 / t_img.shape[1]
shift_factor = random.choice([-1, 1]) * random.uniform(0.15, 0.2)
shift = lambda x: shift_factor * A * np.sin(-2*np.pi*x * w)
for i in range(t_img.shape[0]):
t_img[:,i] = np.roll(t_img[:,i], int(shift(i)))
img = PImage.fromarray(t_img)
# blur
if("blur" in f):
img = img.filter(ImageFilter.GaussianBlur(radius=random.uniform(0.5, 1.2)))
# sharpen
if("sharpen" in f):
img = img.filter(ImageFilter.SHARPEN)
# smooth
if("smooth" in f):
img = img.filter(ImageFilter.SMOOTH)
return img, offset, scale
def generator_func(start_index, end_index, x_shape, y_shape):
X, Y = [], []
x_loc = np.frombuffer(shared_dict["x"], dtype="float16").reshape(x_shape)
y_loc = np.frombuffer(shared_dict["y"], dtype="b").reshape(y_shape)
for i in range(start_index, end_index):
base_img = x_loc[i]
img = PImage.fromarray(np.uint8(base_img.reshape(64, 64) * 255))
img, *unused = distort_sample(img)
# add transformed image
X.append(np.array(img).reshape(64, 64, 1))
Y.append(y_loc[i])
X.append(np.array(img).reshape(64, 64, 1))
Y.append(y_loc[i])
# add base image
#X.append(base_img)
#Y.append(y_loc[i])
return X, Y
def generator_initializer(_x_shared, _y_shared):
shared_dict["x"] = _x_shared
shared_dict["y"] = _y_shared
def generator_func(start_index, end_index, x_shape, y_shape):
X, Y = [], []
x_loc = np.frombuffer(shared_dict["x"], dtype="float16").reshape(x_shape)
y_loc = np.frombuffer(shared_dict["y"], dtype="b").reshape(y_shape)
for i in range(start_index, end_index):
base_img = x_loc[i]
img = PImage.fromarray(np.uint8(base_img.reshape(64, 64) * 255))
img, *unused = distort_sample(img)
# add transformed image
X.append(np.array(img).reshape(64, 64, 1))
Y.append(y_loc[i])
X.append(np.array(img).reshape(64, 64, 1))
Y.append(y_loc[i])
# add base image
#X.append(base_img)
#Y.append(y_loc[i])
return X, Y
class DataGenerator(tf.keras.utils.Sequence):
def __init__(self, num_samples, batch_size,
percentage, mode,
x_shared, y_shared,
x_np_shape, y_np_shape,
processes, shuffle=True):
self.num_samples = num_samples
# 50% original images + 50% augmented images
self.batch_size = batch_size // 2
self.percentage = percentage
# an offset to devide the data set into test and train
self.start_index = 0
if(mode == "testing"):
self.start_index = num_samples - (num_samples // 100 * percentage)
# is this a train or a test generator
self.mode = mode
# how many processes should be used for this generator
self.processes = processes
# should the arrays be shuffled after each epoch
self.shuffle = shuffle
self.x_np_shape = x_np_shape
self.y_np_shape = y_np_shape
# a pool of processes for generating augmented data
self.pool = mp.Pool(processes=self.processes,
initializer=generator_initializer,
initargs=(x_shared, y_shared))
def __len__(self):
return (self.num_samples // 100 * self.percentage) // self.batch_size
def on_epoch_end(self):
if(False):
rng_state = np.random.get_state()
np.random.shuffle(x_np)
np.random.set_state(rng_state)
np.random.shuffle(y_np)
def __getitem__(self, index):
arguments = []
slice_size = self.batch_size // self.processes
current_batch = index * self.batch_size
for i in range(self.processes):
slice_start = self.start_index + (current_batch + i * slice_size)
slice_end = self.start_index + (current_batch + (i+1) * slice_size)
arguments.append([slice_start, slice_end, self.x_np_shape, self.y_np_shape])
return_values = self.pool.starmap(generator_func, arguments)
X, Y = [], []
for imgs, labels in return_values:
X.append(imgs)
Y.append(labels)
return np.concatenate(X).astype(np.float16), np.concatenate(Y).astype(np.float16)

getting shape mismatch error between shape of labels and logits?

I'm trying to do attention mechanism while returning the tensor I'm getting the following error
ValueError: Shape mismatch: The shape of labels (received (64, 53)) should equal the shape of logits except for the last dimension (received (64, 1, 500)).
Please find the below code
Here is code for attention please correct me if it is wrong
class Attention(tf.keras.layers.Layer):
def __init__(self):
super().__init__()
def call(self,enc_op,hidden_state):
# print(enc_op.shape,hidden_state.shape)
query_with_time_axis = tf.expand_dims(hidden_state, 1)
context_vector = tf.matmul(enc_op,tf.transpose(query_with_time_axis,perm=[0,2,1]))
context_vector = tf.nn.softmax(context_vector,axis=1)
context_vector = context_vector * enc_op
context_vector = tf.reduce_sum(context_vector, axis=1)
return context_vector
Here is decoder part I'm calling the attention from here
class Decoder(tf.keras.layers.Layer):
def init(self,vocab_size,embedding_dim,input_length,dec_units):
super().init()
self.vocab_size = vocab_size
self.embedding_dim = embedding_dim
self.dec_units = dec_units
self.input_length = input_length
self.attention = Attention()
def build(self,input_shape):
self.embedding = Embedding(input_dim=self.vocab_size,output_dim = self.embedding_dim,input_shape = input_shape,
mask_zero = True, name = "embedding_layer_decoder")
self.lstm = LSTM(self.dec_units,return_sequences=True,return_state=True,name = "Decoder_LSTM")
def call(self,target_sentances,enc_op,hidden_state,cell_state):
target_embed = self.embedding(target_sentances)
for i in range(target_embed.shape[1]):
context_vector = self.attention(enc_op,hidden_state)
y = tf.concat([context_vector, target_embed[:,i,:]], axis=-1)
y = tf.expand_dims(y, 1)
lstm_output,hidden_state,_ = self.lstm(y,initial_state = [hidden_state,cell_state])
return lstm_output
class Mymodel(Model):
def __init__(self,encoder_inputs_length,decoder_inputs_length,output_vocab_size):
super().__init__()
self.encoder = Encoder(vocab_size = 500, embedding_dim = 50, input_length = encoder_inputs_length, enc_units=64)
self.decoder = Decoder(vocab_size = 500, embedding_dim = 50, input_length = decoder_inputs_length, dec_units=64)
self.dense = Dense(output_vocab_size,activation = "softmax")
def call(self,data):
input,output = data[0],data[1]
print(input.shape,output.shape)
encoder_output,encoder_h,encoder_c = self.encoder(input)
print("="*20, "ENCODER", "="*20)
print("-"*35)
print(encoder_output)
print("ENCODER ==> OUTPUT SHAPE",encoder_output.shape)
print("ENCODER ==> HIDDEN STATE SHAPE",encoder_h.shape)
print("ENCODER ==> CELL STATE SHAPE", encoder_c.shape)
print("="*20,"Decoder","="*20)
decoder_output = self.decoder(output,encoder_output,encoder_h,encoder_c)
output1 = self.dense(decoder_output)
print("-"*35)
print("Final output shape",output.shape)
print("="*50)
return output1
model = Mymodel(encoder_inputs_length=30,decoder_inputs_length=20,output_vocab_size=500)
ENCODER_SEQ_LEN = 30
DECODER_SEQ_LEN = 20
optimizer = tf.keras.optimizers.Adam()
model.compile(optimizer=optimizer,loss=tf.keras.losses.SparseCategoricalCrossentropy())
for (batch, (inp, targ)) in enumerate(dataset.take(steps_per_epoch)):
model.fit([inp, targ], targ, steps_per_epoch=1)
The shape of my input and target is
(64, 55) (64, 53)
64 is batch size

Building a dataset with dataloader pytorch getting error cannot import name 'read_data_sets'

Loading data into dataset using pytorch dataloader.
Getting error cannot import name 'read_data_sets'
Tried searaching for results from similar issues.
If there is confusion about file instead of module and it can't find read_data_sets in your file How do i change to fix?
class MRDataset(data.Dataset):
def __init__(self, root_dir, task, plane, train=True, transform=None, weights=None):
super().__init__()
self.task = task
self.plane = plane
self.root_dir = root_dir
self.train = train
if self.train:
self.folder_path = self.root_dir + 'train/{0}/'.format(plane)
self.records = pd.read_csv(
self.root_dir + 'train-{0}.csv'.format(task), header=None, names=['id', 'label'])
else:
transform = None
self.folder_path = self.root_dir + 'valid/{0}/'.format(plane)
self.records = pd.read_csv(
self.root_dir + 'valid-{0}.csv'.format(task), header=None, names=['id', 'label'])
self.records['id'] = self.records['id'].map(
lambda i: '0' * (4 - len(str(i))) + str(i))
self.paths = [self.folder_path + filename +
'.npy' for filename in self.records['id'].tolist()]
self.labels = self.records['label'].tolist()
self.transform = transform
if weights is None:
pos = np.sum(self.labels)
neg = len(self.labels) - pos
self.weights = torch.FloatTensor([1, neg / pos])
else:
self.weights = torch.FloatTensor(weights)
def __len__(self):
return len(self.paths)
def __getitem__(self, index):
array = np.load(self.paths[index])
label = self.labels[index]
if label == 1:
label = torch.FloatTensor([[0, 1]])
elif label == 0:
label = torch.FloatTensor([[1, 0]])
if self.transform:
array = self.transform(array)
else:
array = np.stack((array,)*3, axis=1)
array = torch.FloatTensor(array)
# if label.item() == 1:
# weight = np.array([self.weights[1]])
# weight = torch.FloatTensor(weight)
# else:
# weight = np.array([self.weights[0]])
# weight = torch.FloatTensor(weight)
return array, label, self.weights
There is a model and train class to run this. Arguments specified in train.
Running the train should load data and run through model

samples.cols == var_count && samples.type() == 5 in function 'cv::ml::SVMImpl::predict' error on svm.predict method

I'm creating a object classifier in opencv python using svm. Training dataset is of 200 positive and 200 negative images. For positive images first took 200 images and cropped target object from images and resized them to (64,128) size for HOG calculation. Then for negative images, First created Pyramid of images then applied sliding window of 64X128 and then calculated HOG for positive as well all windows of negative images with labels 1 and 0. Trained svm model on hog features.
I am getting error "cv2.error: OpenCV(3.4.2) C:\projects\opencv-python\opencv\modules\ml\src\svm.cpp:2010: error: (-215:Assertion failed) samples.cols == var_count && samples.type() == 5 in function 'cv::ml::SVMImpl::predict' " when i called predict function using res = svm.predict(samples[0]).ravel() method.
import cv2
import os
import time
import numpy as np
import imutils
positive_path='C:\\Users\\Admin\\3D Objects\\datqaet with hog and svm\\ROI images'
negative_path='C:\\Users\\Admin\\3D Objects\\datqaet with hog and svm\\Negative images'
def pyramid(img): #Create image Pyramid
minSize=(30, 30)
imgarr = []
while True:
scale = 2
imgarr.append(img)
w = int(img.shape[1] / scale)
img = imutils.resize(img, width=w)
if img.shape[0] < minSize[1] or img.shape[1] < minSize[0]:
break
return imgarr
def sliding_window(image, stepSize, windowSize): #Sliding window for negative images
sliding = []
for y in range(0, image.shape[0], stepSize):
for x in range(0, image.shape[1], stepSize):
sliding.append((x, y, image[y:y + windowSize[1], x:x + windowSize[0]]))
return sliding
def get_hog() :
winSize = (64,128)
blockSize = (16,16)
blockStride = (16,16)
cellSize = (8,8)
nbins = 9
derivAperture = 1
winSigma = 4.
histogramNormType = 0
L2HysThreshold = 0.2
gammaCorrection = 0
nlevels = 64
signedGradient = True
hog = cv2.HOGDescriptor(winSize,blockSize,blockStride,cellSize,nbins,derivAperture,winSigma,histogramNormType,L2HysThreshold,gammaCorrection,nlevels, signedGradient)
return hog
samples = []
labels = []
sam = []
hog = get_hog()
for filename in os.listdir(positive_path):
img = cv2.imread(os.path.join(positive_path,filename),0) #RGB image
img = cv2.resize(img,(64,128))
img = np.array(img)
hist = hog.compute(img)
hist = cv2.normalize(hist,None)
sam.append(img)
samples.append(hist)
labels.append(1)
i=0
for filename in os.listdir(negative_path):
img = cv2.imread(os.path.join(negative_path,filename),0)
(winW, winH) = (64,128)
pyr = pyramid(img)
for resized in pyr:
sliding = sliding_window(resized, stepSize=32, windowSize=(winW, winH))
for (x, y, window) in sliding:
if window.shape[0] != winH or window.shape[1] != winW:
continue
hist = hog.compute(window)
hist = cv2.normalize(hist,None)
sam.append(window)
samples.append(hist)
labels.append(0)
print(i)
i=i+1
samples = np.array(samples,dtype=np.float32)
labels = np.array(labels,dtype=int)
samples = np.squeeze(samples)
print(len(samples))
print(samples.shape)
rand = np.random.RandomState(10)
shuffle = rand.permutation(len(samples))
sam = samples[shuffle]
samples = sam[shuffle]
labels = labels[shuffle]
svm = cv2.ml.SVM_create()
svm.setKernel(cv2.ml.SVM_LINEAR)
svm.setType(cv2.ml.SVM_C_SVC)
svm.setC(2.67)
svm.setGamma(5.383)
svm_params = dict( kernel_type = cv2.ml.SVM_LINEAR,
svm_type = cv2.ml.SVM_C_SVC,
C=2.67, gamma=5.383 )
svm.train(samples,cv2.ml.ROW_SAMPLE,labels)
print("trained")
res = svm.predict(samples[0]).ravel()
print(res)
cap = cv2.VideoCapture(0)
while True:
ret, img = cap.read()
img=cv2.resize(img,(400,400))
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
(winW, winH) = (64,128)
pyr = pyramid(img)
for resized in pyr:
sliding = sliding_window(resized, stepSize=32, windowSize=(winW, winH))
for (x, y, window) in sliding:
if window.shape[0] != winH or window.shape[1] != winW:
continue
hist = hog.compute(window)
hist = cv2.normalize(hist,None)
hist = np.reshape(hist,(1,hist.shape[0]))
res = svm.predict(hist)[1].ravel()
if res == 1:
print("found")
cv2.imshow('img',img)
cv2.waitKey(10)

Resources