I am trying to use object detection for digit detection.
I found the SVHN dataset.
Speed is important in my project so I decided to apply a YOLO approach.
However, all tutorials and explanations on using YOLOv3 expect me either to be using a dataset made from the Google Open Images API or to be labeling images manually with a tool such as labelImg.py.
I, however, have a premade dataset with annotations in the PASCAL VOC format (which can be found here https://github.com/penny4860/svhn-voc-annotation-format). Because of this I do not create a labels.txt or classes.txt file, as I do no labeling myself.
I am rather at a loss on where to get started.
Any help would be appreciated.
You can use the code below to convert the annotations from PASCAL VOC to the format YOLO expects.
import glob
import os
import pickle
import xml.etree.ElementTree as ET
from os import listdir, getcwd
from os.path import join
# Sub-directories of the current working directory to convert. Each one is
# expected to hold the .jpg images together with their PASCAL VOC .xml files.
dirs = ['train', 'val']
# Class names as they appear in the <name> tag of each annotated object. The
# position of a name in this list is written out as its YOLO class id, and
# objects whose name is not listed are skipped, so replace these entries with
# the labels of your own dataset.
classes = ['person', 'car']
def getImagesInDir(dir_path):
    """Return the paths of every ``.jpg`` file located directly in *dir_path*.

    Sub-directories are not searched. The result is a new list; it is empty
    when nothing matches (including when *dir_path* does not exist).
    """
    jpg_pattern = dir_path + '/*.jpg'
    return list(glob.iglob(jpg_pattern))
def convert(size, box):
    """Convert a pixel bounding box to normalised centre/size form.

    size: ``(image_width, image_height)`` in pixels.
    box:  ``(xmin, xmax, ymin, ymax)`` in pixels -- note the x-pair comes first.

    Returns ``(x_center, y_center, width, height)``, each multiplied by the
    reciprocal of the matching image dimension. The centre is shifted by one
    pixel (``- 1``) before scaling.
    """
    scale_x = 1. / size[0]
    scale_y = 1. / size[1]
    xmin, xmax, ymin, ymax = box[0], box[1], box[2], box[3]

    center_x = (xmin + xmax) / 2.0 - 1
    center_y = (ymin + ymax) / 2.0 - 1
    box_w = xmax - xmin
    box_h = ymax - ymin

    return (center_x * scale_x, center_y * scale_y, box_w * scale_x, box_h * scale_y)
def convert_annotation(dir_path, output_path, image_path):
    """Write the YOLO label file for one image from its PASCAL VOC annotation.

    dir_path:    directory containing ``<image name>.xml``.
    output_path: directory that receives ``<image name>.txt`` (a trailing
                 slash is accepted but no longer required).
    image_path:  path of the image; only its base name without extension is
                 used to locate the annotation and name the output.

    Each kept object becomes one line ``<class id> <x> <y> <w> <h>``. Objects
    whose name is not in the module-level ``classes`` list, or that are marked
    ``difficult == 1``, are skipped. A missing or empty ``<difficult>`` tag is
    treated as 0 instead of raising.
    """
    basename_no_ext = os.path.splitext(os.path.basename(image_path))[0]
    xml_path = os.path.join(dir_path, basename_no_ext + '.xml')
    txt_path = os.path.join(output_path, basename_no_ext + '.txt')

    # Parse before creating the output so that a broken annotation does not
    # leave an empty label file behind. ET.parse opens and closes the file.
    root = ET.parse(xml_path).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    # The context manager guarantees the label file is flushed and closed.
    with open(txt_path, 'w') as out_file:
        for obj in root.iter('object'):
            difficult = (obj.findtext('difficult') or '0').strip() or '0'
            cls = obj.find('name').text
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # convert() expects the box as (xmin, xmax, ymin, ymax).
            b = tuple(float(xmlbox.find(tag).text)
                      for tag in ('xmin', 'xmax', 'ymin', 'ymax'))
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
# Driver: for every directory in `dirs`, convert all annotations into
# <dir>/yolo/ and write <dir>.txt listing the image paths, one per line.
cwd = getcwd()
for dir_path in dirs:
    full_dir_path = cwd + '/' + dir_path
    output_path = full_dir_path + '/yolo/'
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_path, exist_ok=True)
    image_paths = getImagesInDir(full_dir_path)
    # `with` closes the list file even if one of the conversions raises.
    with open(full_dir_path + '.txt', 'w') as list_file:
        for image_path in image_paths:
            list_file.write(image_path + '\n')
            convert_annotation(full_dir_path, output_path, image_path)
    print("Finished processing: " + dir_path)
Related
I am working on classifying sounds with Deep Learning and my problem is that I run out of memory when I try to convert .wav files to spectrograms using lib.load() from librosa.
# Driver: walk every <split>/<category>/ folder below `files_loc` and convert
# each .wav file it contains into a spectrogram image.
split = ['train', 'val']
categories = ['URTI', 'Healthy', 'Asthma', 'COPD', 'LRTI', 'Bronchiectasis', 'Pneumonia', 'Bronchiolitis']
files_loc = "path"
i = 0  # running count of converted files
for s in split:
    for cat in categories:
        banner = '-' * 100
        print(banner)
        print('working on ' + cat + " " + str(s) + " " + '...')
        print(banner)
        cat_dir = files_loc + s + '/' + cat + '/'
        files = [f for f in listdir(cat_dir) if isfile(join(cat_dir, f)) and is_wav(f)]
        for f in files:
            convert_to_spec_image(file_loc=files_loc, category=cat, filename=f,
                                  is_train=(s == 'train'), verbose=False)
            i = i + 1
            # 773 is the hard-coded total used for the percentage.
            print("We have processed: " + str(i) + " " + str((i / 773 * 100)) + " % " + " so far")
The function convert_to_spec_image is this:
# Create spectrogram images (the `lb` calls are presumably librosa -- confirm
# against the file's imports).
def convert_to_spec_image(file_loc, filename, category, is_train=False, verbose=False,
                          save_directory="C:/Users/raulf/Desktop/espectograms2/"):
    '''
    Convert one audio file to a spectrogram image and save it as a PNG.

    file_loc:       root folder holding the ``train/`` and ``val/`` sub-folders
                    (must end with a path separator; it is concatenated as-is).
    filename:       name of the ``.wav`` file inside ``<subset>/<category>/``.
    category:       class sub-folder name.
    is_train:       True reads from / writes to ``train/``, otherwise ``val/``.
    verbose:        print a message before and after the conversion.
    save_directory: root folder for the PNGs (must end with a separator). The
                    target ``<subset>/<category>/`` folder must already exist.

    The figure is closed in a ``finally`` block, so a failure while plotting
    or saving no longer leaves an open figure behind; figures that are never
    closed accumulate in memory over a long batch run.
    '''
    # Pick the subset once; it is used for both the input and output path.
    subset = 'train/' if is_train else 'val/'
    loc = file_loc + subset + category + '/' + filename

    if verbose:
        print('reading and converting ' + filename + '...')
    y, sr = lb.load(loc)

    plt.figure(figsize=(10, 3))
    try:
        src_ft = lb.stft(y)
        src_db = lb.amplitude_to_db(abs(src_ft))
        specshow(src_db, sr=sr, x_axis='time', y_axis='hz')
        plt.ylim(0, 5000)

        filename_img = filename.split('.wav')[0]
        save_loc = save_directory + subset + category + '/' + filename_img + '.png'
        plt.savefig(save_loc)
    finally:
        # Always release the figure, whether or not saving succeeded.
        plt.close('all')

    if verbose:
        print(filename + ' converted!')
I am trying to reuse the code from this Kaggle Notebook:
https://www.kaggle.com/danaelisanicolas/cnn-part-3-create-spectrogram-images
Thanks in advance
I'm trying to pass command-line arguments to set the values of a and r. I'm really new to Python, so any help would be appreciated.
a = 1/(# days infected)
r = infectiousness of disease
import matplotlib.pyplot as plt
import numpy as np
import sys
# Optional command-line overrides:  python <script> [a] [r]
#   a = recovery rate = 1 / (# days infected)
#   r = infectiousness of the disease
# The arguments are read BEFORE the simulation so they actually take effect,
# each from its own position, and as floats (both defaults are fractional).
a = float(sys.argv[1]) if len(sys.argv) > 1 else 0.5
r = float(sys.argv[2]) if len(sys.argv) > 2 else 0.00218

population = 763
Scur = population - 1  # number of people susceptible
Icur = 1               # number of people infected
Rcur = 0               # number of people recovered
trans_const = r        # infectiousness of disease r = kb/N
recov_rate = a         # recovery rate a = 1/(# days infected)
simlength = 20         # number of days in simulation

SIRarray = np.zeros((simlength + 1, 3))  # using floats as ~% of popn
SIRarray[0, :] = Scur, Icur, Rcur        # record initial values

# Advance the model one day at a time.
for i in range(1, simlength + 1):
    new_infected = trans_const * Scur * Icur  # = rSI
    new_recovered = recov_rate * Icur         # = aI
    Scur = Scur - new_infected
    Icur = Icur + new_infected - new_recovered
    Rcur = Rcur + new_recovered
    SIRarray[i, :] = Scur, Icur, Rcur

print("SIR Model Simulation")
print("Scur\t\tIcur\t\tRcur")
print("----------------------------------------")
for row in SIRarray:
    print("{0:.2f}\t\t{1:.2f}\t\t {2:.2f}".format(row[0], row[1], row[2]))

plt.plot(SIRarray[:, 0], "b")
plt.plot(SIRarray[:, 1], "r")
plt.plot(SIRarray[:, 2], "g")
plt.title("SIR model parameters r = " + str(trans_const) + " a = " + str(recov_rate))
# The x-axis is the row index of SIRarray, i.e. the simulation day.
plt.xlabel("Day")
plt.ylabel("Number of People")
# One legend entry per plotted curve.
plt.legend(['Susceptible People', 'Infected', 'Recovered'],
           loc='upper left')
plt.savefig('SIR Model Simulation')
plt.show()
I am working on detecting faces and then cropping them out of an image. I can crop the face, but I can't save the crops to another folder. My code is below:
import cv2
import os
import glob
def facecrop(image, save_dir="./salman/crop/"):
    """Detect faces in an image file and save each crop into *save_dir*.

    image:    path of the image file to process.
    save_dir: folder that receives the crops; it is created when missing.

    The first face is written as ``<name>_cropped_<ext>``; further faces in
    the same image get ``_1``, ``_2``, ... appended so they do not overwrite
    one another.

    Raises ValueError when the image cannot be read.
    """
    img = cv2.imread(image)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise ValueError("could not read image: " + image)

    cascade = cv2.CascadeClassifier("haarcascade_frontalface_alt.xml")
    faces = cascade.detectMultiScale(img)

    os.makedirs(save_dir, exist_ok=True)
    # Keep only the file name so the crop lands in save_dir, not beside the source.
    stem, ext = os.path.splitext(os.path.basename(image))

    for idx, (x, y, w, h) in enumerate(faces):
        # The crop extends 500 px beyond the detected box to the right and
        # downwards; slicing clamps at the image border.
        sub_face = img[y:y + h + 500, x:x + w + 500]
        suffix = "" if idx == 0 else "_" + str(idx)
        cv2.imwrite(os.path.join(save_dir, stem + "_cropped_" + suffix + ext), sub_face)
    return
# Run the face cropper over every .jpg directly inside ./salman/. The file
# list is built up front, before any crop is written.
jpg_paths = glob.glob('./salman/*.jpg')
for image in jpg_paths:
    facecrop(image)
I have a script that produces 4 images (Below I only include 2 as an example of output). I have another function that determines what % cat or dog the picture is and I would like to call that function in the title of the following code
import cv2
import matplotlib.pyplot as plt
def Mpic():
    """Show four fixed images, each with its title, in one matplotlib figure.

    The images are read with OpenCV, converted from BGR to RGB for display,
    and placed in the first four cells of a 4x2 subplot grid with the axis
    ticks removed.
    """
    plt.figure(figsize=(15, 30))

    pupper_dir = r"data/dogscats1/pupper"
    test_dir = r"data/dogscatspeople/test1"
    image_files = [
        pupper_dir + "/cat.jpg",
        test_dir + "/1.jpg",
        test_dir + "/2.jpg",
        pupper_dir + "/dog.jpg",
    ]
    images = [
        cv2.cvtColor(cv2.imread(image_file, 1), cv2.COLOR_BGR2RGB)
        for image_file in image_files
    ]
    titles = ['Kitty', '% Cat = , Dog % = ', '% Cat = , Dog % =', 'Pupper']

    for cell, (picture, caption) in enumerate(zip(images, titles), start=1):
        plt.subplot(4, 2, cell)
        plt.imshow(picture)
        plt.xticks([])
        plt.title(caption)
        plt.yticks([])
    plt.show()


if __name__ == "__main__":
    Mpic()
This is the original function that calls the array:
def pred_datsci(file_path):
    """Predict the class of one image file and return its class name.

    ``learn.precompute`` is switched off for the duration of the call and put
    back afterwards, even when the prediction raises. The exponentiated
    prediction, multiplied by 100, is printed as a side effect (presumably
    per-class percentages -- confirm against the learner's output).
    """
    saved_precompute = learn.precompute
    learn.precompute = False
    try:
        # Only the validation transforms are needed for a single prediction.
        _, val_tfms = tfms_from_model(arch, sz)
        transformed = val_tfms(open_image(file_path))
        # [None] adds a leading axis so the single image is passed as a batch.
        raw_pred = learn.predict_array(transformed[None])
        print(np.exp(raw_pred) * 100)
        best_index = np.argmax(np.exp(raw_pred))
        return data.classes[best_index]
    finally:
        learn.precompute = saved_precompute
Which can return something along the lines of:
pred_datsci(f"data/dogscats1/valid/dogs/12501.jpg")
I want it to call it in the form of something like this:
titles = [ cat % = pred_datsci(f"data/dogscats1/valid/cats/cat.1.jpg"),"etc"]
titles = [ "cat % = {}".format(pred_datsci("data/dogscats1/valid/cats/cat.1.jpg")),"etc"]
I'm having a problem debugging the following code: for some reason the perceptron stops updating itself after a couple of steps, with random values as the weights. I have tried not using a class for my work and stripped everything down to the bare minimum, but still had the same problem. I have also checked Perceptron.train(), and it works just fine. So I'm guessing the main problem is with the training loop itself. I am kind of new to Python programming, so any help would be appreciated.
import random
import Plot as plt
import numpy as np
# ----- Line that separates the two classes of data: y = f(x) ----- #
def f(x):
    """Return the height of the separating line at *x* (here simply y = x)."""
    y_on_line = x
    return y_on_line
# ----- Activation function: unit step ----- #
def act(x):
    """Return 1.0 when *x* is zero or positive, otherwise 0.0."""
    return 1.0 if x >= 0 else 0.0
class Point:
    """A 2-D sample labelled by which side of the line ``y = f(x)`` it lies on."""

    def __init__(self, x, y):
        # X, Y: coordinates of the sample.
        self.X = x
        self.Y = y
        # Target: 1.0 when the point is strictly above the line, else 0.0.
        self.Target = 1.0 if y > f(x) else 0.0
class Perceptron:
    """A single perceptron with ``n`` weights trained by the error-correction rule."""

    def __init__(self, n, actFunc = act, lr = 0.2):
        """Create a perceptron.

        n:       number of weights (inputs, including any bias input).
        actFunc: activation applied to the weighted sum.
        lr:      learning rate used by ``train``.
        """
        self.Weights = [0 for i in range(n)]
        self.ActFunc = actFunc
        self.LR = lr

    def guess(self, inputs):
        """Return the activation of the weighted sum of *inputs*."""
        valSum = sum(weight * value for weight, value in zip(self.Weights, inputs))
        return self.ActFunc(valSum)

    def train(self, inputs, target):
        """Nudge every weight by ``LR * (target - guess) * input``.

        When the guess already equals the target the error is 0 and no weight
        changes, so the weights stop moving once every sample is classified
        correctly.
        """
        err = target - self.guess(inputs)
        for i in range(len(self.Weights)):
            self.Weights[i] += self.LR * err * inputs[i]

    def printWeights(self):
        """Print one line per weight, followed by an empty line."""
        for i, weight in enumerate(self.Weights):
            print("WEIGHT[" + str(i) + "] = " + str(weight))
        print("")

    def lineFunc(self):
        """Describe the learned decision boundary as text.

        With inputs ``[1, x, y]`` the boundary is where the weighted sum is
        zero:  w0 + w1*x + w2*y = 0  =>  y = -w0/w2 + (-w1/w2) * x.
        (The earlier ``w0/(1 - w2)`` form solved ``y = w0 + w1*x + w2*y``,
        which is not the boundary.)

        When w2 is 0 the boundary cannot be written as y = ...: a vertical
        line is reported as ``x = <value>``, and all-zero slope weights are
        reported as undefined instead of dividing by zero.
        """
        w0 = self.Weights[0]
        w1 = self.Weights[1]
        w2 = self.Weights[2]
        if w2 == 0:
            if w1 == 0:
                return "undefined (w1 and w2 are both zero)"
            return "x = " + str(-w0 / w1)
        return (str(-w0/w2) + " + " + str(-w1/w2) + " * x")
# ----- Initialising data ----- #
brain = Perceptron(3)
n = 20  # number of random sample points
points = [Point(random.uniform(-10, 10), random.uniform(-10, 10)) for _ in range(n)]
t = 1000  # number of training steps

# ----- Training ----- #
# NOTE(review): the original indentation was lost; this assumes the weights
# are printed after every training step and the line once at the end.
for i in range(t):
    point = points[random.randrange(0, n)]
    sample = [1, point.X, point.Y]  # leading 1 is the bias input
    brain.train(sample, point.Target)
    brain.printWeights()
print(brain.lineFunc())
I found the problem myself. There was a bug in the lineFunc() method: the return value was wrong, and it should have been:
return (str(-w0/w2) + " + " + str(-w1/w2) + " * x")