Why librosa.load() Runs Out of Memory when generating spectrograms? - python-3.x

I am working on classifying sounds with Deep Learning, and my problem is that I run out of memory when I try to convert .wav files to spectrograms using librosa.load().
# Walk every split/category folder and convert each .wav file to a
# spectrogram image.
# NOTE(review): the progress percentage divides by a hard-coded 773 —
# presumably the total file count; it will be wrong if the dataset changes.
# `listdir`, `isfile`, `join`, `is_wav`, and `convert_to_spec_image` are
# assumed to be imported/defined elsewhere in the file — confirm.
split = ['train','val']
categories=['URTI', 'Healthy', 'Asthma', 'COPD', 'LRTI', 'Bronchiectasis','Pneumonia', 'Bronchiolitis']
files_loc = "path"  # root folder containing 'train/' and 'val/' subfolders
i=0  # running count of converted files across all splits/categories
for s in split:
    for cat in categories:
        print('-' * 100)
        print('working on ' + cat +" "+str(s)+" "+ '...')
        print('-' * 100)
        # Every regular .wav file directly inside <files_loc><split>/<category>/
        files = [f for f in listdir(files_loc + s + '/' + cat + '/') if isfile(join(files_loc + s + '/' + cat + '/', f)) and is_wav(f)]
        for f in files:
            convert_to_spec_image(file_loc = files_loc, category=cat, filename=f, is_train=(s == 'train'), verbose=False)
            i=i+1
            print("We have processed: "+str(i)+" "+ str((i/773*100))+" % "+" so far")
The function convert_to_spec_image is this:
# create images using librosa spectrogram
def convert_to_spec_image(file_loc, filename, category, is_train=False, verbose=False,
                          save_directory="C:/Users/raulf/Desktop/espectograms2/"):
    '''
    Convert one audio file to a spectrogram image.

    file_loc : root folder that holds the 'train/' and 'val/' subtrees.
    filename : the .wav file name (with extension); path is built from the
               other arguments.
    category : class subfolder name.
    is_train : read from (and save under) 'train/' when True, 'val/' otherwise.
    verbose  : print progress messages.
    save_directory : root output folder (generalized from the previously
               hard-coded path; the default preserves old behavior).

    Saves <save_directory><train|val>/<category>/<filename>.png.
    '''
    subset = 'train/' if is_train else 'val/'
    loc = file_loc + subset + category + '/' + filename
    if verbose:
        print('reading and converting ' + filename + '...')
    y, sr = lb.load(loc)
    fig = plt.figure(figsize=(10, 3))
    try:
        src_ft = lb.stft(y)
        src_db = lb.amplitude_to_db(abs(src_ft))
        specshow(src_db, sr=sr, x_axis='time', y_axis='hz')
        plt.ylim(0, 5000)
        filename_img = filename.split('.wav')[0]
        save_loc = save_directory + subset + category + '/' + filename_img + '.png'
        plt.savefig(save_loc)
    finally:
        # Always release the figure, even when savefig raises: figures that
        # are never closed accumulate and exhaust memory when thousands of
        # files are converted in one run.
        plt.close(fig)
        plt.close('all')
    if verbose:
        print(filename + ' converted!')
I am trying to reuse the code from this Kaggle Notebook:
https://www.kaggle.com/danaelisanicolas/cnn-part-3-create-spectrogram-images
Thanks in advance

Related

Python loop code with try: & except: pass

When scraping list of json files, sometimes, file are missing and can't be downloaded.
On my python script, when that case occurs, the script display an error
json.decoder.JSONDecodeError : Expecting value: line 1 column 1 (char 0)
How can I tell the script to continue the loop when this error occurs?
I tried adding a try/except block, but it failed with an IndentationError.
This is the code :
# NOTE(review): snippet from the question — it aborts the whole loop when
# one race's JSON payload is missing/invalid (json.decoder.JSONDecodeError
# raised by r3.json()), because there is no error handling around the body.
# `x1`, `today2`, `link_append`, `headers`, `path`, `requests`, `sleep`, and
# `json` are assumed to be defined/imported elsewhere — confirm.
RACE_L = x1["pageProps"]["initialState"]["racecards"]["races"][today2]
for r1 in RACE_L:
    id_race = r1["uuid"]
    link2go = link_append + id_race + '.json'
    n1 = "races"  # NOTE(review): n1 is never used afterwards
    n12 = "races"
    n2 = r1["uuid"]
    name1 = n12 + '-' + n2
    # Final file name: <date>_races-<uuid>.json
    name1 = today2 + '_' + name1 + '.json'
    with open(path +'%s' %name1,'w',encoding='utf-8') as f2:
        print('Writing %s into file' %name1)
        r3 = requests.get(link2go, headers=headers)
        sleep(2)  # throttle between requests
        x3 = r3.json()
        json.dump(x3, f2, indent=4, ensure_ascii=False)
Put the try/except blocks this way:
# Fetch each race's JSON payload and write it to disk, skipping races whose
# payload is missing or malformed instead of aborting the whole loop.
RACE_L = x1["pageProps"]["initialState"]["racecards"]["races"][today2]
for r1 in RACE_L:
    try:
        id_race = r1["uuid"]
        link2go = link_append + id_race + '.json'
        # Final file name: <date>_races-<uuid>.json
        name1 = today2 + '_' + "races" + '-' + id_race + '.json'
        # Download BEFORE opening the output file, so a failed request never
        # leaves an empty file behind (the original opened the file first).
        r3 = requests.get(link2go, headers=headers)
        sleep(2)  # throttle between requests
        x3 = r3.json()
        with open(path + '%s' % name1, 'w', encoding='utf-8') as f2:
            print('Writing %s into file' % name1)
            json.dump(x3, f2, indent=4, ensure_ascii=False)
    except (KeyError, ValueError, requests.exceptions.RequestException) as err:
        # json.decoder.JSONDecodeError subclasses ValueError, so a bad payload
        # lands here.  A bare `except: pass` (as in the original answer) would
        # also swallow KeyboardInterrupt/SystemExit and hide genuine bugs.
        print('Skipping race %s: %s' % (r1.get("uuid", "?"), err))
        continue

Pillow pasting doesn't overlay correctly

I'm trying to merge 2 images together using Pillow. A problem arises when one image contains the same picture as the other, but with transparency (I'll post an example later). Here's the code I'm using:
import os, subprocess
from PIL import Image, ImageDraw, ImageFilter
# Creates card art: composite each card's background with its (possibly
# transparent) effect layer and save the result once per card.
rootdir = os.listdir('C:/Users/amati/Desktop/Dokkan Assets/japan/character/card')
for entries in rootdir:
    newdir = 'C:/Users/amati/Desktop/Dokkan Assets/japan/character/card/' + str(entries)
    bg = newdir + '/' + 'card_' + str(entries) + '_bg.png'
    effect = newdir + '/' + 'card_' + str(entries) + '_effect.png'
    out_path = 'C:/Users/amati/Desktop/prova/card_' + str(entries) + '.png'
    if os.path.exists(out_path):
        # Skip before doing any decoding work (the original decoded and
        # composited the images even when the output already existed).
        continue
    try:
        # Context managers close the underlying files; the original leaked
        # two file handles per card.
        with Image.open(bg) as image1, Image.open(effect) as image3:
            rgbimage1 = image1.convert('RGBA')
            rgbimage3 = image3.convert('RGBA')
            width, height = rgbimage1.size
            final = Image.new('RGBA', (width, height), (0, 0, 0, 255))
            final.paste(rgbimage1, (0, 0))
            # Third argument = mask: uses the effect's own alpha channel so
            # transparent regions don't overwrite the background.
            final.paste(rgbimage3, (0, 0), rgbimage3)
            # PNG ignores the JPEG 'quality' option, so it was dropped.
            final.save(out_path)
    except IOError:
        # Cards missing either layer are skipped on purpose (best-effort).
        continue
Images:
Image 1
Image 2
Result
Expected result using Paint 3D

Implementing a YOLOv3 object detector for the SVHN dataset

I am trying to use object detection for digit detection.
I found the SVHN dataset.
Speed is important in my project so I decided to apply a YOLO approach.
However, all tutorials and explanations on using YOLOv3 either expect me to be using a dataset made from the Google Open Images API or by manually labeling images using a tool such as labelImg.
I however have a premade dataset with annotations in the PASCAL VOC format (which can be found here https://github.com/penny4860/svhn-voc-annotation-format). Because of this I do not create a labels.txt or classes.txt file, as I do not do any labeling myself.
I am rather at a loss on where to get started.
Any help would be appreciated.
You can follow the below code to convert from PASCAL VOC to YOLO supported format.
import glob
import os
import pickle
import xml.etree.ElementTree as ET
from os import listdir, getcwd
from os.path import join
# Directories (relative to the cwd) to scan for images/annotations, and the
# class names whose list index becomes the leading field of each YOLO line.
dirs = ['train', 'val']
classes = ['person', 'car']
def getImagesInDir(dir_path, pattern='*.jpg'):
    """Return the paths of image files directly inside *dir_path*.

    dir_path : directory to scan (not recursive).
    pattern  : glob pattern for the files to match; defaults to '*.jpg'
               (generalized from the previously hard-coded extension).

    Returns a list of matching paths (order is whatever glob yields).
    """
    # glob already returns a list; the original copied it element by element.
    return glob.glob(dir_path + '/' + pattern)
def convert(size, box):
    """Convert a VOC-style box to YOLO's normalised representation.

    size : (image_width, image_height).
    box  : (xmin, xmax, ymin, ymax) in pixels.

    Returns (x_center, y_center, width, height), each scaled into [0, 1]
    by the image dimensions (the `- 1` offset matches the reference
    darknet conversion script).
    """
    img_w, img_h = size
    xmin, xmax, ymin, ymax = box
    dw = 1. / img_w
    dh = 1. / img_h
    x_center = ((xmin + xmax) / 2.0 - 1) * dw
    y_center = ((ymin + ymax) / 2.0 - 1) * dh
    box_w = (xmax - xmin) * dw
    box_h = (ymax - ymin) * dh
    return (x_center, y_center, box_w, box_h)
def convert_annotation(dir_path, output_path, image_path, class_names=None):
    """Convert one PASCAL VOC XML annotation to a YOLO .txt label file.

    dir_path    : directory containing '<image stem>.xml'.
    output_path : directory (with trailing separator) for '<image stem>.txt'.
    image_path  : path of the image; only its stem is used.
    class_names : class list whose index becomes the YOLO class id; defaults
                  to the module-level `classes` (backward compatible).

    Objects whose class is not listed, or marked difficult, are skipped.
    """
    if class_names is None:
        class_names = classes
    basename = os.path.basename(image_path)
    basename_no_ext = os.path.splitext(basename)[0]
    tree = ET.parse(dir_path + '/' + basename_no_ext + '.xml')
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    # `with` guarantees the label file is closed even if parsing raises;
    # the original leaked both the XML and the output file handles.
    with open(output_path + basename_no_ext + '.txt', 'w') as out_file:
        for obj in root.iter('object'):
            difficult = obj.find('difficult').text
            cls = obj.find('name').text
            if cls not in class_names or int(difficult) == 1:
                continue
            cls_id = class_names.index(cls)
            xmlbox = obj.find('bndbox')
            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
# Driver: for each split directory, write a '<split>.txt' manifest of image
# paths and one YOLO label file per image under '<split>/yolo/'.
cwd = getcwd()
for dir_path in dirs:
    full_dir_path = cwd + '/' + dir_path
    output_path = full_dir_path + '/yolo/'
    # exist_ok avoids the exists()/makedirs() check-then-act race.
    os.makedirs(output_path, exist_ok=True)
    image_paths = getImagesInDir(full_dir_path)
    # `with` closes the manifest even if a conversion raises; the original
    # leaked the handle on error.
    with open(full_dir_path + '.txt', 'w') as list_file:
        for image_path in image_paths:
            list_file.write(image_path + '\n')
            convert_annotation(full_dir_path, output_path, image_path)
    print("Finished processing: " + dir_path)

name error:name 'wav_filename' is not defined

name 'wav_filename' is not defined
def Text2Speech(file_name, InpText):
    """Speak *InpText* aloud.

    Synthesizes the text to mp3 with gTTS, converts it to wav with ffmpeg,
    and plays the wav.  The wav acts as a cache: if it already exists it is
    played immediately without re-synthesizing.

    file_name : base name (no extension) for the cached audio files.
    InpText   : text to speak.
    """
    Mp3Extension = ".mp3"
    waveExtension = ".wav"
    mp3FilePath = "C:\\Texttospeech\\" + file_name + Mp3Extension
    waveFilePath = "C:\\Texttospeech\\" + file_name + waveExtension
    if os.path.isfile(waveFilePath):
        PlaySound(waveFilePath)
        return
    tts = gTTS(text=InpText, lang="en-us")
    tts.save(mp3FilePath)
    # BUG FIX: the original ran ffmpeg with the mp3 path as BOTH input and
    # output, so the .wav was never created and PlaySound always failed.
    # (The original also wrote the mp3 into a TemporaryFile and immediately
    # closed it — dead code, removed.)
    subprocess.call(['C:\\Temp\\ffmpeg\\bin\\ffmpeg', '-i', mp3FilePath, waveFilePath])
    PlaySound(waveFilePath)
def PlaySound(wavFile):
    """Stream a .wav file to the default audio output with PyAudio.

    wavFile : path of the wave file to play.

    On read errors (missing/corrupt file) a message is written to stderr
    and the function returns None instead of raising.
    """
    chunk = 1024
    try:
        wf = wave.open(wavFile, 'rb')
    except IOError as ioe:
        # BUG FIX: the original referenced the undefined name `wav_filename`
        # here, so any I/O error raised NameError instead of being reported.
        sys.stderr.write('IOError on file ' + wavFile + '\n' +
                         str(ioe) + '. Skipping.\n')
        return
    except EOFError as eofe:
        sys.stderr.write('EOFError on file ' + wavFile + '\n' +
                         str(eofe) + '. Skipping.\n')
        return
    try:
        # Instantiate PyAudio and open an output stream matching the file.
        p = pyaudio.PyAudio()
        stream = p.open(
            format=p.get_format_from_width(wf.getsampwidth()),
            channels=wf.getnchannels(),
            rate=wf.getframerate(),
            output=True)
        try:
            data = wf.readframes(chunk)
            while len(data) > 0:
                stream.write(data)
                data = wf.readframes(chunk)
        finally:
            # Release the audio device; the original leaked the stream and
            # the PyAudio instance.
            stream.stop_stream()
            stream.close()
            p.terminate()
    finally:
        wf.close()
wav_filename was not defined in the function PlaySound.

Convert Wavefront .obj to .off

How can I convert a Wavefront's .obj file to a .off file ?
You can use the open source GUI software Meshlab.
File > Import Mesh (Ctrl-I)
File > Export Mesh As and choose "Object file format (.off)"
You can use the CLI, closed source, binary only, meshconv
chmod u+x meshconv
./meshconv input.obj -c off -o output.off
However, the result seems to be a bit different from what I get in my answer using Meshlab, because I could not load the resulting .off file in CGAL (the error looks like this one).
This should work for triangular meshes
def conv_obj(file):
    """Convert a triangular-mesh Wavefront .obj file to OFF format.

    file : path of the .obj file.  Writes '<stem>.off' next to it and
    returns "done".

    Fixes over the original implementation:
    - single pass with `with open(...)`: the original opened the file twice,
      leaked the first handle, and its line-counting loop
      (`while "\\n" in readline()`) dropped the last line whenever the file
      had no trailing newline;
    - output name via os.path.splitext: `file.strip("obj")` strips the
      characters o/b/j from BOTH ends, mangling names like "job.obj";
    - faces given as "v/vt/vn" are handled by keeping the vertex index
      before the first '/' (the original crashed on them).
    """
    vertex_lines = []
    face_lines = []
    with open(file) as src:
        for raw in src:
            fields = raw.split()
            if not fields:
                continue
            if fields[0] == "v":
                vertex_lines.append(" ".join(fields[1:4]))
            elif fields[0] == "f":
                # OFF uses 0-based vertex indices; .obj is 1-based.
                idx = [int(tok.split('/')[0]) - 1 for tok in fields[1:4]]
                face_lines.append("3 " + " ".join(str(i) for i in idx))
    # OFF header: vertex count, face count, edge count (0 = unspecified).
    header = "OFF\n" + str(len(vertex_lines)) + " " + str(len(face_lines)) + " 0\n"
    out_path = os.path.splitext(file)[0] + ".off"
    with open(out_path, "w") as dst:
        dst.write(header)
        for line in vertex_lines:
            dst.write(line + "\n")
        for line in face_lines:
            dst.write(line + "\n")
    return "done"

Resources