When scraping a list of JSON files, some files are sometimes missing and can't be downloaded.
When that happens, my Python script displays an error:
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
How can I make the script continue the loop when this error occurs?
I tried to add a try/except block, but without success (IndentationError).
This is the code:
RACE_L = x1["pageProps"]["initialState"]["racecards"]["races"][today2]
for r1 in RACE_L:
    id_race = r1["uuid"]
    link2go = link_append + id_race + '.json'
    n1 = "races"
    n12 = "races"
    n2 = r1["uuid"]
    name1 = n12 + '-' + n2
    name1 = today2 + '_' + name1 + '.json'
    with open(path + '%s' % name1, 'w', encoding='utf-8') as f2:
        print('Writing %s into file' % name1)
        r3 = requests.get(link2go, headers=headers)
        sleep(2)
        x3 = r3.json()
        json.dump(x3, f2, indent=4, ensure_ascii=False)
Put the try and except blocks this way:
RACE_L = x1["pageProps"]["initialState"]["racecards"]["races"][today2]
for r1 in RACE_L:
    try:
        id_race = r1["uuid"]
        link2go = link_append + id_race + '.json'
        n1 = "races"
        n12 = "races"
        n2 = r1["uuid"]
        name1 = n12 + '-' + n2
        name1 = today2 + '_' + name1 + '.json'
        with open(path + '%s' % name1, 'w', encoding='utf-8') as f2:
            print('Writing %s into file' % name1)
            r3 = requests.get(link2go, headers=headers)
            sleep(2)
            x3 = r3.json()
            json.dump(x3, f2, indent=4, ensure_ascii=False)
    except:
        pass
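If you would rather not swallow every possible exception, a variant of the loop is sketched below (an illustration, not a drop-in for your exact requirements): it catches only the JSON decoding failure, reports which URL was skipped, and only opens the output file once the response has parsed, so no empty files are left behind. json.decoder.JSONDecodeError is a subclass of ValueError, so catching ValueError also covers requests builds that raise their own decode error.

for r1 in RACE_L:
    id_race = r1["uuid"]
    link2go = link_append + id_race + '.json'
    name1 = today2 + '_' + "races" + '-' + id_race + '.json'
    r3 = requests.get(link2go, headers=headers)
    sleep(2)
    try:
        x3 = r3.json()
    except ValueError:  # json.decoder.JSONDecodeError is a ValueError
        print('No valid JSON at %s, skipping' % link2go)
        continue  # move on to the next race
    with open(path + '%s' % name1, 'w', encoding='utf-8') as f2:
        print('Writing %s into file' % name1)
        json.dump(x3, f2, indent=4, ensure_ascii=False)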
Hey there awesome peeps,
I am trying to retrieve some trend information based on some keywords that I have in a list (1000 keywords). In order to minimize the chance of getting blocked by Google, I use a cutoff of 50 keywords and a 10-second pause. At the moment I get an error saying "Length of values does not match length of index". It fails on this line:
df3['Trend'] = trends
If anyone can help I will really appreciate it.
Thanks!
!pip install pytrends
import pandas as pd
import json
import time
from pytrends.request import TrendReq
get_gsc_file = "/content/Queries.csv"
sortby = "Clicks"
cutoff = 50
pause = 10
timeframe = "today 3-m"
geo = "US"
df = pd.read_csv(get_gsc_file, encoding='utf-8')
df.sort_values(by=[sortby], ascending=False, inplace=True)
df = df[:cutoff]
d = {'Keyword': [], sortby:[], 'Trend': []}
df3 = pd.DataFrame(data=d)
keywords = []
trends = []
metric = df[sortby].tolist()
up = 0
down = 0
flat = 0
na = 0
for index, row in df.iterrows():
    keyword = row['Top queries']
    pytrends = TrendReq(hl='en-US', tz=360, retries=2, backoff_factor=0.1)
    kw_list = [keyword]
    pytrends.build_payload(kw_list, cat=0, timeframe=timeframe, geo=geo, gprop='')
    df2 = pytrends.interest_over_time()
    keywords.append(keyword)
    try:
        trend1 = int((df2[keyword][-5] + df2[keyword][-4] + df2[keyword][-3])/3)
        trend2 = int((df2[keyword][-4] + df2[keyword][-3] + df2[keyword][-2])/3)
        trend3 = int((df2[keyword][-3] + df2[keyword][-2] + df2[keyword][-1])/3)
        if trend3 > trend2 and trend2 > trend1:
            trends.append('UP')
            up += 1
        elif trend3 < trend2 and trend2 < trend1:
            trends.append('DOWN')
            down += 1
        else:
            trends.append('FLAT')
            flat += 1
    except:
        trends.append('N/A')
        na += 1
    time.sleep(pause)
df3['Keyword'] = keywords
df3['Trend'] = trends
df3[sortby] = metric
def colortable(val):
    if val == 'DOWN':
        color = "lightcoral"
    elif val == 'UP':
        color = "lightgreen"
    elif val == 'FLAT':
        color = "lightblue"
    else:
        color = 'white'
    return 'background-color: %s' % color
df3 = df3.style.applymap(colortable)
total = len(trends)
print("Up: " + str(up) + " | " + str(round((up/total)*100,0)) + "%")
print("Down: " + str(down) + " | " + str(round((down/total)*100,0)) + "%")
print("Flat: " + str(flat) + " | " + str(round((flat/total)*100,0)) + "%")
print("N/A: " + str(na) + " | " + str(round((na/total)*100,0)) + "%")
df3
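This error means the list being assigned has a different length from the DataFrame's current index. A minimal sketch of one way around it (assuming keywords, metric, and trends are exactly the columns you want, and that they all reached the same length): build df3 from the finished lists in one step, instead of assigning columns one by one to the empty frame created before the loop.

# After the loop, construct the DataFrame in one step from the collected lists.
assert len(keywords) == len(trends) == len(metric), "lists drifted out of sync"
df3 = pd.DataFrame({'Keyword': keywords, sortby: metric, 'Trend': trends})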
I am working on classifying sounds with deep learning, and my problem is that I run out of memory when I try to convert .wav files to spectrograms using lb.load() from librosa.
split = ['train','val']
categories=['URTI', 'Healthy', 'Asthma', 'COPD', 'LRTI', 'Bronchiectasis','Pneumonia', 'Bronchiolitis']
files_loc = "path"
i=0
for s in split:
    for cat in categories:
        print('-' * 100)
        print('working on ' + cat + " " + str(s) + " " + '...')
        print('-' * 100)
        files = [f for f in listdir(files_loc + s + '/' + cat + '/') if isfile(join(files_loc + s + '/' + cat + '/', f)) and is_wav(f)]
        for f in files:
            convert_to_spec_image(file_loc=files_loc, category=cat, filename=f, is_train=(s == 'train'), verbose=False)
            i = i + 1
            print("We have processed: " + str(i) + " " + str((i/773*100)) + " % " + " so far")
The function convert_to_spec_image is this:
#create images using librosa spectrogram
def convert_to_spec_image(file_loc, filename, category, is_train=False, verbose=False):
    '''
    Converts audio file to spec image
    Input file includes path
    Saves the file to a png image in the save_directory
    '''
    train_ = 'train/'
    val_ = 'val/'
    loc = file_loc + train_ + category + '/' + filename
    if is_train == False:
        loc = file_loc + val_ + category + '/' + filename
    if verbose == True:
        print('reading and converting ' + filename + '...')
    y, sr = lb.load(loc)

    #Plot signal
    plt.figure(figsize=(10, 3))
    src_ft = lb.stft(y)
    src_db = lb.amplitude_to_db(abs(src_ft))
    specshow(src_db, sr=sr, x_axis='time', y_axis='hz')
    plt.ylim(0, 5000)

    save_directory = "C:/Users/raulf/Desktop/espectograms2/"
    filename_img = filename.split('.wav')[0]
    save_loc = save_directory + train_ + category + '/' + filename_img + '.png'
    if is_train == False:
        save_loc = save_directory + val_ + category + '/' + filename_img + '.png'
    plt.savefig(save_loc)
    plt.close('all')

    if verbose == True:
        print(filename + ' converted!')
    plt.close('all')
I am trying to reuse the code from this Kaggle Notebook:
https://www.kaggle.com/danaelisanicolas/cnn-part-3-create-spectrogram-images
Thanks in advance
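A minimal sketch of one common mitigation, assuming the memory growth comes from Matplotlib keeping figures alive (the helper name save_spectrogram below is made up for illustration): use the non-interactive Agg backend, hold a reference to the figure, close that exact figure after saving, and let the garbage collector reclaim the audio buffers.

import gc
import matplotlib
matplotlib.use('Agg')  # non-interactive backend: no GUI holds figures alive
import matplotlib.pyplot as plt
import librosa as lb
import librosa.display

def save_spectrogram(wav_path, png_path):
    # Load the audio, render its dB spectrogram to disk, then free everything.
    y, sr = lb.load(wav_path)
    fig = plt.figure(figsize=(10, 3))
    src_db = lb.amplitude_to_db(abs(lb.stft(y)))
    lb.display.specshow(src_db, sr=sr, x_axis='time', y_axis='hz')
    plt.ylim(0, 5000)
    fig.savefig(png_path)
    plt.close(fig)   # release this figure explicitly
    del y, src_db
    gc.collect()     # encourage Python to return the freed buffers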
I wrote this code, but it is very slow. Is there a way to make it execute faster?
mirTable has 8789 rows and tgtTable has 9100 rows, and it takes 5 minutes to execute this code.
mirTable = mirTable.to_dict()
tgtTable = tgtTable.to_dict()
for mn in range(mLen):
    m = 0
    for lk in range(tLen):
        g = ""
        h = ""
        if mirTable['UPDATE_KEY'][mn] != tgtTable['UPDATE_KEY'][lk]:
            m = 9
        if row.INTERFACE_KEY == None:
            g = str(mirTable['CONTACT_SRC_ID'][mn])
        else:
            g = row.INTERFACE_KEY
            g = str(mirTable[g][mn])
        if row.INTERFACE_KEY_2 != None:
            c = row.INTERFACE_KEY_2
            h = "_" + str(mirTable[c][mn])
        else:
            h = None
        a = ""
        if row.INTERFACE_KEY_2 == None:
            a = "drft_" + g + "_" + row.INTERFACE_TYPE + "_" + str(curDate) + ".xml"
        else:
            a = "drft_" + g + h + "_" + row.INTERFACE_TYPE + "_" + str(curDate) + ".xml"
        if m == 9:
            f.append({i: mirTable[i][mn] for i in (mirTable.keys())})
            p.append(mirTable['UPDATE_KEY'][mn])
            xml_FileName.append(a)
t1 = time.time()
print(t1-t0)
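A sketch of the usual fix, under the assumption that the intent of the m == 9 flag is "keep each mirTable row whose UPDATE_KEY does not appear anywhere in tgtTable" (that reading is an assumption; the names mirTable, tgtTable, row, curDate, f, p and xml_FileName come from the question): build a set of the target keys once, so every mirror row is checked in constant time instead of being compared against all ~9100 target rows.

# mirTable and tgtTable are the dicts produced by .to_dict() above.
target_keys = set(tgtTable['UPDATE_KEY'].values())

for mn in range(mLen):
    if mirTable['UPDATE_KEY'][mn] in target_keys:
        continue  # key exists in the target table: nothing to collect

    # Build the file-name pieces once per mirror row; they do not depend on lk.
    if row.INTERFACE_KEY is None:
        g = str(mirTable['CONTACT_SRC_ID'][mn])
    else:
        g = str(mirTable[row.INTERFACE_KEY][mn])

    if row.INTERFACE_KEY_2 is None:
        a = "drft_" + g + "_" + row.INTERFACE_TYPE + "_" + str(curDate) + ".xml"
    else:
        h = "_" + str(mirTable[row.INTERFACE_KEY_2][mn])
        a = "drft_" + g + h + "_" + row.INTERFACE_TYPE + "_" + str(curDate) + ".xml"

    f.append({i: mirTable[i][mn] for i in mirTable.keys()})
    p.append(mirTable['UPDATE_KEY'][mn])
    xml_FileName.append(a)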
I get the error NameError: name 'wav_filename' is not defined with this code:
def Text2Speech(file_name, InpText):
    Mp3Extension = ".mp3"
    waveExtension = ".wav"
    mp3FilePath = "C:\\Texttospeech\\" + file_name + Mp3Extension
    waveFilePath = "C:\\Texttospeech\\" + file_name + waveExtension
    if os.path.isfile(waveFilePath):
        PlaySound(waveFilePath)
    else:
        tts = gTTS(text=InpText, lang="en-us")
        tts.save(mp3FilePath)
        f = TemporaryFile()
        tts.write_to_fp(f)
        f.close()
        subprocess.call(['C:\\Temp\\ffmpeg\\bin\\ffmpeg', '-i', mp3FilePath, mp3FilePath])
        PlaySound(waveFilePath)
    return

def PlaySound(wavFile):
    chunk = 1024
    try:
        wf = wave.open(wavFile, 'rb')
    except IOError as ioe:
        sys.stderr.write('IOError on file ' + wav_filename + '\n' +
                         str(ioe) + '. Skipping.\n')
        return
    except EOFError as eofe:
        sys.stderr.write('EOFError on file ' + wav_filename + '\n' +
                         str(eofe) + '. Skipping.\n')
        return
    # Instantiate PyAudio.
    p = pyaudio.PyAudio()
    stream = p.open(
        format = p.get_format_from_width(wf.getsampwidth()),
        channels = wf.getnchannels(),
        rate = wf.getframerate(),
        output = True)
    data = wf.readframes(chunk)
    while len(data) > 0:
        stream.write(data)
        data = wf.readframes(chunk)
wav_filename is not defined anywhere in the function PlaySound; the parameter it receives is called wavFile, so use that name in the error messages.
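A minimal version of the corrected error handling, using the wavFile parameter the function actually receives (the rest of the function stays as in the question):

def PlaySound(wavFile):
    chunk = 1024
    try:
        wf = wave.open(wavFile, 'rb')
    except IOError as ioe:
        sys.stderr.write('IOError on file ' + wavFile + '\n' +
                         str(ioe) + '. Skipping.\n')
        return
    except EOFError as eofe:
        sys.stderr.write('EOFError on file ' + wavFile + '\n' +
                         str(eofe) + '. Skipping.\n')
        return
    # ... rest of the function unchanged ...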
How can I convert a Wavefront .obj file to a .off file?
You can use the open-source GUI software MeshLab.
File > Import Mesh (Ctrl-I)
File > Export Mesh As and choose "Object file format (.off)"
You can use meshconv, a closed-source, binary-only CLI tool:
chmod u+x meshconv
./meshconv input.obj -c off -o output.off
However, the result seems to be a bit different from what I get in my answer using MeshLab, because I could not load the resulting .off file in CGAL (the error looks like this one).
This should work for triangular meshes (it assumes faces are written as "f v1 v2 v3", without texture or normal indices):

def conv_obj(file):
    # First pass: count the lines in the file.
    x = open(file)
    k = 0
    while "\n" in x.readline():
        k += 1
    x = open(file)
    out = str()
    v = 0  # number of vertices
    f = 0  # number of faces
    for i in range(k):
        y = x.readline().split()
        if len(y) > 0 and y[0] == "v":
            v += 1
            out += str(y[1]) + " " + str(y[2]) + " " + str(y[3]) + "\n"
        if len(y) > 0 and y[0] == "f":
            f += 1
            # OBJ indices are 1-based, OFF indices are 0-based.
            out += "3 " + str(int(y[1]) - 1) + " " + str(int(y[2]) - 1) + " " + str(int(y[3]) - 1) + "\n"
    out1 = "OFF\n" + str(v) + " " + str(f) + " " + "0" + "\n" + out
    # Replace the ".obj" suffix with ".off" (str.strip would eat leading/trailing o/b/j characters).
    w = open(file.rsplit(".obj", 1)[0] + ".off", "w")
    w.write(out1)
    w.close()
    x.close()
    return "done"
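For example (assuming a triangulated file model.obj in the current directory), calling the function writes model.off next to it:

conv_obj("model.obj")  # creates model.off in the same directory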