Update: it turns out od.download() returns None by design.
What might be better than a None check for an od.download() "failure"?
I am downloading a .zip file using the opendatasets library.
In iris_scans(), the line print(download) (with the if-statement commented out) prints None.
However, at the invocation scans = iris_scans() data is returned, and subsequent prints display it successfully.
The purpose of the if-statement is graceful error handling.
Note: I've used an if-statement instead of try-except because there are many possible reasons why download could be None (e.g. dead link, connection interrupt, etc.).
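Since od.download() gives nothing back to test, one alternative to a None check is to verify that the expected archive actually landed on disk; below is a minimal sketch (the fetch_iris_zip helper is only for illustration, and it assumes the iris_database.zip name used in the listing that follows):
import os
import opendatasets as od

def fetch_iris_zip(url, archive='iris_database.zip'):
    od.download(url)  # returns None by design
    # A missing or empty archive is treated as a failed download
    if not os.path.exists(archive) or os.path.getsize(archive) == 0:
        print('Iris Scans - download did not produce a usable archive')
        return None
    return archive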
pip3 install opendatasets
import opendatasets as od
import zipfile
import os
import shutil
from PIL import Image
import numpy as np
def iris_scans():
download = od.download('http://www.mae.cuhk.edu.hk/~cvl/iris_database/iris_database.zip')
"""
if download == None:
print('Iris Scans - Link could not be established')
return [[]*1778]
"""
print(download)
path_extract = 'iris_database/'
with zipfile.ZipFile('iris_database.zip', 'r') as zip_ref:
zip_ref.extractall(path_extract)
os.remove(path_extract + 'readme.txt')
filenames = os.listdir(path_extract)
scans = []
for f in filenames:
img = Image.open(path_extract + f)
#print("IMG", img)
matrix = np.array(img)
#print("MATRIX", matrix)
scans.append(matrix)
shutil.rmtree(path_extract)
os.remove(path_extract[:-1] + '.zip')
# Data Augmentation
scans_90 = [np.rot90(s) for s in scans]
scans_180 = [np.rot90(s) for s in scans_90]
scans_270 = [np.rot90(s) for s in scans_180]
scans_flip = [np.flip(s) for s in scans]
scans_flip_90 = [np.rot90(s) for s in scans_flip]
scans_flip_180 = [np.rot90(s) for s in scans_flip_90]
scans_flip_270 = [np.rot90(s) for s in scans_flip_180]
scans += scans_90
scans += scans_180
scans += scans_270
scans += scans_flip_90
scans += scans_flip_180
scans += scans_flip_270
return scans
scans = iris_scans()
print(scans[0])
print(len(scans))
The original question was a road-block on the path to implementing some form of exception handling for the download.
od.download() returns None by design, so an alternative to if download == None was needed.
As pointed out and assisted by Henry, the try-except below incorporates all of the exceptions found in the GitHub source.
...
import urllib
def iris_scans():
try:
download = od.download('http://www.dgcdgyugcwyugyugcasc.com/wqduiuwqdwq') # BROKEN
...
return scans
except (urllib.error.URLError, IOError, RuntimeError) as e:
print('Iris Scans - failed')
return [[]*1778]
Iris Scans - failed
[]
1
The top answer to this post demonstrates catching many exceptions on one line.
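For reference, a minimal, self-contained sketch of that pattern, with the whole failure path handled by a single except clause listing several exception types (the safe_download helper is only for illustration):
import urllib.error
import opendatasets as od

def safe_download(url):
    try:
        od.download(url)
        return True
    except (urllib.error.URLError, IOError, RuntimeError) as e:
        # One except clause catches every exception type in the tuple
        print('Iris Scans - failed:', e)
        return False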
Related
I am using the following code to process some pictures for my ML project and I would like to parallelize it.
import multiprocessing as mp
import concurrent.futures
def track_ids(seq):
'''The func is so big I can not put it here'''
ood = {}
for i in seq:
# I load around 500 images and process them
ood[i] = some Value
return ood
seqs = []
for seq in range(1, 10):# len(seqs)+1):
seq = txt+str(seq)
seqs.append(seq)
# serial call of the function
track_ids(seq)
#parallel call of the function
with concurrent.futures.ProcessPoolExecutor(max_workers=mp.cpu_count()) as ex:
ood_id = ex.map(track_ids, seqs)
If I run the code serially it takes 3.0 minutes, but in parallel with concurrent.futures it takes 3.5 minutes.
Can someone please explain why that is, and present a way to solve the problem?
By the way, I have 12 cores.
Thanks
Here's a brief example of how one might go about profiling multiprocessing code vs serial execution:
import multiprocessing as mp
from cProfile import Profile
from pstats import Stats
import concurrent.futures
def track_ids(seq):
'''The func is so big I can not put it here'''
ood = {}
for i in seq:
# I load around 500 images and process them
ood[i] = some Value
return ood
def profile_seq():
p = Profile() #one and only profiler instance
p.enable()
seqs = []
for seq in range(1, 10):# len(seqs)+1):
seq = txt+str(seq)
seqs.append(seq)
# serial call of the function
track_ids(seq)
p.disable()
return Stats(p), seqs
def track_ids_pr(seq):
p = Profile() #profile the child tasks
p.enable()
retval = track_ids(seq)
p.disable()
return (Stats(p, stream="dummy"), retval)
def profile_parallel():
p = Profile() #profile stuff in the main process
p.enable()
with concurrent.futures.ProcessPoolExecutor(max_workers=mp.cpu_count()) as ex:
retvals = ex.map(track_ids_pr, seqs)
p.disable()
s = Stats(p)
out = []
for ret in retvals:
s.add(ret[0])
out.append(ret[1])
return s, out
if __name__ == "__main__":
stat, retval = profile_parallel()
stat.print_stats()
EDIT: Unfortunately, I found out that pstats.Stats objects cannot be passed through a multiprocessing.Queue as-is because they are not picklable (and pickling is also needed for concurrent.futures to work). Evidently a Stats object normally stores a reference to a file for the purpose of writing statistics to that file, and if none is given it grabs a reference to sys.stdout by default. We don't actually need that reference until we want to print out the statistics, so we can give it a temporary value to prevent the pickle error and then restore an appropriate value later. The following example should be copy-paste-able and run just fine, rather than the pseudocode-ish example above.
from multiprocessing import Queue, Process
from cProfile import Profile
from pstats import Stats
import sys
def isprime(x):
for d in range(2, int(x**.5) + 1):
if x % d == 0:
return False
return True
def foo(retq):
p = Profile()
p.enable()
primes = []
max_n = 2**20
for n in range(3, max_n):
if isprime(n):
primes.append(n)
p.disable()
retq.put(Stats(p, stream="dummy")) #Dirty hack: set `stream` to something picklable then override later
if __name__ == "__main__":
q = Queue()
p1 = Process(target=foo, args=(q,))
p1.start()
p2 = Process(target=foo, args=(q,))
p2.start()
s1 = q.get()
s1.stream = sys.stdout #restore original file
s2 = q.get()
# s2.stream #if we are just adding this `Stats` object to another the `stream` just gets thrown away anyway.
s1.add(s2) #add up the stats from both child processes.
s1.print_stats() # s1.stream gets used here, but not before. If you provide a file to write to instead of sys.stdout, it will write to that file.
p1.join()
p2.join()
I've been trying to create a CSV file from this code, but it fails every time. I have tried placing the CSV writing in different parts of the code, but nothing has worked so far.
I'm new to Python and to Stack Overflow.
If somebody can explain what I'm doing wrong, it would be helpful.
Thanks in advance for any help.
from time import sleep
import os
import sys
from bleson import get_provider, Observer, UUID16
import csv
GOVEE_BT_mac_OUI_PREFIX = "A4:C1:38"
H5075_UPDATE_UUID16 = UUID16(0xEC88)
govee_devices = {}
# ###########################################################################
FORMAT_PRECISION = ".2f"
# Decode H5075 Temperature into degrees Fahrenheit
def decode_temp_in_f(encoded_data):
return format((((encoded_data / 10000) * 1.8) + 32), FORMAT_PRECISION)
# Decode H5075 percent humidity
def decode_humidity(encoded_data):
return format(((encoded_data % 1000) / 10), FORMAT_PRECISION)
#focus here
with open('temp.csv','w',newline='') as record:
record = csv.writer(record)
record.writerow(['Device Name','Device Address','Temp','Humidity'])
def print_values(mac):
govee_device = govee_devices[mac]
print(govee_device['name'],govee_device['address'],govee_device['tempInF'],govee_device['humidity'],govee_device['battery'])
record.writerow(govee_device['name'])
# On BLE advertisement callback
def on_advertisement(advertisement):
if advertisement.address.address.startswith(GOVEE_BT_mac_OUI_PREFIX):
mac = advertisement.address.address
if mac not in govee_devices:
govee_devices[mac] = {}
if H5075_UPDATE_UUID16 in advertisement.uuid16s:
# HACK: Proper decoding is done in bleson > 0.10
name = advertisement.name.split("'")[0]
encoded_data = int(advertisement.mfg_data.hex()[6:12], 16)
battery = int(advertisement.mfg_data.hex()[12:14], 16)
govee_devices[mac]["address"] = mac
govee_devices[mac]["name"] = name
govee_devices[mac]["mfg_data"] = advertisement.mfg_data
govee_devices[mac]["data"] = encoded_data
govee_devices[mac]["tempInF"] = decode_temp_in_f(encoded_data)
govee_devices[mac]["humidity"] = decode_humidity(encoded_data)
govee_devices[mac]["battery"] = battery
print_values(mac)
if advertisement.rssi is not None and advertisement.rssi != 0:
govee_devices[mac]["rssi"] = advertisement.rssi
# ###########################################################################
adapter = get_provider().get_adapter()
observer = Observer(adapter)
observer.on_advertising_data = on_advertisement
try:
while True:
observer.start()
sleep(2)
observer.stop()
except KeyboardInterrupt:
try:
observer.stop()
sys.exit(0)
except SystemExit:
observer.stop()
os._exit(0)
The error that I'm getting is:
File "/home/pi/GoveeWatcher-master/python/goveeWatcher.py", line 37, in print_values
record.writerow(govee_device['name'])
ValueError: I/O operation on closed file.
I would be tempted to put the CSV writing functionality inside of the print_values function so it opens the file, writes the data, and then closes the file on each value that is found by the observer.
For example:
#focus here
def print_values(mac):
govee_device = govee_devices[mac]
print(govee_device['name'], govee_device['tempInF'])
with open('temp.csv','a',newline='') as record:
writer = csv.DictWriter(record, fieldnames=govee_device.keys())
writer.writerow(govee_device)
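If a header row is wanted as well, one option (a sketch, not part of the answer above; the append_reading helper is hypothetical) is to write the header only when the file does not exist yet:
import csv
import os

def append_reading(govee_device, path='temp.csv'):
    # Write the header once, then keep appending one row per reading
    new_file = not os.path.isfile(path)
    with open(path, 'a', newline='') as record:
        writer = csv.DictWriter(record, fieldnames=govee_device.keys())
        if new_file:
            writer.writeheader()
        writer.writerow(govee_device)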
I found some code that creates a progress bar with tqdm and Python multiprocessing, using an integer to update the bar. I changed it to loop over files instead, but the lambda callback creates a cartesian product of file paths, which makes my machine run out of memory when there are a great number of files. I tried to find the solution in other questions, but didn't find an answer.
What can I do to avoid the cartesian product in async_result (and the out-of-memory error) but still create the progress bar?
import glob
import time
import jpylyzer
import multiprocessing as mp
from tqdm import tqdm
cores=2
src="/path/to/jp2/files"
def f_process_file(filename):
now=time.strftime("%Y-%m-%d %H:%M:%S")
try:
result = jpylyzer.checkOneFile(filename)
status=result.findtext('isValid')
except Exception as ex:
print("oopsie")
return filename, status, now
# Find JP2 files in the source directory case insensitively
files = [f for f in glob.iglob(src + '/**/*.[jJ][pP]2', recursive=True)]
filecount=len(files)
# Start a multiprocessing pool
pool = mp.Pool(processes = cores)
# Define a progress bar
pbar = tqdm(total=filecount)
# process all files asynchronously and do callback for the progress bar
async_result = [pool.map_async(f_process_file, files, callback=lambda _: pbar.update(1)) for file in files]
# magic for the progress barr
results = [p.get() for p in async_result]
pool.close()
pool.join()
for i in range(len(results)):
if results[i][i][1] != 'True':
print(results[i][i])
I found the answer by removing the [] around async_result, removing the callback=lambda, and declaring a global variable pbar for the progress bar before initializing it dynamically:
#!/usr/bin/env python3
import glob
from tqdm import tqdm
import time, sys
def f_process_file(filename):
fnctn=sys._getframe().f_code.co_name
now=time.strftime("%Y-%m-%d %H:%M:%S")
try:
# Do some stuff here
result = 'isValid' # for testing purpose declare a value
status = 'True' # for testing purpose declare a value
except Exception as ex:
print("failure in {}".format(fnctn))
#update the progress bar
time.sleep(0.005)
pbar.update(1)
return filename, status, now
def f_doall(src):
files = [f for f in glob.iglob(src + '/**/*.[jJ][pP]2', recursive=True)]
filecount=len(files)
print(filecount)
#Declare a global variable for the progress bar
global pbar
# Initiate the progree bar
pbar = tqdm(total=filecount)
for f in files:
f_process_file(f)
def main():
src="/path/to/images"
f_doall(src)
if __name__ == "__main__":
main()
Now I can expand my code to use a multiprocessing pool.
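As a sketch of that expansion (not part of the answer above; process_all is a hypothetical helper), the pool can feed the progress bar from the parent process via imap_unordered, so no per-file map_async calls or lambda callbacks are needed, and f_process_file itself should then leave pbar alone:
import multiprocessing as mp
from tqdm import tqdm

def process_all(files, cores=2):
    results = []
    with mp.Pool(processes=cores) as pool, tqdm(total=len(files)) as pbar:
        # imap_unordered yields one result per finished task;
        # the bar is updated only in the parent process
        for res in pool.imap_unordered(f_process_file, files):
            results.append(res)
            pbar.update(1)
    return results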
I want to make a batch request that gets the campaigns for a specific ad account. I wrote some simple code based on this issue,
but I've used some global arrays and I don't know whether time.sleep(2) is necessary for this code. My code is below:
from facebookads import FacebookAdsApi
from facebookads.api import FacebookRequest
import pandas as pd
import time
batch_body_responses = []
list_of_artists = [1]
def success_callback(response):
try:
pair = [response.json()['data']]
next = [response.json()['paging']['next']]
batch_body_responses.append(pair)
batch_body_responses.append(next)
except IndexError:
pass
except UnicodeEncodeError:
pass
def error_callback(response):
pass
def generate_batches(iterable, batch_size_limit):
# This function can be found in examples/batch_utils.py
batch = []
for item in iterable:
if len(batch) == batch_size_limit:
yield batch
batch = []
batch.append(item)
if len(batch):
yield batch
def get_id_list(art_search_list):
batches = []
your_app_id = '756885'
your_app_secret = '123456789'
your_access_token = 'EAA.....'
api = FacebookAdsApi.init(your_app_id, your_app_secret, your_access_token)
batch_limit = 25
for batch in generate_batches(art_search_list, batch_limit):
next_batch = api.new_batch()
for artt in batch:
requestss = [FacebookRequest(node_id='act_1234/campaigns',method="GET",endpoint="?fields=id,name")]
for req in requestss:
next_batch.add_request(req, success_callback, error_callback)
batches.append(next_batch)
for batch_request in batches:
batch_request.execute()
time.sleep(2)
print(batch_body_responses)
return batch_body_responses
df = pd.DataFrame(get_id_list(list_of_artists))
How can this code be optimized to avoid the global arrays, how can it be executed without the sleep statement, and why is the sleep needed?
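No answer is included above, but as a sketch of one way to drop the module-level arrays (collect_campaigns is a hypothetical helper; the batch calls mirror the ones in the question), the callbacks can be closures over a list local to the function:
from facebookads.api import FacebookRequest

def collect_campaigns(api, node_ids):
    responses = []  # local list instead of a module-level global

    def on_success(response):
        # Closure over `responses`, so no global state is needed
        responses.append(response.json().get('data'))

    def on_failure(response):
        pass

    batch = api.new_batch()
    for node_id in node_ids:
        req = FacebookRequest(node_id=node_id, method="GET", endpoint="?fields=id,name")
        batch.add_request(req, on_success, on_failure)
    batch.execute()
    return responses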
I need to export my Photoshop swatches to a document with RGB, HSB and HEX values, the name of each swatch, and maybe the colour patch too. Is there any tool that can export swatches to this kind of document?
Would not be difficult to script but I don't know of any existing tools. To script it, look into xtools to access the color swatches themselves: http://ps-scripts.sourceforge.net/xtools.html
The following is a limited (no support for Lab, HSB or spot color at the moment) Python .aco -> text dumper. The swatch file needs to be renamed to test.aco, or change the file name in the code.
#!/usr/bin/python
# -*- coding: utf-8 -*-
# quick script no warranties whatsoever
import struct
class ColorSwatch():
def __init__(self, fp):
self.rawdata = struct.unpack(">5H",fp.read(10))
namelen, = struct.unpack(">I",fp.read(4))
cp = fp.read(2*namelen)
self.name = cp[0:-2].decode('utf-16-be')
self.typename = self.colorTypeName()
def colorTypeName(self):
try:
return {0:"RGB", 1:"HSB",
2:"CMYK",7:"Lab",
8:"Grayscale"}[self.rawdata[0]]
except IndexError:
print self.rawdata[0]
def __strCMYK(self):
rgb8bit = map(lambda a: (65535 - a)/655.35, self.rawdata[1:])
return "{name} ({typename}): {0}% {1}% {2}% {3}%".format(*rgb8bit,**self.__dict__)
def __strRGB(self):
rgb8bit = map(lambda a: a/256,self.rawdata[1:4])
return "{name} ({typename}): #{0:x}{1:x}{2:x}".format(*rgb8bit,**self.__dict__)
def __strGrayscale(self):
gray = self.rawdata[1]/100.
return "{name} ({typename}): {0}%".format(gray,**self.__dict__)
def __str__(self):
return {0: self.__strRGB, 1:"HSB",
2:self.__strCMYK,7:"Lab",
8:self.__strGrayscale}[self.rawdata[0]]()
with open("test.aco", "rb") as acoFile:
#skip ver 1 file
head = acoFile.read(2)
ver, = struct.unpack(">H",head)
if (ver != 1):
raise TypeError("Probably not a adobe aco file")
count = acoFile.read(2)
cnt, = struct.unpack(">H",count)
acoFile.seek(cnt*10,1)
#read ver2 file
head = acoFile.read(2)
ver, = struct.unpack(">H",head)
if (ver != 2):
raise TypeError("Probably not a adobe aco file")
count = acoFile.read(2)
count, = struct.unpack(">H",count)
for _ in range(count):
swatch = ColorSwatch(acoFile)
print str(swatch)
This question was also posted on Graphic Design Stack Exchange and this is the original answer
https://github.com/czebe/node-swatch-names
This node tool exports your Swatches to SCSS/JS. You can also use it right in node to accomplish transformations and then write it back to an .aco file.