Sharing asyncio.Queue with another thread or process - python-3.x

I've recently converted my old template matching program to asyncio, and I have a situation where one of my coroutines relies on a blocking method (processing_frame).
I want to run that method in a separate thread or process whenever the coroutine that calls it (analyze_frame) gets an item from the shared asyncio.Queue().
I'm not sure if that's possible or worth it performance-wise, since I have very little experience with threading and multiprocessing.
import cv2
import datetime
import argparse
import os
import asyncio

# Making CLI
if not os.path.exists("frames"):
    os.makedirs("frames")

t0 = datetime.datetime.now()
ap = argparse.ArgumentParser()
ap.add_argument("-v", "--video", required=True,
                help="path to our file")
args = vars(ap.parse_args())
threshold = .2
death_count = 0
was_found = False
template = cv2.imread('youdied.png')
vidcap = cv2.VideoCapture(args["video"])
loop = asyncio.get_event_loop()
frames_to_analyze = asyncio.Queue()

def main():
    length = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
    tasks = []
    for _ in range(int(length / 50)):
        tasks.append(loop.create_task(read_frame(50, frames_to_analyze)))
        tasks.append(loop.create_task(analyze_frame(threshold, template, frames_to_analyze)))
    final_task = asyncio.gather(*tasks)
    loop.run_until_complete(final_task)
    dt = datetime.datetime.now() - t0
    print("App exiting, total time: {:,.2f} sec.".format(dt.total_seconds()))
    print(f"Deaths registered: {death_count}")

async def read_frame(frames, frames_to_analyze):
    global vidcap
    for _ in range(frames - 1):
        vidcap.grab()
    else:
        current_frame = vidcap.read()[1]
        print("Read 50 frames")
        await frames_to_analyze.put(current_frame)

async def analyze_frame(threshold, template, frames_to_analyze):
    global vidcap
    global was_found
    global death_count
    frame = await frames_to_analyze.get()
    is_found = processing_frame(frame)
    if was_found and not is_found:
        death_count += 1
        await writing_to_file(death_count, frame)
    was_found = is_found

def processing_frame(frame):
    res = cv2.matchTemplate(frame, template, cv2.TM_CCOEFF_NORMED)
    max_val = cv2.minMaxLoc(res)[1]
    is_found = max_val >= threshold
    print(is_found)
    return is_found

async def writing_to_file(death_count, frame):
    cv2.imwrite(f"frames/frame{death_count}.jpg", frame)

if __name__ == '__main__':
    main()
I've tried using unsync, but without much success. I would get something along the lines of:
    with self._rlock:
PermissionError: [WinError 5] Access is denied

If processing_frame is a blocking function, you should call it with await loop.run_in_executor(None, processing_frame, frame). That will submit the function to a thread pool and allow the event loop to proceed with other things until the submitted call completes.
The same goes for calls such as cv2.imwrite. As written, writing_to_file is not truly asynchronous, despite being defined with async def. This is because it doesn't await anything, so once its execution starts, it will run to the end without ever suspending. In that case one might as well make it a normal function in the first place, to make it obvious what's going on.
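For example, here is a minimal sketch (my own rewrite, not the asker's exact code) of how analyze_frame could offload both blocking calls to the default thread pool; it assumes the globals and helpers defined in the question (processing_frame, cv2, was_found, death_count):

async def analyze_frame(threshold, template, frames_to_analyze):
    global was_found
    global death_count
    loop = asyncio.get_event_loop()
    frame = await frames_to_analyze.get()
    # Template matching runs in a worker thread; the event loop stays free.
    is_found = await loop.run_in_executor(None, processing_frame, frame)
    if was_found and not is_found:
        death_count += 1
        # cv2.imwrite blocks as well, so hand it to the executor too.
        await loop.run_in_executor(
            None, cv2.imwrite, f"frames/frame{death_count}.jpg", frame)
    was_found = is_found

Passing None uses the loop's default ThreadPoolExecutor; a concurrent.futures.ProcessPoolExecutor could be passed instead if the template matching turns out to be CPU-bound enough to fight the GIL.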

Related

Why is serial code faster than concurrent.futures in this case?

I am using the following code to process some pictures for my ML project and I would like to parallelize it.
import multiprocessing as mp
import concurrent.futures

def track_ids(seq):
    '''The func is so big I can not put it here'''
    ood = {}
    for i in seq:
        # I load around 500 images and process them
        ood[i] = some Value
    return ood

seqs = []
for seq in range(1, 10):  # len(seqs)+1):
    seq = txt + str(seq)
    seqs.append(seq)
    # serial call of the function
    track_ids(seq)

# parallel call of the function
with concurrent.futures.ProcessPoolExecutor(max_workers=mp.cpu_count()) as ex:
    ood_id = ex.map(track_ids, seqs)
If I run the code serially, it takes 3.0 minutes, but running it in parallel with concurrent.futures takes 3.5 minutes.
Can someone please explain why that is, and suggest a way to solve the problem?
BTW, I have 12 cores.
Thanks
Here's a brief example of how one might go about profiling multiprocessing code vs serial execution:
import multiprocessing as mp
import concurrent.futures
from cProfile import Profile
from pstats import Stats

def track_ids(seq):
    '''The func is so big I can not put it here'''
    ood = {}
    for i in seq:
        # I load around 500 images and process them
        ood[i] = some Value
    return ood

def profile_seq():
    p = Profile()  # one and only profiler instance
    p.enable()
    seqs = []
    for seq in range(1, 10):  # len(seqs)+1):
        seq = txt + str(seq)
        seqs.append(seq)
        # serial call of the function
        track_ids(seq)
    p.disable()
    return Stats(p), seqs

def track_ids_pr(seq):
    p = Profile()  # profile the child tasks
    p.enable()
    retval = track_ids(seq)
    p.disable()
    return (Stats(p, stream="dummy"), retval)

def profile_parallel():
    p = Profile()  # profile stuff in the main process
    p.enable()
    # `seqs` is assumed to be built as in the question
    with concurrent.futures.ProcessPoolExecutor(max_workers=mp.cpu_count()) as ex:
        retvals = ex.map(track_ids_pr, seqs)
    p.disable()
    s = Stats(p)
    out = []
    for ret in retvals:
        s.add(ret[0])
        out.append(ret[1])
    return s, out

if __name__ == "__main__":
    stat, retval = profile_parallel()
    stat.print_stats()
EDIT: Unfortunately I found out that pstats.Stats objects cannot be passed through a multiprocessing.Queue as-is because they are not picklable (and pickling is what concurrent.futures and Queue rely on). Evidently a Stats object normally stores a reference to a file for the purpose of writing statistics to that file, and if none is given, it grabs a reference to sys.stdout by default. We don't actually need that reference until we want to print out the statistics, so we can give it a temporary value to prevent the pickle error and restore an appropriate value later. The following example should be copy-paste-able and run just fine, unlike the pseudocode-ish example above.
from multiprocessing import Queue, Process
from cProfile import Profile
from pstats import Stats
import sys

def isprime(x):
    for d in range(2, int(x**.5) + 1):
        if x % d == 0:
            return False
    return True

def foo(retq):
    p = Profile()
    p.enable()
    primes = []
    max_n = 2**20
    for n in range(3, max_n):
        if isprime(n):
            primes.append(n)
    p.disable()
    retq.put(Stats(p, stream="dummy"))  # Dirty hack: set `stream` to something picklable, then override it later

if __name__ == "__main__":
    q = Queue()
    p1 = Process(target=foo, args=(q,))
    p1.start()
    p2 = Process(target=foo, args=(q,))
    p2.start()
    s1 = q.get()
    s1.stream = sys.stdout  # restore the original file
    s2 = q.get()
    # s2.stream  # if we are just adding this `Stats` object to another, the `stream` gets thrown away anyway
    s1.add(s2)  # add up the stats from both child processes
    s1.print_stats()  # s1.stream gets used here, but not before. If you provide a file instead of sys.stdout, it will write to that file.
    p1.join()
    p2.join()

Getting a return value from multithreading in Python 3

I'm trying to get one or several return values from a thread in a multithreaded process. The code below gets stuck in a loop, with no way to interrupt it with Ctrl-C or Ctrl-D.
import queue as Queue
import threading

class myThread(threading.Thread):
    def __init__(self, threadID, name, region):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.name = name
        self.region = region

    def run(self):
        GetSales(self.region)

def GetSales(strReg):
    print("Thread-" + strReg)
    return "Returning-" + strReg

def Main():
    RegionList = []
    RegionList.append("EMEA")
    RegionList.append("AP")
    RegionList.append("AM")
    # Create threads
    threads = []
    x = 0
    for region in RegionList:
        x += 1
        rthread = myThread(x, "Thread-" + region, region)  # Create new thread
        rthread.start()  # Start new thread
        threads.append(rthread)  # Add new thread to threads list
    que = Queue.Queue()
    # Wait for all threads to complete
    for t in threads:
        t.join()
        result = que.get()
        print(t.name + " -> Done")

Main()
If I comment out the line "result = que.get()", the program runs with no issues.
What you are looking for is futures and async task management.
First, your program loops indefinitely because of the line que.get(): since nothing is ever put into the queue, it waits for something that will never happen. You never actually use the queue.
What you want to do is run an async task and get its result:
import asyncio

async def yourExpensiveTask():
    # some long calculation
    return 42

async def main():
    tasks = []
    tasks += [asyncio.create_task(yourExpensiveTask())]
    tasks += [asyncio.create_task(yourExpensiveTask())]
    for task in tasks:
        result = await task
        print(result)

asyncio.run(main())
See also https://docs.python.org/3/library/asyncio-task.html
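If you prefer to stay with threads and the queue from your original code, here is a minimal sketch (my own illustration, not from the linked docs) in which each thread puts its result into the shared queue, so que.get() actually has something to return:

import queue
import threading

def GetSales(strReg):
    return "Returning-" + strReg

def worker(region, que):
    # Each thread hands its result back through the shared queue.
    que.put(GetSales(region))

def Main():
    que = queue.Queue()
    threads = []
    for region in ("EMEA", "AP", "AM"):
        t = threading.Thread(target=worker, args=(region, que))
        t.start()
        threads.append(t)
    for t in threads:
        t.join()
        print(que.get(), "-> Done")  # one result per joined thread, so get() never blocks forever

Main()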

Writing an EventLoop without using asyncio

I'm getting very familiar with Python's asyncio, asynchronous programming in Python, coroutines, etc.
I want to be able to execute several coroutines with my own custom-made event loop.
I'm curious whether I can write my own event loop without importing asyncio at all.
I want to be able to execute several coroutines with my own custom-made event loop.
The asyncio event loop is well-tested and can easily be extended to acknowledge non-asyncio events. If you describe your actual use case, it might be easier to help. But if your goal is to learn about async programming and coroutines, read on.
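For instance, here is a minimal sketch (my own illustration, Unix-only with the default selector event loop, Python 3.7+) of feeding a non-asyncio event source, a plain socket pair, into the stock loop via loop.add_reader():

import asyncio
import socket

async def main():
    loop = asyncio.get_running_loop()
    left, right = socket.socketpair()   # stand-in for an external event source
    left.setblocking(False)
    # Wake the loop and run the callback whenever the external source has data.
    loop.add_reader(left.fileno(), lambda: print(left.recv(100)))
    right.send(b"external event")
    await asyncio.sleep(0.1)            # give the callback a chance to fire
    loop.remove_reader(left.fileno())

asyncio.run(main())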
I'm curious whether I can write my own event loop without importing asyncio at all.
It's definitely possible - asyncio itself is just a library, after all - but it will take some work for your event loop to be useful. See this excellent talk by David Beazley, where he demonstrates writing an event loop in front of a live audience. (Don't be put off by David using the older yield from syntax - await works exactly the same way.)
OK, so I found an example somewhere (sorry, I don't remember where, no link) and changed it a little bit.
An event loop and coroutines without even importing asyncio:
import datetime
import heapq
import types
import time

class Task:
    def __init__(self, wait_until, coro):
        self.coro = coro
        self.waiting_until = wait_until

    def __eq__(self, other):
        return self.waiting_until == other.waiting_until

    def __lt__(self, other):
        return self.waiting_until < other.waiting_until

class SleepingLoop:
    def __init__(self, *coros):
        self._new = coros
        self._waiting = []

    def run_until_complete(self):
        # Start all the coroutines.
        for coro in self._new:
            wait_for = coro.send(None)
            heapq.heappush(self._waiting, Task(wait_for, coro))
        # Keep running until there is no more work to do.
        while self._waiting:
            now = datetime.datetime.now()
            # Get the coroutine with the soonest resumption time.
            task = heapq.heappop(self._waiting)
            if now < task.waiting_until:
                # We're ahead of schedule; wait until it's time to resume.
                delta = task.waiting_until - now
                time.sleep(delta.total_seconds())
                now = datetime.datetime.now()
            try:
                # It's time to resume the coroutine.
                wait_until = task.coro.send(now)
                heapq.heappush(self._waiting, Task(wait_until, task.coro))
            except StopIteration:
                # The coroutine is done.
                pass

@types.coroutine
def async_sleep(seconds):
    now = datetime.datetime.now()
    wait_until = now + datetime.timedelta(seconds=seconds)
    actual = yield wait_until
    return actual - now

async def countdown(label, total_seconds_wait, *, delay=0):
    print(label, 'waiting', delay, 'seconds before starting countdown')
    delta = await async_sleep(delay)
    print(label, 'starting after waiting', delta)
    while total_seconds_wait:
        print(label, 'T-minus', total_seconds_wait)
        waited = await async_sleep(1)
        total_seconds_wait -= 1
    print(label, 'lift-off!')

def main():
    loop = SleepingLoop(countdown('A', 5, delay=0),
                        countdown('B', 3, delay=2),
                        countdown('C', 4, delay=1))
    start = datetime.datetime.now()
    loop.run_until_complete()
    print('Total elapsed time is', datetime.datetime.now() - start)

if __name__ == '__main__':
    main()

Python Tweepy streaming with multitasking

In Python 2.7, I am successfully using the following code to listen to a direct message stream on an account:
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy import API
from tweepy.streaming import StreamListener

# These values are appropriately filled in the code
consumer_key = '######'
consumer_secret = '######'
access_token = '######'
access_token_secret = '######'

class StdOutListener(StreamListener):
    def __init__(self):
        self.tweetCount = 0

    def on_connect(self):
        print("Connection established!!")

    def on_disconnect(self, notice):
        print("Connection lost!! : ", notice)

    def on_data(self, status):
        print("Entered on_data()")
        print(status, flush=True)
        return True

    # I can add code here to execute when a message is received, such as slicing the message and activating something else
    def on_direct_message(self, status):
        print("Entered on_direct_message()")
        try:
            print(status, flush=True)
            return True
        except BaseException as e:
            print("Failed on_direct_message()", str(e))

    def on_error(self, status):
        print(status)

def main():
    try:
        auth = OAuthHandler(consumer_key, consumer_secret)
        auth.secure = True
        auth.set_access_token(access_token, access_token_secret)
        api = API(auth)
        # If the authentication was successful, you should
        # see the name of the account print out
        print(api.me().name)
        stream = Stream(auth, StdOutListener())
        stream.userstream()
    except BaseException as e:
        print("Error in main()", e)

if __name__ == '__main__':
    main()
This is great, and I can also execute code when I receive a message, but the jobs I'm adding to a work queue need to be able to stop after a certain amount of time. I'm using the popular start = time.time() approach and subtracting the current time to determine the elapsed time, but this streaming code does not loop to check the time. It just waits for a new message, so the clock is never checked, so to speak.
My question is this: how can I get streaming to occur and still track the elapsed time? Do I need to use multithreading as described in this article? http://www.tutorialspoint.com/python/python_multithreading.htm
I am new to Python and having fun playing around with hardware attached to a Raspberry Pi. I have learned so much from Stack Overflow, thank you all :)
I'm not sure exactly how you want to decide when to stop, but you can pass a timeout argument to the stream to give up after a certain delay.
stream = Stream(auth, StdOutListener(), timeout=30)
That will call your listener's on_timeout() method. If you return True, it will continue streaming. Otherwise, it will stop.
Between the stream's timeout argument and your listener's on_timeout(), you should be able to decide when to stop streaming.
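As a minimal sketch (my own illustration, using the same tweepy StreamListener API as your code, with a hypothetical max_seconds budget):

import time
from tweepy.streaming import StreamListener

class TimedListener(StreamListener):
    def __init__(self, max_seconds=60):
        StreamListener.__init__(self)
        self.start = time.time()
        self.max_seconds = max_seconds

    def on_data(self, status):
        print(status)
        # Returning False disconnects the stream once the time budget is spent.
        return (time.time() - self.start) < self.max_seconds

    def on_timeout(self):
        # Called when no data arrives within the timeout passed to Stream().
        return (time.time() - self.start) < self.max_seconds

# stream = Stream(auth, TimedListener(max_seconds=120), timeout=30)
# stream.userstream()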
I found I was able to get the multithreading code working the way I wanted. Unlike the Tutorialspoint tutorial, which launches multiple instances of the same code with varying timing parameters, I was able to get two different blocks of code to run in their own threads:
One block of code constantly adds 10 to a global variable (var).
Another block checks when 5 seconds have elapsed, then prints var's value.
This demonstrates two different tasks executing and sharing data using Python multithreading.
See the code below.
import threading
import time

exitFlag = 0
var = 10

class myThread1(threading.Thread):
    def __init__(self, threadID, name, counter):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.name = name
        self.counter = counter

    def run(self):
        # var counting block begins here
        print "addemup starting"
        global var
        while (var < 100000):
            if var > 90000:
                var = 0
            var = var + 10

class myThread2(threading.Thread):
    def __init__(self, threadID, name, counter):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.name = name
        self.counter = counter

    def run(self):
        # time checking block begins here and prints var every 5 secs
        print "checkem starting"
        global var
        start = time.time()
        elapsed = time.time() - start
        while (elapsed < 10):
            elapsed = time.time() - start
            if elapsed > 5:
                print "var = ", var
                start = time.time()
                elapsed = time.time() - start

# Create new threads
thread1 = myThread1(1, "Thread-1", 1)
thread2 = myThread2(2, "Thread-2", 2)

# Start new Threads
thread1.start()
thread2.start()

print "Exiting Main Thread"
My next task will be breaking my Twitter streaming into its own thread and passing received direct messages as variables to a task-queueing program, while the first thread hopefully continues to listen for more direct messages.

How to use aiopg pool in multi-threaded application?

I'm using Python 3.4.3, PostgreSQL 9.4, and aiopg 0.7.0. This multi-threaded example was taken from this site. How do I use the pool? The thread hangs during the SELECT operation.
import time
import asyncio
import aiopg
import functools
from threading import Thread, current_thread, Event
from concurrent.futures import Future

class B(Thread):
    def __init__(self, start_event):
        Thread.__init__(self)
        self.loop = None
        self.tid = None
        self.event = start_event

    def run(self):
        self.loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self.loop)
        self.tid = current_thread()
        self.loop.call_soon(self.event.set)
        self.loop.run_forever()

    def stop(self):
        self.loop.call_soon_threadsafe(self.loop.stop)

    def add_task(self, coro):
        """this method should return a task object, that I
        can cancel, not a handle"""
        def _async_add(func, fut):
            try:
                ret = func()
                fut.set_result(ret)
            except Exception as e:
                fut.set_exception(e)

        # asyncio.async is the pre-3.4.4 spelling of ensure_future
        f = functools.partial(asyncio.async, coro, loop=self.loop)
        if current_thread() == self.tid:
            return f()  # We can call directly if we're not going between threads.
        else:
            # We're in a non-event loop thread so we use a Future
            # to get the task from the event loop thread once
            # it's ready.
            fut = Future()
            self.loop.call_soon_threadsafe(_async_add, f, fut)
            return fut.result()

    def cancel_task(self, task):
        self.loop.call_soon_threadsafe(task.cancel)

@asyncio.coroutine
def test(pool, name_task):
    while True:
        print(name_task, 'running')
        with (yield from pool.cursor()) as cur:
            print(name_task, " select. ")
            yield from cur.execute("SELECT count(*) FROM test")
            count = yield from cur.fetchone()
            print(name_task, ' Result: ', count)
        yield from asyncio.sleep(3)

@asyncio.coroutine
def connect_db():
    dsn = 'dbname=%s user=%s password=%s host=%s' % ('testdb', 'user', 'passw', '127.0.0.1')
    pool = yield from aiopg.create_pool(dsn)
    print('create pool type =', type(pool))
    # future.set_result(pool)
    return pool

event = Event()
b = B(event)
b.start()
event.wait()  # Let the loop's thread signal us, rather than sleeping
loop_db = asyncio.get_event_loop()
pool = loop_db.run_until_complete(connect_db())
time.sleep(2)
t = b.add_task(test(pool, 'Task1'))  # This is a real task
t = b.add_task(test(pool, 'Task2'))
while True:
    time.sleep(10)
b.stop()
It never returns a result from yield from cur.execute("SELECT count(*) FROM test").
Long story short: you cannot share an aiopg pool object between different event loops.
Every aiopg.Pool is coupled to an event loop. If you don't specify the loop parameter explicitly, it is taken from the asyncio.get_event_loop() call.
So in your example you have a pool coupled to the event loop of the main thread.
When you execute a DB query from a separate thread, you are trying to do it on the thread's loop, not the main one. It doesn't work.
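A minimal sketch of the fix this implies (my own, assuming Python 3.5+ for asyncio.run_coroutine_threadsafe), reusing the question's B thread, connect_db, and test: create the pool on B's loop so the pool and its queries share one event loop.

event = Event()
b = B(event)
b.start()
event.wait()  # wait until B's loop is actually running

# Create the pool on B's loop instead of the main thread's loop.
pool = asyncio.run_coroutine_threadsafe(connect_db(), b.loop).result()

# The queries now run on the same loop the pool is coupled to.
asyncio.run_coroutine_threadsafe(test(pool, 'Task1'), b.loop)
asyncio.run_coroutine_threadsafe(test(pool, 'Task2'), b.loop)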
