How to use aiopg pool in multi-threaded application? - python-3.x

I have a python 3.4.3, postgreSQL 9.4, aiopg-0.7.0. An example of multi-threaded applications, was taken from this site. How to use the pool? The thread hangs when the operation of the select.
import time
import asyncio
import aiopg
import functools
from threading import Thread, current_thread, Event
from concurrent.futures import Future
class B(Thread):
def __init__(self, start_event):
Thread.__init__(self)
self.loop = None
self.tid = None
self.event = start_event
def run(self):
self.loop = asyncio.new_event_loop()
asyncio.set_event_loop(self.loop)
self.tid = current_thread()
self.loop.call_soon(self.event.set)
self.loop.run_forever()
def stop(self):
self.loop.call_soon_threadsafe(self.loop.stop)
def add_task(self, coro):
"""this method should return a task object, that I
can cancel, not a handle"""
def _async_add(func, fut):
try:
ret = func()
fut.set_result(ret)
except Exception as e:
fut.set_exception(e)
f = functools.partial(asyncio.async, coro, loop=self.loop)
if current_thread() == self.tid:
return f() # We can call directly if we're not going between threads.
else:
# We're in a non-event loop thread so we use a Future
# to get the task from the event loop thread once
# it's ready.
fut = Future()
self.loop.call_soon_threadsafe(_async_add, f, fut)
return fut.result()
def cancel_task(self, task):
self.loop.call_soon_threadsafe(task.cancel)
#asyncio.coroutine
def test(pool, name_task):
while True:
print(name_task, 'running')
with (yield from pool.cursor()) as cur:
print(name_task, " select. ")
yield from cur.execute("SELECT count(*) FROM test")
count = yield from cur.fetchone()
print(name_task, ' Result: ', count)
yield from asyncio.sleep(3)
#asyncio.coroutine
def connect_db():
dsn = 'dbname=%s user=%s password=%s host=%s' % ('testdb', 'user', 'passw', '127.0.0.1')
pool = yield from aiopg.create_pool(dsn)
print('create pool type =', type(pool))
# future.set_result(pool)
return (pool)
event = Event()
b = B(event)
b.start()
event.wait() # Let the loop's thread signal us, rather than sleeping
loop_db = asyncio.get_event_loop()
pool = loop_db.run_until_complete(connect_db())
time.sleep(2)
t = b.add_task(test(pool, 'Task1')) # This is a real task
t = b.add_task(test(pool, 'Task2'))
while True:
time.sleep(10)
b.stop()
Not return result in 'yield from cur.execute("SELECT count(*) FROM test")'

Long story short: you cannot share aiopg pool object from different event loops.
Every aiopg.Pool is coupled to event loop. If you don't specify loop parameter explicitly it is taken from asyncio.get_event_loop() call.
So it your example you have a pool coupled to event loop from main thread.
When you execute db query from separate thread you trying to accomplish it by executing thread's loop, not the main one. It doesn't work.

Related

Multiprocessing pool executing synchronous

I need an asynchronous parent process to handover function calls to a process pool.
The imports are to time consuming to spawn a new worker/process every time. So I thought to put tasks in an asyncio.queue and have a consumer listen to it and hand them off to the workers. (Sort of like how Gunicorn works but I don't want to run a webserver in order to make the calls).
However the function call seems to only be executed if I call res.get() on the response of pool.apply_async() but then it just runs as if I would call a normal synchronous for-loop.
This is my code:
#!/usr/bin/env python
import os
import time
import multiprocessing as mp
import asyncio
def f(x: list) -> int:
print(f'the pid of this process is: {os.getpid()}')
time.sleep(1)
return len(x)
def callback_func(x):
print(f'this is the callback function')
print(x)
async def consumer(queue):
with mp.Pool(processes=4) as pool:
while True:
x = await queue.get()
if x == 'stop':
break
# this makes it seem to run synchronous:
res = pool.apply_async(f, (x,))
print(res.get(), x, os.getpid())
# if I run this instead, both f() and callback_func
# are not executed.
#res = pool.apply_async(f, (x,), callback_func)
#print(x, os.getpid())
queue.task_done()
print(f'consumed')
async def producer(queue):
for i in range(20):
await queue.put([i,i+1,i+2])
# await asyncio.sleep(0.5)
await queue.put('stop')
async def main():
queue = asyncio.Queue()
input_coroutines = [consumer(queue), producer(queue)]
for f in asyncio.as_completed(input_coroutines):
try:
result = await f
print(result)
except Exception as e:
print('caught exception')
print(e)
if __name__ == "__main__":
asyncio.run(main())
What am I doing wrong?

Getting returning value from multithreading in python 3

I'm trying to get one or several returning values from a thread in a multithreading process. The code I show get cycled with no way to interrupt it with Ctrl-C, Ctrl+D.
import queue as Queue
import threading
class myThread (threading.Thread):
def __init__(self, threadID, name, region):
threading.Thread.__init__(self)
self.threadID = threadID
self.name = name
self.region = region
def run(self):
GetSales(self.region)
def GetSales(strReg):
print("Thread-" + strReg)
return "Returning-" + strReg
def Main():
RegionList = []
RegionList.append("EMEA")
RegionList.append("AP")
RegionList.append("AM")
# Create threads
threads = []
x = 0
for region in RegionList:
x += 1
rthread = myThread(x, "Thread-" + region, region) # Create new thread
rthread.start() # Start new thread
threads.append(rthread) # Add new thread to threads list
que = Queue.Queue()
# Wait for all threads to complete
for t in threads:
t.join()
result = que.get()
print(t.name + " -> Done")
Main()
If I comment line "result = que.get()" the program runs with no issues.
What you are looking for is future and async management.
Firstly, your program loop indefinitely because of the line que.get(), because there is nothing in the queue, it wait that something happen, which will never happen. You don't use it.
What you want to do is an async task and get the result :
import asyncio
async def yourExpensiveTask():
// some long calculation
return 42
async main():
tasks = []
tasks += [asyncio.create_task(yourExpensiveTask())]
tasks += [asyncio.create_task(yourExpensiveTask())]
for task in tasks:
result = await task
print(result)
See also https://docs.python.org/3/library/asyncio-task.html

Sharing asyncio.Queue with another thread or process

I've recently converted my old template matching program to asyncio and I have a situation where one of my coroutines relies on a blocking method (processing_frame).
I want to run that method in a seperate thread or process whenever the coroutine that calls that method (analyze_frame) gets an item from the shared asyncio.Queue()
I'm not sure if that's possible or worth it performance wise since I have very little experience with threading and multiprocessing
import cv2
import datetime
import argparse
import os
import asyncio
# Making CLI
if not os.path.exists("frames"):
os.makedirs("frames")
t0 = datetime.datetime.now()
ap = argparse.ArgumentParser()
ap.add_argument("-v", "--video", required=True,
help="path to our file")
args = vars(ap.parse_args())
threshold = .2
death_count = 0
was_found = False
template = cv2.imread('youdied.png')
vidcap = cv2.VideoCapture(args["video"])
loop = asyncio.get_event_loop()
frames_to_analyze = asyncio.Queue()
def main():
length = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
tasks = []
for _ in range(int(length / 50)):
tasks.append(loop.create_task(read_frame(50, frames_to_analyze)))
tasks.append(loop.create_task(analyze_frame(threshold, template, frames_to_analyze)))
final_task = asyncio.gather(*tasks)
loop.run_until_complete(final_task)
dt = datetime.datetime.now() - t0
print("App exiting, total time: {:,.2f} sec.".format(dt.total_seconds()))
print(f"Deaths registered: {death_count}")
async def read_frame(frames, frames_to_analyze):
global vidcap
for _ in range(frames-1):
vidcap.grab()
else:
current_frame = vidcap.read()[1]
print("Read 50 frames")
await frames_to_analyze.put(current_frame)
async def analyze_frame(threshold, template, frames_to_analyze):
global vidcap
global was_found
global death_count
frame = await frames_to_analyze.get()
is_found = processing_frame(frame)
if was_found and not is_found:
death_count += 1
await writing_to_file(death_count, frame)
was_found = is_found
def processing_frame(frame):
res = cv2.matchTemplate(frame, template, cv2.TM_CCOEFF_NORMED)
max_val = cv2.minMaxLoc(res)[1]
is_found = max_val >= threshold
print(is_found)
return is_found
async def writing_to_file(death_count, frame):
cv2.imwrite(f"frames/frame{death_count}.jpg", frame)
if __name__ == '__main__':
main()
I've tried using unsync but without much success
I would get something along the lines of
with self._rlock:
PermissionError: [WinError 5] Access is denied
If processing_frame is a blocking function, you should call it with await loop.run_in_executor(None, processing_frame, frame). That will submit the function to a thread pool and allow the event loop to proceed with doing other things until the call function completes.
The same goes for calls such as cv2.imwrite. As written, writing_to_file is not truly asynchronous, despite being defined with async def. This is because it doesn't await anything, so once its execution starts, it will proceed to the end without ever suspending. In that case one could as well make it a normal function in the first place, to make it obvious what's going on.

Coroutine to mimic a OS's scheduler

I am following the :A Curious Course on Coroutines and Concurrency to learn coroutine, encounter problem to get the following codes running:
The code mimic an operating system to schedule tasks
from queue import Quue
class Task:
taskid = 0
def __init__(self, target):
Task.taskid += 1 #count the task
self.tid = Task.taskid
self.tartet = target
self.sendval = None
def run(self):
return self.target.send(self.sendval)
class Scheduler:
def __init__(self):
self.ready = Queue() # a queue of tasks that are ready to run.
self.taskmap = {} #dictionary that keeps track of all active tasks (each task has a unique integer task ID)
def new(self, target): #introduce a new task to the scheduler
newtask = Task(target)
self.taskmap[newtask.tid] = newtask
def schedule(self, task):
self.ready.put(task)
def mainloop(self):
while self.taskmap: #I think the problem is here
task = self.ready.get() #I think it should be while self.ready
result = task.run()
self.schedule(task)
Test it with
def foo():
while True:
print("I'm foo")
yield
def bar():
while True:
print("I'm bar")
yield
It pending instead of return value
In [85]: schedule.new(foo())
In [86]: schedule.new(bar())
In [87]: schedule.mainloop()
^C---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
I review the codes and find problem with
def mainloop(self):
while self.taskmap: #I think the problem is here
task = self.ready.get() #I think it should be while self.ready
result = task.run()
self.schedule(task)
while self.taskmap, but there is no methods to remove elements, so it is an infinite loop
I changed it to
def mainloop(self):
while self.taskmap: #I think the problem is here
task = self.ready.get() #I think it should be while self.ready
result = task.run()
self.schedule(task)
However, it still not work.
What's the problem with my code.

tornado server is incompatible with threading module

I'm using tornado with threads.
In short, each time the websocket handler receives a requests, it start to execute a task, which might take a few minutes.
However, once a client is connected, no other client can be connected, until the first one disconnects.
Any ideas?
I've attached a minimal example that uses time.sleep to simulate long running tasks.
import tornado.web
import tornado.websocket
import tornado.httpserver
import tornado.ioloop
import time
import json
import threading
class TaskHandler(tornado.websocket.WebSocketHandler):
def open(self):
pass
def check_origin(self, origin):
return True
def on_message(self, message):
try:
print 'received: ', message
self.write_message(json.dumps({'status': 'running'}))
def worker_A(kwargs):
time.sleep(100)
pass
def worker_B(kwargs):
time.sleep(100)
pass
threads = []
for target in [worker_A, worker_B]:
t = threading.Thread(target = target, args = ({'xxx': 'yyy'}, ))
t.daemon = True
t.start()
threads.append(t)
for t in threads:
t.join()
except Exception, e:
print 'TaskHandler: exception: ', e
pass
self.write_message(json.dumps({'status': 'done'}))
def on_close(self):
pass
class Server(tornado.web.Application):
def __init__(self):
handlers = [
('/task', TaskHandler),
]
tornado.web.Application.__init__(self, handlers)
if __name__ == '__main__':
server = tornado.httpserver.HTTPServer(Server())
server.listen(8765, address = '127.0.0.1')
tornado.ioloop.IOLoop.instance().start()
You block the whole Tornado event loop for 100 seconds in t.join. Unless you have a yield statement or schedule a callback and exit a function, then your function is not asynchronous. Notice how your function "on_message" begins two threads and then calls t.join on each -- how can Tornado's event loop accomplish any other work while your function is waiting for t.join?
Instead, use a ThreadPoolExecutor something like this:
thread_pool = ThreadPoolExecutor(4)
class TaskHandler(tornado.websocket.WebSocketHandler):
# Make this an asynchronous coroutine
#gen.coroutine
def on_message_coroutine(self, message):
print 'received: ', message
self.write_message(json.dumps({'status': 'running'}))
def worker_A(kwargs):
time.sleep(100)
pass
def worker_B(kwargs):
time.sleep(100)
pass
futures = []
for target in [worker_A, worker_B]:
f = thread_pool.submit(target, {'xxx': 'yyy'})
futures.append(future)
# Now the event loop can do other things
yield futures
def on_message(self, message):
IOLoop.current().spawn_callback(self.on_message_coroutine,
message)

Resources