Python multiprocessing with async functions - python-3.x

I built a websocket server, a simplified version of it is shown below:
import websockets, subprocess, asyncio, json, re, os, sys
from multiprocessing import Process

def docker_command(command_words):
    return subprocess.Popen(
        ["docker"] + command_words,
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)

async def check_submission(websocket: object, submission: dict):
    exercise = submission["exercise"]
    with docker_command(["exec", "-w", "badkan", "grade_exercise", exercise]) as proc:
        for line in proc.stdout:
            print("> " + line)
            await websocket.send(line)

async def run(websocket, path):
    submission_json = await websocket.recv()  # returns a string
    submission = json.loads(submission_json)  # converts the string to a python dict
    ####
    await check_submission(websocket, submission)

websocketserver = websockets.server.serve(run, '0.0.0.0', 8888, origins=None)
asyncio.get_event_loop().run_until_complete(websocketserver)
asyncio.get_event_loop().run_forever()
It works fine when there is only a single user at a time. But when several users try to use the server, it processes them serially, so later users have to wait a long time.
I tried to convert it to a multiprocessing server by replacing the line marked with "####" ("await check_submission...") with:
p = Process(target=check_submission, args=(websocket, submission,))
p.start()
But it did not work - I got a RuntimeWarning: "coroutine 'check_submission' was never awaited", and I did not see any output coming through the websocket.
I also tried to replace these lines with:
loop = asyncio.get_event_loop()
loop.set_default_executor(ProcessPoolExecutor())
await loop.run_in_executor(None, check_submission, websocket, submission)
but got a different error: "can't pickle asyncio.Future objects".
How can I build this multi-processing websocket server?

This is my example; asyncio.run() worked for me. I use multiple processes to start an async function:
import asyncio
import time
import traceback
from multiprocessing import Process

# Base, FlowExecutor, FlowResult and FLOW_EXCEPT are my own classes/constants.
class FlowConsumer(Base):
    def __init__(self):
        pass

    async def run(self):
        self.logger("start consumer process")
        while True:
            # get flow from queue
            flow = {}
            # call flow executor get result
            executor = FlowExecutor(flow)
            rtn = FlowResult()
            try:
                rtn = await executor.run()
            except Exception as e:
                self.logger("flow run except:{}".format(traceback.format_exc()))
                rtn.status = FLOW_EXCEPT
                rtn.msg = str(e)
            self.logger("consumer flow finish,result:{}".format(rtn.dict()))
            time.sleep(1)

    def process(self):
        asyncio.run(self.run())

processes = []
consumer_proc_count = 3
# start multi consumer processes
for _ in range(consumer_proc_count):
    # old version
    # p = Process(target=FlowConsumer().run)
    p = Process(target=FlowConsumer().process)
    p.start()
    processes.append(p)
for p in processes:
    p.join()
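Stripped down, the pattern is: give Process a regular synchronous target, and have that target start its own event loop inside the child with asyncio.run(). A minimal sketch of just that pattern (the Worker class and its names are illustrative, not taken from the code above):
import asyncio
from multiprocessing import Process

class Worker:
    async def run(self):
        # the actual async work, executed inside the child process
        await asyncio.sleep(1)
        print("work done in child process")

    def process(self):
        # synchronous entry point handed to Process;
        # it starts a fresh event loop inside the child
        asyncio.run(self.run())

if __name__ == "__main__":
    procs = [Process(target=Worker().process) for _ in range(3)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()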

The problem is that subprocess.Popen is not async, so check_submission blocks the event loop while waiting for the next line of docker output.
You don't need to use multiprocessing at all; since you are blocking while waiting on a subprocess, you just need to switch from subprocess to asyncio.subprocess:
async def docker_command(command_words):
    return await asyncio.subprocess.create_subprocess_exec(
        *["docker"] + command_words,
        stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.STDOUT)

async def check_submission(websocket: object, submission: dict):
    exercise = submission["exercise"]
    proc = await docker_command(["exec", "-w", "badkan", "grade_exercise", exercise])
    async for line in proc.stdout:
        print(b"> " + line)
        await websocket.send(line)
    await proc.wait()

Related

Multiprocessing pool executing synchronous

I need an asynchronous parent process to hand over function calls to a process pool.
The imports are too time consuming to spawn a new worker/process every time. So I thought to put tasks in an asyncio.Queue and have a consumer listen to it and hand them off to the workers. (Sort of like how Gunicorn works, but I don't want to run a webserver in order to make the calls.)
However, the function call only seems to be executed if I call res.get() on the response of pool.apply_async(), but then it just runs as if it were a normal synchronous for-loop.
This is my code:
#!/usr/bin/env python
import os
import time
import multiprocessing as mp
import asyncio

def f(x: list) -> int:
    print(f'the pid of this process is: {os.getpid()}')
    time.sleep(1)
    return len(x)

def callback_func(x):
    print(f'this is the callback function')
    print(x)

async def consumer(queue):
    with mp.Pool(processes=4) as pool:
        while True:
            x = await queue.get()
            if x == 'stop':
                break
            # this makes it seem to run synchronous:
            res = pool.apply_async(f, (x,))
            print(res.get(), x, os.getpid())
            # if I run this instead, both f() and callback_func
            # are not executed.
            #res = pool.apply_async(f, (x,), callback_func)
            #print(x, os.getpid())
            queue.task_done()
            print(f'consumed')

async def producer(queue):
    for i in range(20):
        await queue.put([i, i+1, i+2])
        # await asyncio.sleep(0.5)
    await queue.put('stop')

async def main():
    queue = asyncio.Queue()
    input_coroutines = [consumer(queue), producer(queue)]
    for f in asyncio.as_completed(input_coroutines):
        try:
            result = await f
            print(result)
        except Exception as e:
            print('caught exception')
            print(e)

if __name__ == "__main__":
    asyncio.run(main())
What am I doing wrong?
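For reference, a sketch of one way to keep the hand-off non-blocking: res.get() blocks the event loop until the worker finishes, so nothing overlaps. Swapping mp.Pool for concurrent.futures.ProcessPoolExecutor and awaiting the results through loop.run_in_executor() keeps the loop free while the workers run (this is an adaptation, not code from the question):
import asyncio
import os
import time
from concurrent.futures import ProcessPoolExecutor

def f(x: list) -> int:
    print(f'the pid of this process is: {os.getpid()}')
    time.sleep(1)
    return len(x)

async def consumer(queue, pool):
    loop = asyncio.get_running_loop()
    pending = []
    while True:
        x = await queue.get()
        queue.task_done()
        if x == 'stop':
            break
        # run_in_executor returns an awaitable Future, so the event loop
        # stays free while a worker process runs f(x)
        pending.append(loop.run_in_executor(pool, f, x))
    results = await asyncio.gather(*pending)
    print('consumed', results)

async def producer(queue):
    for i in range(20):
        await queue.put([i, i + 1, i + 2])
    await queue.put('stop')

async def main():
    queue = asyncio.Queue()
    with ProcessPoolExecutor(max_workers=4) as pool:
        await asyncio.gather(consumer(queue, pool), producer(queue))

if __name__ == "__main__":
    asyncio.run(main())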

Using asyncio to wait for results from subprocess

My Python script contains a loop that uses subprocess to run commands outside the script. Each subprocess is independent. I listen for the returned message in case there's an error; I can't ignore the result of the subprocess. Here's the script without asyncio (I've replaced my computationally expensive call with sleep):
from subprocess import PIPE  # https://docs.python.org/3/library/subprocess.html
import subprocess

def go_do_something(index: int) -> None:
    """
    This function takes a long time
    Nothing is returned
    Each instance is independent
    """
    process = subprocess.run(["sleep", "2"], stdout=PIPE, stderr=PIPE, timeout=20)
    stdout = process.stdout.decode("utf-8")
    stderr = process.stderr.decode("utf-8")
    if "error" in stderr:
        print("error for " + str(index))
    return

def my_long_func(val: int) -> None:
    """
    This function contains a loop
    Each iteration of the loop calls a function
    Nothing is returned
    """
    for index in range(val):
        print("index = " + str(index))
        go_do_something(index)

# run the script
my_long_func(3)  # launch three tasks
I think I could use asyncio to speed up this activity since the Python script is waiting on the external subprocess to complete. I think threading or multiprocessing are not necessary, though they could also result in faster execution. Using a task queue (e.g., Celery) is another option.
I tried implementing the asyncio approach, but am missing something since the following attempt doesn't change the overall execution time:
import asyncio
from subprocess import PIPE  # https://docs.python.org/3/library/subprocess.html
import subprocess

async def go_do_something(index: int) -> None:
    """
    This function takes a long time
    Nothing is returned
    Each instance is independent
    """
    process = subprocess.run(["sleep", "2"], stdout=PIPE, stderr=PIPE, timeout=20)
    stdout = process.stdout.decode("utf-8")
    stderr = process.stderr.decode("utf-8")
    if "error" in stderr:
        print("error for " + str(index))
    return

def my_long_func(val: int) -> None:
    """
    This function contains a loop
    Each iteration of the loop calls a function
    Nothing is returned
    """
    # https://docs.python.org/3/library/asyncio-eventloop.html
    loop = asyncio.get_event_loop()
    tasks = []
    for index in range(val):
        task = go_do_something(index)
        tasks.append(task)
    # https://docs.python.org/3/library/asyncio-task.html
    tasks = asyncio.gather(*tasks)
    loop.run_until_complete(tasks)
    loop.close()
    return

my_long_func(3)  # launch three tasks
If I want to monitor the output of each subprocess but not wait while each subprocess runs, can I benefit from asyncio? Or does this situation require something like multiprocessing or Celery?
Try executing the commands using asyncio instead of subprocess.
Define a run() function:
import asyncio

async def run(cmd: str):
    proc = await asyncio.create_subprocess_shell(
        cmd,
        stderr=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE
    )
    stdout, stderr = await proc.communicate()
    print(f'[{cmd!r} exited with {proc.returncode}]')
    if stdout:
        print(f'[stdout]\n{stdout.decode()}')
    if stderr:
        print(f'[stderr]\n{stderr.decode()}')
Then you may call it as you would call any async function:
asyncio.run(run('sleep 2'))
#=>
['sleep 2' exited with 0]
The example was taken from the official asyncio subprocess documentation.
@ronginat pointed me to https://asyncio.readthedocs.io/en/latest/subprocess.html, which I was able to adapt to my situation:
import asyncio

async def run_command(*args):
    # Create subprocess
    process = await asyncio.create_subprocess_exec(
        *args,
        # stdout must be a pipe to be accessible as process.stdout
        stdout=asyncio.subprocess.PIPE)
    # Wait for the subprocess to finish
    stdout, stderr = await process.communicate()
    # Return stdout
    return stdout.decode().strip()

async def go_do_something(index: int) -> None:
    print('index=', index)
    res = await run_command('sleep', '2')
    return res

def my_long_func(val: int) -> None:
    task_list = []
    for indx in range(val):
        task_list.append(go_do_something(indx))
    loop = asyncio.get_event_loop()
    commands = asyncio.gather(*task_list)
    reslt = loop.run_until_complete(commands)
    print(reslt)
    loop.close()

my_long_func(3)  # launch three tasks
The total time of execution is just over 2 seconds even though there are three sleeps of duration 2 seconds. And I get the stdout from each subprocess.
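As a side note, on Python 3.7+ the same pattern can be written without manual loop management, using asyncio.run() and asyncio.gather() directly; a condensed sketch of the code above:
import asyncio

async def run_command(*args):
    process = await asyncio.create_subprocess_exec(
        *args, stdout=asyncio.subprocess.PIPE)
    stdout, _ = await process.communicate()
    return stdout.decode().strip()

async def my_long_func(val: int) -> None:
    # run all subprocesses concurrently and collect their stdout
    results = await asyncio.gather(
        *(run_command('sleep', '2') for _ in range(val)))
    print(results)

asyncio.run(my_long_func(3))  # still finishes in roughly 2 seconds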

websockets, asyncio and PyQt5 together at last. Is Quamash necessary?

I've been working on a client that uses PyQt5 and the websockets module which is built around asyncio. I thought that something like the code below would work but I'm finding that the incoming data (from the server) is not being updated in the GUI until I click enter in the line edit box. Those incoming messages are intended to set the pulse for the updates to the GUI and will carry data to be used for updating. Is quamash a better way to approach this? btw, I will be using processes for some other aspects of this code so I don't consider it overkill (at this point).
This is Python 3.6, PyQt5.6(or higher) and whatever version of websockets that currently installs with pip. https://github.com/aaugustin/websockets
The client:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import asyncio
import websockets
import sys
import time
from multiprocessing import Process, Pipe, Queue
from PyQt5 import QtCore, QtGui, QtWidgets

class ComBox(QtWidgets.QDialog):
    def __init__(self):
        QtWidgets.QDialog.__init__(self)
        self.verticalLayout = QtWidgets.QVBoxLayout(self)
        self.groupBox = QtWidgets.QGroupBox(self)
        self.groupBox.setTitle("messages from beyond")
        self.gridLayout = QtWidgets.QGridLayout(self.groupBox)
        self.label = QtWidgets.QLabel(self.groupBox)
        self.gridLayout.addWidget(self.label, 0, 0, 1, 1)
        self.verticalLayout.addWidget(self.groupBox)
        self.lineEdit = QtWidgets.QLineEdit(self)
        self.verticalLayout.addWidget(self.lineEdit)
        self.lineEdit.editingFinished.connect(self.enterPress)

    @QtCore.pyqtSlot()
    def enterPress(self):
        mytext = str(self.lineEdit.text())
        self.inputqueue.put(mytext)

    @QtCore.pyqtSlot(str)
    def updategui(self, message):
        self.label.setText(message)

class Websocky(QtCore.QThread):
    updatemaingui = QtCore.pyqtSignal(str)

    def __init__(self):
        super(Websocky, self).__init__()

    def run(self):
        while True:
            time.sleep(.1)
            message = self.outputqueue.get()
            try:
                self.updatemaingui[str].emit(message)
            except Exception as e1:
                print("updatemaingui problem: {}".format(e1))

async def consumer_handler(websocket):
    while True:
        try:
            message = await websocket.recv()
            outputqueue.put(message)
        except Exception as e1:
            print(e1)

async def producer_handler(websocket):
    while True:
        message = inputqueue.get()
        await websocket.send(message)
        await asyncio.sleep(.1)

async def handler():
    async with websockets.connect('ws://localhost:8765') as websocket:
        consumer_task = asyncio.ensure_future(consumer_handler(websocket))
        producer_task = asyncio.ensure_future(producer_handler(websocket))
        done, pending = await asyncio.wait(
            [consumer_task, producer_task],
            return_when=asyncio.FIRST_COMPLETED,)
        for task in pending:
            task.cancel()

def start_websockets():
    loop = asyncio.get_event_loop()
    loop.run_until_complete(handler())

inputqueue = Queue()
outputqueue = Queue()

app = QtWidgets.QApplication(sys.argv)
comboxDialog = ComBox()
comboxDialog.inputqueue = inputqueue
comboxDialog.outputqueue = outputqueue
comboxDialog.show()

webster = Websocky()
webster.outputqueue = outputqueue
webster.updatemaingui[str].connect(comboxDialog.updategui)
webster.start()

p2 = Process(target=start_websockets)
p2.start()

sys.exit(app.exec_())
The server:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import asyncio
import time
import websockets

# here we'll store all active connections to use for sending periodic messages
connections = []

# @asyncio.coroutine
async def connection_handler(connection, path):
    connections.append(connection)  # add connection to pool
    while True:
        msg = await connection.recv()
        if msg is None:  # connection lost
            connections.remove(connection)  # remove connection from pool, when client disconnects
            break
        else:
            print('< {}'.format(msg))

# @asyncio.coroutine
async def send_periodically():
    while True:
        await asyncio.sleep(2)  # switch to other code and continue execution in 2 seconds
        for connection in connections:
            message = str(round(time.time()))
            print('> Periodic event happened.')
            await connection.send(message)  # send message to each connected client

start_server = websockets.serve(connection_handler, 'localhost', 8765)
asyncio.get_event_loop().run_until_complete(start_server)
asyncio.ensure_future(send_periodically())  # before blocking call we schedule our coroutine for sending periodic messages
asyncio.get_event_loop().run_forever()
Shortly after posting this question I realized the problem. The line
message = inputqueue.get()
in the producer_handler function is blocking. This causes what should be an async function to hang everything in that process until it sees something in the queue. My workaround was to use the aioprocessing module which provides asyncio compatible queues. So, it looks more like this:
import aioprocessing

async def producer_handler(websocket):
    while True:
        message = await inputqueue.coro_get()
        await websocket.send(message)
        await asyncio.sleep(.1)

inputqueue = aioprocessing.AioQueue()
The aioprocessing module provides some nice options and documentation, and in this case it is a rather simple solution for the issue. https://github.com/dano/aioprocessing
So, to answer my question: No, you don't have to use quamash for this kind of thing.

Pass asynchronous websocket.send() to stdout/stderr wrapper class

I have a class function which unbuffers stdout and stderr, like so:
import re

class Unbuffered:
    def __init__(self, stream):
        self.stream = stream

    def write(self, data):
        data = data.strip()
        if data.startswith("INFO: "):
            data = data[6:]
        if '[' in data:
            progress = re.compile(r"\[(\d+)/(\d+)\]")
            data = progress.match(data)
            total = data.group(2)
            current = data.group(1)
            data = '{0}/{1}'.format(current, total)
        if data.startswith("ERROR: "):
            data = data[7:]
        self.stream.write(data + '\n')
        self.stream.flush()

    def __getattr__(self, attr):
        return getattr(self.stream, attr)
The output comes from a function run in a ProcessPoolExecutor when an inbound websocket message arrives.
I want the output printed to the console as well as sent to my websocket client. I tried making Unbuffered async and passing the websocket instance to it, but no luck.
UPDATE: The essentials of run() and my websocket handler() look something like this:
def run(url, path):
    logging.addLevelName(25, "INFO")
    fmt = logging.Formatter('%(levelname)s: %(message)s')
    #----
    output.progress_stream = Unbuffered(sys.stderr)
    stream = Unbuffered(sys.stdout)
    #----
    level = logging.INFO
    hdlr = logging.StreamHandler(stream)
    hdlr.setFormatter(fmt)
    log.addHandler(hdlr)
    log.setLevel(level)
    get_media(url, opt)

async def handler(websocket, path):
    while True:
        inbound = json.loads(await websocket.recv())
        if inbound is None:
            break
        url = inbound['url']
        if 'path' in inbound:
            path = inbound['path'].rstrip(os.path.sep) + os.path.sep
        else:
            path = os.path.expanduser("~") + os.path.sep
        # blah more code
        while inbound != None:
            await asyncio.sleep(.001)
            await loop.run_in_executor(None, run, url, path)
run(), handler() and Unbuffered are separate from each other.
Rewriting get_media() to use asyncio instead of running it in a different thread would be best. Otherwise, there are some options to communicate between a regular thread and coroutines, for example, using a socketpair:
import asyncio
import socket
import threading
import time
import random

# threads stuff
def producer(n, writer):
    for i in range(10):
        # print("sending", i)
        writer.send("message #{}.{}\n".format(n, i).encode())
        time.sleep(random.uniform(0.1, 1))

def go(writer):
    threads = [threading.Thread(target=producer, args=(i + 1, writer,))
               for i in range(5)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    writer.send("bye\n".encode())

# asyncio coroutines
async def clock():
    for i in range(11):
        print("The time is", i)
        await asyncio.sleep(1)

async def main(reader):
    buffer = ""
    while True:
        buffer += (await loop.sock_recv(reader, 10000)).decode()
        # print(len(buffer))
        while "\n" in buffer:
            msg, _nl, buffer = buffer.partition("\n")
            print("Got", msg)
            if msg == "bye":
                return

reader, writer = socket.socketpair()
reader.setblocking(False)
threading.Thread(target=go, args=(writer,)).start()
# time.sleep(1.5)  # socket is buffering

loop = asyncio.get_event_loop()
loop.run_until_complete(asyncio.wait([clock(), main(reader)]))
loop.close()
You can also try this 3rd-party thread+asyncio compatible queue: janus
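For example, a minimal sketch of that approach (assuming janus's sync_q/async_q API): the same queue exposes a blocking interface for plain threads and an awaitable interface for coroutines:
import asyncio
import threading
import janus

def threaded_producer(sync_q):
    # plain thread: blocking puts are fine here
    for i in range(5):
        sync_q.put("message {}".format(i))
    sync_q.put(None)  # sentinel to stop the consumer

async def async_consumer(async_q):
    while True:
        msg = await async_q.get()  # does not block the event loop
        async_q.task_done()
        if msg is None:
            break
        print("Got", msg)

async def main():
    queue = janus.Queue()
    worker = threading.Thread(target=threaded_producer, args=(queue.sync_q,))
    worker.start()
    await async_consumer(queue.async_q)
    worker.join()
    queue.close()
    await queue.wait_closed()

asyncio.run(main())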

Python asyncio - Loop exits with Task was destroyed but it is pending

This is the relevant code of my python program:
import discord
import asyncio

class Bot(discord.Client):
    def __init__(self):
        super().__init__()

    @asyncio.coroutine
    def my_background_task(self):
        yield from self.wait_until_ready()
        while not self.is_closed:
            yield from asyncio.sleep(3600*24)  # <- This is line 76 where it fails
            doSomething()

bot = Bot()
loop = asyncio.get_event_loop()

try:
    loop.create_task(bot.my_background_task())
    loop.run_until_complete(bot.login('username', 'password'))
    loop.run_until_complete(bot.connect())
except Exception:
    loop.run_until_complete(bot.close())
finally:
    loop.close()
The program occasionally quits (on its own, while it should not) with no error or warning other than
Task was destroyed but it is pending!
task: <Task pending coro=<my_background_task() running at bin/discordBot.py:76> wait_for=<Future pending cb=[Task._wakeup()]>>
How can I ensure the program won't randomly quit? I'm running Python 3.4.3+ on Xubuntu 15.10.
This is because the discord client module needs control once every minute or so.
This means that any function that steals control for more than a certain time causes discord's client to enter an invalid state (which will manifest itself as an exception at some point later, perhaps upon the next method call of the client).
To ensure that the discord module client can ping the discord server, you should use a true multi-threading solution.
One solution is to offload all heavy processing onto a separate process (a separate thread will not do, because Python has a global interpreter lock) and use the discord bot as a thin layer whose responsibility is to populate work queues.
Related reading:
https://discordpy.readthedocs.io/en/latest/faq.html#what-does-blocking-mean
Example solution... this is WAY beyond the scope of the problem, but I already had the code mostly written. If I had more time, I would write a shorter solution :)
2 parts, discord interaction and processing server:
This is the discord listener.
import discord
import re
import asyncio
import traceback
import websockets
import json

# Call a function on other server
async def call(methodName, *args, **kwargs):
    async with websockets.connect('ws://localhost:9001/meow') as websocket:
        payload = json.dumps({"method": methodName, "args": args, "kwargs": kwargs})
        await websocket.send(payload)
        #...
        resp = await websocket.recv()
        #...
        return resp

client = discord.Client()
tok = open("token.dat").read()

@client.event
async def on_ready():
    print('Logged in as')
    print(client.user.name)
    print(client.user.id)
    print('------')

@client.event
async def on_error(event, *args, **kwargs):
    print("Error?")

@client.event
async def on_message(message):
    try:
        if message.author.id == client.user.id:
            return
        m = re.match("(\w+) for (\d+).*?", message.content)
        if m:
            g = m.groups(1)
            methodName = g[0]
            someNumber = int(g[1])
            response = await call(methodName, someNumber)
            if response:
                await client.send_message(message.channel, response[0:2000])
    except Exception as e:
        print(e)
        print(traceback.format_exc())

client.run(tok)
This is the worker server for processing heavy requests. You can make this part sync or async.
I chose to use some magic called a websocket to send data from one python process to another one. But you can use anything you want. You could make one script write files into a dir, and the other script could read the files out and process them, for example.
import tornado
import tornado.websocket
import tornado.httpserver
import json
import asyncio
import inspect
import time

class Handler:
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def consume(self, text):
        return "You said {0} and I say hiya".format(text)

    async def sweeps(self, len):
        await asyncio.sleep(len)
        return "Slept for {0} seconds asynchronously!".format(len)

    def sleeps(self, len):
        time.sleep(len)
        return "Slept for {0} seconds synchronously!".format(len)

class MyService(Handler, tornado.websocket.WebSocketHandler):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def stop(self):
        Handler.server.stop()

    def open(self):
        print("WebSocket opened")

    def on_message(self, message):
        print(message)
        j = json.loads(message)
        methodName = j["method"]
        args = j.get("args", ())
        method = getattr(self, methodName)
        if inspect.iscoroutinefunction(method):
            loop = asyncio.get_event_loop()
            task = loop.create_task(method(*args))
            task.add_done_callback(lambda res: self.write_message(res.result()))
            future = asyncio.ensure_future(task)
        elif method:
            resp = method(*args)
            self.write_message(resp)

    def on_close(self):
        print("WebSocket closed")

application = tornado.web.Application([
    (r'/meow', MyService),
])

if __name__ == "__main__":
    from tornado.platform.asyncio import AsyncIOMainLoop
    AsyncIOMainLoop().install()
    http_server = tornado.httpserver.HTTPServer(application)
    Handler.server = http_server
    http_server.listen(9001)
    asyncio.get_event_loop().run_forever()
Now, if you run both processes in separate python scripts, and tell your bot "sleep for 100", it will sleep for 100 seconds happily!
The asyncio stuff functions as a make-shift work queue, and you can properly separate the listener from the backend processing by running them as separate python scripts.
Now, no matter how long your functions run in the 'server' part, the client part will never be prevented from pinging the discord server.
Image failed to upload, but... anyway, this is how to tell the bot to sleep and reply... note that the sleep is synchronous.
http://i.imgur.com/N4ZPPbB.png
I don't think the problem happens during asyncio.sleep. In any case, you shouldn't suppress the exception you got:
bot = Bot()
loop = asyncio.get_event_loop()
try:
    # ...
except Exception as e:
    loop.run_until_complete(bot.close())
    raise e  # <--- reraise the exception you got during execution to see it (or log it here)
finally:
    # ...
You have to manually stop your task on exit:
import discord
import asyncio

class Bot(discord.Client):
    def __init__(self):
        super().__init__()

    @asyncio.coroutine
    def my_background_task(self):
        yield from self.wait_until_ready()
        while not self.is_closed:
            yield from asyncio.sleep(3600*24)  # <- This is line 76 where it fails
            doSomething()

bot = Bot()
loop = asyncio.get_event_loop()

try:
    task = loop.create_task(bot.my_background_task())
    loop.run_until_complete(bot.login('username', 'password'))
    loop.run_until_complete(bot.connect())
except Exception:
    loop.run_until_complete(bot.close())
finally:
    task.cancel()
    try:
        loop.run_until_complete(task)
    except Exception:
        pass
    loop.close()
