I am trying out the Requests_Threads library and I've got it to the point where it returns data the way I want, but it gives me "Process finished with exit code 0" and exits before going any further. Has anybody used this library and figured this out?
from requests_threads import AsyncSession
import asyncio

class apis:
    def __init__(self):
        self.session = AsyncSession()
        self.main_out = self.session.run(self.main)
        print('still alive')

    async def main(self):
        rs = []
        for _ in range(100):
            rs.append(await self.sub('thing'))
        return [(x[0].json(), x[1]) for x in rs]

    async def sub(self, key):
        return await self.session.get('http://httpbin.org/get'), key
So, I ended up with a solution using aiohttp.
1000 GETs using standard Requests took 2 minutes and 4 seconds; using aiohttp they took 56 seconds, a reduction of a little over 54%.
import aiohttp
import asyncio
from datetime import datetime

class apis:
    def __init__(self):
        self.session = aiohttp.ClientSession()
        self.loop = asyncio.get_event_loop()
        self.main_out = self.loop.run_until_complete(self.main())

    async def main(self):
        rs = []
        for _ in range(1000):
            rs.append(await self.loop.create_task(self.sub('foo')))
        return rs

    async def sub(self, key):
        return await self.session.get('http://httpbin.org/get'), key, datetime.now()
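Worth noting (my observation, not part of the original post): because each task is awaited inside the loop, the requests still run one at a time, so the speedup presumably comes from connection reuse rather than concurrency. A rough sketch of the same idea with the requests actually in flight together via asyncio.gather, under the same assumptions (httpbin URL, 'foo' key), would be:

import aiohttp
import asyncio
from datetime import datetime

async def sub(session, key):
    # assumed helper mirroring sub() above: returns response, key, timestamp
    resp = await session.get('http://httpbin.org/get')
    return resp, key, datetime.now()

async def main():
    # create the session inside the running loop, as aiohttp recommends
    async with aiohttp.ClientSession() as session:
        tasks = [sub(session, 'foo') for _ in range(1000)]
        return await asyncio.gather(*tasks)  # all requests in flight together

results = asyncio.get_event_loop().run_until_complete(main())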
I want to download videos from YouTube in parallel, but my code ends with a "PicklingError" exception. Can you guys help me with the code and how it should look, please?
Another fixed variant:
import sys
#from pathos.multiprocessing import ProcessingPool as Pool
from multiprocessing import Pool
from pytube import YouTube
from youtubeMultiDownloader import UiMainWindow
from PyQt5 import QtCore, QtGui, QtWidgets
from PyQt5.QtWidgets import QFileDialog

class YouTubeInstance:
    def __init__(self, path):
        self.youtube = YouTube
        self.path = path
        #self.ui_obj = ui_obj

    def download_file(self, url):
        self.youtube(url).streams.get_highest_resolution().download(self.path)
        #self.ui.ui.youtube_outputs.setText(f'Video \'{self.youtube.title}\' has been downloaded successfully!')

class YouTubeMultiDownloader(QtWidgets.QMainWindow):
    def __init__(self):
        super().__init__()
        self.pool = Pool
        self.ui = UiMainWindow()
        self.ui.setup_ui(self)
        self.path_to_dir = None
        self.urls = None

    def _get_urls_from_form(self):
        self.urls = self.ui.youtube_urls.toPlainText().split('\n')
        return len(self.urls)

    def choose_directory(self):
        self.path_to_dir = str(QFileDialog.getExistingDirectory(self, "Select Directory"))

    def run_multi_downloads(self):
        youtube = YouTubeInstance(self.path_to_dir)
        self.pool(self._get_urls_from_form()).map(youtube.download_file, self.urls)

if __name__ == "__main__":
    app = QtWidgets.QApplication([])
    application = YouTubeMultiDownloader()
    application.show()
    sys.exit(app.exec_())
Updated: screenshots of my UI and of the errors were attached here (errors 1 and 2 are fixed; error 3 is the one still occurring).
You've got hold of the wrong end of the stick. Take a look at the multiprocessing module documentation. As it says, Pool is for running multiple instances of the same function simultaneously (in parallel). So create the Pool with however many workers you want, and since your method doesn't take any parameters, call it without any arguments:
with Pool(5) as p:
    print(p.map(YouTubeMultiDownloader))
This creates 5 parallel instances. You can adapt the code from there and work through your errors.
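As a side note (not from the original answer): a PicklingError with multiprocessing usually means the callable or its arguments reference something unpicklable, such as Qt widgets. A minimal sketch that keeps the worker a plain module-level function, with hypothetical URLs and download path, might look like this:

from multiprocessing import Pool
from pytube import YouTube

def download_file(args):
    # a plain (url, path) tuple keeps the worker fully picklable
    url, path = args
    YouTube(url).streams.get_highest_resolution().download(path)

if __name__ == "__main__":
    urls = [
        "https://www.youtube.com/watch?v=example1",  # hypothetical URLs
        "https://www.youtube.com/watch?v=example2",
    ]
    path = "./downloads"
    with Pool(len(urls)) as pool:
        pool.map(download_file, [(url, path) for url in urls])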
I am just getting started with asyncio in Python. What I am trying to do is below:
A websocket connects to a data provider and keeps listening for new data (runs forever).
In parallel, work on this data and maybe save it to a file, make buy/sell decisions (any parallel operation is okay).
I am trying to use asyncio to achieve this (would threads be better? Although threads seem more complicated than asyncio).
I am using a Jupyter notebook (so an event loop is already running; maybe that's the problem?).
Code 1 works, but it blocks my event loop and only keeps printing the data. All other code is blocked, I guess because it is always busy with the websocket.
import ssl
import websocket
import json
from IPython.display import display, clear_output
from threading import Thread

def on_message(ws, message):
    resp1 = json.loads(message)
    clear_output(wait=True)
    print(resp1['events'][0]['price'])

def run():
    ws = websocket.WebSocketApp("wss://api.gemini.com/v1/marketdata/BTCUSD", on_message=on_message)
    ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})

ws_run = Thread(target=run)
ws_run.start()

print("ok")  # this prints once, but then it gets blocked by the websocket code.
I tried code 2, but it hangs forever and doesn't do anything:
import asyncio
import ssl
import websocket
import json
from IPython.display import display, clear_output

async def on_message(ws, message):
    resp1 = json.loads(message)
    clear_output(wait=True)
    #print(resp1, sort_keys=True, indent=4)
    #print(resp1['events'][0]['side'])
    print(resp1['events'][0]['price'])

async def count2(x):
    x = 0
    for i in range(5):
        await asyncio.sleep(0.01)
        print('second', x)
        y = x + 10
    return y

async def main():
    await asyncio.gather(count(x), on_message(ws, message))

if __name__ == "__main__":
    import time
    ws = websocket.WebSocketApp("wss://api.gemini.com/v1/marketdata/BTCUSD", on_message=on_message)
    asyncio.get_event_loop().run_forever(ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE}))

    s = time.perf_counter()
    await main()
    elapsed = time.perf_counter() - s
    print(f" executed in {elapsed:0.2f} seconds.")
I tried this variant in main(), still no response:
if __name__ == "__main__":
    import time
    ws = websocket.WebSocketApp("wss://api.gemini.com/v1/marketdata/BTCUSD", on_message=on_message)
    ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})

    s = time.perf_counter()
    await main()
    elapsed = time.perf_counter() - s
    print(f" executed in {elapsed:0.2f} seconds.")
Update: I got this to work, but I don't know if this is the right way, without using run_forever():
import ssl
import websocket
import asyncio
import time

async def MySock():
    while True:
        print(ws.recv())
        await asyncio.sleep(0.5)

async def MyPrint():
    while True:
        print("-------------------------------------------")
        await asyncio.sleep(0.5)

async def main():
    await asyncio.gather(MySock(), MyPrint())

if __name__ == "__main__":
    ws = websocket.WebSocket()
    ws.connect("wss://api.gemini.com/v1/marketdata/btcusd?top_of_book=true&offers=true")

    s = time.perf_counter()
    await main()
    elapsed = time.perf_counter() - s
    print(f" executed in {elapsed:0.2f} seconds.")
This question already has answers here: Running an async background task in Tornado (5 answers). Closed 3 years ago.
I'm writing a service which consumes disk space and has to clean it up from time to time. Until now the cleaning was performed inside request handling. Unfortunately, when a lot of disk space has to be cleaned, it takes a long time and the service hangs. I tried to rewrite the cleaning procedure as a Tornado future, but I'm confused about how to make a future (or something like that) from a generator. A simplified version of my code is below:
import tornado
import tornado.ioloop
import tornado.web
from tornado.process import Subprocess
#from tornado.stack_context import run_in_stack_context, NullContext
from time import time
import random
import json
import asyncio

class meta_doc(type):
    def __init__(cls, name, bases, methods):
        super().__init__(name, bases, methods)
        cls.storage_size = 0

class Documentation_parsing(tornado.web.RequestHandler, metaclass=meta_doc):
    max_storage_size = 200
    optimal_storage_size = 100
    cleaning = False

    @classmethod
    @tornado.gen.coroutine
    def _collect_old_folders(cls):
        print('start cleaning')
        for d in subfolders:
            if cls.storage_size < cls.optimal_storage_size:
                break
            delta = random.randint(5, 15)
            time.sleep(random.uniform(0.5, 3))
            cls.storage_size -= delta
            print('Folder has been deleted. Folder size:', cls.storage_size)
            yield None
        cls.cleaning = False
        print('finish cleaning')

    @classmethod
    def collect_old_folders(cls):
        if not cls.cleaning:
            cls.cleaning = True
            tornado.ioloop.IOLoop.current().add_future(cls._collect_old_folders, lambda f: f.result())

    @tornado.gen.coroutine
    def post(self):
        request_id = self.get_body_argument("request_id", default='')
        self.__class__.storage_size += random.randint(5, 15)
        if self.storage_size > self.max_storage_size:
            self.collect_old_folders()
        self.write(json.dumps({'request_id': request_id, 'storage_size': self.storage_size}))
        print('process request: request_id {0}, storage size {1}'.format(request_id, self.storage_size))

ApplicationSuffixes = [(r'/main_parsing.*', Documentation_parsing)]

if __name__ == '__main__':
    app = tornado.web.Application(ApplicationSuffixes)
    app.listen(9999)
    tornado.ioloop.IOLoop.current().start()
This code raises a future assertion error. I looked for another decorator which makes a future from a generator or coroutine, but didn't find one. Please help me with this problem.
UPD: I have already tried the solution from the linked question, but I get "Cannot import run_in_stack_context". That's why that import is commented out in my code.
Solved it:
import tornado
import tornado.ioloop
import tornado.web
from tornado.process import Subprocess
#from tornado.stack_context import run_in_stack_context, NullContext
import time
import random
import json
import asyncio

class meta_doc(type):
    def __init__(cls, name, bases, methods):
        super().__init__(name, bases, methods)
        cls.storage_size = 0

class Documentation_parsing(tornado.web.RequestHandler, metaclass=meta_doc):
    max_storage_size = 200
    optimal_storage_size = 100
    cleaning = False

    @classmethod
    @tornado.gen.coroutine
    def _collect_old_folders(cls):
        print('start cleaning')
        while True:
            if cls.storage_size < cls.optimal_storage_size:
                break
            delta = random.randint(5, 15)
            time.sleep(random.uniform(0.5, 3))
            cls.storage_size -= delta
            print('Folder has been deleted. Folder size:', cls.storage_size)
            yield None
        cls.cleaning = False
        print('finish cleaning')

    @classmethod
    def collect_old_folders(cls):
        def func(inp):
            print('called', inp)
        if not cls.cleaning:
            cls.cleaning = True
            future = cls._collect_old_folders()
            print(type(future))
            tornado.ioloop.IOLoop.current().add_future(future, func)

    @tornado.gen.coroutine
    def post(self):
        request_id = self.get_body_argument("request_id", default='')
        self.__class__.storage_size += random.randint(5, 15)
        if self.storage_size > self.max_storage_size:
            self.collect_old_folders()
        self.write(json.dumps({'request_id': request_id, 'storage_size': self.storage_size}))
        print('process request: request_id {0}, storage size {1}'.format(request_id, self.storage_size))

ApplicationSuffixes = [(r'/main_parsing.*', Documentation_parsing)]

if __name__ == '__main__':
    app = tornado.web.Application(ApplicationSuffixes)
    app.listen(8999)
    tornado.ioloop.IOLoop.current().start()
Comments: the tornado.gen.coroutine decorator returns a function which returns a future, so I just need to call the method to get a future. In Tornado 5.0.2 I can add a future directly to the IOLoop. The only thing is that, besides the future, I have to pass a function as the second parameter; that function receives the exception or the result of the future.
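One caveat worth adding (my own note, not from the original post): time.sleep inside the coroutine still blocks the IOLoop between yields. A minimal sketch of the same cleaning loop that instead waits via tornado.gen.sleep, using a hypothetical storage object in place of the class attributes, would be:

import random
import tornado.gen

@tornado.gen.coroutine
def collect_old_folders_nonblocking(storage):
    # storage is a hypothetical object with storage_size / optimal_storage_size,
    # standing in for the class attributes used above
    print('start cleaning')
    while storage.storage_size >= storage.optimal_storage_size:
        # gen.sleep returns a future, so the IOLoop keeps serving requests while we wait
        yield tornado.gen.sleep(random.uniform(0.5, 3))
        storage.storage_size -= random.randint(5, 15)
        print('Folder has been deleted. Folder size:', storage.storage_size)
    print('finish cleaning')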
I'm using Tornado with threads.
In short, each time the websocket handler receives a request, it starts to execute a task, which might take a few minutes.
However, once a client is connected, no other client can connect until the first one disconnects.
Any ideas?
I've attached a minimal example that uses time.sleep to simulate long running tasks.
import tornado.web
import tornado.websocket
import tornado.httpserver
import tornado.ioloop
import time
import json
import threading

class TaskHandler(tornado.websocket.WebSocketHandler):
    def open(self):
        pass

    def check_origin(self, origin):
        return True

    def on_message(self, message):
        try:
            print 'received: ', message
            self.write_message(json.dumps({'status': 'running'}))

            def worker_A(kwargs):
                time.sleep(100)
                pass

            def worker_B(kwargs):
                time.sleep(100)
                pass

            threads = []
            for target in [worker_A, worker_B]:
                t = threading.Thread(target=target, args=({'xxx': 'yyy'},))
                t.daemon = True
                t.start()
                threads.append(t)

            for t in threads:
                t.join()
        except Exception, e:
            print 'TaskHandler: exception: ', e
            pass

        self.write_message(json.dumps({'status': 'done'}))

    def on_close(self):
        pass

class Server(tornado.web.Application):
    def __init__(self):
        handlers = [
            ('/task', TaskHandler),
        ]
        tornado.web.Application.__init__(self, handlers)

if __name__ == '__main__':
    server = tornado.httpserver.HTTPServer(Server())
    server.listen(8765, address='127.0.0.1')
    tornado.ioloop.IOLoop.instance().start()
You block the whole Tornado event loop for 100 seconds in t.join. Unless you have a yield statement, or you schedule a callback and exit the function, your function is not asynchronous. Notice how your on_message starts two threads and then calls t.join on each: how can Tornado's event loop accomplish any other work while your function is waiting on t.join?
Instead, use a ThreadPoolExecutor, something like this:
from concurrent.futures import ThreadPoolExecutor
from tornado import gen
from tornado.ioloop import IOLoop

thread_pool = ThreadPoolExecutor(4)

class TaskHandler(tornado.websocket.WebSocketHandler):

    # Make this an asynchronous coroutine
    @gen.coroutine
    def on_message_coroutine(self, message):
        print 'received: ', message
        self.write_message(json.dumps({'status': 'running'}))

        def worker_A(kwargs):
            time.sleep(100)
            pass

        def worker_B(kwargs):
            time.sleep(100)
            pass

        futures = []
        for target in [worker_A, worker_B]:
            f = thread_pool.submit(target, {'xxx': 'yyy'})
            futures.append(f)

        # Now the event loop can do other things
        yield futures

    def on_message(self, message):
        IOLoop.current().spawn_callback(self.on_message_coroutine, message)
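A related pattern (my addition, not part of the original answer) is tornado.concurrent.run_on_executor, which wraps the blocking work itself so the handler can simply yield it. A rough sketch, assuming a Tornado version where on_message may itself be a coroutine:

from concurrent.futures import ThreadPoolExecutor
from tornado import gen
from tornado.concurrent import run_on_executor
import tornado.websocket
import time
import json

class TaskHandler(tornado.websocket.WebSocketHandler):
    executor = ThreadPoolExecutor(4)  # run_on_executor looks for this attribute

    @run_on_executor
    def long_task(self, kwargs):
        # runs in the thread pool, so it does not block the IOLoop
        time.sleep(100)

    @gen.coroutine
    def on_message(self, message):
        self.write_message(json.dumps({'status': 'running'}))
        yield self.long_task({'xxx': 'yyy'})
        self.write_message(json.dumps({'status': 'done'}))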
After a lot of investigating, I found out that after serving hundreds of thousands of HTTP POST requests, there's a memory leak. The strange part is that the memory leak only occurs when using PyPy.
Here's some example code:
from twisted.internet import reactor
import tornado.ioloop

do_tornado = False
port = 8888

if do_tornado:
    from tornado.web import RequestHandler, Application
else:
    from cyclone.web import RequestHandler, Application

class MainHandler(RequestHandler):
    def get(self):
        self.write("Hello, world")

    def post(self):
        self.write("Hello, world")

if __name__ == "__main__":
    routes = [(r"/", MainHandler)]
    application = Application(routes)
    print port
    if do_tornado:
        application.listen(port)
        tornado.ioloop.IOLoop.instance().start()
    else:
        reactor.listenTCP(port, application)
        reactor.run()
Here is the test code I am using to generate requests:
from twisted.internet import reactor, defer
from twisted.internet.task import LoopingCall
from twisted.web.client import Agent, HTTPConnectionPool
from twisted.web.iweb import IBodyProducer
from zope.interface import implements

pool = HTTPConnectionPool(reactor, persistent=True)
pool.retryAutomatically = False
pool.maxPersistentPerHost = 10
agent = Agent(reactor, pool=pool)

bid_url = 'http://localhost:8888'

class StringProducer(object):
    implements(IBodyProducer)

    def __init__(self, body):
        self.body = body
        self.length = len(body)

    def startProducing(self, consumer):
        consumer.write(self.body)
        return defer.succeed(None)

    def pauseProducing(self):
        pass

    def stopProducing(self):
        pass

def callback(a):
    pass

def error_callback(error):
    pass

def loop():
    d = agent.request('POST', bid_url, None, StringProducer("Hello, world"))
    #d = agent.request('GET', bid_url)
    d.addCallback(callback).addErrback(error_callback)

def main():
    exchange = LoopingCall(loop)
    exchange.start(0.02)
    #log.startLogging(sys.stdout)
    reactor.run()

main()
Note that this code does not leak with CPython, nor with Tornado on PyPy! The code leaks only when using Twisted and PyPy together, and ONLY when using a POST request.
To see the leak, you have to send hundreds of thousands of requests.
Note that when setting PYPY_GC_MAX, the process eventually crashes.
What's going on?
It turns out that the cause of the leak is BytesIO (from the io module).
Here's how to simulate the leak on PyPy:
from io import BytesIO
while True: a = BytesIO()
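To actually watch the growth while that loop runs (my own addition, not from the original answer), one option on Unix-like systems is to print the process's peak RSS periodically, for example:

import resource
from io import BytesIO

i = 0
while True:
    a = BytesIO()
    i += 1
    if i % 1000000 == 0:
        # ru_maxrss is the peak resident set size (KB on Linux, bytes on macOS)
        print(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)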
Here's the fix:
https://bitbucket.org/pypy/pypy/commits/40fa4f3a0740e3aac77862fe8a853259c07cb00b