Load zip file from url with asyncio and aiohttp - python-3.x

How can I download a zip file with a GET request?
I use asyncio and aiohttp in my Python app.
Here is my code:
async def fetch_page(session, url):
    with aiohttp.Timeout(10):
        async with session.get(url) as response:
            assert response.status == 200
            return await response.read()

loop = asyncio.get_event_loop()
links = ['http://www2.census.gov/census_2010/04-Summary_File_1/Louisiana/la2010.sf1.zip']
for link in links:
    with aiohttp.ClientSession(loop=loop) as session:
        response = loop.run_until_complete(fetch_page(session, url=link))
        print(type(response))
Running this raises asyncio.TimeoutError.
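No answer is shown for this question, but the likely culprit is the hard 10-second aiohttp.Timeout: the census zip is a large file, so the read cannot finish in time. A minimal sketch of the same download with a much more generous timeout, assuming a current aiohttp 3.x release where aiohttp.ClientTimeout replaces the old aiohttp.Timeout helper:
import asyncio
import aiohttp

async def fetch_file(url):
    # Give the download far more than 10 seconds; the total= value here
    # is an assumption, tune it to the file size and your connection.
    timeout = aiohttp.ClientTimeout(total=600)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.get(url) as response:
            assert response.status == 200
            return await response.read()

link = 'http://www2.census.gov/census_2010/04-Summary_File_1/Louisiana/la2010.sf1.zip'
data = asyncio.run(fetch_file(link))
print(type(data), len(data))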

Related

Python asynchronous function calls using aiohttp

I am trying to understand aiohttp a little better. Can someone check why my code is not printing the response of the requests and instead just prints the coroutine objects?
import asyncio
import aiohttp
import requests

async def get_event_1(session):
    url = "https://stackoverflow.com/"
    headers = {
        'content-Type': 'application/json'
    }
    response = await session.request('GET', url)
    return response.json()

async def get_event_2(session):
    url = "https://google.com"
    headers = {
        'content-Type': 'application/json'
    }
    response = await session.request('GET', url)
    return response.json()

async def main():
    async with aiohttp.ClientSession() as session:
        return await asyncio.gather(
            get_event_1(session),
            get_event_2(session)
        )

loop = asyncio.get_event_loop()
x = loop.run_until_complete(main())
loop.close()
print(x)
Output:
$ python async.py
[<coroutine object ClientResponse.json at 0x10567ae60>, <coroutine object ClientResponse.json at 0x10567aef0>]
sys:1: RuntimeWarning: coroutine 'ClientResponse.json' was never awaited
How do I print the responses instead?
The error message you received is informing you that a coroutine was never awaited.
You can see from the aiohttp documentation that response.json() is also a coroutine and therefore must be awaited: https://docs.aiohttp.org/en/stable/client_quickstart.html#json-response-content
return await response.json()
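Applied to the code in the question, each helper awaits the JSON body before returning it, for example:
async def get_event_1(session):
    url = "https://stackoverflow.com/"
    response = await session.request('GET', url)
    # response.json() is a coroutine, so it must be awaited as well.
    # (It will only succeed if the endpoint actually returns JSON.)
    return await response.json()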

Exception handling for asynchronous HTTP requests in Python 3.x

I'm trying to handle asynchronous HTTP requests. I call the async_provider() function from another module and perform subsequent tasks with the resulting response.text().
It only works if all requests are successful, but I can't handle any exceptions for failed requests (whatever the reason for the exception). Thank you for your help.
Here is the relevant part of the code:
import asyncio
import aiohttp

# I call this function from another module
def async_provider():
    list_a, list_b = asyncio.run(main())
    return list_a, list_b

async def fetch(session, url):
    # session.post request cases
    if url == "http://...1":
        referer = "http://...referer"
        user_agent = (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) "
            "AppleWebKit/605.1.15 (KHTML, like Gecko) "
            "Version/12.1 Safari/605.1.15"
        )
        payload = {'key1': 'value1', 'key2': 'value2'}
        async with session.post(
            url, data=payload, headers={"Referer": referer, "User-Agent": user_agent}
        ) as response:
            if response.status != 200:
                response.raise_for_status()
            return await response.text()
    # session.get request cases
    else:
        async with session.get(url) as response:
            if response.status != 200:
                response.raise_for_status()
            return await response.text()

async def fetch_all(session, urls):
    results = await asyncio.gather(
        *[asyncio.create_task(fetch(session, url)) for url in urls]
    )
    return results

async def main():
    urls = ["http://...1", "http://...2", "http://...3"]
    async with aiohttp.ClientSession() as session:
        response_text_1, response_text_2, response_text_3 = await fetch_all(
            session, urls
        )
        # some task with response text
Any exception breaks all requests
Check "return_exceptions" flag on gather.
results = await asyncio.gather(
    *[asyncio.create_task(fetch(session, url)) for url in urls],
    return_exceptions=True
)
With this flag, gather returns a list with one entry per task: the task's result if it succeeded, or the exception it raised if it failed. You can then check each entry (for example with isinstance) and re-raise or handle the failures as needed.
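For example, the question's fetch_all could separate successes from failures like this (a sketch that reuses fetch and session from the question):
async def fetch_all(session, urls):
    # return_exceptions=True keeps one failed request from cancelling the rest.
    results = await asyncio.gather(
        *[asyncio.create_task(fetch(session, url)) for url in urls],
        return_exceptions=True
    )
    texts = []
    for url, result in zip(urls, results):
        if isinstance(result, Exception):
            # A failed request: log or handle it instead of losing everything.
            print(f"{url} failed: {result!r}")
            texts.append(None)
        else:
            texts.append(result)
    return texts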

How to post a group of requests to 2 URLs with aiohttp

I have 2 URLs and 60k+ requests. Basically, I need to post every request to both URLs and then compare their responses, without waiting for a response before posting the next request.
I've tried to do it with aiohttp and asyncio:
import asyncio
import time
import aiohttp
import os
from aiofile import AIOFile

testURL = ""
prodURL = ""
directoryWithRequests = ''
directoryToWrite = ''
headers = {'content-type': 'application/soap+xml'}
i = 1

async def fetch(session, url, request):
    global i
    async with session.post(url=url, data=request.encode('utf-8'), headers=headers) as response:
        if response.status != 200:
            async with AIOFile(directoryToWrite + str(i) + '.xml', 'w') as afp:
                await afp.write(request)
                i += 1
        return await response.text()

async def fetch_all(session, urls, request):
    results = await asyncio.gather(*[asyncio.create_task(fetch(session, url, request)) for url in urls])
    return results

async def asynchronousRequests(requestBody):
    urls = [testURL, prodURL]
    global i
    with open(requestBody) as my_file:
        body = my_file.read()
    async with aiohttp.ClientSession() as session:
        htmls = await fetch_all(session, urls, body)
        # some conditions

async def asynchronous():
    try:
        start = time.time()
        futures = [asynchronousRequests(directoryWithRequests + i) for i in os.listdir(directoryWithRequests)]
        for future in asyncio.as_completed(futures):
            result = await future
        print("Process took: {:.2f} seconds".format(time.time() - start))
    except Exception as e:
        print(str(e))

if __name__ == '__main__':
    try:
        # AsyncronTest
        ioloop = asyncio.ProactorEventLoop()
        ioloop.run_until_complete(asynchronous())
        ioloop.close()
        if i == 1:
            print('Regress is OK')
        else:
            print('Number of requests to check = {}'.format(i))
    except Exception as e:
        print(e)
I believe the code above works, but it creates N futures, where N equals the number of request files. This amounts to a sort of DDoS, because the server can't respond to that many requests at the same time.
I found a suitable solution. Basically, it's just 2 async tasks:
tasks = [
    postRequest(testURL, client, body),
    postRequest(prodURL, client, body)
]
await asyncio.wait(tasks)
It's not the same performance as the code in the question for a comfortable number of requests, but at least it doesn't DDoS the server as much.
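The postRequest coroutine isn't shown in the answer; the following is a minimal sketch of how it might look, with a hypothetical caller that posts one body to both URLs concurrently while the request files are still worked through one at a time (asyncio.gather is used here instead of asyncio.wait so the two response bodies come back directly):
import asyncio
import aiohttp

headers = {'content-type': 'application/soap+xml'}

# Hypothetical implementation of the postRequest coroutine from the answer.
async def postRequest(url, client, body):
    async with client.post(url, data=body.encode('utf-8'), headers=headers) as response:
        return await response.text()

# Hypothetical caller: compare test and prod responses for a single body.
async def compareBothUrls(testURL, prodURL, body):
    async with aiohttp.ClientSession() as client:
        # Only two requests are in flight at a time, so the server is not flooded.
        test_text, prod_text = await asyncio.gather(
            postRequest(testURL, client, body),
            postRequest(prodURL, client, body),
        )
        return test_text == prod_text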

TypeError: _request() got an unexpected keyword argument 'cookies' (aiohttp)

import random
import asyncio
import json
import aiohttp
import sys
import urllib
from lxml.html.soupparser import parse
from aiohttp import ClientSession
from threading import Thread

def ttest():
    async def fetch(url, session):
        headers = {
            'Host': 'example.com'
        }
        cookies2 = {
            'test': 'test'
        }
        data = '{"test":"test"}'
        async with session.post(url, data=data, headers=headers, cookies=cookies2) as response:
            return await response.read()

    async def bound_fetch(sem, url, session):
        async with sem:
            html = await fetch(url, session)
            print(html)

    async def run(r):
        url = "https://test.com"
        tasks = []
        sem = asyncio.Semaphore(1000)
        async with aiohttp.ClientSession() as session:
            for i in range(r):
                task = asyncio.ensure_future(bound_fetch(sem, url, session))
                tasks.append(task)
            responses = asyncio.gather(*tasks)
            await responses

    number = 1
    loop = asyncio.get_event_loop()
    future = asyncio.ensure_future(run(number))
    loop.run_until_complete(future)

ttest()
This is the error: TypeError: _request() got an unexpected keyword argument 'cookies'
I want to use cookies as you can see in the code, but I can't. Can anyone help me?
The feature was added on aiohttp GitHub master but has not been released yet.
Please either install aiohttp from GitHub or wait a while for the aiohttp 3.5 release.
I hope to publish it in a few days.
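Until that release is out, one workaround (my assumption, not part of the answer) is to attach the cookies to the ClientSession instead of the individual request, since session-level cookies have been supported for a long time:
import asyncio
import aiohttp

async def fetch(url, session):
    headers = {'Host': 'example.com'}
    data = '{"test":"test"}'
    # No cookies= argument here: the session already carries them.
    async with session.post(url, data=data, headers=headers) as response:
        return await response.read()

async def run():
    cookies2 = {'test': 'test'}
    # Cookies set on the session are sent with every request it makes.
    async with aiohttp.ClientSession(cookies=cookies2) as session:
        return await fetch("https://test.com", session)

print(asyncio.run(run()))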

Fetching multiple urls with aiohttp in python

In a previous question, a user suggested the following approach for fetching multiple urls (API calls) with aiohttp:
import asyncio
import aiohttp

url_list = ['https://api.pushshift.io/reddit/search/comment/?q=Nestle&size=30&after=1530396000&before=1530436000', 'https://api.pushshift.io/reddit/search/comment/?q=Nestle&size=30&after=1530436000&before=1530476000']

async def fetch(session, url):
    async with session.get(url) as response:
        return await response.json()['data']

async def fetch_all(session, urls, loop):
    results = await asyncio.gather(*[loop.create_task(fetch(session, url)) for url in urls], return_exceptions=True)
    return results

if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    urls = url_list
    with aiohttp.ClientSession(loop=loop) as session:
        htmls = loop.run_until_complete(fetch_all(session, urls, loop))
        print(htmls)
However, this only returns AttributeErrors:
[AttributeError('__aexit__',), AttributeError('__aexit__',)]
(which I only see because I enabled return_exceptions; otherwise it would just break). I really hope there is somebody here who can help with this; it is still kind of hard to find resources for asyncio etc. The returned data is in JSON format. In the end I would like to put all the JSON dicts in a list.
Working example:
import asyncio
import aiohttp
import ssl

url_list = ['https://api.pushshift.io/reddit/search/comment/?q=Nestle&size=30&after=1530396000&before=1530436000',
            'https://api.pushshift.io/reddit/search/comment/?q=Nestle&size=30&after=1530436000&before=1530476000']

async def fetch(session, url):
    async with session.get(url, ssl=ssl.SSLContext()) as response:
        return await response.json()

async def fetch_all(urls, loop):
    async with aiohttp.ClientSession(loop=loop) as session:
        results = await asyncio.gather(*[fetch(session, url) for url in urls], return_exceptions=True)
        return results

if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    urls = url_list
    htmls = loop.run_until_complete(fetch_all(urls, loop))
    print(htmls)
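If you need the 'data' field that the original fetch tried to return, await the JSON first and then index it; awaiting response.json()['data'] fails because the subscript is applied to the coroutine object, not to the parsed body. A small sketch of that variant of fetch:
async def fetch(session, url):
    async with session.get(url, ssl=ssl.SSLContext()) as response:
        payload = await response.json()
        # Index after awaiting; the question assumes the results sit under 'data'.
        return payload['data']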
