Hi, I am writing a network-bound server application using Python asyncio which can accept a POST request.
In the POST request I am accepting a symbols parameter.
Please tell me the best way to deal with a network-bound application, where I am collecting the data from other web APIs by sending POST requests to them.
Following is the code:
import asyncio
import aiohttp
import json
import logging

# async def fetch_content(url, symbols):
#     yield from aiohttp.post(url, symbols=symbols)

@asyncio.coroutine
def fetch_page(writer, url, data):
    response = yield from aiohttp.post(url, data=data)
    resp = yield from response.read_and_close()
    print(resp)
    writer.write(resp)
    return

@asyncio.coroutine
def process_payload(writer, data, scale):
    tasks = []
    data = data.split('\r\n\r\n')[1]
    data = data.split('\n')
    data = [x.split(':') for x in data]
    print(data)
    data = {x[0]: x[1] for x in data}
    print(data)
    # data = data[0].split(':')[1]
    data = data['symbols']
    print(data)
    data = data.split(',')
    data_len = len(data)
    data_first = 0
    data_last = scale
    url = 'http://xxxxxx.xxxxxx.xxx/xxxx/xxxx'
    while data_last < data_len:
        tasks.append(asyncio.ensure_future(fetch_page(writer, url, {'symbols': ",".join(data[data_first:data_last])})))
        data_first += scale
        data_last += scale
    tasks.append(asyncio.ensure_future(fetch_page(writer, url, {'symbols': ",".join(data[data_first:data_last])})))
    loop.run_until_complete(tasks)
    return

@asyncio.coroutine
def process_url(url):
    pass

@asyncio.coroutine
def echo_server():
    yield from asyncio.start_server(handle_connection, 'xxxxxx.xxxx.xxx', 3000)

@asyncio.coroutine
def handle_connection(reader, writer):
    data = yield from reader.read(8192)
    if data:
        message = data.decode('utf-8')
        print(message)
        yield from process_payload(writer, message, 400)
        writer.write_eof()
        writer.close()

# url = 'http://XXXXXXX.xxxxx.xxx/xxxx/xxxxxx/xxx'
data = {'symbols': 'GD-US,14174T10,04523Y10,88739910,03209R10,46071F10,77543110,92847N10'}

loop = asyncio.get_event_loop()
loop.run_until_complete(echo_server())
try:
    loop.run_forever()
finally:
    loop.close()
But I am receiving the following error:
future: <Task finished coro=<handle_connection() done, defined at fql_server_async_v2.py:53> exception=AttributeError("'module' object has no attribute 'ensure_future'",)>
Traceback (most recent call last):
File "/home/user/anugupta/lib/python3.4/asyncio/tasks.py", line 234, in _step
result = coro.send(value)
File "fql_server_async_v2.py", line 60, in handle_connection
yield from process_payload(writer, message, 400)
File "/home/user/anugupta/lib/python3.4/asyncio/coroutines.py", line 141, in coro
res = func(*args, **kw)
File "fql_server_async_v2.py", line 41, in process_payload
tasks.append(asyncio.ensure_future(fetch_page(writer, url, {'symbols':",".join(data[data_first:data_last])})))
AttributeError: 'module' object has no attribute 'ensure_future'
^CTraceback (most recent call last):
File "fql_server_async_v2.py", line 72, in <module>
loop.run_forever()
File "/home/user/anugupta/lib/python3.4/asyncio/base_events.py", line 236, in run_forever
self._run_once()
File "/home/user/anugupta/lib/python3.4/asyncio/base_events.py", line 1017, in _run_once
event_list = self._selector.select(timeout)
File "/home/user/anugupta/lib/python3.4/selectors.py", line 424, in select
fd_event_list = self._epoll.poll(timeout, max_ev)
ensure_future was added in asyncio 3.4.4; use asyncio.async for earlier versions.
While asyncio.async is deprecated now, it will be supported for the foreseeable future.
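For example, a minimal compatibility shim (just a sketch, assuming you need to run on interpreters both before and after asyncio 3.4.4) could look like this:

import asyncio

# Fall back to asyncio.async on interpreters older than 3.4.4,
# where asyncio.ensure_future does not exist yet.
try:
    ensure_future = asyncio.ensure_future
except AttributeError:
    # 'async' later became a reserved word, so look it up with getattr
    ensure_future = getattr(asyncio, 'async')

@asyncio.coroutine
def demo():
    yield from asyncio.sleep(0.1)
    return 'done'

loop = asyncio.get_event_loop()
print(loop.run_until_complete(ensure_future(demo())))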
Somewhere in the mist I have tangled myself up: running this code gives me "weird" errors, and it seems like I am missing something, but I can't seem to get it to work even after reading the error messages many times.
Does anyone have a clue what's wrong here?
Happy new year and thanks in advance!
import requests
from bs4 import BeautifulSoup
import csv

def get_page(url):
    response = requests.get(url)
    if not response.ok:
        print('Server responded:', response.status_code)
    else:
        soup = BeautifulSoup(response.text, 'lxml')
        return soup

def get_detail_data(soup):
    try:
        product = soup.find('span', {'class': 'a-size-large product-title-word-break'}).text
    except:
        product = ''
    try:
        price = soup.find('span', {'class': 'a-size-medium a-color-price priceBlockBuyingPriceString'}).text.strip()
        currency, price = price.split(' ')
    except:
        currency = ''
        price = ''
    try:
        amount = soup.find('span', class_='a-size-medium a-color-state').find('a').text.strip()
    except:
        amount = ''
    data = {
        'product': product,
        'price': price,
        'currency': currency,
        'amount': amount,
    }
    return data

def get_index_data(soup):
    try:
        links = soup.find_all('a', class_='a-link-normal a-text-normal')
    except:
        links = []
    urls = [item.get('href') for item in links]
    return urls

def write_csv(data, url):
    with open('hardware.csv', 'a') as csvfile:
        writer = csv.writer(csvfile)
        row = [data['product'], data['price'], data['currency'], data['amount'], url]
        writer.writerow(row)

def main():
    url = 'https://www.amazon.se/s?k=grafikkort&page=1'
    products = get_index_data(get_page(url))
    for link in products:
        data = get_detail_data(get_page(link))
        write_csv(data, link)

if __name__ == '__main__':
    main()
And the error messages:
Traceback (most recent call last):
File "scrp.py", line 75, in <module>
main()
File "scrp.py", line 71, in main
data = get_detail_data(get_page(link))
File "scrp.py", line 7, in get_page
response = requests.get(url)
File "/usr/lib/python3/dist-packages/requests/api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "/usr/lib/python3/dist-packages/requests/api.py", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "/usr/lib/python3/dist-packages/requests/sessions.py", line 519, in request
prep = self.prepare_request(req)
File "/usr/lib/python3/dist-packages/requests/sessions.py", line 452, in prepare_request
p.prepare(
File "/usr/lib/python3/dist-packages/requests/models.py", line 313, in prepare
self.prepare_url(url, params)
File "/usr/lib/python3/dist-packages/requests/models.py", line 387, in prepare_url
raise MissingSchema(error)
requests.exceptions.MissingSchema: Invalid URL '/ASUS-NVIDIA-GeForce-grafikkort-kylning/dp/B07489XSJP?dchild=1': No schema supplied. Perhaps you meant http:///ASUS-NVIDIA-GeForce-grafikkort-kylning/dp/B07489XSJP?dchild=1?
What is happening here is that you are only getting the URL suffixes from your products, as seen for instance with /ASUS-NVIDIA-GeForce-grafikkort-kylning.
A quick solution is to prepend 'https://www.amazon.se' to all your URLs:
def main():
    url = 'https://www.amazon.se/s?k=grafikkort&page=1'
    products = get_index_data(get_page(url))
    for link in products:
        data = get_detail_data(get_page('https://www.amazon.se' + link))
        write_csv(data, link)
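A slightly more robust variant (a sketch, using the standard library's urllib.parse.urljoin so that relative and absolute hrefs are both handled) would be:

from urllib.parse import urljoin

BASE_URL = 'https://www.amazon.se'

def absolutize(href):
    # urljoin leaves absolute URLs untouched and resolves relative
    # paths like '/ASUS-...' against the base URL.
    return urljoin(BASE_URL, href)

print(absolutize('/ASUS-NVIDIA-GeForce-grafikkort-kylning/dp/B07489XSJP?dchild=1'))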
I have inspected this SO question on how to gracefully close out an asyncio process. However, when I apply it to my code:
async def ob_main(product_id: str, freq: int) -> None:
    assert freq >= 1, f'The minimum frequency is 1s. Adjust your value: {freq}.'
    save_loc = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'ob', product_id)
    while True:
        close = False
        try:
            full_save_path = create_save_location(save_loc)
            file = open(full_save_path, 'a', encoding='utf-8')
            await ob_collector(product_id, file)
            await asyncio.sleep(freq)
        except KeyboardInterrupt:
            close = True
            task.cancel()
            loop.run_forever()
            task.exception()
        except:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            error_msg = repr(traceback.format_exception(exc_type, exc_value, exc_traceback))
            print(error_msg)
            logger.warning(f'[1]-Error encountered collecting ob data: {error_msg}')
        finally:
            if close:
                loop.close()
                cwow()
                exit(0)
I get the following traceback printed in the terminal:
^C['Traceback (most recent call last):\n', ' File "/anaconda3/lib/python3.7/asyncio/runners.py", line 43, in run\n return loop.run_until_complete(main)\n', ' File "/anaconda3/lib/python3.7/asyncio/base_events.py", line 555, in run_until_complete\n self.run_forever()\n', ' File "/anaconda3/lib/python3.7/asyncio/base_events.py", line 523, in run_forever\n self._run_once()\n', ' File "/anaconda3/lib/python3.7/asyncio/base_events.py", line 1722, in _run_once\n event_list = self._selector.select(timeout)\n', ' File "/anaconda3/lib/python3.7/selectors.py", line 558, in select\n kev_list = self._selector.control(None, max_ev, timeout)\n', 'KeyboardInterrupt\n', '\nDuring handling of the above exception, another exception occurred:\n\n', 'Traceback (most recent call last):\n', ' File "coinbase-collector.py", line 98, in ob_main\n await asyncio.sleep(freq)\n', ' File "/anaconda3/lib/python3.7/asyncio/tasks.py", line 564, in sleep\n return await future\n', 'concurrent.futures._base.CancelledError\n']
and the code keeps running.
task and loop are variables from the global scope, defined in __main__:
loop = asyncio.get_event_loop()
task = asyncio.run(ob_main(args.p, 10))
Applying this question's method solves the issue. So in the above case:
try:
    loop.run_until_complete(ob_main(args.p, 10))
except KeyboardInterrupt:
    cwow()
    exit(0)
However, I do not understand why that works.
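For reference, here is a self-contained sketch of the pattern that works (assuming nothing beyond the standard library): Ctrl+C raises KeyboardInterrupt in the main thread, where it propagates out of run_until_complete into the surrounding except clause, rather than into the coroutine itself.

import asyncio

async def ticker():
    while True:
        print('tick')
        await asyncio.sleep(1)

loop = asyncio.get_event_loop()
try:
    loop.run_until_complete(ticker())
except KeyboardInterrupt:
    # The interrupt lands here, outside the event loop, so cleanup
    # happens without fighting the loop's own exception handling.
    print('interrupted, shutting down')
finally:
    loop.close()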
I have the following code, which creates a TestRail client and executes TestRail's get_suites API call.
I have a function to call the get_suites API, and I am passing the TestRail client and test_rail_project_id as params.
I am trying to use multiprocessing to execute over my list of projects to speed things up, and I am getting a "can't pickle" error.
My code:
from itertools import product
from multiprocessing import Pool

def get_suites(client, project_id):
    try:
        path = 'get_suites/{projectid}'.format(projectid=project_id)
        test_rail_response = client.send_get(path)
        return test_rail_response
    except Exception as e:
        raise Exception(str(e))

if __name__ == "__main__":
    testRailClient = APIClient(TESTRAIL_URL)
    pool = Pool(2)
    all_project_ids = [100, 200, 300]
    data = pool.starmap(get_suites, product([testRailClient], all_project_ids))
Error stack:
Traceback (most recent call last):
File "main.py", line 57, in <module>
data = pool.starmap(testrailapi.get_suites, product([testRailClient], all_project_ids))
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/pool.py", line 274, in starmap
return self._map_async(func, iterable, starmapstar, chunksize).get()
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/pool.py", line 644, in get
raise self._value
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/pool.py", line 424, in _handle_tasks
put(task)
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/connection.py", line 206, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
TypeError: can't pickle SSLContext objects
Any suggestions please?
Thank you
PS: I am using Python 3.6.
UPDATE:
As suggested, I tried removing the API client as a parameter and it worked, but I am getting the same error when I have get_suites as a method. Please see my updated code below.
class TestRailExecution:
    def __init__(self, url, username, password):
        self.url = url
        self.username = username
        self.password = password
        self.client = APIClient(self.url)
        self.client.user = username
        self.client.password = password

    def get_suites(self, project_id):
        try:
            path = 'get_suites/{projectid}'.format(projectid=project_id)
            test_rail_response = self.client.send_get(path)
            return test_rail_response
        except Exception as e:
            raise Exception(str(e))

if __name__ == "__main__":
    testRailClient = TestRailExecution(TESTRAIL_URL, user, password)
    pool = Pool(2)
    data = pool.map(testRailClient.get_suites, [100, 200, 300])
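A common workaround (just a sketch: the helper get_suites_worker is hypothetical, and it assumes the client is cheap to rebuild) is to pass only picklable data and construct the APIClient inside each worker process, so nothing holding an SSLContext ever has to cross the process boundary:

from multiprocessing import Pool

def get_suites_worker(args):
    # Rebuild the client inside the worker instead of pickling it.
    url, username, password, project_id = args
    client = APIClient(url)
    client.user = username
    client.password = password
    return client.send_get('get_suites/{0}'.format(project_id))

if __name__ == '__main__':
    project_ids = [100, 200, 300]
    jobs = [(TESTRAIL_URL, user, password, pid) for pid in project_ids]
    with Pool(2) as pool:
        data = pool.map(get_suites_worker, jobs)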
I am puzzled getting the following to work with asyncio.
The code snippet below queries a number of devices (via SNMP) and returns a dictionary; it works fine, but is limited by multiprocessing.cpu_count():
def do_polling(netelement, snmp_comm):
    msg = {}
    msg.update({
        'bgp': do_lookup_bgp(netelement, snmp_comm),
        'iface': do_lookup_iface(netelement, snmp_comm),
        'ifidx': do_lookup_ifindex(netelement, snmp_comm),
        'agg': do_lookup_agg(netelement, snmp_comm),
    })
    return msg

def save(netelement, job):
    data[netelement] = job.result()

with concurrent.futures.ProcessPoolExecutor(max_workers=multiprocessing.cpu_count()) as executor:
    for k, v in INFO['dev'].items():
        job = executor.submit(do_polling, k, v['snmp_comm'])
        job.add_done_callback(functools.partial(save, k))
So I would like to migrate to the asyncio approach by changing it like this:
@asyncio.coroutine
def do_polling(netelement, snmp_comm):
    msg = {}
    msg['bgp'] = yield from do_lookup_bgp(netelement, snmp_comm)
    msg['iface'] = yield from do_lookup_iface(netelement, snmp_comm)
    msg['ifidx'] = yield from do_lookup_ifindex(netelement, snmp_comm)
    msg['agg'] = yield from do_lookup_agg(netelement, snmp_comm)

@asyncio.coroutine
def schedule(INFO):
    for k, v in INFO['dev'].items():
        asyncio.async(do_polling(k, v))

asyncio.get_event_loop().run_until_complete(schedule)
But I am getting the following error:
Traceback (most recent call last):
File "/home/app/ip-spotlight/code/ixmac.py", line 60, in <module>
main()
File "/home/app/ip-spotlight/code/ixmac.py", line 16, in main
app.ixmac.initialize.run(INFO)
File "/home/app/ip-spotlight/code/app/ixmac/initialize.py", line 191, in run
asyncio.get_event_loop().run_until_complete(schedule)
File "/usr/lib64/python3.4/asyncio/base_events.py", line 353, in run_until_complete
future = tasks.ensure_future(future, loop=self)
File "/usr/lib64/python3.4/asyncio/tasks.py", line 553, in ensure_future
raise TypeError('A Future, a coroutine or an awaitable is required')
TypeError: A Future, a coroutine or an awaitable is required
Could you please advise what I am doing wrong?
You are not using it as a coroutine. You should change the last line to:
asyncio.get_event_loop().run_until_complete(schedule(the_info_variable))
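Note also that schedule() fires off the polling tasks without waiting for them, so run_until_complete may return before any polling finishes. A minimal sketch of one way to wait for all of them (assuming the same Python 3.4-era syntax, with do_polling stubbed out here) is:

import asyncio

@asyncio.coroutine
def do_polling(netelement, snmp_comm):
    # Stub standing in for the real do_lookup_* coroutines.
    yield from asyncio.sleep(0.1)
    return {'bgp': None, 'iface': None, 'ifidx': None, 'agg': None}

@asyncio.coroutine
def schedule(info):
    tasks = [asyncio.async(do_polling(k, v['snmp_comm']))
             for k, v in info['dev'].items()]
    # gather waits for every polling task and collects their results.
    results = yield from asyncio.gather(*tasks)
    return dict(zip(info['dev'].keys(), results))

info = {'dev': {'r1': {'snmp_comm': 'public'}, 'r2': {'snmp_comm': 'public'}}}
data = asyncio.get_event_loop().run_until_complete(schedule(info))
print(data)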
I have a websocket server built with aiohttp.
I keep getting this exception in the server error stream.
Task exception was never retrieved
future: <Task finished coro=<read() done, defined at /usr/local/lib/python3.4/dist-packages/aiohttp/streams.py:576> exception=ClientDisconnectedError()>
Traceback (most recent call last):
File "/usr/lib/python3.4/asyncio/tasks.py", line 234, in _step
result = coro.throw(exc)
File "/usr/local/lib/python3.4/dist-packages/aiohttp/streams.py", line 578, in read
result = yield from super().read()
File "/usr/local/lib/python3.4/dist-packages/aiohttp/streams.py", line 433, in read
yield from self._waiter
File "/usr/lib/python3.4/asyncio/futures.py", line 386, in __iter__
yield self # This tells Task to wait for completion.
File "/usr/lib/python3.4/asyncio/tasks.py", line 287, in _wakeup
value = future.result()
File "/usr/lib/python3.4/asyncio/futures.py", line 275, in result
raise self._exception
aiohttp.errors.ClientDisconnectedError
The client shows a message as:
Unclosed client session
client_session: <aiohttp.client.ClientSession object at 0x7f67ec0f0588>
The code in the handler is:
@asyncio.coroutine
def sync(self, request):
    ws = web.WebSocketResponse()
    yield from ws.prepare(request)
    # while True:
    msg = yield from ws.receive()
    if msg.tp == aiohttp.MsgType.text:
        payload = msg.data
        pypayload = json.loads(payload)
        result = {'result': {}}
        for store in pypayload:
            try:
                sync_obj = yield from asyncio.async(self.prepare(store))
            except (IndexError, TypeError, ValidationError) as exc:
                yield from asyncio.async(self.handle_internal_error(exc, store))
            else:
                try:
                    sync_result, request_type = yield from asyncio.async(self.send_data(sync_obj))
                except DuplicateMappingsFound as exc:
                    yield from asyncio.async(self.handle_internal_error(exc, store))
                else:
                    if sync_result.status == 200 and request_type == 'post':
                        yield from asyncio.async(self.process_data(sync_result))
                    elif sync_result.status >= 400:
                        yield from asyncio.async(self.handle_error(sync_result, sync_obj))
                    result['result'].update(
                        {store['store_id']: sync_result.status}
                    )
                    yield from asyncio.async(sync_result.release())
        ws.send_str(json.dumps(result))
    elif msg.tp == aiohttp.MsgType.error:
        print('ws connection closed with exception {0}'.format(ws.exception()))
    yield from asyncio.async(ws.close())
    print('websocket connection closed')
    return ws
The client code is:
@asyncio.coroutine
def sync_store():
    resp = yield from aiohttp.get('http://localhost/stores/search')
    stores = yield from resp.json()
    total_page = stores['page']['total_page']
    page = stores['page']['current_page']
    total_resp = []
    ws_sockets = []
    while True:
        for page in range(page, total_page):
            session = aiohttp.ClientSession()
            ws = yield from asyncio.async(session.ws_connect('ws://localhost:8765/stores'))
            ws_sockets.append(ws)
            ws.send_str(json.dumps(stores['data']))
            resp = yield from asyncio.async(ws.receive())
            total_resp.append(resp.data)
            # print(resp)
            stores_resp = yield from asyncio.async(aiohttp.post('http://localhost/stores/search',
                                                                data=json.dumps({'page': page + 1}),
                                                                headers={'content-type': 'application/json'}))
            stores = yield from asyncio.async(stores_resp.json())
        while ws_sockets:
            session = ws_sockets.pop(0)
            msg = yield from session.receive()
            if not (msg.tp == aiohttp.MsgType.close or msg.tp == aiohttp.MsgType.closed):
                ws_sockets.append(session)
            else:
                print(ws_sockets)
        break
    print(total_resp)
What could be the problem here?
I also tried enabling debug mode, but that does not seem to give any useful output either.
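For what it's worth, the "Unclosed client session" warning on the client side usually means a ClientSession was created but never closed; the client code above opens a new session on every loop iteration and never closes any of them. A minimal sketch of the create-once, always-close pattern (assuming the same old generator-based aiohttp API this code already uses) is:

import asyncio
import aiohttp
import json

@asyncio.coroutine
def client_demo():
    # One session for the whole run, closed no matter what happens,
    # instead of a fresh ClientSession per iteration.
    session = aiohttp.ClientSession()
    try:
        ws = yield from session.ws_connect('ws://localhost:8765/stores')
        ws.send_str(json.dumps({'ping': 1}))
        msg = yield from ws.receive()
        print(msg.data)
        yield from ws.close()
    finally:
        # close() is synchronous in this aiohttp era; newer versions
        # require `await session.close()` instead.
        session.close()

asyncio.get_event_loop().run_until_complete(client_demo())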