scrap.py
from playwright.async_api import async_playwright

class Scraping:
    async def open_browser(self):
        url = "https://www.myurl.com"
        async with async_playwright() as p:
            browser = await p.firefox.launch()
            page = await browser.new_page()
            return await page.goto(url, timeout=0)

    async def search(self, page, num: str):
        await page.fill('input#search', num)
        await page.locator("div[class='srch-btn']").click()
core.py
import asyncio

from scrap import *

@myroute("/route1")
def main(self):
    a = Scraping()
    brow = a.open_browser()
    self.asn = asyncio.run(brow)
    query.action('/route2')

@myroute("/route2")
def main(self, num):
    a = Scraping()
    b = a.search(self.asn, num)
How can I run the open_browser() function in '/route1', then get its page in '/route2' and work with the search() method?
I've already tried this in my code, but it doesn't work.
Thanks!
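For what it's worth, here is a minimal sketch of one way to share a Playwright page between two handlers. It assumes myroute registers handlers that can be async and that share self; the framework wiring here is hypothetical. Two things to note: page.goto() returns a Response, not the page, and leaving the async with async_playwright() block closes the browser, so Playwright has to be started without the context manager if a later route still needs the page.

import asyncio
from playwright.async_api import async_playwright

class Scraping:
    async def open_browser(self):
        # start() keeps Playwright alive after this method returns
        self._pw = await async_playwright().start()
        self._browser = await self._pw.firefox.launch()
        page = await self._browser.new_page()
        await page.goto("https://www.myurl.com", timeout=0)
        return page  # return the page itself, not the goto() response

    async def search(self, page, num: str):
        await page.fill('input#search', num)
        await page.locator("div[class='srch-btn']").click()

# /route1 (hypothetical async handler): keep the page on self
#     self.page = await Scraping().open_browser()
# /route2: reuse it
#     await scraper.search(self.page, num)

Both handlers must run on the same event loop, which is why they are sketched as async handlers; a fresh asyncio.run() per route would create a new loop and break the stored page.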
I'm trying to use the aiobotocore library with context managers, but I'm having a hard time configuring my credentials.
I need to create a class that configures my AWS client so I can use the put, read and delete functions in this library.
The following code is what I'm using for this:
from contextlib import AsyncExitStack
from aiobotocore.session import AioSession
from credentials import aws_access_key_id, aws_secret_access_key

class AWSConnectionManager:
    def __init__(self, aws_acces_key_id, aws_secret_access_key):
        self.aws_acces_key_id = aws_acces_key_id
        self.aws_secret_access_key = aws_secret_access_key
        self._exit_stack = AsyncExitStack()
        self._client = None
        print('__init__')

    async def __aenter__(self):
        session = AioSession
        self._client = await self._exit_stack.enter_async_context(session.create_client('s3'))
        print('__aenter__')

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self._exit_stack.__aexit__(exc_type, exc_val, exc_tb)
        print('__aexit__')

res = AWSConnectionManager(aws_acces_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key)
But it never goes through the __aenter__ and __aexit__ methods.
With the code above I have the following output:
__init__
<__main__.AWSConnectionManager object at 0x7f03921ac640>
Does anyone know what could be wrong with my code?
First: you need to change 'session = AioSession' to 'session = AioSession()', add a return, and pass your credentials:
async def __aenter__(self):
    session = AioSession()
    self._client = await self._exit_stack.enter_async_context(
        session.create_client(
            's3',
            aws_secret_access_key=self.aws_secret_access_key,
            aws_access_key_id=self.aws_access_key_id,
        )
    )
    return self
Second: you need to add proxy methods for put_object/get_object, or make _client public by renaming _client to client:
async def save_file(self, content, s3_filename: str):
    await self._client.put_object(Bucket=self.bucket, Body=content, Key=f'{s3_filename}')

async def load_file(self, name):
    obj = await self._client.get_object(Bucket=self.bucket, Key=f'{name}')
    return await obj['Body'].read()  # the body is read asynchronously in aiobotocore
Now you can use it like this:

async with SkyFileStorageProxy() as storage:
    await storage.load_file(name='test.txt')
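One more detail, since the snippet in the question only constructs the object: __aenter__ and __aexit__ only run when the instance is used in an async with block, which is why only __init__ printed. A minimal sketch of the full flow (the file contents are placeholders, and save_file assumes self.bucket is set somewhere):

import asyncio

async def main():
    # __aenter__/__aexit__ only fire inside "async with"
    async with AWSConnectionManager(
        aws_acces_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
    ) as manager:
        await manager.save_file(b'hello', 'test.txt')

asyncio.run(main())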
I am trying to implement an async database connection library in my FastAPI program, but it seems like Python skips my await keyword.
My expected behaviour is that the login function calls get_user_info, which prints "before calling query", then calls _query, prints "about to execute", and then prints the result.
The actual outcome:
before calling query
user: None
INFO: 127.0.0.1:60548 - "POST /api/auth/login HTTP/1.1" 401 Unauthorized
2021-05-24 12:31:44.007 | INFO | authentication.routes:login:31 - 127.0.0.1 tried to log in with user x
my code:
route.py
@auth_app.post("/auth/login", response_model=Token, tags=["Authentication"])
async def login(request: Request, form_data: OAuth2PasswordRequestForm = Depends()):
    credentials_exceptions = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Incorrect credentials.",
        headers={"Authenticate": "Bearer"}
    )
    username = form_data.username
    password = form_data.password
    async with MysqlConnector() as conn:
        user = await conn.get_user_info(username)
    print("user:", user)
    logger.info(f"{request.client.host} tried to log in with user {username}")
    if not user:
        raise credentials_exceptions
My MysqlConnector() class:
class MysqlConnector:
    async def __aenter__(self):
        await self._create_connection()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.connection.disconnect()

    def __await__(self):
        return self.__aenter__().__await__()

    @staticmethod
    def _create_conn_string():
        return f"mysql://{MYSQL_SETTINGS['user']}:{MYSQL_SETTINGS['password']}@{MYSQL_SETTINGS['host']}:" \
               f"{MYSQL_SETTINGS['port']}/{MYSQL_SETTINGS['database']}"

    async def disconnect(self):
        await self.connection.disconnect()

    async def _create_connection(self):
        MYSQL_SETTINGS['use_unicode'] = True
        MYSQL_SETTINGS['charset'] = 'utf8'
        self.connection = Database(self._create_conn_string())
        await self.connection.connect()

    async def _query(self, sql_query, dictionary=False, **kwargs):
        print("about to execute")
        await self.connection.execute(query=sql_query, values=kwargs)
        self.res = await self.connection.fetch_all(query=sql_query)
        print("result: ", self.res)

    async def get_user_info(self, username: str) -> str:
        sql_query = """
            CALL `sp_get_user`(:username);
        """
        try:
            print("before calling query")
            await self._query(sql_query, username=username)
            result = self.res
            print("result:")
            print(result)
        except IndexError:
            return
        user = User(  # User is a pydantic model
            user_id=result[0],
            username=result[1],
            first_name=result[2],
            last_name=result[3],
            role=result[4],
            hashed_password=result[5],
            email_addr=result[6],
            phone=result[7],
            desc=result[8],
            last_login=datetime.strptime(result[9], '%Y-%m-%d %H:%M:%S.%f'),
            active=result[10],
            api_access=result[11]
        )
        print("user:", user)
        return user
Thank you for any help!
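One way to narrow this down is to temporarily catch and print every exception around the _query call, since the bare except IndexError: return can hide where things actually fail. A hypothetical debugging variant of get_user_info:

import traceback

async def get_user_info(self, username: str):
    sql_query = "CALL `sp_get_user`(:username);"
    try:
        print("before calling query")
        await self._query(sql_query, username=username)
        result = self.res
        print("result:", result)
    except Exception:
        traceback.print_exc()  # show the real error instead of silently returning None
        return None
    # ... build the User model from result as before ...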
I am trying to connect through a websocket and keep the connection alive to continuously retrieve messages from the server. I wrote ClientHelper, and found SocketManager and ReconnectingWebsocket on the internet, and I don't have much idea of what is going wrong, as I do not receive anything through the clientHelper.process_user_message function.
Can someone point out the error, please?
import asyncio
import json
import logging
from random import random

import websockets as ws
from client import Client

class ClientHelper(Client):
    def __init__(self, api_key, api_secret):
        super().__init__(api_key, api_secret)
        self.loop = asyncio.get_event_loop()

    def _request(self, method, uri, signed, force_params=False, **kwargs):
        kwargs = self._get_request_kwargs(method, signed, force_params, **kwargs)
        response = getattr(self.session, method)(uri, **kwargs)
        return self._handle_response(response, method)

    def _request(self, method, path, signed=False, version=API_VERSION, **kwargs):
        uri = self._create_api_uri(path, signed, version)
        return self._request(method, uri, signed, **kwargs)

    def get_listen_key(self):
        res = self._request('post', 'userDataStream', signed=True, data={})
        return res['listenKey']

    async def start_websockets(self):
        self.sm = SocketManager(self, self.loop)
        await self.sm.start_socket(self.process_user_message)

    async def process_user_message(self, msg):
        self.msg = msg
        print(msg)

    async def main(self):
        await self.start_websockets()
        while True:
            await client.getInfo()
            if 'data' in self.msg:
                print(self.msg['data'])

    def start(self):
        self.loop.run_until_complete(self.main())
class SocketManager:
    STREAM_URL = url

    def __init__(self, client, loop, user_timeout=DEFAULT_USER_TIMEOUT):
        self._client = client
        self._loop = loop
        self._conns = None

    async def _start_user_socket(self, path, coro, prefix='ws/'):
        if path in self._conns:
            return False
        self._conns[path] = ReconnectingWebsocket(self._loop, path, coro, prefix)
        return path

    async def start_user_socket(self, coro):
        user_listen_key = await self._client.stream_get_listen_key()  # manages to get the key from the server
        conn_key = await self._start_user_socket('user', user_listen_key, coro)
        return conn_key
class ReconnectingWebsocket:
    STREAM_URL = url
    MAX_RECONNECTS = 5
    MAX_RECONNECT_SECONDS = 60
    MIN_RECONNECT_WAIT = 0.1
    TIMEOUT = 10

    def __init__(self, loop, path, coro, prefix='ws/'):
        self._loop = loop
        self._log = logging.getLogger(__name__)
        self._path = path
        self._coro = coro
        self._prefix = prefix
        self._reconnects = 0
        self._conn = None
        self._socket = None
        self._connect()

    def _connect(self):
        self._conn = asyncio.ensure_future(self._run(), loop=self._loop)

    async def _run(self):
        keep_waiting = True
        async with ws.connect(self.STREAM_URL) as socket:
            self._socket = socket
            self._reconnects = 0
            try:
                while keep_waiting:
                    try:
                        # evt = await self._coro(evt_obj)
                        evt = await asyncio.wait_for(self._socket.recv(), timeout=self.TIMEOUT)
                    except asyncio.TimeoutError:
                        # self._log.debug("no message in {} seconds".format(self.TIMEOUT))
                        print("no message in {} seconds".format(self.TIMEOUT))
                        await self.send_ping()
                    except asyncio.CancelledError:
                        # self._log.debug("cancelled error")
                        print("cancelled error")
                        await self.send_ping()
                    else:
                        try:
                            evt_obj = json.loads(evt)
                        except ValueError:
                            # self._log.debug('error parsing evt json:{}'.format(evt))
                            print('error parsing evt json:{}'.format(evt))
                        else:
                            await self._coro(evt_obj)
            except ws.ConnectionClosed as e:
                # self._log.debug('ws connection closed:{}'.format(e))
                print('ws connection closed:{}'.format(e))
                await self._reconnect()
            except Exception as e:
                # self._log.debug('ws exception:{}'.format(e))
                print('ws exception:{}'.format(e))
                await self._reconnect()

    def _get_reconnect_wait(self, attempts: int) -> int:
        expo = 2 ** attempts
        return round(random() * min(self.MAX_RECONNECT_SECONDS, expo - 1) + 1)

    async def _reconnect(self):
        await self.cancel()
        self._reconnects += 1
        if self._reconnects < self.MAX_RECONNECTS:
            reconnect_wait = self._get_reconnect_wait(self._reconnects)
            await asyncio.sleep(reconnect_wait)
            self._connect()
        else:
            self._log.error('Max reconnections {} reached:'.format(self.MAX_RECONNECTS))

    async def send_ping(self):
        if self._socket:
            await self._socket.ping()

    async def cancel(self):
        self._conn.cancel()
        self._socket = None
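A few things stand out, offered as guesses since the surrounding Client class isn't shown: _conns is initialized to None but indexed like a dict; ClientHelper.start_websockets calls self.sm.start_socket while SocketManager only defines start_user_socket; and _start_user_socket('user', user_listen_key, coro) passes the listen key where the coro parameter is expected, so the real callback ends up in prefix. A minimal sketch of corrected wiring, keeping the names from the question:

class SocketManager:
    def __init__(self, client, loop, user_timeout=DEFAULT_USER_TIMEOUT):
        self._client = client
        self._loop = loop
        self._conns = {}  # was None: "path in self._conns" would raise TypeError

    async def _start_user_socket(self, path, coro, prefix='ws/'):
        if path in self._conns:
            return False
        self._conns[path] = ReconnectingWebsocket(self._loop, path, coro, prefix)
        return path

    async def start_user_socket(self, coro):
        user_listen_key = await self._client.stream_get_listen_key()
        # the listen key is the path; the coroutine is the callback
        return await self._start_user_socket(user_listen_key, coro)

# and in ClientHelper.start_websockets:
#     await self.sm.start_user_socket(self.process_user_message)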
Hello, good day Stack Overflow folks,
Issue: the script gets stuck and the data is never scraped from a Russian retailer, in this case www.vseinstrumenti.ru.
Code:
import asyncio
from pyppeteer import launch

class PyppeteerRequests:
    def __init__(self):
        self.headers = {}

    def get_url(self, url):
        data = None
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            data = loop.run_until_complete(self.main(url))
            print(data)
        except Exception as e:
            print(str(e))
        return data

    async def main(self, url):
        browser = await launch(headless=True)
        page = await browser.newPage()
        await page.goto(url, options={'timeout': 1000000, 'waitUntil': ['load', 'networkidle2']})
        loaded_html = await page.content()
        await page.waitForNavigation()
        print("closing context...")
        await asyncio.sleep(0.3)
        await page.close()
        await browser.close()
        print("closing browser...")
        await asyncio.sleep(0.3)
        return loaded_html

if __name__ == "__main__":
    requester = PyppeteerRequests()
    url = 'https://www.vseinstrumenti.ru/ruchnoy-instrument/sadoviy-instrument-i-inventar/topory/fiskars/x11-s-1015640/'
    data = requester.get_url(url)
    print(data)
It just gets stuck and fails with ERROR: Navigation Timeout Exceeded: 1000000 ms exceeded.
What part of the code should I change? Is it scrapeable on your side? Kindly let me know how to improve my code using asyncio. Thanks!
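One guess about where it hangs: page.waitForNavigation() is called after the page has already finished loading, so it waits for a navigation that never comes until the timeout fires. A minimal sketch without that call (the site may additionally block headless browsers, which would be a separate problem):

async def main(self, url):
    browser = await launch(headless=True)
    try:
        page = await browser.newPage()
        # goto() already waits for 'networkidle2'; no extra waitForNavigation() needed
        await page.goto(url, {'timeout': 60000, 'waitUntil': 'networkidle2'})
        return await page.content()
    finally:
        await browser.close()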
Here's some sample code:
import asyncio

class Foo:
    def __init__(self):
        self._run_coro()

    def _run_coro(self):
        async def init():
            bar = ...  # some I/O op
            self.bar = bar
        loop = asyncio.get_event_loop()
        loop.run_until_complete(init())

    async def spam(self):
        return await ...  # I/O op

async def main():
    foo = Foo()
    await foo.spam()

loop = asyncio.get_event_loop()
loop.run_until_complete(main())
When I run this code, I get the following exception:
RuntimeError: This event loop is already running
If I initialize Foo outside main, the code runs without any exception. I want to initialize Foo such that, during initialization, it runs a coroutine which creates a class attribute bar.
I am unable to figure out how to do this correctly. How can I run a coroutine from __init__?
Any help would be highly appreciated.
import aiohttp
import requests

class Foo:
    def __init__(self):
        self.session = requests.Session()
        self.async_session = None
        # I guess this can be done to initialize it.
        s = self.init_async_session()
        try:
            s.send(None)
        except StopIteration:
            pass
        finally:
            s.close()

    async def init_async_session(self):
        # ClientSession should be created inside a coroutine.
        self.async_session = aiohttp.ClientSession()
What would be the right way to initialize self.async_session?
If a method uses something asynchronous, it should be explicitly defined as asynchronous as well. This is a core idea behind asyncio: it makes you write code in such a way that you always know whether some arbitrary method may do something asynchronous.
In your snippet you want to do an async thing (the bar I/O) inside the sync method __init__, and asyncio prohibits it. You should make _run_coro async and initialize Foo asynchronously, for example using the __await__ method:
import asyncio

class Foo:
    def __await__(self):
        return self._run_coro().__await__()

    async def _run_coro(self):  # real async initializer
        async def init():
            await asyncio.sleep(1)  # bar I/O
            self.bar = 123
        await init()
        return self

    async def spam(self):
        return await asyncio.sleep(1)  # I/O op

async def main():
    foo = await Foo()
    await foo.spam()

asyncio.run(main())  # instead of two lines in Python 3.7+
You may be interested in reading this answer to understand better how asyncio works and how to handle it.
Upd:
s = self.init_async_session()
try:
    s.send(None)
Don't do things like that: a generator's methods are only an implementation detail as far as coroutines are concerned. You can't predict how a coroutine will react to calling the .send() method, and you can't rely on this behavior.
If you want to execute a coroutine, use await; if you want to start it "in the background", use a task or the other functions from the asyncio docs, as sketched below.
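For example, a minimal sketch of the "in the background" option using asyncio.create_task (the session-creating coroutine here is a stand-in):

import asyncio

async def init_async_session():
    await asyncio.sleep(1)  # stands in for creating the session

async def main():
    task = asyncio.create_task(init_async_session())  # starts running in the background
    # ... do other work while the session is being created ...
    await task  # make sure it has finished before relying on the session

asyncio.run(main())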
What would be the right way to initialize self.async_session?
When it comes to aiohttp.ClientSession, it should not only be created, but also properly closed. The best way to do that is to use an async context manager, as shown in the aiohttp docs.
If you want to hide this operation inside Foo, you can make it an async context manager as well. Complete example:
import asyncio

import aiohttp

class Foo:
    async def __aenter__(self):
        self._session = aiohttp.ClientSession()
        await self._session.__aenter__()
        return self

    async def __aexit__(self, *args):
        await self._session.__aexit__(*args)

    async def spam(self):
        url = 'http://httpbin.org/delay/1'
        resp = await self._session.get(url)
        text = await resp.text()
        print(text)

async def main():
    async with Foo() as foo:
        await foo.spam()

asyncio.run(main())
Upd2:
You can combine the ways to init/close the object from above to achieve the result you like. As long as you keep in mind that both operations are asynchronous and thus should be awaited, everything should be fine.
One more possible way:
import asyncio
import aiohttp

class Foo:
    def __await__(self):
        return self._init().__await__()

    async def _init(self):
        self._session = aiohttp.ClientSession()
        await self._session.__aenter__()
        return self

    async def close(self):
        await self._session.__aexit__(None, None, None)

    async def spam(self):
        url = 'http://httpbin.org/delay/1'
        resp = await self._session.get(url)
        text = await resp.text()
        print(text)

async def main():
    foo = await Foo()
    try:
        await foo.spam()
    finally:
        await foo.close()

asyncio.run(main())
Here's my solution.
import asyncio

import aiohttp
import requests

class Session:
    def __init__(self, headers):
        self._headers = headers
        self._session = requests.Session()
        self._async_session = None

    async def _init(self):
        self._async_session = aiohttp.ClientSession(headers=self._headers)

    async def async_request(self, url):
        while True:
            try:
                async with self._async_session.get(url) as resp:
                    resp.raise_for_status()
                    return await resp.text()
            except aiohttp.client_exceptions.ClientError:
                # retry or raise
                raise
            except AttributeError:
                if isinstance(self._async_session, aiohttp.ClientSession):
                    raise
                await self._init()

    def request(self, url):
        return self._session.get(url).text

    async def close(self):
        if isinstance(self._async_session, aiohttp.ClientSession):
            await self._async_session.close()

async def main():
    session = Session({})
    print(await session.async_request('https://httpstat.us/200'))
    await session.close()

asyncio.run(main())
I can initialize the Session class and make synchronous as well as asynchronous requests. I do not have to explicitly call await session._init() to initialize self._async_session: when session.async_request is called while self._async_session is still None, the AttributeError handler calls await self._init() and the request is retried.