Unable to upload to Azure blob asynchronously with python - python-3.x

I'm trying to upload images to Azure Blob Storage, and before uploading I make a HEAD request to check that the file doesn't already exist.
The code runs well but stops after some time. I tried closing the file, but it didn't help.
Below is my code:
from azure.identity.aio import ClientSecretCredential
from azure.storage.blob.aio import BlobClient
from azure.core.exceptions import ResourceExistsError
import asyncio
import aiohttp
import os

blob_url = ''
sourceDir = ''
accountName = ''
accountKey = ''
containerName = ''

async def fetch(session, url):
    async with session.head(url) as response:
        await response.text()
        return response.status

async def task(name, work_queue):
    while not work_queue.empty():
        f, blob_name = await work_queue.get()
        print("Processing :", f)
        blob_name = blob_name.replace(sourceDir, '')
        blobClient = BlobClient(
            "https://{}.blob.core.windows.net".format(accountName),
            credential = accountKey,
            container_name = containerName,
            blob_name = blob_name,
        )
        async with aiohttp.ClientSession() as session:
            status = await fetch(session, blob_url + blob_name)
            if status != 200:
                async with blobClient:
                    with open(f, "rb") as data:
                        await blobClient.upload_blob(data, overwrite=True)
                        data.close()

def main():
    work_queue = asyncio.Queue()
    for (path, dirs, files) in os.walk(sourceDir):
        for f in files:
            blob_hash_dir = f[0:2]
            work_queue.put_nowait((os.path.join(path, f), os.path.join(path, blob_hash_dir, f)))
    loop = asyncio.get_event_loop()
    loop.run_until_complete(
        asyncio.gather(
            *[task("{}".format(num), work_queue) for num in range(1, 51)]
        )
    )
    loop.close()

main()
This is the error I'm getting after a few minutes of running.
Traceback (most recent call last):
File "upload_to_blob.py", line 57, in <module>
File "upload_to_blob.py", line 49, in main
File "/home/root/miniconda3/envs/other/lib/python3.8/asyncio/base_events.py", line 612, in run_until_complete
File "upload_to_blob.py", line 36, in task
OSError: [Errno 24] Too many open files: '/home/other/bfd78bca2ec99487668.jpg'
libgcc_s.so.1 must be installed for pthread_cancel to work
Aborted (core dumped)
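A pattern that usually keeps the open-file count bounded in code shaped like this (a minimal sketch, not the poster's code: upload_one, run and MAX_CONCURRENCY are assumed names) is to share one aiohttp.ClientSession across all workers and gate the per-file work behind an asyncio.Semaphore, so only a fixed number of files and sockets are open at any moment:

async def upload_one(semaphore, session, blob_client, local_path, head_url):
    async with semaphore:  # at most MAX_CONCURRENCY uploads in flight
        async with session.head(head_url) as response:
            if response.status == 200:  # blob already exists, skip it
                return
        with open(local_path, "rb") as data:  # handle closes right after the upload
            await blob_client.upload_blob(data, overwrite=True)

async def run(jobs):
    # jobs: iterable of (blob_client, local_path, head_url) tuples, built the
    # same way as the work queue above
    MAX_CONCURRENCY = 20  # assumed limit; keep it below the OS file-descriptor cap
    semaphore = asyncio.Semaphore(MAX_CONCURRENCY)
    async with aiohttp.ClientSession() as session:  # one session reused by every upload
        await asyncio.gather(*(upload_one(semaphore, session, bc, path, url)
                               for bc, path, url in jobs))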

Related

asyncpg + aiogram. cannot perform operation: another operation is in progress

How can I fix it? I've played with it a lot and for a long time, but nothing came of it.
sql_db.py:
import asyncio
import asyncpg

LOG_PG = {"database": 'test_bot',
          "user": 'misha',
          "password": '1234',
          "host": 'localhost'}

class Database:
    SELECT_USER_LANG = "SELECT lang FROM lang_set WHERE user_id = $1 AND bot_token = $2"

    def __init__(self, loop: asyncio.AbstractEventLoop):
        self.pool = loop.run_until_complete(
            asyncpg.create_pool(**LOG_PG)
        )

    async def get_lang(self, user_id, token):
        search_d = [user_id, token]
        res = await self.pool.fetchval(self.SELECT_USER_LANG, *search_d)
        if res is None:
            return "ru"
        return res
I tried inserting this loop everywhere and running without it, in multiple combinations in the code itself, but nothing changed. I don't know how to describe the problem in more detail.
main.py:
from aiogram import Bot, Dispatcher
from aiogram.types import Message
import asyncio
from sql_db import Database

loop = asyncio.get_event_loop()
token = "TOKEN"
dp = Dispatcher()
bot = Bot(token=token, parse_mode="HTML")
db = Database(loop)

async def echo_msg(message: Message):
    user_id = message.from_user.id
    await message.send_copy(user_id)
    await db.get_lang(user_id, token)

dp.message.register(callback=echo_msg)

if __name__ == '__main__':
    dp.run_polling(bot, skip_updates=True)
error:
...
File "/home/mickey/Desktop/chat_admin/venv/lib/python3.8/site-packages/asyncpg/pool.py", line 867, in release
return await asyncio.shield(ch.release(timeout))
File "/home/mickey/Desktop/chat_admin/venv/lib/python3.8/site-packages/asyncpg/pool.py", line 224, in release
raise ex
File "/home/mickey/Desktop/chat_admin/venv/lib/python3.8/site-packages/asyncpg/pool.py", line 214, in release
await self._con.reset(timeout=budget)
File "/home/mickey/Desktop/chat_admin/venv/lib/python3.8/site-packages/asyncpg/connection.py", line 1367, in reset
await self.execute(reset_query, timeout=timeout)
File "/home/mickey/Desktop/chat_admin/venv/lib/python3.8/site-packages/asyncpg/connection.py", line 318, in execute
return await self._protocol.query(query, timeout)
File "asyncpg/protocol/protocol.pyx", line 323, in query
File "asyncpg/protocol/protocol.pyx", line 707, in asyncpg.protocol.protocol.BaseProtocol._check_state
asyncpg.exceptions._base.InterfaceError: cannot perform operation: another operation is in progress
It works when launched like this: the pool has to be created from inside aiogram's startup hook, so it ends up on the same event loop that run_polling uses instead of the separate loop from sql_db.py. I don't know how to formulate it well, but I was lucky enough to understand the problem.
...
data_ = {}

class Database:
    def __init__(self, pool: asyncpg.create_pool):
        self.pool = pool

    async def get_lang(self, user_id, token):
        search_d = [user_id, token]
        async with self.pool.acquire() as conn:
            res = await conn.fetchval(SELECT_USER_LANG, *search_d)
        if res is None:
            return "ru"
        return res

async def create_pool():
    pool = await asyncpg.create_pool(**LOG_PG)
    data_["db"] = Database(pool)

async def echo_msg(message: Message):
    user_id = message.from_user.id
    await message.send_copy(user_id)
    await data_["db"].get_lang(user_id, token)

dp.message.register(callback=echo_msg)

if __name__ == '__main__':
    dp.startup.register(create_pool)  # ANSWER
    dp.run_polling(bot, skip_updates=True)
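A minimal sketch of the failure mode in the original layout (generic asyncio + asyncpg, not aiogram-specific; the names here are illustrative): the pool is bound to the loop it was created on, while the handler later runs on a different loop, which is where errors like the InterfaceError surface.

import asyncio
import asyncpg

LOG_PG = {"database": 'test_bot', "user": 'misha', "password": '1234', "host": 'localhost'}

# the pool gets bound to this throwaway loop...
loop_a = asyncio.new_event_loop()
pool = loop_a.run_until_complete(asyncpg.create_pool(**LOG_PG))

async def handler():
    # ...but this coroutine runs on the fresh loop created by asyncio.run,
    # and using the pool across loops is what breaks
    return await pool.fetchval("SELECT 1")

asyncio.run(handler())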

Absolute path works when hardcoded but not when stored in variable in python

I am trying to write a Python bot that will simply mass-upload the files in a directory to a server (mostly game clips with a few screenshots). The issue is that when I pass the file path dynamically I get a file-not-found error; when it's hardcoded it works fine. I have printed, and even sent to Discord, the file path, and it is correct. I tried .strip() and .encode('unicode-escape') and various other options but haven't found anything that works. This has me a bit puzzled. Any ideas?
import os
import discord
import time
from discord.ext import commands

client = commands.Bot(command_prefix = '!!')

#locations to upload
locations = [
    '/root/discord/',
    '/home/discord',
]

#file types to not upload
bad_files = [
    'viminfo',
    'txt',
    'sh',
    '',
    'bat',
]

#walk through directory and upload files
async def dir_walk(ctx, p):
    for roots, dirs, files in os.walk(p):
        for i in dirs:
            for x in files:
                #check to see if file extension matches one listed to not upload.
                if x.split('.')[-1] in bad_files:
                    pass
                else:
                    try:
                        #upload files
                        file_path = os.path.join(roots, i, x)
                        f = open(full_path, 'rb')
                        await ctx.send(i, file = discord.File(f, filename = x))
                        time.sleep(5)
                    except:
                        raise
            time.sleep(5)

#client.command(pass_context=True, name="walk")
async def list_dir(ctx):
    for x in locations:
        await dir_walk(ctx, x)

client.run('')
The traceback is:
Ignoring exception in command walk:
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/discord/ext/commands/core.py", line 85, in wrapped
ret = await coro(*args, **kwargs)
File "newwalk.py", line 50, in list_dir
await dir_walk(ctx,x)
File "newwalk.py", line 40, in dir_walk
f = open(x,'rb')
FileNotFoundError: [Errno 2] No such file or directory: 'ss dec_2019_1_20_0008.jpg'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/discord/ext/commands/bot.py", line 939, in invoke
await ctx.command.invoke(ctx)
File "/usr/local/lib/python3.8/dist-packages/discord/ext/commands/core.py", line 863, in invoke
await injected(*ctx.args, **ctx.kwargs)
File "/usr/local/lib/python3.8/dist-packages/discord/ext/commands/core.py", line 94, in wrapped
raise CommandInvokeError(exc) from exc
discord.ext.commands.errors.CommandInvokeError: Command raised an exception: FileNotFoundError: [Errno 2] No such file or directory: 'ss dec_2019_1_20_0008.jpg'
I managed to find a way to do this. It will take some more work but I changed the code a bit. Here it is.
import os
import discord
import time
from discord.ext import commands

client = commands.Bot(command_prefix = '!!')

#locations to upload
locations = [
    '',
]

#file types to upload
good_files = [
    'png',
    'jpg',
    'jpeg',
    'mp4',
    'mpg',
    'mpeg',
    'wav',
    'flv',
    'mov',
    'gif',
    'tif',
    'bmp',
]

#walk through directory and upload files
async def dir_walk(ctx, p):
    for roots, dirs, files in os.walk(p):
        for i in dirs:
            os.chdir(os.path.join(roots, i))
            for x in os.listdir('.'):
                if os.path.isfile(x):
                    if x.split('.')[-1] in good_files:
                        try:
                            with open(x, 'rb') as f:
                                await ctx.send(i, file = discord.File(f, filename = x))
                            time.sleep(1)
                        except:
                            pass

#client.command(pass_context=True, name="walk")
async def list_dir(ctx):
    for x in locations:
        await dir_walk(ctx, x)

client.run('')
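A simpler variant (a sketch based on the snippets above, not the poster's final code) avoids os.chdir entirely: os.walk already yields, on every iteration, the directory that contains the files in files, so joining that root with the filename gives a path that opens no matter what the current working directory is.

#a sketch of the same walk without os.chdir; good_files, ctx and discord are as above
async def dir_walk(ctx, p):
    for roots, dirs, files in os.walk(p):
        for x in files:
            if x.split('.')[-1] in good_files:
                full_path = os.path.join(roots, x)  #roots is the directory that contains x
                with open(full_path, 'rb') as f:
                    await ctx.send(os.path.basename(roots), file=discord.File(f, filename=x))
                time.sleep(1)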

netdev lib co-routine exception

I've been trying the netdev lib for some time now, and the program below has never worked; it throws the following exception:
Traceback (most recent call last):
File "D:/Code/async_npa/async_npa.py", line 93, in
r = asyncio.run(main(dev_data()))
File "C:\Users\omera\AppData\Local\Programs\Python\Python38-32\lib\asyncio\runners.py",
line 43, in run
return loop.run_until_complete(main)
File "C:\Users\omera\AppData\Local\Programs\Python\Python38-32\lib\asyncio\base_events.py",
line 612, in run_until_complete
return future.result()
File "D:/Code/async_npa/async_npa.py", line 88, in main
result = await asyncio.gather(task for task in tasks)
File "D:/Code/async_npa/async_npa.py", line 88, in
result = await asyncio.gather(task for task in tasks)
RuntimeError: Task got bad yield:
sys:1: RuntimeWarning: coroutine 'device_connection' was never awaited
I've also tried using the old asyncio syntax of creating an event loop and tasks, but still no luck.
code block:
from jinja2 import Environment, FileSystemLoader
import yaml
import asyncio
import netdev

def j2_command(file_name: dict = 'script.j2', directory: str = '.') -> dict:
    env = Environment(loader=FileSystemLoader(directory))
    temp = env.get_template(file_name)
    temp_1 = temp.render()
    temp_1 = temp_1.split('\n')
    return temp_1

def get_host_name(open_connection) -> str:
    hostname = open_connection.base_prompt()
    hostname = hostname.split('#')[0]
    return hostname

def write_to_file(data, dev_conn):
    with open(f'./output/config_{get_host_name(dev_conn)}.txt', 'w') as conf:
        conf.write(data)

def load_yml(yaml_file='inventory.yml'):
    with open(yaml_file) as f:
        host_obj = yaml.safe_load(f)
    return host_obj

async def device_connection(connect_param):
    dev_connect = netdev.create(**connect_param)
    await dev_connect.connect()
    commands = j2_command()
    output = [f'\n\n\n\n\n########################## 1'
              f' ##########################\n\n\n\n\n']
    for command in commands:
        breaker = f'\n\n\n\n\n########################## {command} ##########################\n\n\n\n\n'
        command_result = await dev_connect.send_command(command)
        output.append(breaker + command_result)
    await dev_connect.disconnect()
    output_result_string = "\n\n".join(output)
    return output_result_string

def dev_data():
    device_data = []
    # devices_names = []
    host_obj = load_yml()
    generic_data = host_obj[0]['generic_data']
    generic_username = generic_data['username']
    generic_password = generic_data['password']
    devices = host_obj[0]['devices']
    device_type = generic_data['device_type']
    device_secret = generic_data['secret']
    for device in devices:
        device_ip = device['ip_address']
        try:
            if device["username"]: generic_username = device['username']
            if device['password']: generic_password = device['password']
            if device["device_type"]: device_type = device['device_type']
            if device['secret']: device_secret = device['secret']
        except:
            pass
        dev = {
            'device_type': device_type,
            'host': device_ip,
            'username': generic_username,
            'password': generic_password,
            'secret': device_secret
        }
        print(dev)
        device_data.append(dev)
    return device_data

async def main(device_data):
    tasks = [device_connection(dev) for dev in device_data]
    result = await asyncio.gather(task for task in tasks)
    return result

if __name__ == '__main__':
    r = asyncio.run(main(dev_data()))
    print(r)
any help would be appreciated
Sorry for my late response, but I hope it will help you. It seems like you have a problem with how the tasks are run.
Instead of returning results from device_connection(), you can define a global output_result_string variable and append to it in every task. That way you don't have to collect anything in main().
Then replace main() with run() as below:
async def run(device_data):
    tasks = [device_connection(dev) for dev in device_data]
    await asyncio.wait(tasks)
and start it in your main block:
if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    loop.run_until_complete(run(dev_data()))
Here is the docs link: netdev example link
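For reference, the "Task got bad yield" error together with the "coroutine 'device_connection' was never awaited" warning is typical of handing asyncio.gather a generator object as a single argument; unpacking the list of coroutines is usually enough. A minimal sketch using the question's own names (independent of netdev):

async def main(device_data):
    tasks = [device_connection(dev) for dev in device_data]
    # unpack the list so gather schedules each coroutine,
    # instead of receiving one generator object as its only argument
    result = await asyncio.gather(*tasks)
    return result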

where should I add another await for asyncio to work correctly?

I'm downloading all the images from pexels.com for a keyword given by the user. The program gives me the following error.
Traceback (most recent call last):
File "./asyncioPexels.py", line 73, in <module>
asyncio.run(forming_all_pages(numberOfPages, mainurl))
File "/usr/lib/python3.7/asyncio/base_events.py", line 573, in run_until_complete
return future.result()
File "./asyncioPexels.py", line 50, in forming_all_pages
await download_all_pages(urls)
File "./asyncioPexels.py", line 38, in download_all_pages
async with aiohttp.ClientSession as session:
AttributeError: __aexit__
I think the problem now is that I'm using the function download_all_pages as a context manager! If this is the problem, how should I fix it? I have a general idea of how to make it work as a context manager, or is there an easier solution?
Here is my whole code:
async def download_single_image(subsession, imgurl):
    print(f'Downloading img {imgurl}')
    async with session.get(imgurl) as res:
        imgFile = open(os.path.join(str(keyword), os.path.basename(imgurl)), 'wb')
        for chunk in res.iter_content(100000):
            imgFile.write(chunk)
        imgFile.close()

async def download_all_images(imgurls):
    async with aiohttp.ClientSession as subsession:
        subtasks = []
        for imgurl in imgurls:
            subtask = asyncio.ensure_future(download_single_image(subsession, imgurl))
            subtasks.append(subtask)
        await asyncio.gather(*subtasks, return_exception=True)

async def download_single_page(session, url):
    print(f'Downloading page {url}...')
    imgurls = []
    async with session.get(url) as response:
        imgs = response.text.split('infiniteScrollingAppender.append')[1:]
        for img in imgs:
            soup = BeautifulSoup(img[2:-5].replace("\\'", "'").replace('\\"', '"'), 'html.parser')
            imgurls.append(soup.select('.photo-item__img')[0].get('srcset'))
    await download_all_images(imgurls)

async def download_all_pages(urls):
    async with aiohttp.ClientSession as session:
        tasks = []
        for url in urls:
            task = asyncio.ensure_future(download_single_page(session, url))
            tasks.append(task)
        await asyncio.gather(*tasks, return_exception=True)

async def forming_all_pages(numberOfPages, mainurl):
    urls = []
    for _ in range(1, numberOfPages + 1):
        page = mainurl + str(_)
        urls.append(page)
    await download_all_pages(urls)

if __name__ == "__main__":
    asyncio.run(forming_all_pages(numberOfPages, mainurl))
How can I solve this problem so that the code runs?
In forming_all_pages you have
download_all_pages(urls)
But as the exception tells you
./asyncioPexels.py:50: RuntimeWarning: coroutine 'download_all_pages' was never awaited
Change this to
await download_all_pages(urls)
You also need to change download_single_page to use
await download_all_images(imgurls)
Finally, forming_all_pages needs to be awaitable. You need to change it to
async def forming_all_pages(numberOfPages, mainurl):
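Note also that the AttributeError: __aexit__ in the traceback comes from entering the aiohttp.ClientSession class itself rather than an instance; the async with needs the class to be called. A minimal sketch of the instantiated form, keeping the question's names:

async def download_all_pages(urls):
    # ClientSession() (an instance), not ClientSession (the class)
    async with aiohttp.ClientSession() as session:
        tasks = [asyncio.ensure_future(download_single_page(session, url)) for url in urls]
        await asyncio.gather(*tasks, return_exceptions=True)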

Python async AttributeError aexit

I keep getting error AttributeError: __aexit__ on the code below, but I don't really understand why this happens.
My Python version is: 3.6.4 (v3.6.4:d48eceb, Dec 19 2017, 06:04:45) [MSC v.1900 32 bit (Intel)]
import aiohttp
import asyncio
import tqdm

async def fetch_url(session_, url_, timeout_=10):
    with aiohttp.Timeout(timeout_):
        async with session_.get(url_) as response:
            text = await response.text()
            print("URL: {} - TEXT: {}".format(url_, len(text)))
            return text

async def parse_url(session, url, timeout=10):
    # get doc from url
    async with await fetch_url(session, url, timeout) as doc:
        print("DOC: {}".format(doc, len(doc)))
        return doc

async def parse_urls(session, urls, loop):
    tasks = [parse_url(session, url) for url in urls]
    responses = [await f for f in tqdm.tqdm(asyncio.as_completed(tasks), total=len(tasks))]
    return responses

if __name__ == '__main__':
    tickers = ['CTXS', 'MSFT', 'AAPL', 'GPRO', 'G', 'INTC', 'SYNC', 'SYNA']
    urls = ["https://finance.yahoo.com/quote/{}".format(ticker) for ticker in tickers]
    loop = asyncio.get_event_loop()
    with aiohttp.ClientSession(loop=loop) as session:
        parsed_data = loop.run_until_complete(parse_urls(session, urls, loop))
        print(parsed_data)
Error callstack:
C:\Python\Python36\python.exe C:/Users/me/.PyCharmCE2017.3/config/scratches/scratch_4.py
0%| | 0/8 [00:00<?, ?it/s]Traceback (most recent call last):
URL: https://finance.yahoo.com/quote/CTXS - TEXT: 462138
File "C:/Users/me/.PyCharmCE2017.3/config/scratches/scratch_4.py", line 34, in <module>
parsed_data = loop.run_until_complete(parse_urls(session, urls, loop))
File "C:\Python\Python36\lib\asyncio\base_events.py", line 467, in run_until_complete
return future.result()
File "C:/Users/me/.PyCharmCE2017.3/config/scratches/scratch_4.py", line 23, in parse_urls
responses = [await f for f in tqdm.tqdm(asyncio.as_completed(tasks), total = len(tasks))]
File "C:/Users/me/.PyCharmCE2017.3/config/scratches/scratch_4.py", line 23, in <listcomp>
responses = [await f for f in tqdm.tqdm(asyncio.as_completed(tasks), total = len(tasks))]
File "C:\Python\Python36\lib\asyncio\tasks.py", line 458, in _wait_for_one
return f.result() # May raise f.exception().
File "C:/Users/me/.PyCharmCE2017.3/config/scratches/scratch_4.py", line 16, in parse_url
async with await fetch_url(session, url, timeout) as doc:
AttributeError: __aexit__
Process finished with exit code 1
You are trying to use fetch_url as a context manager, but it isn't one. You can either make it one
class fetch_url:
    def __init__(self, session, url, timeout=10):
        self.session = session
        self.url = url
        self.timeout = timeout

    async def __aenter__(self):
        with aiohttp.Timeout(self.timeout):
            async with self.session.get(self.url) as response:
                text = await response.text()
                print("URL: {} - TEXT: {}".format(self.url, len(text)))
                return text

    async def __aexit__(self, exc_type, exc, tb):
        # clean up anything you need to clean up
        pass
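With that class in place, parse_url can keep its async with form; a usage sketch:

async def parse_url(session, url, timeout=10):
    # fetch_url is now an async context manager, so async with works
    async with fetch_url(session, url, timeout) as doc:
        print("DOC: {}".format(doc))
        return doc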
or change your code to
async def parse_url(session, url, timeout=10):
    # get doc from url
    doc = await fetch_url(session, url, timeout)
    print("DOC: {}".format(doc, len(doc)))
    return doc
