Progress bar for upload file celery with nginx - python-3.x

I have docker services as web, nginx, celery. I have a dash(plotly) upload component to upload the files. For upload I am displaying a progress bar to show its progress.
app.py # showing main code lines
# Progress bar component (id "pbar").
dbc.Progress(id="pbar"),
# Timer component (id "poller") with a 1000 ms interval.
# NOTE(review): max_intervals=0 presumably keeps the timer stopped until a
# callback raises the limit (the upload callback returns -1 for
# "poller_max_intervals") — confirm against the dcc.Interval documentation.
dcc.Interval(id="poller", n_intervals=0, max_intervals=0, interval=1000),
# Store component (id "store_data"); how it is filled is not shown in this excerpt.
dcc.Store(id="store_data"),
# Single-file upload widget (multiple=False).
# NOTE(review): the id here is "upload", but the callback Inputs shown after
# this layout reference "upload_dataset" — confirm which id is intended.
dcc.Upload(id="upload", children=html.Div(["Drag and Drop or ",html.A("Select File", title="Click to select file.",),]),multiple=False,)
# NOTE(review): the next line is presumably the `@callback(...)` decorator with
# its '@' lost in extraction and its Output list elided — confirm.
#callback(Output()..,
Input(component_id="upload_dataset", component_property="filename"),
Input(component_id="upload_dataset", component_property="contents"),*
)
# Callback that either queues the upload task or, on a poller tick, returns
# the next progress-bar value and label.
# NOTE(review): the bare `*` in the Inputs and in the signature stands for
# elided arguments; `data` and `n_intervals` used below are presumably among
# them — confirm.
def launch_poll_cancel_upload_task(filename: str, contents: str, *):
triggered_id: Any = ctx.triggered_id
if filename and contents: # upload gets initiated
# The whole file content is handed to the Celery task as one argument.
# `res1` is not used again in the lines shown here.
res1: AsyncResult = task_upload_csv.apply_async(args=(filename, contents), queue="upload")
res2: dict[str, Any] = {
"poller_max_intervals": -1,
"poller_n_intervals": no_update,
"upload_filename": no_update,
"upload_contents": no_update,
"upload_disabled": True,
}
# Only the dict values are returned, in insertion order.
return tuple([v for k, v in res2.items()])
if triggered_id == "poller": # display progress of progress bar during polling operation
# `res3` is looked up but not consulted in the lines shown here.
res3 = result_from_tuple(data, app=celery_app) # task object
# The displayed value is derived from the poller tick count alone
# (n_intervals modulo 110, capped at 100); it does not read the task state.
progress = min(n_intervals % 110, 100)
res4: dict[str, Any] = {
"pbar_value": progress,
"pbar_label": f"{progress} %" if progress >= 1 else "",
}
return tuple([v for k, v in res4.items()])
task.py
@celery_app.task
def task_upload_csv(filename: str, content: str):
    """Decode an uploaded payload and write it into ``DIR_NAME``.

    Args:
        filename: Client-supplied file name. Only its final path component
            is used, so a name such as ``"../x.csv"`` cannot place the file
            outside ``DIR_NAME``.
        content: Upload contents in the form ``"<header>,<base64 data>"``.

    Returns:
        True once the file has been written; False when ``filename`` has no
        usable final component (empty, ``"."`` or ``".."``).

    Raises:
        ValueError: If ``content`` contains no comma separator, or the
            payload is not correctly padded base64 (``binascii.Error``).
    """
    # The name comes from the browser, so never trust its directory part.
    safe_name = Path(filename).name
    if safe_name in ("", ".."):
        return False
    file: Path = DIR_NAME / safe_name

    # Split on the first comma only: everything after it is the payload.
    _header, content_string = content.split(",", 1)
    decoded = base64.b64decode(content_string)

    # Remove a previous upload of the same name before writing the new one.
    if file.is_file():
        file.unlink()
    with open(file, "wb") as fp:
        fp.write(decoded)
    return True
Case-1: uploading small file-size upto 2-3MB, works fine, display progress(1,2,...,100) and get copied/uploaded successfully.
Case-2: uploading a large file (10-100 MB) does not work: the progress bar is displayed but never animates (1, 2, ...), i.e. it stays empty, and I get warnings like the one below
[warn] 22#22: *50 a client request body is buffered to a temporary file /tmp/client_temp/0000000019,
and then I see multiple requests are getting fired and returns task(output) as SUCCESS but there is no update/animation in progress bar.But in the end the file is copied successfully.
My understanding is that task_upload_csv() above works only for small files (because all of the data is passed in the single contents argument of task_upload_csv()). For large files, I think, going by the warning, that some data is still waiting to be uploaded, i.e. the contents argument of task_upload_csv() does not hold all of the content but arrives in chunks, and that is why the progress bar breaks.
What I am missing or Is there any other way available?

Related

Python Selenium: Check if a new file in the download folder is added

I have this when I press on a link it downloads to the download folder.
my Url looks something like so
url='https://vle......ac.uk/pluginfile.php/2814969/mod_page/content/16/Statistics_for_Business_and_Economics_----_%28Unit_I_Introduction%29.pdf'
driver.execute_script("window.open('%s', '_blank')" % URL)
Where the URL is a pdf file that I am trying to download.
I want to write code that waits until the number of files in the download folder increases before moving on to the next iteration of the loop.
I wrote this code:
def wait_till_number_of_files_is_byound_the_current_file(
        path_download=r'\\Mac\Home\Downloads\*', timeout=None, poll_interval=1):
    """Block until the number of paths matching ``path_download`` changes.

    Args:
        path_download: Glob pattern that selects the download folder's
            entries. Defaults to the folder used by the original snippet.
        timeout: Maximum number of seconds to wait. ``None`` (the default)
            waits indefinitely, as the original did.
        poll_interval: Seconds to sleep between two directory scans.

    Returns:
        True when the count changed, False when ``timeout`` expired first.
    """
    initial_count = len(glob.glob(path_download))
    # monotonic() is immune to wall-clock adjustments during the wait.
    deadline = None if timeout is None else time.monotonic() + timeout
    while len(glob.glob(path_download)) == initial_count:
        if deadline is not None and time.monotonic() >= deadline:
            return False
        time.sleep(poll_interval)
    return True
In my for loop I also tried this code
item = WebDriverWait(driver, 10).until(lambda driver: driver.execute_script("window.open('%s', '_blank')" % URL))
but this caused the link to be opened over and over again instead of only once.
The best way, to get around this (I hope there would be a better way) is to use the following function
def download_wait(directory, timeout, nfiles=None, poll_interval=1):
    """Wait for downloads to finish with a specified timeout.

    A download counts as unfinished while any entry of ``directory`` ends
    with ``.crdownload``.

    Args:
        directory: Path to the folder where the files will be downloaded.
        timeout: How many seconds to wait until timing out.
        nfiles: If truthy, also wait until the folder holds exactly this
            many entries. ``None`` and ``0`` both disable the check.
        poll_interval: Seconds to sleep before each directory scan
            (default 1, the value the original hard-coded).

    Returns:
        The time waited, i.e. the number of scans multiplied by
        ``poll_interval``.
    """
    seconds = 0
    dl_wait = True
    while dl_wait and seconds < timeout:
        time.sleep(poll_interval)
        files = os.listdir(directory)
        wrong_count = bool(nfiles) and len(files) != nfiles
        still_downloading = any(
            fname.endswith('.crdownload') for fname in files)
        dl_wait = wrong_count or still_downloading
        seconds += poll_interval
    return seconds
In my for loop, I wrote the following
for url in hyper_link_of_files:
    # Open the link in a new tab. The URL is passed as a script argument
    # instead of being interpolated into the JavaScript source, so quotes
    # or backslashes inside it cannot break the script.
    driver.execute_script("window.open(arguments[0], '_blank')", url)
    # Give the browser up to 10 seconds to finish writing the file.
    download_wait(r'\\Mac\Home\Downloads', 10, nfiles=None)
    time.sleep(2)
    # Move the most recently downloaded file into the destination folder.
    Move_File(dest_folder)
I will share my Move_File function for reference to those who are interested in moving the downloaded file into a new destination
def Move_File(path_needed, path_download=r'\\Mac\Home\Downloads\*'):
    """Move the most recently changed download into ``path_needed``.

    Args:
        path_needed: Destination folder.
        path_download: Glob pattern selecting the candidate files. Defaults
            to the downloads folder used by the original snippet.

    Returns:
        The destination path of the moved file, or ``None`` when the
        pattern matches nothing (the original raised ``ValueError`` from
        ``max()`` in that case).
    """
    candidates = glob.glob(path_download)
    if not candidates:
        return None
    # "Latest" is decided by os.path.getctime, as in the original.
    latest_file = max(candidates, key=os.path.getctime)
    path_destination = os.path.join(path_needed, os.path.basename(latest_file))
    shutil.move(latest_file, path_destination)
    return path_destination

Django file object always 0 bytes when uploaded from python requests

I have been trying to upload a file to django REST using python requests.
I put the file, and some other data, to the server.
r = self.session.put(
f"{hello_url}/shadow_pbem/savefile_api/",
files=test_files,
data={"hash": test_file_hash, 'leader': 78},
headers=good_token_header,
)
I get a 200 response, the model saves all the data correctly as expected, including a correctly named save file in /media, except the save file in /media is always 0 bytes.
This is how I create the file object...
with open(testfile_path, "rb") as testfile:
...and verify the length, which is not 0.
testfile.seek(0, os.SEEK_END)
filesize = testfile.tell()
I create the files object for upload...
test_files = {
"file": ("testfile.zip", testfile, "application/zip")
}
I put some code in the view to verify, and the file object in the view is there, but it is 0 bytes.
Here is the relevant part of the view. It seems to work fine, but all files are 0 bytes.
# API view that accepts a PUT carrying an uploaded file under the "file" key.
class SaveFileUploadView(APIView):
# NOTE(review): Django REST framework presumably reads `parser_classes`
# (plural); with this spelling the attribute would be ignored and the default
# parsers would apply — confirm before relying on FileUploadParser here.
parser_class = (FileUploadParser,)
# Handle PUT: reject requests without a "file" entry, then report its size.
def put(self, request):
if "file" not in request.data:
raise ParseError("Empty content")
f = request.data["file"]
# Debug output of the size the server received.
print(f"file {f} size:{f.size}")
# prints file testfile.zip size:0
# rest of view works fine...
I have tried with various files and formats, also using post. Files are always 0 bytes.
Any help appreciated I am going crazy....
If you do
testfile.seek(0, os.SEEK_END)
filesize = testfile.tell()
as you say,
you'll need to also rewind back to the start – otherwise there is indeed zero bytes for Requests to read anymore.
testfile.seek(0)

How to use Progress bar in pytube?

I want to implement a progress bar in my code, but neither the old nor the new way of implementation is working.
How to add progress bar?
This fix doesn't work in the latest version.
Here is the latest documentation
https://pypi.org/project/pytube/
import sys


def show_progress_bar(stream, _chunk, _file_handle, bytes_remaining=None):
    """Draw a 50-character progress bar for a pytube download on stdout.

    The callback accepts both call shapes: the older four-argument form
    ``(stream, chunk, file_handle, bytes_remaining)`` and the three-argument
    form ``(stream, chunk, bytes_remaining)``. In the latter the third
    positional value is the remaining byte count.

    Args:
        stream: Object exposing ``filesize`` (total bytes of the download).
        _chunk: The data chunk just received (unused).
        _file_handle: File handle in the four-argument form, or the
            remaining byte count in the three-argument form.
        bytes_remaining: Bytes still to download (four-argument form only).
    """
    if bytes_remaining is None:
        # Three-argument call: the third positional is bytes_remaining.
        bytes_remaining = _file_handle
    total = stream.filesize
    # A zero-byte stream is treated as complete instead of dividing by zero.
    current = (total - bytes_remaining) / total if total else 1.0
    percent = '{0:.1f}'.format(current * 100)
    progress = int(50 * current)
    status = '█' * progress + '-' * (50 - progress)
    sys.stdout.write(' ↳ |{bar}| {percent}%\r'.format(bar=status, percent=percent))
    sys.stdout.flush()


def main():
    """Download the example video, reporting progress on stdout."""
    from pytube import YouTube

    url = "https://youtu.be/J5EXnh53A1k"
    path = r'D://'
    yt = YouTube(url)
    # The callback must already be defined when it is registered; the
    # original registered it before the `def`, which raises NameError.
    yt.register_on_progress_callback(show_progress_bar)
    yt.streams.filter(file_extension='mp4').first().download(path)


if __name__ == "__main__":
    main()
You first need to define the progress bar function, say progress_function:
def progress_function(chunk, file_handle, bytes_remaining):
    """Draw a 50-character download progress bar on stdout.

    The total size is taken from a module-level ``filesize`` when one has
    been set (the original behaviour). Otherwise it is read from the
    ``filesize`` attribute of the first argument, which is the stream
    object when pytube invokes the callback with
    ``(stream, chunk, bytes_remaining)``.

    Args:
        chunk: First positional callback argument (see above).
        file_handle: Second positional callback argument (unused).
        bytes_remaining: Bytes still to download.
    """
    total = globals().get("filesize")
    if total is None:
        total = getattr(chunk, "filesize", None)
    if not total:
        # Unknown or zero size: nothing meaningful to draw.
        return
    current = (total - bytes_remaining) / total
    percent = '{0:.1f}'.format(current * 100)
    progress = int(50 * current)
    status = '█' * progress + '-' * (50 - progress)
    sys.stdout.write(' ↳ |{bar}| {percent}%\r'.format(bar=status, percent=percent))
    sys.stdout.flush()
Then register the above defined function progress_function with the on_progress_callback as follows:
yt_obj = YouTube(<<youtube_video_url>>, on_progress_callback = progress_function)
Rest of the code follows:
yt_obj.streams.filter(progressive=True, file_extension='mp4').get_highest_resolution().download(output_path='/home/myusername/Videos', filename='MyVideo')
Output looks like this:
↳ |██████████████████████████████████----------------| 68.4%
Have fun!!
I'm using progressbar2
def progress_Check(stream=None, chunk=None, file_handle=None, remaining=None):
    """Update the module-level progress ``bar`` from a download callback.

    Works with both callback shapes: four positional arguments
    ``(stream, chunk, file_handle, remaining)`` and three positional
    arguments ``(stream, chunk, remaining)``. In the three-argument case
    the byte count arrives in ``file_handle``; the original then saw
    ``remaining=None`` and always jumped to 100%.

    Reads the module-level names ``file_size`` (total bytes) and ``bar``.
    Values passed to ``bar.update`` are megabytes rounded to two decimals.
    """
    if remaining is None:
        remaining = file_handle if isinstance(file_handle, int) else 0
    # The original reports progress 1 MB ahead; keep that offset, but clamp
    # it so the value can never exceed the file size.
    done_bytes = min(file_size - remaining + 1000000, file_size)
    try:
        bar.update(round(done_bytes / 1000000, 2))
    except ValueError:
        # progressbar2 rejects out-of-range values; fall back to "complete".
        bar.update(round(file_size / 1000000, 2))
yt = YouTube(url, on_progress_callback=progress_Check)
yt = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first().download()
Here is function used to download youtube video and display progress bar from shell:
from pytube import YouTube
from pytube.cli import on_progress
fuchsia = '\033[38;2;255;00;255m' # color as hex #FF00FF
reset_color = '\033[39m'
# url is url of youtube video to download.
def download_youtube(url, output_path='.\\downloads\\'):
    """Download a YouTube video while showing pytube's CLI progress bar.

    Args:
        url: URL of the YouTube video to download.
        output_path: Folder the video is saved to. Defaults to the
            ``downloads`` sub-directory used by the original snippet.

    The console colour is switched to fuchsia for the duration of the
    download and is restored even if the download raises.
    """
    yt = YouTube(url, on_progress_callback=on_progress)
    # Show the title and the number of times the video has been viewed.
    print('\n' + fuchsia + 'Downloading: ', yt.title, '~ viewed', yt.views,
          'times.')
    try:
        yt.streams.first().download(output_path)
        print(f'\nFinished downloading: {yt.title}')
    finally:
        # Always restore the terminal's colour, including on failure.
        print(reset_color, end='')
Display colors were switched because the default progress bar is fairly bright. In event video was previously downloaded the 'Finished downloading:' message will display but the progress bar won't displayed.
Please see this Showing progress in pytube regarding the use of pytube's built-in on_progress function.
# importing YouTube from pytube
import progressbar as progress
from pytube import YouTube
def progress(streams, chunk: bytes, bytes_remaining: int):
    """Print a 20-character download progress bar on a single line.

    Args:
        streams: The stream being downloaded; its ``filesize`` gives the
            total number of bytes. Reading it from the argument removes
            the dependency on a module-level ``video`` variable.
        chunk: The data chunk just received (unused).
        bytes_remaining: Bytes still to download.
    """
    contentsize = streams.filesize
    if not contentsize:
        # Unknown or zero size: avoid dividing by zero.
        return
    size = contentsize - bytes_remaining
    filled = int(size * 20 / contentsize)
    # flush=True: with end='' nothing would force the '\r'-updated line out
    # of a line-buffered stdout until the download is over.
    print('\r' + '[Download progress]:[%s%s]%.2f%%;' % (
        '█' * filled, ' ' * (20 - filled), float(size / contentsize * 100)),
        end='', flush=True)
url = 'https://www.youtube.com/watch?v=qOVAbKKSH10'
yt = YouTube(url, on_progress_callback=progress)
video = yt.streams.get_highest_resolution()
video.download()

Selenium (Python) - waiting for a download process to complete using Chrome web driver

I'm using selenium and python via chromewebdriver (windows) in order to automate a task of downloading large amount of files from different pages.
My code works, but the solution is far from ideal: the function below clicks on the website button that initiating a java script function that generating a PDF file and then downloading it.
I had to use a static wait in order to wait for the download to be completed (ugly) I cannot check the file system in order to verify when the download is completed since i'm using multi threading (downloading lot's of files from different pages at once) and also the the name of the files is generated dynamically in the website itself.
My code:
# Open url[num] with the given driver, click the download button once it is
# clickable (waiting up to 20 s), then sleep a fixed 10 s for the download.
# NOTE(review): `url` and `Counter` are not defined in this snippet —
# presumably module-level globals; confirm.
def file_download(num, drivervar):
# NOTE(review): `Counter += 1` assigns to the name inside the function; with
# no `global Counter` declaration this presumably raises UnboundLocalError —
# confirm.
Counter += 1
try:
drivervar.get(url[num])
download_button = WebDriverWait(drivervar, 20).until(EC.element_to_be_clickable((By.ID, 'download button ID')))
download_button.click()
# Static wait: this is the part the question wants to replace.
time.sleep(10)
except TimeoutException: # Retry once
print('Timeout in thread number: ' + str(num) + ', retrying...')
# The remainder of the handler is elided in the original post.
.....
Is it possible to determine download completion in webdriver? I want to avoid using time.sleep(x).
Thanks a lot.
You can get the status of each download by visiting chrome://downloads/ with the driver.
To wait for all the downloads to finish and to list all the paths:
def every_downloads_chrome(driver):
    """Return the URLs of all downloads once every one of them is complete.

    Navigates ``driver`` to ``chrome://downloads/`` unless it is already
    there, then runs a script against the downloads list. The script
    returns a value only when every item's state is ``COMPLETE``, so the
    function yields ``None`` until then and suits ``WebDriverWait.until``.
    """
    already_there = driver.current_url.startswith("chrome://downloads")
    if not already_there:
        driver.get("chrome://downloads/")
    script = (
        "\n"
        "var items = document.querySelector('downloads-manager')\n"
        ".shadowRoot.getElementById('downloadsList').items;\n"
        'if (items.every(e => e.state === "COMPLETE"))\n'
        "return items.map(e => e.fileUrl || e.file_url);\n"
    )
    return driver.execute_script(script)
# waits for all the files to be completed and returns the paths
paths = WebDriverWait(driver, 120, 1).until(every_downloads_chrome)
print(paths)
Was updated to support changes till version 81.
I have had the same problem and found a solution. You can check whether or not a .crdownload file is in your download folder. If there are 0 files with the .crdownload extension in the download folder, then all your downloads are completed. This only works for Chrome and Chromium, I think.
def downloads_done(directory="/downloads", poll_interval=0.5):
    """Block until ``directory`` holds no in-progress Chrome downloads.

    A download counts as in progress while an entry's name contains
    ``.crdownload``. The wait is a plain loop: the original recursed once
    per poll (risking the recursion limit), referenced an undefined name
    ``i`` and never left its ``while True``.

    Args:
        directory: Folder to watch (default: the original's ``/downloads``).
        poll_interval: Seconds to sleep between two scans.
    """
    while any(".crdownload" in filename for filename in os.listdir(directory)):
        time.sleep(poll_interval)
Whenever you call downloads_done() it will loop until all downloads are completed. If you are downloading massive files, like 80 gigabytes, then I don't recommend this, because the function can then reach the maximum recursion depth.
2020 edit:
def wait_for_downloads(directory="/downloads", poll_interval=2):
    """Print a dot per poll until no ``.crdownload`` file is left.

    Args:
        directory: Folder to watch (default: the original's ``/downloads``).
        poll_interval: Seconds to sleep between two scans (default 2).
    """
    # flush=True: with end="" there is no newline to trigger a flush, so the
    # dots would otherwise appear all at once on a line-buffered stdout.
    print("Waiting for downloads", end="", flush=True)
    while any(name.endswith(".crdownload") for name in os.listdir(directory)):
        time.sleep(poll_interval)
        print(".", end="", flush=True)
    print("done!")
The "end" keyword argument in print() usually holds a newline but we replace it.
While there are no filenames in the /downloads folder that end with .crdownload
sleep for 2 seconds and print one dot without newline to console
I don't really recommend using selenium anymore after finding out about requests but if it's a very heavily guarded site with cloudflare and captchas etc then you might have to resort to selenium.
With Chrome 80, I had to replace the answer from @florent-b with the code below:
def every_downloads_chrome(driver):
    """Return the paths of the downloads whose state is ``COMPLETE``.

    Opens ``chrome://downloads/`` first when the driver is on another page.
    The script filters the downloads list down to completed items and maps
    each one to the first available of ``filePath``, ``file_path``,
    ``fileUrl`` and ``file_url``.
    """
    if not driver.current_url.startswith("chrome://downloads"):
        driver.get("chrome://downloads/")
    js_lines = [
        "",
        "return document.querySelector('downloads-manager')",
        ".shadowRoot.querySelector('#downloadsList')",
        ".items.filter(e => e.state === 'COMPLETE')",
        ".map(e => e.filePath || e.file_path || e.fileUrl || e.file_url);",
        "",
    ]
    completed = driver.execute_script("\n".join(js_lines))
    return completed
I believe this is retro-compatible, I mean this shall be working with older versions of Chrome.
There are issues with opening chrome://downloads/ when running Chrome in headless mode.
The following function uses a composite approach that works whether the mode is headless or not, choosing the better approach available in each mode.
It assumes that the caller clears all files downloaded at file_download_path after each call to this function.
import os
import logging
from selenium.webdriver.support.ui import WebDriverWait
def wait_for_downloads(driver, file_download_path, headless=False, num_files=1,
                       max_delay=60, interval_delay=0.5):
    """Wait until ``num_files`` downloads have completed.

    In headless mode the download folder is polled; otherwise the
    ``chrome://downloads/`` page is inspected in a temporary tab, which is
    closed again afterwards. The caller is expected to clear
    ``file_download_path`` after each call.

    Args:
        driver: Selenium driver (unused in headless mode).
        file_download_path: Folder the browser downloads into.
        headless: Select the folder-polling strategy when True.
        num_files: Number of finished files to wait for.
        max_delay: Maximum number of seconds to wait (default 60).
        interval_delay: Seconds between two checks (default 0.5).

    Returns:
        True when the downloads completed. In headless mode False is
        returned (and an error is logged) on timeout; in UI mode the
        ``WebDriverWait`` timeout exception propagates instead.
    """
    # `time` is not among this snippet's imports, so bring it in locally.
    import time

    if headless:
        total_delay = 0
        done = False
        while not done and total_delay < max_delay:
            # Ignore system files: macOS adds .DS_Store to folders.
            files = [f for f in os.listdir(file_download_path)
                     if f != '.DS_Store']
            in_progress = any(f.endswith('.crdownload') for f in files)
            if len(files) == num_files and not in_progress:
                done = True
            else:
                total_delay += interval_delay
                time.sleep(interval_delay)
        if not done:
            logging.error("File(s) couldn't be downloaded")
        return done

    def all_downloads_completed(driver, num_files):
        """Return True when exactly ``num_files`` items are COMPLETE."""
        return driver.execute_script("""
            var items = document.querySelector('downloads-manager').shadowRoot.querySelector('#downloadsList').items;
            var i;
            var done = false;
            var count = 0;
            for (i = 0; i < items.length; i++) {
                if (items[i].state === 'COMPLETE') {count++;}
            }
            if (count === %d) {done = true;}
            return done;
            """ % (num_files))

    # Remember where we came from and find the new tab by set difference
    # rather than by assuming it is at index 1.
    original_window = driver.current_window_handle
    known_windows = set(driver.window_handles)
    driver.execute_script("window.open();")
    new_window = next(
        handle for handle in driver.window_handles
        if handle not in known_windows)
    # `switch_to.window` replaces `switch_to_window`, removed in Selenium 4.
    driver.switch_to.window(new_window)
    driver.get('chrome://downloads/')
    # Wait for downloads to complete
    WebDriverWait(driver, max_delay, interval_delay).until(
        lambda d: all_downloads_completed(d, num_files))
    # Clear all downloads from chrome://downloads/
    driver.execute_script("""
        document.querySelector('downloads-manager').shadowRoot
        .querySelector('#toolbar').shadowRoot
        .querySelector('#moreActionsMenu')
        .querySelector('button.clear-all').click()
        """)
    driver.close()
    driver.switch_to.window(original_window)
    return True
import os
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
# Test case that clicks a download link and waits for the file in two phases:
# first for anything to appear in the download folder, then for the folder's
# total size to stop changing.
# NOTE(review): `unittest` and `time` are used below but are not among the
# imports shown with this snippet — confirm they are imported elsewhere.
class MySeleniumTests(unittest.TestCase):
selenium = None
# NOTE(review): presumably the `@classmethod` decorator with its '@' lost in
# extraction; as a plain comment it is not applied — confirm.
#classmethod
def setUpClass(cls):
# The `...` arguments here and below are placeholders left by the author.
cls.selenium = webdriver.Firefox(...)
...
def test_download(self):
# os.listdir()/os.stat() below take no directory argument, so they operate on
# the working directory selected here.
# NOTE(review): `self.download_path` is not set in the lines shown — confirm.
os.chdir(self.download_path) # default download directory
# click the button
self.selenium.get(...)
self.selenium.find_element_by_xpath(...).click()
# Phase 1: wait for the server to finish its internal work, i.e. until the
# download folder is no longer empty.
def download_begin(driver):
if len(os.listdir()) == 0:
time.sleep(0.5)
return False
else:
return True
WebDriverWait(self.selenium, 120).until(download_begin) # the max waiting time is 120s
# Phase 2: wait for the server to finish sending.
# While the total size of the directory keeps changing, keep waiting.
def download_complete(driver):
sum_before=-1
sum_after=sum([os.stat(file).st_size for file in os.listdir()])
# Loops internally until two size samples taken 0.2 s apart are equal.
while sum_before != sum_after:
time.sleep(0.2)
sum_before = sum_after
sum_after = sum([os.stat(file).st_size for file in os.listdir()])
return True
WebDriverWait(self.selenium, 120).until(download_complete) # the max waiting time is 120s
You must do these thing
Wait for server to finish inner business( for example, query from database).
Wait for server to finish sending the files.
(my English is not very well)
To get more than one item returned, I had to replace the answer from @thdox with the code below:
def every_downloads_chrome(driver):
    """Return the paths of all downloads once every one is ``COMPLETE``.

    Switches the driver to ``chrome://downloads/`` when needed. The script
    returns the mapped paths only if every listed item is complete;
    otherwise it returns nothing, so the result is ``None`` until then.
    """
    downloads_page = "chrome://downloads"
    if not driver.current_url.startswith(downloads_page):
        driver.get("chrome://downloads/")
    statements = (
        "var elements = document.querySelector('downloads-manager')",
        ".shadowRoot.querySelector('#downloadsList')",
        ".items",
        "if (elements.every(e => e.state === 'COMPLETE'))",
        "return elements.map(e => e.filePath || e.file_path || e.fileUrl || e.file_url);",
    )
    script = "\n" + "".join(line + "\n" for line in statements)
    return driver.execute_script(script)
This may not work for all usecases but for my simple need to wait for one pdf to download it works great. Based off of Walter's comment above.
def get_non_temp_len(download_dir):
    """Count the entries of ``download_dir`` that are not temporary files.

    Entries ending in ``.tmp`` or ``.crdownload`` are treated as temporary
    and left out of the count.
    """
    temp_suffixes = ('.tmp', '.crdownload')
    return sum(
        1 for entry in os.listdir(download_dir)
        if not entry.endswith(temp_suffixes)
    )
download_dir = 'your/download/dir'
original_count = get_non_temp_len(download_dir) # get the file count at the start
# do your selenium stuff
while original_count == get_non_temp_len(download_dir):
time.sleep(.5) # wait for file count to change
driver.quit()
I had the same problem and this method worked for me.
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import ElementClickInterceptedException
from threading import Thread
import os
import datetime
def checkFilePresence(downloadPath, numberOfFilesInitially, artistName,
                      songTitle, timeout=None, pollInterval=0.5):
    """Wait until a new ``.mp3`` file shows up under ``downloadPath``.

    A file qualifies when the folder holds more entries than
    ``numberOfFilesInitially`` and the file's ``os.path.getctime`` is later
    than the moment this function was called.

    Args:
        downloadPath: Folder (searched recursively) receiving the download.
        numberOfFilesInitially: Entry count of the folder before the
            download was started.
        artistName: Unused; kept for interface compatibility.
        songTitle: Unused; kept for interface compatibility.
        timeout: Maximum seconds to wait; ``None`` waits indefinitely, as
            the original did.
        pollInterval: Seconds to sleep between scans. The original spun
            without sleeping, keeping one CPU core busy.

    Returns:
        The path of the new ``.mp3`` file, or ``None`` on timeout.
    """
    startedAt = datetime.datetime.now()
    deadline = None if timeout is None else time.monotonic() + timeout
    while True:
        if len(os.listdir(downloadPath)) > numberOfFilesInitially:
            for folder, _subfolders, files in os.walk(downloadPath):
                for name in files:
                    if not name.endswith('.mp3'):
                        continue
                    fullPath = os.path.join(folder, name)
                    changedAt = datetime.datetime.fromtimestamp(
                        os.path.getctime(fullPath))
                    if changedAt > startedAt:
                        return fullPath
        if deadline is not None and time.monotonic() >= deadline:
            return None
        time.sleep(pollInterval)
This code works in headless mode and returns the downloaded file name (based on
@protonum's code):
def wait_for_downloads(download_path, max_delay=30, interval_delay=0.5):
    """Wait for a Chrome download to finish and return the file's path.

    The folder is polled for ``.crdownload`` files. The first one seen is
    remembered; the download counts as finished once no ``.crdownload``
    file is left after one has been seen.

    Args:
        download_path: Folder the browser downloads into.
        max_delay: Maximum number of seconds to wait (default 30).
        interval_delay: Seconds between two scans (default 0.5).

    Returns:
        ``download_path + '/' + <name without the .crdownload suffix>``.
        On timeout an error is logged and the path built from whatever was
        seen is still returned, which is ``download_path + '/'`` when no
        partial file was ever observed.
    """
    suffix = '.crdownload'
    total_delay = 0
    file = ''
    done = False
    while total_delay < max_delay:
        partial = [f for f in os.listdir(download_path) if f.endswith(suffix)]
        if partial:
            file = partial[0]
        elif file:
            # A partial file was seen earlier and is gone now: finished.
            done = True
            break
        time.sleep(interval_delay)
        total_delay += interval_delay
    if not done:
        logging.error("File(s) couldn't be downloaded")
    # Strip only the trailing suffix; str.replace would also remove the
    # text if it occurred in the middle of the name.
    final_name = file[:-len(suffix)] if file else ''
    return download_path + '/' + final_name
def wait_for_download_to_be_don(self, path_to_folder, file_name,
                                max_time=60, poll_interval=0.5):
    """Wait until ``path_to_folder + file_name`` exists.

    The two arguments are concatenated as-is, so ``path_to_folder`` must
    already end with a path separator.

    Args:
        path_to_folder: Download folder, including the trailing separator.
        file_name: Name of the expected file.
        max_time: Maximum number of seconds to wait (default 60).
        poll_interval: Seconds between two existence checks (default 0.5).

    Raises:
        AssertionError: If the file does not exist when the wait ends.
    """
    from time import sleep

    target = path_to_folder + file_name
    # The original initialised `counter` but read `time_counter`, which
    # raised UnboundLocalError on the first loop test.
    time_counter = 0
    while not os.path.exists(target) and time_counter < max_time:
        sleep(poll_interval)
        time_counter += poll_interval
    # Re-check existence rather than comparing floats with `==`, and raise
    # explicitly so the check survives `python -O`.
    if not os.path.exists(target):
        raise AssertionError("The file wasn't downloaded")
When using test automation, it's crucial that developers make the software testable. It is your job to check the software together with its testability, meaning that you need to request a spinner or a simple HTML tag which indicates when the download has finished successfully.
In a case as yours, where you cannot check it in the UI and you cannot check in system, this is the best way to solve it.

attaching a log file as an Email attachment - log content become unreadable

I'm sending an execution log through an Email in my code, but the decoding is failing the file content becomes:
mg×£h¿m5ë]÷çMC<䓆ÛiÿývïM5ç]ô÷û²È¨Ÿÿ{míׇ6áþû÷W5åíÞ{¦šãnºç¯î®[«–m§ÿé‹›²Ö {V³º·°y»"µë'zßìzË"¢r÷¶ÞÑÝxsnï¿us^
I'm running the next code:
def send_email(self):
email_filename = WebUIOrgResorces.logfilename
fp = codecs.open(email_filename, 'rb')
filecontent = fp.read()
encodedcontent = base64.b64encode(filecontent)

Resources