How to set a destination for shutil.copyfileobj? - python-3.x

This code saves a Discord image to the folder the script is in. I tried to set a destination for the saved file, but I haven't found anything in the shutil documentation that sets the destination. I tried to put a destination in the shutil.copyfileobj arguments, but that didn't work. Also, I am relatively new to coding.
This is the code:
import uuid
import requests
import shutil
from discord.ext import commands

class filesaver:
    #bot.command()
    async def save(ctx):
        try:
            url = ctx.message.attachments[0].url
        except IndexError:
            print("Error: No Attachments")
            await ctx.send("No Attachments detected!")
        else:
            if url[0:26] == "https://cdn.discordapp.com":
                r = requests.get(url, stream=True)
                imageName = str(uuid.uuid4()) + '.jpg'
                with open(imageName, 'wb') as out_file:
                    print('saving image: ' + imageName)
                    shutil.copyfileobj(r.raw, out_file)
                await ctx.send(f"text")

Your imageName doesn't contain a path, so the file opens in whatever your current working directory happens to be. That's a bit unpredictable. It's also easy to fix.
from pathlib import Path
imageName = str(Path.home() / Path(str(uuid.uuid4()) + '.jpg'))
You can of course replace Path.home() with any destination path you'd prefer.
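If you'd rather save into a specific folder, build the path the same way and make sure the folder exists first (a minimal sketch; the saved_images folder name is just an example):
from pathlib import Path
import uuid

dest_dir = Path("saved_images")               # any destination folder you prefer
dest_dir.mkdir(parents=True, exist_ok=True)   # create it if it doesn't exist yet
imageName = str(dest_dir / (str(uuid.uuid4()) + '.jpg'))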

Related

How can I make web resources available offline?

There is a folder on my Linux PC which contains a website (webpages etc.). The webpages and other complementary files in the folder use CDNs to bring in resources like jQuery, DataTables etc.
I want to make these resources available offline. I know I can manually search all files for occurrences of the "http" keyword, download the files from those URLs, keep them in the folder and change the source file paths accordingly. But as there are too many files this seems troublesome. I want to ask whether there is a better and more elegant way of doing so. Thanks in advance.
I made a Python script to do the job:
import re
import os
import aiohttp
import asyncio
import pathlib
import string
import random
import chardet

# Decode a byte sequence using chardet, to cope with unknown encodings
def decode_bytes(byte_sequence):
    result = chardet.detect(byte_sequence)
    encoding = result['encoding']
    return byte_sequence.decode(encoding)

VALID_URL_REGEX = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_#.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')

# Downloader; I lazily use resp.status as the success criterion, but that has a
# logical issue - you may also want to include other checks
async def download_file(session, url, local_path):
    async with session.get(url, allow_redirects=True, ssl=False) as resp:
        if resp.status == 200:
            print("Content path is " + str(local_path))
            with open(local_path, "wb") as f:
                while True:
                    chunk = await resp.content.read(4196)
                    if not chunk:
                        break
                    f.write(chunk)  # chunk is already bytes, no encoding needed

# Map each downloaded URL to its local path, to avoid redownloading
downloaded_urls = {}

async def process_file(file_path, session):
    print("File during read " + str(file_path))
    with open(file_path, "rb") as f:
        raw = f.read()
    try:
        contents = decode_bytes(raw)
    except (UnicodeDecodeError, TypeError) as e:
        print(f"Error decoding file {file_path}: {e}")
        return
    urls = re.findall(VALID_URL_REGEX, contents)
    for url in urls:
        try:
            file_name = url.split("/")[-1]
            if len(file_name) == 0:
                continue
            if url in downloaded_urls:
                # Already fetched: reuse the existing local copy
                local_path = downloaded_urls[url]
            else:
                # Random prefix avoids clashes when different URLs share a file name
                res = ''.join(random.choices(string.ascii_uppercase + string.digits, k=5))
                local_path = os.path.join("downloaded", res + file_name)
                if not os.path.exists(local_path):
                    await download_file(session, url, local_path)
                downloaded_urls[url] = local_path
            contents = contents.replace(url, local_path)
        except Exception as e:
            print(f"Failed to fetch {url}: {e}")
    print("File during write " + str(file_path))
    with open(file_path, "w", encoding="utf-8", errors="ignore") as f:
        f.write(contents)

async def process_directory(directory):
    if not os.path.exists("downloaded"):
        os.makedirs("downloaded")
    conn = aiohttp.TCPConnector(limit=2200, limit_per_host=20, ttl_dns_cache=22)
    async with aiohttp.ClientSession(connector=conn) as session:
        tasks = []
        for filepath in pathlib.Path(directory).glob('**/*'):
            fp = filepath.absolute()
            if str(fp).endswith(".md") or str(fp).endswith(".txt"):
                continue
            if os.path.isfile(fp):
                tasks.append(process_file(fp, session))
        await asyncio.gather(*tasks)

if __name__ == '__main__':
    directory = input("Enter root directory: ")
    asyncio.run(process_directory(directory))
I will also try the "substitution" module and update the answer accordingly.

os.listdir() won't go after nine files

Hello, I am trying to make a program that automatically goes to imgur, enters the name that you typed and downloads the top 10 images. Everything works except the os library. When I call os.listdir(), after nine files it won't show any more files. I tried googling and found nothing. If you see something that I messed up, please tell me. Thanks in advance. Sorry for the bad grammar.
Here is the code sample:
#! python3
import requests, os, sys
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

os.chdir('imgur/')

browser = webdriver.Chrome(executable_path=r'C:\Users\{YOUR USERNAME}\Downloads\chromedriver.exe')
browser.get('https://imgur.com/')
browser.maximize_window()

search_bar = browser.find_element_by_tag_name('input')
search_bar.send_keys('happy')
search_bar.send_keys(Keys.ENTER)

pictures = browser.find_elements_by_tag_name('img')
for i in range(1, 11):
    res = requests.get(pictures[i].get_attribute('src'))
    try:
        res.raise_for_status()
    except:
        print("Link doesn't exist")
    if os.listdir() == []:
        picture = open('picture1.png', 'wb')
    else:
        picture = open('picture' + str(int(os.listdir()[-1][7:-4]) + 1) + '.png', 'wb')
    print(os.listdir())
    for chunk in res.iter_content(100000):
        picture.write(chunk)
    picture.close()
os.listdir(".")  # you missed adding the address
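Note that os.listdir() returns names in arbitrary order, and even when sorted they compare as strings, so 'picture10.png' sorts before 'picture9.png' and taking the last entry stops working after nine files. A sketch of a more robust way to compute the next file name (assuming the pictureN.png naming from the question):
import os
import re

def next_picture_name(folder='.'):
    # Collect the numeric part of every pictureN.png and take the maximum
    numbers = []
    for name in os.listdir(folder):
        m = re.match(r'picture(\d+)\.png$', name)
        if m:
            numbers.append(int(m.group(1)))
    return 'picture' + str(max(numbers, default=0) + 1) + '.png'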

How to automatically transfer Images to a Folder

I've been trying to test out my auto file transfer program, but I can't figure out a way to make it check whether a file is a ".png"/".jpg", so that it automatically transfers screenshots from the desktop to a folder.
I have been trying different methods to do this, but there is just no luck. Please help.
from watchdog.observers import Observer
import time
from watchdog.events import FileSystemEventHandler
import os
import json

class MyHandler(FileSystemEventHandler):
    def on_modified(self, event):
        for filename in os.listdir(folder_to_track):
            src = folder_to_track + "/" + filename
            new_destination = folder_destination + "/" + filename
            os.rename(src, new_destination)

folder_to_track = '/Users/mattbecute/Desktop'
folder_destination = '/Users/mattbecute/Desktop/Screenshots'

event_handler = MyHandler()
observer = Observer()
observer.schedule(event_handler, folder_to_track, recursive=True)
observer.start()

try:
    while True:
        time.sleep(10)
except KeyboardInterrupt:
    observer.stop()
observer.join(3)
You can use the glob module to list only those files ending with a particular extension.
import glob
folder_to_track = '/Users/mattbecute/Desktop'
im_files=glob.glob(folder_to_track+"/*.png")
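To apply that check inside the watchdog handler from the question, you could skip anything that isn't an image before moving it (a sketch reusing the question's folder variables; the extension tuple is an assumption):
class MyHandler(FileSystemEventHandler):
    def on_modified(self, event):
        for filename in os.listdir(folder_to_track):
            # Only move image files; everything else stays on the desktop
            if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
                continue
            src = os.path.join(folder_to_track, filename)
            new_destination = os.path.join(folder_destination, filename)
            os.rename(src, new_destination)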

Why does wrong indentation cause the function to behave incorrectly?

I cannot understand why this error happens.
First, I wrote:
import urllib.request
from bs4 import BeautifulSoup
import time
import os

def download_image(url, name):
    path = "./scrape_image/"
    imagename = str(name) + ".jpg"
    if not os.path.exists(path):
        os.makedirs(path)
        print(path)
        urllib.request.urlretrieve(url, path + imagename)

url = "https://api.XXXkeyword=YYY&limit=1000"
response = urllib.request.urlopen(url)
rss = response.read().decode("utf-8")
soup = BeautifulSoup(rss, "xml")
name = 0
for s in soup.find_all("photo"):
    url = s.find_all("image_url")[0].string
    name += 1
    download_image(url, name)
By running this code, I can get only 1 image from the API, but the correct code should get 1000 images. I fixed the indentation in the first code, so my code became:
import urllib.request
from bs4 import BeautifulSoup
import time
import os

def download_image(url, name):
    path = "./image/"
    imagename = str(name) + ".jpg"
    if not os.path.exists(path):
        os.makedirs(path)
    print(path)
    urllib.request.urlretrieve(url, path + imagename)
    time.sleep(1)

url = "https://api.XXXkeyword=YYY&limit=1000"
response = urllib.request.urlopen(url)
rss = response.read().decode("utf-8")
soup = BeautifulSoup(rss, "xml")
name = 0
for s in soup.find_all("photo"):
    url = s.find_all("image_url")[0].string
    name += 1
    download_image(url, name)
At last, I can get 1000 images from the API. But I cannot understand why fixing the indentation made it work. Please give me an explanation.
Because in the first example you're only getting the image if your condition passes:
if not os.path.exists(path):
And that condition will only pass once because you immediately create the path:
os.makedirs(path)
For every other iteration of the loop, the condition is false. So the code within the conditional block doesn't execute.
Basically, an if block only executes if the condition is true. When you move your code out of the if block, it always executes regardless of the condition.
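A minimal sketch of the same effect, separate from the question's code (demo_dir is just an example name):
import os

path = "./demo_dir/"
for i in range(3):
    if not os.path.exists(path):
        os.makedirs(path)
        print("inside the if:", i)   # runs only on the first iteration
    print("outside the if:", i)      # runs on every iteration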

Trying to download ~200 files using ThreadPoolExecutor -> some files are skipped

I need to download ~200 files. If I run the code below for the first time, it downloads ~100 files. Then I need to run it a few more times to download the remaining files (i.e. if I run the code a 2nd time I get +20-30 new files, a 3rd time again +20-30, and so on). Why does this happen, and how can I fix it? Maybe this is important: the server may take up to 10 seconds to generate some files.
import os
import concurrent.futures
import multiprocessing
import urllib.request
import shutil

def get_cities(osm_id, file_name, place_type):
    file_folder = os.path.join(os.getcwd(), place_type)
    file_name = file_name + '_' + place_type
    file_path = os.path.join(file_folder, file_name)
    if not os.path.exists(file_folder):
        os.makedirs(file_folder)
    if not os.path.exists(file_path):
        area_id = str(3600000000 + osm_id)
        url = 'http://overpass-api.de/api/interpreter?data=(node["place"="city"](area:' + area_id + '););out;'
        with urllib.request.urlopen(url) as response, open(file_path, 'wb') as out_file:
            shutil.copyfileobj(response, out_file)

def cities_controller(place_type='cities'):
    # Countries, Languages, CountriesTranslations are Django models from the project
    countries = Countries.objects.filter(status=1).exclude(osm_id=None)
    en_group_inst = Languages.objects.filter(iso_code='en').first().group
    with concurrent.futures.ThreadPoolExecutor(max_workers=multiprocessing.cpu_count()) as executor:
        for country in countries:
            osm_id = country.osm_id
            file_name = CountriesTranslations.objects.get(
                country=country, lang_group=en_group_inst).common_name.lower().replace(' ', '_')
            executor.submit(get_cities, osm_id, file_name, place_type)

cities_controller()
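One thing worth checking: executor.submit() swallows any exception raised inside a worker unless you keep the returned future and call result() on it, so failed downloads (e.g. Overpass API rate limits, or timeouts while the server spends up to 10 seconds generating a file) can vanish silently and the file is simply missing. A diagnostic sketch, with run_all and its jobs list as hypothetical helpers, that surfaces worker exceptions:
import concurrent.futures

def run_all(executor, jobs):
    # jobs: iterable of (callable, args) tuples - hypothetical helper structure
    futures = {executor.submit(fn, *args): args for fn, args in jobs}
    for fut in concurrent.futures.as_completed(futures):
        try:
            fut.result()  # re-raises whatever the worker raised
        except Exception as e:
            print(f"Job {futures[fut]} failed: {e}")  # log these and retry them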
