PermissionError: [Errno 13] Permission denied based on file size - python-3.x

I am trying to transfer an image file to a server.
Small files (6 KB, 20 KB, ...) transfer successfully,
but with large files (roughly 1 MB and up?) I get a permission-denied error while opening the file.
Code to transfer files to server:
from socket import socket, AF_INET, SOCK_STREAM
import os
import time
from watchdog.events import FileSystemEventHandler

clientSock = socket(AF_INET, SOCK_STREAM)
clientSock.connect(('host', 5091))
print('checked')
# ------------------------------------------------
class Handler(FileSystemEventHandler):
    def on_created(self, event):
        print(f'event type : {event.event_type}\n'
              f'event src_path : {event.src_path}')
        file_path = event.src_path
        if event.is_directory:
            print("Create directory")
        else:  # not event.is_directory
            Fname, Extension = os.path.splitext(os.path.basename(event.src_path))
            clientSock.send(b'ready')
            print("send to get ready")

            def send_img(Fname, Extension):
                capture_file_name = Fname + Extension
                file = open(capture_file_name, "rb")  # the PermissionError is raised here
                time.sleep(5)
                img_size = os.path.getsize(capture_file_name)
                print("img size : ")
                clientSock.send(b'%d' % img_size)
                print(img_size)
                time.sleep(3)
                img = file.read(img_size)
                file.close()
                clientSock.sendall(img)

            filename_list = [1]
            file_count = len(filename_list)
            for i in filename_list:
                send_img(Fname, Extension)
Error when trying to send the file 4.png (size 2.41 MB):
Exception in thread Thread-8:
Traceback (most recent call last):
  File "C:\Users\user\anaconda3\lib\threading.py", line 932, in _bootstrap_inner
    self.run()
  File "C:\Users\user\anaconda3\lib\site-packages\watchdog\observers\api.py", line 203, in run
    self.dispatch_events(self.event_queue, self.timeout)
  File "C:\Users\user\anaconda3\lib\site-packages\watchdog\observers\api.py", line 376, in dispatch_events
    handler.dispatch(event)
  File "C:\Users\user\anaconda3\lib\site-packages\watchdog\events.py", line 331, in dispatch
    {
  File "<ipython-input-1-7e86cca481fd>", line 66, in on_created
  File "<ipython-input-1-7e86cca481fd>", line 52, in send_img
PermissionError: [Errno 13] Permission denied: '4.png'
What is the cause?

I solved it by adding a delay before opening the file, just like the delay I had already added before sending the image file size to the server, like this:
def send_img(Fname, Extension):
    capture_file_name = Fname + Extension
    time.sleep(2)  # wait before opening the file
    file = open(capture_file_name, "rb")
    time.sleep(5)
    img_size = os.path.getsize(capture_file_name)
    print("img size : ")
    clientSock.send(b'%d' % img_size)
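A fixed sleep works, but the underlying cause is that on_created fires while another process is still writing the file, so Windows refuses the open with Errno 13. A more robust alternative (a sketch of my own, not from the original post; wait_until_stable is a hypothetical helper, not part of watchdog) is to poll until the file size stops changing:

import os
import time

def wait_until_stable(path, checks=3, interval=0.5):
    # Block until `path` reports the same size several times in a row,
    # i.e. the writer has (probably) finished.
    last_size, stable = -1, 0
    while stable < checks:
        try:
            size = os.path.getsize(path)
        except OSError:  # file may briefly be locked or missing
            size = -1
        stable = stable + 1 if size == last_size and size >= 0 else 0
        last_size = size
        time.sleep(interval)

Calling wait_until_stable(capture_file_name) in place of the fixed time.sleep(2) makes the delay proportional to how long the writer actually takes, so small and large files are handled the same way.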

Related

Python3 gzip create new file which did not exist yet

I am struggling with Python 3.9; maybe I'm just blind, so please give me a hint.
According to the documentation, the following code should create a ".gz" file:
import gzip
content = b"Lots of content here"
with gzip.open('/home/joe/file.txt.gz', 'wb') as f:
    f.write(content)
Is it correct to assume that the file file.txt.gz does not exist before that operation and it will be created during that operation?
My code looks like:
import gzip
...

class FileHandler:
    ...
    def archive(self):
        self.content = 'Hello compressed World'
        zipFile = '/home/user/archive/test.gz'
        print(f'{zipFile}')
        with gzip.open(zipFile, 'wt') as f:
            f.write(self.content)
    ...

if __name__ == '__main__':
    fp = FileHandler()
    fp.archive()
I get the following exception:
Traceback (most recent call last):
  File "/home/user/filehandling.py", line 55, in <module>
    fp.archive()
  File "/home/user/filehandling.py", line 46, in archive
    with gzip.open(zipFile, 'wt') as f:
  File "/usr/lib/python3.9/gzip.py", line 58, in open
    binary_file = GzipFile(filename, gz_mode, compresslevel)
  File "/usr/lib/python3.9/gzip.py", line 173, in __init__
    fileobj = self.myfileobj = builtins.open(filename, mode or 'rb')
FileNotFoundError: [Errno 2] No such file or directory: '/home/user/archive/test.gz'
Obviously the file does not exist. Why is it not created, as the documentation says?
So what am I doing wrong here?
---- SOLVED ----
@Czaporka is right, I was missing a part of the path.
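For the record: gzip.open does create the file itself, but not any missing parent directories, so a FileNotFoundError here typically means a directory in the path does not exist. A minimal sketch that creates the directory first (reusing the path from the question):

import gzip
import os

zipFile = '/home/user/archive/test.gz'
os.makedirs(os.path.dirname(zipFile), exist_ok=True)  # create /home/user/archive if missing
with gzip.open(zipFile, 'wt') as f:
    f.write('Hello compressed World')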

Why doesn't my multi processor program take the whole path of my image?

I've been trying to use multiprocessing in a program that uses tesseract to extract text from images. But when I pass an image's name to the worker, it only searches the directory for the first letter of the image's name:
import timeit
from os import listdir
from os.path import isfile, join
from multiprocessing import Process

import pandas as pd
import pytesseract
from PIL import Image

def tess(all_clips):
    img_text = {}
    start = timeit.default_timer()
    image_dirs = []
    for a in all_clips:
        image_dirs.append(Image.open('E:/fin_100_images/' + a))
    num = 0
    for img in image_dirs:
        df_temp = pytesseract.image_to_data(img, lang='mar', output_type='data.frame')
        df_temp = df_temp.dropna().drop(['level', 'page_num', 'par_num', 'line_num', 'word_num', 'conf'], axis=1).reset_index().drop('index', 1)
        img_text[img] = df_temp.T.to_dict()
        print('Total images done: ' + str(num + 1) + '\t Total images remaining: ' + str(len(all_clips) - num - 1) + ' ', end='\r')
        num = num + 1
    stop = timeit.default_timer()
    df_temp = pd.DataFrame(list(img_text.items()), columns=['name', 'word_detail'])
    print('Time: ', stop - start)
    return df_temp

if __name__ == '__main__':
    all_clips = list(set([f for f in listdir('E:/fin_100_images/') if isfile(join('E:/fin_100_images/', f))]))
    processes = []
    for i in all_clips:
        process = Process(target=tess, args=(i,))
        processes.append(process)
        process.start()
    for process in processes:
        process.join()
    print("Multiprocessing complete")
I'm getting the following error:
FileNotFoundError: [Errno 2] No such file or directory: 'E:/fin_100_images/l'
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 297, in _bootstrap
    self.run()
  File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\PRATHAMESH\Desktop\TEST_multi.py", line 28, in tess
    image_dirs.append(Image.open('E:/fin_100_images/' + a))
  File "C:\ProgramData\Anaconda3\lib\site-packages\PIL\Image.py", line 2770, in open
    fp = builtins.open(filename, "rb")
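A likely cause (my reading of the traceback, since no accepted answer is quoted here): Process(target=tess, args=(i,)) passes a single filename string, and for a in all_clips inside tess then iterates over that string character by character, producing paths like 'E:/fin_100_images/l' from the first letter of the name. Wrapping the filename in a list keeps tess's interface intact; a minimal sketch:

from multiprocessing import Process

if __name__ == '__main__':
    # assumes `all_clips` is built exactly as in the question
    processes = []
    for i in all_clips:
        # args must hold the actual argument: ([i],) passes a one-element list,
        # so `for a in all_clips` iterates over filenames, not characters
        process = Process(target=tess, args=([i],))
        processes.append(process)
        process.start()
    for process in processes:
        process.join()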

paramiko sftp - when trying to download a file, I get OSError File path illegal

Initializing paramiko sftp client:
t = paramiko.Transport((self.sftp_server, 7790))
t.connect(None,self.sftp_user, self.sftp_pw)
sftp = paramiko.SFTPClient.from_transport(t)
listing the files:
dirlist = sftp.listdir(".")
print("Dirlist: %s" % dirlist)
files = sftp.listdir(path=self.location)
print(files)
gives all the files correctly.
But when reading a file from the list:
with sftp.open('/VA_EBAC_UP/EBAC_Article_KPI_Report.txt', "r") as f:
    data = f.read()
I get an error:
Caught exception: : File path [EBAC_Article_KPI_Report.txt] illegal.
Full stack trace:
Traceback (most recent call last):
  File "get_sftp_file_yohan.py", line 234, in get_ftp_files
    with sftp.open('/VA_EBAC_UP/EBAC_Article_KPI_Report.txt', "r") as f:
  File "/lib/python3.6/site-packages/paramiko/sftp_client.py", line 372, in open
    t, msg = self._request(CMD_OPEN, filename, imode, attrblock)
  File "/lib/python3.6/site-packages/paramiko/sftp_client.py", line 813, in _request
    return self._read_response(num)
  File "/lib/python3.6/site-packages/paramiko/sftp_client.py", line 865, in _read_response
    self._convert_status(msg)
  File "/lib/python3.6/site-packages/paramiko/sftp_client.py", line 898, in _convert_status
    raise IOError(text)
OSError: File path [EBAC_Article_KPI_Report.txt] illegal.
This was a permissions issue. The SFTP server only allowed uploads, and I was trying to download from the same server. Maybe this will help someone else; I just wish the error message were verbose enough to point at the permission problem.
d-wx------ 1 0 0 4096 05 Nov 00:00 VA_EBAC_UP
Reading any file from the above folder gives:
Caught exception: : File path [] illegal.
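To surface this kind of problem before attempting a read, one option (a sketch of my own, not part of the original answer) is to inspect the directory's permission bits with SFTPClient.stat and Python's stat.filemode; a d-wx------ listing like the one above carries no read bit for anyone:

import stat

# assumes `sftp` is the connected paramiko.SFTPClient initialized above
attrs = sftp.stat('/VA_EBAC_UP')
mode = stat.filemode(attrs.st_mode)  # e.g. 'd-wx------'
print(mode)
if 'r' not in mode:
    print("No read permission on this directory; downloads will fail.")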

FileNotFoundError: [Errno 2] No such file or directory: '2MCREF~E.JPG'

I'm trying to open an image that resides at a different location than my script.
Code:
import os
from PIL import Image

folder = '/Users/abc'
if not os.listdir(folder):
    print('Folder not found')
else:
    print('"{}" found'.format(folder))
    for file in os.listdir(folder):
        print(file)
        data = Image.open(file, 'r')
print('Done')
Error:
"/Users/abc" found
2MCREF~E.JPG
Traceback (most recent call last):
  File "img_to_s3bucket.py", line 25, in <module>
    data = Image.open(file,'r')
  File "/Users/AjayB/anaconda3/envs/MyDjangoEnv/lib/python3.6/site-packages/PIL/Image.py", line 2770, in open
    fp = builtins.open(filename, "rb")
FileNotFoundError: [Errno 2] No such file or directory: '2MCREF~E.JPG'
How to tackle this?
This happens because os.listdir returns bare file names, not full paths; if your working directory is not the same as folder, this call fails:
data = Image.open(file, 'r')
You can fix it by joining the folder and the file name:
data = Image.open(os.path.join(folder, file), 'r')
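An equivalent sketch with pathlib (my own variant, not from the original answer), which avoids the join entirely because iterdir yields full paths:

from pathlib import Path
from PIL import Image

folder = Path('/Users/abc')
for path in folder.iterdir():  # yields Path objects that include the folder prefix
    if path.is_file():
        data = Image.open(path)  # Image.open accepts path-like objects
        print(path.name, data.size)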

Handling Exceptions on requests

I have a bunch of URLs (over 50k) in a CSV file from different newspapers. I'm primarily looking for the main headline <h1> and the main paragraphs <p>.
I'm getting an exception that I'm not familiar with and don't know how to handle. Here is the message I get back:
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/urllib3/util/connection.py", line 60, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/socket.py", line 745, in getaddrinfo
    for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno 8] nodename nor servname provided, or not known

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/urllib3/connectionpool.py", line 601, in urlopen
    chunked=chunked)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/urllib3/connectionpool.py", line 346, in _make_request
    self._validate_conn(conn)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/urllib3/connectionpool.py", line 850, in _validate_conn
    conn.connect()
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/urllib3/connection.py", line 284, in connect
    conn = self._new_conn()
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/urllib3/connection.py", line 150, in _new_conn
    self, "Failed to establish a new connection: %s" % e)
urllib3.exceptions.NewConnectionError: <urllib3.connection.VerifiedHTTPSConnection object at 0x118e1a6a0>: Failed to establish a new connection: [Errno 8] nodename nor servname provided, or not known

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/requests/adapters.py", line 440, in send
    timeout=timeout
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/urllib3/connectionpool.py", line 639, in urlopen
    _stacktrace=sys.exc_info()[2])
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/urllib3/util/retry.py", line 388, in increment
    raise MaxRetryError(_pool, url, error or ResponseError(cause))
urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='www.cnn.com', port=443): Max retries exceeded with url: /2019/02/01/us/chicago-volunteer-homeless-cold-trnd/index.html?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+CNN+-+Top+Stories%29 (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x118e1a6a0>: Failed to establish a new connection: [Errno 8] nodename nor servname provided, or not known',))

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Volumes/FELIPE/english_news/pass_news.py", line 24, in <module>
    request_to_url = requests.get(urls).text
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/requests/api.py", line 72, in get
    return request('get', url, params=params, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/requests/api.py", line 58, in request
    return session.request(method=method, url=url, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/requests/sessions.py", line 508, in request
    resp = self.send(prep, **send_kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/requests/sessions.py", line 640, in send
    history = [resp for resp in gen] if allow_redirects else []
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/requests/sessions.py", line 640, in <listcomp>
    history = [resp for resp in gen] if allow_redirects else []
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/requests/sessions.py", line 218, in resolve_redirects
    **adapter_kwargs
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/requests/sessions.py", line 618, in send
    r = adapter.send(request, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/requests/adapters.py", line 508, in send
    raise ConnectionError(e, request=request)
requests.exceptions.ConnectionError: HTTPSConnectionPool(host='www.cnn.com', port=443): Max retries exceeded with url: /2019/02/01/us/chicago-volunteer-homeless-cold-trnd/index.html?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+CNN+-+Top+Stories%29 (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x118e1a6a0>: Failed to establish a new connection: [Errno 8] nodename nor servname provided, or not known',)))
Here is the code:
import uuid
import pandas as pd
import os
import requests
from bs4 import BeautifulSoup

cwd = os.path.dirname(os.path.realpath(__file__))
csv_file = os.path.join(cwd, "csv_data", "data.csv")
text_data = os.path.join(cwd, "raw_text2")
if not os.path.exists(text_data):
    os.makedirs(text_data)

df = pd.read_csv(csv_file)
for link, source in df.iterrows():
    urls = source['Link']
    source_name = source["Source"]
    request_to_url = requests.get(urls).text
    soup = BeautifulSoup(request_to_url, 'html.parser')
    try:
        h = soup.find_all('h1')
        try:
            text_h = h.get_text()
        except AttributeError:
            text_h = ""
        p = soup.find_all('p')
        text_p = ([p.get_text() for p in soup('p')])
        text_bb = str(" ".join(repr(e) for e in text_p))
        source_dir = os.path.join(text_data, source_name)
        try:
            os.makedirs(source_dir)
        except FileExistsError as e:
            pass
        filename = str(uuid.uuid4())
        write = open(os.path.join(source_dir, filename + ".txt"), "w+", encoding="utf-8")
        write.write(text_h + "\n" + text_bb)
        write.close()
        data = pd.Series(text_h + text_bb)
        with open("raw_text.csv", "a") as f:
            data.to_csv(f, encoding="utf-8", header=False, index=None)
    except:
        # Removes all <div> with id "sponsor-slug"
        for child_div in soup.find_all("div", id="sponsor-slug"):
            child_div.decompose()
        # Removes all <p> with class "copyright"
        for child_p in soup.find_all('p', attrs={'class': "copyright"}):
            child_p.decompose()
        # Removes all <a> tags and keeps the content if any
        a_remove = soup.find_all("a")
        for unwanted_tag in a_remove:
            unwanted_tag.replaceWithChildren()
        # Removes all <span> content and keeps content if any
        span_remove = soup.find_all("span")
        for unwanted_tag in span_remove:
            unwanted_tag.replaceWithChildren()
        # Removes all <em> content and keeps content if any
        span_remove = soup.find_all("em")
        for unwanted_tag in span_remove:
            unwanted_tag.replaceWithChildren()
What is the best way of handling these exceptions?
Is it possible to skip a URL when the connection fails and go on to the next one?
I want to crawl the content and add it to another CSV file, or to the current CSV if possible, and at the same time create a folder per source and write the corresponding text into that folder.
That is basically what this code is doing:
filename = str(uuid.uuid4())
write = open(os.path.join(source_dir, filename + ".txt"), "w+", encoding="utf-8")
write.write(text_h + "\n" + text_bb)
write.close()
data = pd.Series(text_h + text_bb)
with open("raw_text.csv", "a") as f:
    data.to_csv(f, encoding="utf-8", header=False, index=None)
I want to run NLP on each text and later try some sentiment-analysis tools on it.
Before getting the text value of the response in this line:
request_to_url = requests.get(urls).text
you can check whether the link is reachable at all. I wrote a simple function for this:
import requests

# Open session
s = requests.Session()
page_url = "http://wp.meQ/testBadUrl"  # example of bad URL

def get_response(page_url):
    """Get good or bad response from page_url."""
    # Create 'bad' Response object
    bad_resp = requests.Response()
    bad_resp.status_code = 404
    try:
        # By default 'allow_redirects' = True
        good_resp = s.get(page_url, timeout=(3, 10))
        if good_resp.ok:
            return good_resp
        else:
            return bad_resp
    except requests.exceptions.ConnectionError:
        print("Exception! Bad Request for URL: " + page_url)
        return bad_resp
    except requests.exceptions.Timeout:
        print("Exception! Timeout for URL: " + page_url)
        return bad_resp
    except:
        print("Unknown Exception!: " + page_url)
        return bad_resp

page_resp = get_response(page_url)
if page_resp.ok:
    # Your code for good URLs
    print("Append URL into 'GOOD' list")
else:
    # Your code for bad URLs
    print("Skip BAD url here...")
You can also add handlers for other requests exceptions (the full list is in the requests documentation) if you need them.
I hope this helps.
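To cover the "skip and move on" part of the question, here is a hedged sketch of how the helper above could be wired into the asker's CSV loop (variable names taken from the question; the continue is my addition):

for link, source in df.iterrows():
    urls = source['Link']
    source_name = source["Source"]
    page_resp = get_response(urls)
    if not page_resp.ok:
        continue  # skip this URL and move on to the next row
    soup = BeautifulSoup(page_resp.text, 'html.parser')
    # ... rest of the scraping and writing logic from the question ...

Because get_response never raises, the loop only ever sees a Response object, and the decision to skip is a single ok check per row.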
