implement FileNotFound exception in databricks using pyspark - apache-spark

I am trying to implement exception handling using PySpark in Databricks, where I need to check whether the file exists in the source location.
df = spark.read.csv.option("inferschema", "true").load("mnt/pnt/abc.csv")
try:
df = open("abc.csv", "rt")
print("File opened")
except FileNotFoundError:
print("File does not exist")
except:
print("Other error")**
I would like to have something like the code snippet above; however, I have not been able to get this approach to work. Any help would be much appreciated.

You can't catch java.io exceptions directly with a Python except clause; however, you could do something like this:
def read_file(path):
    """Read the CSV file at ``path`` with schema inference enabled.

    ``dbutils.fs.ls(path)`` is called before the read. If either call
    raises, a message is printed instead of propagating the exception and
    the function falls through, implicitly returning ``None``.

    Args:
        path: Location of the CSV file to load.

    Returns:
        The result of ``spark.read...csv(path)`` on success, otherwise
        ``None``.
    """
    try:
        dbutils.fs.ls(path)
        reader = spark.read.option("inferschema", "true")
        return reader.csv(path)
    except Exception as e:
        # The Java exception is recognised by its class name appearing in
        # the text of the Python exception, not by its type.
        is_missing = 'java.io.FileNotFoundException' in str(e)
        print('File does not exists' if is_missing else 'Other error')
read_file('mnt/pnt/abc.csv')

Related

Python3 Concurrent.Futures with Requests

I'm trying to implement concurrent requests to speed up the checking of a list of URLs, but it doesn't seem to be working with my code, as it's still checking them one by one.
for domain in list:
try:
follow_url = requests.head(f'http://{domain}', allow_redirects=True, timeout=60)
with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
executor.submit(follow_url)
with open("alive.txt", "a") as file:
file.write(f'{domain}\n')
except Exception as e:
print(e)
You are not applying it correctly. The request has already run synchronously before the executor is used, and you are creating a new thread pool inside every iteration. The correct way could be like this:
def parallel_req(domain):
    """Send a HEAD request to ``domain`` and record it in ``alive.txt``.

    The domain is appended to ``alive.txt`` (one per line) whenever the
    request completes without raising. A ``RequestException`` is printed
    and the domain is skipped.

    Args:
        domain: Host name to probe; ``http://`` is prepended to it.
    """
    try:
        # Only the network call is guarded; the response itself is not
        # needed, so it is not bound to a name.
        requests.head(f'http://{domain}', allow_redirects=True, timeout=60)
    except requests.exceptions.RequestException as e:
        print(e)
    else:
        with open("alive.txt", "a") as file:
            file.write(f'{domain}\n')
with ThreadPoolExecutor() as e:
e.map(parallel_req, domains)

For Python 3.8 Azure data lake Gen 2, how do I check if a file exists on a filesystem?

I'm using Python 3.8, Azure Data Lake gen 2 and the following plugins ...
azure-storage-blob==12.4.0
azure-storage-file-datalake==12.1.1
How do I check if a specific path exists on a file system? I tried this
from azure.storage.filedatalake import DataLakeFileClient
...
            file = DataLakeFileClient.from_connection_string(
                DATA_LAKE_CONN_STR, 
                file_system_name=filesystem, 
                file_path=path
            )
but am getting an error that the "exists" method does not exist for DataLakeFileClient .
A much easier way to test file or path existence:
from azure.storage.filedatalake import DataLakeServiceClient
...
try:
file_system_client = service_client.get_file_system_client(file_system="my-file-system")
if file_system_client.get_file_client("my-file").exists():
print("file exists")
else:
print("file does not exist")
except Exception as e:
print(e)
Change get_file_client() to get_directory_client() for testing a path.
If you want to check if a file exists on a filesystem, please refer to the following code
from azure.storage.filedatalake import DataLakeFileClient
account_name = 'testadls05'
account_key = 'CpfCQot******JOLvB+aJOZbsQ=='
file_system_name='test'
file_client = DataLakeFileClient(account_url="{}://{}.dfs.core.windows.net".format(
"https",
account_name
),
file_system_name=file_system_name,
file_path='test.txt',
credential=account_key
)
try:
file_client.get_file_properties()
except Exception as error:
print(error)
if type(error).__name__ =='ResourceNotFoundError':
print("the path does not exist")

How do I make my python program to write a new file

I am writing a program by which I can extract data from a file, and then based on some condition, I have to write that data to other files. These files do not exist and only the code will create these new files. I have tried every possible combination of print parameters but nothing is helping. The program seems to run fine with no error in IDLE but no new files are created. Can somebody give me a solution?
Here is my code:
try:
data= open('sketch.txt')
for x in data:
try:
(person, sentence)= x.split(':',1)"""data is in form of sentences with: symbol present"""
man=[] # list to store person
other=[] #list to store sentence
if person=="Man":
man.append(sentence)
elif person=="Other Man":
other.append(sentence)
except ValueError:
pass
data.close()
except IOError:
print("file not found")
try:
man_file=open("man_file.txt","w")""" otherman_file and man_file are for storing data"""
otherman_file=open("otherman_file.txt", "w")
print(man,file= man_file.txt)
print(other, file=otherman_file.txt)
man_file.close()
otherman_file.close()
except IOError:
print ("file error")
There are two problems.
1. You should use
man_file = open("man_file.txt", "w+")
otherman_file = open("otherman_file.txt", "w+")
w+ - create file if it doesn't exist and open it in write mode
Modes 'r+', 'w+' and 'a+' open the file for updating (reading and writing); note that 'w+' truncates the file..
https://docs.python.org/2/library/functions.html
2.
print(man,file= man_file.txt)
print(other, file=otherman_file.txt)
If the sketch.txt file does not exist, then "man" and "other" will not be initialized,
and the print calls will throw another exception.
Try running this script:
def func():
    """Split ``sketch.txt`` by speaker and write the results to two files.

    Each line of ``sketch.txt`` is split at the first ``:``. The text after
    the colon is collected into one list when the speaker is ``"Man"`` and
    into another when it is ``"Other Man"``; lines without a colon and
    lines from any other speaker are ignored.

    The two lists are then printed into ``man_file.txt`` and
    ``otherman_file.txt``. If ``sketch.txt`` cannot be read,
    ``"file not found"`` is printed and the (possibly empty) lists are
    still written. If the output files cannot be written,
    ``"file error"`` is printed.
    """
    # Initialised before any I/O so both names exist even when the input
    # file is missing.
    man = []  # sentences spoken by "Man"
    other = []  # sentences spoken by "Other Man"

    try:
        with open('sketch.txt', 'r') as data:
            for line in data:
                person, colon, sentence = line.partition(':')
                if not colon:
                    # No ':' on this line, so there is nothing to split.
                    continue
                if person == "Man":
                    man.append(sentence)
                elif person == "Other Man":
                    other.append(sentence)
    except IOError:
        print("file not found")

    try:
        with open("man_file.txt", "w+") as man_file, \
                open("otherman_file.txt", "w+") as otherman_file:
            # ``file=`` takes the open file object itself.
            print(man, file=man_file)
            print(other, file=otherman_file)
    except IOError:
        print("file error")
func()

How to create a directory/file on the disk on runtime

I was trying to run this piece of code to generate a directory and a file inside it; the thing is that neither of them gets created until the program is terminated.
What can I do to create them while the program is still running?
def makeDirectory(dirRoot, path, fileName, fromaddr):
    """Ensure ``dirRoot`` and ``path`` exist, then create ``foo.txt`` in ``path``.

    Both directories are created with mode ``0o755`` if they are missing.
    A directory that already exists is not treated as an error; any other
    ``OSError`` raised while creating a directory is printed and execution
    continues. ``foo.txt`` is then opened in append mode and closed
    immediately, which creates it if absent and leaves existing content
    untouched.

    Args:
        dirRoot: First directory to create.
        path: Second directory to create; ``foo.txt`` is placed inside it.
        fileName: Accepted but not used; the file name is always
            ``foo.txt``.
        fromaddr: Accepted but not used.
    """
    for directory in (dirRoot, path):
        try:
            os.makedirs(directory, 0o0755, exist_ok=True)
        except OSError as e:
            print(e)

    path = os.path.abspath(path + '/')
    completeName = os.path.join(path, 'foo.txt')
    # Opening in append mode creates the file; the context manager closes
    # the handle even if an error occurs.
    with open(completeName, 'a'):
        pass

Recover after exception

How can I recover from an exception and continue from the current line of the opened file? I'm stuck!
try:
while True:
with open('us.txt') as f:
for user in f:
for tweet in tweepy.Cursor(api.user_timeline, screen_name=user, ).items():
print(tweet.user.screen_name)
csvWriter.writerow(tweet.user.screen_name)
except tweepy.TweepError as e:
print(e.reason)
sys.exit()
If your (admittedly a bit confusing) question is asking how to ignore the exception but still continue from where you were in the file, you should try to keep your try: and except: blocks as close as possible to the problematic line.
For example, if csvWriter.writerow(tweet.user.screen_name) is the line which fails, you could do:
while True:
with open('us.txt') as f:
for user in f:
for tweet in tweepy.Cursor(api.user_timeline, screen_name=user,).items():
print(tweet.user.screen_name)
try:
csvWriter.writerow(tweet.user.screen_name)
except tweepy.TweepError as e:
print(e)
If instead the error is in the for tweet in tweepy.Cursor(... line, you could do this:
while True:
with open('us.txt') as f:
for user in f:
try:
for tweet in tweepy.Cursor(api.user_timeline, screen_name=user,).items():
print(tweet.user.screen_name)
csvWriter.writerow(tweet.user.screen_name)
except tweepy.TweepError as e:
print(e)
I hope this helps!

Resources