I am trying to implement exception handling using PySpark in Databricks, where I need to check whether the file exists in the source location.
# Asker's sketch of the desired behaviour (indentation restored).
# Options are set on the reader first; csv() then performs the read.
df = spark.read.option("inferschema", "true").csv("mnt/pnt/abc.csv")
try:
    df = open("abc.csv", "rt")
    print("File opened")
except FileNotFoundError:
    print("File does not exist")
except Exception:
    # Catch-all for anything other than a missing file.
    print("Other error")
I would like to have something like the above code snippet, but I have not been able to make this approach work. Any help would be greatly appreciated.
You can't directly catch java.io errors with an except clause; however, you could do something like:
def read_file(path):
    """Read the CSV at *path* into a DataFrame, reporting a missing file.

    ``dbutils.fs.ls(path)`` is called before the read so that a missing
    path raises before ``spark.read`` is attempted. The handler looks for
    the Java exception class name in the Python exception's text.

    Returns the DataFrame on success. On any failure a message is printed
    and ``None`` is returned.
    """
    try:
        dbutils.fs.ls(path)
        return spark.read.option("inferschema", "true").csv(path)
    except Exception as e:
        if 'java.io.FileNotFoundException' in str(e):
            print('File does not exists')
        else:
            print('Other error')
        return None


read_file('mnt/pnt/abc.csv')
Related
I'm trying to implement concurrent requests to speed up the checking of a list of URLs, but it doesn't seem to be working with my code, as it's still checking them one by one.
# Asker's attempt, as posted (indentation was lost in extraction).
for domain in list:
try:
# The request is executed right here, in the loop itself.
follow_url = requests.head(f'http://{domain}', allow_redirects=True, timeout=60)
with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
# submit() is handed the value returned by requests.head(), not a function to call.
executor.submit(follow_url)
with open("alive.txt", "a") as file:
file.write(f'{domain}\n')
except Exception as e:
print(e)
You are not applying it correctly. You are creating a new thread pool inside every loop iteration, and the request has already run before anything is submitted to the pool. A correct way could be like this:
from concurrent.futures import ThreadPoolExecutor


def parallel_req(domain):
    """Send a HEAD request to ``http://<domain>`` and record it if it answers.

    When the request completes without raising, the domain is appended as
    one line to ``alive.txt``. A ``requests`` error is printed and the
    domain is skipped.
    """
    try:
        requests.head(f'http://{domain}', allow_redirects=True, timeout=60)
        with open("alive.txt", "a") as file:
            file.write(f'{domain}\n')
    except requests.exceptions.RequestException as e:
        print(e)


with ThreadPoolExecutor() as executor:
    # map() returns a lazy iterator; consuming it re-raises any exception a
    # worker raised that parallel_req did not handle, instead of losing it.
    list(executor.map(parallel_req, domains))
I'm using Python 3.8, Azure Data Lake gen 2 and the following plugins ...
azure-storage-blob==12.4.0
azure-storage-file-datalake==12.1.1
How do I check if a specific path exists on a file system? I tried this
from azure.storage.filedatalake import DataLakeFileClient
...
# Build a client for one file path inside the given file system,
# using the connection string.
file = DataLakeFileClient.from_connection_string(
    DATA_LAKE_CONN_STR, file_system_name=filesystem, file_path=path
)
but am getting an error that the "exists" method does not exist for DataLakeFileClient .
A much easier way to test file or path existence:
from azure.storage.filedatalake import DataLakeServiceClient
...
# `service_client` is created in the part of the snippet elided by `...` above.
try:
    file_system_client = service_client.get_file_system_client(file_system="my-file-system")
    file_client = file_system_client.get_file_client("my-file")
    if file_client.exists():
        print("file exists")
    else:
        print("file does not exist")
except Exception as e:
    # Any failure while talking to the service is printed rather than raised.
    print(e)
Change get_file_client() to get_directory_client() for testing a path.
If you want to check if a file exists on a filesystem, please refer to the following code
from azure.storage.filedatalake import DataLakeFileClient

account_name = 'testadls05'
account_key = 'CpfCQot******JOLvB+aJOZbsQ=='
file_system_name = 'test'

# Client bound to a single file ('test.txt') in the 'test' file system.
file_client = DataLakeFileClient(
    account_url=f"https://{account_name}.dfs.core.windows.net",
    file_system_name=file_system_name,
    file_path='test.txt',
    credential=account_key,
)

try:
    # Fetching the properties raises when the request fails.
    file_client.get_file_properties()
except Exception as error:
    print(error)
    # The exception is identified by class name, so no extra import is needed.
    if type(error).__name__ == 'ResourceNotFoundError':
        print("the path does not exist")
I am writing a program by which I can extract data from a file, and then based on some condition, I have to write that data to other files. These files do not exist and only the code will create these new files. I have tried every possible combination of print parameters but nothing is helping. The program seems to run fine with no error in IDLE but no new files are created. Can somebody give me a solution?
Here is my code:
# Asker's original attempt, reproduced as posted (indentation was lost in extraction).
try:
data= open('sketch.txt')
for x in data:
try:
# NOTE(review): a triple-quoted string follows the call directly on the same
# line; presumably it was meant as a comment.
(person, sentence)= x.split(':',1)"""data is in form of sentences with: symbol present"""
# NOTE(review): man/other are assigned after the split; whether this happens
# once or per line cannot be told from the flattened text -- TODO confirm.
man=[] # list to store person
other=[] #list to store sentence
if person=="Man":
man.append(sentence)
elif person=="Other Man":
other.append(sentence)
except ValueError:
pass
data.close()
except IOError:
print("file not found")
try:
man_file=open("man_file.txt","w")""" otherman_file and man_file are for storing data"""
otherman_file=open("otherman_file.txt", "w")
# file= is given the attribute `txt` of each file object, not the file object itself.
print(man,file= man_file.txt)
print(other, file=otherman_file.txt)
man_file.close()
otherman_file.close()
except IOError:
print ("file error")
2 problems
you should use
# "w+" opens the file for both reading and writing and truncates an existing file.
man_file = open("man_file.txt", "w+")
otherman_file = open("otherman_file.txt", "w+")
w+ - creates the file if it doesn't exist (truncating it if it does) and opens it for both writing and reading; note that plain "w" also creates the file
Modes 'r+', 'w+' and 'a+' open the file for updating (reading and writing); note that 'w+' truncates the file..
https://docs.python.org/2/library/functions.html
2.
# Quoted from the question: file= is given the attribute `txt` of each file object.
print(man,file= man_file.txt)
print(other, file=otherman_file.txt)
If the sketch.txt file does not exist, then "man" and "other" will not be initialized,
and the print call will throw another exception.
try to run this script
def func():
    """Split the lines of ``sketch.txt`` by speaker and write them to two files.

    Each line is split once on ``':'`` into ``(person, sentence)``. Sentences
    spoken by ``"Man"`` are collected in one list and those by ``"Other Man"``
    in another; lines without a colon are skipped. The two lists are then
    printed to ``man_file.txt`` and ``otherman_file.txt``.

    If ``sketch.txt`` cannot be opened, "file not found" is printed and both
    output files receive an empty list. If an output file cannot be written,
    "file error" is printed.
    """
    man = []  # sentences spoken by "Man"
    other = []  # sentences spoken by "Other Man"

    try:
        with open('sketch.txt', 'r') as data:
            for x in data:
                try:
                    (person, sentence) = x.split(':', 1)
                except ValueError:
                    # No ':' on this line, so there is nothing to unpack.
                    continue
                if person == "Man":
                    man.append(sentence)
                elif person == "Other Man":
                    other.append(sentence)
    except IOError:
        print("file not found")

    try:
        # The file objects themselves are passed to print(); `with` closes
        # both handles even if a write fails.
        with open("man_file.txt", "w+") as man_file, \
                open("otherman_file.txt", "w+") as otherman_file:
            print(man, file=man_file)
            print(other, file=otherman_file)
    except IOError:
        print("file error")


func()
I was trying to run this piece of code to generate a directory and a file inside it; the problem is that neither of them gets created until the program is terminated.
What can i do to create them while the program is still running?
def makeDirectory(dirRoot, path, fileName, fromaddr):
    """Create ``dirRoot`` and ``path``, then make sure ``foo.txt`` exists in ``path``.

    Each directory is created with ``os.mkdir`` and mode ``0o755``. An
    ``OSError`` from either call (for example because the directory already
    exists) is printed and otherwise ignored.

    NOTE(review): ``fileName`` and ``fromaddr`` are accepted but never used;
    the file name is hard-coded to ``foo.txt``. Confirm with callers before
    changing this.
    """
    for directory in (dirRoot, path):
        try:
            os.mkdir(directory, 0o755)
        except OSError as e:
            print(e)

    completeName = os.path.join(os.path.abspath(path + '/'), 'foo.txt')
    # Append mode creates the file if missing and leaves existing content
    # untouched; `with` closes the handle straight away.
    with open(completeName, 'a'):
        pass
How can I recover from an exception and continue with the next line of the opened file? I'm stuck!
# Asker's attempt, as posted (indentation was lost in extraction).
# A single try precedes the whole loop, and its handler prints the reason and
# then calls sys.exit().
try:
while True:
with open('us.txt') as f:
for user in f:
for tweet in tweepy.Cursor(api.user_timeline, screen_name=user, ).items():
print(tweet.user.screen_name)
csvWriter.writerow(tweet.user.screen_name)
except tweepy.TweepError as e:
print(e.reason)
sys.exit()
If your (admittedly a bit confusing) question is asking how to ignore the exception, but still continue from where you were in the file, you should try and keep your try: and except: blocks as close to the problematic line.
For example, if csvWriter.writerow(tweet.user.screen_name) is the line which fails, you could do:
# NOTE(review): the outer `while True` is kept from the question; it re-reads
# us.txt indefinitely.
while True:
    with open('us.txt') as f:
        for user in f:
            # Lines read from a file keep their trailing newline; strip it
            # before using the value as a screen name, and skip blank lines.
            screen_name = user.strip()
            if not screen_name:
                continue
            for tweet in tweepy.Cursor(api.user_timeline, screen_name=screen_name).items():
                print(tweet.user.screen_name)
                # Only the write is guarded, so a failure here skips this
                # one row and the loop carries on with the next tweet.
                try:
                    # Presumably csvWriter is a csv.writer: writerow() takes a
                    # sequence of fields, so the name is wrapped in a list to
                    # land in one column.
                    csvWriter.writerow([tweet.user.screen_name])
                except tweepy.TweepError as e:
                    print(e)
If instead the error is in the for tweet in tweepy.Cursor(... line, you could do this:
# NOTE(review): the outer `while True` is kept from the question; it re-reads
# us.txt indefinitely.
while True:
    with open('us.txt') as f:
        for user in f:
            # Lines read from a file keep their trailing newline; strip it
            # before using the value as a screen name, and skip blank lines.
            screen_name = user.strip()
            if not screen_name:
                continue
            # The whole per-user fetch is guarded, so an API error for one
            # user is printed and the loop moves on to the next line.
            try:
                for tweet in tweepy.Cursor(api.user_timeline, screen_name=screen_name).items():
                    print(tweet.user.screen_name)
                    # Presumably csvWriter is a csv.writer: writerow() takes a
                    # sequence of fields, so the name is wrapped in a list to
                    # land in one column.
                    csvWriter.writerow([tweet.user.screen_name])
            except tweepy.TweepError as e:
                print(e)
I hope this helps!