Send email from Databricks Notebook with attachment - apache-spark

I'm new to Python and Spark, and I'm trying to write PySpark code that sends an email from Databricks with an attachment read from a mount point location. I'm using the code below:
import smtplib
from pathlib import Path
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email.mime.text import MIMEText
from email.utils import COMMASPACE, formatdate
from email import encoders
def send_mail(send_from = <from_email>, send_to = <to_email>, subject = "Test", message = "Test", files=["/mnt/<Mounted Point Directory>/"],
server="<SMTP Host>", port=<SMTP Port>, username='<SMTP Username>', password='<SMTP Password>',
use_tls=True):
msg = MIMEMultipart()
msg['From'] = send_from
msg['To'] = COMMASPACE.join(send_to)
msg['Date'] = formatdate(localtime=True)
msg['Subject'] = subject
msg.attach(MIMEText(message))
for path in files:
part = MIMEBase('application', "octet-stream")
with open(path, 'rb') as file:
part.set_payload(file.read())
encoders.encode_base64(part)
part.add_header('Content-Disposition',
'attachment; filename="{}"'.format(Path(path).name))
msg.attach(part)
smtp = smtplib.SMTP(server, port)
if use_tls:
smtp.starttls()
smtp.login(username, password)
smtp.sendmail(send_from, send_to, msg.as_string())
smtp.quit()
But for some reason the code raises a "file or directory does not exist" exception.
Am I missing anything here?
Thanks

You need to modify the code to make it work with DBFS, because the open function doesn't know anything about DBFS or other file systems and can only work with local files (see the documentation about DBFS).
You can do it as following:
if you're on "full Databricks", not Community Edition then you need to prepend the /dbfs to the file name, like, /dbfs/mnt/.... - this /dbfs mount is the way of accessing files on DBFS from the code that works with local files (but there are some limitations when writing to that location).
Or you can use dbutils.fs.cp command to copy file from DBFS to local file, and use that copy of file to attach, like this:
# Copy the file from DBFS to a local file (note the "file://" scheme on the target).
dbutils.fs.cp("/mnt/...", "file:///tmp/local-name")
# Attachments are read as raw bytes, so open the local copy in binary mode
# and bind the handle to a name so it can actually be read.
with open("/tmp/local-name", "rb") as file:
    ...

Related

Unable to save emails in DRAFT folder using Python GMAIL IMAP

I am trying to save multiple emails in my drafts folder so I could review and press the SEND button in my web browser but I seem to be running into multiple issues.
I spent about half a day checking google and stackoverflow but didnt get much luck:
Here are a few examples that seemed relevant but didnt work
How do I create a draft in Gmail using IMAP using Python
Programmatically Save Draft in Gmail drafts folder
Creating a Draft message in Gmail using the imaplib in Python
Programmatically Save Draft in Gmail drafts folder
Below is my code, which executes and completes with a 0 code but nothing is saved to my drafts folder. Can anyone please help ?
import imaplib
import ssl
import email.message
import email.charset
import time
class DraftMailDemo:
    """Demo that appends one UTF-8 text message to an IMAP drafts mailbox."""

    def send(self):
        """Log in, build the message and APPEND it to ``INBOX.Drafts``.

        Raises:
            imaplib.IMAP4.error: If the server does not answer ``OK`` to the
                APPEND command (imaplib itself only returns the status, so
                an unchecked failure would otherwise pass silently).
        """
        # Pass the default TLS context to the connection; it used to be
        # created and then never used.
        tls_context = ssl.create_default_context()
        server = imaplib.IMAP4_SSL('imap.gmail.com', ssl_context=tls_context)
        server.login('some.email#gmail.com', 'pass123')
        try:
            # Select mailbox
            server.select("INBOX.Drafts")

            # Create message
            new_message = email.message.Message()
            new_message["From"] = "sender#mydomain.com"
            new_message["To"] = "Jimmy <recipient#mydomain.com>"
            new_message["Subject"] = "Your subject"
            new_message.set_payload("""
        This is your message.
        It can have multiple lines and
        contain special characters: äöü.
        """)

            # Fix special characters by setting the same encoding we'll use
            # later to encode the message
            new_message.set_charset(email.charset.Charset("utf-8"))
            encoded_message = str(new_message).encode("utf-8")
            print(encoded_message)

            status, response = server.append(
                'INBOX.Drafts', '', imaplib.Time2Internaldate(time.time()), encoded_message)
            if status != 'OK':
                raise imaplib.IMAP4.error(
                    "APPEND to 'INBOX.Drafts' failed: %r" % (response,))
        finally:
            # Cleanup: always end the session, even when APPEND failed.
            server.logout()


if __name__ == '__main__':
    mail = DraftMailDemo()
    mail.send()
Output of the program:
/Library/Frameworks/Python.framework/Versions/3.10/bin/python3 /Users/prashanth/PycharmProjects/pythonTools/DraftMailDemo.py
b'From: sender#mydomain.com\nTo: Jimmy <recipient#mydomain.com>\nSubject: Your subject\nMIME-Version: 1.0\nContent-Type: text/plain; charset="utf-8"\nContent-Transfer-Encoding: base64\n\nCiAgICAgICAgVGhpcyBpcyB5b3VyIG1lc3NhZ2UuCiAgICAgICAgSXQgY2FuIGhhdmUgbXVsdGlw\nbGUgbGluZXMgYW5kCiAgICAgICAgY29udGFpbiBzcGVjaWFsIGNoYXJhY3RlcnM6IMOkw7bDvC4K\nICAgICAgICA=\n'
Process finished with exit code 0
I even tried this following code and it executes with a 0 exit code but still doesnt save anything to the Drafts folder.
import imaplib
import time
import email
def createdraft():
    """Append a minimal test message to the ``[Gmail]/Drafts`` mailbox.

    Raises:
        imaplib.IMAP4.error: If the server does not answer ``OK`` to APPEND.
    """
    conn = imaplib.IMAP4_SSL('imap.gmail.com', port=993)
    conn.login('some.email#gmail.com', 'pass123')
    try:
        conn.select('[Gmail]/Drafts')
        raw_message = str(email.message_from_string('TEST')).encode('UTF-8')
        # append() reports failure through its return value, not an exception.
        status, response = conn.append(
            "[Gmail]/Drafts", '', imaplib.Time2Internaldate(time.time()), raw_message)
        if status != 'OK':
            raise imaplib.IMAP4.error(
                "APPEND to '[Gmail]/Drafts' failed: %r" % (response,))
    finally:
        # End the IMAP session whether or not the draft was stored.
        conn.logout()


class SecondTryDraft:
    """Empty placeholder class; the work is done by ``createdraft``."""
    pass


if __name__ == '__main__':
    mail = SecondTryDraft()
    createdraft()
My Environment:
Python : 3.10
OS : Mac OS Big Sur 11.6.5
All the mail servers I know of add appended messages to INBOX.
This probably depends on the server settings.
You can try moving the messages to the drafts folder after adding them.
OK, I finally figured this out after multiple tries and tweaking the other examples. I was able to create a multipart MIME message that I can save to the Drafts folder, and it works, hurray!
It doesn't seem very efficient, though (execution takes around 3-5 seconds) - I could use some suggestions on making it more efficient.
import imaplib
import time
from email.message import EmailMessage
from email.headerregistry import Address
def createdraft(lead_name, lead_email):
    """Store an HTML introduction e-mail for one lead in ``[Gmail]/Drafts``.

    Args:
        lead_name: Display name of the recipient; its first space-separated
            word is used in the greeting.
        lead_email: Recipient address. As in the rest of this listing it is
            split on ``"#"`` into the local part and the domain.

    Raises:
        IndexError: If ``lead_email`` contains no ``"#"`` separator.
        imaplib.IMAP4.error: If the server does not answer ``OK`` to APPEND.
    """
    address_parts = lead_email.split("#")

    msg = EmailMessage()
    msg['Subject'] = "My Introduction"
    msg['From'] = Address("Jane Doe", "Jane.Doe", "gmail.com")
    msg['To'] = Address(lead_name, address_parts[0], address_parts[1])
    # No set_type('text/html') here: add_alternative() below turns the message
    # into multipart/alternative itself, and a pre-set content type would be
    # moved into an extra, empty first part.
    html_msg = f"""
<div class="gmail_default" style="color:rgb(0,0,255)"><font size="2">Hello {lead_name.split(" ")[0]},</font></div>
<div class="gmail_default" style="color:rgb(0,0,255)"><font size="2"><br></font></div>
<div class="gmail_default" style="color:rgb(0,0,255)"><font size="2">My name is John and I would like to connect with you.
<div class="gmail_default" style="color:rgb(0,0,255)"><font size="2">Look forward to hearing from you.<br></font></div>
<div class="gmail_default" style="color:rgb(0,0,255)"><font size="2"><br clear="all"></font></div>
<div><font size="2"><b><span style="color:rgb(0,0,255)">Jane</span></b></font></div>
<div><font size="2"><b><span style="color:rgb(0,0,255)">Mobile : 111-222-3333</span></b></font></div>
<div><font size="2"><b><span style="color:rgb(0,0,255)">Email : Jane.Doe#gmail.<wbr>com</span></b></font></div>
"""
    msg.add_alternative(html_msg, subtype="html")

    conn = imaplib.IMAP4_SSL('imap.gmail.com', port=993)
    conn.login('jane.doe#gmail.com', 'pass123')
    try:
        conn.select('[Gmail]/Drafts')
        # append() reports failure through its return value, not an exception.
        status, response = conn.append(
            "[Gmail]/Drafts", '', imaplib.Time2Internaldate(time.time()), str(msg).encode('UTF-8'))
        if status != 'OK':
            raise imaplib.IMAP4.error(
                "APPEND to '[Gmail]/Drafts' failed: %r" % (response,))
    finally:
        # End the IMAP session whether or not the draft was stored.
        conn.logout()


class CreateDraftMultiMimeText:
    """Empty placeholder class; the work is done by ``createdraft``."""
    pass


if __name__ == '__main__':
    mail = CreateDraftMultiMimeText()
    createdraft('John Doe', 'John.Doe#gmail.com')
Environment :
Python : 3.10
OS : Mac OS Big Sur 11.6.5

How to specify remote file attachment for SMTP mail in Python

How can a remote attachment file be specified so that it is included in an SMTP mail? The attachment file is located on a different server (with its own username/password access).
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email import encoders
mail_content = """ This is body content """
sender_addr = "sender_addr#server.com"
sender_pass = "apassword"
receiver_addr = "receiver_addr#server.com"

# Create MIME header
msg = MIMEMultipart()
msg['From'] = sender_addr
msg['To'] = receiver_addr
msg['Subject'] = 'A test mail subject sent by Python'
msg.attach(MIMEText(mail_content, 'plain'))

fname = "doc_1.pdf"
payload = MIMEBase('application', 'octet-stream')
# Attach an attachment to payload; the context manager closes the file
# handle, which the plain open() call never did.
with open(fname, 'rb') as attach_file:  # **<- How can I specify the remote path here?**
    payload.set_payload(attach_file.read())
encoders.encode_base64(payload)
# Add payload header with filename
payload.add_header('Content-Disposition', 'attachment', filename=fname)
msg.attach(payload)

# Create SMTP client; leaving the block sends QUIT and closes the socket
# even when STARTTLS, login or sending fails.
text = msg.as_string()
with smtplib.SMTP('smtp.gmail.com', 587) as client:
    client.starttls()
    client.login(sender_addr, sender_pass)
    client.sendmail(sender_addr, receiver_addr, text)
print('Mail sent!')
https://www.rfc-editor.org/rfc/rfc2110#section-4.1 explains the Content-Location: header.
Assuming the content you want to link to is in the variable attach_uri, something like
# Reference-only part: it carries no body, just a Content-Location header
# whose value is attach_uri.
payload = MIMEBase('application', 'octet-stream')
payload.add_header('Content-Location', attach_uri)

# Create MIME header
msg = MIMEMultipart()
msg['From'] = sender_addr
msg['To'] = receiver_addr
msg['Subject'] = 'A test mail subject sent by Python'

# Body text goes in first, the reference-only part second.
body_part = MIMEText(mail_content, 'plain')
msg.attach(body_part)
msg.attach(payload)
If you are asking how to retrieve stuff from a remote location and include it in an email, that's a pretty broad topic; but assuming you have the content on an HTTP server, try something like
import requests

# Download the remote file. The variable is attach_uri, the same name the
# surrounding text and the Content-Location example use.
r = requests.get(attach_uri, timeout=30)
# Fail loudly on 4xx/5xx instead of attaching an HTTP error page.
r.raise_for_status()
payload.set_payload(r.content)
# r.content is bytes; base64-encode the part so the message can be
# serialized with msg.as_string().
encoders.encode_base64(payload)
before you msg.attach(payload).
With this, there should be no need to set the Content-Location: to the original URI any longer (why would the user care where it came from? And you don't want to reveal the password etc).

Python's Email Message library output not getting accepted by Outlook 365 when i have a named attachments from

I've created a sample function to test sending emails with an attached html file, which i intend to use for reporting on automated test runs in the future (replacing an existing external powershell script). Note that I'm attaching the html file, not using the html as inline text in the body. I'm using our company's mailgun smtp account service to send the email.
I seem to have an issue with Outlook 365 (web hosted - uses the outlook.office.com domain) either rejecting or blocking the sent email, but interestingly the same email is received and accepted by my personal hotmail address (outlook.live.com domain). I've found Outlook 365 blocks or does not accept the email when I attempt to name the file in the email message object. But if I don't name it, it will come through (with a default name of "ATT00001.htm" ).
My code for this is below, but the key line seems to be
msg.add_attachment(open_file.read(), maintype='text', subtype='html', filename=filename)
If I drop the filename key it works (but with a default assigned filename) e.g.
msg.add_attachment(open_file.read(), maintype='text', subtype='html')
I have a suspicion there is something in the attachment's header or Content-disposition that Outlook 365 doesn't agree with, but i'm not sure what it is or how to work around.
I'm using the following (Python 3.6.5, on Windows 10 machine, smtplib and email.message seem to be built in)
Here is the code:
import smtplib
from email.message import EmailMessage
import os
def send_mail():
    """Send a test message with one HTML file attached via Mailgun SMTP.

    All inputs (credentials, addresses, file path) are hard-coded below.
    The attachment is added as ``text/html`` with an explicit file name.
    """
    MAILGUN_SMTP_LOGIN = "<my company's mailgun login>"
    MAILGUN_SMTP_PASSWORD = "<my company's mailgun password>"
    fromaddr = "muppet#sharklasers.com"  # the from address seems to be inconsequential
    toaddr = ['me#mycompanysdomainusingoffice365.com.au', 'me#hotmail.com']

    msg = EmailMessage()
    msg.preamble = 'This is preamble. Not sure where it should show in the email'
    msg['From'] = fromaddr
    msg['To'] = ', '.join(toaddr)
    msg['Subject'] = 'Testing attached html results send'
    msg.set_content(""" This is a test of attached html """)

    filename = 'api_automatedtests_20180903_1341.html'
    filepath = os.path.abspath('D:/work/temp/api_automatedtests_20180903_1341.html')
    # Read the report inside a context manager so the handle is closed.
    with open(filepath, "rb") as open_file:
        attachment_bytes = open_file.read()
    # msg.make_mixed()
    msg.add_attachment(attachment_bytes, maintype='text', subtype='html', filename=filename)
    # msg.add_attachment(attachment_bytes, maintype='text', subtype='html')

    # Leaving the block sends QUIT and closes the socket even on failure.
    with smtplib.SMTP(host="smtp.mailgun.org", port=587) as server:
        server.ehlo()
        server.starttls()
        server.login(MAILGUN_SMTP_LOGIN, MAILGUN_SMTP_PASSWORD)
        # Debug output is switched on only after login.
        server.set_debuglevel(1)
        server.send_message(msg)


if __name__ == "__main__":
    send_mail()
What I've tried
Tried sending with the same code using a textfile (with appropriate types). e.g.
msg.add_attachment(open_file.read(), maintype='text', subtype='plain', filename=filename)
Result: This works as expected (comes through with the given name - the filename is a string variable e.g. testfile.txt)
adding msg.make_mixed() to make sure it is identified as a multipart message. Result: No effect
Turning on the smtp debug level 1, Result: Mailgun says that everything has worked fine (and the messages do appear as expected in my hotmail account)
Not using the filename key in the msg.add_attachment call.
Result: This works the attachment comes through at ATT00001.htm
Interestingly the default name is *.htm while the filename I'm trying to use is *.html
Tried using a filename with *.htm and a subtype of 'htm' (instead of html)
Result: Same as for html (received on hotmail but not on outlook 365)
Tried using the generic types of maintype=''application', subtype='octet-stream'.
e.g. msg.add_attachment(open_file.read(), maintype='application', subtype='octet-stream', filename=filename)
Result: Same as for html (received on hotmail but not on outlook 365)
Tried using mimetypes.guess as shown in this link
https://docs.python.org/3.6/library/email.examples.html
guessed_type, encoding = mimetypes.guess_type(path)
# No guess could be made, or the file is encoded (compressed): fall back
# to a generic bag-of-bits type.
guess_is_usable = guessed_type is not None and encoding is None
ctype = guessed_type if guess_is_usable else 'application/octet-stream'
maintype, subtype = ctype.split('/', 1)
# Read the bytes first, then attach them once the file is closed.
with open(path, 'rb') as fp:
    attachment_bytes = fp.read()
msg.add_attachment(attachment_bytes, maintype=maintype, subtype=subtype, filename=filename)
Result: It's determined as maintype='text', subtype='html' and I get the same result as with my original code (ie arrives in hotmail but blocked by 365).
Checking my spam and clutter folders - was not there
Any suggestions on why the use of filename would be breaking it?
Update
After sending to a other email addresses with various providers I discovered:
1) muppet#sharklasers.com was not a trusted sender (can change this)
2) I discovered the attachment was being flagged as unsafe. The html file comes from pytest's html report with the single file option. It contains javascript for row expanders. Gmail warns the attachment may not be safe (office 365 just straight out blocks the email altogether).
Not sure how to work around 2). I can email the same file to myself between Outlook 365 and Gmail, and vice versa, and the file doesn't get blocked. It only gets blocked when I use the above script with Python's libraries and Mailgun SMTP. I suspect there is something I need to change in the email header to get around this, but I don't know what.
There seems to be some connection between trying to add the filename and the attachment being marked as unsafe
Okay, I figured it out. The problem was that the Content-Type needed to include "name=filename" in its value.
Also I needed to use maintype='multipart', subtype='mixed'.
I have 2 solutions.
solution 1
import smtplib
from email.message import EmailMessage
import os
def send_mail(body_text, fromaddr, recipient_list, smtp_login, smtp_pass, file_path):
    """Send ``body_text`` with the file at ``file_path`` attached (EmailMessage API).

    Args:
        body_text: Plain-text body of the message.
        fromaddr: Value of the ``From`` header.
        recipient_list: List of recipient addresses; joined into ``To``.
        smtp_login: Login for the Mailgun SMTP server.
        smtp_pass: Password for the Mailgun SMTP server.
        file_path: Path of the file to attach; its base name is used both as
            the ``name`` in the content type and as the attachment file name.
    """
    msg = EmailMessage()
    msg.preamble = 'This is preamble. Not sure where it should show'
    msg['From'] = fromaddr
    msg['To'] = ', '.join(recipient_list)
    msg['Subject'] = 'API Testing results'
    msg.set_content(body_text)

    filename = os.path.basename(file_path)
    # Read the attachment inside a context manager so the handle is closed.
    with open(file_path, "rb") as open_file:
        attachment_bytes = open_file.read()
    msg.add_attachment(attachment_bytes, maintype='multipart', subtype='mixed; name=%s' % filename, filename=filename)

    # Leaving the block sends QUIT and closes the socket even on failure.
    with smtplib.SMTP(host="smtp.mailgun.org", port=587) as server:
        server.ehlo()
        server.starttls()
        server.login(smtp_login, smtp_pass)
        server.send_message(msg)


if __name__ == "__main__":
    smtp_login = "<my smtp login>"
    smtp_pass = "<my smtp password>"
    recipient_list = ['user1#mycompany.com.au', 'user2#mycompany.com.au']
    file_path = os.path.abspath('D:/work/temp/api_automatedtests_20180903_1341.html')
    body_text = "test results for 03/09/2018 "
    fromaddr = 'autotesting#mycompany.com.au'
    # fromaddr is a required parameter; the call used to omit it, which
    # raised TypeError before anything was sent.
    send_mail(body_text=body_text, fromaddr=fromaddr, recipient_list=recipient_list,
              smtp_login=smtp_login, smtp_pass=smtp_pass, file_path=file_path)
solution 2 (according to the documentation, using the email.mime libraries is a legacy approach and the EmailMessage method is supposed to be used in preference):
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email import encoders
import os
def send_mail(body_text, fromaddr, recipient_list, smtp_login, smtp_pass, file_path):
    """Send ``body_text`` with the file at ``file_path`` attached (legacy email.mime API).

    Args:
        body_text: Plain-text body of the message.
        fromaddr: Sender address; used for ``From`` and the SMTP envelope.
        recipient_list: List of recipient addresses.
        smtp_login: Login for the Mailgun SMTP server.
        smtp_pass: Password for the Mailgun SMTP server.
        file_path: Path of the file to attach; its base name is used both as
            the ``name`` in the content type and as the attachment file name.
    """
    msg = MIMEMultipart()
    msg['From'] = fromaddr
    msg['To'] = ', '.join(recipient_list)
    msg['Subject'] = "Sending API test results"
    msg.attach(MIMEText(body_text, 'plain'))

    filename = os.path.basename(file_path)
    part = MIMEBase('multipart', 'mixed; name=%s' % filename)
    # Read the attachment inside a context manager so the handle is closed.
    with open(file_path, "rb") as attachment:
        part.set_payload(attachment.read())
    encoders.encode_base64(part)
    part.add_header('Content-Disposition', "attachment; filename= %s" % filename)
    msg.attach(part)

    text = msg.as_string()
    # Leaving the block sends QUIT and closes the socket even on failure.
    with smtplib.SMTP(host="smtp.mailgun.org", port=587) as server:
        server.starttls()
        server.login(smtp_login, smtp_pass)
        # Debug output is switched on only after login.
        server.set_debuglevel(1)
        server.sendmail(fromaddr, recipient_list, text)


if __name__ == '__main__':
    smtp_login = "<my smtp login>"
    smtp_pass = "<my smtp password>"
    recipient_list = ['user1#mycompany.com.au', 'user2#mycompany.com.au']
    file_path = os.path.abspath('D:/work/temp/api_automatedtests_20180903_1341.html')
    body_text = " Api test results for 03/09/2018 "
    fromaddr = "autotest#mycompany.com.au"
    send_mail(body_text=body_text, fromaddr=fromaddr, recipient_list=recipient_list, smtp_login=smtp_login, smtp_pass=smtp_pass,
              file_path=file_path)

File Partially Download with urllib.request.urlretrieve

I have this code Which is trying to retrieve file from Git Hub Repositories.
import os
import tarfile
from six.moves import urllib
import urllib.request
# NOTE(review): DOWNLOAD_ROOT is a GitHub "tree" (HTML browsing) URL and
# HOUSING_URL below names a directory, not the archive file, so whatever is
# downloaded from the default URL is not a tar archive.
DOWNLOAD_ROOT = "https://github.com/ageron/handson-ml/tree/master/"
HOUSING_PATH = os.path.join("datasets", "housing").replace("\\","/")
print(HOUSING_PATH)
HOUSING_URL = DOWNLOAD_ROOT + HOUSING_PATH
print(HOUSING_URL)
print(os.getcwd())


def fetch_housing_data(housing_url=HOUSING_URL, housing_path=HOUSING_PATH):
    """Download ``housing.tgz`` from ``housing_url`` and unpack it in ``housing_path``.

    Args:
        housing_url: URL of the archive to download.
        housing_path: Directory that receives ``housing.tgz`` and its
            extracted contents; created if it does not exist.

    Raises:
        tarfile.ReadError: If the downloaded file is not a readable archive.
    """
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(housing_path, exist_ok=True)
    tgz_path = os.path.join(housing_path, "housing.tgz").replace("\\","/")
    print(tgz_path)
    urllib.request.urlretrieve(housing_url, tgz_path)
    # The context manager closes the archive even if extraction fails.
    with tarfile.open(tgz_path) as housing_tgz:
        if hasattr(tarfile, "data_filter"):
            # The archive comes from the network: where the interpreter
            # supports extraction filters, refuse unsafe members.
            housing_tgz.extractall(path=housing_path, filter="data")
        else:
            housing_tgz.extractall(path=housing_path)


# Only download when run as a script, not when the module is imported.
if __name__ == "__main__":
    fetch_housing_data()
After executing the code I got this error: ReadError: file could not be opened successfully. I checked the size of the actual file against the file downloaded by this code and found that the file was only partially downloaded.
So is there any way to download the whole file? Thanks in advance.
Finally I found the problem. It was the link I was using to retrieve the file. I didn't know that for GitHub repositories the RAW link must be used together with the file name (leaving out the file name gives a 404 error).
So a small modification needs to be made to the code posted in my question.
That is :
Change the link from
DOWNLOAD_ROOT = "https://github.com/ageron/handson-ml/tree/master/"
To this :
DOWNLOAD_ROOT = "https://raw.githubusercontent.com/ageron/handson-ml/master/"
And this
HOUSING_URL = DOWNLOAD_ROOT + HOUSING_PATH
to
HOUSING_URL = DOWNLOAD_ROOT + "datasets/housing/housing.tgz"  # the actual file name is needed
Thank you !

How do I download a Google Sheet (Not workbook) in pipe delimited format from Google Drive using Python

I'm writing code to download a file from Google Drive to my local system using the Google Drive API. My code is below. I have the following questions:
1) Is there a way to specify a sheet number so that only that sheet is downloaded from a workbook, or will the complete workbook always be downloaded?
2) We have the MIME type 'text/csv'; is there a way to save the data using another delimiter, say a pipe?
3) Can we specify the download location? Right now the file is downloaded to wherever the Python script is.
from apiclient.discovery import build
from httplib2 import Http
from oauth2client import file , client , tools
import io
from apiclient.http import MediaIoBaseDownload
# Parse oauth2client's command-line flags when argparse can be imported.
try:
    import argparse
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
    flags = None

#Set the scope for authorization and specify json file
SCOPES = 'https://www.googleapis.com/auth/drive'
CLIENT_SECRET = 'client_secret.json'

#Atleast once we will have to allow our program to access document, after that it would be stored in storage.json file
store = file.Storage('storage.json')
credz = store.get()
if not credz or credz.invalid:
    flow = client.flow_from_clientsecrets(CLIENT_SECRET , SCOPES)
    credz = tools.run_flow(flow,store,flags)\
            if flags else tools.run(flow,store)

DRIVE = build('drive','v2',http=credz.authorize(Http()))
MIMETYPE='text/csv'
file_id='1p3yRgi093TKbsBrxkUkV1cP-6h8dWUIKXycU62i9Arc'
request = DRIVE.files().export_media(fileId=file_id,mimeType=MIMETYPE)

# Write the export to a local file; the context manager closes the output
# file once the last chunk has been written (it was never closed before).
with io.FileIO('Google App Scripts for beginner.csv','wb') as fh:
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    # next_chunk() is called until it reports the download as done.
    while not done:
        status, done = downloader.next_chunk()
        print ("Download %d%%." % int(status.progress() * 100))

Resources