How do I download attachments from gmail? - python-3.x

I was able to locate the JSON file I want to download from attachment using :
part.get_content_type() == "application/json"
But I have no idea how to actually download it and save it to a local directory. Can someone please help?
Here is the whole method:
#based on Python example from
#https://github.com/codingforentrepreneurs/30-Days-of-Python/blob/master/tutorial-reference/Day%209/inbox.py
import imaplib
host = 'imap.gmail.com' #inbox
def get_inbox(tempList):
    """Fetch every unread message from the Gmail inbox.

    Args:
        tempList: indexable credentials holder; index 0 is used as the
            login name and index 2 as the password.

    Returns:
        A list with one dict per unread message. The dicts are still empty
        placeholders: the JSON attachment is located but not processed yet.
    """
    # The snippet only imported imaplib, but message_from_bytes lives in email.
    import email

    my_message = []
    # The context manager logs out of the server even if a fetch fails.
    with imaplib.IMAP4_SSL(host) as mail:  # server
        mail.login(tempList[0], tempList[2])  # login user name, user pass
        mail.select("inbox")  # default inbox
        _, search_data = mail.search(None, 'UNSEEN')
        for num in search_data[0].split():
            email_data = {}
            _, data = mail.fetch(num, '(RFC822)')  # getting the msg data from gmail
            _, raw_bytes = data[0]  # data in bytes
            email_message = email.message_from_bytes(raw_bytes)
            for part in email_message.walk():
                if part.get_content_type() == "application/json":
                    # TODO: the JSON attachment is found here but not saved yet.
                    pass
            my_message.append(email_data)
    return my_message

if the attachment is in another file type
just change the json in
if part.get_content_type() == "application/json"
to the file type you want to download

Related

Why is the body part of an email not found with Python imaplib?

I found an article about reading email with Python. The sender of the letter and the subject are found, but there is a problem with the body of the mail - it is not found. Are there perhaps other ways to read the body?
import imaplib
import email
from email.header import decode_header
import webbrowser
import os
# account credentials (placeholders; the "@" was garbled to "#" in the original text)
username = "username@mail.ru"
password = "password"
def clean(text: str) -> str:
    """Return *text* with every non-alphanumeric character replaced by "_".

    The result is used as a folder name, so characters that are not letters
    or digits (spaces, punctuation, path separators) are neutralised.
    """
    return "".join(c if c.isalnum() else "_" for c in text)
# Connect to the IMAP server over SSL and authenticate with the credentials above.
imap = imaplib.IMAP4_SSL("imap.mail.ru")
imap.login(username, password)
# identifier of the spam folder
status, messages = imap.select("&BCEEPwQwBDw-")
# The first element of the select() data is used as the number of messages.
messages = int(messages[0])
# Walk the messages from the highest number down to 1.
for i in range(messages, 0, -1):
res, msg = imap.fetch(str(i), "(RFC822)")
for response in msg:
# Only tuple entries of the fetch response carry the raw message bytes.
if isinstance(response, tuple):
msg = email.message_from_bytes(response[1])
# Decode the Subject and From headers; only the first decoded chunk is used.
subject, encoding = decode_header(msg["Subject"])[0]
if isinstance(subject, bytes):
subject = subject.decode(encoding)
From, encoding = decode_header(msg.get("From"))[0]
if isinstance(From, bytes):
From = From.decode(encoding)
print("Subject:", subject)
print("From:", From)
if msg.is_multipart():
for part in msg.walk():
# extract content type of email
content_type = part.get_content_type()
content_disposition = str(part.get("Content-Disposition"))
# NOTE(review): any failure while decoding is swallowed here, which leaves
# `body` unassigned (or holding the previous part's text); the print(body)
# below can then raise NameError.
try:
body = part.get_payload(decode=True).decode()
except:
pass
if content_type == "text/plain" and "attachment" not in content_disposition:
print(body)
elif "attachment" in content_disposition:
# Save the attachment into a folder named after the cleaned subject.
filename = part.get_filename()
if filename:
folder_name = clean(subject)
if not os.path.isdir(folder_name):
os.mkdir(folder_name)
filepath = os.path.join(folder_name, filename)
open(filepath, "wb").write(part.get_payload(decode=True))
else:
# Non-multipart message: the payload of the message itself is the body.
content_type = msg.get_content_type()
body = msg.get_payload(decode=True).decode()
if content_type == "text/plain":
print(body)
if content_type == "text/html":
# Write the HTML body to <cleaned subject>/index.html and open it in the browser.
folder_name = clean(subject)
if not os.path.isdir(folder_name):
os.mkdir(folder_name)
filename = "index.html"
filepath = os.path.join(folder_name, filename)
open(filepath, "w").write(body)
webbrowser.open(filepath)
# Close the selected mailbox, then end the session.
imap.close()
imap.logout()
For some reason, the body is not located and because of this, an error appears.
NameError: name 'body' is not defined
body is not defined because of this:
try:
body = part.get_payload(decode=True).decode()
except:
pass
You tried to define body, but failed due to some kind of error that you allowed to pass silently (hint: don't do that!). Then the next blocks of code assumed that body had already been assigned when it hadn't.
Solution: define body outside of the try/except clause:
# Give body a value up front so later code can always reference it.
body = None
try:
    body = part.get_payload(decode=True).decode()
except Exception:
    # Do not pass silently: print the active exception's traceback so you at
    # least know what the error was. print_exc() needs no arguments, unlike
    # print_exception(), which requires the exception to be passed in.
    import traceback
    traceback.print_exc()

Get text to csv format using python

I am able to get the data from pdf to text.
But now i need to get the data in csv format with table structure.
I tried to get the table structure, but it didn't work. Any inputs?
Also, i'm able to generate it through json.
Is there a way to get the result into table csv format?
any inputs ?
Below is the code i have used.
import boto3
import time
# Document to analyse: the S3 bucket and object name that lambda_handler
# passes to startJob.
s3BucketName = "textractanalysisexample"
documentName = "sheet_example.pdf"
def startJob(s3BucketName, objectName):
    """Start an asynchronous Textract text-detection job for one S3 object.

    Args:
        s3BucketName: name of the bucket holding the document.
        objectName: name (key) of the document inside the bucket.

    Returns:
        The "JobId" value from the service response.
    """
    client = boto3.client('textract')
    response = client.start_document_text_detection(
        DocumentLocation={
            'S3Object': {
                'Bucket': s3BucketName,
                'Name': objectName,
            }
        }
    )
    return response["JobId"]
def isJobComplete(jobId):
    """Block until the Textract job leaves the "IN_PROGRESS" state.

    Polls every 5 seconds. Despite the name, this returns the final
    "JobStatus" string rather than a boolean, so callers should compare
    the result instead of testing its truthiness.

    Args:
        jobId: identifier returned by startJob.

    Returns:
        The first "JobStatus" value that is not "IN_PROGRESS".
    """
    # For production use cases, use SNS based notification
    # Details at: https://docs.aws.amazon.com/textract/latest/dg/api-async.html
    client = boto3.client('textract')
    while True:
        time.sleep(5)
        response = client.get_document_text_detection(JobId=jobId)
        status = response["JobStatus"]
        if status != "IN_PROGRESS":
            return status
def getJobResults(jobId):
    """Collect every result page of a Textract text-detection job.

    Args:
        jobId: identifier returned by startJob.

    Returns:
        A list of raw service responses, one per result page, in the order
        they were fetched.
    """
    client = boto3.client('textract')
    response = client.get_document_text_detection(JobId=jobId)
    pages = [response]
    print("Resultset page recieved: {}".format(len(pages)))
    # Keep following the pagination token until a response comes without one.
    nextToken = response.get('NextToken')
    while nextToken:
        response = client.get_document_text_detection(JobId=jobId, NextToken=nextToken)
        pages.append(response)
        nextToken = response.get('NextToken')
    return pages
def lambda_handler(event, context):
    """Run text detection on the configured document and print each line.

    Starts a job for s3BucketName/documentName, waits for it to finish and
    prints the text of every "LINE" block in the results.

    Args:
        event: Lambda event payload (unused).
        context: Lambda context object (unused).
    """
    jobId = startJob(s3BucketName, documentName)
    status = isJobComplete(jobId)
    # isJobComplete returns a status string, which is truthy even for
    # "FAILED"; check the value explicitly before reading results.
    if status not in ("SUCCEEDED", "PARTIAL_SUCCESS"):
        print("Textract job {} ended with status {}".format(jobId, status))
        return
    # Print detected text
    for resultPage in getJobResults(jobId):
        for item in resultPage["Blocks"]:
            if item["BlockType"] == "LINE":
                print(item["Text"])
You can use the csv module to write to a CSV file like so:
import csv

# Convert the extracted text file to CSV: every non-empty line is split on
# commas and written as one row under a fixed header.
# newline='' lets the csv module control line endings itself; without it,
# extra blank rows appear on platforms that translate "\n".
with open('my_pdf.txt', 'r') as in_file, \
        open('my_pdf.csv', 'w', newline='') as out_file:
    writer = csv.writer(out_file)
    writer.writerow(('title', 'intro'))
    for line in in_file:
        line = line.strip()
        if line:
            writer.writerow(line.split(","))
You can just put in the rows you need, and this splits your data into comma separated values. You can see more information for CSV writer (and csv python in general) here (Python Docs).

How to parse eml file and extract meta-data informations

I have an eml file with some attachments. I want to read text content in eml file and I want to extract meta-data information like(sender, from, cc, bcc, subject). Also I want to download the attachments as well. With the help of the below code I am only able to extract information/ text content in the body of the email.
import email
from email import policy
from email.parser import BytesParser
import glob
file_list = glob.glob('*.eml') # returns list of files
with open(file_list[2], 'rb') as fp: # select a specific email file from the list
    msg = BytesParser(policy=policy.default).parse(fp)
# preferencelist should be a sequence of subtypes; ('plain') is just the
# string 'plain', so the one-element tuple is spelled with a trailing comma.
body_part = msg.get_body(preferencelist=('plain',))
# get_body returns None when the message has no matching text part.
text = body_part.get_content() if body_part is not None else ''
print(text)
There was a module named emaildata, available for Python 2, that did the job.
Extracting MetaData Informations
# Python 2 only: uses the print statement and the emaildata package.
import email
from emaildata.metadata import MetaData
# Parse the .eml file into a message object.
message = email.message_from_file(open('message.eml'))
extractor = MetaData(message)
# presumably a dict of the extracted metadata fields; its keys are printed below
data = extractor.to_dict()
print data.keys()
Extracting Attachment Information
# Python 2 only: uses the print statement and the emaildata package.
import email
from emaildata.attachment import Attachment
message = email.message_from_file(open('message.eml'))
# Each item unpacks to the content, its filename, its mimetype and a nested
# message. NOTE(review): the loop variable `message` rebinds the outer name.
for content, filename, mimetype, message in Attachment.extract(message):
print filename
# Write the attachment content to a file named after the attachment.
with open(filename, 'w') as stream:
stream.write(content)
# If message is not None then it is an instance of email.message.Message
if message:
print "The file {0} is a message with attachments.".format(filename)
But this library is now deprecated and is of no use. Is there any other library that could extract the metadata and attachment-related information?
Meta-data information could be accessed using below code in Python 3.x
from email import policy
from email.parser import BytesParser
# Parse the .eml file from raw bytes using the default (modern) email policy.
with open(eml_file, 'rb') as eml_handle:
    msg = BytesParser(policy=policy.default).parse(eml_handle)

# Print the three headers of interest, one per line.
for label, header in (('To:', 'to'), ('From:', 'from'), ('Subject:', 'subject')):
    print(label, msg[header])
Remaining header informations could be accessed using msg.keys()
For downloading attachments from an eml file you can use the below code:
import sys
import os
import os.path
from collections import defaultdict
from email.parser import Parser
eml_mail = 'your eml file'
output_dir = 'mention the directory where you want the files to be download'
def parse_message(filename):
    """Parse the .eml file at *filename* and return the message object.

    The file is read as bytes: email files may contain 8-bit data that is
    not valid in the platform's default text encoding, which made the
    text-mode parse fail with UnicodeDecodeError.
    """
    from email.parser import BytesParser

    with open(filename, 'rb') as f:
        return BytesParser().parse(f)
def find_attachments(message):
    """
    Return a list of (parsed content-disposition dict, message part) tuples,
    one for each attachment found in *message*.

    The dict maps each Content-Disposition parameter name (e.g. 'filename')
    to its value with surrounding quotes removed. Parts without a
    Content-Disposition header, or whose disposition is not 'attachment',
    are ignored.
    """
    found = []
    for part in message.walk():
        if 'content-disposition' not in part:
            continue
        cdisp = [x.strip() for x in str(part['content-disposition']).split(';')]
        if cdisp[0].lower() != 'attachment':
            continue
        parsed = {}
        for kv in cdisp[1:]:
            # Split on the first '=' only, so values that contain '=' survive.
            key, sep, val = kv.partition('=')
            if not sep:
                # Empty or malformed segment, e.g. after a trailing ';'.
                continue
            key = key.strip()
            val = val.strip()
            if val.startswith('"'):
                val = val.strip('"')
            elif val.startswith("'"):
                val = val.strip("'")
            parsed[key] = val
        found.append((parsed, part))
    return found
def run(eml_filename, output_dir):
    """Extract every attachment of an .eml file into *output_dir*.

    Args:
        eml_filename: path of the .eml file to read.
        output_dir: directory to write the attachments to; created if missing.

    Attachments without a usable filename or without a decodable payload
    are reported and skipped.
    """
    msg = parse_message(eml_filename)
    attachments = find_attachments(msg)
    print ("Found {0} attachments...".format(len(attachments)))
    os.makedirs(output_dir, exist_ok=True)
    for cdisp, part in attachments:
        raw_name = cdisp.get('filename')
        if not raw_name:
            print("Skipping attachment without a filename")
            continue
        # prevent malicious crap: the name comes from the email, so keep only
        # its last component. This blocks absolute paths and "../" traversal,
        # whichever separator style the sender used.
        cdisp_filename = os.path.basename(raw_name.replace('\\', '/'))
        if cdisp_filename in ('', '.', '..'):
            print("Skipping attachment with unusable filename " + repr(raw_name))
            continue
        data = part.get_payload(decode=True)
        if data is None:
            # A multipart attachment has no single payload to write.
            print("Skipping " + cdisp_filename + ": no decodable payload")
            continue
        towrite = os.path.join(output_dir, cdisp_filename)
        print( "Writing " + towrite)
        with open(towrite, 'wb') as fp:
            fp.write(data)
run(eml_mail, output_dir)
Have a look at ParsEML: it bulk-extracts attachments from all eml files in a directory (originally from Stephan Hügel). I also used a modified version of MeIOC to easily extract all metadata in JSON format; if you want, I can share that too.

How to download all attachments of a mail using python IMAP

I need to download all the attachments from a particular mail in outlook.
The code below works fine if there is a single attachment, but when the mail has multiple attachments, it only downloads one.
Can anyone help me regarding this? Thanks.
I'm running this on python 3.7.
import imaplib
import email
import os
# Connect to the Outlook IMAP endpoint, log in and select the default mailbox.
server = imaplib.IMAP4_SSL('outlook.office365.com', 993)
server.login('Email id', 'Password')
server.select()
typ, data = server.search(None, '(SUBJECT "Subject name")')
mail_ids = data[0]
id_list = mail_ids.split()
for num in id_list:
    # Use a separate name so the search result above is not overwritten.
    typ, msg_data = server.fetch(num, '(RFC822)')
    # Parse the raw bytes directly; decoding the whole message as UTF-8 first
    # fails on mails that contain other encodings.
    email_message = email.message_from_bytes(msg_data[0][1])
    for part in email_message.walk():
        # Multipart containers hold other parts and carry no file themselves.
        if part.get_content_maintype() == 'multipart':
            continue
        if part.get('Content-Disposition') is None:
            continue
        fileName = part.get_filename()
        if fileName:
            # The name comes from the email; keep only its last component so
            # it cannot point outside the target directory.
            filePath = os.path.join('C:\\Users\\lokesing\\', os.path.basename(fileName))
            # Existing files are not overwritten.
            if not os.path.isfile(filePath):
                with open(filePath, 'wb') as fp:
                    fp.write(part.get_payload(decode=True))
# logout must be called; a bare `server.logout` only references the method.
server.logout()
print("Attachment downloaded from mail")
The output should be all attachments downloaded to my system at defined path.
You may use imap_tools package:
https://pypi.org/project/imap-tools/
from imap_tools import MailBox, Q
# get all attachments for each email from INBOX folder
with MailBox('imap.mail.com').login('test@mail.com', 'password') as mailbox:
    for msg in mailbox.fetch():
        for att in msg.attachments:
            print(att.filename, att.content_type)
            # Write each attachment's payload to a file named after it.
            with open('C:/1/{}'.format(att.filename), 'wb') as f:
                f.write(att.payload)
Here is an example of Downloading Attachments from Outlook Emails using Python.
I used the library called: exchangelib.
https://medium.com/@theamazingexposure/accessing-shared-mailbox-using-exchangelib-python-f020e71a96ab
Here is the Code Snippet:
from exchangelib import Credentials, Account, FileAttachment
import os.path
from pathlib import Path
# Log in with the user's own credentials and open the shared mailbox.
credentials = Credentials('Firstname.Lastname@someenterprise.com', 'Your_Password_Here')
account = Account('shared_mail_box_name@someenterprise.com', credentials=credentials, autodiscover=True)
filtered_items = account.inbox.filter(subject__contains='Data is ready for')
print("Getting latest email...")
# Reuse the filter built above; newest first, and only the latest message.
for item in filtered_items.order_by('-datetime_received')[:1]:
    print(item.subject, item.sender, item.datetime_received)
    for attachment in item.attachments:
        if isinstance(attachment, FileAttachment):
            filepath = os.path.join('C:\\path\\to\\your\\directory', attachment.name) #this part will download the attachment to local file system
            with open(filepath, 'wb') as f:
                f.write(attachment.content)
            print('Saved attachment to:', filepath)

Encoded image gives different code when it is sent with smtplib

I've tried to encode an image with this method :
def resim_ac(self):
    """Ask the user for an image file and return it base64-encoded.

    Returns:
        str() of the base64-encoded bytes (i.e. the "b'...'" text form the
        original produced), or "" when no file was chosen.
    """
    dosya_ismi = QFileDialog.getOpenFileName(self, "Resim Aç", os.getenv("HOME"))
    # presumably the first element is the selected path and it is empty when
    # the dialog is cancelled; open("") would raise, so return "" instead
    if not dosya_ismi[0]:
        return ""
    with open(dosya_ismi[0], "rb") as file:
        image = file.read()
    # base64.encodestring was removed in Python 3.9; encodebytes replaces it.
    encode_image = base64.encodebytes(image)
    # str() of a bytes object is never "", so the old emptiness check always
    # passed; the value is returned directly.
    return str(encode_image)
encoded_image equals to image: b'iVBORw0KGgoAAAANSUhEUgAAAgAAAAI...BlLm9yZ5vuPBoAAAAASUVORK5CYII=\n'(It is encoded image)
Whenever I try to send this code with email with this method:
# Build a multipart message whose only part is a plain-text body.
mesaj = MIMEMultipart()
# NOTE(review): self.resim_ac is referenced without being called, so str()
# yields the text form of the bound method, not the encoded image.
mesaj_govdesi2 = MIMEText(str(self.resim_ac), "plain")
mesaj.attach(mesaj_govdesi2)
mesaj["Subject"] = self.subject_text.text() # LineEdit
# Connect to Gmail's SMTP server on port 587 and switch to TLS before logging in.
mail = smtplib.SMTP("smtp.gmail.com", 587)
mail.ehlo()
mail.starttls()
mail.login(self.email, self.passw) # e-mail address and its password
mail.sendmail(self.email,self.email_to, mesaj.as_string())
print("Mail Sended....")
mail.close()
It gives me this
rather than b'iVBORw0KGgoAAAANSUhEUgAAAgAAAAI...BlLm9yZ5vuPBoAAAAASUVORK5CYII=\n'
My question is why these two are different? And how can I make them same
okay I solved it. The problem is that I should define encode_image with self. So it is easily called in another function in the class.
content = str(self.encryption())
mesaj = MIMEMultipart()
mesaj_govdesi = MIMEText(str(content), "plain")
mesaj_govdesi2 = MIMEText(str(self.encode_image), "plain")
mesaj.attach(mesaj_govdesi)
mesaj.attach(mesaj_govdesi2)
mesaj["Subject"] = self.subject_text.text() #LineEdit
try:
mail = smtplib.SMTP("smtp.gmail.com", 587)
mail.ehlo()
mail.starttls()
mail.login(self.email, self.passw) # e-mail adress and it's password
mail.sendmail(self.email,self.email_to, mesaj.as_string())

Resources