Found an article about reading email with Python. The sender of the letter and the subject are found but there are problems with body of the mail - it is not found. Maybe there are some other ways to read body?
import imaplib
import email
from email.header import decode_header
import webbrowser
import os
# account credentials
username = "username#mail.ru"
password = "password"
def clean(text):
return "".join(c if c.isalnum() else "_" for c in text)
imap = imaplib.IMAP4_SSL("imap.mail.ru")
imap.login(username, password)
# it is spam folder id
status, messages = imap.select("&BCEEPwQwBDw-")
messages = int(messages[0])
for i in range(messages, 0, -1):
res, msg = imap.fetch(str(i), "(RFC822)")
for response in msg:
if isinstance(response, tuple):
msg = email.message_from_bytes(response[1])
subject, encoding = decode_header(msg["Subject"])[0]
if isinstance(subject, bytes):
subject = subject.decode(encoding)
From, encoding = decode_header(msg.get("From"))[0]
if isinstance(From, bytes):
From = From.decode(encoding)
print("Subject:", subject)
print("From:", From)
if msg.is_multipart():
for part in msg.walk():
# extract content type of email
content_type = part.get_content_type()
content_disposition = str(part.get("Content-Disposition"))
try:
body = part.get_payload(decode=True).decode()
except:
pass
if content_type == "text/plain" and "attachment" not in content_disposition:
print(body)
elif "attachment" in content_disposition:
filename = part.get_filename()
if filename:
folder_name = clean(subject)
if not os.path.isdir(folder_name):
os.mkdir(folder_name)
filepath = os.path.join(folder_name, filename)
open(filepath, "wb").write(part.get_payload(decode=True))
else:
content_type = msg.get_content_type()
body = msg.get_payload(decode=True).decode()
if content_type == "text/plain":
print(body)
if content_type == "text/html":
folder_name = clean(subject)
if not os.path.isdir(folder_name):
os.mkdir(folder_name)
filename = "index.html"
filepath = os.path.join(folder_name, filename)
open(filepath, "w").write(body)
webbrowser.open(filepath)
imap.close()
imap.logout()
For some reason, the body is not located and because of this, an error appears.
NameError: name 'body' is not defined
body is not defined because of this:
try:
body = part.get_payload(decode=True).decode()
except:
pass
You tried to define body, but failed due to some kind of error that you allowed to pass silently (hint: don't do that!). Then the next blocks of code assumed that body had already been assigned when it hadn't.
Solution: define body outside of the try/except clause:
body = None
try:
body = part.get_payload(decode=True).decode()
except:
# pass
import traceback
traceback.print_exception()
# this way you'll at least know what the error was
Related
I was able to locate the JSON file I want to download from attachment using :
part.get_content_type() == "application/json"
But have no idea on how to actually download and save it to local directory can someone pls help?
Here is the whole method:
#based on Python example from
#https://github.com/codingforentrepreneurs/30-Days-of-Python/blob/master/tutorial-reference/Day%209/inbox.py
import imaplib
host = 'imap.gmail.com' #inbox
def get_inbox(tempList):
mail = imaplib.IMAP4_SSL(host) #server
mail.login(tempList[0], tempList[2]) #login user name, user pass
mail.select("inbox") #defualt inbox
_, search_data = mail.search(None, 'UNSEEN')
my_message = []
for num in search_data[0].split():
email_data = {}
_, data = mail.fetch(num, '(RFC822)') #getting the msg data from gmail
_, b = data[0] #data in bytes
email_message = email.message_from_bytes(b)
for part in email_message.walk():
if part.get_content_type() == "application/json":
pass
my_message.append(email_data)
return my_message
if the attachment is in another file type
just change the json in
if part.get_content_type() == "application/json"
to the file type you want to downlaod
I have a problem to scrape gmail.
Candidates should extract or pull information relating to financial transactions from Gmail. The information could be invoices, subscription alerts, bills, etc. We want you to connect with a Gmail account and scrape or pull data of invoices, subscriptions, upcoming bills. You can scrape the emails for words like upcoming invoice or subscription or invoice etc and pull the
amount, date, attachment if any all these details.
I have to collect information and also store all the attachments. Is there any specific simple way to do it?
my Code
import imaplib
import os
import email, getpass
import sys
import json
class GmailFinin():
def helloWorld(self):
print("\nHello I'm here to help you")
def initializeVariables(self):
self.usr = ""
self.pwd = ""
self.mail = object
self.mailbox = ""
self.mailCount = 0
self.destFolder = ""
self.data = []
self.ids = []
self.idsList = []
def getLogin(self):
print("\nPlease enter your Gmail login details below.")
self.usr = input("Email: ")
# self.pwd = input("Password: ")
self.pwd = getpass.getpass("Enter your password --> ")
def attemptLogin(self):
self.mail = imaplib.IMAP4_SSL("imap.gmail.com", 993)
if self.mail.login(self.usr, self.pwd):
print("\nLogon SUCCESSFUL")
self.destFolder = input("\nPlease choose a destination folder in the form of /Users/username/dest/ (do not forget trailing slash!): ")
if not self.destFolder.endswith("/"): self.destFolder+="/"
return True
else:
print("\nLogon FAILED")
return False
def checkIfUsersWantsToContinue(self):
print("\nWe have found "+str(self.mailCount)+" emails in the mailbox "+self.mailbox+".")
return True if input("Do you wish to continue extracting all the emails into "+self.destFolder+"? (y/N) ").lower().strip()[:1] == "y" else False
def selectMailbox(self):
# self.mailbox = input("\nPlease type the name of the mailbox you want to extract, e.g. Inbox: ")
self.mailbox = "Inbox"
bin_count = self.mail.select(self.mailbox)[1]
self.mailCount = int(bin_count[0].decode("utf-8"))
return True if self.mailCount > 0 else False
def searchThroughMailbox(self):
type, self.data = self.mail.search(None, "ALL")
self.ids = self.data[0]
self.idsList = self.ids.split()
def parseEmails(self):
jsonOutput = {}
for anEmail in self.data[0].split():
type, self.data = self.mail.fetch(anEmail, '(UID RFC822)')
raw = self.data[0][1]
try:
raw_str = raw.decode("utf-8")
except UnicodeDecodeError:
try:
raw_str = raw.decode("ISO-8859-1") # ANSI support
except UnicodeDecodeError:
try:
raw_str = raw.decode("ascii") # ASCII ?
except UnicodeDecodeError:
pass
msg = email.message_from_string(raw_str)
jsonOutput['subject'] = msg['subject']
jsonOutput['from'] = msg['from']
jsonOutput['date'] = msg['date']
raw = self.data[0][0]
raw_str = raw.decode("utf-8")
uid = raw_str.split()[2]
# Body #
if msg.is_multipart():
for part in msg.walk():
partType = part.get_content_type()
## Get Body ##
if partType == "text/plain" and "attachment" not in part:
jsonOutput['body'] = part.get_payload()
## Get Attachments ##
if part.get('Content-Disposition') is not None:
attchName = part.get_filename()
print(attchName)
if bool(attchName):
attchFilePath = str(self.destFolder)+str(uid)+str("/")+str(attchName)
print(attchFilePath)
os.makedirs(os.path.dirname(attchFilePath), exist_ok=True)
with open(attchFilePath, "wb") as f:
f.write(part.get_payload(decode=True))
else:
# jsonOutput['body'] = msg.get_payload(decode=True).decode("utf-8") # Non-multipart email, perhaps no attachments or just text.
jsonOutput['body'] = msg.get_payload()
outputDump = json.dumps(jsonOutput)
emailInfoFilePath = str(self.destFolder)+str(uid)+str("/")+str(uid)+str(".json")
os.makedirs(os.path.dirname(emailInfoFilePath), exist_ok=True)
print(emailInfoFilePath)
with open(emailInfoFilePath, "w") as f:
f.write(outputDump)
def __init__(self):
self.initializeVariables()
self.helloWorld()
self.getLogin()
if self.attemptLogin():
not self.selectMailbox() and sys.exit()
else:
sys.exit()
not self.checkIfUsersWantsToContinue() and sys.exit()
self.searchThroughMailbox()
self.parseEmails()
if __name__ == "__main__":
run = GmailFinin()
I have tried using below for search, but I don't think i.e optimal because it is searching only in subject and How to add multiple or condition for list of keywords.
type, self.data = self.mail.search(None, '(OR TEXT "bill" SUBJECT "bill")')
Iam getting this error while trying to download a attachment file from gmail account.
fp = open(filename, 'wb')
TypeError: expected str, bytes or os.PathLike object, not NoneType
Earlier i could make fluent downloads from this script but now having this error. Please help me out what's wrong with the script?
import imaplib, email
import io
#log in and select the inbox
mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login('youmailid#gmail.com', 'yourpassword')
mail.select('testing')
#get uids of all messages
result, data = mail.uid('search', None, 'ALL')
uids = data[0].split()
print (uids)
#read the lastest message
result, data = mail.uid('fetch', uids[-1], '(RFC822)')
m = email.message_from_string(data[0][1].decode())
if m.get_content_maintype() == 'multipart':
for part in m.walk():
if part.get_content_maintype() == 'multipart': continue
if part.get('Content-Disposition') is None: continue
filename = part.get_filename()
fp = open(filename, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
print (f'{filename} saved!')
Looks like filename is None
Add the condition
if filename is None: continue
after the line
filename = part.get_filename()
I've tried to encode an image with this method :
def resim_ac(self):
dosya_ismi = QFileDialog.getOpenFileName(self, "Resim Aç", os.getenv("HOME"))
with open(dosya_ismi[0], "rb") as file:
image = file.read()
encode_image = base64.encodestring(image)
if str(encode_image) != "":
return str(encode_image)
else:
return ""
encoded_image equals to image: b'iVBORw0KGgoAAAANSUhEUgAAAgAAAAI...BlLm9yZ5vuPBoAAAAASUVORK5CYII=\n'(It is encoded image)
Whenever I try to send this code with email with this method:
mesaj = MIMEMultipart()
mesaj_govdesi2 = MIMEText(str(self.resim_ac), "plain")
mesaj.attach(mesaj_govdesi2)
mesaj["Subject"] = self.subject_text.text() #LineEdit
mail = smtplib.SMTP("smtp.gmail.com", 587)
mail.ehlo()
mail.starttls()
mail.login(self.email, self.passw) # e-mail adress and it's password
mail.sendmail(self.email,self.email_to, mesaj.as_string())
print("Mail Sended....")
mail.close()
It gives me this
rather than b'iVBORw0KGgoAAAANSUhEUgAAAgAAAAI...BlLm9yZ5vuPBoAAAAASUVORK5CYII=\n'
My question is why these two are different? And how can I make them same
okay I solved it. The problem is that I should define encode_image with self. So it is easily called in another function in the class.
content = str(self.encryption())
mesaj = MIMEMultipart()
mesaj_govdesi = MIMEText(str(content), "plain")
mesaj_govdesi2 = MIMEText(str(self.encode_image), "plain")
mesaj.attach(mesaj_govdesi)
mesaj.attach(mesaj_govdesi2)
mesaj["Subject"] = self.subject_text.text() #LineEdit
try:
mail = smtplib.SMTP("smtp.gmail.com", 587)
mail.ehlo()
mail.starttls()
mail.login(self.email, self.passw) # e-mail adress and it's password
mail.sendmail(self.email,self.email_to, mesaj.as_string())
I currently use this code to Extract attachments from EML files. And I wanted to know if I can link the attachment to mail (EML file). That is, add the eml file name as the attachment name prefix.
So I can know the attachment belongs to what mail.
Thank You
import os, re
import email
import argparse
import olefile
def extractAttachment(msg, eml_files, output_path):
#print len(msg.get_payload())
#print msg.get_payload()
if len(msg.get_payload()) > 2:
if isinstance(msg.get_payload(), str):
try:
extractOLEFormat(eml_files, output_path)
except IOError:
#print 'Could not process %s. Try manual extraction.' % (eml_files)
#print '\tHeader of file: %s\n' % (msg.get_payload()[:8])
pass
elif isinstance(msg.get_payload(), list):
count = 0
while count < len(msg.get_payload()):
payload = msg.get_payload()[count]
#récupérer les pièces jointes
filename = payload.get_filename()
#os.rename(filename,'rrrrr'+filename)
#filename=os.path.join(str(filename), str(eml_files))
if filename is not None:
try:
magic = payload.get_payload(decode=True)[:4]
except TypeError:
magic = "None"
# Print the magic deader and the filename for reference.
printIT(eml_files, magic, filename)
# Write the payload out.
writeFile(filename, payload, output_path)
count += 1
elif len(msg.get_payload()) == 2:
payload = msg.get_payload()[1]
filename = payload.get_filename()
try:
magic = payload.get_payload(decode=True)[:4]
except TypeError:
magic = "None"
# Print the magic deader and the filename for reference.
printIT(eml_files, magic, filename)
# Write the payload out.
writeFile(filename, payload, output_path)
elif len(msg.get_payload()) == 1:
attachment = msg.get_payload()[0]
payload = attachment.get_payload()[1]
filename = attachment.get_payload()[1].get_filename()
try:
magic = payload.get_payload(decode=True)[:4]
except TypeError:
magic = "None"
# Print the magic deader and the filename for reference.
printIT(eml_files, magic, filename)
# Write the payload out.
writeFile(filename, payload, output_path)
#else:
# print 'Could not process %s\t%s' % (eml_files, len(msg.get_payload()))
def extractOLEFormat(eml_files, output_path):
data = '__substg1.0_37010102'
filename = olefile.OleFileIO(eml_files)
msg = olefile.OleFileIO(eml_files)
attachmentDirs = []
for directories in msg.listdir():
if directories[0].startswith('__attach') and directories[0] not in attachmentDirs:
attachmentDirs.append(directories[0])
for dir in attachmentDirs:
filename = [dir, data]
if isinstance(filename, list):
filenames = "/".join(filename)
filename = msg.openstream(dir + '/' + '__substg1.0_3707001F').read().replace('\000', '')
payload = msg.openstream(filenames).read()
magic = payload[:4]
# Print the magic deader and the filename for reference.
printIT(eml_files, magic, filename)
# Write the payload out.
writeOLE(filename, payload, output_path)
#filename = str(eml_files)+"--"+str(filename)
def printIT(eml_files, magic, filename):
filename = str(eml_files)+"--"+str(filename)
print ('Email Name: %s\n\tMagic: %s\n\tSaved File as: %s\n' % (eml_files, magic, filename))
def writeFile(filename, payload, output_path):
filename = str(eml_files)+"--"+str(filename)
try:
file_location = output_path + filename
open(os.path.join(file_location), 'wb').write(payload.get_payload(decode=True))
except (TypeError, IOError):
pass
def writeOLE(filename, payload, output_path):
open(os.path.join(output_path + filename), 'wb')
def main():
parser = argparse.ArgumentParser(description='Attempt to parse the attachment from EML messages.')
parser.add_argument('-p', '--path',default='C:\\Users\\hamd\\Desktop\\TEX\\emails' ,help='eml')#Path to EML files
parser.add_argument('-o', '--out', default='C:\\Users\\hamd\\Desktop\\TEX\\PJ\\eml_files\\',help='pj')#Path to write attachments to.
args = parser.parse_args()
if args.path:
input_path = args.path
else:
print ("You need to specify a path to your EML files.")
exit(0)
if args.out:
output_path = args.out
else:
print ("You need to specify a path to write your attachments to.")
exit(0)
for root, subdirs, files in os.walk(input_path):
for file_names in files:
eml_files = os.path.join(root, file_names)
msg = email.message_from_file(open(eml_files))
extractAttachment(msg, eml_files, output_path)
if __name__ == "__main__":
main()
I tried to write this as a comment but is was too long. I won't give a full blown solution, but I'll explain the idea.
A possible solution would be to create an hard link to the extracted attachment, giving to the hard link the same name of EML file. You can append an incremental suffix if you have more attachments in the same EML file:
whatever.eml (original email file)
whatever_001.attch (hard link to first extracted attachment)
whatever_002.attch (hard link to second extracted attachment)
...
This way:
you are free to move the extracted attachment anywhere else (but in the same disk, because hard links by definition work only on the same disk)
you can keep a copy of the attachment (the hard link) together with the EML file without consuming disk space
in case the extracted file is deleted you have a backup copy of the attachment (the hard links) without consuming disk space
In Python you can create an hard link simply with:
import os
os.link(existing_target_file, new_link_name)