Python 3 - How to extract only email body

Python 3 - How to extract only email body - python-3.x

I have this code to extract email body, but the output show the message and some encrypted information. I need help to get only the message.
Last version i've try the lib imaplib, but i don´t have sucess because all message it´s encrypted, so i change to poplib.
As future updates i want to add Subject, date and sender
#!/usr/bin/env python3
# -- coding: utf-8 --
import email
import poplib
login = input('Email: ')
password = input('Password: ')
pop_server = 'pop-mail.outlook.com'
pop_port = 995
mail_box = poplib.POP3_SSL(pop_server, pop_port)
mail_box.user(login)
mailbox.pass_(password)
numMessages = len(mail_box.list()[1])
if numMessages > 15:
numMessages = 15
for i in range(15):
(server_msg, body, octets) = mail_box.retr(i+1)
for j in body:
try:
msg = email.message_from_string(j.decode("utf-8"))
strtext = msg.get_payload()
print(strtext)
except:
pass

If a is the raw email string:
msg = email.message_from_string(a)
if msg.is_multipart():
for part in msg.walk():
payload = part.get_payload(decode=True) #returns a bytes object
strtext = payload.decode() #utf-8 is default
print(strtext)
else:
payload = msg.get_payload(decode=True)
strtext = payload.decode()
print(strtext)

Related

Get OTP code from LiberoMail with imaplib

i need help. I would like to read the OTP code in an e-mail (specifically in the 'Social' tab from LiberoMail). I found this code on the web but I can't get it to work, I get an error: otp_split = [str(i) for i in str(otp[0])]
IndexError: list index out of range
Can you help me, please? This is the code:
def get_otp():
otp = []
# create an IMAP4 class with SSL
imap = imaplib.IMAP4_SSL("imapmail.libero.it", 993)
# authenticate
imap.login(username, password)
print('Successfully logged in')
status, messages = imap.select("Inbox")
# number of top emails to fetch
N = 1
# total number of emails
messages = int(messages[0])
for i in range(messages, messages - N, -1):
# fetch the email message by ID
res, msg = imap.fetch(str(i), "(RFC822)")
for response in msg:
if isinstance(response, tuple):
# parse a bytes email into a message object
msg = email.message_from_bytes(response[1])
if msg.is_multipart():
# iterate over email parts
for part in msg.walk():
# extract content type of email
content_type = part.get_content_type()
content_disposition = str(part.get("Content-Disposition"))
try:
# get the email body
body = part.get_payload(decode=True).decode()
except:
pass
if content_type == "text/plain" and "attachment" not in content_disposition:
# print text/plain emails and skip attachments
# depending on your where your OTP is in the email, you will have to modify the string split method
body = body.split(' is your confirmation code.')[1]
body = body.split('In case if you have not tried to login')[0]
otp.append(int(body))
imap.close()
imap.logout()
return otp
# wait for OTP
time.sleep(10)
# get OTP
otp = get_otp()
otp_split = [str(i) for i in str(otp[0])]
# fills in 4 pin OTP code
for i in range(4):
otp_elem = browser.find_element_by_xpath('//*[#id="loginForm"]/div[1]/div/div[1]/input' + str([i + 1]))
otp_elem.send_keys(otp_split[i])
otp_login_elem = browser.find_element_by_xpath('XPath of submit button')
otp_login_elem.click()

How can I run a .py file in a different .py file 10 times, each time posting the variables gotten from the first python file as a message in discord

import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
import base64
import time
# If modifying these scopes, delete the file token.pickle.
SCOPES = ['https://www.googleapis.com/auth/gmail.modify']
def main():
"""Shows basic usage of the Gmail API.
Lists the user's Gmail labels.
"""
creds = None
# The file token.pickle stores the user's access and refresh tokens, and is
# created automatically when the authorization flow completes for the first
# time.
if os.path.exists('token.pickle'):
with open('token.pickle', 'rb') as token:
creds = pickle.load(token)
# If there are no (valid) credentials available, let the user log in.
if not creds or not creds.valid:
if creds and creds.expired and creds.refresh_token:
creds.refresh(Request())
else:
flow = InstalledAppFlow.from_client_secrets_file(
'credentials.json', SCOPES)
creds = flow.run_local_server(port=0)
# Save the credentials for the next run
with open('token.pickle', 'wb') as token:
pickle.dump(creds, token)
service = build('gmail', 'v1', credentials=creds)
# Call the Gmail API
repeat = 0
while repeat <= 10:
labelName = "READ-BY-SCRIPT"
LABEL_ID = 'Label_8507504117657095973'
results = service.users().messages().list(
userId='me', q="-label:"+labelName, maxResults=1).execute()
messages = results.get('messages', [])
body = []
if not messages:
repeat += 10
time.sleep(60)
else:
for message in messages:
msg = service.users().messages().get(
userId='me', id=message['id']).execute()
labels = msg['labelIds']
if "INBOX" in labels:
body.append(msg['payload']['parts'])
body = base64.urlsafe_b64decode(
body[0][0]['body']['data'])
body = str(body)
if 'b"\\r\\nHi MOHAMMAD,\\r\\n' or "b'\\r\\nHi MOHAMMAD,\\r\\n" in body:
if 'posted a new assignment in IX K \\r\\n<https://classroom.google.com/c/MTEyNDMxODgyMTE0>.' in body:
body = body.replace(
"\\r\\nIf you don\\'t want to receive emails from Classroom, you can unsubscribe \\r\\n<https://classroom.google.com/s>.\\r\\n\\r\\nGoogle LLC\\r\\n1600 Amphitheatre Pkwy\\r\\nMountain View, CA 94043 USA\\r\\n'", "")
body = body.replace("b\"", "").replace("b'", "").replace('"', '').replace(" ", ' ').replace(
" ", ' ').replace(" \\n<https://classroom.google.com/c/MTEyNDMxODgyMTE0>", "").replace("\\n", "\\n\\n")
body = body.replace("\\r\\n\\nHi MOHAMMAD,\\r\\n\\n", "").replace(" \\r\\n\\n<https://classroom.google.com/c/MTEyNDMxODgyMTE0>.\\r\\n", "").replace(
"\\r\\n\\nIf you don't want to receive emails from Classroom, you can unsubscribe \\r\\n\\n<https://classroom.google.com/s>.\\r\\n\\n\\r\\n\\nGoogle LLC\\r\\n\\n1600 Amphitheatre Pkwy\\r\\n\\nMountain View, CA 94043 USA\\r\\n\\n", "")
body = body.replace("\\r", "\r").replace(
"\\n", "\n").replace("\n\n", "\n").replace("\\\\", "\\")
body = body.replace("\\xe2", "").replace(
"\\x80", "").replace("\\x99", "").replace("\\x98", "")
body = body.replace(
"\\r\\nIf you don\\'t want to receive emails from Classroom, you can unsubscribe \\r\\n<https://classroom.google.com/s>.\\r\\n\\r\\nGoogle LLC\\r\\n1600 Amphitheatre Pkwy\\r\\nMountain View, CA 94043 USA\\r\\n'", "")
body = body.replace("\\'", "")
body = body.replace(
"\\r\\nIf you don\\'t want to receive emails from Classroom, you can unsubscribe", "")
body = body.replace(
"If you dont want to receive emails from Classroom, you can unsubscribe\n <https://classroom.google.com/s>.\nGoogle LLC\n1600 Amphitheatre Pkwy\nMountain View, CA 94043 USA\n", "")
TEACHER_NAME = body.split("posted", 1)[0]
body = body.replace(TEACHER_NAME, "")
LINK = str(body.split("\r\nOPEN \r\n<", 1)[1])
LINK = LINK[:-1]
body = body.replace(LINK, "").replace("<", "").replace(">", "").replace(
'posted a new assignment in IX K\n\r\n', "").replace("\r\nOPEN \r\n", "")
if 'Due: ' in body:
body = body.replace("\n", " ", 1)
DATE = body.split(' ')[0]
body = body.split(' ')[1]
else:
body = body
DATE = 'No Due Date Provided'
service.users().messages().modify(userId='me', id=message['id'], body={
'addLabelIds': ['Label_8507504117657095973']}).execute()
repeat += 1
else:
service.users().messages().modify(userId='me', id=message['id'], body={
'addLabelIds': [LABEL_ID]}).execute()
pass
else:
service.users().messages().modify(userId='me', id=message['id'], body={
'addLabelIds': [LABEL_ID]}).execute()
pass
else:
service.users().messages().modify(userId='me', id=message['id'], body={
'addLabelIds': ['Label_8507504117657095973']}).execute()
pass
if __name__ == '__main__':
main()
This script gets the most recent unread email from gmail. If there are no new emails then it instantly ends the script. If there is an email that isn’t from google classroom it marks that email as read and then repeats the process again until a there are either no new unread emails or if an email from google classroom is found. If there is a new email from google classroom it gets the teacher’s name [TEACHER_NAME], link to the assignment [LINK], due date [DATE], and details of the assignment [body] and then ends the script
I want to make a different python file (bot.py) that runs another python file (gmail.py) containing the above script, 10 times. Each time gmail.py is run, bot.py via discord.py sends each of these variables in different messages if they are defined by gmail.py, and to do nothing if they are not defined by gmail.py. bot.py then waits 1 minute before repeating the entire process over again. How can this be done? Where should I start?

You don't really 'run' the python file, you should rather import the file (or the function from the file) and run the function however many times fit using a for loop.

How to listen for incoming emails in python 3?

With the objective of having an application that runs in python 3 and reads incoming emails on an specific gmail account, how would one listen for the reception of this emails?
What it should do is wait until a new mail is received on the inbox, read the subject and body from the email and get the text from the body (without format).
This is what I got so far:
import imaplib
import email
import datetime
import time
mail = imaplib.IMAP4_SSL('imap.gmail.com', 993)
mail.login(user, password)
mail.list()
mail.select('inbox')
status, data = mail.search(None, 'ALL')
for num in data[0].split():
status, data = mail.fetch(num, '(RFC822)')
email_msg = data[0][1]
email_msg = email.message_from_bytes(email_msg)
maintype = email_msg.get_content_maintype()
if maintype == 'multipart':
for part in email_msg.get_payload():
if part.get_content_maintype() == 'text':
print(part.get_payload())
elif maintype == 'text':
print(email_msg.get_payload())
But this has a couple of problems: When the message is multipart each part is printed and sometimes after that the last part is basically the whole message but in html format.
Also, this prints all the messages from the inbox, how would one listen for new emails with imaplib? or with other library.

I'm not sure about the synchronous way of doing that, but if you don't mind having an async loop and defining unread emails as your target then it could work.
(I didn't implement the IMAP polling loop, only the email fetching loop)
My changes
Replace the IMAP search filter from 'ALL' to '(UNSEEN)' to fetch unread emails.
Change the serializing policy to policy.SMTP from the default policy.Compat32.
Use the email.message.walk() method (new API) to run & filter message parts.
Replace the legacy email API calls with the new ones as described in the docs, and demonstrated in these examples.
The result code
import imaplib, email, getpass
from email import policy
imap_host = 'imap.gmail.com'
imap_user = 'example#gmail.com'
# init imap connection
mail = imaplib.IMAP4_SSL(imap_host, 993)
rc, resp = mail.login(imap_user, getpass.getpass())
# select only unread messages from inbox
mail.select('Inbox')
status, data = mail.search(None, '(UNSEEN)')
# for each e-mail messages, print text content
for num in data[0].split():
# get a single message and parse it by policy.SMTP (RFC compliant)
status, data = mail.fetch(num, '(RFC822)')
email_msg = data[0][1]
email_msg = email.message_from_bytes(email_msg, policy=policy.SMTP)
print("\n----- MESSAGE START -----\n")
print("From: %s\nTo: %s\nDate: %s\nSubject: %s\n\n" % ( \
str(email_msg['From']), \
str(email_msg['To']), \
str(email_msg['Date']), \
str(email_msg['Subject'] )))
# print only message parts that contain text data
for part in email_msg.walk():
if part.get_content_type() == "text/plain":
for line in part.get_content().splitlines():
print(line)
print("\n----- MESSAGE END -----\n")

Have you check below script (3_emailcheck.py) from here posted by git user nickoala? Its a python 2 script and in Python3 you need to decode the bytes with the email content first.
import time
from itertools import chain
import email
import imaplib
imap_ssl_host = 'imap.gmail.com' # imap.mail.yahoo.com
imap_ssl_port = 993
username = 'USERNAME or EMAIL ADDRESS'
password = 'PASSWORD'
# Restrict mail search. Be very specific.
# Machine should be very selective to receive messages.
criteria = {
'FROM': 'PRIVILEGED EMAIL ADDRESS',
'SUBJECT': 'SPECIAL SUBJECT LINE',
'BODY': 'SECRET SIGNATURE',
}
uid_max = 0
def search_string(uid_max, criteria):
c = list(map(lambda t: (t[0], '"'+str(t[1])+'"'), criteria.items())) + [('UID', '%d:*' % (uid_max+1))]
return '(%s)' % ' '.join(chain(*c))
# Produce search string in IMAP format:
# e.g. (FROM "me#gmail.com" SUBJECT "abcde" BODY "123456789" UID 9999:*)
def get_first_text_block(msg):
type = msg.get_content_maintype()
if type == 'multipart':
for part in msg.get_payload():
if part.get_content_maintype() == 'text':
return part.get_payload()
elif type == 'text':
return msg.get_payload()
server = imaplib.IMAP4_SSL(imap_ssl_host, imap_ssl_port)
server.login(username, password)
server.select('INBOX')
result, data = server.uid('search', None, search_string(uid_max, criteria))
uids = [int(s) for s in data[0].split()]
if uids:
uid_max = max(uids)
# Initialize `uid_max`. Any UID less than or equal to `uid_max` will be ignored subsequently.
server.logout()
# Keep checking messages ...
# I don't like using IDLE because Yahoo does not support it.
while 1:
# Have to login/logout each time because that's the only way to get fresh results.
server = imaplib.IMAP4_SSL(imap_ssl_host, imap_ssl_port)
server.login(username, password)
server.select('INBOX')
result, data = server.uid('search', None, search_string(uid_max, criteria))
uids = [int(s) for s in data[0].split()]
for uid in uids:
# Have to check again because Gmail sometimes does not obey UID criterion.
if uid > uid_max:
result, data = server.uid('fetch', uid, '(RFC822)') # fetch entire message
msg = email.message_from_string(data[0][1])
uid_max = uid
text = get_first_text_block(msg)
print 'New message :::::::::::::::::::::'
print text
server.logout()
time.sleep(1)

How can I return a string from a Google BigQuery row iterator object?

My task is to write a Python script that can take results from BigQuery and email them out. I've written a code that can successfully send an email, but I am having trouble including the results of the BigQuery script in the actual email. The query results are correct, but the actual object I am returning from the query (results) always returns as a Nonetype.
For example, the email should look like this:
Hello,
You have the following issues that have been "open" for more than 7 days:
-List issues here from bigquery code
Thanks.
The code reads in contacts from a contacts.txt file, and it reads in the email message template from a message.txt file. I tried to make the bigquery object into a string, but it still results in an error.
from google.cloud import bigquery
import warnings
warnings.filterwarnings("ignore", "Your application has authenticated using end user credentials")
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from string import Template
def query_emailtest():
client = bigquery.Client(project=("analytics-merch-svcs-thd"))
query_job = client.query("""
select dept, project_name, reset, tier, project_status, IssueStatus, division, store_number, top_category,
DATE_DIFF(CURRENT_DATE(), in_review, DAY) as days_in_review
from `analytics-merch-svcs-thd.MPC.RESET_DETAILS`
where in_review IS NOT NULL
AND IssueStatus = "In Review"
AND DATE_DIFF(CURRENT_DATE(), in_review, DAY) > 7
AND ready_for_execution IS NULL
AND project_status = "Active"
AND program_name <> "Capital"
AND program_name <> "SSI - Capital"
LIMIT 50
""")
results = query_job.result() # Waits for job to complete.
return results #THIS IS A NONETYPE
def get_queryresults(results): #created new method to put query results into a for loop and store it in a variable
for i,row in enumerate(results,1):
bq_data = (i , '. ' + str(row.dept) + " " + row.project_name + ", Reset #: " + str(row.reset) + ", Store #: " + str(row.store_number) + ", " + row.IssueStatus + " for " + str(row.days_in_review)+ " days")
print (bq_data)
def get_contacts(filename):
names = []
emails = []
with open(filename, mode='r', encoding='utf-8') as contacts_file:
for a_contact in contacts_file:
names.append(a_contact.split()[0])
emails.append(a_contact.split()[1])
return names, emails
def read_template(filename):
with open(filename, 'r', encoding='utf-8') as template_file:
template_file_content = template_file.read()
return Template(template_file_content)
names, emails = get_contacts('mycontacts.txt') # read contacts
message_template = read_template('message.txt')
results = query_emailtest()
bq_results = get_queryresults(query_emailtest())
import smtplib
# set up the SMTP server
s = smtplib.SMTP(host='smtp-mail.outlook.com', port=587)
s.starttls()
s.login('email', 'password')
# For each contact, send the email:
for name, email in zip(names, emails):
msg = MIMEMultipart() # create a message
# bq_data = get_queryresults(query_emailtest())
# add in the actual person name to the message template
message = message_template.substitute(PERSON_NAME=name.title())
message = message_template.substitute(QUERY_RESULTS=bq_results) #SUBSTITUTE QUERY RESULTS IN MESSAGE TEMPLATE. This is where I am having trouble because the Row Iterator object results in Nonetype.
# setup the parameters of the message
msg['From']='email'
msg['To']='email'
msg['Subject']="This is TEST"
# body = str(get_queryresults(query_emailtest())) #get query results from method to put into message body
# add in the message body
# body = MIMEText(body)
#msg.attach(body)
msg.attach(MIMEText(message, 'plain'))
# query_emailtest()
# get_queryresults(query_emailtest())
# send the message via the server set up earlier.
s.send_message(msg)
del msg
Message template:
Dear ${PERSON_NAME},
Hope you are doing well. Please find the following alert for Issues that have been "In Review" for greater than 7 days.
${QUERY_RESULTS}
If you would like more information, please visit this link that contains a complete dashboard view of the alert.
ISE Services

The BQ result() function returns a generator, so I think you need to change your return to yield from.
I'm far from a python expert, but the following pared-down code worked for me.
from google.cloud import bigquery
import warnings
warnings.filterwarnings("ignore", "Your application has authenticated using end user credentials")
def query_emailtest():
client = bigquery.Client(project=("my_project"))
query_job = client.query("""
select field1, field2 from `my_dataset.my_table` limit 5
""")
results = query_job.result()
yield from results # NOTE THE CHANGE HERE
results = query_emailtest()
for row in results:
print(row.field1, row.field2)

Parsing Attached .MSG Files with Python3

I'm trying to monitor a phishing inbox that could receive both normal emails (i.e. HTML/text based with potential attachments) as well as emails that have a .MSG file attached to it.
The goal is to have users send emails to phishing#company.com and once I parse out the various links (potentially malicious) as well as attachments (also potentially malicious, I'll perform some analysis on them.
The issue I'm running into is the body of the .msg file that is attached.
With the code below, I'm able to pull the to, from, subject, and all links within the original email. It also pulls down any attachments with the .msg file (i.e. on my test I was able to pull down a PDF within the .msg). However, I cannot get any of the to, from, subject, or body of the .msg file.
When I print it out as raw I get some of it in a very ugly format, but apparently with the multi-parts, I'm doing something wrong to get that piece of information.
I'm fairly new to Python so any help would be greatly appreciated.
import imaplib
import base64
import os
import email
from bs4 import BeautifulSoup
server = 'mail.server.com'
email_user = 'phishing#company.com'
email_pass = 'XXXXXXXXXXXX'
output_dir = '/tmp/attachments/'
body = ""
def get_body(msg):
if msg.is_multipart():
return get_body(msg.get_payload(0))
else:
return msg.get_payload(None, True)
def get_attachments(msg):
for part in msg.walk():
if part.get_content_maintype()=='multipart':
continue
if part.get('Content-Disposition') is None:
continue
fileName = part.get_filename()
if bool(fileName):
filePath = os.path.join(output_dir, fileName)
with open(filePath,'wb') as f:
f.write(part.get_payload(decode=True))
mail = imaplib.IMAP4_SSL(server)
mail.login(email_user, email_pass)
mail.select('INBOX')
result, data = mail.search(None, 'UNSEEN')
mail_ids = data[0]
id_list = mail_ids.split()
print(id_list)
for emailid in id_list:
result, email_data = mail.fetch(emailid, '(RFC822)')
raw_email = email_data[0][1]
raw_email_string = raw_email.decode('utf-8')
email_message = email.message_from_string(raw_email_string)
email_from = str(email.header.make_header(email.header.decode_header(email_message['From'])))
email_to = str(email.header.make_header(email.header.decode_header(email_message['To'])))
subject = str(email.header.make_header(email.header.decode_header(email_message['Subject'])))
print('From: ' + email_from)
print('To: ' + email_to)
print('Subject: ' + subject)
get_attachments(raw_email)
for part in email_message.walk():
body = part.get_payload(0)
content = body.get_payload(decode=True)
soup = BeautifulSoup(content, 'html.parser')
for link in soup.find_all('a'):
print('Link: ' + link.get('href'))
break

I got this working with the following code. I basically had to do multiple for loops within the .msg walk and then only pull out the relevant information within the text/html sections.
for emailid in id_list:
result, data = mail.fetch(emailid, '(RFC822)')
raw = email.message_from_bytes(data[0][1])
get_attachments(raw)
#print(raw)
header_from = mail.fetch(emailid, "(BODY[HEADER.FIELDS (FROM)])")
header_from_str = str(header_from)
mail_from = re.search('From:\s.+<(\S+)>', header_from_str)
header_subject = mail.fetch(emailid, "(BODY[HEADER.FIELDS (SUBJECT)])")
header_subject_str = str(header_subject)
mail_subject = re.search('Subject:\s(.+)\'\)', header_subject_str)
#mail_body = mail.fetch(emailid, "(BODY[TEXT])")
print(mail_from.group(1))
print(mail_subject.group(1))
for part in raw.walk():
if part.get_content_type() == 'message/rfc822':
part_string = str(part)
original_from = re.search('From:\s.+<(\S+)>\n', part_string)
original_to = re.search('To:\s.+<(\S+)>\n', part_string)
original_subject = re.search('Subject:\s(.+)\n', part_string)
print(original_from.group(1))
print(original_to.group(1))
print(original_subject.group(1))
if part.get_content_type() == 'text/html':
content = part.get_payload(decode=True)
#print(content)
soup = BeautifulSoup(content, 'html.parser')
for link in soup.find_all('a'):
print('Link: ' + link.get('href'))

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Python 3 - How to extract only email body - python-3.x

Related

Get OTP code from LiberoMail with imaplib

How can I run a .py file in a different .py file 10 times, each time posting the variables gotten from the first python file as a message in discord

How to listen for incoming emails in python 3?

How can I return a string from a Google BigQuery row iterator object?

Parsing Attached .MSG Files with Python3

Categories

Resources