I would like to check if the mail i get have an .msg attachment file, so when i try to parse my mailbox, it's recognized as an mail and not as an attachment file
I tried to get every type of attachment file, but it doesn't work
mBoxes = mail.list()
mail.select("INBOX")
result, data = mail.search(None, "ALL")
ids = data[0].split() # data is a list.
for email_id in ids:
fstab.write("\n")
result, data = mail.fetch(email_id, "(RFC822)") # fetch the email body (RFC822) for the given ID
email_body=data[0][1]
listattachments=[]
m = email.message_from_bytes(email_body)
for part in m.walk():
if bool(fileName):
filePath = os.path.join("/home/nguyen/Documents/SPAMS/", 'attachments', str(len(listfilename))+fileName )
if not os.path.isfile(filePath) and ".jpg" not in fileName and ".p7s" not in fileName and ".png" not in fileName and "#" not in fileName :
listfilename.append(fileName)
listattachments.append(fileName) #Check number of attachments per mail
fstab.write(filePath+"\n")
fp = open(filePath, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
I expect to get .msg file attachment too, but the script read the file attachment as a mail directly
Related
The section of Python code below is working for me to send a single attachment to an email when FileList = "/Users/jamescook/Documents/MailTest/MC70165.pdf".
with open(FileList, "rb") as attachment:
part = MIMEBase("application", "octet-stream")
part.set_payload(attachment.read())
encoders.encode_base64(part)
part.add_header(
"Content-Disposition",
f"attachment; filename= {FileList}",
)
msg.attach(part)
text = message.as_string()
But I need to sometimes have multiple attachments. Between indenting and where a loop would end, I've been unable to successfully loop through when FileList = '/Users/jamescook/Documents/MailTest/MC70165.pdf', '/Users/jamescook/Documents/MailTest/MT40125.pdf','/Users/jamescook/Documents/MailTest/ReadMe.txt'.
I am struggling to download one particular pdf file(having watermark) as an email attachment? I can send it to your email, if you give me your email address?
I tried below piece:-
fp = open(mail.filePath, 'wb')
body = mail.part.get_payload(decode = True)
file_data = base64.encodestring(body).decode()
file_data = file_data.encode('UTF-8')
#file_data = base64.urlsafe_b64decode(mail.part.get_payload(decode=True).encode('UTF-8'))
fp.write(file_data)
fp.close()
Try to use high level lib.
from imap_tools import MailBox
# get all attachments from INBOX and save them to files
with MailBox('imap.my.ru').login('acc', 'pwd', 'INBOX') as mailbox:
for msg in mailbox.fetch():
for att in msg.attachments:
print(att.filename, att.content_type)
with open('C:/1/{}'.format(att.filename), 'wb') as f:
f.write(att.payload)
https://github.com/ikvk/imap_tools
I'm trying to monitor a phishing inbox that could receive both normal emails (i.e. HTML/text based with potential attachments) as well as emails that have a .MSG file attached to it.
The goal is to have users send emails to phishing#company.com and once I parse out the various links (potentially malicious) as well as attachments (also potentially malicious, I'll perform some analysis on them.
The issue I'm running into is the body of the .msg file that is attached.
With the code below, I'm able to pull the to, from, subject, and all links within the original email. It also pulls down any attachments with the .msg file (i.e. on my test I was able to pull down a PDF within the .msg). However, I cannot get any of the to, from, subject, or body of the .msg file.
When I print it out as raw I get some of it in a very ugly format, but apparently with the multi-parts, I'm doing something wrong to get that piece of information.
I'm fairly new to Python so any help would be greatly appreciated.
import imaplib
import base64
import os
import email
from bs4 import BeautifulSoup
server = 'mail.server.com'
email_user = 'phishing#company.com'
email_pass = 'XXXXXXXXXXXX'
output_dir = '/tmp/attachments/'
body = ""
def get_body(msg):
if msg.is_multipart():
return get_body(msg.get_payload(0))
else:
return msg.get_payload(None, True)
def get_attachments(msg):
for part in msg.walk():
if part.get_content_maintype()=='multipart':
continue
if part.get('Content-Disposition') is None:
continue
fileName = part.get_filename()
if bool(fileName):
filePath = os.path.join(output_dir, fileName)
with open(filePath,'wb') as f:
f.write(part.get_payload(decode=True))
mail = imaplib.IMAP4_SSL(server)
mail.login(email_user, email_pass)
mail.select('INBOX')
result, data = mail.search(None, 'UNSEEN')
mail_ids = data[0]
id_list = mail_ids.split()
print(id_list)
for emailid in id_list:
result, email_data = mail.fetch(emailid, '(RFC822)')
raw_email = email_data[0][1]
raw_email_string = raw_email.decode('utf-8')
email_message = email.message_from_string(raw_email_string)
email_from = str(email.header.make_header(email.header.decode_header(email_message['From'])))
email_to = str(email.header.make_header(email.header.decode_header(email_message['To'])))
subject = str(email.header.make_header(email.header.decode_header(email_message['Subject'])))
print('From: ' + email_from)
print('To: ' + email_to)
print('Subject: ' + subject)
get_attachments(raw_email)
for part in email_message.walk():
body = part.get_payload(0)
content = body.get_payload(decode=True)
soup = BeautifulSoup(content, 'html.parser')
for link in soup.find_all('a'):
print('Link: ' + link.get('href'))
break
I got this working with the following code. I basically had to do multiple for loops within the .msg walk and then only pull out the relevant information within the text/html sections.
for emailid in id_list:
result, data = mail.fetch(emailid, '(RFC822)')
raw = email.message_from_bytes(data[0][1])
get_attachments(raw)
#print(raw)
header_from = mail.fetch(emailid, "(BODY[HEADER.FIELDS (FROM)])")
header_from_str = str(header_from)
mail_from = re.search('From:\s.+<(\S+)>', header_from_str)
header_subject = mail.fetch(emailid, "(BODY[HEADER.FIELDS (SUBJECT)])")
header_subject_str = str(header_subject)
mail_subject = re.search('Subject:\s(.+)\'\)', header_subject_str)
#mail_body = mail.fetch(emailid, "(BODY[TEXT])")
print(mail_from.group(1))
print(mail_subject.group(1))
for part in raw.walk():
if part.get_content_type() == 'message/rfc822':
part_string = str(part)
original_from = re.search('From:\s.+<(\S+)>\n', part_string)
original_to = re.search('To:\s.+<(\S+)>\n', part_string)
original_subject = re.search('Subject:\s(.+)\n', part_string)
print(original_from.group(1))
print(original_to.group(1))
print(original_subject.group(1))
if part.get_content_type() == 'text/html':
content = part.get_payload(decode=True)
#print(content)
soup = BeautifulSoup(content, 'html.parser')
for link in soup.find_all('a'):
print('Link: ' + link.get('href'))
Working on a Python 3 win32com.client script that only searches for emails from a specific sender and downloads 1 out of multiple attachments.
The issue I have is that in the instances where an email has two attachments, it tries to download and rename both, which overwrites the first file that I want with the second attachment.
The file attachment has a specific filename but there is another attachment with a similar name.
So far I have:
import win32com.client
import os
mydesktop = os.path.expanduser('~') + '/Desktop/'
outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
# Select main Inbox
inbox = outlook.GetDefaultFolder(6)
messages = inbox.Items
sender = 'mysender#domain'
MyDailyfolder = mydesktop + 'My Daily Data/'
try:
for message in messages:
msg_date = message.SentOn.strftime('%Y-%m-%d')
try:
s = message.sender
s = str(s)
if s == sender:
for att in message.Attachments:
if "Dashboard2_dashboard2" in att.FileName: #<---This doesn't work.
outfile_name2 = 'MycustomName' + msg_date + '.csv'
outfile_path2 = MyDailyfolder + outfile_name2
if not os.path.exists(MyDailyfolder): os.makedirs(MyDailyfolder)
# save file
att.SaveASFile(outfile_path2)
print('Saved file:', outfile_name2)
except:
x=1
except:
x=1
The result downloads all of the attached files in an email to a new folder on my desktop but it overwrites each copy. I'm trying to select only the attachment that contains "Dashboard2dashboard2" on it. I think I have to use "for part in msg.walk():" but have never used that command before. Any ideas?
Ah the issue is a typo in my filename search. An extra underscore. Added an extra print in each step to make sure each part is valid.
sender = 'mysender#domain'
MyDailyfolder = mydesktop + 'My Daily Data/'
try:
for message in messages:
msg_date = message.SentOn.strftime('%Y-%m-%d')
try:
s = message.sender
s = str(s)
if s == sender:
print('Sender:' , message.sender)
for att in message.Attachments:
if "dashboard_2.csv" in att.FileName:
outfile_name = msg_date + att.FileName
print ('Match search confirmed')
# Backup test, print filename and watch spelling
print (att.FileName)
# Create a folder and copy/paste attachment there
outfile_path = MyDailyfolder + outfile_name
if not os.path.exists(MyDailyfolder): os.makedirs(MyDailyfolder)
# save file
att.SaveASFile(outfile_path)
except:
pass
except:
pass
I'm sending an execution log through an Email in my code, but the decoding is failing the file content becomes:
mg×£h¿m5ë]÷çMC<䓆ÛiÿývïM5ç]ô÷û²È¨Ÿÿ{míׇ6áþû÷W5åíÞ{¦šãnºç¯î®[«–m§ÿé‹›²Ö {V³º·°y»"µë'zßìzË"¢r÷¶ÞÑÝxsnï¿us^
I'm running the next code:
def send_email(self):
email_filename = WebUIOrgResorces.logfilename
fp = codecs.open(email_filename, 'rb')
filecontent = fp.read()
encodedcontent = base64.b64encode(filecontent)