I currently use this code to Extract attachments from EML files. And I wanted to know if I can link the attachment to mail (EML file). That is, add the eml file name as the attachment name prefix.
So I can know the attachment belongs to what mail.
Thank You
import os, re
import email
import argparse
import olefile
def extractAttachment(msg, eml_files, output_path):
#print len(msg.get_payload())
#print msg.get_payload()
if len(msg.get_payload()) > 2:
if isinstance(msg.get_payload(), str):
try:
extractOLEFormat(eml_files, output_path)
except IOError:
#print 'Could not process %s. Try manual extraction.' % (eml_files)
#print '\tHeader of file: %s\n' % (msg.get_payload()[:8])
pass
elif isinstance(msg.get_payload(), list):
count = 0
while count < len(msg.get_payload()):
payload = msg.get_payload()[count]
#récupérer les pièces jointes
filename = payload.get_filename()
#os.rename(filename,'rrrrr'+filename)
#filename=os.path.join(str(filename), str(eml_files))
if filename is not None:
try:
magic = payload.get_payload(decode=True)[:4]
except TypeError:
magic = "None"
# Print the magic deader and the filename for reference.
printIT(eml_files, magic, filename)
# Write the payload out.
writeFile(filename, payload, output_path)
count += 1
elif len(msg.get_payload()) == 2:
payload = msg.get_payload()[1]
filename = payload.get_filename()
try:
magic = payload.get_payload(decode=True)[:4]
except TypeError:
magic = "None"
# Print the magic deader and the filename for reference.
printIT(eml_files, magic, filename)
# Write the payload out.
writeFile(filename, payload, output_path)
elif len(msg.get_payload()) == 1:
attachment = msg.get_payload()[0]
payload = attachment.get_payload()[1]
filename = attachment.get_payload()[1].get_filename()
try:
magic = payload.get_payload(decode=True)[:4]
except TypeError:
magic = "None"
# Print the magic deader and the filename for reference.
printIT(eml_files, magic, filename)
# Write the payload out.
writeFile(filename, payload, output_path)
#else:
# print 'Could not process %s\t%s' % (eml_files, len(msg.get_payload()))
def extractOLEFormat(eml_files, output_path):
data = '__substg1.0_37010102'
filename = olefile.OleFileIO(eml_files)
msg = olefile.OleFileIO(eml_files)
attachmentDirs = []
for directories in msg.listdir():
if directories[0].startswith('__attach') and directories[0] not in attachmentDirs:
attachmentDirs.append(directories[0])
for dir in attachmentDirs:
filename = [dir, data]
if isinstance(filename, list):
filenames = "/".join(filename)
filename = msg.openstream(dir + '/' + '__substg1.0_3707001F').read().replace('\000', '')
payload = msg.openstream(filenames).read()
magic = payload[:4]
# Print the magic deader and the filename for reference.
printIT(eml_files, magic, filename)
# Write the payload out.
writeOLE(filename, payload, output_path)
#filename = str(eml_files)+"--"+str(filename)
def printIT(eml_files, magic, filename):
filename = str(eml_files)+"--"+str(filename)
print ('Email Name: %s\n\tMagic: %s\n\tSaved File as: %s\n' % (eml_files, magic, filename))
def writeFile(filename, payload, output_path):
filename = str(eml_files)+"--"+str(filename)
try:
file_location = output_path + filename
open(os.path.join(file_location), 'wb').write(payload.get_payload(decode=True))
except (TypeError, IOError):
pass
def writeOLE(filename, payload, output_path):
open(os.path.join(output_path + filename), 'wb')
def main():
parser = argparse.ArgumentParser(description='Attempt to parse the attachment from EML messages.')
parser.add_argument('-p', '--path',default='C:\\Users\\hamd\\Desktop\\TEX\\emails' ,help='eml')#Path to EML files
parser.add_argument('-o', '--out', default='C:\\Users\\hamd\\Desktop\\TEX\\PJ\\eml_files\\',help='pj')#Path to write attachments to.
args = parser.parse_args()
if args.path:
input_path = args.path
else:
print ("You need to specify a path to your EML files.")
exit(0)
if args.out:
output_path = args.out
else:
print ("You need to specify a path to write your attachments to.")
exit(0)
for root, subdirs, files in os.walk(input_path):
for file_names in files:
eml_files = os.path.join(root, file_names)
msg = email.message_from_file(open(eml_files))
extractAttachment(msg, eml_files, output_path)
if __name__ == "__main__":
main()
I tried to write this as a comment but is was too long. I won't give a full blown solution, but I'll explain the idea.
A possible solution would be to create an hard link to the extracted attachment, giving to the hard link the same name of EML file. You can append an incremental suffix if you have more attachments in the same EML file:
whatever.eml (original email file)
whatever_001.attch (hard link to first extracted attachment)
whatever_002.attch (hard link to second extracted attachment)
...
This way:
you are free to move the extracted attachment anywhere else (but in the same disk, because hard links by definition work only on the same disk)
you can keep a copy of the attachment (the hard link) together with the EML file without consuming disk space
in case the extracted file is deleted you have a backup copy of the attachment (the hard links) without consuming disk space
In Python you can create an hard link simply with:
import os
os.link(existing_target_file, new_link_name)
Related
from github import Github
from os import walk
#connection
token = input('please enter your token access ')
user = Github(token)
print(user)
#repo to upload files
repo = user.get_user().get_repo('test4 ')
all_files = []
contents = repo.get_contents("")
#repo content
while contents:
file_content = contents.pop(0)
if file_content.type == "dir":
contents.extend(repo.get_contents(file_content.path))
else:
file = file_content
all_files.append(str(file).replace('ContentFile(path="','').replace('")',''))
#return files inside a folder /ansible
files_names=[]
path_files= []
for (dirpath, dirnames, filenames) in walk('/home/bilel/ansible'):
files_names = filenames
for names in filenames:
path = '/home/bilel/ansible/' + names
path_files.append(path)
for i in range(0 , len(files_names)):
with open(path_files[i], 'r') as file:
content = file.read()
if files_names[i] not in all_files:
repo.create_file(files_names[i] , "Pré_commit", content , branch="main")
print(files_names[i] + ' CREATED')
# contents = repo.get_contents(files_names[i])
# repo.update_file(contents.path, "Pré_commit", content, contents.sha, branch="main")
# print(files_names[i] + ' UPDATED')
else:
print('ur file existe')
the result is expected but I don't know why it only loads some of the files not all of them.
and also when I try again nothing works cause when I import the repo using
repo = user.get_user().get_repo('reponame') there is a bad credentials error show up
I am trying to write binary data to a file. the program will check first if the file exists. If the file does not exist, the program will create the file and write data into it. While if it does exist the data will be appended to the file. Yet, when ever i try to read the file I cannot read the appanded data only the data written when the file was first created.
def getText(self):
self.readKey()
st = self.inBox.get('1.0', 'end')
fen = Fernet(self.readKey())
encrypted = fen.encrypt(st.encode())
return encrypted
def writeFile(self):
if (os.path.exists('data.txt') == False):
file = open('data.txt',mode='wb' )
file.write(self.getText())
file.close()
else:
file = open('data.txt',mode='ab' )
#sts = file.read()
file.write(self.getText())
file.close()
self.inBox.delete('1.0','end')
def openFile(self):
self.outBox.delete('1.0','end')
fen = Fernet(self.readKey())
try:
f = open("data.txt", mode='rb')
except:
alert_popup(self,'Error','No File Exists')
self.outBox.insert(tk.END, fen.decrypt(f.read()))
Use mode 'a'.
if (os.path.exists('data.txt') == False):
file = open('data.txt',mode='a' )
file.write(self.getText())
file.close()
Iam getting this error while trying to download a attachment file from gmail account.
fp = open(filename, 'wb')
TypeError: expected str, bytes or os.PathLike object, not NoneType
Earlier i could make fluent downloads from this script but now having this error. Please help me out what's wrong with the script?
import imaplib, email
import io
#log in and select the inbox
mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login('youmailid#gmail.com', 'yourpassword')
mail.select('testing')
#get uids of all messages
result, data = mail.uid('search', None, 'ALL')
uids = data[0].split()
print (uids)
#read the lastest message
result, data = mail.uid('fetch', uids[-1], '(RFC822)')
m = email.message_from_string(data[0][1].decode())
if m.get_content_maintype() == 'multipart':
for part in m.walk():
if part.get_content_maintype() == 'multipart': continue
if part.get('Content-Disposition') is None: continue
filename = part.get_filename()
fp = open(filename, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
print (f'{filename} saved!')
Looks like filename is None
Add the condition
if filename is None: continue
after the line
filename = part.get_filename()
I am trying to segregate the PDF files from the password protected and normal ones and finally I want to print all details to a log file along with total number of files scanned from the source directory.
I feel code is perfectly till before the last print() statement but the last print() function is not called by the program.
import PyPDF2
import os, sys, datetime
#import shutil
src = 'C:/Users/Balaji.B.R/Downloads/'
dst = 'C:/Users/Balaji.B.R/Downloads/success/'
op = 'C:/Users/Balaji.B.R/Desktop/op.txt'
sys.stdout = open(op,'w')
print("Date & Time :", datetime.datetime.now(), "\n")
files = os.listdir(src)
i=0
for f in files:
if (f.endswith('.pdf')):
#print("The File Name is",f)
pdffileobj = open(src+f, 'rb')
pdfReader = PyPDF2.PdfFileReader(pdffileobj)
if pdfReader.isEncrypted:
print("The File Name is",f)
print("PDF is either protected or corrupted, kindly rescan \n")
i+=1
pdffileobj.close()
else:
i+=1
#shutil.move(src+f,dst+f)
pdffileobj.close()
else:
pass
print("Total PDF files scanned are:", i)
sys.stdout.close()
I'm trying to make function which asks the user for a filename. If the file is not found, it will keep asking. This what I have please help..
def return_text_file(infile):
while infile:
try:
file = open(infile)
except IOError:
print("Could not find the file specified")
infile = input ("Enter the file name")
return open_infile
file_input = input ("Enter the file name")
return_text_file(file_input)
You can create a function (e.g. ask_file_name below) to get a valid answer from the user. It will repeat constantly until an existing name is given.
import os
path_str = '/home/userblabla/ProjectBlabla/'
def ask_file_name():
files_detected = os.listdir(path_str)
while True:
print('\nFiles:')
for file in files_detected:
print(file)
file_name_given = input('\nFile name?')
if file_name_given not in files_detected:
print("Could not find the file specified")
else:
print('Thanks friend.')
return file_name_given
my_file_name = ask_file_name()
with open(my_file_name, 'r') as opened_file:
# Do stuff on opened_file
......
with open() automatically closes the file, and it might be better if you use it instead of open().