Python3 - Web Scraper w/ BeautifulSoup, Email Price Alert - python-3.x

I'm attempting to set up a script to email me when a phone meets my price and condition threshold on swappa.com. I've gotten to the point where I can email out the latest post that meets my criteria, but I've hit a bit of a wall. I need it to only email me when a new post meets my criteria. Currently, it will email me each time the script is run, whether or not the listing is new.
I'm very new to Python and spent the better part of yesterday and today cobbling together this script, but any insight or information that would help accomplish my goal would be much appreciated!
from bs4 import BeautifulSoup
import re
import requests
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
# Fetch the listings page (the URL asks for sort=date_min) and parse the HTML.
page_url = 'https://swappa.com/buy/google-pixel-2-xl-unlocked?sort=date_min'
page_content = requests.get(page_url)
swappa = BeautifulSoup(page_content.text, 'html.parser')

# The page's <h1> tag supplies the phone name.
phone_name = swappa.h1

# For each label class, keep the first element on the page that carries it.
condition, color, storage, price = (
    swappa.find(class_=css_class)
    for css_class in ('condition_label', 'color_label', 'storage_label', 'price')
)

# Collect the href of every anchor whose target starts with /listing.
listing_href = re.compile("^/listing")
links = [
    anchor.get('href')
    for anchor in swappa.findAll('a', attrs={'href': listing_href})
]

# Summary of the first listing: name, condition, color, storage, price, URL.
listing_detail = [
    phone_name.contents[0],
    condition.contents[0],
    color.contents[0],
    storage.contents[0],
    '$' + price.contents[1],
    'https://swappa.com' + links[0],
]
def listing_result():
    """Return ``listing_detail`` when the listing meets the alert criteria.

    The listing qualifies when its price is at most 420 and its condition
    label is ``'Good'`` or ``'Mint'``.

    Returns:
        The module-level ``listing_detail`` list, or ``None`` when the
        listing does not qualify.

    Raises:
        ValueError: if the scraped price text is not a number.
    """
    # Compare numerically.  As strings, '1000' <= '420' is True because the
    # comparison runs character by character.
    # NOTE(review): assumes price.contents[1] is plain digits, optionally with
    # thousands separators -- confirm against the page markup.
    amount = float(str(price.contents[1]).replace(',', '').strip())
    if amount <= 420 and condition.contents[0] in ('Good', 'Mint'):
        return listing_detail
    return None
# Store the current result in result.txt, read it back, and e-mail the listing.
# NOTE(review): "w+" truncates result.txt on open, so whatever an earlier run
# stored there is erased before anything has read it.
with open("result.txt", "w+") as result_file:
result_file.write(str(listing_result()))
# This reads back the text written just above, not an earlier run's result.
with open('result.txt', 'r') as result_file_read:
result_data = result_file_read.read().replace('\n', '')
# NOTE(review): result_data is a str while result_file is the file object from
# the first with-statement, so this inequality is true on every run.
if result_data != result_file:
# setup sending an email via SMTP
server = smtplib.SMTP_SSL('smtp.gmail.com', 465)
server.login("redacted", "redacted")
# use mime to send email
msg = MIMEMultipart()
# setup parameters of the email
msg['From'] = 'redacted'
msg['To'] = 'redacted'
msg['Subject'] = 'Swappa - New Pixel 2 XL Listing'
# add in the message body
# listing_result() is called a second time here; its string form is the body.
body = str(listing_result())
body = MIMEText(body) # convert the body to a MIME compatible string
msg.attach(body) # attach it to your main message
# send the email
server.send_message(msg)
del msg
My idea was to have the latest listing be sent to a text file, then have the contents of that text file saved to a variable so that I could compare it to the text being created. However, I'm at a loss for how to accomplish that.

Related

How to download an image from the internet using google colab jupyter

I need to download an image using a url. I managed to obtain the urls of the images I need to download, but now I'm lost on how to download it to my local computer. I'm using google colab/ jupyter. Thank you!
here's my code so far:
from bs4 import BeautifulSoup
import requests
import json
import urllib.request
#input userid - plan: have program read userids from csv or excel file
userid = xxxxxxxx
#query the Globe API for this user's land_covers measurements
api_url = 'https://api.globe.gov/search/v1/measurement/protocol/measureddate/userid/?protocols=land_covers&startdate=2020-05-04&enddate=2020-07-16&userid=' + str(userid) +'&geojson=FALSE&sample=FALSE'
source = requests.get(api_url).text
#run the response through BeautifulSoup4 (lxml parser) and take the text of
#the first <p> inside <body>; that string is the JSON document
soup = BeautifulSoup(source, 'lxml')
paragraph = soup.find('body').p.text
#decode the JSON string into python objects
data = json.loads(paragraph)
#pick out the needed fields of every result record
for landcover in data['results']:
    siteId, measuredDate, latitude, longitude, protocol = (
        landcover[field]
        for field in ('siteId', 'measuredDate', 'latitude', 'longitude', 'protocol')
    )
    DownURL = landcover['data']['landcoversDownwardPhotoUrl']
    #Here is where I want to download the url contained in 'DownURL'
Try
# Download the image behind DownURL to the Colab VM, hand it to the browser as
# a download, then delete the temporary copy.
# NOTE(review): presumably meant for the spot in the loop where DownURL is
# assigned -- indent accordingly when pasting it there.
from google.colab import files as FILE
import os
response = requests.get(DownURL)
# Stop on an HTTP error instead of saving the error page as a .jpg file.
response.raise_for_status()
img_data = response.content
with open('image_name.jpg', 'wb') as handler:
    handler.write(img_data)
FILE.download('image_name.jpg')
os.remove('image_name.jpg') # to save up space
If you do not want to hard-code a single image name, you can generate one for every download instead, for example with a random function or with a counter variable that is incremented on each loop iteration.

Python - How can I send email with attachments in Python using gmail?

As you may well know, some mail accounts need to turn off Gmail's protection against less secure apps.
Turning that option off works like a charm with smtplib and attachments, but without turning it off it doesn't work at all.
Then I discovered the ezgmail module, which uses the API with OAuth 2.0. It sends emails very easily; however, the attachments are not attached properly. They seem to be encoded incorrectly, because the documents do not display correctly.
The code I ended up with is:
import ezgmail
import os, sys, glob
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email import encoders
# Folder that holds the PDFs, and the working directory at start-up.
pathFile = '<path>'
folder = os.getcwd()


def compose():
    """Return the e-mail's subject and body as a two-item list."""
    return ['Hello,', '''Here I put the text I want.''']
def send_email(email_to):
    """Send the newest PDF found in ``pathFile`` to ``email_to`` via ezgmail.

    The PDF is wrapped in a base64-encoded ``application/octet-stream`` MIME
    part, and that part's string form is what is handed to ``ezgmail.send``
    as the attachment argument.
    """
    ezgmail.EMAIL_ADDRESS = '<my email>'  # email_from
    subject, body = compose()
    os.chdir(pathFile)
    # select latest file (largest ctime among the PDFs in pathFile)
    newest_pdf = max(glob.glob('*.pdf'), key=os.path.getctime)
    # this should do the *encode part*
    mime_part = MIMEBase('application', 'octet-stream')
    mime_part.set_payload(open(newest_pdf, 'rb').read())
    encoders.encode_base64(mime_part)
    os.chdir(folder)
    ezgmail.send(email_to, subject, body, mime_part.as_string())
    print("Mail sent")
# The recipient must be a quoted string; the address separator had been garbled to '#'.
send_email('to@example.com')
And the question is: how do I properly attach documents (pdf, word, excel) with ezgmail?
The attachment(s) need to be a string (or list of strings) representing the path(s) to the document(s):
def send_email(email_to):
    """Send the most recently created PDF in ``pathFile`` to ``email_to``.

    The attachment is passed to ``ezgmail.send`` as a path string, which is
    the form the answer above says ezgmail expects.

    Raises:
        ValueError: if ``pathFile`` contains no ``*.pdf`` file.
    """
    subject, body = compose()
    ezgmail.EMAIL_ADDRESS = '<my email>'  # email_from
    # Put the folder into the glob pattern instead of calling os.chdir(): the
    # working directory is process-wide, and the chdir version left it changed
    # whenever max() raised on an empty folder.
    list_pdfs = glob.glob(os.path.join(pathFile, '*.pdf'))
    if not list_pdfs:
        raise ValueError(f"no PDF files found in {pathFile!r}")
    file1 = max(list_pdfs, key=os.path.getctime)  # select latest file
    ezgmail.send(email_to, subject, body, file1)
    print("Mail sent")
The encoding is a separate issue.
The author says:
EZGmail isn't meant to be comprehensive and do everything the Gmail API lets
you do, it's meant to make the simple things simple
It does attempt to do the MIME conversion, but there is nothing specific for PDF files.
The following code needs to be inserted into the ezgmail module, in the _createMessageWithAttachments method:
elif sub_type == 'pdf':
mimePart = MIMEBase('application','octet-stream')
mimePart.set_payload((fp).read())
encoders.encode_base64(mimePart)
and you need to import encoders from email

Edit header in '.eml'

As a brief summary, I have a bunch of '.eml' files in a directory. I need to forward these emails back to 'email@example.com'.
The problem is that the 'From' field in the header of the '.eml' file contains another email address, which doesn't match 'email@example.com'.
I've searched for a way to parse the file and update the content of the header.
At first, I was using the following modules:
eml.parser to parse the file.
pyo365 to connect to MSGraph API
I was able to send the content of the body but when I would try to send the attachments, I had to decode from base64 and extract the attachments in a folder, then send everything. I did not need to change the content of the header.
I know this was a bad move because there is probably a way to send the attachments encoded.
Also, since MSGraph attachment's file size limit is 4mb per requests, I decided to try to change for:
smtplib to send the email
I tried mail-parser without success to update anything in the content since the updated values would not be permanent, for instance:
# Parse the raw message bytes with mail-parser.
mail = mailparser.parse_from_bytes(byte_mail)
# Try to overwrite the parsed sender with a (display name, address) pair.
mail.from_ = [('My Name' , 'email#example.com')]
print(mail.headers) #This would print the original header
I also tried with mail.update() and various method using this module without success.
I found a post Python: Modify Values in eml file (email header) which suggested to use Parser, replace_header and as_string from email but I was unable to make it work either as I wouldn't be able to call replace_header and as_string:
from email.message import EmailMessage #contains as_string
from email.parser import HeaderParser
# NOTE(review): the file handle opened here is never closed in this snippet.
file = open(filename, 'r')
# Parse the open file with HeaderParser and keep the result in h.
h = HeaderParser().parse(file)
#stuck here
I know this is probably more than one question, but the main goal is to send the eml files back to a particular address, from 'email@example.com'.
The issue was resolved by parsing the email with eml_parser. I created my own header, attached the HTML body content and the attachments.
from passlib.context import CryptContext
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.application import MIMEApplication
from email.header import Header
def send(self, dst):
    """Rebuild the parsed .eml as a fresh message and send it to ``dst``.

    A new ``MIMEMultipart`` is stored on ``self.m`` with ``From`` set to
    ``self.client_addr``, ``To`` set to ``dst``, the subject from
    ``self.get_subject()`` and the HTML body from ``self.get_body()``.
    ``self.extract_attachments(self.parsed_eml)`` is called before the
    connection to smtp.office365.com:587 is opened and upgraded with STARTTLS.
    Login and delivery happen only when ``self.pwd_context.verify`` accepts
    the configured secret.

    Failures are reported on stdout rather than raised, and
    ``self.clean_attachments()`` always runs.
    """
    try:
        self.m = MIMEMultipart()
        self.m['From'] = self.client_addr
        self.m['To'] = dst
        # Must have Header() in python 3.x otherwise you get UnicodeError
        self.m['Subject'] = Header(self.get_subject(), 'utf-8')
        # Attach HTML body with the right encoding
        self.m.attach(MIMEText(self.get_body().encode('utf-8'), 'html', 'utf-8'))
        # Extract attachments to self.attachment_path
        self.extract_attachments(self.parsed_eml)
        # The with-block issues QUIT and closes the socket on every path,
        # including a failed login or a rejected recipient.
        with smtplib.SMTP('smtp.office365.com', 587) as server:
            server.ehlo()
            server.starttls()
            # Compare hash in config.json file
            # NOTE(review): the original nesting was lost; login and sendmail
            # are assumed to both depend on the hash check -- confirm.
            if self.pwd_context.verify(self.client_plain_secret, self.client_secret):
                server.login(self.client_addr, self.client_plain_secret)
                server.sendmail(self.m['From'], self.m['To'], self.m.as_string())
    except Exception as exc:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate; the cause is printed with the message.
        print("An error occured trying to send the email.", exc)
    finally:
        self.clean_attachments()

Sending multi-part email using smtplib in Python3 but second part not displayed

I'm using the following demo code to send multi-part html email in Python3. But weirdly, the first part, namely the plain text, is not displayed in the received email but only the second part, the html content, is displayed. Can anyone help? Thank you!
#!/usr/bin/env python
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
# me is the sender's address, you is the recipient's address.
me = "my#email.com"
you = "your#email.com"

# The same message written twice: once as plain text, once as HTML.
text = "Hi!\nHow are you?\nHere is the link you wanted:\nhttps://www.python.org"
html = """\
<html>
<head></head>
<body>
<p>Hi!<br>
How are you?<br>
Here is the link you wanted.
</p>
</body>
</html>
"""

# Message container - the correct MIME type is multipart/alternative.
msg = MIMEMultipart('alternative')
msg['Subject'] = "Link"
msg['From'] = me
msg['To'] = you

# Attach text/plain first and text/html last: according to RFC 2046, the last
# part of a multipart message, in this case the HTML message, is best and
# preferred.
for subtype, content in (('plain', text), ('html', html)):
    msg.attach(MIMEText(content, subtype))

# Send through the local SMTP server; sendmail takes the sender's address,
# the recipient's address and the message serialised as one string.
s = smtplib.SMTP('localhost')
s.sendmail(me, you, msg.as_string())
s.quit()
You have declared Content-Type: as multipart/alternative.
Email reader will decide which of alternative version to display.
See Mail multipart/alternative vs multipart/mixed

How to get the body text of email with imaplib?

I am in python3.4 .
import imaplib
import email
# Placeholder credentials for the mailbox.
user="XXXX"
password="YYYY"
# Open an IMAP-over-SSL connection to imap.gmail.com and log in.
con=imaplib.IMAP4_SSL('imap.gmail.com')
con.login(user,password)
# The result of list() is discarded here.
con.list()
con.select("INBOX")
# Fetch message 1 of the selected mailbox, requesting the RFC822 item.
result,data=con.fetch(b'1', '(RFC822)')
# data[0][1] holds the message bytes; parse them into a message object.
raw=email.message_from_bytes(data[0][1])
>>> raw["From"]
'xxxx'
>>> raw["To"]
'python-list@python.org'
>>> raw["Subject"]
'Re:get the min date from a list'
When I run 'print(raw)', many lines of the email body are printed,
but I can't get the body with raw[TEXT], raw['TEXT'] or raw['BODY'].
How can I get the body text of the email?
You're asking it for a header named TEXT or BODY, and obviously there is no such thing. I think you're mixing up IMAP4 part names (the things you pass in con.fetch) and RFC2822 header names (the things you use in an email.message.Message).
As the email.message documentation explains, a Message consists of headers and a payload. The payload is either a string (for non-multipart messages) or a list of sub-Messages (for multipart). Either way, what you want here is raw.get_payload().
If you want to handle both, you can either first check raw.is_multipart(), or you can check the type returned from get_payload(). Of course you have to decide what you want to do in the case of a multipart message; what counts as "the body" when there are three parts? Do you want the first? The first text/plain? The first text/*? The first text/plain if there is one, the first text/* if not, and the first of anything if even that doesn't exist? Or all of them concatenated together?
Let's assume you just want the first one. To do that:
def get_text(msg):
    """Return the decoded payload of the first leaf part of ``msg``.

    Multipart containers are descended through their first sub-part until a
    non-multipart part is reached.  That part's payload is returned with its
    Content-Transfer-Encoding undone (``get_payload(decode=True)``), so the
    result is ``bytes``.

    Returns ``None`` when a multipart container on the way down has no parts.
    """
    while msg.is_multipart():
        parts = msg.get_payload()
        if not parts:
            # get_payload(0) on an empty container would raise IndexError.
            return None
        msg = parts[0]
    return msg.get_payload(None, True)
If you want something different, hopefully you can figure out how to do it yourself. (See the get_content_type and/or get_content_maintype methods on Message.)
Following up with Python 3.8: this parses all the parts that have an associated encoding (charset) and turns them into a single HTML page.
import imaplib
import email
import webbrowser
import tempfile
import webbrowser
def email_to_html(parsed):
    """Join the decoded text of every part of ``parsed`` that declares a charset.

    Args:
        parsed: an ``email.message.Message``, for example the result of
            ``email.message_from_bytes``.

    Returns:
        One string with the decoded parts concatenated in document order.
        Parts without a charset (typically binary attachments) are left out.
        Bytes that are invalid for the declared charset are replaced rather
        than raising.
    """
    all_parts = []
    # walk() already yields the message and every nested sub-part exactly
    # once.  Recursing into list payloads as well would emit each leaf once
    # per enclosing container.
    for part in parsed.walk():
        if part.is_multipart():
            continue
        if encoding := part.get_content_charset():
            payload = part.get_payload(decode=True)
            try:
                all_parts.append(payload.decode(encoding, errors='replace'))
            except LookupError:
                # The message names a charset Python does not know.
                all_parts.append(payload.decode('utf-8', errors='replace'))
    return ''.join(all_parts)
# Login
imap = imaplib.IMAP4_SSL("imap.gmail.com")
result = imap.login("username", "password")

# Select the inbox, grab only the unseen emails
status, resp = imap.select('INBOX')
status, response = imap.search(None, '(UNSEEN)')
unread_msg_nums = response[0].split()

# Fetch each unseen message and convert it to a single HTML string.
email_bodies = []
for idx in unread_msg_nums:
    _, msg = imap.fetch(str(int(idx)), "(RFC822)")
    # Non-tuple items of the fetch response are skipped; for a tuple,
    # element 1 is parsed as the raw message bytes.
    email_bodies.extend(
        email_to_html(email.message_from_bytes(item[1]))
        for item in msg
        if isinstance(item, tuple)
    )
# If you want to view/check the emails in your browser
def display(html):
    """Write ``html`` to a temporary ``.html`` file and open it in a browser.

    The file is created with ``delete=False`` so it still exists when the
    browser loads it.  Nothing in this function removes it afterwards.
    """
    with tempfile.NamedTemporaryFile('w', delete=False, suffix='.html') as f:
        f.write(html)
        url = 'file://' + f.name
    # Open the browser only after the with-block has closed (and therefore
    # flushed) the file, so the page is never loaded half-written.
    webbrowser.open(url)


for body in email_bodies:
    display(body)

Resources