'str' object has no attribute 'text' in BeautifulSoup4 - python-3.x

I'm trying to log in to a website using the Python requests library for automation purposes, and I'm getting the error 'str' object has no attribute 'text'.
My code is
# NOTE(review): the original used `from BeautifulSoup import BeautifulSoup`,
# which is the BeautifulSoup 3 package name; BeautifulSoup4 on Python 3 is
# imported from `bs4`.  `requests` and `logging` were also used but never
# imported in the snippet.
from bs4 import BeautifulSoup
import logging
import requests

logging.basicConfig(filename='app.log', level=logging.INFO)

headers = {
    'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
}


def post_request_curl(data):
    """Log in to ``data['url']`` with the credentials in ``data['login_details']``.

    Fetches the page first to scrape the hidden ``form_build_id`` token,
    then submits the login POST.  Returns the POST Response, or None if
    anything raised (the exception is written to app.log).
    """
    try:
        with requests.Session() as s:
            login_data = data['login_details']
            r = s.get(url=data['url'], headers=headers)
            soup = BeautifulSoup(r.content, 'html.parser')
            # find_all() returns a ResultSet (a list); indexing it with the
            # string 'value' raises, which is presumably the reported
            # error.  find() returns a single Tag, which supports ['value'].
            login_data['form_build_id'] = soup.find('input', attrs={'name': 'form_build_id'})['value']
            r = s.post(url=data['url'], data=login_data, headers=headers)
            return r
    except Exception as e:
        logging.error('Error occurred ' + str(e))

Try:
from bs4 import BeautifulSoup
import requests
import logging

logging.basicConfig(filename='app.log', level=logging.INFO)

headers = {
    'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
}


def post_request_curl(data):
    """POST the login form at ``data['url']`` with ``data['login_details']``.

    Fetches the page first to scrape the hidden ``form_build_id`` token,
    then submits the login POST.  Returns the Response, or None on error
    (the exception is logged to app.log).
    """
    try:
        with requests.Session() as s:
            login_data = data['login_details']
            r = s.get(url=data['url'], headers=headers)
            soup = BeautifulSoup(r.content, 'html.parser')
            # find() returns a single Tag (or None); the original used
            # find_all(), whose ResultSet (a list) cannot be indexed with
            # the string 'value'.
            login_data['form_build_id'] = soup.find('input', attrs={'name': 'form_build_id'})['value']
            r = s.post(url=data['url'], data=login_data, headers=headers)
            return r
    except Exception as e:
        logging.error('Error occurred ' + str(e))

Related

Getting response 403 when trying to crawl; the user agent doesn't work in Python 3

I'm trying to crawl this website and I get the message:
"You don't have permission to access"
Is there a way to bypass this? I have already tried setting user agents and using urlopen.
Here is my code:
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd
from urllib.request import Request, urlopen

# Fetch the page with a desktop Chrome User-Agent and parse the body;
# the server still answers 403, so the block is server-side rather than
# a parsing problem.
url = 'https://www.oref.org.il/12481-he/Pakar.aspx'
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36'}

res = requests.get(url, headers=header)
soup = BeautifulSoup(res.content, 'html.parser')

# Show the status of the response object.
print(res)
output:
<Response [403]>
also tried to do that:
# Retry the same request via urllib with a newer Chrome UA string; this
# was also rejected with HTTP 403, so the User-Agent alone is not the block.
req = Request(url, headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36'})
webpage = urlopen(req).read()
output:
HTTP Error 403: Forbidden
I'm still blocked and get response 403. Can anyone help?

Scraping a Video from a website and downloading it as mp4

I am trying to scrape this website and download the soccer goal video that is in it. I saw this Stack Overflow post and tried the solution, but it still does not work.
Here is the code I have done
import requests
from bs4 import BeautifulSoup

# specify the URL of the archive here
url = 'https://cdn-cf-east.streamable.com/video/mp4/g6f986.mp4p'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36'}

# Request first and fail loudly on an HTTP error so we never leave a
# corrupt 'video.mp4' (containing an HTML error page) on disk — the
# original opened the file before checking the response at all.
with requests.get(url, headers=headers, stream=True) as r:
    print(r)
    r.raise_for_status()
    with open('video.mp4', 'wb') as f_out:
        # Stream in 1 KiB chunks to avoid loading the whole video in memory.
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                f_out.write(chunk)
When I printed the response it showed 403, and because of that the video will not open or download. Any thoughts on how I can download the video?
Use the URL below.
import requests
from bs4 import BeautifulSoup

# specify the URL of the archive here
url = 'https://cdn-cf-east.streamable.com/video/mp4/g6f986.mp4?Expires=1621994280&Signature=IqySuJxyVi9pCmC~JUhl-iyp-LmG6OiAFfQeu-~-a55osCfu9VrksEhzaQzJlMxAHcSt1R4j9Pt-G8sblQeFt3UtGqY-neHJkC4mUxuHjxGWAWdksyiAxkMb8DYRLkvIseUfkbKbeO6Dt807QwMkspFmXYdzljm8DLho6nMQfC--jtfy8B2gONhA9YUmK2o~fUHwTHzTXXqNGct2hQl-B9cFLDBdj8LXWTj-75YInwWxLwtoenKK~qLahGtJXKXvxTVltxMvUYXXvP9F~WfhNIhNqns1JKrrrqJ~N1XunZHCv~IVJyzOEvrn2G4J5LMIn~dcEZ9frV3APHsE4D~HQA__&Key-Pair-Id=APKAIEYUVEN4EVB2OKEQ'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36'}

# NOTE(review): this is a time-limited signed CDN URL (Expires/Signature
# query params) — it presumably stops working after the expiry timestamp.
# Check the status before writing so a failed request does not leave a
# corrupt 'video.mp4' on disk.
with requests.get(url, headers=headers, stream=True) as r:
    print(r)
    r.raise_for_status()
    with open('video.mp4', 'wb') as f_out:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                f_out.write(chunk)
This is how you extract the video link from Streamable programmatically:
import requests
from bs4 import BeautifulSoup

# specify the URL of the archive here
url = "https://streamable.com/a50s3e"
r = requests.get(url)
soup = BeautifulSoup(r.text, 'html.parser')

# The clip is embedded in a <video> tag whose src is protocol-relative
# ("//cdn..."); prepend the scheme to get a fetchable URL.  Guard the
# lookup so a missing tag gives a clear error instead of a bare IndexError.
video = soup.find_all('video')
if not video:
    raise RuntimeError('no <video> tag found on the page')
temp = video[0].get("src")
url = "https:" + temp

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36'}

# Stream the video to disk, failing loudly on an HTTP error so we never
# write an error page into 'video.mp4'.
with requests.get(url, headers=headers, stream=True) as r:
    print(r)
    r.raise_for_status()
    with open('video.mp4', 'wb') as f_out:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                f_out.write(chunk)

Not able to download images from Google

Below is the code to download images from google. When I run the code below, only 80 images are downloaded.
import os
import requests
from bs4 import BeautifulSoup

# Base of a Google Images search URL; the query is appended as 'q=...'.
GOOGLE_IMAGE = 'https://www.google.com/search?site=&tbm=isch&source=hp&biw=1873&bih=990&'
# PATH = r'C:\Program Files\edge\msedgedriver.exe'
usr_agent = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/89.0.4389.90 Safari/537.36 Edg/89.0.774.63',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'none',
    'Accept-Language': 'en-US,en;q=0.8',
    'Connection': 'keep-alive',
}
# Raw string: the original spelled this 'Py\scrape', relying on '\s' not
# being a recognized escape — that raises a SyntaxWarning on modern
# Python. The raw-string form has the identical value.
SAVE_FOLDER = r'C:\Users\sansh\Desktop\Py\scrape\new'
def main():
    """Ensure the save folder exists, then run the interactive downloader."""
    try:
        os.mkdir(SAVE_FOLDER)
    except FileExistsError:
        # Folder (or a file of that name) already present — nothing to create.
        pass
    download_images()
def download_images():
    """Prompt for a query and a count, then save matching Google Image thumbnails.

    NOTE(review): without JavaScript, Google serves only a limited first
    page of results — presumably why no more than ~80 images are ever
    found regardless of the requested count. Confirm before relying on
    large counts.
    """
    data = input('What are u searching for?')
    n_images = int(input('How many images do you want?'))
    print('Start searching ...')
    # '+' joins query terms in a URL. NOTE(review): this only converts
    # underscores; input containing spaces is sent raw — confirm intent.
    search_url = GOOGLE_IMAGE + 'q=' + data.replace('_', '+')
    print(search_url)
    response = requests.get(search_url, headers=usr_agent)
    soup = BeautifulSoup(response.text, 'html.parser')
    results = soup.find_all('img', {'class': 'rg_i Q4LuWd'}, limit=n_images)
    image_links = []
    for res in results:
        try:
            link = res['data-src']
            image_links.append(link)
        except KeyError:
            # Some <img> tags carry no data-src attribute; skip them.
            continue
    print(f'Found {len(image_links)} images')
    print("Starting downloader ...")
    for i, image_link in enumerate(image_links):
        down = requests.get(image_link)
        if not down.ok:
            # Don't write an HTML error page into a .jpg file.
            print(f'Skipping {image_link}: HTTP {down.status_code}')
            continue
        # os.path.join builds a valid path on any OS; the original
        # concatenated with a literal '/'.
        image_name = os.path.join(SAVE_FOLDER, data + str(i + 1) + '.jpg')
        with open(image_name, 'wb') as file:
            file.write(down.content)
    print("Download completed ...")
# Run the downloader only when executed as a script, not on import.
if __name__ == '__main__':
    main()
I am not sure what the error is. Also, no error is shown. If possible, can anyone help me solve this problem?
Here, is the screenshot of the result after running this code.
[screenshot][1]

BeautifulSoup find() is returning none

import requests
from bs4 import BeautifulSoup

# Product page to scrape, with a desktop Chrome User-Agent so Amazon
# serves the regular HTML page.
URL = 'https://www.amazon.de/BenQ-GL2580H-Monitor-Eye-Care-Reaktionszeit/dp/B073NTJHYY/ref=sr_1_3?__mk_de_DE=%C3%85M%C3%85%C5%BD%C3%95%C3%91&dchild=1&keywords=bildschirm&qid=1597391122&sr=8-3'
headers = {
    "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36'
}

page = requests.get(URL, headers=headers)
soup = BeautifulSoup(page.content, 'html.parser')

# Look up the product title and price elements by their element ids.
title = soup.find(id="productTitle")
price = soup.find(id="priceblock_ourprice")

print("Titel:", title, "\n", "Preis:", price)
Output is always:
Titel: None
Preis: None
I already checked the steps before, but everything is working fine until it reaches the find function.
I have never asked a question before, so forgive me if I made mistakes.
Thanks for the help.
You have to use a different parser.
Try making the following change:
soup = BeautifulSoup(page.content, 'html.parser')
to
soup = BeautifulSoup(page.content, 'lxml')

how to click the button with beautifulsoup

I want to click the button before scraping the data.
My code:
import requests
from bs4 import BeautifulSoup

# Mobile Chrome User-Agent used for the tracking-status page request.
headers = {'User-Agent': 'Mozilla/5.0 (Linux; Android 4.3; nl-nl; SAMSUNG GT-I9505 Build/JSS15J) AppleWebKit/537.36 (KHTML, like Gecko) Version/1.5 Chrome/28.0.1500.94 Mobile Safari/537.36'}
url = "https://berdu.id/cek-resi?courier=sicepat&code=000361453759"

# Fetch the page, parse it with lxml, and dump the whole parse tree.
r = requests.get(url, headers=headers)
soup = BeautifulSoup(r.text, 'lxml')
print(soup)
How do I do this correctly?

Resources