Python REST API scraping keyError - python-3.x

I have Scraped a REST API, and here is my code:
import json
from pprint import pprint
import sqlite3
import datetime
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
import requests
url = "https://cex.io/api/ohlcv/hd/20180125/BTC/USD"
headers = {'User-Agent':'Mozilla/5.0'}
page = requests.get(url)
soup = soup(page.text, "html.parser")
a = soup("data1d")
I want the data of "data1d" from soup but when I try to do this it shows:
File "C:\Users\mubee\Downloads\Annaconda\lib\site-packages\bs4\element.py", line 1011, in __getitem__
return self.attrs[key]
KeyError: 'data1d'
while there is a data present in "data1d" in variable soup. How can I get the data present in "data1d" only, from the variable soup?

As the page is just json it is simple, no need for soup:
import requests
url = "https://cex.io/api/ohlcv/hd/20180125/BTC/USD"
page = requests.get(url)
page_dict=page.json()
print(page_dict['data1d'])

Related

How to get csrf-token from meta tag

import requests
import re
import random
from bs4 import BeautifulSoup
with requests.Session() as s:
login_session = s.get('https://prod.facilgo.com/users/sign_in')
data = login_session.text
soup = BeautifulSoup(data, "lxml")
print(soup)
csrf_token = soup.find("meta", name="csrf-token")
print(csrf_token["content"])
Is there any solution to get the csrf token from the meta tag using beautiful soup . I am getting error: TypeError: find() got multiple values for argument 'name'
Use dictionary to provide name attribute to beautiful soup:
import requests
import re
import random
from bs4 import BeautifulSoup
with requests.Session() as s:
login_session = s.get('https://prod.facilgo.com/users/sign_in')
data = login_session.text
soup = BeautifulSoup(data, "lxml")
print(soup)
csrf_token = soup.find("meta", {"name": "csrf-token"})
print(csrf_token["content"])

python webscraping with BeautifulSoup Problem

Im trying to scrape the box score table from https://www.nascar.com/stats/2021/1/box-score
my code is not working if someone could take a look and point me in right direction.
`import requests
from bs4 import BeautifulSoup
import pandas as pd
import requests
from bs4 import BeautifulSoup
import pandas as pd
url = ('https://www.nascar.com/stats/2021/1/box-score')
response = requests.get(url, headers = headers)
response.content
soup = BeautifulSoup(response.content, 'html.parser')
stats = soup.find_all('table', class_ = "stats-box-score-table-driver")
stats
for row in stats.find_all('tr'):
for cell in row.find_all('td'):
print(cell.text)

How to scrape from web all children of an attribute with one class?

I have tried to get the highlighted area (in the screenshot) in the website using BeautifulSoup4, but I cannot get what I want. Maybe you have a recommendation doing it with another way.
Screenshot of the website I need to get data from
from bs4 import BeautifulSoup
import requests
import pprint
import re
import pyperclip
import urllib
import csv
import html5lib
urls = ['https://e-mehkeme.gov.az/Public/Cases?page=1',
'https://e-mehkeme.gov.az/Public/Cases?page=2'
]
# scrape elements
for url in urls:
response = requests.get(url)
soup = BeautifulSoup(response.content, "html.parser")
content = soup.findAll("input", class_="casedetail filled")
print(content)
My expected output is like this:
Ətraflı məlumat:
İşə baxan hakim və ya tərkib
Xəyalə Cəmilova - sədrlik edən hakim
İlham Kərimli - tərkib üzvü
İsmayıl Xəlilov - tərkib üzvü
Tərəflər
Cavabdeh: MAHMUDOV MAQSUD SOLTAN OĞLU
Cavabdeh: MAHMUDOV MAHMUD SOLTAN OĞLU
İddiaçı: QƏHRƏMANOVA AYNA NUĞAY QIZI
İşin mahiyyəti
Mənzil mübahisələri - Mənzildən çıxarılma
Using the base url first get all the caseid and then pass those caseid to target url and then get the value of the first td tag.
import requests
from bs4 import BeautifulSoup
urls = ['https://e-mehkeme.gov.az/Public/Cases?page=1',
'https://e-mehkeme.gov.az/Public/Cases?page=2'
]
target_url="https://e-mehkeme.gov.az/Public/CaseDetail?caseId={}"
for url in urls:
response = requests.get(url)
soup = BeautifulSoup(response.content, "html.parser")
for caseid in soup.select('input.casedetail'):
#print(caseid['value'])
soup1=BeautifulSoup(requests.get(target_url.format(caseid['value'])).content,'html.parser')
print(soup1.select_one("td").text)
I would write it this way. Extracting the id that needs to be put in GET request for detailed info
import requests
from bs4 import BeautifulSoup as bs
urls = ['https://e-mehkeme.gov.az/Public/Cases?page=1','https://e-mehkeme.gov.az/Public/Cases?page=2']
def get_soup(url):
r = s.get(url)
soup = bs(r.content, 'lxml')
return soup
with requests.Session() as s:
for url in urls:
soup = get_soup(url)
detail_urls = [f'https://e-mehkeme.gov.az/Public/CaseDetail?caseId={i["value"]}' for i in soup.select('.caseId')]
for next_url in detail_urls:
soup = get_soup(next_url)
data = [string for string in soup.select_one('[colspan="4"]').stripped_strings]
print(data)

I am trying to scrape info from a website(Program name and program ID and location) using BeautifulSoup

I am trying to scrape info from a website(Program name and program ID) and it is returning empty list.
I am not sure if i am mixing up the syntax but this is what i have
soup.find_all('h3', class_='ama__h3')
the website link is https://freida.ama-assn.org/Freida/#/programs?program=residencies&specialtiesToSearch=140
from urllib.request import urlopen
from bs4 import BeautifulSoup as BS
import pandas as pd
from urllib.parse import urlparse, urlsplit
import requests
res = requests.get('https://freida.ama-assn.org/Freida/#/programs?program=residencies&specialtiesToSearch=140')
soup = bs4.BeautifulSoup(res.text, 'html5lib')
print(soup.prettify())
soup.find_all('h3', class_='ama__h3')
Your error is because you are parsing with html5lib. For any well formed html, the parser choice is not really important. However for a non well formed html (like this one), html5lib seems to have issues. You should use html.parser or lxml (apparently html.parser is safer)
However this code is doing what you want to do :
soup = BeautifulSoup(res.text, 'html.parser')
programs = soup.find_all("a", class_='ama__promo--background')
for program in programs:
program_name = program.find("h3").text
program_id = program.find_all("small")[-1].text.split(': ')[1].strip()
print(program_name, program_id

Beautifulsoup image downloading error

I am trying to download the images from the imageurls that come back from a Beautifulsoup scrape. I was trying to get this code to work after reading on some other examples tho the
Getting error:
f.write(requests.get(img))
TypeError: a bytes-like object is required, not 'Response
line: f.write(requests.get(img)[What goes here?]) is causing me trouble now. I use soup = BeautifulSoup(source, 'html.parser')
Where as the reference uses soup = BeautifulSoup(r.content)
import os
dir_path = os.path.dirname(os.path.realpath(__file__))
import urllib.request
from bs4 import BeautifulSoup
import codecs
import sys
import requests
from os.path import basename
lines = open('list.txt').read().splitlines()
for designers in lines:
for i in range(1):
url = 'https://www.example.com/Listings?st=' + author + '{}'.format(i)
source = urllib.request.urlopen(url)
soup = BeautifulSoup(source, 'html.parser')
for products in soup.find_all('li', class_='widget'):
image = products.find('img', class_='lazy-load')
print(image.get('data-src'))
img = (image.get('data-src'))
with open(basename(img), "wb") as f:
f.write(requests.get(img)**[What goes here?]**)
Thanks!

Resources