Beautiful Soup scrape Multiple URLs with Python 3 - python-3.x
The code below works fine, but I need to scrape multiple URLs and I don't really know how to do that.
It would also be nice, if possible, to read the URLs from a CSV file.
Basically, I'm trying to get the redirect link from a search link.
from bs4 import BeautifulSoup
import requests

url = "https://www.tennis-point.fr/index.php?stoken=737F2976&lang=1&cl=search&searchparam=E705Y-0193"
# Fetch the search page; requests.get returns a Response object.
response = requests.get(url)
# The decoded HTML source of the page.
data = response.text
# Parse the HTML source with the lxml parser.
soup = BeautifulSoup(data, 'lxml')
# The links of interest sit inside <div class="productsPicture">. find()
# returns None when no such div exists, so guard before searching inside it
# instead of failing with AttributeError.
container = soup.find("div", {"class": "productsPicture"})
tags = container.find_all("a") if container is not None else []
# Print the href attribute of every <a> tag found in that div.
for tag in tags:
    print(tag.get('href'))
This code will fetch all urls (href):
Code:
from bs4 import BeautifulSoup
import requests

url = "https://www.tennis-point.fr/index.php?stoken=737F2976&lang=1&cl=search&searchparam=E705Y-0193"
# Download the page; the Response object carries the decoded HTML in .text.
response = requests.get(url)
data = response.text
# Build the parse tree with Python's built-in HTML parser.
soup = BeautifulSoup(data, 'html.parser')
# Gather the href of every <a> tag on the page, in document order.
# Anchors without an href attribute contribute None to the list.
urls = []
for anchor in soup.find_all("a"):
    urls.append(anchor.get("href"))
print(urls)
output:
[u'https://www.tennis-point.fr/frais-d-expedition-et-de-livraison/', u'https://www.tennis-point.fr/garantie-satisfait-ou-rembourse/', u'https://www.tennis-point.fr/protection-des-donnees/', u'tel:+33(0)368331651', u'https://www.tennis-point.fr/', u'https://www.tennis-point.fr/index.php?cl=account&', u'#loginBox', u'https://www.tennis-point.fr/index.php?cl=register&', u'https://www.tennis-point.fr/index.php?cl=account&', u'https://www.tennis-point.fr/index.php?cl=account_order&', u'https://www.tennis-point.fr/index.php?cl=account_user&', u'https://www.tennis-point.fr/index.php?cl=newsletter&', u'https://www.tennis-point.fr/aide-services/', u'#', u'https://www.tennis-point.fr/index.php?cl=account_wishlist&', u'https://www.tennis-point.fr/index.php?cl=basket&', u'#', u'https://www.tennis-point.fr/index.php?cl=basket&', u'#', u'https://www.tennis-point.fr/raquettes-de-tennis/', u'/raquettes-de-tennis/enfants/', u'/raquettes-de-tennis/unisex/', u'https://www.tennis-point.fr/raquettes-de-tennis/', u'https://www.tennis-point.fr/raquettes-de-tennis-raquettes-de-competition/', u'https://www.tennis-point.fr/raquettes-de-tennis-raquettes-polyvalentes/', u'https://www.tennis-point.fr/raquettes-de-tennis-raquettes-confort/', u'https://www.tennis-point.fr/raquettes-de-tennis-raquettes-enfants/', u'https://www.tennis-point.fr/raquettes-de-tennis-raquettes-d-occasion/', u'https://www.tennis-point.fr/raquettes-de-tennis-lots-de-raquettes/', u'https://www.tennis-point.fr/raquettes-de-tennis-accessoires-raquettes/', u'/raquettes-de-tennis/babolat/', u'/raquettes-de-tennis/dunlop/', u'/raquettes-de-tennis/head/', u'/raquettes-de-tennis/kirschbaum/', u'/raquettes-de-tennis/pacific/', u'/raquettes-de-tennis/prince/', u'/raquettes-de-tennis/prokennex/', u'/raquettes-de-tennis/tecnifibre/', u'/raquettes-de-tennis/tennis-point/', u'/raquettes-de-tennis/topspin/', u'/raquettes-de-tennis/tourna/', u'/raquettes-de-tennis/voelkl/', u'/raquettes-de-tennis/wilson/', 
u'/raquettes-de-tennis/yonex/', u'/marques/raquettes-de-tennis/', u'https://www.tennis-point.fr/vetements-de-tennis/', u'/vetements-de-tennis/enfants/', u'/vetements-de-tennis/femmes/', u'/vetements-de-tennis/filles/', u'/vetements-de-tennis/garcons/', u'/vetements-de-tennis/hommes/', u'/vetements-de-tennis/unisex/', u'https://www.tennis-point.fr/vetements-de-tennis/', u'https://www.tennis-point.fr/vetements-de-tennis-robes/', u'https://www.tennis-point.fr/vetements-de-tennis-shirts-tops/', u'https://www.tennis-point.fr/vetements-de-tennis-jupes/', u'https://www.tennis-point.fr/vetements-de-tennis-vestes/', u'https://www.tennis-point.fr/vetements-de-tennis-sweats-hoodies/', u'https://www.tennis-point.fr/vetements-de-tennis-shorts/', u'https://www.tennis-point.fr/vetements-de-tennis-pantalons/', u'https://www.tennis-point.fr/vetements-de-tennis-survetements/', u'https://www.tennis-point.fr/vetements-de-tennis-compression/', u'https://www.tennis-point.fr/vetements-de-tennis-chaussettes/', u'https://www.tennis-point.fr/vetements-de-tennis-sous-vetements/', u'https://www.tennis-point.fr/vetements-de-tennis-accessoires/', u'/vetements-de-tennis/adidas/', u'/vetements-de-tennis/asics/', u'/vetements-de-tennis/babolat/', u'/vetements-de-tennis/bidi-badu/', u'/vetements-de-tennis/bidi-badu-by-kilian-kerner/', u'/vetements-de-tennis/bjoern-borg/', u'/vetements-de-tennis/dunlop/', u'/vetements-de-tennis/erima/', u'/vetements-de-tennis/fila/', u'/vetements-de-tennis/head/', u'/vetements-de-tennis/hydrogen/', u'/vetements-de-tennis/lacoste/', u'/vetements-de-tennis/limited-sports/', u'/vetements-de-tennis/lotto/', u'/vetements-de-tennis/nike/', u'/vetements-de-tennis/puma/', u'/vetements-de-tennis/reebok/', u'/vetements-de-tennis/sergio-tacchini/', u'/vetements-de-tennis/tennis-point/', u'/vetements-de-tennis/tonic/', u'/vetements-de-tennis/under-armour/', u'/vetements-de-tennis/wilson/', u'/vetements-de-tennis/yonex/', u'/marques/vetements-de-tennis/', 
u'https://www.tennis-point.fr/chaussures-de-tennis/', u'/chaussures-de-tennis/enfants/', u'/chaussures-de-tennis/femmes/', u'/chaussures-de-tennis/hommes/', u'/chaussures-de-tennis/unisex/', u'https://www.tennis-point.fr/chaussures-de-tennis/', u'https://www.tennis-point.fr/chaussures-de-tennis-toutes-surfaces/', u'https://www.tennis-point.fr/chaussures-de-tennis-terre-battue/', u'https://www.tennis-point.fr/chaussures-de-tennis-moquette/', u'https://www.tennis-point.fr/chaussures-de-tennis-loisir/', u'https://www.tennis-point.fr/chaussures-de-tennis-accessoires-chaussures/', u'/chaussures-de-tennis/adidas/', u'/chaussures-de-tennis/asics/', u'/chaussures-de-tennis/babolat/', u'/chaussures-de-tennis/erdal/', u'/chaussures-de-tennis/head/', u'/chaussures-de-tennis/ivybands/', u'/chaussures-de-tennis/k-swiss/', u'/chaussures-de-tennis/lotto/', u'/chaussures-de-tennis/mizuno/', u'/chaussures-de-tennis/new-balance/', u'/chaussures-de-tennis/nike/', u'/chaussures-de-tennis/prince/', u'/chaussures-de-tennis/pro-touch/', u'/chaussures-de-tennis/salomon/', u'/chaussures-de-tennis/under-armour/', u'/chaussures-de-tennis/wilson/', u'/chaussures-de-tennis/yonex/', u'/marques/chaussures-de-tennis/', u'https://www.tennis-point.fr/sacs-de-tennis/', u'/sacs-de-tennis/enfants/', u'/sacs-de-tennis/femmes/', u'/sacs-de-tennis/hommes/', u'/sacs-de-tennis/unisex/', u'https://www.tennis-point.fr/sacs-de-tennis/', u'https://www.tennis-point.fr/sacs-de-tennis-sacs-a-raquettes/', u'https://www.tennis-point.fr/sacs-de-tennis-sacs-a-dos/', u'https://www.tennis-point.fr/sacs-de-tennis-sacs-de-sport/', u'https://www.tennis-point.fr/sacs-de-tennis-autres-sacs/', u'/sacs-de-tennis/adidas/', u'/sacs-de-tennis/asics/', u'/sacs-de-tennis/babolat/', u'/sacs-de-tennis/bidi-badu/', u'/sacs-de-tennis/dunlop/', u'/sacs-de-tennis/head/', u'/sacs-de-tennis/lacoste/', u'/sacs-de-tennis/nike/', u'/sacs-de-tennis/prince/', u'/sacs-de-tennis/tecnifibre/', u'/sacs-de-tennis/tennis-point/', 
u'/sacs-de-tennis/topspin/', u'/sacs-de-tennis/under-armour/', u'/sacs-de-tennis/wilson/', u'/sacs-de-tennis/yonex/', u'/marques/sacs-de-tennis/', u'https://www.tennis-point.fr/balles-de-tennis/', u'https://www.tennis-point.fr/balles-de-tennis/', u'https://www.tennis-point.fr/balles-de-tennis-balles-de-competition/', u'https://www.tennis-point.fr/balles-de-tennis-balles-d-entrainement/', u'https://www.tennis-point.fr/balles-de-tennis-balles-geantes/', u'https://www.tennis-point.fr/balles-de-tennis-balles-intermediaires/', u'https://www.tennis-point.fr/balles-de-tennis-balles-lots-de-balles/', u'https://www.tennis-point.fr/balles-de-tennis-balles-officielles-itf/', u'https://www.tennis-point.fr/balles-de-tennis-balles-sans-pression/', u'https://www.tennis-point.fr/balles-de-tennis-lots-de-balles/', u'/balles-de-tennis/babolat/', u'/balles-de-tennis/balls-unlimited/', u'/balles-de-tennis/dunlop/', u'/balles-de-tennis/head/', u'/balles-de-tennis/tennis-point/', u'/balles-de-tennis/tretorn/', u'/balles-de-tennis/wilson/', u'/marques/balles-de-tennis/', u'https://www.tennis-point.fr/cordages-de-tennis/', u'https://www.tennis-point.fr/cordages-de-tennis/', u'https://www.tennis-point.fr/cordages-de-tennis-bobines-cordage/', u'https://www.tennis-point.fr/cordages-de-tennis-cordages-en-set/', u'/cordages-de-tennis/babolat/', u'/cordages-de-tennis/dunlop/', u'/cordages-de-tennis/gamma/', u'/cordages-de-tennis/head/', u'/cordages-de-tennis/isospeed/', u'/cordages-de-tennis/kirschbaum/', u'/cordages-de-tennis/luxilon/', u'/cordages-de-tennis/msv/', u'/cordages-de-tennis/pacific/', u'/cordages-de-tennis/polyfibre/', u'/cordages-de-tennis/prince/', u'/cordages-de-tennis/signum-pro/', u'/cordages-de-tennis/solinco/', u'/cordages-de-tennis/tecnifibre/', u'/cordages-de-tennis/tennis-point/', u'/cordages-de-tennis/topspin/', u'/cordages-de-tennis/tourna/', u'/cordages-de-tennis/weiss-cannon/', u'/cordages-de-tennis/wilson/', u'/cordages-de-tennis/yonex/', 
u'https://www.tennis-point.fr/autres/', u'/autres/femmes/', u'/autres/unisex/', u'https://www.tennis-point.fr/autres/', u'https://www.tennis-point.fr/autres-grips-de-tennis/', u'https://www.tennis-point.fr/autres-padel/', u'https://www.tennis-point.fr/autres-squash/', u'https://www.tennis-point.fr/autres-badminton/', u'https://www.tennis-point.fr/autres-accessoires-pour-entraineurs/', u'https://www.tennis-point.fr/autres-equipement-court-de-tennis/', u'https://www.tennis-point.fr/autres-accessoires/', u'https://www.tennis-point.fr/autres-bons-d-achat/', u'/autres/adidas/', u'/autres/babolat/', u'/autres/bidi-badu/', u'/autres/dunlop/', u'/autres/gamma/', u'/autres/head/', u'/autres/nike/', u'/autres/pacific/', u'/autres/prince/', u'/autres/rehband/', u'/autres/schildkroet-fitness/', u'/autres/sergio-tacchini/', u'/autres/tecnifibre/', u'/autres/tegra/', u'/autres/tomtom/', u'/autres/toolz/', u'/autres/topspin/', u'/autres/tourna/', u'/autres/tretorn/', u'/autres/universal-sport/', u'/autres/wilson/', u'/marques/autres/', u'https://www.tennis-point.fr/marques/', u'https://www.tennis-point.fr/nike/', u'https://www.tennis-point.fr/adidas/', u'https://www.tennis-point.fr/wilson/', u'https://www.tennis-point.fr/head/', u'https://www.tennis-point.fr/babolat/', u'https://www.tennis-point.fr/asics/', u'https://www.tennis-point.fr/bidi-badu/', u'https://www.tennis-point.fr/k-swiss/', u'https://www.tennis-point.fr/babolat/', u'https://www.tennis-point.fr/head/', u'https://www.tennis-point.fr/toolz/', u'https://www.tennis-point.fr/wilson/', u'https://www.tennis-point.fr/2xu/', u'https://www.tennis-point.fr/70love/', u'https://www.tennis-point.fr/adidas/', u'https://www.tennis-point.fr/asics/', u'https://www.tennis-point.fr/atp/', u'https://www.tennis-point.fr/balls-unlimited/', u'https://www.tennis-point.fr/bidi-badu/', u'https://www.tennis-point.fr/bidi-badu-by-kilian-kerner/', u'https://www.tennis-point.fr/bjoern-borg/', u'https://www.tennis-point.fr/boot-doc/', 
u'https://www.tennis-point.fr/cep/', u'https://www.tennis-point.fr/currex/', u'https://www.tennis-point.fr/diadora/', u'https://www.tennis-point.fr/dunlop/', u'https://www.tennis-point.fr/enebe/', u'https://www.tennis-point.fr/energetics/', u'https://www.tennis-point.fr/erdal/', u'https://www.tennis-point.fr/erima/', u'https://www.tennis-point.fr/falke/', u'https://www.tennis-point.fr/fila/', u'https://www.tennis-point.fr/fitbit/', u'https://www.tennis-point.fr/gamma/', u'https://www.tennis-point.fr/garmin/', u'https://www.tennis-point.fr/hydrogen/', u'https://www.tennis-point.fr/isospeed/', u'https://www.tennis-point.fr/ivybands/', u'https://www.tennis-point.fr/k-swiss/', u'https://www.tennis-point.fr/kirschbaum/', u'https://www.tennis-point.fr/lacoste/', u'https://www.tennis-point.fr/limited-sports/', u'https://www.tennis-point.fr/lobster/', u'https://www.tennis-point.fr/lotto/', u'https://www.tennis-point.fr/luxilon/', u'https://www.tennis-point.fr/mikros/', u'https://www.tennis-point.fr/mizuno/', u'https://www.tennis-point.fr/msv/', u'https://www.tennis-point.fr/nasara/', u'https://www.tennis-point.fr/new-balance/', u'https://www.tennis-point.fr/nike/', u'https://www.tennis-point.fr/pacific/', u'https://www.tennis-point.fr/polyfibre/', u'https://www.tennis-point.fr/prince/', u'https://www.tennis-point.fr/pro-touch/', u'https://www.tennis-point.fr/prokennex/', u'https://www.tennis-point.fr/puma/', u'https://www.tennis-point.fr/reebok/', u'https://www.tennis-point.fr/rehband/', u'https://www.tennis-point.fr/salomon/', u'https://www.tennis-point.fr/schildkroet-fitness/', u'https://www.tennis-point.fr/sergio-tacchini/', u'https://www.tennis-point.fr/signum-pro/', u'https://www.tennis-point.fr/solinco/', u'https://www.tennis-point.fr/sports-tutor/', u'https://www.tennis-point.fr/sportsmed/', u'https://www.tennis-point.fr/syneo/', u'https://www.tennis-point.fr/talbot/', u'https://www.tennis-point.fr/tecnifibre/', u'https://www.tennis-point.fr/tegra/', 
u'https://www.tennis-point.fr/tennis-point/', u'https://www.tennis-point.fr/tomtom/', u'https://www.tennis-point.fr/tonic/', u'https://www.tennis-point.fr/topspin/', u'https://www.tennis-point.fr/tourna/', u'https://www.tennis-point.fr/tretorn/', u'https://www.tennis-point.fr/tri-tennis/', u'https://www.tennis-point.fr/under-armour/', u'https://www.tennis-point.fr/universal-sport/', u'https://www.tennis-point.fr/voelkl/', u'https://www.tennis-point.fr/weiss-cannon/', u'https://www.tennis-point.fr/x-bionic/', u'https://www.tennis-point.fr/x-socks/', u'https://www.tennis-point.fr/yonex/', u'https://www.tennis-point.fr/professionnels/', u'https://www.tennis-point.fr/roger-federer/', u'https://www.tennis-point.fr/serena-williams/', u'https://www.tennis-point.fr/rafael-nadal/', u'https://www.tennis-point.fr/victoria-azarenka/', u'https://www.tennis-point.fr/andy-murray/', u'https://www.tennis-point.fr/maria-sharapova/', u'https://www.tennis-point.fr/novak-djokovic/', u'https://www.tennis-point.fr/angelique-kerber/', u'https://www.tennis-point.fr/agnieszka-radwanska/', u'https://www.tennis-point.fr/alexander-zverev/', u'https://www.tennis-point.fr/alize-cornet/', u'https://www.tennis-point.fr/andrea-petkovic/', u'https://www.tennis-point.fr/andy-murray/', u'https://www.tennis-point.fr/angelique-kerber/', u'https://www.tennis-point.fr/', u'https://www.tennis-point.fr/benoit-paire/', u'https://www.tennis-point.fr/bernard-tomic/', u'https://www.tennis-point.fr/borna-coric/', u'https://www.tennis-point.fr/carina-witthoeft/', u'https://www.tennis-point.fr/caroline-garcia/', u'https://www.tennis-point.fr/caroline-wozniacki/', u'https://www.tennis-point.fr/coco-vandeweghe/', u'https://www.tennis-point.fr/david-ferrer/', u'https://www.tennis-point.fr/david-goffin/', u'https://www.tennis-point.fr/dominic-thiem/', u'https://www.tennis-point.fr/dominika-cibulkova/', u'https://www.tennis-point.fr/', u'https://www.tennis-point.fr/elena-vesnina/', u'https://www.tennis-point.fr/', 
u'https://www.tennis-point.fr/eugenie-bouchard/', u'https://www.tennis-point.fr/fabio-fognini/', u'https://www.tennis-point.fr/feliciano-lopez/', u'https://www.tennis-point.fr/fernando-verdasco/', u'https://www.tennis-point.fr/florian-mayer/', u'https://www.tennis-point.fr/frances-tiafoe/', u'https://www.tennis-point.fr/gael-monfils/', u'https://www.tennis-point.fr/garbine-muguruza/', u'https://www.tennis-point.fr/gilles-muller/', u'https://www.tennis-point.fr/gilles-simon/', u'https://www.tennis-point.fr/grigor-dimitrov/', u'https://www.tennis-point.fr/ivo-karlovic/', u'https://www.tennis-point.fr/jack-sock/', u'https://www.tennis-point.fr/jan-lennard-struff/', u'https://www.tennis-point.fr/jelena-ostapenko/', u'https://www.tennis-point.fr/jo-wilfried-tsonga/', u'https://www.tennis-point.fr/johanna-konta/', u'https://www.tennis-point.fr/juan-martin-del-potro/', u'https://www.tennis-point.fr/julia-goerges/', u'https://www.tennis-point.fr/karolina-pliskova/', u'https://www.tennis-point.fr/kei-nishikori/', u'https://www.tennis-point.fr/kevin-anderson/', u'https://www.tennis-point.fr/kiki-bertens/', u'https://www.tennis-point.fr/kristina-mladenovic/', u'https://www.tennis-point.fr/', u'https://www.tennis-point.fr/lucas-pouille/', u'https://www.tennis-point.fr/lucie-afarova/', u'https://www.tennis-point.fr/', u'https://www.tennis-point.fr/magdalena-rybarikova/', u'https://www.tennis-point.fr/marcos-baghdatis/', u'https://www.tennis-point.fr/maria-sharapova/', u'https://www.tennis-point.fr/marin-cilic/', u'https://www.tennis-point.fr/martin-klizan/', u'https://www.tennis-point.fr/milos-raonic/', u'https://www.tennis-point.fr/mischa-zverev/', u'https://www.tennis-point.fr/mona-barthel/', u'https://www.tennis-point.fr/', u'https://www.tennis-point.fr/nick-kyrgios/', u'https://www.tennis-point.fr/novak-djokovic/', u'https://www.tennis-point.fr/pablo-cuevas/', u'https://www.tennis-point.fr/petra-kvitova/', u'https://www.tennis-point.fr/philipp-kohlschreiber/', 
u'https://www.tennis-point.fr/rafael-nadal/', u'https://www.tennis-point.fr/richard-gasquet/', u'https://www.tennis-point.fr/roberto-bautista-agut/', u'https://www.tennis-point.fr/roger-federer/', u'https://www.tennis-point.fr/samantha-stosur/', u'https://www.tennis-point.fr/serena-williams/', u'https://www.tennis-point.fr/simona-halep/', u'https://www.tennis-point.fr/sloane-stephens/', u'https://www.tennis-point.fr/stan-wawrinka/', u'https://www.tennis-point.fr/', u'https://www.tennis-point.fr/tomas-berdych/', u'https://www.tennis-point.fr/victoria-azarenka/', u'https://www.tennis-point.fr/viktor-troicki/', u'https://www.tennis-point.fr/yulia-putintseva/', u'https://www.tennis-point.fr/promos/', u'/promos/enfants/', u'/promos/femmes/', u'/promos/filles/', u'/promos/garcons/', u'/promos/hommes/', u'/promos/unisex/', u'https://www.tennis-point.fr/promos/', u'https://www.tennis-point.fr/promos-raquettes-de-tennis/', u'https://www.tennis-point.fr/promos-vetements-de-tennis/', u'https://www.tennis-point.fr/promos-chaussures-de-tennis/', u'https://www.tennis-point.fr/promos-sacs-de-tennis/', u'https://www.tennis-point.fr/promos-balles-de-tennis/', u'https://www.tennis-point.fr/promos-cordages-de-tennis/', u'https://www.tennis-point.fr/promos-grips/', u'https://www.tennis-point.fr/promos-autres/', u'/promos/2xu/', u'/promos/70love/', u'/promos/adidas/', u'/promos/asics/', u'/promos/atp/', u'/promos/babolat/', u'/promos/bidi-badu/', u'/promos/bidi-badu-by-kilian-kerner/', u'/promos/bjoern-borg/', u'/promos/cep/', u'/promos/diadora/', u'/promos/dunlop/', u'/promos/enebe/', u'/promos/erima/', u'/promos/falke/', u'/promos/fila/', u'/promos/gamma/', u'/promos/head/', u'/promos/hydrogen/', u'/promos/isospeed/', u'/promos/k-swiss/', u'/promos/lacoste/', u'/promos/limited-sports/', u'/marques/promos/', u'javascript:history.back()', u'https://www.tennis-point.fr/', u'/chaussures-de-tennis/', None, u'/39-5/?searchparam=E705Y-0193', u'/40/?searchparam=E705Y-0193', 
u'/40-5/?searchparam=E705Y-0193', u'/41-5/?searchparam=E705Y-0193', u'/42-5/?searchparam=E705Y-0193', u'/42/?searchparam=E705Y-0193', u'/43-5/?searchparam=E705Y-0193', u'/44/?searchparam=E705Y-0193', u'/44-5/?searchparam=E705Y-0193', u'/45/?searchparam=E705Y-0193', u'/46-5/?searchparam=E705Y-0193', u'/46/?searchparam=E705Y-0193', u'/47/?searchparam=E705Y-0193', u'/49/?searchparam=E705Y-0193', u'https://www.tennis-point.fr/?cl=search&searchparam=E705Y-0193&searchparam=E705Y-0193', u'https://www.tennis-point.fr/?cl=search&searchparam=E705Y-0193&searchparam=E705Y-0193&listorderby=oxpos&listorder=desc', u'https://www.tennis-point.fr/?cl=search&searchparam=E705Y-0193&searchparam=E705Y-0193&listorderby=tc_first_stock_date&listorder=desc', u'https://www.tennis-point.fr/?cl=search&searchparam=E705Y-0193&searchparam=E705Y-0193&listorderby=oxprice&listorder=asc', u'https://www.tennis-point.fr/?cl=search&searchparam=E705Y-0193&searchparam=E705Y-0193&listorderby=oxprice&listorder=desc', u'https://www.tennis-point.fr/?cl=search&searchparam=E705Y-0193&searchparam=E705Y-0193&listorderby=_value_uvp&listorder=desc', u'https://www.tennis-point.fr/?cl=search&searchparam=E705Y-0193&searchparam=E705Y-0193&listorderby=oxrating&listorder=desc', u'https://www.tennis-point.fr/asics-gel-game-6-02013802643000.html', u'https://www.tennis-point.fr/asics-gel-game-6-02013802643000.html', u'https://www.tennis-point.fr/avantage-tennis-point/', u'https://www.tennis-point.fr/frais-d-expedition-et-de-livraison/', u'https://www.tennis-point.fr/garantie-satisfait-ou-rembourse/', u'tel:+33 (0) 3 68 33 16 51', u'mailto:info#tennis-point.fr', u'https://www.tennis-point.fr/contact/', u'https://www.trustedshops.com/bewertung/info_X06E4EA65E7491784124112051AF688AD.html', u'https://www.facebook.com/tennispoint.fr/', u'https://www.instagram.com/tennis_point_official/', u'https://www.tennis-point.fr/mentions-legales/', u'https://www.tennis-point.fr/conditions-generales/', 
u'https://www.tennis-point.fr/retractation-formulaire-de-retractation/', u'https://www.tennis-point.fr/protection-des-donnees/', u'https://www.tennis-point.fr/garantie-satisfait-ou-rembourse/', u'https://www.tennis-point.fr/aide-services/', u'https://www.tennis-point.fr/service-retour/', u'https://www.tennis-point.fr/affiliate/', u'https://www.tennis-point.fr/connexionentraineurs/', u'https://www.tennis-point.fr/modes-de-paiement/', u'https://www.tennis-point.fr/frais-d-expedition-et-de-livraison/', u'https://www.tennis-point.fr/retour-reclamation/', u'https://www.tennis-point.fr/index.php?cl=account&', u'https://www.tennis-point.de', u'https://www.tennis-point.com', u'https://www.tennis-point.fr', u'https://www.tennis-point.it', u'https://www.tennis-point.es', u'https://www.tennis-point.nl', u'https://www.tennis-point.be', u'https://www.tennis-point.cz', u'https://www.tennis-point.sk', u'https://www.tennis-point.ch', u'https://www.tennis-point.at', u'https://www.tennis-point.co.uk', u'https://www.tennis-point.dk', u'https://www.tennis-point.se', u'/nike/', u'/adidas/', u'/wilson/', u'/babolat/', u'/head/', u'/asics/', u'/dunlop/', u'/tennis-point/', u'/under-armour/', u'/k-swiss/', u'/yonex/', u'/prince/', u'/lacoste/', u'/lotto/', u'/tretorn/', u'/limited-sports/', u'/fila/', u'/bjoern-borg/', u'/raquettes-de-tennis/', u'/vetements-de-tennis/', u'/chaussures-de-tennis/', u'/sacs-de-tennis/', u'/balles-de-tennis/', u'/cordages-de-tennis/', u'/marques/', u'/professionnels/', u'/promos/', u'https://www.tennis-point.fr/index.php?cl=forgotpwd&', u'https://www.tennis-point.fr/index.php?cl=register&']
You're going to have to scrape each of your URLs individually. If you want to scrape the same data from each page, a simple loop will suffice.
# Placeholder list: replace the entries with the real page URLs to scrape.
urls = ['url1','url2','url3']
for u in urls:
    # Fetch and parse each page in turn; everything that must run once per
    # URL has to stay indented inside this loop.
    response = requests.get(u)
    data = response.text
    soup = BeautifulSoup(data,'lxml')
    #
    #Scraping Code
    #
Related
I am trying to make a web scraper that finds all the courses in a Coursera specialisation and then lists them.
The problem is that there is a "SHOW MORE" button on the webpage, and because of that the script can't access all the courses: from bs4 import BeautifulSoup import requests res = requests.get('https://www.coursera.org/specializations/digital-manufacturing-design-technology#courses') txt = res.text status = res.status_code #print(txt) #print(status)## 200 is the code for success page = requests.get('https://www.coursera.org/specializations/digital-manufacturing-design-technology#courses') soup = BeautifulSoup(page.content,'lxml') #Display the title of the specialisation specialization_title = soup.find('h1') print(specialization_title.text) print("\n") #Display the courses inside the specialisation number_of_courses = soup.find('h2',class_ = 'headline-4-text bold m-b-1') print(number_of_courses.text) print("\n") course_cards = soup.find_all('h3',class_= 'headline-3-text bold m-t-1 m-b-2') for course in course_cards: print(course.text)
The data is stored inside the page in form of javascript object. You can use re/json module to decode it: import re import json import requests from textwrap import shorten url = "https://www.coursera.org/specializations/digital-manufacturing-design-technology#courses" html_doc = requests.get(url).text data = json.loads( re.search(r"window.__APOLLO_STATE__ = (.*});", html_doc).group(1) ) # uncomment to print all data: # print(json.dumps(data, indent=4)) i = 1 for k, v in data.items(): if "SDPCourse:" in k and "." not in k: print( "{:<3} {:<8} {:<60} {}".format( i, v["averageInstructorRating"], v["name"], shorten(v["description"], 40), ) ) i += 1 Prints: 1 4.6 Digital Manufacturing & Design This course will expose you to the [...] 2 4.56 Digital Thread: Components This course will help you [...] 3 4.76 Digital Thread: Implementation There are opportunities throughout [...] 4 4.41 Advanced Manufacturing Process Analysis Variability is a fact of life in [...] 5 4.56 Intelligent Machining Manufacturers are increasingly [...] 6 4.55 Advanced Manufacturing Enterprise Enterprises that seek to become [...] 7 4.55 Cyber Security in Manufacturing The nature of digital [...] 8 4.54 MBSE: Model-Based Systems Engineering This Model-Based Systems [...] 9 4.68 Roadmap to Success in Digital Manufacturing & Design Learners will create a roadmap to [...]
Scraping a dynamic table using Selenium in Python3
I am trying to scrape the symbols from this page, https://www.barchart.com/stocks/indices/sp/sp400?page=all When I look at the source in the Firefox browser (using Ctrl-U), none of the symbols turns up. Thinking maybe Selenium might be able to obtain the dynamic table, I ran the following code. sp400_url= "https://www.barchart.com/stocks/indices/sp/sp400?page=all" from bs4 import BeautifulSoup from selenium import webdriver driver = webdriver.Firefox() driver.get(sp400_url) html = driver.page_source soup = BeautifulSoup(html) print(soup) The print command doesn't show any of the symbols we see on the page. Is there a way to scrape the symbols from this page? Edited to clarify: I am interested in just the symbols and not the prices. So the list should read: AAN, AAXN, ACC, ACHC, ...
You can easily feed this into pandas' .read_html() to get the table and turn the symbols column into a list. Note: I used chromedriver instead of firefox import pandas as pd from selenium import webdriver sp400_url= "https://www.barchart.com/stocks/indices/sp/sp400?page=all" driver = webdriver.Chrome('C:/chromedriver_win32/chromedriver.exe') driver.get(sp400_url) html = driver.page_source df = pd.read_html(html)[-1] driver.close() symbolsList = list(df['Symbol']) Output: print(symbolsList) ['AAN', 'AAXN', 'ACC', 'ACHC', 'ACIW', 'ACM', 'ADNT', 'ADS', 'AEO', 'AFG', 'AGCO', 'ALE', 'AM', 'AMCX', 'AMED', 'AMG', 'AN', 'ARW', 'ARWR', 'ASB', 'ASGN', 'ASH', 'ATGE', 'ATI', 'ATR', 'AVNS', 'AVNT', 'AVT', 'AYI', 'BC', 'BCO', 'BDC', 'BHF', 'BJ', 'BKH', 'BLD', 'BLKB', 'BOH', 'BRO', 'BRX', 'BXS', 'BYD', 'CABO', 'CACI', 'CAR', 'CASY', 'CATY', 'CBRL', 'CBSH', 'CBT', 'CC', 'CCMP', 'CDAY', 'CDK', 'CFR', 'CFX', 'CGNX', 'CHDN', 'CHE', 'CHH', 'CHX', 'CIEN', 'CIT', 'CLGX', 'CLH', 'CLI', 'CMC', 'CMD', 'CMP', 'CNK', 'CNO', 'CNX', 'COHR', 'COLM', 'CONE', 'COR', 'CPT', 'CR', 'CREE', 'CRI', 'CRL', 'CRS', 'CRUS', 'CSL', 'CTLT', 'CUZ', 'CVLT', 'CW', 'CXW', 'CZR', 'DAN', 'DAR', 'DCI', 'DECK', 'DEI', 'DKS', 'DLPH', 'DLX', 'DNKN', 'DOC', 'DY', 'EBS', 'EGP', 'EHC', 'EME', 'ENPH', 'ENR', 'ENS', 'EPC', 'EPR', 'EQT', 'ESNT', 'ETRN', 'ETSY', 'EV', 'EVR', 'EWBC', 'EXEL', 'EXP', 'FAF', 'FCFS', 'FCN', 'FDS', 'FFIN', 'FHI', 'FHN', 'FICO', 'FIVE', 'FL', 'FLO', 'FLR', 'FNB', 'FR', 'FSLR', 'FULT', 'GATX', 'GBCI', 'GEF', 'GEO', 'GGG', 'GHC', 'GMED', 'GNRC', 'GNTX', 'GNW', 'GO', 'GRUB', 'GT', 'HAE', 'HAIN', 'HCSG', 'HE', 'HELE', 'HIW', 'HNI', 'HOG', 'HOMB', 'HPP', 'HQY', 'HR', 'HRC', 'HUBB', 'HWC', 'HXL', 'IART', 'IBKR', 'IBOC', 'ICUI', 'IDA', 'IDCC', 'IIVI', 'INGR', 'INT', 'ITT', 'JACK', 'JBGS', 'JBL', 'JBLU', 'JCOM', 'JEF', 'JHG', 'JLL', 'JW.A', 'JWN', 'KAR', 'KBH', 'KBR', 'KEX', 'KMPR', 'KMT', 'KNX', 'KRC', 'LAMR', 'LANC', 'LEA', 'LECO', 'LFUS', 'LGND', 'LHCG', 'LII', 'LITE', 'LIVN', 'LOGM', 'LOPE', 'LPX', 
'LSI', 'LSTR', 'MAC', 'MAN', 'MANH', 'MASI', 'MAT', 'MCY', 'MD', 'MDU', 'MIDD', 'MKSI', 'MLHR', 'MMS', 'MOH', 'MPW', 'MPWR', 'MRCY', 'MSA', 'MSM', 'MTX', 'MTZ', 'MUR', 'MUSA', 'NATI', 'NAVI', 'NCR', 'NDSN', 'NEU', 'NFG', 'NGVT', 'NJR', 'NKTR', 'NNN', 'NSP', 'NTCT', 'NUS', 'NUVA', 'NVT', 'NWE', 'NYCB', 'NYT', 'OC', 'OFC', 'OGE', 'OGS', 'OHI', 'OI', 'OLED', 'OLLI', 'OLN', 'ORI', 'OSK', 'OZK', 'PACW', 'PB', 'PBF', 'PBH', 'PCH', 'PCTY', 'PDCO', 'PEB', 'PEN', 'PENN', 'PII', 'PK', 'PNFP', 'PNM', 'POOL', 'POST', 'PPC', 'PRAH', 'PRI', 'PRSP', 'PSB', 'PTC', 'PZZA', 'QDEL', 'QLYS', 'R', 'RAMP', 'RBC', 'RGA', 'RGEN', 'RGLD', 'RH', 'RIG', 'RLI', 'RNR', 'RPM', 'RS', 'RYN', 'SABR', 'SAFM', 'SAIC', 'SAM', 'SBH', 'SBNY', 'SBRA', 'SCI', 'SEDG', 'SEIC', 'SF', 'SFM', 'SGMS', 'SIGI', 'SIX', 'SKX', 'SLAB', 'SLGN', 'SLM', 'SMG', 'SMTC', 'SNV', 'SNX', 'SON', 'SR', 'SRC', 'SRCL', 'STL', 'STLD', 'STOR', 'STRA', 'SVC', 'SWX', 'SXT', 'SYNA', 'SYNH', 'TCBI', 'TCF', 'TCO', 'TDC', 'TDS', 'TECH', 'TER', 'TEX', 'TGNA', 'THC', 'THG', 'THO', 'THS', 'TKR', 'TMHC', 'TOL', 'TPH', 'TPX', 'TR', 'TREE', 'TREX', 'TRIP', 'TRMB', 'TRMK', 'TRN', 'TTC', 'TTEK', 'TXRH', 'UBSI', 'UE', 'UFS', 'UGI', 'UMBF', 'UMPQ', 'UNVR', 'URBN', 'UTHR', 'VAC', 'VC', 'VLY', 'VMI', 'VSAT', 'VSH', 'VVV', 'WAFD', 'WBS', 'WEN', 'WERN', 'WEX', 'WH', 'WOR', 'WPX', 'WRI', 'WSM', 'WSO', 'WTFC', 'WTRG', 'WW', 'WWD', 'WWE', 'WYND', 'X', 'XEC', 'XPO', 'Y', 'YELP', 'Symbol']
If the elements are not present in the page source, try using an explicit wait: from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC driver = webdriver.Firefox() driver.get(sp400_url) wait = WebDriverWait(driver, 10) symbols = wait.until(EC.presence_of_all_elements_located((By.XPATH, '//td[contains(@class, "symbol")]//a[starts-with(@href, "/stocks/quotes/")]'))) for symbol in symbols: print(symbol.text)
I am not sure why you want to scrape the complete page if you need just the symbols. You can simply find all such elements and put their text in a list. driver = webdriver.Firefox(executable_path=r'..\drivers\geckodriver.exe') driver.get("https://www.barchart.com/stocks/indices/sp/sp400?page=all") # Waiting for table to load WebDriverWait(driver, 30).until(EC.visibility_of_all_elements_located((By.XPATH, "//h4[contains(text(),'S&P 400 Components')]"))) symbols = driver.find_elements_by_xpath("//div[@class='bc-table-scrollable-inner']//a[@data-ng-bind='cell']") symbolList = [] for symbol in symbols: symbolList.append(symbol.text) print(len(symbolList)) #Length of list print(symbolList) #Content of list Output:
Web scraping : not able to scrape text and href for a given div, class and to skip the span tag
Trying to get the text and href for top news but not able to scrape it. website : News site My code: import requests from bs4 import BeautifulSoup import psycopg2 import time def checkResponse(url): response = requests.get(url) if response.status_code == 200: return response.content else: return None def getTitleURL(): url = 'http://sandesh.com/' response = checkResponse(url) if response is not None: html = BeautifulSoup(response, 'html.parser') for values in html.find_all('div', class_='d-top-news-latest'): headline = values.find(class_='d-s-NSG-regular').text url = values.find(class_='d-s-NSG-regular').['href'] print(headline + "->" + url) if __name__ == '__main__': print('Getting the list of names....') names = getTitleURL() print('... done.\n') Output: Getting the list of names.... Corona live મેડિકલ સ્ટાફ પર હુમલા અંગે અમિત શાહે ડોક્ટર્સ સાથે કરી ચર્ચા, સુરક્ષાની ખાતરી આપતા કરી અપીલ Ahmedabad ગુજરાતમાં કૂદકેને ભૂસકે વધ્યો કોરોના વાયરસનો કહેર, આજે નવા 94 કેસ નોંધાયા, જાણો કયા- કેટલા કેસ નોંધાયા Corona live જીવન અને મોત વચ્ચે સંઘર્ષ કરી રહ્યો છે દુનિયાનો સૌથી મોટો તાનાશાહ કિમ જોંગ! ટ્રમ્પે કહી આ વાત Ahmedabad અમદાવાદમાં નર્સિંગ સ્ટાફનો ગુસ્સો ફૂટ્યો, ‘અમારું કોઈ સાંભળતું નથી, અમારો કોરોના ટેસ્ટ જલદી કરાવો’ Business ભારતીય ટેલિકોમ જગતમાં સૌથી મોટી ડીલ, ફેસબુક બની જિયોની સૌથી મોટી શેરહોલ્ડર ->http://sandesh.com/amit-shah-talk-with-ima-and-doctors-through-video-conference-on-attack/ ... done. I want to skip text inside the tag and also I am able to get only 1 href. Also the headline is a list. how do I get each title and url. I am trying to scrape the part in red:
First, at for values in html.find_all('div', class_='d-top-news-latest') you don't need a for loop, because the DOM has only one element with the class d-top-news-latest. Second, to get the title, you can use select('span'), because your title is inside the span tag. Third, as you noted, the headline is a list, so you need a for loop to get each title and URL. values = html.find('div', class_='d-top-news-latest') for i in values.find_all('a', href = True): print(i.select('span')) print(i['href']) OUTPUT Getting the list of names.... [<span> Corona live </span>] http://sandesh.com/maharashtra-home-minister-anil-deshmukh-issue-convicts-list-of- palghar-case/ [<span> Corona live </span>] http://sandesh.com/two-doctors-turn-black-after-treatment-of-coronavirus-in-china/ [<span> Corona live </span>] http://sandesh.com/bihar-asi-gobind-singh-suspended-for-holding-home-guard-jawans- after-stopping-officers-car-asi/ [<span> Ahmedabad </span>] http://sandesh.com/jayanti-ravi-surprise-statement-sparks-outcry-big-decision-taken- despite-more-patients-in-gujarat/ [<span> Corona live </span>] http://sandesh.com/amit-shah-talk-with-ima-and-doctors-through-video-conference-on- attack/ ... done.
to remove the "span" part: values = html.find('div', class_='d-top-news-latest') for i in values.find_all('a', href=True): i.span.decompose() print(i.text) print(i['href']) Output: Getting the list of names.... ગુજરાતમાં કોરોનાનો કહેરઃ રાજ્યમાં આજે કોરોનાના 135 નવા કેસ, વધુ 8 લોકોનાં મોત http://sandesh.com/gujarat-corona-update-206-new-cases-and-18-deaths/ ચીનના વૈજ્ઞાનિકોએ જ ખોલી જીનપિંગની પોલ, કોરોના વાયરસને લઈને કર્યો સનસની ખુલાસો http://sandesh.com/chinese-scientists-claim-over-corona-virus/ શું લોકડાઉન ફરી વધારાશે? PM મોદી 27મીએ ફરી એકવાર તમામ CM સાથે કરશે ચર્ચા http://sandesh.com/pm-modi-to-hold-video-conference-with-cms-on-april-27-lockdown- extension/ કોરોના વાયરસને લઈ મોટી ભવિષ્યવાણી, દુનિયાના 30 દેશો પર ઉભુ થશે ભયંકર સંકટ http://sandesh.com/after-corona-attack-now-hunger-will-kill-many-people-in-the-world/ દેશમાં 24 કલાકમાં 1,486 કોરોનાનાં નવા કેસ, પરંતુ મળ્યા સૌથી મોટા રાહતનાં સમાચાર http://sandesh.com/recovery-rate-increased-in-corona-patients-says-health-ministry/ ... done.
Beautiful Soup findAll() doesn't find the first one
I'm working on a coreference-resolution system based on Neural Networks for my Bachelor's Thesis, and i have a problem when i read the corpus. The corpus is already preproccesed, and i only need to read it to do my stuff. I use Beautiful Soup 4 to read the xml files of each document that contains the data i need. the files look like this: <?xml version='1.0' encoding='ISO-8859-1'?> <!DOCTYPE markables SYSTEM "markables.dtd"> <markables xmlns="www.eml.org/NameSpaces/markable"> <markable id="markable_102" span="word_390" grammatical_role="vc" coref_set="empty" visual="none" rel_type="none" np_form="indefnp" type="" entity="NO" nb="UNK" def="INDEF" sentenceid="19" lemmata="premia" pos="nn" head_pos="word_390" wikipedia="" mmax_level="markable"/> <markable id="markable_15" span="word_48..word_49" grammatical_role="vc" coref_set="empty" visual="none" rel_type="none" np_form="defnp" type="" entity="NO" nb="SG" def="DEF" sentenceid="3" lemmata="Grozni hegoalde" pos="nnp nn" head_pos="word_48" wikipedia="Grozny" mmax_level="markable"/> <markable id="markable_101" span="word_389" grammatical_role="sbj" coref_set="set_21" coref_type="named entities" visual="none" rel_type="coreferential" sub_type="exact repetition" np_form="ne_o" type="enamex" entity="LOC" nb="SG" def="DEF" sentenceid="19" lemmata="Mosku" pos="nnp" head_pos="word_389" wikipedia="" mmax_level="markable"/> ... i need to extract all the spans here, so try to do it with this code (python3): ... from bs4 import BeautifulSoup ... file1 = markables+filename+"_markable_level.xml" xml1 = open(file1) #markable soup1 = BeautifulSoup(xml1, "html5lib") #markable ... ... for markable in soup1.findAll('markable'): try: span = markable.contents[1]['span'] print(span) spanA = span.split("..")[0] spanB = span.split("..")[-1] ... 
(I omitted most of the code, as it is 500 lines long.) python3 aurreprozesaketaSTM.py train --- 28.329787254333496 seconds --- &&&&&&&&&&&&&&&&&&&&&&&&& egun.06-1-p0002500.2000-06-01.europa word_48..word_49 word_389 word_385..word_386 word_48..word_52 ... If you compare the XML file with the output, you can see that word_390 is missing. I get almost all the data that I need, then preprocess everything, build the system with neural networks, and finally I get scores and all... But as I lose the first word of each document, my system's accuracy is a bit lower than it should be. Can anyone help me with this? Any idea where the problem is?
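You are parsing XML with html5lib, which does not support parsing XML. The Beautiful Soup documentation lists lxml’s XML parser as "The only currently supported XML parser": https://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser Install lxml and create the soup with the XML parser instead, e.g. BeautifulSoup(xml1, "xml").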
Is it possible to scrape (crawl) a tooltip after clicking a map marker?
[Problem] I can't request left mouse click event to marker for activating tooltip through selenium. [My intention] scraping (crawling) text from tooltip window on map marker from this web service with selenium (python code) daum service web map: http://www.socar.kr/reserve#jeju <map id="daum.maps.Marker.Area:13u" name="daum.maps.Marker.Area:13u"><area href="javascript:void(0)" alt="" shape="rect" coords="0,0,40,38" title="" style="-webkit-tap-highlight-color: transparent;"></map> <div class="tooltip myInfoWindow"><h4><a class="map_zone_name" href="#"><em class="map_zone_id" style="display:none;">2390</em><span title="제주대 후문주차장">제주대 후문주차장</span><span class="bg"></span></a></h4><p><a title="제주도 제주시 아라1동 368-60">제주도 제주시 아라1동 368-6...</a><br>운영차량 : 총 <em>4</em>대</p><p class="btn"><em class="map_zone_id" style="display:none;">2390</em><a class="btn_overlay_search" href="#"><img src="/template/asset/images/reservation/btn_able_socar.png" alt="예약가능 쏘카 보기"></a></p><img src="/template/asset/images/reservation/btn_layer_close.png" alt="닫기"></div> P.S : is it possible crawling text of tooltip window on google map marker
When you click a tooltip, an XHR request is sent to https://api.socar.kr/reserve/zone_info using a zone_id. You may have to filter out the zones you want by using the page content. I don't have any more time to spend on this right now, but this recreates the requests: import requests from time import time, sleep # These params will be for https://api.socar.kr/reserve/oneway_zone_list # which we can get the zone_ids from. params = {"type": "start", "_": str(time())} # We use the zone_id from each dict we parse from the json received params2 = {"zone_id": ""} with requests.Session() as s: s.get("http://www.socar.kr/reserve#jeju") s.headers.update({ "User-Agent": "Mozilla/5.0 (X11; Linux x86_64)"}) r = s.get("https://api.socar.kr/reserve/oneway_zone_list", params=params) result = r.json()["result"] for d in result: params2["zone_id"] = d["zone_id"] params2["_"] = str(time()) sleep(1) r2 = s.get("https://api.socar.kr/reserve/zone_info", params=params2) print(r2.json()) Each d in result is a dict like: {u'zone_lat': u'37.248859', u'zone_id': u'2902', u'zone_region1_short': u'\uacbd\uae30', u'zone_open_time': u'00:00:00', u'zone_region1': u'\uacbd\uae30\ub3c4', u'zone_close_time': u'23:59:59', u'zone_name': u'SK\ud558\uc774\ub2c9\uc2a4 \uc774\ucc9c', u'open_weekend': u'close', u'zone_region3': u'\ubd80\ubc1c\uc74d', u'zone_region2': u'\uc774\ucc9c\uc2dc', u'zone_lng': u'127.490639', u'zone_addr': u'\uacbd\uae30\ub3c4 \uc774\ucc9c\uc2dc \ubd80\ubc1c\uc74d \uc544\ubbf8\ub9ac 707'} There is probably other info in there that would allow you to filter by a specific place; I don't speak Korean, so I cannot completely follow how the data relates. 
The second request gives us a dict like: {u'retCode': u'1', u'retMsg': u'', u'result': {u'oper_way': u'\uc655\ubcf5', u'notice': u'<br>\u203b \ubc18\ub4dc\uc2dc \ubc29\ubb38\uc790 \uc8fc\ucc28\uc7a5 \uc9c0\uc815\uc8fc\ucc28\uad6c\uc5ed\uc5d0 \ubc18\ub0a9\ud574\uc8fc\uc138\uc694.<br>', u'notice_oneway': u'', u'zone_addr': u'\uacbd\uae30\ub3c4 \uc774\ucc9c\uc2dc \ubd80\ubc1c\uc74d \uc544\ubbf8\ub9ac 707', u'total_num': 2, u'able_num': 2, u'visit': u'\uc131\uc6b02\ub2e8\uc9c0 \uc544\ud30c\ud2b8 \uae30\uc900 \uc804\ubc29 \ud604\ub300\uc5d8\ub9ac\ubca0\uc774\ud130 \ubc29\uba74\uc73c\ub85c \ud6a1\ub2e8\ubcf4\ub3c4 \uc774\uc6a9 \ud6c4 \ud558\uc774\ub2c9\uc2a4 \uc774\ucc9c \ubc29\ubb38\uc790 \uc8fc\ucc28\uc7a5 \ub0b4 \uc3d8\uce74\uc804\uc6a9\uc8fc\ucc28\uad6c\uc5ed', u'zone_alias': u'\ud558\uc774\ub2c9\uc2a4 \ubc29\ubb38\uc790 \uc8fc\ucc28\uc7a5', u'zone_attr': u'[\uc774\ubca4\ud2b8]', u'state': True, u'link': u'http://blog.socar.kr/4074', u'oper_time': u'00:00~23:59', u'lat': u'37.248859', u'zone_name': u'SK\ud558\uc774\ub2c9\uc2a4 \uc774\ucc9c', u'lng': u'127.490639', u'zone_props': 0, u'visit_link': u'http://dmaps.kr/24ij6', u'zone_id': u'2902'}} Again, I'm not sure of everything that is in there, but you can see HTML tags under u'notice' and lots of other info.