How to Detect Popup and Close it Using Selenium Python (Google Chrome)

Hey, I am scraping shop URLs from a Shopify app's reviews, but while I am navigating through the search results, a popup appears and I have no idea how to detect and close it.
Here's my code:
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

driver = webdriver.Chrome()
url = 'https://apps.shopify.com/sales-pop'
driver.get(url)

# Loop and navigate through the search results
page_number = 2
while True:
    try:
        link = driver.find_element_by_link_text(str(page_number))
    except NoSuchElementException:
        break
    if page_number > 8:
        timeout = 20
        try:
            WebDriverWait(driver, timeout).until(EC.visibility_of_element_located((By.XPATH, '//div[@title="close"]')))
        except TimeoutException:
            print("Timed out waiting for page to load")
            driver.quit()
        # Switch to the popup
        driver.switch_to_alert()
        driver.find_element_by_xpath('//div[@title="close"]').click()
        driver.implicitly_wait(5)
        link.click()
        print(driver.current_url)
        page_number += 1
    else:
        driver.implicitly_wait(5)
        link.click()
        print(driver.current_url)
        page_number += 1

# Scraping ratings
stars = driver.find_elements_by_xpath('//figure[@class="resourcesreviews-reviews-star"]')
starstars = []
for star in stars:
    starstar = star.find_element_by_xpath('.//div/span')
    starstars.append(starstar.get_attribute('class'))

# Scraping URLs
urls = driver.find_elements_by_xpath('//figcaption[@class="clearfix"]')
titles = []
for url in urls:
    title = url.find_element_by_xpath('.//strong/a')
    titles.append(title.get_attribute('href'))

# Print titles and ratings side by side
for titless, starstarss in zip(titles, starstars):
    print(titless + " " + starstarss)

You can just use WebDriverWait and window_handles. Specifically, you can probably replace your "# Switch to the popup" section with something like:
WebDriverWait(driver, 5).until(lambda d: len(d.window_handles) == 2)
driver.switch_to.window(driver.window_handles[1])
driver.close()
driver.switch_to.window(driver.window_handles[0])
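If the popup turns out to be an in-page overlay rather than a separate browser window (the question's own wait targets //div[@title="close"]), a small helper like this sketch can dismiss it before each click. The locator is taken from the question and is an assumption about the live page:

from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def dismiss_overlay_if_present(driver, timeout=3):
    # Click the overlay's close button if it appears; otherwise do nothing.
    # The XPath comes from the question and may not match the live site.
    try:
        close_btn = WebDriverWait(driver, timeout).until(
            EC.element_to_be_clickable((By.XPATH, '//div[@title="close"]')))
        close_btn.click()
    except TimeoutException:
        pass  # no popup this time

Call it right before each link.click() inside the loop.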

Related

How to open and access multiple (nearly 50) tabs in Chrome using ChromeDriver and Selenium through Python

I'm trying to gather some information from certain webpages using Selenium and Python. I have working code for a single tab, but now I have a situation where I need to open 50 tabs in Chrome at once and process each page's data.
1) Open 50 tabs at once - the code I already have
2) Switch control between tabs, process the information from each page, close the tab and move to the next tab, doing the same.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import psycopg2
import os
import datetime

final_results = []
positions = []
saerched_url = []

options = webdriver.ChromeOptions()
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
#options.add_argument('--headless')
options.add_argument("--incognito")
browser = webdriver.Chrome(executable_path='/users/user_123/downloads/chrome_driver/chromedriver', chrome_options=options)
browser.implicitly_wait(20)

#def db_connect():
try:
    DSN = "dbname='postgres' user='postgres' host='localhost' password='postgres' port='5432'"
    TABLE_NAME = 'staging.search_url'
    conn = psycopg2.connect(DSN)
    print("Database connected...")
    cur = conn.cursor()
    cur.execute("SET datestyle='German'")
except (Exception, psycopg2.Error) as error:
    print('database connection failed')
    quit()

def get_products(url):
    browser.get(url)
    names = browser.find_elements_by_xpath("//span[@class='pymv4e']")
    upd_product_name_list = list(filter(None, names))
    product_name = [x.text for x in upd_product_name_list]
    product = [x for x in product_name if len(x.strip()) > 2]
    upd_product_name_list.clear()
    product_name.clear()
    return product

links = ['https://www.google.com/search?q=Vitamin+D',
         'https://www.google.com/search?q=Vitamin+D3',
         'https://www.google.com/search?q=Vitamin+D+K2',
         'https://www.google.com/search?q=D3',
         'https://www.google.com/search?q=Vitamin+D+1000']

for link in links:
    # optional: we can wait for the new tab to open by comparing window handle counts before & after
    tabs_count_before = len(browser.window_handles)
    # open a link
    control_string = "window.open('{0}')".format(link)
    browser.execute_script(control_string)
    # optional: wait for the window count to increment to ensure the new tab is opened
    WebDriverWait(browser, 1).until(lambda browser: tabs_count_before != len(browser.window_handles))
    # get the list of currently opened tabs
    tabs_list = browser.window_handles
    print(tabs_list)
    # switch control to the newly opened tab (the last one in the list)
    last_tab_opened = tabs_list[len(tabs_list) - 1]
    browser.switch_to_window(last_tab_opened)
    # now you can process data on the newly opened tab
    print(browser.title)
    for lists in tabs_list:
        last_tab_opened = tabs_list[len(tabs_list) - 1]
        browser.switch_to_window(last_tab_opened)
        filtered = []
        filtered.clear()
        filtered = get_products(link)
        saerched_url.clear()
        if not filtered:
            new_url = link + '+kaufen'
            get_products(link)
            print('Modified URL :' + link)
        if filtered:
            print(filtered)
            positions.clear()
            for x in range(1, len(filtered) + 1):
                positions.append(str(x))
                saerched_url.append(link)
            gobal_position = 0
            gobal_position = len(positions)
            print('global postion first: ' + str(gobal_position))
            print("\n")
            company_name_list = browser.find_elements_by_xpath("//div[@class='LbUacb']")
            company = []
            company.clear()
            company = [x.text for x in company_name_list]
            print('Company Name:')
            print(company, '\n')
            price_list = browser.find_elements_by_xpath("//div[@class='e10twf T4OwTb']")
            price = []
            price.clear()
            price = [x.text for x in price_list]
            print('Price:')
            print(price)
            print("\n")
            urls = []
            urls.clear()
            find_href = browser.find_elements_by_xpath("//a[@class='plantl pla-unit-single-clickable-target clickable-card']")
            for my_href in find_href:
                url_list = my_href.get_attribute("href")
                urls.append(url_list)
            print('Final Result: ')
            result = zip(positions, filtered, urls, company, price, saerched_url)
            final_results.clear()
            final_results.append(tuple(result))
            print(final_results)
            print("\n")
            print('global postion end :' + str(gobal_position))
            i = 0
            try:
                for d in final_results:
                    while i <= gobal_position:
                        print(d[i])
                        cur.execute("""INSERT into staging.pla_crawler_results(position, product_name, url, company, price, searched_url) VALUES (%s, %s, %s, %s, %s, %s)""", d[i])
                        print('Inserted succesfully')
                        conn.commit()
                        i = i + 1
            except (Exception, psycopg2.Error) as error:
                print(error)
                pass
        browser.close()
Ideally, you shouldn't attempt to open 50 tabs at once:
Handling 50 concurrent tabs through Selenium requires complicated logic that is hard to maintain.
Additionally, you may run into CPU and memory usage issues:
Chrome spawns many processes.
Firefox, at times, uses too much RAM.
Solution
If you have a list of URLs as follows:
['https://selenium.dev/downloads/', 'https://selenium.dev/documentation/en/']
you can iterate over the list and open them one by one, each in a fresh browser session, for scraping:
Code Block:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.alert import Alert
from selenium.webdriver.common.keys import Keys

links = ['https://selenium.dev/downloads/', 'https://selenium.dev/documentation/en/']
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
for link in links:
    driver = webdriver.Chrome(options=options, executable_path=r'C:\Utility\BrowserDrivers\chromedriver.exe')
    driver.get(link)
    print(driver.title)
    print("Perform webscraping here")
    driver.quit()
print("End of program")
Console Output:
Downloads
Perform webscraping here
The Selenium Browser Automation Project :: Documentation for Selenium
Perform webscraping here
End of program
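Note that the loop above starts and quits a fresh browser for every URL, which is simple but slow. A hedged alternative sketch (not part of the original answer) reuses a single driver and navigates it through the list, keeping resource usage flat:

from selenium import webdriver

links = ['https://selenium.dev/downloads/', 'https://selenium.dev/documentation/en/']
driver = webdriver.Chrome()  # assumes chromedriver is on your PATH
for link in links:
    driver.get(link)  # reuse the same tab for every URL
    print(driver.title)
    # perform webscraping here
driver.quit()
print("End of program")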
Reference
You can find a relevant detailed discussion in:
WebScraping JavaScript-Rendered Content using Selenium in Python

Get access to data in dynamic table using Selenium

The code below works and fills out the two forms required to get to the table on this page: https://forsikringsguiden.dk/#!/bilforsikring/resultatside
Once the forms are filled out, the table shows an overview of different insurance firms and how much you will pay yearly for car insurance, i.e. a comparison service. I need to scrape this overview once a week.
I am unsure how to do this. I know how to use BS4 for scraping, but I also need the firm names, not only the figures, and those are not available by inspecting the website in Chrome. If I dive into the network tab and look at the XHR requests, I find this link:
https://forsikringsguiden.dk/signalr/poll?transport=longPolling&messageId=d-D7589F50-A%2C0%7C9%2C0%7C_%2C1%7C%3A%2C0&clientProtocol=1.4&connectionToken=fUYa3MT52oKf77Y6yU1sLnXiVzPw2CD4XgA8x50EfifJlz8XTPjBeP0klHUKt2uXmnisqO0KLk3fCb5bjOZ8k%2FeJl8zaXAgtRIALW9rzMF%2F8L7Pk3MOYwPRY4md1sDk5&connectionData=%5B%7B%22name%22%3A%22insuranceofferrequesthub%22%7D%5D&tid=9&_=1572505813840
It shows me all the data I need, but I cannot navigate to this page in Selenium.
How do I tackle this problem?
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time

chrome_options = Options()
chrome_options.add_argument("--disable-extensions")
chrome_options.add_argument("--disable-gpu")

# enable browser logging
d = DesiredCapabilities.CHROME
d['loggingPrefs'] = {'browser': 'ALL'}
driver = webdriver.Chrome(desired_capabilities=d, options=chrome_options)
driver.fullscreen_window()
wait = WebDriverWait(driver, 1)

driver.get("https://forsikringsguiden.dk/#!/bilforsikring/manuel")
#time.sleep(5)

# remove cookie bar
driver.find_element_by_id('cookieBarAccept').click()

maerke = driver.find_element_by_xpath('//*[@id="s2id_carSelectedMake"]/a').click()
driver.find_element_by_xpath('//*[@id="s2id_autogen1_search"]').send_keys("Hyundai")
driver.minimize_window()
driver.maximize_window()
driver.find_element_by_xpath('//*[@id="select2-drop"]').click()

model = driver.find_element_by_xpath('//*[@id="s2id_autogen2"]').click()
driver.find_element_by_xpath('//*[@id="s2id_autogen3_search"]').send_keys("i30")
driver.minimize_window()
driver.maximize_window()
time.sleep(1)
driver.find_element_by_xpath('//*[@id="select2-drop"]').click()
driver.execute_script("scrollBy(0,250)")

aargang = driver.find_element_by_xpath('//*[@id="s2id_autogen4"]/a').click()
time.sleep(1)
driver.find_element_by_xpath('//*[@id="s2id_autogen5_search"]').send_keys("2009")
driver.minimize_window()
driver.maximize_window()
time.sleep(1)
driver.find_element_by_xpath('//*[@id="select2-drop"]').click()
driver.execute_script("scrollBy(0,250)")

motor_str = driver.find_element_by_xpath('//*[@id="s2id_autogen6"]/a').click()
time.sleep(1)
driver.find_element_by_xpath('//*[@id="s2id_autogen7_search"]').send_keys("1,6")
driver.minimize_window()
driver.maximize_window()
time.sleep(1)
driver.find_element_by_xpath('//*[@id="select2-drop"]').click()

variant = driver.find_element_by_xpath('//*[@id="s2id_autogen8"]').click()
time.sleep(1)
driver.find_element_by_xpath('//*[@id="s2id_autogen9_search"]').send_keys("1,6 CRDi 116HK 5d")
driver.minimize_window()
driver.maximize_window()
time.sleep(1)
driver.find_element_by_xpath('//*[@id="select2-drop"]').click()
driver.execute_script("scrollBy(0,250)")

godkend_oplysninger = driver.find_element_by_xpath('//*[@id="content"]/div[4]/form/div[6]/div/button').click()

# the "Om dig" (About you) page
driver.get("https://forsikringsguiden.dk/#!/bilforsikring/omdig")
alder = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="content"]/div/div[2]/div[2]/form/div[1]/div[1]/div/input')))
alder.send_keys("50")
adresse = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="adresse-autocomplete"]')))
adresse.send_keys("Havevang 8, 3. th, 4300 Holbæk", Keys.ENTER)
aar = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="content"]/div/div[2]/div[2]/form/div[2]/div/div/input')))
aar.send_keys("10")
driver.execute_script("scrollBy(0,250)")

# number of claims ("Antal skader")
driver.find_element_by_xpath('/html/body/div[6]/div/div[2]/div/div[2]/div[2]/form/div[3]/div/div/div[2]').click()
wait
driver.find_element_by_xpath('/html/body/div[11]/ul/li[3]').click()
driver.minimize_window()
driver.maximize_window()
time.sleep(1)
driver.find_element_by_xpath('//*[@id="select2-drop"]').click()
time.sleep(1)

# claim 1 ("skade 1")
driver.find_element_by_xpath('/html/body/div[6]/div/div[2]/div/div[2]/div[2]/form/div[4]/div/div[1]/div/div[2]').click()
wait
driver.find_element_by_xpath('/html/body/div[12]/ul/li[5]').click()
driver.minimize_window()
driver.maximize_window()
time.sleep(1)
driver.find_element_by_xpath('//*[@id="select2-drop"]').click()
time.sleep(1)

# claim 2 ("skade 2")
driver.find_element_by_xpath('/html/body/div[6]/div/div[2]/div/div[2]/div[2]/form/div[4]/div/div[2]/div/div[2]').click()
wait
driver.find_element_by_xpath('/html/body/div[13]/ul/li[3]').click()
driver.minimize_window()
driver.maximize_window()
time.sleep(1)
driver.find_element_by_xpath('//*[@id="select2-drop"]').click()
time.sleep(1)

find_daekning = driver.find_element_by_xpath('//*[@id="content"]/div/div[2]/div[2]/form/div[5]/div/button').click()
EDIT:
I added this to my code using Selenium:
##### Get Data #####
driver.get("https://forsikringsguiden.dk/#!/bilforsikring/resultatside")
wait = WebDriverWait(driver,10)
wait
res_element = driver.find_elements_by_xpath('/html/body/div[7]/div/div[2]/div[1]/div[2]/div[2]')
res = [x.text for x in res_element]
print(res, "\n")
But it doesn't get me the numbers, just some of the text.
Here's the result:
['Sortér efter: Forklaring\nGå til selskab\nDin dækning\nkr./år -\nMed samlerabat kr./år\nSelvrisiko\nSe detaljer\nSammenlign\nGå til selskab\nDin dækning\nkr./år -\nMed samlerabat kr./år\nSelvrisiko\nSe detaljer\nSammenlign\nGå til selskab\nDin dækning\nkr./år -\nMed samlerabat kr./år\nSelvrisiko\nSe detaljer\nSammenlign\n
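A likely reason the numbers are missing is that the XPath above matches a single container, so .text flattens everything it can see into one string. A hedged sketch (the row and field selectors below are assumptions and must be checked against the live markup in DevTools; it reuses the imports already in the script) that waits for the result rows and reads each firm's name and price separately might look like:

rows = WebDriverWait(driver, 20).until(
    EC.presence_of_all_elements_located(
        (By.CSS_SELECTOR, "div.result-row")))  # hypothetical row selector
for row in rows:
    # firm logos often carry the company name in their alt text (an assumption)
    name = row.find_element_by_css_selector("img").get_attribute("alt")
    # hypothetical price selector
    price = row.find_element_by_css_selector("span.price").text
    print(name, price)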

Selenium finds only a fraction of href links

I am trying to get all the products' URLs from this webpage, but I managed to get only a fraction of them.
My first attempt was to scrape the webpage with BeautifulSoup, but then I realized Selenium would be better, as I needed to click the "Show more" button several times. I also added code to scroll down the page, as I thought that was the problem, but the result didn't change.
import time
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

def getListingLinks(link):
    # Open the driver
    driver = webdriver.Chrome(executable_path="")
    driver.maximize_window()
    driver.get(link)
    time.sleep(3)
    # scroll down: repeated to ensure it reaches the bottom and all items are loaded
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(3)
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(3)
    listing_links = []
    while True:
        try:
            driver.execute_script("return arguments[0].scrollIntoView(true);", WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, '//*[@id="main-content"]/div[2]/div[2]/div[4]/button'))))
            driver.execute_script("arguments[0].click();", WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#main-content > div:nth-child(2) > div.main-column > div.btn-wrapper.center > button"))))
            print("Button clicked")
            links = driver.find_elements_by_class_name('fop-contentWrapper')
            for link in links:
                algo = link.find_element_by_css_selector('.fop-contentWrapper a').get_attribute('href')
                print(algo)
                listing_links.append(str(algo))
        except:
            print("No more Buttons")
            break
    driver.close()
    return listing_links

fresh_food = getListingLinks("https://www.ocado.com/browse/fresh-20002")
print(len(fresh_food))  ## Output: 228
As you can see, I get 228 URLs, while I would like to get 5605 links, which is the actual number of products on the webpage according to Ocado. I believe I have a problem with the order of my code, but can't find the proper order. I would sincerely appreciate any help.
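One observation on the code above (what follows is a hedged sketch, since Ocado's markup may have changed): the links are collected inside the while loop, so the harvest only runs while clicks still succeed and never sees the fully expanded page, and earlier products are re-appended on every pass. Exhausting the "Show more" button first and collecting the links once at the end avoids both the undercount and the duplicates:

from selenium.common.exceptions import TimeoutException

# sketch: assumes `driver` is already on the category page
while True:
    try:
        button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable(
                (By.CSS_SELECTOR, "div.btn-wrapper.center > button")))  # selector from the question
        driver.execute_script("arguments[0].click();", button)
    except TimeoutException:
        break  # "Show more" is gone; all products are loaded
listing_links = [el.get_attribute('href')
                 for el in driver.find_elements_by_css_selector('.fop-contentWrapper a')]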

How to check if auto suggestion exists?

I need to check whether the window with search suggestions exists. When you type something into the search box, a list of suggested searches appears. I need to check whether this popup window exists.
Code trials:
import time
import unittest
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException

class YandexSearchRu(unittest.TestCase):
    def setUp(self):
        self.driver = webdriver.Chrome()

    def test_search(self):
        driver = self.driver
        driver.get("http://www.yandex.ru")
        try:
            input = driver.find_element_by_xpath("//*[@id='text']")
        except NoSuchElementException:
            driver.close()
        input.send_keys("Тензор")
        input.send_keys(Keys.RETURN)
        time.sleep(5)

    def tearDown(self):
        self.driver.close()

if __name__ == "__main__":
    unittest.main()
Try to do this:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException

driver.get("http://www.yandex.ru")
try:
    input = driver.find_element_by_xpath("//*[@id='text']")
    input.send_keys("adfadf")
    popup = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, "body > div.i-bem.popup > div.popup__content")))
    if popup.is_displayed():
        print("popup displayed")
    else:
        print("popup not visible")
except (NoSuchElementException, TimeoutException):
    print("popup not found")
The element isn't a popup window but an auto-suggestions list, and to extract the auto suggestions you can use the following solution:
Code Block:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = webdriver.ChromeOptions()
options.add_argument('start-maximized')
options.add_argument('disable-infobars')
options.add_argument('--disable-extensions')
driver = webdriver.Chrome(chrome_options=options, executable_path=r'C:\WebDrivers\chromedriver.exe')
driver.get('http://www.yandex.ru')
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input.input__control.input__input"))).send_keys("Тензор")
print([auto_suggestion.text for auto_suggestion in WebDriverWait(driver, 5).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.popup__content>div.suggest2__content.suggest2__content_theme_normal li>span.suggest2-item__text")))])
Console Output:
['тензор', 'тензор официальный сайт', 'тензор техподдержка', 'тензорное исчисление', 'тензор спб', 'тензорные ядра', 'тензорный анализ', 'тензор эцп', 'тензорезистор', 'тензор инерции']
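If a plain yes/no check is enough (the original ask), the same wait can be wrapped in a try/except. This is a sketch using a shortened form of the selector above, which should be verified against the live page:

from selenium.common.exceptions import TimeoutException

def suggestions_present(driver, timeout=5):
    # True if the auto-suggestion popup becomes visible within `timeout` seconds
    try:
        WebDriverWait(driver, timeout).until(
            EC.visibility_of_element_located(
                (By.CSS_SELECTOR, "div.popup__content div.suggest2__content")))
        return True
    except TimeoutException:
        return False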

How to loop through elements on a webpage that keeps refreshing its web elements

I need to loop through the elements of a webpage and scrape data from each of them, but the web elements keep refreshing every 25 seconds and my code does not finish iterating over all the elements in that time. After that moment I get the "element is not attached to the page document" error:
driver.get("https://www.luckia.es/apuestas")
time.sleep(5)
driver.switch_to.frame("sbtechBC")
eventos_de_hoy=driver.find_element_by_id("today_event_btn")
eventos_de_hoy.click()
time.sleep(7)
ligi = driver.find_elements_by_class_name("leagueWindow ")
print(len(ligi))
for items in ligi:
driver.execute_script("arguments[0].scrollIntoView(true);", items)
nume_liga= items.find_element_by_tag_name("h5")
print(nume_liga.text)
I am fresh out of ideas.
You can try the code below to avoid the script breaking on StaleElementReferenceException:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException

driver = webdriver.Chrome()
driver.get("https://www.luckia.es/apuestas")
WebDriverWait(driver, 10).until(EC.frame_to_be_available_and_switch_to_it("sbtechBC"))
eventos_de_hoy = driver.find_element_by_id("today_event_btn")
eventos_de_hoy.click()
ligi_len = len(WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CLASS_NAME, "leagueWindow "))))
print(ligi_len)
for index in range(ligi_len):
    try:
        item = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CLASS_NAME, "leagueWindow ")))[index]
        driver.execute_script("arguments[0].scrollIntoView(true);", item)
        nume_liga = item.find_element_by_tag_name("h5")
        print(nume_liga.text)
    except StaleElementReferenceException:
        item = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CLASS_NAME, "leagueWindow ")))[index]
        driver.execute_script("arguments[0].scrollIntoView(true);", item)
        nume_liga = item.find_element_by_tag_name("h5")
        print(nume_liga.text)
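If the page rewrites the DOM faster than even this retry loop can run, another hedged option (a sketch; the class name is taken from the question) is to pull all the titles in a single execute_script round trip, so no Python-side element reference is held long enough to go stale:

# read every league title in one JavaScript call, avoiding stale references
titles = driver.execute_script(
    "return Array.from(document.querySelectorAll('.leagueWindow h5'))"
    ".map(function(el) { return el.textContent.trim(); });")
for title in titles:
    print(title)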
