I need to loop through the elements of a webpage and scrape data from each of them, but the web elements refresh every 25 seconds and my code does not finish iterating over all of them in that time; after that moment I get the "element is not attached to the page document" error:
driver.get("https://www.luckia.es/apuestas")
time.sleep(5)
driver.switch_to.frame("sbtechBC")
eventos_de_hoy=driver.find_element_by_id("today_event_btn")
eventos_de_hoy.click()
time.sleep(7)
ligi = driver.find_elements_by_class_name("leagueWindow ")
print(len(ligi))
for items in ligi:
    driver.execute_script("arguments[0].scrollIntoView(true);", items)
    nume_liga = items.find_element_by_tag_name("h5")
    print(nume_liga.text)
I am fresh all out of ideas.
You can try the code below to keep the script from breaking on StaleElementReferenceException:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException
driver = webdriver.Chrome()
driver.get("https://www.luckia.es/apuestas")
WebDriverWait(driver, 10).until(EC.frame_to_be_available_and_switch_to_it("sbtechBC"))
eventos_de_hoy = driver.find_element_by_id("today_event_btn")
eventos_de_hoy.click()
ligi_len = len(WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CLASS_NAME, "leagueWindow "))))
print(ligi_len)
for index in range(ligi_len):
    try:
        item = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CLASS_NAME, "leagueWindow ")))[index]
        driver.execute_script("arguments[0].scrollIntoView(true);", item)
        nume_liga = item.find_element_by_tag_name("h5")
        print(nume_liga.text)
    except StaleElementReferenceException:
        item = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CLASS_NAME, "leagueWindow ")))[index]
        driver.execute_script("arguments[0].scrollIntoView(true);", item)
        nume_liga = item.find_element_by_tag_name("h5")
        print(nume_liga.text)
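The try/except above duplicates the re-locate logic; the same idea can be pulled into a small retry helper (a sketch reusing the imports and locator from the answer above; get_league_name is a hypothetical name):

def get_league_name(driver, index, attempts=3):
    # re-locate the list on every attempt so we never hold on to a stale reference
    for _ in range(attempts):
        try:
            items = WebDriverWait(driver, 10).until(
                EC.presence_of_all_elements_located((By.CLASS_NAME, "leagueWindow ")))
            item = items[index]
            driver.execute_script("arguments[0].scrollIntoView(true);", item)
            return item.find_element_by_tag_name("h5").text
        except StaleElementReferenceException:
            continue  # the page refreshed mid-read; look everything up again
    raise StaleElementReferenceException("still stale after %d attempts" % attempts)

for index in range(ligi_len):
    print(get_league_name(driver, index))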
I'm trying to scrape moneycontrol.com. When I try to send a value to the search box, I keep getting the same "Element not Found" error in the except block.
I tried using the XPath by id as well as the full XPath, but it doesn't work in either case (without maximizing the window).
XPath id - //*[@id="search_str"]
Full XPath - /html/body/div[1]/header/div[1]/div[1]/div/div/div[2]/div/div/form/input[5]
Attaching the full code below:
import time
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
def search_stock():
    driver = webdriver.Chrome(
        r'./chromedriver')
    driver.get('https://www.moneycontrol.com/')
    time.sleep(5)
    search_icon = driver.find_element_by_xpath(
        '//*[@id="fixedheader"]/div[4]/span')
    search_icon.click()
    time.sleep(2)
    try:
        search_box = driver.find_element_by_xpath('//*[@id="search_str"]')
        print("Element is visible? " + str(search_box.is_displayed()))
        time.sleep(10)
        if search_box.is_displayed():
            search_box.send_keys('Zomato')
            search_box.send_keys(Keys.RETURN)
    except NoSuchElementException:
        print("Element not found")
    driver.close()

search_stock()
Sometimes it starts working, but most of the time it throws exceptions and errors. I have been struggling with this for 3 days, and none of the solutions work.
Web scraping like that seems quite inefficient; it is probably better to use requests and bs4. However, if you want to do it like this, you could try using action chains, found here. Or you can do driver.get('https://www.moneycontrol.com/india/stockpricequote/consumer-food/zomato/Z') from the start instead of typing it in.
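For the requests/bs4 route, here is a minimal sketch, assuming the data you need is present in the server-rendered HTML (the quote URL is the one mentioned above; the selectors you would actually scrape with still have to be read off the live page):

import requests
from bs4 import BeautifulSoup

url = 'https://www.moneycontrol.com/india/stockpricequote/consumer-food/zomato/Z'
# a browser-like User-Agent, since some sites refuse the default python-requests one
response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')
print(soup.title.get_text(strip=True))  # from here, inspect the HTML and pick real selectors

No browser process is involved, so there is nothing to go stale or time out.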
You may want to try the code below:
def search_stock():
    driver = webdriver.Chrome(r'./chromedriver')
    driver.maximize_window()
    driver.implicitly_wait(30)
    driver.get('https://www.moneycontrol.com/')
    wait = WebDriverWait(driver, 10)
    time.sleep(5)
    try:
        ActionChains(driver).move_to_element(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[id='search_str']")))).perform()
        search_box = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[id='search_str']")))
        print("Element is visible? ", search_box.is_displayed())
        time.sleep(10)
        if search_box.is_displayed():
            search_box.send_keys('Zomato')
            search_box.send_keys(Keys.RETURN)
    except NoSuchElementException:
        print("Element not found")
Imports:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
Try clicking on search_box first, and only after that send text to it.
search_box = driver.find_element_by_xpath('//form[@id="form_topsearch"]//input[@id="search_str"]')
search_box.click()
time.sleep(0.1)
search_box.send_keys('Zomato')
search_box.send_keys(Keys.RETURN)
Additionally, I would advise using explicit waits with expected conditions instead of hardcoded sleeps.
With them your code will be faster and more reliable.
import time
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def search_stock():
    driver = webdriver.Chrome(r'./chromedriver')
    wait = WebDriverWait(driver, 20)
    driver.get('https://www.moneycontrol.com/')
    wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="fixedheader"]/div[4]/span'))).click()
    search_box = wait.until(EC.element_to_be_clickable((By.XPATH, '//form[@id="form_topsearch"]//input[@id="search_str"]')))
    search_box.send_keys('Zomato')
    search_box.send_keys(Keys.RETURN)
    # I'm not sure you should close the driver immediately after invoking the search...
    # driver.close()

search_stock()
UPD
Let's try this
import time
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
def search_stock():
    driver = webdriver.Chrome(r'./chromedriver')
    wait = WebDriverWait(driver, 20)
    actions = ActionChains(driver)
    driver.get('https://www.moneycontrol.com/')
    search_icon = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="fixedheader"]/div[4]/span')))
    time.sleep(0.5)
    driver.execute_script("arguments[0].scrollIntoView();", search_icon)
    driver.execute_script("arguments[0].click();", search_icon)
    search_box = wait.until(EC.presence_of_element_located((By.XPATH, '//form[@id="form_topsearch"]//input[@id="search_str"]')))
    driver.execute_script("arguments[0].scrollIntoView();", search_box)
    driver.execute_script("arguments[0].click();", search_box)
    time.sleep(0.5)
    search_box.send_keys('Zomato')
    search_box.send_keys(Keys.RETURN)
    # I'm not sure you should close the driver immediately after invoking the search...
    # driver.close()

search_stock()
If the above solution is still not working, then instead of
actions.move_to_element(search_box).click().perform()
try
driver.execute_script("arguments[0].click();", search_box)
I am trying to click an element with Selenium ChromeDriver, using the element's ID.
I want to click the year "2020" on the following webpage: https://www.satp.org/datasheet-terrorist-attack/major-incidents/Pakistan
I tried with the below code.
driver = webdriver.Chrome(executable_path=ChromeDriver_Path, options = options)
driver.get('https://www.satp.org/datasheet-terrorist-attack/major-incidents/Pakistan')
Id = "ctl00_ContentPlaceHolder1_gvMajorIncident_ct123_lbtnYear" ### Id of an Element 2020
wait = WebDriverWait(driver, 20) ##Wait for 20 seconds
element = wait.until(EC.element_to_be_clickable((By.ID, Id)))
driver.execute_script("arguments[0].scrollIntoView();", element)
element.click()
time.sleep(10)
but unfortunately this gives an error, as below:
element = wait.until(EC.element_to_be_clickable((By.ID, Id)))
File "C:\Users\Pavan\AppData\Local\Programs\Python\Python37-32\lib\site-packages\selenium\webdriver\support\wait.py", line 80, in until
raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message:
Please, can anyone help me with this? Thanks.
I don't know if your imports were correct. Also, your code doesn't scroll to the bottom of the page. Use this.
import os
from selenium.webdriver.support import expected_conditions as EC
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
driver = webdriver.Chrome(executable_path='driver path')
driver.get('https://www.satp.org/datasheet-terrorist-attack/major-incidents/Pakistan')
driver.maximize_window()
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(3)
element = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="ctl00_ContentPlaceHolder1_gvMajorIncident_ctl23_lbtnYear"]')))
element.click()
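As a side note, that auto-generated ASP.NET ID is easy to get wrong (the question has ct123 where the locator above has ctl23). If the ID ever shifts, matching on the visible year text is a more readable alternative; a sketch, assuming the year is rendered as the link's text:

element = WebDriverWait(driver, 15).until(
    EC.element_to_be_clickable((By.XPATH, "//a[normalize-space(text())='2020']")))
element.click()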
I'm trying to gather some information from certain webpages using Selenium and Python. I have working code for a single tab, but now I have a situation where I need to open 50 tabs in Chrome at once and process each page's data.
1) Open 50 tabs at once - I already have the code for this.
2) Switch control between tabs, process the information from each page, close the tab, and move on to the next one to do the same.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import psycopg2
import os
import datetime
final_results=[]
positions=[]
saerched_url=[]
options = webdriver.ChromeOptions()
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
#options.add_argument('--headless')
options.add_argument("--incognito")
browser = webdriver.Chrome(executable_path='/users/user_123/downloads/chrome_driver/chromedriver', chrome_options=options)
browser.implicitly_wait(20)
#def db_connect():
try:
    DSN = "dbname='postgres' user='postgres' host='localhost' password='postgres' port='5432'"
    TABLE_NAME = 'staging.search_url'
    conn = psycopg2.connect(DSN)
    print("Database connected...")
    cur = conn.cursor()
    cur.execute("SET datestyle='German'")
except (Exception, psycopg2.Error) as error:
    print('database connection failed')
    quit()

def get_products(url):
    browser.get(url)
    names = browser.find_elements_by_xpath("//span[@class='pymv4e']")
    upd_product_name_list = list(filter(None, names))
    product_name = [x.text for x in upd_product_name_list]
    product = [x for x in product_name if len(x.strip()) > 2]
    upd_product_name_list.clear()
    product_name.clear()
    return product
links = ['https://www.google.com/search?q=Vitamin+D',
'https://www.google.com/search?q=Vitamin+D3',
'https://www.google.com/search?q=Vitamin+D+K2',
'https://www.google.com/search?q=D3',
'https://www.google.com/search?q=Vitamin+D+1000']
for link in links:
    # optional: we can wait for the new tab to open by comparing window handles count before & after
    tabs_count_before = len(browser.window_handles)
    # open a link
    control_string = "window.open('{0}')".format(link)
    browser.execute_script(control_string)
    # optional: wait for windows count to increment to ensure new tab is opened
    WebDriverWait(browser, 1).until(lambda browser: tabs_count_before != len(browser.window_handles))
    # get list of currently opened tabs
    tabs_list = browser.window_handles
    print(tabs_list)
    # switch control to newly opened tab (the last one in the list)
    last_tab_opened = tabs_list[len(tabs_list)-1]
    browser.switch_to_window(last_tab_opened)
    # now you can process data on the newly opened tab
    print(browser.title)

for lists in tabs_list:
    last_tab_opened = tabs_list[len(tabs_list)-1]
    browser.switch_to_window(last_tab_opened)
    filtered = []
    filtered.clear()
    filtered = get_products(link)
    saerched_url.clear()
    if not filtered:
        new_url = link + '+kaufen'
        get_products(link)
        print('Modified URL :' + link)
    if filtered:
        print(filtered)
        positions.clear()
        for x in range(1, len(filtered)+1):
            positions.append(str(x))
            saerched_url.append(link)
        gobal_position = 0
        gobal_position = len(positions)
        print('global postion first: ' + str(gobal_position))
        print("\n")
        company_name_list = browser.find_elements_by_xpath("//div[@class='LbUacb']")
        company = []
        company.clear()
        company = [x.text for x in company_name_list]
        print('Company Name:')
        print(company, '\n')
        price_list = browser.find_elements_by_xpath("//div[@class='e10twf T4OwTb']")
        price = []
        price.clear()
        price = [x.text for x in price_list]
        print('Price:')
        print(price)
        print("\n")
        urls = []
        urls.clear()
        find_href = browser.find_elements_by_xpath("//a[@class='plantl pla-unit-single-clickable-target clickable-card']")
        for my_href in find_href:
            url_list = my_href.get_attribute("href")
            urls.append(url_list)
        print('Final Result: ')
        result = zip(positions, filtered, urls, company, price, saerched_url)
        final_results.clear()
        final_results.append(tuple(result))
        print(final_results)
        print("\n")
        print('global postion end :' + str(gobal_position))
        i = 0
        try:
            for d in final_results:
                while i <= gobal_position:
                    print(d[i])
                    cur.execute("""INSERT into staging.pla_crawler_results(position, product_name, url,company,price,searched_url) VALUES (%s, %s, %s,%s, %s,%s)""", d[i])
                    print('Inserted succesfully')
                    conn.commit()
                    i = i + 1
        except (Exception, psycopg2.Error) as error:
            print(error)
            pass

browser.close()
Ideally you shouldn't attempt to open 50 tabs at once, as:
Handling 50 concurrent tabs through Selenium will require complicated logic to maintain.
Additionally, you may run into CPU and memory usage issues:
Chrome maintains many processes.
Firefox, at times, uses too much RAM.
Solution
If you have a list of the URLs as follows:
['https://selenium.dev/downloads/', 'https://selenium.dev/documentation/en/']
You can iterate over the list to open them one by one for scraping, using the following strategy:
Code Block:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.alert import Alert
from selenium.webdriver.common.keys import Keys
links = ['https://selenium.dev/downloads/', 'https://selenium.dev/documentation/en/']
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
for link in links:
    driver = webdriver.Chrome(options=options, executable_path=r'C:\Utility\BrowserDrivers\chromedriver.exe')
    driver.get(link)
    print(driver.title)
    print("Perform webscraping here")
    driver.quit()
print("End of program")
Console Output:
Downloads
Perform webscraping here
The Selenium Browser Automation Project :: Documentation for Selenium
Perform webscraping here
End of program
Reference
You can find a relevant detailed discussion in:
WebScraping JavaScript-Rendered Content using Selenium in Python
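If you do want to stay inside a single browser session and use tabs, as the question asks, here is a sketch (reusing the imports, options and links from the block above); the new tab's handle is computed by set difference because handle order is not guaranteed:

driver = webdriver.Chrome(options=options, executable_path=r'C:\Utility\BrowserDrivers\chromedriver.exe')
original = driver.current_window_handle

for link in links:
    handles_before = set(driver.window_handles)
    driver.execute_script("window.open(arguments[0]);", link)  # open the link in a new tab
    WebDriverWait(driver, 10).until(lambda d: len(d.window_handles) > len(handles_before))
    new_handle = (set(driver.window_handles) - handles_before).pop()
    driver.switch_to.window(new_handle)
    print(driver.title)                # perform the actual scraping here
    driver.close()                     # close the processed tab...
    driver.switch_to.window(original)  # ...and hand control back to the original one

driver.quit()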
After logging in, my program starts looping over a list of songs to add them to my Spotify playlist, but after the first iteration it raises the "stale element reference: element is not attached to the page document" exception.
Link I'm working on
driver = webdriver.Chrome()
driver.maximize_window()
actionChain = ActionChains(driver)
driver.get('https://open.spotify.com/browse/featured')

# Login Procedure
psw = ''
login = driver.find_element_by_xpath("//button[2]").click()
sleep(1)
email = driver.find_element_by_id('login-username').send_keys('abc@yahoo.com')
password = driver.find_element_by_id('login-password').send_keys(psw)
login = driver.find_element_by_id('login-button').click()

ignored_exceptions = (StaleElementReferenceException, NoSuchElementException)

def wdwfind(path):
    return WebDriverWait(driver, 15, ignored_exceptions=ignored_exceptions).until(
        EC.presence_of_element_located((By.XPATH, (path))))

def wdwclick(path):
    return WebDriverWait(driver, 15, ignored_exceptions=ignored_exceptions).until(
        EC.element_to_be_clickable((By.XPATH, (path))))

for n in range(len(songs)):
    wdwfind("//li[2]/div/a/div/span").click()  # going to search tab
    wdwfind("//input").send_keys(songs[n])  # sending elements to navigation bar
    gotosong = wdwclick("//a[@class='d9eb38f5d59f5fabd8ed07639aa3ab77-scss _59c935afb8f0130a69a7b07e50bac04b-scss']")  # right clicking the name of the song
    actionChain.context_click(gotosong).perform()
    wdwfind("//nav/div[4]").click()  # Selecting the add to playlist option
    wdwfind("//div[@class='mo-coverArt-hoverContainer']").click()  # clicking on the playlist to add the song to
    sleep(2)
    clear = wdwclick("//input[@class='_2f8ed265fb69fb70c0c9afef329ae0b6-scss']").send_keys(Keys.SHIFT, Keys.ARROW_UP)  # clearing the search box
    driver.refresh()
    sleep(1)
I have investigated the reason behind your issue; please find the solution below if you are trying to add songs to a playlist. You are facing the above issue because of dynamic elements in the DOM. Also, after running the code below I get a membership window, so I can't proceed further.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import time
from selenium.webdriver.support.ui import WebDriverWait as Wait
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import StaleElementReferenceException
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver import ActionChains

# Open Chrome
driver = webdriver.Chrome(executable_path=r"C:\New folder\chromedriver.exe")
driver.get("https://open.spotify.com/browse/featured")

element = WebDriverWait(driver, 10).until(
    EC.visibility_of_element_located((By.XPATH, "//li[2]/div/a/div/span")))
element.click()
WebDriverWait(driver, 20).until(
    EC.visibility_of_element_located((By.XPATH, "//input[@class='SearchInputBox__input']"))).send_keys("songs")
driver.find_element_by_xpath("//*[text()='Log in']").click()
WebDriverWait(driver, 20).until(
    EC.visibility_of_element_located((By.XPATH, "//input[@id='login-username']"))).send_keys("")
WebDriverWait(driver, 20).until(
    EC.visibility_of_element_located((By.XPATH, "//input[@id='login-password']"))).send_keys("")
WebDriverWait(driver, 10).until(
    EC.visibility_of_element_located((By.XPATH, "//button[@id='login-button']"))).click()

items = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.XPATH, "//div[@class='react-contextmenu-wrapper']/div/div/a")))
print(len(items))
for song in items:
    print(song.text)
    actionChains = ActionChains(driver)
    actionChains.context_click(song).perform()
    element = WebDriverWait(driver, 10).until(
        EC.visibility_of_element_located((By.XPATH, "//*[text()='Add to Playlist']")))
    element.click()
    element12 = WebDriverWait(driver, 10).until(
        EC.visibility_of_element_located((By.XPATH, "//button[@class='btn asideButton-button btn-green btn-small']")))
    actionChains.move_to_element(element12).click().perform()
    actionChains.context_click(song).perform()
    element00 = WebDriverWait(driver, 20).until(
        EC.visibility_of_element_located((By.XPATH, "//input[@class='inputBox-input']"))).send_keys("testPlayList")
    element11 = WebDriverWait(driver, 10).until(
        EC.visibility_of_element_located((By.XPATH, "//div[@class='button-group button-group--horizontal']//div[2]/button")))
    actionChains.move_to_element(element11).click().perform()
    elem = WebDriverWait(driver, 20).until(
        EC.visibility_of_element_located((By.XPATH, "//div[@class='TrackListHeader__entity-name']//span")))
    print(elem.text)
    break
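A note on the loop above: items is collected once, so any re-render after the first context-click can leave the later entries stale (which is why it stops at break). To process every song, one option is to re-locate the list by index on each pass; a sketch, assuming the same XPath still matches:

songs_xpath = "//div[@class='react-contextmenu-wrapper']/div/div/a"
count = len(WebDriverWait(driver, 20).until(
    EC.presence_of_all_elements_located((By.XPATH, songs_xpath))))
for i in range(count):
    # fetch a fresh element list on every iteration so the reference is never stale
    song = WebDriverWait(driver, 20).until(
        EC.presence_of_all_elements_located((By.XPATH, songs_xpath)))[i]
    print(song.text)
    ActionChains(driver).context_click(song).perform()
    # ... continue with the "Add to Playlist" steps as above ...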
I am trying to scrape the links of different poems from https://www.poets.org/poetsorg/poems, but I am getting a StaleElementReferenceException. I have tried increasing the sleep time and the WebDriverWait timeout as well, with no success. Any help will be greatly appreciated. My code is below.
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By  # needed for By.LINK_TEXT below
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def poem_scraper(url):
    driver = webdriver.Chrome(r"\chromedriver.exe")
    driver.get(url)
    all_links = []
    for _ in range(10):
        soup = BeautifulSoup(driver.page_source, "html.parser")
        total_poems = soup.find_all('td', attrs={'class': "views-field views-field-title"})
        for div in total_poems:
            links = div.find_all('a')
            for a in links:
                all_links.append('https://www.poets.org' + a['href'])
        timeout = 15
        #time.sleep(6)
        try:
            element_present = EC.presence_of_element_located((By.LINK_TEXT, 'next'))
            WebDriverWait(driver, timeout).until(element_present)
        except TimeoutException:
            print("Timed out waiting for page to load")
        test = driver.find_element_by_link_text('next')
        time.sleep(6)
        test.click()
    return(all_links)
StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
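A likely cause: the 'next' link is looked up before the page re-renders, or the click triggers a re-render while the old reference is still in hand. Waiting for the clicked element to actually go stale before touching the new page usually fixes this; a sketch of the end of the loop under that assumption (By is imported as above):

        next_link = WebDriverWait(driver, timeout).until(
            EC.element_to_be_clickable((By.LINK_TEXT, 'next')))
        next_link.click()
        # block until the clicked element is detached, i.e. the next page has replaced it
        WebDriverWait(driver, timeout).until(EC.staleness_of(next_link))

This replaces the find_element_by_link_text / sleep / click tail of the loop and removes the need for the hardcoded time.sleep(6).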