Web scraping issue in Python: web page not loading in time - python-3.x

I am making a program to scrape mobile phones from the Amazon website, but my program throws a timeout exception even though the page loads in time.
Here is my code
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import NoSuchElementException
from bs4 import BeautifulSoup
import urllib.request

class Amazon_all_mobile_scraper:
    def __init__(self):
        self.driver = webdriver.Firefox()
        self.delay = 60
        self.url = "https://www.amazon.in/mobile-phones/b/ref=sd_allcat_sbc_mobcomp_all_mobiles?ie=UTF8&node=1389401031"

    def load_amazon(self):
        self.driver.get(self.url)
        try:
            wait = WebDriverWait(self.driver, self.delay)
            wait.until(EC.presence_of_element_located((By.CLASS_NAME, "acs-ln-link")))
            print("Page is ready.")
        except TimeoutException:
            print("Took too much time to load!")
        except:
            print("Something went wrong in loading part!!")

    def extract_list_of_mobiles(self):
        try:
            mobile_list = self.driver.find_element_by_xpath('//div[@class = "acs-ln-link"]')
            print(mobile_list)
        except NoSuchElementException:
            print("Sorry, unable to get the requested element")

scraper = Amazon_all_mobile_scraper()
scraper.load_amazon()
scraper.extract_list_of_mobiles()
Please help me figure out what's wrong in this code.

Only changing acs-ln-link to acs-ln-links will not do the trick. Your xpath should look more like '//div[contains(@class,"acs-ln-nav-expanded")]//*[@class="acs-ln-links"]//a'. This, however, is something you can work with to get the required output:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

class Amazon_all_mobile_scraper:
    url = "https://www.amazon.in/mobile-phones/b/ref=sd_allcat_sbc_mobcomp_all_mobiles?ie=UTF8&node=1389401031"

    def __init__(self):
        self.driver = webdriver.Chrome()
        self.wait = WebDriverWait(self.driver, 15)

    def load_n_get_from_amazon(self):
        self.driver.get(self.url)
        mobile_list = self.wait.until(EC.presence_of_all_elements_located((By.XPATH, '//div[contains(@class,"acs-ln-nav-expanded")]//*[@class="acs-ln-links"]//a')))
        return mobile_list

    def __del__(self):
        self.driver.close()

if __name__ == '__main__':
    scraper = Amazon_all_mobile_scraper()
    for item in scraper.load_n_get_from_amazon():
        print(f'{item.text}\n{item.get_attribute("href")}\n')

The class wasn't matching: "acs-ln-link" should be "acs-ln-links".
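For a quick check, here is a minimal sketch of the asker's wait with only the class name corrected (assuming the same Firefox driver and URL as the question; nothing else changes):
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

driver = webdriver.Firefox()
driver.get("https://www.amazon.in/mobile-phones/b/ref=sd_allcat_sbc_mobcomp_all_mobiles?ie=UTF8&node=1389401031")
# "acs-ln-links" (plural) is the class that actually exists on the page.
WebDriverWait(driver, 60).until(EC.presence_of_element_located((By.CLASS_NAME, "acs-ln-links")))
print("Page is ready.")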

Related

Log in to a website with data from a CSV file with Python Selenium

I am trying to create a script that automatically logs different accounts in to a website; the login data should be taken from a CSV file. Unfortunately I get no result. Maybe someone has a solution for me?
import pandas as pd
import os
from selenium import webdriver
from twocaptcha import TwoCaptcha
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

loginlist = pd.read_csv('/Volumes/GoogleDrive/Meine Ablage/Privat/py_scripts/login.csv', header=None, skiprows=[0], sep=',')
print(loginlist)

driver = webdriver.Chrome()
driver.get("https://freebitco.in/signup/?op=s")
time.sleep(6)
webdriver.ActionChains(driver).send_keys(Keys.ESCAPE).perform()
time.sleep(1)

login = driver.find_element(By.CLASS_NAME, "login_menu_button")
login.click()
time.sleep(3)

def fulfill_form(email, password):
    input_email = driver.find_element(By.ID, 'login_form_btc_address')
    input_password = driver.find_element(By.ID, 'login_form_password')
    input_email.send_keys(email)
    time.sleep(1)
    input_password.send_keys(password)
    time.sleep(5)

failed_attempts = []
for customer in loginlist:
    try:
        fulfill_form(str(loginlist[0]), str(loginlist[1]))
    except:
        failed_attempts.append(loginlist[0])
        pass

if len(failed_attempts) > 0:
    print("{} cases have failed".format(len(failed_attempts)))

print("Procedure concluded")
I tried several older solutions from other posts; unfortunately they led nowhere.
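One likely culprit, offered as an observation rather than a confirmed fix: iterating over a pandas DataFrame yields its column labels, not its rows, and loginlist[0] is the whole first column rather than one account's value. A minimal sketch of row-wise iteration with itertuples(), assuming column 0 holds the email and column 1 the password:
failed_attempts = []
for row in loginlist.itertuples(index=False):
    # row is a plain tuple of one CSV line: (email, password, ...)
    email, password = str(row[0]), str(row[1])
    try:
        fulfill_form(email, password)
    except Exception:
        failed_attempts.append(email)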

Python Selenium Element not found and Not Interactable

I'm trying to scrape moneycontrol.com. When I try to send a value to the search box, I keep getting the same error in the except block: "Element not Found".
I tried using the XPath id as well as the full XPath, but it doesn't work in either case.
WITHOUT MAXIMIZING THE WINDOW
XPath id - //*[@id="search_str"]
Full XPath - /html/body/div[1]/header/div[1]/div[1]/div/div/div[2]/div/div/form/input[5]
Attaching the full code below:
import time
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys

def search_stock():
    driver = webdriver.Chrome(r'./chromedriver')
    driver.get('https://www.moneycontrol.com/')
    time.sleep(5)
    search_icon = driver.find_element_by_xpath('//*[@id="fixedheader"]/div[4]/span')
    search_icon.click()
    time.sleep(2)
    try:
        search_box = driver.find_element_by_xpath('//*[@id="search_str"]')
        print("Element is visible? " + str(search_box.is_displayed()))
        time.sleep(10)
        if search_box.is_displayed():
            search_box.send_keys('Zomato')
            search_box.send_keys(Keys.RETURN)
    except NoSuchElementException:
        print("Element not found")
    driver.close()

search_stock()
Sometimes it starts working, but most of the time it throws exceptions and errors. I have been struggling for 3 days, but none of the solutions work.
Web scraping like that seems quite inefficient; it is probably better to use requests and bs4 (see the sketch below). However, if you want to do it like this, you could try using action chains. Or you can do driver.get('https://www.moneycontrol.com/india/stockpricequote/consumer-food/zomato/Z') from the start instead of typing it in.
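As a rough illustration of the requests-and-bs4 route (a sketch only: the quote URL is the one mentioned above, the browser-like User-Agent is an assumption to avoid being served a block page, and the exact markup you would parse is not verified here):
import requests
from bs4 import BeautifulSoup

url = 'https://www.moneycontrol.com/india/stockpricequote/consumer-food/zomato/Z'
headers = {'User-Agent': 'Mozilla/5.0'}  # many sites reject the default python-requests agent
resp = requests.get(url, headers=headers, timeout=30)
resp.raise_for_status()
soup = BeautifulSoup(resp.text, 'html.parser')
print(soup.title.get_text(strip=True) if soup.title else 'no <title> found')  # sanity check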
You may want to try the below code:
def search_stock():
    driver = webdriver.Chrome(r'./chromedriver')
    driver.maximize_window()
    driver.implicitly_wait(30)
    driver.get('https://www.moneycontrol.com/')
    wait = WebDriverWait(driver, 10)
    time.sleep(5)
    try:
        ActionChains(driver).move_to_element(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[id='search_str']")))).perform()
        search_box = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[id='search_str']")))
        print("Element is visible? ", search_box.is_displayed())
        time.sleep(10)
        if search_box.is_displayed():
            search_box.send_keys('Zomato')
            search_box.send_keys(Keys.RETURN)
    except NoSuchElementException:
        print("Element not found")
Imports:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
Try clicking on search_box and only after that sending text there.
search_box = driver.find_element_by_xpath('//form[@id="form_topsearch"]//input[@id="search_str"]')
search_box.click()
time.sleep(0.1)
search_box.send_keys('Zomato')
search_box.send_keys(Keys.RETURN)
Additionally, I would advise using explicit waits with expected conditions instead of hardcoded sleeps.
With them your code will be faster and more reliable.
import time
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def search_stock():
    driver = webdriver.Chrome(r'./chromedriver')
    wait = WebDriverWait(driver, 20)
    driver.get('https://www.moneycontrol.com/')
    wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="fixedheader"]/div[4]/span'))).click()
    search_box = wait.until(EC.element_to_be_clickable((By.XPATH, '//form[@id="form_topsearch"]//input[@id="search_str"]')))
    search_box.send_keys('Zomato')
    search_box.send_keys(Keys.RETURN)
    # I'm not sure you should close the driver immediately after invoking the search...
    # driver.close()

search_stock()
UPD
Let's try this:
import time
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains

def search_stock():
    driver = webdriver.Chrome(r'./chromedriver')
    wait = WebDriverWait(driver, 20)
    actions = ActionChains(driver)
    driver.get('https://www.moneycontrol.com/')
    search_icon = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="fixedheader"]/div[4]/span')))
    time.sleep(0.5)
    driver.execute_script("arguments[0].scrollIntoView();", search_icon)
    driver.execute_script("arguments[0].click();", search_icon)
    search_box = wait.until(EC.presence_of_element_located((By.XPATH, '//form[@id="form_topsearch"]//input[@id="search_str"]')))
    driver.execute_script("arguments[0].scrollIntoView();", search_box)
    driver.execute_script("arguments[0].click();", search_box)
    time.sleep(0.5)
    search_box.send_keys('Zomato')
    search_box.send_keys(Keys.RETURN)
    # I'm not sure you should close the driver immediately after invoking the search...
    # driver.close()

search_stock()
If the above solution is still not working, instead of
actions.move_to_element(search_box).click().perform()
try
driver.execute_script("arguments[0].click();", search_box)

How to check if auto suggestion exists?

I need to check whether the window with search suggestions exists. When you type something into the search box, a list of suggested searches appears; I need to check whether this pop-up window exists.
(Screenshot: the suggestion window.)
Code trials:
import time
import unittest
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException

class YandexSearchRu(unittest.TestCase):
    def setUp(self):
        self.driver = webdriver.Chrome()

    def test_search(self):
        driver = self.driver
        driver.get("http://www.yandex.ru")
        try:
            input = driver.find_element_by_xpath("//*[@id='text']")
        except NoSuchElementException:
            driver.close()
        input.send_keys("Тензор")
        input.send_keys(Keys.RETURN)
        time.sleep(5)

    def tearDown(self):
        self.driver.close()

if __name__ == "__main__":
    unittest.main()
Try to do this:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

driver.get("http://www.yandex.ru")
try:
    input = driver.find_element_by_xpath("//*[@id='text']")
    input.send_keys("adfadf")
    popup = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "body > div.i-bem.popup > div.popup__content")))
    if popup.is_displayed():
        print("popup displayed")
    else:
        print("popup not visible")
except NoSuchElementException:
    pass
The element isn't a pop-up window but Auto Suggestions, and to extract the Auto Suggestions you can use the following solution:
Code Block:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = webdriver.ChromeOptions()
options.add_argument('start-maximized')
options.add_argument('disable-infobars')
options.add_argument('--disable-extensions')
driver = webdriver.Chrome(chrome_options=options, executable_path=r'C:\WebDrivers\chromedriver.exe')
driver.get('http://www.yandex.ru')
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input.input__control.input__input"))).send_keys("Тензор")
print([auto_suggestion.text for auto_suggestion in WebDriverWait(driver, 5).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.popup__content>div.suggest2__content.suggest2__content_theme_normal li>span.suggest2-item__text")))])
Console Output:
['тензор', 'тензор официальный сайт', 'тензор техподдержка', 'тензорное исчисление', 'тензор спб', 'тензорные ядра', 'тензорный анализ', 'тензор эцп', 'тензорезистор', 'тензор инерции']
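If all you need is a yes/no check rather than the suggestion texts, one way (a sketch built on the same CSS selectors as above) is to wrap the wait in a try/except and return a boolean:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException

def suggestions_present(driver, timeout=5):
    # True if the suggestion dropdown becomes visible within `timeout` seconds.
    try:
        WebDriverWait(driver, timeout).until(EC.visibility_of_element_located(
            (By.CSS_SELECTOR, "div.popup__content div.suggest2__content")))
        return True
    except TimeoutException:
        return False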

Stale Element Reference Exception in Selenium in Python

I am trying to scrape the links of different poems from https://www.poets.org/poetsorg/poems but I am getting a StaleElementReferenceException. I have tried increasing the sleep time and the WebDriverWait timeout as well, with no success. Any help will be greatly appreciated. My code is below.
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def poem_scraper(url):
    driver = webdriver.Chrome("\chromedriver.exe")
    driver.get(url)
    all_links = []
    for _ in range(10):
        soup = BeautifulSoup(driver.page_source, "html.parser")
        total_poems = soup.find_all('td', attrs={'class': "views-field views-field-title"})
        for div in total_poems:
            links = div.find_all('a')
            for a in links:
                all_links.append('https://www.poets.org' + a['href'])
        timeout = 15
        #time.sleep(6)
        try:
            element_present = EC.presence_of_element_located((By.LINK_TEXT, 'next'))
            WebDriverWait(driver, timeout).until(element_present)
        except TimeoutException:
            print("Timed out waiting for page to load")
        test = driver.find_element_by_link_text('next')
        time.sleep(6)
        test.click()
    return all_links
StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
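For what it's worth, a stale reference usually means the 'next' link was located before the DOM was rebuilt (the click triggers a page update) and used afterwards. A common pattern, sketched here without being verified against this particular site, is to re-locate the link immediately before every click and retry if it goes stale:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException

def click_next(driver, timeout=15, retries=3):
    # Re-locate 'next' fresh on each attempt so no reference is held across a DOM rebuild.
    for _ in range(retries):
        try:
            link = WebDriverWait(driver, timeout).until(
                EC.element_to_be_clickable((By.LINK_TEXT, 'next')))
            link.click()
            return
        except StaleElementReferenceException:
            continue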

PhantomJS python issue

My Python Selenium tests are working on the Firefox driver (GUI) without any issue, but I wanted to run my tests in headless mode. When I try to run the same script in headless mode (with a few modifications), it gives weird errors.
Ex:
selenium.common.exceptions.NoSuchElementException: Message: {"errorMessage":"Unable to find element with id 'ext-gen1499
Python script:
import os
import time
from selenium.webdriver.common.proxy import *
from selenium.webdriver.common.by import By

phantomjs_path = r"/home/xxxx/nodejs-installs/phantomjs-2.1.1-linux-x86_64/bin/phantomjs"

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary

service_args = ['--proxy=x.x.x.x:80', '--proxy-type=https']
driver = webdriver.PhantomJS(executable_path=r'/home/xxxx/nodejs-installs/phantomjs-2.1.1-linux-x86_64/bin/phantomjs', service_args=service_args)
os.environ['MOZ_HEADLESS'] = '1'
driver.get("https://aaaaa.com")

def Login():
    try:
        driver.find_element_by_id("username").send_keys("test@aaaa.com")
        driver.find_element_by_id("password").send_keys("xxxxxxx")
        driver.find_element_by_id("Submit").click()
        login_flag = 1
    except:
        print("Error Loading Login page")
        login_flag = 0
    finally:
        return login_flag

def CreateMail():
    try:
        element = WebDriverWait(driver, 3).until(EC.presence_of_element_located((By.ID, "button-1143-btnInnerEl")))
        driver.find_element_by_id("button-1143-btnInnerEl").click()
    except TimeoutException:
        print("Loading took too much time - Create New Mail")
    driver.find_element_by_id("ext-gen1499").send_keys("test@test.com")
    driver.find_element_by_id("textfield-1265-inputEl").send_keys("Automated Test Mail from Selenium")
    driver.find_element_by_id("button-1252-btnIconEl").click()
Am I missing anything?
It is good practice to add an implicit wait of at least 10 seconds, to allow the target page elements to load completely.
driver.implicitly_wait(10)
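Note also that PhantomJS support in Selenium has been deprecated. A common alternative, sketched here on the assumption that a reasonably recent Selenium and geckodriver are installed, is headless Firefox configured through options rather than the MOZ_HEADLESS environment variable:
from selenium import webdriver

options = webdriver.FirefoxOptions()
options.add_argument('-headless')  # run Firefox without a visible window
driver = webdriver.Firefox(options=options)
driver.get("https://aaaaa.com")  # placeholder URL from the question
print(driver.title)
driver.quit()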
