Scroll down pages with selenium and python - python-3.x

this is the problem:
I am using selenium to download all the successful projects from this webpage ("https://www.rockethub.com/projects"). The url does not change if a click on any button.
I'm interested in successful project, thus I click on the button status and then I click on successful.
Once on this page I need to scroll down repeatedly to make other urls appear.
Here is the problem. So far I have been not able to scroll down the page
This is my code:
from selenium.webdriver import Firefox
from selenium import webdriver

# Page whose project list is lazy-loaded as you scroll.
url = "https://www.rockethub.com/projects"

wd = webdriver.Firefox()
wd.get(url)

# Open the "Status" filter menu and choose "Successful".
next_button = wd.find_element_by_link_text('Status')
next_button.click()
next_but = wd.find_element_by_link_text('Successful')
next_but.click()

# Scroll to the bottom of the page to trigger loading of more projects.
# (Removed an unused `link = []` accumulator that was never appended to.)
wd.execute_script("window.scrollTo(0, document.body.scrollHeight);")
Any idea on how to solve this?
Thanks
Giangi

Since the content is updated dynamically, you need to wait for a change of the content before executing the next step:
class element_is_not(object):
    """An expectation for checking that the element returned by
    the locator is not equal to a given element.

    Intended for WebDriverWait.until: returns the newly located element
    once it differs from ``element``, or None (keep waiting) otherwise.
    """

    def __init__(self, locator, element):
        # locator: a (By.<strategy>, value) tuple usable with find_element.
        # element: the previously seen element, or None on the first wait.
        self.locator = locator
        self.element = element

    def __call__(self, driver):
        # WebDriverWait polls this callable until it returns a truthy value.
        new_element = driver.find_element(*self.locator)
        return new_element if self.element != new_element else None
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait

driver = webdriver.Firefox()
wait = WebDriverWait(driver, 10)
driver.get("https://www.rockethub.com/projects")

# Locator for whichever project box is currently last on the page.
by_last_box = (By.CSS_SELECTOR, '.project-box:last-of-type')

# Remember the current last box before interacting with the page.
last_box = wait.until(element_is_not(by_last_box, None))

# Filter the list via the menu: Status > Successful.
status_menu = driver.find_element_by_link_text('Status')
status_menu.click()
successful_item = driver.find_element_by_link_text('Successful')
successful_item.click()

# Block until the filtered content has replaced the previous last box.
last_box = wait.until(element_is_not(by_last_box, last_box))

# Scroll to the bottom so the next batch of projects is fetched.
driver.execute_script("window.scrollTo(0, document.documentElement.scrollHeight);")

# Block again until the freshly loaded box appears.
last_box = wait.until(element_is_not(by_last_box, last_box))

Run the wd.execute_script("window.scrollTo(0, document.body.scrollHeight);") call in a loop: each time the script is executed only a certain number of items is retrieved, so you have to execute it repeatedly.
If you are just looking to retrieve all the successful projects at once and not interested in simulating the scrolling down to the page, then look at this answer, it may help.

Related

Sendkeys and javascript executor aren't working, how do I get text into this textbox with Selenium Python?

I am trying to enter text into the min price and max price textboxes under the price option for Airbnb.com, for example:
https://www.airbnb.com/s/Miami--Florida--United-States/homes?tab_id=home_tab&refinement_paths%5B%5D=%2Fhomes&flexible_trip_dates%5B%5D=august&flexible_trip_dates%5B%5D=july&flexible_trip_lengths%5B%5D=weekend_trip&date_picker_type=calendar&adults=4&source=structured_search_input_header&search_type=filter_change&place_id=ChIJEcHIDqKw2YgRZU-t3XHylv8
But I cannot get any of my input to send over the default entries of min price 10 and max price 1000. I've tried send keys and the best I could do was get the entries to 0 by sending ctrl + all + del keys and after that the default values just came back. I saw that when send keys doesn't work, javascript executor can be a good alternative so I also tried that even with waiting for the elements to be clickable, but nothing is working. Here is my code and commented out is my failed attempt with send keys:
# NOTE(review): fragment of a method body — assumes self.driver is a selenium
# WebDriver and that WebDriverWait / EC / By / Keys are imported elsewhere.
# Wait for the minimum-price input, then attempt to set its value via JS.
# price_min = self.driver.find_element_by_id("price_filter_min")
element = WebDriverWait(self.driver, 5).until(
    EC.presence_of_element_located((By.ID, "price_filter_min"))
)
# price_min.clear()
# price_min.click()
# price_min.send_keys(Keys.CONTROL + 'a', Keys.BACKSPACE)
# price_min.send_keys(min_price)
self.driver.execute_script("arguments[0].click();", element)
# Sets only the DOM attribute; the widget's internal state may not update.
self.driver.execute_script("arguments[0].setAttribute('value',arguments[1]);", element, 354)
# Same approach for the maximum-price input.
# price_max = self.driver.find_element_by_id("price_filter_max")
element2 = WebDriverWait(self.driver, 5).until(
    EC.presence_of_element_located((By.ID, "price_filter_max"))
)
# price_max.clear()
# price_max.click()
# price_max.send_keys(Keys.CONTROL + 'a', Keys.BACKSPACE)
# price_max.send_keys(max_price)
self.driver.execute_script("arguments[0].click();", element2)
self.driver.execute_script("arguments[0].setAttribute('value',arguments[1]);", element2, 800)
What am I doing wrong?
These inputs are not really inputs you can type into to set the min and max values.
They only reflect limits set by horizontal slide bars.
Try swiping them as described here or here
Alternatively you can try setting the scroll bar attributes directly. See if that will work.
I have written a code to replicate your scenario. Please take a look into it and let me know if you don't understand anything.
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
driver = webdriver.Chrome(options=options)
driver.get(
    'https://www.airbnb.co.in/s/Miami--Florida--United-States/homes?tab_id=home_tab&refinement_paths%5B%5D=%2Fhomes&'
    'flexible_trip_dates%5B%5D=august&flexible_trip_dates%5B%5D=july&flexible_trip_lengths%5B%5D=weekend_trip&date_'
    'picker_type=calendar&adults=4&source=structured_search_input_header&search_type=filter_change&place_id=ChIJEcHID'
    'qKw2YgRZU-t3XHylv8&locale=en&_set_bev_on_new_domain=1625828364_YTljYWI4OTA5ZjZh')
wait = WebDriverWait(driver, 30)
action = ActionChains(driver)

# Press OK on the cookie-preference dialog (best-effort: skip if absent).
try:
    wait.until(EC.visibility_of_element_located((By.XPATH, "(//button[text()='OK'])[2]"))).click()
except Exception:
    pass

# Click on the Price button to open the slider panel.
wait.until(EC.visibility_of_element_located((By.XPATH, '//span[text()="Price"]/ancestor::button'))).click()

# The sliders are buttons identified by aria-label.
# (XPath attribute tests need '@': //button[@aria-label=...].)
# The minimum value present for the slider is 750, so don't change it to 0.
minimum_Scroll = wait.until(EC.visibility_of_element_located((By.XPATH, '//button[@aria-label="Minimum Price"]')))
maximum_Scroll = wait.until(EC.visibility_of_element_located((By.XPATH, '//button[@aria-label="Maximum Price"]')))

# Drag the maximum-price handle left; tune the x-offset for your target price.
action.drag_and_drop_by_offset(maximum_Scroll, -202, 0).perform()

# Click the Save button to apply the filter.
wait.until(EC.visibility_of_element_located((By.ID, 'filter-panel-save-button'))).click()
Hope this is what you are looking for.

Can't scroll through a div more than once | Selenium | Python

When I run this, it only manages to scroll down once, and it throws a "Message: element not interactable" error. (it's supposed to scroll twice). When I tried to run it in a loop (made a try and except to ignore the error), and scrolled around with it manually, it would keep pushing me back up to a specific position. But that's strange, because I'm using arrow keys here, not a move to element:
ActionChains(driver).move_to_element(driver.sl.find_element_by_id('my-id')).perform()
I've tried: giving everything more time to load with sleep, hovering over the element and clicking it to make it interactable, using other methods to scroll such as this one and others like it: driver.execute_script("window.scrollTo(0, Y)")
I'm very lost at this point, don't know what to do
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium import webdriver
from datetime import date
from datetime import datetime
from time import sleep
from random import *
import random, json, selenium, os.path, os

driver = webdriver.Chrome('/Users/apple/Downloads/chromedriver')
driver.maximize_window()
driver.get('https://instagram.com')
sleep(7)

# Fill in the login form.
username_form = driver.find_element_by_xpath('/html/body/div[1]/section/main/article/div[2]/div[1]/div/form/div/div[1]/div/label/input')
username_form.clear()
username_form.send_keys('ENTER INSTA USER HERE')
password_form = driver.find_element_by_xpath('/html/body/div[1]/section/main/article/div[2]/div[1]/div/form/div/div[2]/div/label/input')
password_form.clear()
password_form.send_keys('ENTER INSTA PASS HERE')
button_click = driver.find_element_by_xpath('/html/body/div[1]/section/main/article/div[2]/div[1]/div/form/div/div[3]/button')
try:
    button_click.click()
except Exception:
    # Fall back to a JS click if the normal click is intercepted.
    driver.execute_script("arguments[0].click();", button_click)
sleep(4)

driver.get('https://instagram.com/p/CQ_sfAeFl5s/')
sleep(4)

# Open the likes dialog.
like_meter = driver.find_element_by_class_name('zV_Nj')
like_meter.click()
sleep(1)

# The dialog container's div index varies, so try both locations.
try:
    scroll_zone = driver.find_element_by_xpath('/html/body/div[5]/div/div/div[2]/div/div')
except Exception:
    scroll_zone = driver.find_element_by_xpath('/html/body/div[4]/div/div/div[2]/div/div')
scroll_zone.click()
sleep(0.5)

# Hover over the scrollable area so it receives the key events.
hover = ActionChains(driver).move_to_element(scroll_zone)
hover.perform()
sleep(0.5)

scroll_zone.send_keys(Keys.ARROW_DOWN)
scroll_zone.send_keys(Keys.ARROW_DOWN)
If you want to scroll that list of persons liked that page you can do this:
# Open the likes dialog, then scroll its inner container directly via JS.
like_meter = driver.find_element_by_class_name('zV_Nj')
like_meter.click()
sleep(1)
elem = driver.find_element_by_css_selector("div[role='dialog'] div[style*='padding']")
for n in range(10):
    # Elements have no 'scrollDown' property; incrementing scrollTop is
    # what actually scrolls the container.
    driver.execute_script("arguments[0].scrollTop += 20", elem)
The range of 10 and 20 pixels scrolling can be changed according to your needs

Selenium problem [don't show up error](download few items)

I'm in need of a solution to my code. I tried to web-scrape a dynamic web page called easy.cl and only got 4 items, and sometimes none (this happens when I only download the title; I can't download the price because nothing shows up). Anyhow, I need a guide to where my error is, because Selenium doesn't show me any in my result (Sublime Text 3). Also, easy.cl is dynamic, using a button to show more products. Finally, I'm thinking of a scrolling solution but can't tell. What would you do in my position? Any tip to try to find a solution?
in advanced, thanks.
import random
from time import sleep
from selenium import webdriver
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Raw string avoids the invalid "\P" / "\c" escape sequences in the path.
PATH = r"C:\Program Files (x86)\chromedriver.exe"
driver = webdriver.Chrome(PATH)

#baseUrl = 'https://www.easy.cl'
driver.get('https://www.easy.cl/tienda/categoria/ceramicas?cur_page=1&cur_view=grid&bc_links=pisos-y-muros&bc_names=Pisos')
#boton = driver.find_element_by_xpath('//button[@class="primary_plp load-more-products"]')

inicio = []

# Click the "load more products" button up to 5 times.
# (XPath attribute tests use '@', e.g. //button[@class=...].)
for i in range(5):
    try:
        boton = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, '//button[@class="primary_plp load-more-products"]')))
        boton.click()
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, '//a[@class="product_image"]')))
    except Exception:
        break

# Wait for the products to load...
links_productos = WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located((By.XPATH, '//a[@class="product_image"]')))

# Collect the links to the product detail pages.
links_pagina = []
for tag_a in links_productos:
    links_pagina.append(tag_a.get_attribute("href"))

# Visit each product page and print its title.
for link in links_pagina:
    try:
        driver.get(link)
        titulo = driver.find_element(By.XPATH, '//h1[@class="product-details__title"]').text
        #if driver.find_element(By.XPATH, '//span[@class="priceNumber priceNumberAlignRight"]'):
        #    precio = find_element_by_xpath('//span[@class="priceNumber priceNumberAlignRight"]').text
        #else:
        #    precio = "Sin precio M2"
        print(titulo)
        #print(precio)
    except Exception:
        break
The load more products button appears on the bottom of the page, out of the visible screen, so possibly after the element is presented (loaded) you need to scroll to that element before clicking it
from selenium.webdriver.common.action_chains import ActionChains

# Wait for the button, scroll it into view with ActionChains, then click.
# (XPath attribute tests use '@': //button[@class=...].)
boton = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.XPATH, '//button[@class="primary_plp load-more-products"]')))
actions = ActionChains(driver)
actions.move_to_element(boton).perform()
boton.click()
But the main issue here, as I see, is
links_productos = WebDriverWait(driver,10).until(EC.presence_of_all_elements_located((By.XPATH,'//a[#class="product_image"]')))
You assume that this will give you all the elements that can be presented on that page matching that locator, but actually this expected condition is waiting for at least 1 element matching this condition and once it catches it - it returns a list of web elements matching that locator it found.
I see no expected condition in Python like we have in Java like this ExpectedConditions.numberOfElementsToBe or this ExpectedConditions.numberOfElementsToBeMoreThan that is actually waiting for desired amount of elements located by passed locator. So, what I can advise you here, is to add a delay of 1-2 seconds after clicking the boton.click() before reading the links_productos. This should resolve your problem.

Selenium finds only a fraction of href links

I am trying to get all the products' url from this webpage, but I managed to get only a fraction of it.
My first attempt was to scrape the webpage with Beautifulsoup, but then I realized selenium would be better as I needed to click the "Show more" button several times. I also added a code to scroll down the page as I though that was the problem, but the result didn't change.
import time
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
def getListingLinks(link):
    """Open *link* in Chrome, repeatedly click "Show more", and return the
    product URLs collected along the way."""
    # Open the driver
    driver = webdriver.Chrome(executable_path="")
    driver.maximize_window()
    driver.get(link)
    time.sleep(3)

    # scroll down: repeated to ensure it reaches the bottom and all items are loaded
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(3)
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(3)

    listing_links = []
    while True:
        try:
            # Scroll the "Show more" button into view, then JS-click it.
            # (The XPath needs '@id'; the '#' in the CSS selector below is a
            # legitimate CSS id selector.)
            driver.execute_script("return arguments[0].scrollIntoView(true);", WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, '//*[@id="main-content"]/div[2]/div[2]/div[4]/button'))))
            driver.execute_script("arguments[0].click();", WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#main-content > div:nth-child(2) > div.main-column > div.btn-wrapper.center > button"))))
            print("Button clicked")
            links = driver.find_elements_by_class_name('fop-contentWrapper')
            for link in links:
                algo = link.find_element_by_css_selector('.fop-contentWrapper a').get_attribute('href')
                print(algo)
                listing_links.append(str(algo))
        except Exception:
            print("No more Buttons")
            break
    driver.close()
    return listing_links

fresh_food = getListingLinks("https://www.ocado.com/browse/fresh-20002")
print(len(fresh_food))  ## Output: 228
As you can see, I get 228 urls while I would like to get 5605 links, that is the actual number of products in the webpage according to Ocado. I believe I have a problem with the order of my code, but can't find the proper order. I would sincerely appreciate any help.

Unable to click HREF under headers (invisible elements)

I am wanting to click all the Href tabs under the main headers and to navigate to those pages to scrape them. For speed of the job, I do am wanting to click the href without having to click the headers. My question is, is there a way to click these buttons even though it is not visible like the page on the right? It does not seem to be working for me. It seems to give me:
Traceback (most recent call last):
File "C:/Users/Bain3/PycharmProjects/untitled4/Centrebet2.py", line 58, in <module>
EC.element_to_be_clickable((By.XPATH, '(//*[#id="accordionMenu1_ulSports"]/li/ul/li/ul/li/a)[%s]' % str(index + 1)))).click()
File "C:\Users\Bain3\Anaconda3\lib\site-packages\selenium\webdriver\support\wait.py", line 80, in until
raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message:
I have replaced
EC.element_to_be_clickable((By.XPATH, '(//*[#id="accordionMenu1_ulSports"]/li/ul/li/ul/li/a)[%s]' % str(index + 1)))).click()
with
driver.find_element_by_xpath('(//*[#id="accordionMenu1_ulSports"]/li/ul/li/ul/li/a)[%s]' % str(index + 1)).click()
This however does not seem to remedy it as it only clicks visible elements.
My code below is:
from random import shuffle
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium import webdriver as web
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from random import randint
from time import sleep
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import csv
import requests
import time
from selenium import webdriver

# Keep retrying until the browser launches and the page loads.
success = False
while not success:
    try:
        driver = webdriver.Chrome()
        driver.set_window_size(1024, 600)
        driver.maximize_window()
        driver.get('http://centrebet.com/')
        success = True
    except Exception:
        driver.quit()
sleep(5)

# Expand the sports accordion menu if it is currently hidden.
# (XPath attribute tests use '@': //ul[@id=...], [@data-type=...].)
sports = driver.find_element_by_id("accordionMenu1_ulSports")
if sports.get_attribute("style") == "display: none;":
    driver.find_element_by_xpath('//ul[@id="menu_acc"]/li[3]/a').click()

driver.find_element_by_xpath(".//*[@data-type ='sports_l1'][contains(text(), 'Soccer')]").click()
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

options = driver.find_elements_by_xpath('//*[@id="accordionMenu1_ulSports"]/li/ul/li/ul/li/a')
# Get list of integers [1, 2, ... n]
indexes = [index for index in range(len(options))]
# Shuffle them
shuffle(indexes)
for index in indexes:
    # Click on random option
    wait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, '(//*[@id="accordionMenu1_ulSports"]/li/ul/li/ul/li/a)[%s]' % str(index + 1)))).click()
I have also tried:
driver.execute_script('document.getElementByxpath("//*[#id="accordionMenu1_ulSports"]/li/ul/li/ul/li/a").style.visibility = "visible";')
To remedy this. Though this simply gives an error. Any ideas on how to resolve this issue of invisible elements?
driver.execute_script('document.getElementByxpath("//*[#id="accordionMenu1_ulSports"]/li/ul/li/ul/li/a").style.visibility = "visible";')
gives you error because it's not correct way to use XPath in Javascript. Correct way you can find here
To scrape required data you can use below code:
import requests
import time
from selenium import webdriver

url = "http://centrebet.com/"

# Keep retrying until the browser launches and the page loads.
success = False
while not success:
    try:
        driver = webdriver.Chrome()
        driver.set_window_size(1024, 600)
        driver.maximize_window()
        driver.get(url)
        success = True
    except Exception:
        driver.quit()
time.sleep(5)

# Build absolute URLs from each menu link's onclick="menulink('...')" value.
# (XPath attribute tests use '@': starts-with(@onclick, ...).)
sports = driver.find_element_by_id("accordionMenu1_ulSports")
links = [url + link.get_attribute("onclick").replace("menulink('", "").replace("')", "") for link in sports.find_elements_by_xpath('.//a[starts-with(@onclick, "menulink")]')]

# Fetch each page's content with HTTP GET instead of clicking through.
for link in links:
    print(requests.get(link).text)
Instead of clicking on each link, you can request content of each page with HTTP-GET
You can even try using JavascriptExecutor.
Use below code to make your style attribute = display:block;
# Force the hidden <ul> to display ('block' per the stated intent, and
# '@id' — not '#id' — inside the XPath attribute test).
driver.execute_script("arguments[0].style.display = 'block'", driver.find_element_by_xpath("//*[@id='accordionMenu1_ulSports']/li/ul/li/ul"))
Note : Make sure you are using correct xpath. your <ul> element is hidden not <a> so so take xpath of that <ul> tag only and try

Resources