Python / Selenium looping problem with 'for' - python-3.x

I'm trying to loop this code with "for", but it gives an error. Can you help me solve it? Thank you.
Error message:
IndentationError: expected an indented block
Source Code:
from selenium import webdriver
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
import time

binary = FirefoxBinary("C:\\Program Files\\Mozilla Firefox\\firefox.exe")
profile = FirefoxProfile("C:/Users/Baran/AppData/Roaming/Mozilla/Firefox/Profiles/oy6k3nay.yarrak")
driver = webdriver.Firefox(firefox_profile=profile, firefox_binary=binary, executable_path="C:\\WebDrivers\\geckodriver.exe")
driver.get('http://www.ipsorgu.com/')
time.sleep(5)
driver.close()
What I tried:
.
.
.
.
driver = webdriver.Firefox(firefox_profile=profile, firefox_binary=binary, executable_path="C:\\WebDrivers\\geckodriver.exe")
#################################################################
for i in range(10):
###################################################################
driver.get('http://www.ipsorgu.com/')
time.sleep(5)
driver.close()

This is because everything inside a for loop has to be indented. So your final code could look like this:
from selenium import webdriver
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
import time

binary = FirefoxBinary("C:\\Program Files\\Mozilla Firefox\\firefox.exe")
profile = FirefoxProfile("C:/Users/Baran/AppData/Roaming/Mozilla/Firefox/Profiles/oy6k3nay.yarrak")
driver = webdriver.Firefox(firefox_profile=profile, firefox_binary=binary, executable_path="C:\\WebDrivers\\geckodriver.exe")

for i in range(10):
    driver.get('http://www.ipsorgu.com/')
    time.sleep(5)
driver.close()
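If you want the browser to shut down even when one of the page loads throws, a common refinement (a sketch, not required for the fix) is to wrap the loop in try/finally and use quit(), which also ends the geckodriver process:

from selenium import webdriver
import time

# Minimal sketch; reuse the profile/binary/executable_path arguments from above.
driver = webdriver.Firefox()
try:
    for i in range(10):
        driver.get('http://www.ipsorgu.com/')
        time.sleep(5)
finally:
    driver.quit()  # unlike close(), quit() also terminates the driver process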

Related

Selenium in Python is skipping articles while trying to scrape the data

I'm trying to extract data from articles using Selenium in Python. The code identifies the articles, but while running the loop a few articles are skipped randomly. Any help resolving this issue will be appreciated.
#Importing libraries
import requests
import os
import json
import time
import traceback
import pandas as pd
from time import sleep
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager
#opening a chrome instance
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(options=options, executable_path=r"C:/selenium/chromedriver.exe")
#getting into the website
driver.get('https://academic.oup.com/rof/issue/2/2')
#getting the articles
articles = WebDriverWait(driver, 30).until(EC.visibility_of_all_elements_located((By.XPATH, '/html/body/div[3]/main/section/div/div/div[1]/div/div[3]/div[2]/div[3]/div/div/div/div/h5')))
#loop to get in and out of articles
for article in articles:
    try:
        ActionChains(driver).key_down(Keys.CONTROL).click(article).key_up(Keys.CONTROL).perform()
        WebDriverWait(driver, 10).until(EC.number_of_windows_to_be(2))
        window1 = driver.window_handles[1]
        driver.switch_to_window(window1)
        driver.close()
        driver.switch_to_window(window0)
    except:
        print("couldnt get the article")
First, to collect all the article elements, you can use this CSS selector:
articles = WebDriverWait(driver, 30).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, '.customLink.item-title a')))
Second, this is the wrong method:
driver.switch_to_window(window1)
It should be:
driver.switch_to.window(window1)
See the difference between _ and . above.
Third, you forgot to initialize the window0 variable:
window0 = driver.window_handles[0]
And finally, try the following code:
#getting into the website
driver.get('https://academic.oup.com/rof/issue/2/2')
#getting the articles
articles = WebDriverWait(driver, 30).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, '.customLink.item-title a')))
#loop to get in and out of articles
for article in articles:
    try:
        ActionChains(driver).key_down(Keys.CONTROL).click(article).key_up(Keys.CONTROL).perform()
        WebDriverWait(driver, 10).until(EC.number_of_windows_to_be(2))
        window1 = driver.window_handles[1]
        driver.switch_to.window(window1)
        driver.close()
        window0 = driver.window_handles[0]
        driver.switch_to.window(window0)
    except:
        print("couldnt get the article")
driver.quit()
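A small refinement worth considering (a sketch, assuming the results page stays in the first tab): capture the original window handle once with driver.current_window_handle before the loop, so you never depend on the ordering of window_handles:

# Sketch: remember the original tab once, before the loop starts.
window0 = driver.current_window_handle
for article in articles:
    ActionChains(driver).key_down(Keys.CONTROL).click(article).key_up(Keys.CONTROL).perform()
    WebDriverWait(driver, 10).until(EC.number_of_windows_to_be(2))
    # The new tab is whichever handle is not the original one.
    window1 = [h for h in driver.window_handles if h != window0][0]
    driver.switch_to.window(window1)
    driver.close()
    driver.switch_to.window(window0)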

How to scrape price from booking.com using BeautifulSoup?

I am trying to scrape prices from booking.com but am not successful. Any suggestions?
My code is as follows:
#Importing necessary library
from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.firefox.options import Options
import pandas as pd
import time
import re
import requests
from itertools import zip_longest
from webdriver_manager.chrome import ChromeDriverManager
price = []
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.get('https://www.booking.com/searchresults.en-gb.html?label=gen173nr-1FCAEoggI46AdIM1gEaK4BiAEBmAEJuAEXyAEM2AEB6AEB-AELiAIBqAIDuALnhOzyBcACAQ&lang=en-gb&sid=422b3ff3c0e98b522259ad1cad2505ea&sb=1&src=searchresults&src_elem=sb&error_url=https%3A%2F%2Fwww.booking.com%2Fsearchresults.en-gb.html%3Flabel%3Dgen173nr-1FCAEoggI46AdIM1gEaK4BiAEBmAEJuAEXyAEM2AEB6AEB-AELiAIBqAIDuALnhOzyBcACAQ%3Bsid%3D422b3ff3c0e98b522259ad1cad2505ea%3Btmpl%3Dsearchresults%3Bclass_interval%3D1%3Bdest_id%3D-1506909%3Bdest_type%3Dcity%3Bdtdisc%3D0%3Bfrom_sf%3D1%3Bgroup_adults%3D2%3Bgroup_children%3D0%3Binac%3D0%3Bindex_postcard%3D0%3Blabel_click%3Dundef%3Bno_rooms%3D1%3Boffset%3D0%3Bpostcard%3D0%3Braw_dest_type%3Dcity%3Broom1%3DA%252CA%3Bsb_price_type%3Dtotal%3Bshw_aparth%3D1%3Bslp_r_match%3D0%3Bsrc%3Dindex%3Bsrc_elem%3Dsb%3Bsrpvid%3D912403b6d1220012%3Bss%3DAuckland%3Bss_all%3D0%3Bssb%3Dempty%3Bsshis%3D0%3Bssne%3DAuckland%3Bssne_untouched%3DAuckland%3Btop_ufis%3D1%26%3B&sr_autoscroll=1&ss=Auckland&is_ski_area=0&ssne=Auckland&ssne_untouched=Auckland&city=-1506909&checkin_year=2020&checkin_month=9&checkin_monthday=1&checkout_year=2020&checkout_month=9&checkout_monthday=2&group_adults=2&group_children=0&no_rooms=1&from_sf=1')
time.sleep(5)
html = driver.page_source
soup = BeautifulSoup(html, 'html.parser')
for item in soup.findAll('label', {'class': "tpi_price_label tpi_price_label__orange"}):
    price.append(item.get_text(strip=True))
print(price)
The above code is not showing any output. It gives an empty list.
You need to properly wait for the page to load.
This is done using WebDriverWait, and it will throw an exception if the page isn't loaded within the specified timeout.
Try running my sample code below:
# test_scrape.py
import atexit

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait

URL = ("https://www.booking.com/searchresults.en-gb.html?"
       "label=gen173nr-1FCAEoggI46AdIM1gEaK4BiAEBmAEJuAEXyAEM2AEB6AEB-AELiAIBqAIDuALnhOzyBcACAQ"
       "&lang=en-gb&sid=422b3ff3c0e98b522259ad1cad2505ea&sb=1&src=searchresults&src_elem=sb"
       "&error_url=https%3A%2F%2Fwww.booking.com%2Fsearchresults.en-gb.html%3Flabel%3Dgen173nr-"
       "1FCAEoggI46AdIM1gEaK4BiAEBmAEJuAEXyAEM2AEB6AEB-AELiAIBqAIDuALnhOzyBcACAQ%3Bsid%3D422b3ff"
       "3c0e98b522259ad1cad2505ea%3Btmpl%3Dsearchresults%3Bclass_interval%3D1%3Bdest_id%3D-150690"
       "9%3Bdest_type%3Dcity%3Bdtdisc%3D0%3Bfrom_sf%3D1%3Bgroup_adults%3D2%3Bgroup_children%3D0%3"
       "Binac%3D0%3Bindex_postcard%3D0%3Blabel_click%3Dundef%3Bno_rooms%3D1%3Boffset%3D0%3Bpostcar"
       "d%3D0%3Braw_dest_type%3Dcity%3Broom1%3DA%252CA%3Bsb_price_type%3Dtotal%3Bshw_aparth%3D1%3Bs"
       "lp_r_match%3D0%3Bsrc%3Dindex%3Bsrc_elem%3Dsb%3Bsrpvid%3D912403b6d1220012%3Bss%3DAuckland%3B"
       "ss_all%3D0%3Bssb%3Dempty%3Bsshis%3D0%3Bssne%3DAuckland%3Bssne_untouched%3DAuckland%3Btop_ufi"
       "s%3D1%26%3B&sr_autoscroll=1&ss=Auckland&is_ski_area=0&ssne=Auckland&ssne_untouched=Auckland&ci"
       "ty=-1506909&checkin_year=2020&checkin_month=9&checkin_monthday=1&checkout_year=2020&checkout_m"
       "onth=9&checkout_monthday=2&group_adults=2&group_children=0&no_rooms=1&from_sf=1")


class page_loaded:
    def __call__(self, driver):
        document_ready = driver.execute_script("return document.readyState;") == "complete"
        jquery_ready = driver.execute_script("return jQuery.active == 0;")
        print(f"document ready: [({type(document_ready).__name__}){document_ready}]")
        print(f"jquery ready: [({type(jquery_ready).__name__}){jquery_ready}]")
        return document_ready and jquery_ready


def wait_for_page_to_load(driver, timeout_seconds=20):
    WebDriverWait(driver, timeout_seconds, 0.2).until(page_loaded(), f"Page could not load in {timeout_seconds} s.!")


def go_to_url(driver, url):
    driver.get(url)
    wait_for_page_to_load(driver)


def get_orange_prices(soup):
    return [price_label.get_text(strip=True)
            for price_label
            in soup.select("label.tpi_price_label.tpi_price_label__orange")]


def get_normal_prices(soup):
    return [price_label.get_text(strip=True)
            for price_label
            in soup.select("div[class*=bui-price-display__value]")]


def start_driver():
    driver = webdriver.Chrome()
    atexit.register(driver.quit)
    driver.maximize_window()
    return driver


def main():
    driver = start_driver()
    go_to_url(driver, URL)
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    orange_prices = get_orange_prices(soup)
    print(orange_prices)
    normal_prices = get_normal_prices(soup)
    print(normal_prices)


if __name__ == '__main__':
    main()
If you're having issues with the chromedriver not being discovered, try specifying the exact path to it like this:
def start_driver():
    driver = webdriver.Chrome(executable_path="/path/to/chromedriver")
    atexit.register(driver.quit)
    driver.maximize_window()
    return driver
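One caveat: WebDriverWait.until raises selenium.common.exceptions.TimeoutException when the condition never becomes true, so if you would rather fall back than crash, catch it around the navigation (a minimal sketch):

from selenium.common.exceptions import TimeoutException

try:
    go_to_url(driver, URL)
except TimeoutException:
    # Hypothetical fallback: parse whatever has rendered so far.
    print("Page load wait timed out; continuing with partial page source.")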

Python and Selenium: get the list of who liked a post on Instagram

I'm new to Python, so please don't bite me =)
The problem is this: I can't understand how to scroll the modal window in Instagram using Python and Selenium. My task is to get all the people who liked the post. Now I'm stuck after the modal opens.
from lxml import html
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-setuid-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument('--headless')
driver = webdriver.Chrome('/usr/bin/chromedriver',
                          chrome_options=chrome_options)
driver.get('https://www.instagram.com/p/B2Y2nwpCiYa/?igshid=sv6ovbyg07hx')
driver.implicitly_wait(0.7)

for elem in driver.find_elements_by_xpath(
        './/span[@class = "glyphsSpriteGrey_Close u-__7"]'):
    elem.click()
    print('close login')

button = driver.find_elements_by_xpath('.//button[@class = "sqdOP yWX7d _8A5w5 "]')
button[0].click()
driver.implicitly_wait(60)

#user_loc = By.CSS_SELECTOR, 'div.Igw0E.rBNOH.eGOV_.ybXk5._4EzTm.XfCBB.HVWg4'
user_loc = By.CSS_SELECTOR, 'button.sqdOP.L3NKy'
#user_loc = By.CSS_SELECTOR, 'div._7UhW9.xLCgt.MMzan.KV-D4.fDxYl'

# Wait for first XHR complete
wait(driver, 20).until(EC.visibility_of_element_located(user_loc))

# Get current length of user list
current_len = len(driver.find_elements(*user_loc))
print(current_len)

while True:
    driver.find_element(*user_loc).send_keys(Keys.END)
    try:
        wait(driver, 10).until(lambda x:
                               len(driver.find_elements(*user_loc)) > current_len)
        current_len = len(driver.find_elements(*user_loc))
    # Return full list of users
    except TimeoutException:
        user_list = [user for user in driver.find_elements(*user_loc)]
        break

driver.get_screenshot_as_file('test.png')
print(len(user_list))
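For what it's worth, sending Keys.END to a list entry doesn't always scroll an overlay; another approach is to scroll the dialog's container directly with JavaScript. A sketch, reusing user_loc from above; the 'div[role="dialog"] div.Igw0E' selector is an assumption based on the commented-out locator, and Instagram's class names change often:

# Sketch: scroll the likes dialog itself instead of sending END keys.
scroll_box = driver.find_element_by_css_selector('div[role="dialog"] div.Igw0E')  # assumed container
current_len = len(driver.find_elements(*user_loc))
while True:
    # Jump the box to its bottom to trigger the next XHR page of likers.
    driver.execute_script("arguments[0].scrollTop = arguments[0].scrollHeight;", scroll_box)
    try:
        wait(driver, 10).until(lambda d: len(d.find_elements(*user_loc)) > current_len)
        current_len = len(driver.find_elements(*user_loc))
    except TimeoutException:
        break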

Select checkbox with Selenium - error: Element could not be scrolled into view

Please help me fix this: I cannot select the checkbox with Python 3 and Selenium.
This is the error message
selenium.common.exceptions.ElementNotInteractableException: Message:
Element could not be scrolled into view
Link to geckodriver https://github.com/mozilla/geckodriver/releases
--Code from here ---
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from bs4 import BeautifulSoup
import re
import pandas as pd
import os
url = "https://partner.ingatlan.com/zalabanyi.rezso"
driver = webdriver.Firefox(executable_path='/Users/Test/Desktop/geckodriver')
driver.implicitly_wait(30)
driver.get(url)
python_button = driver.find_element_by_id('btn-contact-email')
python_button.click()
search_input_box = driver.find_element_by_id("contact_message_name")
search_input_box.send_keys("John Doe")
find_phone = driver.find_element_by_id("contact_message_phoneNumber")
find_phone.send_keys("00447455555")
find_email = driver.find_element_by_id("contact_message_email")
find_email.send_keys("noreply@email.com")
find_message = driver.find_element_by_id("contact_message_message")
find_message.send_keys("This is my message for you")
find_agree = driver.find_element_by_id("contact_message_acceptPrivacyPolicy")
driver.implicitly_wait(10)
find_agree.click()
You can select the checkbox using JavaScript:
driver.execute_script("arguments[0].click();", driver.find_element_by_id("contact_message_acceptPrivacyPolicy"))
Another way is to use Actions to click on the label:
from selenium.webdriver.common import action_chains
...
checkbox = driver.find_element_by_css_selector("label[for='contact_message_acceptPrivacyPolicy']")
action = action_chains.ActionChains(driver)
action.move_to_element_with_offset(checkbox, 1, 1).click().perform()
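If you would rather keep the click on the checkbox itself, a third option (a sketch, using the same element id as above) is to scroll it into view with JavaScript first, then click normally:

checkbox = driver.find_element_by_id("contact_message_acceptPrivacyPolicy")
# Bring the element into the viewport before interacting with it.
driver.execute_script("arguments[0].scrollIntoView(true);", checkbox)
checkbox.click()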

I'm trying to detect the skip ad button on YouTube with Selenium (Python)

I'm using IDLE and Selenium.
This is my code:
from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
options = webdriver.ChromeOptions()
options.add_argument('--ignore-certificate-errors')
options.binary_location = "/usr/bin/chromium"
driver = webdriver.Chrome()
driver.get('https://www.youtube.com/')
def check_exist_by_class():
    try:
        driver.find_element_by_class_name("ytp-ad-skip-button ytp-button")
        #time.sleep(1)
        return 0
    except NoSuchElementException:
        #time.sleep(1)
        return 1

while True:
    print(check_exist_by_class())
    time.sleep(0.5)
I'm only getting 1 even if the skip ad button is visible. I tried using the XPath, but the XPath for the button changes with the window size.
Update:
The XPath doesn't change with the window size; it's kind of random. Any idea how to click the skip ad button with Selenium?
XPaths of a few ad buttons:
//*[@id="skip-button:3f"]/span/button
//*[@id="skip-button:2v"]/span/button
//*[@id="skip-button:2v"]/span/button
//*[@id="skip-button:a"]/span/button
//*[@id="skip-button:a"]/span/button
//*[@id="skip-button:a"]/span/button
//*[@id="skip-button:2b"]/span/button
I know this might not be what you asked for, but alternatively you can just get an ad blocker for YouTube and load that extension in Selenium:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
path_to_extension = r'C:\Users\YOUR_USER_NAME\Desktop\1.9.0_0'
chrome_options = Options()
chrome_options.add_argument('load-extension=' + path_to_extension)
driver = webdriver.Chrome(chrome_options=chrome_options)
driver.create_options()
driver.get("http://www.google.com")
