How to make a webpage stop loading and extract text from it - python-3.x

I want to extract the text from a url-shortner using this code :
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
os.environ['PATH'] += 'C:/Selenium Drivers'
driver = webdriver.Chrome()
driver.implicitly_wait(10)
driver.get('https://pastebin.com/vkuagfwV')
strings = str(driver.find_element(By.CLASS_NAME, 'textarea').text)
strings = strings.replace("\n", " ")
driver.close()
print(strings)
But this code is not working until I manually stop the web-page from stop loading. I tried using XPATH as well but it didn't work.

Instead of implicitly_wait try using Expected Conditions visibility_of_element_located method here.
Also as mentioned in comments you don't need to use str casting there.
Please try this:
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
os.environ['PATH'] += 'C:/Selenium Drivers'
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 20)
driver.get('https://pastebin.com/vkuagfwV')
strings = wait.until(EC.visibility_of_element_located((By.CLASS_NAME, "textarea"))).text
strings = strings.replace("\n", " ")
driver.close()
print(strings)
UPD
Please add eager pageLoadStrategy configuration.
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
os.environ['PATH'] += 'C:/Selenium Drivers'
caps = DesiredCapabilities().CHROME
caps["pageLoadStrategy"] = "eager"
driver = webdriver.Chrome(desired_capabilities=caps, executable_path=r'C:\path\to\chromedriver.exe')
#driver = webdriver.Chrome()
wait = WebDriverWait(driver, 20)
driver.get('https://pastebin.com/vkuagfwV')
strings = wait.until(EC.visibility_of_element_located((By.CLASS_NAME, "textarea"))).text
strings = strings.replace("\n", " ")
driver.close()
print(strings)

Related

scroll google map webpage sidebar using selenium and python

I am trying to collect data on a google map webpage, this is the link. link
This is the code I have tried. My idea is to scroll to the "website name" (you can find the website name once you scroll down) once is present in the browser. but it is not scrolling.
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))
driver.maximize_window()
driver.get("https://www.google.com/maps/place/Amsterdamsche+Athleten+Club+Hercules/#52.36937,4.8049968,16.25z/data=!4m5!3m4!1s0x47c5e3c9cbb3d913:0xef85f93ef996cc06!8m2!3d52.3692292!4d4.8056684")
img_result = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH,'//*[#id="QA0Szd"]/div/div/div[1]/div[2]/div/div[1]/div/div/div[7]/div[5]/a/div[1]/div[2]/div[1]')))
driver.execute_script("arguments[0].scrollIntoView(true);",img_result)
print(img_result.text)
driver.close()
what is the solution for this?
EDIT:This is what I'm trying to get.
At least on my side I see no need to scroll.
The following code worked:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service('C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 20)
url = "https://www.google.com/maps/place/Amsterdamsche+Athleten+Club+Hercules/#52.36937,4.8049968,16z/data=!4m5!3m4!1s0x47c5e3c9cbb3d913:0xef85f93ef996cc06!8m2!3d52.3692292!4d4.8056684"
driver.get(url)
name = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "[data-item-id='authority']"))).text
print(name)
Output:
aachercules.nl
The same could be done with XPath instead of CSS Selectors.
This is the XPath I used:
name = wait.until(EC.visibility_of_element_located((By.XPATH, "//a[#data-item-id='authority']"))).text

Scraping dynamic web page with selenium

I'm trying to get the links of the posts on this page, but they are apparently generated by clicking each of the post images. I'm using Selenium and beautifulsoup4 in Python 3.8.
Any idea how to get the links while selenium continues to the next pages?
url: https://www.goplaceit.com/cl/mapa?id_modalidad=1&tipo_pro//*[#id=%22gpi-property-list-container%22]/div[3]/div[1]/div[1]/imgpiedad=1%2C2&selectedTool=list#12/-33.45/-70.66667
after clicking on the image it opens a new tab with the following type of shortening url: https://www.goplaceit.com/propiedad/6198212
which sends me to a url type:
https://www.goplaceit.com/cl/propiedad/venta/departamento/santiago/6198212-depto-con-1d-1b-y-terraza-a-pasos-del-metro-toesca-bodega
My code:
from bs4 import BeautifulSoup
from selenium import webdriver
import time
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import winsound
from timeit import default_timer as timer
from selenium.webdriver.common.keys import Keys
start = timer()
PROXY = "PROXY" # IP:PORT or HOST:PORT
path_to_extension = r"extension"
options = Options()
#options.add_argument("--incognito")
options.add_argument('load-extension=' + path_to_extension)
#options.add_argument('--disable-java')
options.headless = False
prefs = {"profile.default_content_setting_values.notifications" : 2}
prefs2 = {"profile.managed_default_content_settings.images": 2}
prefs.update(prefs2)
prefs3 = {"profile.default_content_settings.cookies": 2}
prefs.update(prefs3)
options.add_experimental_option("prefs",prefs)
options.add_argument("--start-maximized")
options.add_argument('--proxy-server=%s' % PROXY)
driver = webdriver.Chrome('chromedriver.exe', options=options)
driver.get('https://www.goplaceit.com/cl/')
WebDriverWait(driver, 30).until(EC.element_to_be_clickable((By.XPATH, '//*[#id="root"]/nav/div/div[2]/div[1]/button'))).click()
correo = driver.find_element(By.XPATH, '//*[#id="email"]')
correo.send_keys("Mail")
contraseña = driver.find_element(By.XPATH, '//*[#id="password"]')
contraseña.send_keys("password")
contraseña.send_keys(Keys.ENTER)
time.sleep(7)
elem.driver.find_element(By.XPATH, '//*[#id="gpi-main-landing-search-input"]/div/input')
elem.click()
elem.send_keys("keywords")
WebDriverWait(driver, 30).until(EC.element_to_be_clickable((By.XPATH, '//*[#id="gpi-main-landing-search-input"]/div/div[1]/ul/li[1]/a/div/div[1]'))).click()
buscador.send_keys(Keys.ENTER)
WebDriverWait(driver, 30).until(EC.element_to_be_clickable((By.XPATH, '//*[#id="root"]/div/div/div[1]/div[2]/div/div[1]/div/div[1]/button'))).click()
WebDriverWait(driver, 30).until(EC.element_to_be_clickable((By.XPATH, '//*[#id="custom-checkbox"]'))).click()
page_number = 0
max_page_number = 30
while page_number<=max_page_number:
WebDriverWait(driver, 30).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(),"paginator-btn-right")]'))).click()
You can get easily the urls by clicking on an image, saving your url, coming back to the first page and repeating this for all the images:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
driver.get("https://www.goplaceit.com/cl/mapa?id_modalidad=1&tipo_propiedad=1%2C2&selectedTool=list#8/-33.958/-71.206")
images = WebDriverWait(driver, 30).until(EC.presence_of_all_elements_located((By.XPATH, "//div[#class='sc-iyvyFf ljSqTz']//img")))
urls = []
for i, image in enumerate(images):
window_before = driver.window_handles[0]
image.click()
driver.implicitly_wait(2)
window_after = driver.window_handles[i+1]
driver.switch_to.window(window_after)
urls.append(driver.current_url)
driver.switch_to.window(window_before)

selenium - Not able to find input

Trying to create script which will subscribe to news automatically, but stuck with a problem, selenium not able to find email input and submit button. Everytime getting selenium.common.exceptions.NoSuchElementException:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
chrome_options = Options()
chrome_options.add_argument("--window-size=1920x1080")
path_to_chromedriver = 'chromedriver'
driver = webdriver.Chrome(chrome_options=chrome_options, executable_path=path_to_chromedriver)
driver.get('https://dataengweekly.com/')
driver.find_element_by_tag_name("body").send_keys(Keys.PAGE_DOWN)
email_input = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.CSS_SELECTOR, 'input[type="email"]'))
)
email_input.send_keys("email#test.com")
driver.find_element_by_css_selector('button.subscribe-btn').click()
time.sleep(10)
Note - Your subscription textbox is in a different iframe, To work with that iframe you need to first switch to that iframe.
Try below code and let me know if you need more clarification -
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains
import time
chrome_options = Options()
chrome_options.add_argument("--window-size=1920x1080")
driver = webdriver.Chrome(options=chrome_options)
wait = WebDriverWait(driver, 5)
action = ActionChains(driver)
driver.get('https://dataengweekly.com/')
iframe = driver.find_element_by_xpath('//iframe')
driver.switch_to.frame(iframe)
email_input = wait.until(EC.presence_of_element_located((By.XPATH, "//input[#type='email']")))
action.move_to_element(email_input).click().send_keys("email#test.com").perform()
driver.find_element_by_css_selector('button.subscribe-btn').click()
time.sleep(2)

how to Login gmail using Python Selenium

Hi all am trying to automate login the gmail account using python but am unable to do so , it just opens the gmail but nothing happens, what am i missing?
from selenium import webdriver
browser = webdriver.Chrome("C:/Users/INCT-KaviChand/Downloads/chromedriver_win32/chromedriver.exe")
browser.get('http://gmail.com')
emailElem = browser.find_element_by_id('Email')
emailElem.send_keys('kavi')
nextButton = browser.find_element_by_id('next')
nextButton.click()
passwordElem = browser.find_element_by_id('kavisam')
passwordElem.send_keys('MyPassword')
signinButton = browser.find_element_by_id('signIn')
signinButton.click()
also tried below one
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
browser = webdriver.Chrome("C:/Users/INCT-KaviChand/Downloads/chromedriver_win32/chromedriver.exe")
browser.get('http://gmail.com')
wait = WebDriverWait(browser, 10)
emailElem = browser.find_element_by_id('Email')
emailElem.send_keys("abc#gmail.com")
password_elem = wait.until(EC.presence_of_element_located((By.ID,'Passwd')))
password_elem.send_keys("xyz")
browser.find_element_by_name('signIn').click()
Try to find element in different way, code below is working for me:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome(executable_path="C:/TestFiles/chromedriver.exe")
driver.get('http://gmail.com')
driver.find_element_by_xpath('//input[#type="email"]').send_keys('example', Keys.ENTER)
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//input[#type="password"]')))
driver.find_element_by_xpath('//input[#type="password"]').send_keys('example', Keys.ENTER)

How to display the count of dropdown items using selenium and python?

I am trying to get all elements within a drop down and print the total count. Here's the code, which I have written for this.
import unittest
from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
class dropdown(unittest.TestCase):
def setUp(self):
self.driver=webdriver.Chrome()
def test_selectmethod(self):
driver=self.driver
driver.get("http://magento-demo.lexiconn.com/")
driver.find_element_by_xpath("//*[#id='select-language']").click()
dropdown=Select(driver.find_element_by_xpath("//*[#id='select-language']"))
print str(len(dropdown)+ "products found"
for i in dropdown:
print(i.text)
def tearDown(self):
self.driver.close()
However, this throws error while printing str(len(dropdown).
Try str(len(dropdown.options)).
To display the count of dropdown items using selenium python you don't even have to identify the <select> tag and you can use the following solution:
Code Block:
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_argument('disable-infobars')
driver=webdriver.Chrome(chrome_options=options, executable_path=r'C:\Utility\BrowserDrivers\chromedriver.exe')
driver.get("http://magento-demo.lexiconn.com/")
count = len(WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.XPATH, "//select[#id='select-language']//option"))))
print(str(count) + " items found." )
Console Output:
3 items found.
But if you want to print the <options> within the <select> tag you have to identify the <select> tag and you can use the following solution:
Code Block:
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_argument('disable-infobars')
driver=webdriver.Chrome(chrome_options=options, executable_path=r'C:\Utility\BrowserDrivers\chromedriver.exe')
driver.get("http://magento-demo.lexiconn.com/")
count = len(WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.XPATH, "//select[#id='select-language']//option"))))
print(str(count) + " items found." )
dropdown = Select(driver.find_element_by_xpath("//*[#id='select-language']"))
print("Items are: " )
for i in dropdown.options:
print(i.get_attribute('innerHTML'))
Console Output:
3 items found.
Items are:
English
French
German

Resources