Can't find element Selenium - python-3.x

I am trying to fill in the forms in the Gmail creation page. But for some reason, my driver can't find the code. I've used all the options, path, id, name, class name, but nothing works
chrome_driver = './chromedriver'
driver = webdriver.Chrome(chrome_options=chrome_options, executable_path=chrome_driver)
driver.get('https://www.google.com/intl/nl/gmail/about/#')
try:
print('locating create account button')
create_account_button = driver.find_element_by_class_name('h-c-button')
except:
print("error, couldn't find create account button")
try:
create_account_button.click()
print('navigating to creation page')
except:
print('error navigating to creation page')
time.sleep(15)
first_name_form = driver.find_element_by_class_name('whsOnd zHQkBf')
(the sleep is just temporary, to make sure it loads completely, I know it's not efficient)
here's the link to the Gmail page:
https://accounts.google.com/signup/v2/webcreateaccount?service=mail&continue=https%3A%2F%2Fmail.google.com%2Fmail%2F%3Fpc%3Dtopnav-about-n-en&flowName=GlifWebSignIn&flowEntry=SignUp
This is the error I'm getting:
Exception has occurred: NoSuchElementException
Message: no such element: Unable to locate element: {"method":"css
selector","selector":".whsOnd zHQkBf"}
(Session info: chrome=81.0.4044.129)
Thank you for your help

I found your error and I have the solution for you. Let me first determine to you the problem. When you click on the 'Create New Account' a new window is opening. BUT your bot is still undestanding that you are located in the first window (in the one that you click on the first button in order to create the account). Thus, the bot is trying to see if there is a First Name input. That's why is failing. So, the solution is that you have to change the window that you want to specify. The way you can do it is written inside the code block.
CODE
from selenium import webdriver
import time
path = '/home/avionerman/Documents/stack'
driver = webdriver.Firefox(path)
driver.get('https://www.google.com/intl/nl/gmail/about/#')
try:
print('locating create account button')
create_account_button = driver.find_element_by_class_name('h-c-button')
except:
print("error, couldn't find create account button")
try:
create_account_button.click()
print('navigating to creation page')
except:
print('error navigating to creation page')
time.sleep(15)
# Keeping all the windows into the array named as handles
handles = driver.window_handles
# Keeping the size of the array in order to know how many windows are open
size = len(handles)
# Switch to the second opened window (id:1)
driver.switch_to.window(handles[1])
# Print the title of the current page in order to validate if it's the proper one
print(driver.title)
time.sleep(10)
first_name_input = driver.find_element_by_id('firstName')
first_name_input.click()
first_name_input.send_keys("WhateverYouWant")
last_name_input = driver.find_element_by_id('lastName')
last_name_input.click()
last_name_input.send_keys("WhateverYouWant2")
username_input = driver.find_element_by_id('username')
username_input.click()
username_input.send_keys('somethingAsAUsername')
pswd_input = driver.find_element_by_name('Passwd')
pswd_input.click()
pswd_input.send_keys('whateveryouwant')
pswd_conf_input = driver.find_element_by_name('ConfirmPasswd')
pswd_conf_input.click()
pswd_conf_input.send_keys('whateveryouwant')
time.sleep(20)
So, if you will go to line 21 you will see that I have some comments in order to tell you what these lines (from 21 until 31) are doing.
Also, I inserted all the needed code for you (first name, last name, et.c). You only have to locate the creation button (last one).
Note: Try to use ids in such cases and not class names (when the ids are clear and unique) as I already did for you.

Related

How can I resolve this selenium stale reference error when scraping page 2 of a website?

I'm using Selenium to scrape Linkedin for jobs but I'm getting the stale reference error.
I've tried refresh, wait, webdriverwait, a try catch block.
It always fails on page 2.
I'm aware it could be a DOM issue and have run through a few of the answers to that but none of them seem to work for me.
def scroll_to(self, job_list_item):
"""Just a function that will scroll to the list item in the column
"""
self.driver.execute_script("arguments[0].scrollIntoView();", job_list_item)
job_list_item.click()
time.sleep(self.delay)
def get_position_data(self, job):
"""Gets the position data for a posting.
Parameters
----------
job : Selenium webelement
Returns
-------
list of strings : [position, company, location, details]
"""
# This is where the error is!
[position, company, location] = job.text.split('\n')[:3]
details = self.driver.find_element_by_id("job-details").text
return [position, company, location, details]
def wait_for_element_ready(self, by, text):
try:
WebDriverWait(self.driver, self.delay).until(EC.presence_of_element_located((by, text)))
except TimeoutException:
logging.debug("wait_for_element_ready TimeoutException")
pass
logging.info("Begin linkedin keyword search")
self.search_linkedin(keywords, location)
self.wait()
# scrape pages,only do first 8 pages since after that the data isn't
# well suited for me anyways:
for page in range(2, 3):
jobs = self.driver.find_elements_by_class_name("occludable-update")
#jobs = self.driver.find_elements_by_css_selector(".occludable-update.ember-view")
#WebDriverWait(self.driver, 10).until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'occludable-update')))
for job in jobs:
self.scroll_to(job)
#job.click
[position, company, location, details] = self.get_position_data(job)
# do something with the data...
data = (position, company, location, details)
#logging.info(f"Added to DB: {position}, {company}, {location}")
writer.writerow(data)
# go to next page:
bot.driver.find_element_by_xpath(f"//button[#aria-label='Page {page}']").click()
bot.wait()
logging.info("Done scraping.")
logging.info("Closing DB connection.")
f.close()
bot.close_session()
I expect that when job_list_item.click() is performed the page is loaded, in this case since you are looping jobs which is a list of WebDriverElement will become stale. You are returning back to the page but your jobs is already stale.
Usually to prevent a stale element, I always prevent the use of the element in a loop or store an element to a variable, especially if the element may change.

web scraping data from glassdoor using selenium

please I need some help to run this code (https://github.com/PlayingNumbers/ds_salary_proj/blob/master/glassdoor_scraper.py)
In order to scrape job offer data from Glassdoor
Here's the code snippet:
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException
from selenium import webdriver
import time
import pandas as pd
options = webdriver.ChromeOptions()
#Uncomment the line below if you'd like to scrape without a new Chrome window every time.
#options.add_argument('headless')
#Change the path to where chromedriver is in your home folder.
driver = webdriver.Chrome(executable_path=path, options=options)
driver.set_window_size(1120, 1000)
url = "https://www.glassdoor.com/Job/jobs.htm?suggestCount=0&suggestChosen=false&clickSource=searchBtn&typedKeyword="+'data scientist'+"&sc.keyword="+'data scientist'+"&locT=&locId=&jobType="
#url = 'https://www.glassdoor.com/Job/jobs.htm?sc.keyword="' + keyword + '"&locT=C&locId=1147401&locKeyword=San%20Francisco,%20CA&jobType=all&fromAge=-1&minSalary=0&includeNoSalaryJobs=true&radius=100&cityId=-1&minRating=0.0&industryId=-1&sgocId=-1&seniorityType=all&companyId=-1&employerSizes=0&applicationType=0&remoteWorkType=0'
driver.get(url)
#Let the page load. Change this number based on your internet speed.
#Or, wait until the webpage is loaded, instead of hardcoding it.
time.sleep(5)
#Test for the "Sign Up" prompt and get rid of it.
try:
driver.find_element_by_class_name("selected").click()
except NoSuchElementException:
pass
time.sleep(.1)
try:
driver.find_element_by_css_selector('[alt="Close"]').click() #clicking to the X.
print(' x out worked')
except NoSuchElementException:
print(' x out failed')
pass
#Going through each job in this page
job_buttons = driver.find_elements_by_class_name("jl")
I'm getting an empty list
job_buttons
[]
Your problem is with wrong except argument.
With driver.find_element_by_class_name("selected").click() you are trying to click non-existing element. There is no element matching "selected" class name on that page. This causes NoSuchElementException exception as you can see yourself while you are trying to catch ElementClickInterceptedException exception.
To fix this you should use the correct locator or at least the correct argument in except.
Like this:
try:
driver.find_element_by_class_name("selected").click()
except NoSuchElementException:
pass
Or even
try:
driver.find_element_by_class_name("selected").click()
except:
pass
I'm not sure what elements do you want to get into job_buttons.
The search results containing all the details per each job can be found by this:
job_buttons = driver.find_elements_by_css_selector("li.react-job-listing")

Selecting values from drop down of website through selenium Python

I am trying web scraping using Python selenium. I am getting the following error: Message: element not interactable (Session info: chrome=78.0.3904.108) I am trying to access option element by id, value or text. It is giving me this error. I am using Python-3. Can someone help to explain where am I going wrong. I used the select tag using xpath and also tried css_selector. The select tag is selected and I can get the output of select tag selected. Here is my code for a better understanding:
Code-1:-
path = r'D:\chromedriver_win32\chromedriver.exe'
browser = webdriver.Chrome(executable_path = path)
website = browser.get("https://publicplansdata.org/resources/download-avs-cafrs/")
el = browser.find_element_by_xpath('//*[#id="ppd-download-state"]/select')
for option in el.find_elements_by_tag_name('option'):
if option.text != None:
option.click()
break
Blockquote
Code-2:-
select_element = Select(browser.find_element_by_xpath('//*[#id="ppd-download-state"]/select'))
# this will print out strings available for selection on select_element, used in visible text below
print(o.value for o in select_element.options)
select_element.select_by_value('AK')
Both codes give the same error how can I select values from drop down from website
Same as question:
Python selenium select element from drop down. Element Not Visible Exception
But the error is different. Tried the methods in comments
State, Plan, and Year:
browser.find_element_by_xpath("//span[text()='State']").click()
browser.find_element_by_xpath("//a[text()='West Virginia']").click()
time.sleep(2) # wait for Plan list to be populated
browser.find_element_by_xpath("//span[text()='Plan']").click()
browser.find_element_by_xpath("//a[text()='West Virginia PERS']").click()
time.sleep(2) # wait for Year list to be populated
browser.find_element_by_xpath("//span[text()='Year']").click()
browser.find_element_by_xpath("//a[text()='2007']").click()
Don't forget to import time

python3 More button clickable in the 1st page but NOT clickable in the 2nd page

This is the extended question on how to click 'More' button on a webpage.
Below is my previous question and one person kindly answered for it.
Since I'm not that familiar with 'find element by class name' function, I just added that person's revised code on my existing code. So my revised code would not be efficient (my apology).
Python click 'More' button is not working
The situation is, there are two types of 'More' button. 1st one is in the property description part and the 2nd one is in the text reviews part. If you click only one 'More' button from any of the reviews, reviews will be expanded so that you can see the full text reviews.
The issue I run into is that I can click 'More' button for the reviews that are in the 1st page but not clickable for the reviews in the 2nd page.
Below is the error message I get but my code still runs (without stopping once it sees an error).
Message:
no such element: Unable to locate element: {"method":"tag name","selector":"span"}
Based on my understanding, there is entry class and corresponding span for every review. I don't understand why it says python can't find it.
from selenium import webdriver
from selenium.webdriver import ActionChains
from bs4 import BeautifulSoup
review_list=[]
review_appended_list=[]
review_list_v2=[]
review_appended_list_v2=[]
listed_reviews=[]
listed_reviews_v2=[]
listed_reviews_total=[]
listed_reviews_total_v2=[]
final_list=[]
#Incognito Mode
option = webdriver.ChromeOptions()
option.add_argument("--incognito")
#Open Chrome
driver=webdriver.Chrome(executable_path="C:/Users/chromedriver.exe",options=option)
#url I want to visit (I'm going to loop over multiple listings but for simplicity, I just added one listing url).
lists = ['https://www.tripadvisor.com/VacationRentalReview-g30196-d6386734-Hot_51st_St_Walk_to_Mueller_2BDR_Modern_sleeps_7-Austin_Texas.html']
for k in lists:
driver.get(k)
time.sleep(3)
#click 'More' on description part.
link = driver.find_element_by_link_text('More')
try:
ActionChains(driver).move_to_element(link)
time.sleep(1) # time to move to link
link.click()
time.sleep(1) # time to update HTML
except Exception as ex:
print(ex)
time.sleep(3)
# first "More" shows text in all reviews - there is no need to search other "More"
try:
first_entry = driver.find_element_by_class_name('entry')
more = first_entry.find_element_by_tag_name('span')
#more = first_entry.find_element_by_link_text('More')
except Exception as ex:
print(ex)
try:
ActionChains(driver).move_to_element(more)
time.sleep(1) # time to move to link
more.click()
time.sleep(1) # time to update HTML
except Exception as ex:
print(ex)
#begin parsing html and scraping data.
html =driver.page_source
soup=BeautifulSoup(html,"html.parser")
listing=soup.find_all("div", class_="review-container")
all_reviews = driver.find_elements_by_class_name('wrap')
for review in all_reviews:
all_entries = review.find_elements_by_class_name('partial_entry')
if all_entries:
review_list=[all_entries[0].text]
review_appended_list.extend([review_list])
for i in range(len(listing)):
review_id=listing[i]["data-reviewid"]
listing_v1=soup.find_all("div", class_="rating reviewItemInline")
rating=listing_v1[i].span["class"][1]
review_date=listing_v1[i].find("span", class_="ratingDate relativeDate")
review_date_detail=review_date["title"]
listed_reviews=[review_id, review_date_detail, rating[7:8]]
listed_reviews.extend([k])
listed_reviews_total.append(listed_reviews)
for a,b in zip (listed_reviews_total,review_appended_list):
final_list.append(a+b)
#loop over from the 2nd page of the reviews for the same listing.
for j in range(5,20,5):
url_1='-'.join(k.split('-',3)[:3])
url_2='-'.join(k.split('-',3)[3:4])
middle="-or%d-" % j
final_k=url_1+middle+url_2
driver.get(final_k)
time.sleep(3)
link = driver.find_element_by_link_text('More')
try:
ActionChains(driver).move_to_element(link)
time.sleep(1) # time to move to link
link.click()
time.sleep(1) # time to update HTML
except Exception as ex:
print(ex)
# first "More" shows text in all reviews - there is no need to search other "More"
try:
first_entry = driver.find_element_by_class_name('entry')
more = first_entry.find_element_by_tag_name('span')
except Exception as ex:
print(ex)
try:
ActionChains(driver).move_to_element(more)
time.sleep(2) # time to move to link
more.click()
time.sleep(2) # time to update HTML
except Exception as ex:
print(ex)
html =driver.page_source
soup=BeautifulSoup(html,"html.parser")
listing=soup.find_all("div", class_="review-container")
all_reviews = driver.find_elements_by_class_name('wrap')
for review in all_reviews:
all_entries = review.find_elements_by_class_name('partial_entry')
if all_entries:
#print('--- review ---')
#print(all_entries[0].text)
#print('--- end ---')
review_list_v2=[all_entries[0].text]
#print (review_list)
review_appended_list_v2.extend([review_list_v2])
#print (review_appended_list)
for i in range(len(listing)):
review_id=listing[i]["data-reviewid"]
#print review_id
listing_v1=soup.find_all("div", class_="rating reviewItemInline")
rating=listing_v1[i].span["class"][1]
review_date=listing_v1[i].find("span", class_="ratingDate relativeDate")
review_date_detail=review_date["title"]
listed_reviews_v2=[review_id, review_date_detail, rating[7:8]]
listed_reviews_v2.extend([k])
listed_reviews_total_v2.append(listed_reviews_v2)
for a,b in zip (listed_reviews_total_v2,review_appended_list_v2):
final_list.append(a+b)
print (final_list)
if len(listing) !=5:
break
How to enable clicking 'More' button for the 2nd and rest of the pages? so that I can scrape the full text reviews?
Edited Below:
The error messages I get are these two lines.
Message: no such element: Unable to locate element: {"method":"tag name","selector":"span"}
Message: stale element reference: element is not attached to the page document
I guess my whole codes still run because I used try and except function? Usually when python runs into an error, it stops running.
Try it like:
driver.execute_script("""
arguments[0].click()
""", link)

selenium - clicking a button

I am trying to pull out the names of all courses offered by Lynda.com together with the subject so that it appears on my list as '2D Drawing -- Project Soane: Recover a Lost Monument with BIM with Paul F. Aubin'. So I am trying to write a script that will go to each subject on http://www.lynda.com/sitemap/categories and pull out the list of courses. I already managed to get Selenium to go from one subject to another and pull the courses. My only problem is that there is a button 'See X more courses' to see the rest of the courses. Sometimes you have to click it couple of times that´s why I used while loop. But selenium doesn´t seem to execute this click. Does anyone know why?
This is my code:
from selenium import webdriver
url = 'http://www.lynda.com/sitemap/categories'
mydriver = webdriver.Chrome()
mydriver.get(url)
course_list = []
for a in [1,2,3]:
for b in range(1,73):
mydriver.find_element_by_xpath('//*[#id="main-content"]/div[2]/div[3]/div[%d]/ul/li[%d]/a' % (a,b)).click()
while True:
#click the button 'See more results' as long as it´s available
try:
mydriver.find_element_by_xpath('//*[#id="main-content"]/div[1]/div[3]/button').click()
except:
break
subject = mydriver.find_element_by_tag_name('h1') # pull out the subject
courses = mydriver.find_elements_by_tag_name('h3') # pull out the courses
for course in courses:
course_list.append(str(subject.text)+" -- " + str(course.text))
# go back to the initial site
mydriver.get(url)
Scroll to element before clicking:
see_more_results = browser.find_element_by_css_selector('button[class*=see-more-results]')
browser.execute_script('return arguments[0].scrollIntoView()', see_more_results)
see_more_results.click()
One solution how to repeat these actions could be:
def get_number_of_courses():
return len(browser.find_elements_by_css_selector('.course-list > li'))
number_of_courses = get_number_of_courses()
while True:
try:
button = browser.find_element_by_css_selector(CSS_SELECTOR)
browser.execute_script('return arguments[0].scrollIntoView()', button)
button.click()
while True:
new_number_of_courses = get_number_of_courses()
if (new_number_of_courses > number_of_courses):
number_of_courses = new_number_of_courses
break
except:
break
Caveat: it's always better to use build-in explicit wait than while True:
http://www.seleniumhq.org/docs/04_webdriver_advanced.jsp#explicit-waits
The problem is that you're calling a method to find element by class name, but you're passing a xpath. if you're sure this is the correct xpath you'll simply need to change to method to 'find_element_by_xpath'.
A recommendation if you allow: Try to stay away from these long xpaths and go through some tutorials on how to write efficient xpath for example.

Resources