I recently started doing botting to try and get my hands on a better graphics card. My issue is this button: button html, which has no name or anything useful that would allow my bot to detect it. I have tried using its XPath, but without success. My XPath attempt (I used the real XPath, don't worry). Any help would be much appreciated :)
# Pierce the custom element's shadow DOM with JavaScript (WebDriver selectors
# cannot cross a shadow boundary), then click the anchor it returns.
shadow_anchor = driver.execute_script('return document.querySelector("core-button").shadowRoot.querySelector("a")')
shadow_anchor.click()
use execute_script and then return the element inside the shadowRoot
I used the following code to fix my issue.
browser = webdriver.Chrome()


def expand_shadow_element(element):
    """Return the shadow root hosted by *element*.

    WebDriver locators cannot descend into a shadow DOM, so the root is
    fetched via JavaScript and then searched with ordinary selectors.
    """
    return browser.execute_script('return arguments[0].shadowRoot', element)


# Walk down the chain of nested shadow roots until the button is reachable:
# app-shell -> request-page -> box-content -> core-button.
root1 = browser.find_element_by_tag_name('app-shell')
shadow_root1 = expand_shadow_element(root1)

root2 = shadow_root1.find_element_by_css_selector('request-page')
shadow_root2 = expand_shadow_element(root2)

root3 = shadow_root2.find_element_by_css_selector('box-content')
shadow_root3 = expand_shadow_element(root3)

pay_button = shadow_root3.find_element_by_css_selector("core-button")
pay_button.click()
Related
I'm trying to scrape Glassdoor using the code given here
https://github.com/PlayingNumbers/ds_salary_proj/blob/master/glassdoor_scraper.py
While executing the code, there are no errors and the website opens, but then nothing happens. I think they have changed the tags on the website. I've tried changing the tags but it's still not working.
Here's the code snippet:
def get_jobs(keyword, num_jobs, verbose, path, slp_time):
    """Scrape Glassdoor job listings for *keyword*.

    NOTE(review): this excerpt is truncated -- the code that actually appends
    to ``jobs`` lives in the linked glassdoor_scraper.py, so the loop below is
    only the page-setup portion.
    """
    options = webdriver.ChromeOptions()
    # executable_path= is the Selenium 3 API spelling, kept as in the repo.
    driver = webdriver.Chrome(executable_path=path, options=options)
    driver.set_window_size(1120, 1000)

    url = 'https://www.glassdoor.com/Job/jobs.htm?sc.keyword="' + keyword + '"&locT=C&locId=1147401&locKeyword=San%20Francisco,%20CA&jobType=all&fromAge=-1&minSalary=0&includeNoSalaryJobs=true&radius=100&cityId=-1&minRating=0.0&industryId=-1&sgocId=-1&seniorityType=all&companyId=-1&employerSizes=0&applicationType=0&remoteWorkType=0'
    driver.get(url)

    jobs = []
    while len(jobs) < num_jobs:
        # Give the results page time to render before interacting with it.
        time.sleep(slp_time)

        # Dismiss the pre-selected job card if it intercepts the click.
        try:
            driver.find_element_by_class_name("selected").click()
        except ElementClickInterceptedException:
            pass

        time.sleep(.1)

        # Close the sign-up modal if it popped up.
        try:
            driver.find_element_by_css_selector('[alt="Close"]').click()
            print(' x out worked')
        except NoSuchElementException:
            print('x out failed')
            pass
You can find the whole code in the link given above.
Any help would be greatly appreciated!
Can you check the URL generated by the
url = 'https://www.glassdoor.com/Job/jobs.htm?sc.keyword="' + keyword + '"&locT=C&locId=1147401&locKeyword=San%20Francisco,%20CA&jobType=all&fromAge=-1&minSalary=0&includeNoSalaryJobs=true&radius=100&cityId=-1&minRating=0.0&industryId=-1&sgocId=-1&seniorityType=all&companyId=-1&employerSizes=0&applicationType=0&remoteWorkType=0'
And verify it manually if results are displayed in the page
I made my own Twitter complaint bot that tweets at my ISP if the network drops.
Code works perfect, until it has to find the Twitter textbox to type the tweet.
Main error is:
StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
I have tried:
Adding time delays
Using Firefox Driver instead of Google
Adding page refreshes before the tweet_at_provider() looks for the textbox
Clicking the "Tweet" button to bring up the textbox to then try type in it
Using find.element_by_id but twitter changes id every pageload
When I comment out the first function call to test, it will find and type 6/10 times.
But when both functions are called the tweet_at_provider() always fails at grabbing the textbox and I get the StaleElement error.
import selenium, time, pyautogui
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import ElementClickInterceptedException, NoSuchElementException, StaleElementReferenceException
# Speeds (in Mbps) promised by the ISP's plan; compared against measurements.
PROMISED_DOWN = 200
PROMISED_UP = 10

# Raw strings avoid invalid escape sequences such as "\D" in Windows paths
# (a DeprecationWarning today, a SyntaxWarning/SyntaxError in newer Pythons).
CHROME_DRIVER_PATH = r"C:\Development\chromedriver.exe"
GECKODRIVER_PATH = r"C:\Users\meeha\Desktop\geckodriver\geckodriver.exe"

# Placeholder credentials -- substitute real values; never commit real ones.
TWITTER_USERNAME = "my_username"
TWITTER_PASSWORD = "my_password"
class InternetSpeedTwitterBot():
    """Measure connection speed on speedtest.net and tweet at the ISP.

    NOTE(review): the XPaths in the original post read ``[#id=...]``, which is
    not valid XPath (attributes are selected with ``@``); ``@`` is restored
    below. The long absolute XPaths remain brittle and will break whenever
    either site changes its markup.
    """

    def __init__(self, driver_path):
        # executable_path= is deprecated in Selenium 4; kept to match the
        # Selenium 3 API used throughout this snippet.
        self.driver = webdriver.Chrome(executable_path=driver_path)
        self.down = 0  # download-speed element, set by get_internet_speed()
        self.up = 0    # upload-speed element, set by get_internet_speed()

    def get_internet_speed(self):
        """Run a speed test and store the result elements on self."""
        self.driver.get("https://www.speedtest.net/")
        self.driver.maximize_window()
        time.sleep(2)

        go = self.driver.find_element_by_xpath("//*[@id='container']/div/div[3]/div/div/div/div[2]/div[3]/div[1]/a/span[4]")
        go.click()

        # The test itself takes ~30 s; wait generously before reading results.
        time.sleep(40)
        self.down = self.driver.find_element_by_xpath("//*[@id='container']/div/div[3]/div/div/div/div[2]/div[3]/div[3]/div/div[3]/div/div/div[2]/div[1]/div[2]/div/div[2]/span")
        self.up = self.driver.find_element_by_xpath("//*[@id='container']/div/div[3]/div/div/div/div[2]/div[3]/div[3]/div/div[3]/div/div/div[2]/div[1]/div[3]/div/div[2]/span")
        print(f"Download Speed: {self.down.text} Mbps")
        print(f"Upload Speed: {self.up.text} Mbps")
        time.sleep(3)

    def tweet_at_provider(self):
        """Log in to Twitter and tweet the measured speeds at the ISP."""
        self.driver.get("https://twitter.com/login")
        self.driver.maximize_window()
        time.sleep(3)

        username = self.driver.find_element_by_name("session[username_or_email]")
        password = self.driver.find_element_by_name("session[password]")
        username.send_keys(TWITTER_USERNAME)
        password.send_keys(TWITTER_PASSWORD)
        password.submit()

        time.sleep(5)
        tweet_compose = self.driver.find_element_by_xpath('//*[@id="react-root"]/div/div/div[2]/header/div/div/div/div[1]/div[3]/a/div/span/div/div/span/span')
        tweet_compose.click()
        time.sleep(2)

        textbox = self.driver.find_element_by_xpath('//*[@id="layers"]/div[2]/div/div/div/div/div/div[2]/div[2]/div/div[3]/div/div/div/div[1]/div/div/div/div/div[2]/div[1]/div/div/div/div/div/div/div/div/div/div[1]/div/div/div/div[2]/div/div/div/div')
        # '@Ask_Spectrum' restored: the post shows '#Ask_Spectrum', consistent
        # with the same '@'->'#' mangling seen in the XPaths, and the stated
        # intent is to tweet AT the provider -- TODO confirm against original.
        textbox.send_keys(f"Hey @Ask_Spectrum, why is my internet speed {self.down.text} down / {self.up.text} up when I pay for {PROMISED_DOWN} down / {PROMISED_UP} up???")
# Guard the entry point so importing this module doesn't launch a browser.
if __name__ == "__main__":
    bot = InternetSpeedTwitterBot(CHROME_DRIVER_PATH)
    bot.get_internet_speed()
    bot.tweet_at_provider()
I had the same error there and figured out that HTML tag was instantly changing as soon as I was typing something on the twitter text-box.
Tackle this problem using the XPath of the span tag that shows up after typing a space. The br tag is the initial tag present when you have not entered any text; only after you type something does it turn into a span tag, and that's when you have to copy the XPath and use it in your application. (The tag names were stripped from the original post's markup.)
Hi I am scraping a website and I am using selenium python. The code is as below
import time  # used by time.sleep below; missing from the original snippet

url = 'https://www.hkexnews.hk/'

options = webdriver.ChromeOptions()
# chrome_options= is the deprecated Selenium-3 spelling of options=.
browser = webdriver.Chrome(chrome_options=options, executable_path=r'chromedriver.exe')
browser.get(url)

# '@' restored in all XPaths below: the post shows '#data-value'/'#id',
# which is invalid XPath syntax (attributes are selected with '@').
# Open the date-range dropdown and switch to the post-2006 search form.
tier1 = browser.find_element_by_id('tier1-select')
tier1.click()
tier12 = browser.find_element_by_xpath('//*[@data-value="rbAfter2006"]')
tier12.click()
time.sleep(1)

# Drill down the document-type tree:
# Circulars > Securities/Share Capital > Issue of Shares.
tier2 = browser.find_element_by_id('rbAfter2006')
tier2.click()
tier22 = browser.find_element_by_xpath("//*[@id='rbAfter2006']//*[@class='droplist-item droplist-item-level-1']//*[text()='Circulars']")
tier22.click()
tier23 = browser.find_element_by_xpath("//*[@id='rbAfter2006']//*[@class='droplist-item droplist-item-level-2']//*[text()='Securities/Share Capital']")
tier23.click()
tier24 = browser.find_element_by_xpath("//*[@id='rbAfter2006']//*[@class='droplist-group droplist-submenu level3']//*[text()='Issue of Shares']")
tier24.click()
It stops at tier23, raising an ElementNotVisibleException. I have tried with different classes, yet it still does not work. Thank you for your help.
There are two elements that can be selected by your XPath. The first one is hidden. Try below to select required element:
# Restrict the match to the expanded submenu (aria-expanded='true') so the
# hidden duplicate element is skipped. '@' restored: '#id' is invalid XPath.
tier23 = browser.find_element_by_xpath("//*[@id='rbAfter2006']//li[@aria-expanded='true']//*[@class='droplist-item droplist-item-level-2']//*[text()='Securities/Share Capital']")
or shorter
# Shorter form of the same locator. '@' restored: '#aria-expanded' is invalid XPath.
tier23 = browser.find_element_by_xpath("//li[@aria-expanded='true']//a[.='Securities/Share Capital']")
# Reading this property has the side effect of scrolling the element into
# view, which makes the subsequent click possible.
tier23.location_once_scrolled_into_view
tier23.click()
P.S. Note that that option will still not be visible because you need to scroll the list down first. I used tier23.location_once_scrolled_into_view for this purpose.
Also it's better to use Selenium built-in Waits instead of time.sleep
Hey so I'm attempting to sign in on twitter via selenium but my script only fills in the username form and nothing else. I've tried finding the elements via xpath but still nothing. No errors/exceptions raised either so I'm pretty clueless, here's the code:
def login():
    """Log in to Twitter through PhantomJS behind an HTTP proxy.

    NOTE(review): PhantomJS is abandoned and unsupported by modern Selenium;
    prefer headless Chrome or Firefox.
    """
    service_args = [
        '--proxy=52.183.30.241:8888',
        '--proxy-type=http',
        '--ignore-ssl-errors=true'
    ]
    execpath = r'C:\Users\Ben\AppData\Roaming\npm\node_modules\phantomjs-prebuilt\lib\phantom\bin\phantomjs.exe'
    driver = webdriver.PhantomJS(service_args=service_args, executable_path=execpath)
    driver.set_window_size(1024, 768)

    # FIX: the original URL was 'https:twitter.com/login' (missing '//'),
    # which is likely why only part of the form ever got filled in.
    driver.get('https://twitter.com/login')

    elem = driver.find_element_by_class_name('js-username-field')
    elem.send_keys('username')

    # implicitly_wait sets a session-wide element-lookup timeout;
    # it is not a one-off sleep.
    driver.implicitly_wait(5)

    elem = driver.find_element_by_class_name('js-password-field')
    elem.send_keys('password')

    elem = driver.find_element_by_css_selector('button.submit.btn.primary-btn').click()

    time.sleep(5)
    driver.save_screenshot('C:\\Users\\Ben\\Pictures\\screen.png')
Here's the screenshot it gives me.
Any help would be appreciated!
I am trying to pull out the names of all courses offered by Lynda.com together with the subject so that it appears on my list as '2D Drawing -- Project Soane: Recover a Lost Monument with BIM with Paul F. Aubin'. So I am trying to write a script that will go to each subject on http://www.lynda.com/sitemap/categories and pull out the list of courses. I already managed to get Selenium to go from one subject to another and pull the courses. My only problem is that there is a button 'See X more courses' to see the rest of the courses. Sometimes you have to click it a couple of times; that's why I used a while loop. But Selenium doesn't seem to execute this click. Does anyone know why?
This is my code:
from selenium import webdriver

url = 'http://www.lynda.com/sitemap/categories'
mydriver = webdriver.Chrome()
mydriver.get(url)

course_list = []
for a in [1, 2, 3]:
    for b in range(1, 73):
        # Open subject (a, b) from the category grid.
        # '@' restored: the post shows '#id', which is invalid XPath.
        mydriver.find_element_by_xpath('//*[@id="main-content"]/div[2]/div[3]/div[%d]/ul/li[%d]/a' % (a, b)).click()

        # Keep clicking 'See X more courses' until the button disappears.
        while True:
            try:
                mydriver.find_element_by_xpath('//*[@id="main-content"]/div[1]/div[3]/button').click()
            except Exception:
                # Narrowed from a bare 'except:' so Ctrl-C still interrupts.
                break

        subject = mydriver.find_element_by_tag_name('h1')   # subject heading
        courses = mydriver.find_elements_by_tag_name('h3')  # course titles
        for course in courses:
            course_list.append(str(subject.text)+" -- " + str(course.text))

        # Return to the category index for the next subject.
        mydriver.get(url)
Scroll to element before clicking:
# Bring the "See more results" button into the viewport before clicking;
# PhantomJS/Chrome refuse clicks on elements scrolled out of view.
more_button = browser.find_element_by_css_selector('button[class*=see-more-results]')
browser.execute_script('return arguments[0].scrollIntoView()', more_button)
more_button.click()
One solution how to repeat these actions could be:
def get_number_of_courses():
    """Count the course entries currently rendered in the list."""
    return len(browser.find_elements_by_css_selector('.course-list > li'))


number_of_courses = get_number_of_courses()
while True:
    try:
        button = browser.find_element_by_css_selector(CSS_SELECTOR)
        browser.execute_script('return arguments[0].scrollIntoView()', button)
        button.click()
        # Busy-wait until the click has actually loaded more courses before
        # looking for the button again. NOTE(review): the indentation of this
        # inner loop was lost in the original paste; as rendered it broke
        # unconditionally -- the 'break' belongs inside the 'if', matching the
        # stated intent of waiting for the count to grow.
        while True:
            new_number_of_courses = get_number_of_courses()
            if (new_number_of_courses > number_of_courses):
                number_of_courses = new_number_of_courses
                break
    except Exception:
        # Button no longer present: all courses are loaded. Narrowed from a
        # bare 'except:' so Ctrl-C still interrupts.
        break
Caveat: it's always better to use build-in explicit wait than while True:
http://www.seleniumhq.org/docs/04_webdriver_advanced.jsp#explicit-waits
The problem is that you're calling a method that finds an element by class name, but you're passing it an XPath. If you're sure this is the correct XPath, you'll simply need to change the method to 'find_element_by_xpath'.
A recommendation if you allow: Try to stay away from these long xpaths and go through some tutorials on how to write efficient xpath for example.