I made my own Twitter complaint bot that tweets at my ISP if the network drops.
The code works perfectly until it has to find the Twitter textbox to type the tweet.
The main error is:
StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
I have tried:
Adding time delays
Using the Firefox driver instead of Chrome
Adding page refreshes before the tweet_at_provider() looks for the textbox
Clicking the "Tweet" button to bring up the textbox and then trying to type in it
Using find_element_by_id, but Twitter changes the id on every page load
When I comment out the first function call to test, it finds the textbox and types about 6 times out of 10.
But when both functions are called, tweet_at_provider() always fails to grab the textbox and I get the StaleElementReferenceException.
import selenium, time, pyautogui
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import ElementClickInterceptedException, NoSuchElementException, StaleElementReferenceException

PROMISED_DOWN = 200
PROMISED_UP = 10
CHROME_DRIVER_PATH = "C:\\Development\\chromedriver.exe"
GECKODRIVER_PATH = "C:\\Users\\meeha\\Desktop\\geckodriver\\geckodriver.exe"
TWITTER_USERNAME = "my_username"
TWITTER_PASSWORD = "my_password"


class InternetSpeedTwitterBot():
    def __init__(self, driver_path):
        self.driver = webdriver.Chrome(executable_path=driver_path)
        self.down = 0
        self.up = 0

    def get_internet_speed(self):
        self.driver.get("https://www.speedtest.net/")
        self.driver.maximize_window()
        time.sleep(2)
        go = self.driver.find_element_by_xpath("//*[@id='container']/div/div[3]/div/div/div/div[2]/div[3]/div[1]/a/span[4]")
        go.click()
        time.sleep(40)
        self.down = self.driver.find_element_by_xpath("//*[@id='container']/div/div[3]/div/div/div/div[2]/div[3]/div[3]/div/div[3]/div/div/div[2]/div[1]/div[2]/div/div[2]/span")
        self.up = self.driver.find_element_by_xpath("//*[@id='container']/div/div[3]/div/div/div/div[2]/div[3]/div[3]/div/div[3]/div/div/div[2]/div[1]/div[3]/div/div[2]/span")
        print(f"Download Speed: {self.down.text} Mbps")
        print(f"Upload Speed: {self.up.text} Mbps")
        time.sleep(3)

    def tweet_at_provider(self):
        self.driver.get("https://twitter.com/login")
        self.driver.maximize_window()
        time.sleep(3)
        username = self.driver.find_element_by_name("session[username_or_email]")
        password = self.driver.find_element_by_name("session[password]")
        username.send_keys(TWITTER_USERNAME)
        password.send_keys(TWITTER_PASSWORD)
        password.submit()
        time.sleep(5)
        tweet_compose = self.driver.find_element_by_xpath('//*[@id="react-root"]/div/div/div[2]/header/div/div/div/div[1]/div[3]/a/div/span/div/div/span/span')
        tweet_compose.click()
        time.sleep(2)
        textbox = self.driver.find_element_by_xpath('//*[@id="layers"]/div[2]/div/div/div/div/div/div[2]/div[2]/div/div[3]/div/div/div/div[1]/div/div/div/div/div[2]/div[1]/div/div/div/div/div/div/div/div/div/div[1]/div/div/div/div[2]/div/div/div/div')
        textbox.send_keys(f"Hey @Ask_Spectrum, why is my internet speed {self.down.text} down / {self.up.text} up when I pay for {PROMISED_DOWN} down / {PROMISED_UP} up???")


bot = InternetSpeedTwitterBot(CHROME_DRIVER_PATH)
bot.get_internet_speed()
bot.tweet_at_provider()
I had the same error and figured out that the HTML tag was changing the instant I typed anything into the Twitter textbox.
I tackled this problem by using the XPath of the <span> tag that shows up after typing a space on my side. A <br> tag is the initial tag when you haven't entered any text; only after you type something does it turn into a <span>, and that's when you have to copy the XPath and use it in your application.
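For what it's worth, here is a rough sketch of a more defensive lookup: wait for the compose box to be clickable and re-find it if it goes stale. The data-testid selector is an assumption about Twitter's current markup, so swap in whatever selector you end up copying from dev tools:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException

def type_tweet(driver, message, attempts=3):
    # Hypothetical selector -- replace with the one you copy from dev tools.
    locator = (By.CSS_SELECTOR, 'div[data-testid="tweetTextarea_0"]')
    for _ in range(attempts):
        try:
            textbox = WebDriverWait(driver, 15).until(EC.element_to_be_clickable(locator))
            textbox.send_keys(message)
            return
        except StaleElementReferenceException:
            # Twitter re-rendered the node; look it up again from scratch.
            continue
    raise StaleElementReferenceException("compose textbox kept going stale")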
Good evening to all. I am new to Python and was working through one of the exercises in a Udemy course where we are tasked with writing a program that uses webdriver to get internet speed test results from speedtest.net. I was stuck until I found that someone had posted a solution using:
WebDriverWait(self.driver, 50).until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div[data-result-id*='true']")))
My question is how and where the value for this CSS selector can be found on the website. Please explain. Please also give as much insight as you can regarding Selenium, webdriver, and CSS selectors in Python.
# Libraries
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

# Constants
PROMISED_DOWN = 150
PROMISED_UP = 10
TWITTER_EMAIL = "my email"
TWITTER_PASSWORD = "my password"


# Class
class InternetSpeedTwitterBot:
    def __init__(self):
        self.s = Service(r'D:\Python Related Documents and Programsweb development folder\chromedriver_win32\chromedriver.exe')
        self.driver = webdriver.Chrome(service=self.s)
        self.down = None
        self.up = None

    def get_internet_speed(self):
        self.driver.get("https://www.speedtest.net/")
        time.sleep(5)
        go_button = self.driver.find_element(By.CSS_SELECTOR, '.start-button a')
        go_button.click()
        # speed_download = self.driver.find_element(By.CSS_SELECTOR, ".download-speed")
        speed_download = self.driver.find_element(By.XPATH, '//*[@id="container"]/div/div[3]/div/div/div/div[2]/div[3]/div[3]/div/div[3]/div/div/div[2]/div[1]/div[1]/div/div[2]/span')
        WebDriverWait(self.driver, 50).until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, "div[data-result-id*='true']"))
        )
        speed_results = self.driver.find_elements(
            By.CSS_SELECTOR, ".result-container-speed span.result-data-large.number.result-data-value"
        )
        self.down, self.up = (float(result.text) for result in speed_results)
        print(f"Down Speed: {self.down}, Up Speed: {self.up}")
        # go_button.click()

    def tweet_at_provider(self):
        pass


# Object creation
bot = InternetSpeedTwitterBot()

# Calling methods
bot.get_internet_speed()
Here is what I do to find elements:
Open the inspect/dev tools in your browser (I use Chrome) using F12 or right click --> Inspect.
Click on the Elements tab.
Press Ctrl + F in the Elements panel and a new search bar will appear.
In that bar you can search for elements on the page by XPath or CSS selector.
Click the picker button located at the top left of the dev tools, then click an element on the page; that element will be highlighted in the Elements panel.
If you type an XPath or CSS selector into the search bar, the matching element will be highlighted on the screen, so you will know which selector you can use.
Some docs:
How to find elements by CSS selector?: Small guide for CSS
How to find elements by XPATH selector?: Small guide for xpath
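For context, the selector from the question, div[data-result-id*='true'], matches any div whose data-result-id attribute contains the substring 'true'; that attribute presumably only matches once the result is ready, which is what the wait in your snippet relies on (assuming speedtest.net still uses it). A minimal sketch of trying a selector out in Python after checking it in the dev-tools search bar:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
driver.get("https://www.speedtest.net/")
driver.find_element(By.CSS_SELECTOR, ".start-button a").click()

# Block for up to 60 s until the "finished" marker from the question appears.
WebDriverWait(driver, 60).until(
    EC.visibility_of_element_located((By.CSS_SELECTOR, "div[data-result-id*='true']"))
)
for span in driver.find_elements(By.CSS_SELECTOR, ".result-container-speed span.result-data-large.number.result-data-value"):
    print(span.text)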
First off, I'm new to coding and Python; this is the first project I came up with to try. When I run this code there are no errors, and I get to the point of opening the Dell website, entering the service tag, and pressing the Enter key. At that point the Dell website gives me a pop-up that says "Please wait while we validate this action. Once validated, please submit your request again." and shows a 30-second countdown.
import openpyxl as xl
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

wb = xl.load_workbook('/home/user/Code/learning/inventory.xlsx')
sheet = wb['Sheet1']

for row in range(2, sheet.max_row + 1):
    service_tag_cell = sheet.cell(row, 4).value
    warranty_cell: str
    warranty_cell = sheet.cell(row, 5).value
    if service_tag_cell != '':  # and warranty_cell == '':
        driver = webdriver.Firefox()
        driver.get('https://www.dell.com/support/home/en-us')
        # Find the search box, enter the service tag number and press the enter key
        driver.find_element_by_id('inpEntrySelection').send_keys(service_tag_cell + Keys.ENTER)
        # Find warranty field
        warranty_date = driver.find_element_by_class_name('warrantyExpiringLabel')
        warranty_cell = warranty_date.value_of_css_property
        driver.close()
        sheet.cell(row, 5).value = warranty_cell

wb.save('inventory2.xlsx')
I've tried searching Google to understand what prompts this message from Dell. I get the sense it just doesn't want bots like mine searching its website. But is the message a result of a poor implementation on my part that could be corrected? Or is my goal of taking a spreadsheet of service tags and returning the expiration dates dead in the water?
If they're using methods to detect automated actions, then you'll be playing cat and mouse.
I can suggest that you try setting a random User-Agent with a library like Random User Agents:
from selenium import webdriver
from random_user_agent.user_agent import UserAgent
from random_user_agent.params import SoftwareName

user_agent_rotator = UserAgent(software_names=[SoftwareName.CHROME.value], limit=100)
user_agent = user_agent_rotator.get_random_user_agent()

options = webdriver.ChromeOptions()
options.add_argument(f'user-agent={user_agent}')
driver = webdriver.Chrome(options=options)
But if that doesn't work, there are plenty of other ways in which Selenium can be detected. This and This have good information that may be helpful.
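If the user-agent swap alone isn't enough, here is a sketch of a couple of Chrome options that are commonly suggested for hiding the most obvious automation markers; these are generic anti-detection tweaks, not a guaranteed way past Dell's validation:

from selenium import webdriver

options = webdriver.ChromeOptions()
# Drop the "Chrome is being controlled by automated test software" switch and infobar.
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option("useAutomationExtension", False)
# Commonly reported to stop navigator.webdriver from being set in recent Chrome versions.
options.add_argument("--disable-blink-features=AutomationControlled")

driver = webdriver.Chrome(options=options)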
I'm trying to download all of the zip files from this page that don't end with the word 'CHECKSUM'. So far I have managed to write code that is supposed to do that, but it's not working as expected. Here it is:
import time
import numpy as np
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

opt = Options()  # the variable that will store the selenium options
opt.add_experimental_option("debuggerAddress", "localhost:9222")  # this allows bulk-dozer to take control of your Chrome browser in DevTools mode
s = Service(r'C:\Users\ResetStoreX\AppData\Local\Programs\Python\Python39\Scripts\chromedriver.exe')  # use the chromedriver located at the corresponding path
driver = webdriver.Chrome(service=s, options=opt)  # start chromedriver with the previous conditions

# Why using MarkPrices: https://support.btse.com/en/support/solutions/articles/43000557589-index-price-and-mark-price#:~:text=Index%20Price%20is%20an%20important,of%20cryptocurrencies%20on%20major%20exchanges.&text=Mark%20Price%20is%20the%20price,be%20fair%20and%20manipulation%20resistant.

time.sleep(2)

if driver.current_url == 'https://data.binance.vision/?prefix=data/futures/um/daily/markPriceKlines/ALICEUSDT/1h/':
    number = 2  # initialize an int variable to 2 because the desired web elements in this page start from 2
    counter = 0
    while number <= np.size(driver.find_elements(By.XPATH, '//*[@id="listing"]/tr')):  # iterate over the tbody rows
        data_file_name = driver.find_element(By.XPATH, f'//*[@id="listing"]/tr[{number}]/td[1]/a').text
        if data_file_name.endswith('CHECKSUM') == False:
            current_data_file = driver.find_element(By.XPATH, f'//*[@id="listing"]/tr[{number}]/td[1]/a')
            element_position = current_data_file.location
            y_position = str(element_position.get('y'))
            driver.execute_script(f"window.scrollBy(0,{y_position})", "")  # scroll down the page to know what's being added
            current_data_file.click()
            print(f'saving {data_file_name}')
            time.sleep(0.5)
            counter += 1
        number += 1
    print(counter)
My problem occurs at the 20th element (ALICEUSDT-1h-2022-02-04.zip.CHECKSUM): the program stops and throws errors like the one below:
ElementClickInterceptedException: element click intercepted: Element is not clickable at point (418, 1294)
Or this other one, with a negative position:
ElementClickInterceptedException: element click intercepted: Element is not clickable at point (418, -1221)
So I would like to know how I could improve the code above to handle the errors shown. I know it has everything to do with the scrollbar, but I ran out of ideas after trying y_position = str(element_position.get('y') + 100) and kept getting the same errors.
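One common workaround (a sketch, not tested against that exact page) is to let the browser scroll each link into the middle of the viewport instead of computing the y offset yourself, and to fall back to a JavaScript click if something still overlaps the element:

from selenium.common.exceptions import ElementClickInterceptedException

def safe_click(driver, element):
    # Center the element in the viewport so fixed headers/footers don't cover it.
    driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", element)
    try:
        element.click()
    except ElementClickInterceptedException:
        # Last resort: a JavaScript click ignores overlapping elements.
        driver.execute_script("arguments[0].click();", element)

Calling safe_click(driver, current_data_file) in place of the manual window.scrollBy plus current_data_file.click() should avoid both the positive and the negative offsets shown in the errors above.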
I've written a script in Python to parse the name, tweets, following and followers of the accounts available in the "view all" section of my Twitter profile page. It is currently doing its job. However, I find two problems with this scraper:
Every page it parses from stays open, piling up on the taskbar.
The scraper looks clumsy.
Here is what I've written:
from selenium import webdriver
import time

def twitter_data():
    driver = webdriver.Chrome()
    driver.get('https://twitter.com/?lang=en')
    driver.find_element_by_xpath('//input[@id="signin-email"]').send_keys('username')
    driver.find_element_by_xpath('//input[@id="signin-password"]').send_keys('password')
    driver.find_element_by_xpath('//button[@type="submit"]').click()
    driver.implicitly_wait(15)

    # Clicking the "view all" link
    driver.find_element_by_xpath("//small[@class='view-all']//a[contains(@class,'js-view-all-link')]").click()
    time.sleep(10)
    for links in driver.find_elements_by_xpath("//div[@class='stream-item-header']//a[contains(@class,'js-user-profile-link')]"):
        processing_files(links.get_attribute("href"))

# going on to each profile falling under the "view all" section
def processing_files(item_link):
    driver = webdriver.Chrome()
    driver.get(item_link)
    # getting information about each profile holder
    for prof in driver.find_elements_by_xpath("//div[@class='route-profile']"):
        name = prof.find_elements_by_xpath(".//h1[@class='ProfileHeaderCard-name']//a[contains(@class,'ProfileHeaderCard-nameLink')]")[0]
        tweet = prof.find_elements_by_xpath(".//span[@class='ProfileNav-value']")[0]
        following = prof.find_elements_by_xpath(".//span[@class='ProfileNav-value']")[1]
        follower = prof.find_elements_by_xpath(".//span[@class='ProfileNav-value']")[2]
        print(name.text, tweet.text, following.text, follower.text)

twitter_data()
I've used both implicitly_wait and time.sleep in my scraper because, where I found it necessary to make the bot wait a bit longer, I used the latter. Thanks in advance for taking a look at it.
You can use driver.quit() to close the browser windows, as shown below. This will reduce the number of pages piling up on the taskbar.
from selenium import webdriver
import time

def twitter_data():
    driver = webdriver.Chrome()
    driver.get('https://twitter.com/?lang=en')
    driver.find_element_by_xpath('//input[@id="signin-email"]').send_keys('username')
    driver.find_element_by_xpath('//input[@id="signin-password"]').send_keys('password')
    driver.find_element_by_xpath('//button[@type="submit"]').click()
    driver.implicitly_wait(15)

    # Clicking the "view all" link
    driver.find_element_by_xpath("//small[@class='view-all']//a[contains(@class,'js-view-all-link')]").click()
    time.sleep(10)
    for links in driver.find_elements_by_xpath("//div[@class='stream-item-header']//a[contains(@class,'js-user-profile-link')]"):
        processing_files(links.get_attribute("href"))
    driver.quit()

# going on to each profile falling under the "view all" section
def processing_files(item_link):
    driver1 = webdriver.Chrome()
    driver1.get(item_link)
    # getting information about each profile holder
    for prof in driver1.find_elements_by_xpath("//div[@class='route-profile']"):
        name = prof.find_elements_by_xpath(".//h1[@class='ProfileHeaderCard-name']//a[contains(@class,'ProfileHeaderCard-nameLink')]")[0]
        tweet = prof.find_elements_by_xpath(".//span[@class='ProfileNav-value']")[0]
        following = prof.find_elements_by_xpath(".//span[@class='ProfileNav-value']")[1]
        follower = prof.find_elements_by_xpath(".//span[@class='ProfileNav-value']")[2]
        print(name.text, tweet.text, following.text, follower.text)
    driver1.quit()

twitter_data()
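Alternatively, here is a rough sketch of keeping a single Chrome window for everything, so no extra pages pile up on the taskbar at all. The XPaths are the ones from your script and may no longer match Twitter's markup, and the login steps are elided:

from selenium import webdriver
import time

def twitter_data():
    driver = webdriver.Chrome()
    driver.get('https://twitter.com/?lang=en')
    # ... log in and click the "view all" link exactly as in the question ...
    time.sleep(10)
    links = [a.get_attribute("href") for a in driver.find_elements_by_xpath(
        "//div[@class='stream-item-header']//a[contains(@class,'js-user-profile-link')]")]
    for link in links:
        driver.get(link)  # reuse the same window instead of opening a new one
        for prof in driver.find_elements_by_xpath("//div[@class='route-profile']"):
            values = prof.find_elements_by_xpath(".//span[@class='ProfileNav-value']")
            print(link, [v.text for v in values[:3]])  # tweets, following, followers
    driver.quit()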
I am trying to scrape reviews from the Verizon website, and I found the XPath of the reviews by inspecting the web page. I am executing the code below, but review.text doesn't seem to work perfectly all the time. I get the correct text sometimes, and sometimes it just prints the error message.
I'm not sure what I am doing wrong.
from selenium import webdriver

url = 'https://www.verizonwireless.com/smartphones/samsung-galaxy-s7/'
browser = webdriver.Chrome(executable_path='/Users/userName/PycharmProjects/Verizon/chromedriver')
browser.get(url)

reviews = []
xp = '//*[@id="BVRRContainer"]/div/div/div/div/div[3]/div/ul/li[2]/a/span[2]'

# read first ten pages of reviews ==>
for j in range(10):
    reviews.extend(browser.find_elements_by_xpath('//*[@id="BVRRContainer"]/div/div/div/div/ol/li[*]/div/div[1]'
                                                  '/div/div[2]/div/div/div[1]/p'))
    try:
        next = browser.find_element_by_xpath(xp)
        next.click()
    except:
        print(j, "error clicking")

# Print reviews ===>
for i, review in enumerate(reviews):
    try:
        print(review.text)
    except:
        print("Error in:", review)
You should improve the logic of your code. Note that you cannot get the text of elements from the first page after moving on to the next page; the references go stale, so you need to get the text before clicking the "Next" button.
Try using the code below instead:
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
import time

url = 'https://www.verizonwireless.com/smartphones/samsung-galaxy-s7/'
browser = webdriver.Chrome()
browser.get(url)

reviews = []
xp = '//a[span[@class="bv-content-btn-pages-next"]]'

# read first ten pages of reviews ==>
for i in range(10):
    for review in browser.find_elements_by_xpath('//div[@class="bv-content-summary-body-text"]/p'):
        reviews.append(review.text)
    try:
        next = browser.find_element_by_xpath(xp)
        next.location_once_scrolled_into_view
        time.sleep(0.5)  # to wait until scrolled down to the "Next" button
        time.sleep(2)  # to wait for the page to "autoscroll" to the first review and for the modal window to disappear
        next.click()
    except WebDriverException:
        print("error clicking")

for review in reviews:
    print(review)
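If the fixed sleeps turn out to be flaky, here is a small sketch of the same "Next" click driven by an explicit wait instead; the XPath is the one used above and may differ if the review widget changes:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

wait = WebDriverWait(browser, 10)
next_btn = wait.until(EC.element_to_be_clickable(
    (By.XPATH, '//a[span[@class="bv-content-btn-pages-next"]]')))
next_btn.location_once_scrolled_into_view  # scroll the button into view before clicking
next_btn.click()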