Scrape data from a website using selenium - python-3.x

I am still quite new to Python, and I am trying to scrape data from a website using Selenium.
<small class="fxs_price_ohl"> <span>Open 1.29814</span> <span>High 1.29828</span> <span>Low 1.29775</span> </small> </div> </div> </li> <script type="application/ld+json">
trying to obtain the data Open 1.29814, High 1.29828 and Low 1.29775 from the html code above^
# Look up a <span> nested under <small class="fxs_price_ohl"> and print its text.
# XPath attribute tests are written with "@" (e.g. @class).
count_element = browser.find_element_by_xpath("//small[@class='fxs_price_ohl']//span")
print(count_element.text)
I'm using selenium with python, this is my code ^
But count_element.text prints empty, how to get the data Open 1.29814, High 1.29828 and Low 1.29775

Use
"find_elements_by_xpath"
if you want to retrieve multiple elements.
# The plural find_elements_by_xpath returns a list of every matching <span>,
# so each of the Open / High / Low values can be printed in turn.
count_elements = browser.find_elements_by_xpath("//small[@class='fxs_price_ohl']//span")
for ele in count_elements:
    print(ele.text)

You can also use a CSS selector: a class selector for the parent elements, combined with the descendant combinator and a type selector for the child spans. You also need a wait condition, because the page is slow to load.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
browser = webdriver.Chrome()
browser.get('https://www.fxstreet.com/rates-charts/gbpusd')

# Locator for the Open / High / Low <span> elements inside the chart container.
OHL_SPANS = (By.CSS_SELECTOR, ".fxs_chart_cag_cont .fxs_price_ohl span")


def _loaded_ohl_spans(driver):
    """Return the price spans once the last one has non-empty text, else False."""
    spans = driver.find_elements(*OHL_SPANS)
    if spans and spans[-1].text != '':
        return spans
    return False


# WebDriverWait keeps calling the condition until it returns a truthy value and
# raises TimeoutException after 20 seconds, so the poll can no longer spin forever.
elements = WebDriverWait(browser, 20).until(_loaded_ohl_spans)
print([elem.text for elem in elements])

Related

Selenium (Python) not finding dynamically loaded JavaScript table after automated login occurs

Im using Selenium with Python3 on a Service Now Website.
So the process is as follows: Selenium loads the ServiceNow URL, then I use send_keys to automate typing in the username and password, and then the page loads with a table of incidents that I need to extract. Unfortunately, I have to log in every single time because of the group policy I have.
This works up until I have to find the dynamically rendered Javascript table with data and I can't for the life of me seem to find it. I even tried to put a sleep in there for 15 seconds to allow it to load.
I also double checked the XPaths and Id / Class names and they match up. When I print query.page_source I don't see anything rendered by JS.
I've used beautiful soup too but that also doesn't work.
Any ideas?
from time import sleep
from collections import deque
from selenium import webdriver
from selenium.webdriver.support.ui import Select # for <SELECT> HTML form
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Open the browser, sign in, then try to read the incident table.
query = webdriver.Firefox()
get_query = query.get("SERVICENOW URL")
query.implicitly_wait(10)

# Locate the login form controls by id.
login_username = query.find_element_by_id('username')
login_password = query.find_element_by_id('password')
login_button = query.find_element_by_id('signOnButton')

username = "myUsername"
password = "myPassword"
login_username.send_keys(username)
login_password.send_keys(password)
login_button.click()

# Fixed pause intended to give the post-login page time to load.
sleep(10)

incidentTableData = []
print(query.page_source)

# *** THESE ALL FAIL AND RETURN NONE ***
print(query.find_elements())
tableById = query.find_element_by_id('service-now-table-id')
tableByXPath = query.find_element_by_xpath('service-now-xpath')
# NOTE(review): this lookup is by id although the placeholder names a class - confirm.
tableByClass = query.find_element_by_id('service-now-table-class')
Since it's a dynamically rendered Javascript table, I would suggest you to implement explicit wait in your code.
so instead of this :
tableById = query.find_element_by_id('service-now-table-id')
tableByXPath = query.find_element_by_xpath('service-now-xpath')
tableByClass = query.find_element_by_id('service-now-table-class')
re-write these lines like this :
# One shared explicit wait (10 s timeout) for every table lookup.
wait = WebDriverWait(query, 10)
service_now_with_id = wait.until(EC.element_to_be_clickable((By.ID, "service-now-table-id")))
service_now_with_xpath = wait.until(EC.element_to_be_clickable((By.XPATH, "service-now-xpath")))
# A class name has to be located with By.CLASS_NAME; By.ID would look for an id instead.
service_now_with_class = wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "service-now-table-class")))
You are gonna need to use the below imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
# Aliased as EC because the wait calls refer to EC.element_to_be_clickable.
from selenium.webdriver.support import expected_conditions as EC
PS: service_now_with_id, service_now_with_xpath and service_now_with_class are the web elements returned by the explicit waits. You may want to interact with them as your requirements dictate, e.g. by clicking on them or sending keys to them.

Can't locate elements from a website using selenium

Trying to scrape data from a business directory but I keep getting the data was not found
name = driver.find_elements_by_xpath('/html/body/div[3]/div/div/div[1]/div/div[1]/div/div[1]/h4')[0].text
# Results in: IndexError: list index out of range
So I tried to use WebDriverWait to make the code wait for the data to load but it doesn't find the elements, even though the data get loaded to the website.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
from bs4 import BeautifulSoup
import requests
import time
# Load the business directory page, wait for the results list, then print the
# first company heading.
url='https://www.dmcc.ae/business-search?directory=1&submissionGuid=2c8df029-a92e-4b5d-a014-7ef9948e664b'
driver = webdriver.Firefox()
driver.get(url)
wait=WebDriverWait(driver,50)
# NOTE(review): the value holds two space-separated class names; By.CLASS_NAME
# presumably expects a single class name - confirm against the Selenium docs.
wait.until(EC.visibility_of_element_located((By.CLASS_NAME,'searched-list ng-scope')))
# Indexing [0] raises IndexError when the XPath matches nothing.
name = driver.find_elements_by_xpath('/html/body/div[3]/div/div/div[1]/div/div[1]/div/div[1]/h4')[0].text
print(name)
# Enter the iframe nested under the #pym-0 container before looking for the list.
iframe = driver.find_element_by_css_selector("#pym-0 iframe")
driver.switch_to.frame(iframe)

# Block (up to 10 s) until the results list is present in the frame's DOM.
wait = WebDriverWait(driver, 10)
list_locator = (By.CSS_SELECTOR, '.searched-list.ng-scope')
wait.until(EC.presence_of_element_located(list_locator))

# Text of the first matching company heading.
headings = driver.find_elements_by_xpath(
    '/html/body/div[3]/div/div/div[1]/div/div[1]/div/div[1]/h4')
name = headings[0].text
The element is inside an iframe; to interact with an element inside an iframe, switch to the iframe first. Here the iframe doesn't have any unique identifier, so we used the parent div, which has a unique id, as a reference and found the child iframe from it.
now if you want to interact outside iframe use;
driver.switch_to.default_content()
<iframe src="https://dmcc.secure.force.com/Business_directory_Page?initialWidth=987&childId=pym-0&parentTitle=List%20of%20Companies%20Registered%20in%20Dubai%2C%20DMCC%20Free%20Zone&parentUrl=https%3A%2F%2Fwww.dmcc.ae%2Fbusiness-search%3Fdirectory%3D1%26submissionGuid%3D2c8df029-a92e-4b5d-a014-7ef9948e664b" width="100%" scrolling="no" marginheight="0" frameborder="0" height="3657px"></iframe>
Switch to iframe and handle the accept button.
driver.get('https://www.dmcc.ae/business-search?directory=1&submissionGuid=2c8df029-a92e-4b5d-a014-7ef9948e664b')
# Click the accept/confirmation button once it is clickable.
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#hs-eu-confirmation-button"))).click()
# Wait for the iframe under #pym-0 and switch into it in one step.
wait.until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR, '#pym-0 > iframe')))
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '.searched-list.ng-scope')))
# XPath attribute tests use "@" (here @id).
name = driver.find_elements_by_xpath('//*[@id="directory_list"]/div/div/div/div[1]/h4')[0]
print(name.text)
Outputs
1 BOXOFFICE DMCC

.sendkeys method not working to upload file using Python Selenium

I'm trying to automate facebook marketplace posts. But i'm struggling to upload pictures to it.
I already locate the element. When i click the element it will show the 'box' showing the file manager so that i can click on the folders and then the desired image.
# Wait for the element under the id "rc.js_c" container to become clickable, then click it.
ele = wait.until(EC.element_to_be_clickable((By.XPATH,'//*[@id="rc.js_c"]/div/div[1]/div[5]/div[2]/div/div/div/div/div[1]/div/div/span/div/a/div[2]')))
ele.click()
But when i try this:
ele.send_keys('/file_path/rasp.jpeg')
It raises this exception:
selenium.common.exceptions.ElementNotInteractableException: Message: element not interactable
I also tried using the os library:
ele.send_keys(os.getcwd() + '/home/br1/Downloads/rasp.jpeg')
Getting the same exception error.
The html code where the element is visible (element used in code):
<div class="_3jk">
which is the parent of (where the element is not visible):
<input accept="image/*" multiple="" name="composer_photo" title="Elige un archivo para subir" data-testid="add-more-photos" display="inline-block" type="file" class="_n _5f0v" id="js_wg">
Here is all the code if you want to try it:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
# driver protocols
options = Options()
options.add_argument('disable-notifications')
options.add_argument('start-maximized')
driver = webdriver.Chrome(options=options, executable_path='/chromedriver')
wait = WebDriverWait(driver,10)
# url
driver.get('http://facebook.com/marketplace')
driver.implicitly_wait(10)
# logging
driver.find_element_by_id('email').send_keys('username')
driver.find_element_by_id('pass').send_keys('password')
driver.find_element_by_id('u_0_2').click()
# entering marketplace
driver.find_element_by_xpath('//*[contains(text(), "Vender algo")]').click()
driver.find_element_by_xpath('//*[contains(text(), "Artículo en venta")]').click()
# XPath attribute tests use "@" (here @id).
ele = wait.until(EC.element_to_be_clickable((By.XPATH,'//*[@id="rc.js_c"]/div/div[1]/div[5]/div[2]/div/div/div/div/div[1]/div/div/span/div/a/div[2]')))
# The keys are sent to the located <div>, not to the file <input>.
ele.send_keys('/file_path/rasp.jpeg')
Any ideas and suggestions will be appreciated.
I'm a Linux user.
You should try sending the file path to the input element rather than to the div.
Try the below.
# Target the <input type="file"> itself; presence (not clickability) is awaited,
# which also matches an input that is not visible on the page.
ele = wait.until(EC.presence_of_element_located((By.XPATH,'//input[@name="composer_photo" and @type="file"]')))
ele.send_keys("file_to_be_uploaded")

Contains text in Selenium Python

I am trying to capture an Error which would restart my program and change proxy but I am unable to catch the error as its stored like this and classes are dynamically named :
<p class="g4Vm4">By signing up, you agree to our <a target="_blank" href="https://help.instagram.com/581066165581870">Terms</a> . Learn how we collect, use and share your data in our <a target="_blank" href="https://help.instagram.com/519522125107875">Data Policy</a> and how we use cookies and similar technology in our <a target="_blank" href="/legal/cookies/">Cookies Policy</a> .</p>
so I am trying to catch the xpath by this function but I am un able to do so.
def has_error(browser):
    """Report whether the 'technology' paragraph is missing from the page.

    Note the inverted contract, kept as-is because the caller tests
    ``not has_error(browser)``: the function returns ``False`` when an element
    whose text contains 'technology' IS found, and ``True`` when the lookup
    raises.

    Args:
        browser: object exposing ``find_element_by_xpath(xpath)``.

    Returns:
        bool: ``False`` if the lookup succeeds, ``True`` if it raises.
    """
    # Earlier attempt, kept for reference:
    # /*[contains(text(), 'technology')]/html/body/span/section/main/div/article/div/div[1]/div/form/p"
    try:
        browser.find_element_by_xpath("/html/body//*[contains(text(),'technology')]")
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not swallowed.
        return True
    return False
# has_error() returns False when the paragraph is found, hence the "not".
if not has_error(browser):
    print('Error found! , aborted!')
    browser.quit()
    # Replace the current process with a fresh run of the same script.
    os.execv(sys.executable, ['python'] + sys.argv)
To handle a dynamic element, use WebDriverWait and the following XPath strategy.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
# Match any <p> whose string value contains "technology", regardless of its class.
terms_locator = (By.XPATH, '//p[contains(.,"technology")]')
clickable = expected_conditions.element_to_be_clickable(terms_locator)
# Wait up to 30 seconds for the paragraph to satisfy the condition.
element = WebDriverWait(driver, 30).until(clickable)
print(element.text)
You can check if the source of the web-page contains special text.
# Plain substring test against the page's HTML source.
if 'By signing up, you agree to our ' in browser.page_source:
    pass
    # TODO Exception

How to wait for non empty input field in Selenium Python

I'm trying to automatically run the currency converter in https://www.mastercard.us/en-us/consumers/get-support/convert-currency.html using Selenium in Python. Here is what I got so far:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
link1 = 'https://www.mastercard.us/en-us/consumers/get-support/convert-currency.html'
driver1 = webdriver.PhantomJS()
driver1.get(link1)
# JavaScript helper: marks as selected every <option> of the <select> passed as
# arguments[0] whose value equals arguments[1].
script = """ var select = arguments[0];
for(var i = 0; i < select.options.length; i++) {
if(select.options[i].value == arguments[1]) {
select.options[i].selected = true;
}
}
"""
# Fill in the form: date, source currency, amount, bank fee, target currency.
driver1.find_element_by_id('getDate').send_keys('05-Sep-2017')
select = driver1.find_element_by_id('firstID')
driver1.execute_script(script, select, 'USD')
driver1.find_element_by_name('txtTAmt').send_keys('1.00')
driver1.find_element_by_name('txtBankFee').send_keys('0.00')
select = driver1.find_element_by_id('newID')
driver1.execute_script(script, select, 'EUR')
driver1.find_element_by_id('btnSubmit').click()
# Wait (up to 100 s) for an element named txtCardAmt that has a non-empty text node.
wait = WebDriverWait(driver1, 100)
element = wait.until(EC.presence_of_element_located((By.XPATH,
    '//*[@name="txtCardAmt" and text() != ""]')))
print(element.text)
The problem is that the field "txtCardAmt" never gets populated and I'm getting a timeout exception. My question is, how can I wait for the server to finish the computation?
PS: I know there is easier ways to select options using the Select class, however in this website they do not work for some reason.
Your problem is that you wait until the text of the element with name txtCardAmt is not empty. The problem is that the text of this element is always empty, so the condition never becomes true.
If you take a look to the interested html:
<input type="text" name="txtCardAmt" ng-model="mcz.txtCardAmt"
class="mczreadonly ng-pristine ng-valid mczblue" placeholder="0"
readonly="readonly" disabled="">
you can see that there isn't any text.
The info that you are looking for (not visible in the html) is in the attribute value:
That is 7.38 in my example.
So:
# Read the converted amount from the input's "value" attribute rather than its text.
elem = driver1.find_element_by_name('txtCardAmt')
value = elem.get_attribute("value")
print(value)
Your code for selecting the date and the currencies doesn't work. In my example I used XPath to do that. I'm sure there are better ways to do these tasks. I used the XPaths returned by my browser's inspector tools.
The entire example:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
link1 = 'https://www.mastercard.us/en-us/consumers/get-support/convert-currency.html'
driver1 = webdriver.PhantomJS(executable_path=r'/pathTo/phantomjs')
driver1.get(link1)

# Transaction date: open the date picker, then click through it.
driver1.find_element_by_id('getDate').click()
wait = WebDriverWait(driver1, 20)
# wait.until returns the located element, so it can be clicked directly.
wait.until(EC.presence_of_element_located((By.XPATH, "/html/body/div[1]/div/div/div/div[2]/div[3]/div/div/div[2]/div/div/div/a[1]/span"))).click()
driver1.find_element_by_xpath("//*[@id='transactiondatepicker']/div/table/tbody/tr[2]/td[3]/a").click()

# First currency dropdown (row mczRowC): open it and click the list entry,
# instead of setting the <select> through the JavaScript helper.
driver1.find_element_by_xpath("//*[@id='mczRowC']/div[2]/button").click()
wait.until(EC.presence_of_element_located((By.XPATH, "//*[@id='mczRowC']/div[2]/div/ul/li[146]/a"))).click()

driver1.find_element_by_name('txtTAmt').send_keys('1.00')
driver1.find_element_by_name('txtBankFee').send_keys('2.00')

# Second currency dropdown (row mczRowD), driven the same way.
driver1.find_element_by_xpath("//*[@id='mczRowD']/div[2]/button").click()
wait.until(EC.presence_of_element_located((By.XPATH, "//*[@id='mczRowD']/div[2]/div/ul/li[49]/a"))).click()

driver1.find_element_by_id('btnSubmit').click()

# Poll until the result input's "value" attribute is non-empty instead of
# sleeping for a fixed 3 seconds; until() returns that value and raises
# TimeoutException after the wait's 20-second limit.
value = wait.until(
    lambda d: d.find_element_by_name('txtCardAmt').get_attribute("value"))
print(value)

Resources