I am trying to scrape data from a business directory, but I keep getting an error that the data was not found:
name = driver.find_elements_by_xpath('/html/body/div[3]/div/div/div[1]/div/div[1]/div/div[1]/h4')[0].text
# Results in: IndexError: list index out of range
So I tried to use WebDriverWait to make the code wait for the data to load, but it still doesn't find the elements, even though the data does get loaded on the website.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
from bs4 import BeautifulSoup
import requests
import time
url='https://www.dmcc.ae/business-search?directory=1&submissionGuid=2c8df029-a92e-4b5d-a014-7ef9948e664b'
driver = webdriver.Firefox()
driver.get(url)
wait = WebDriverWait(driver, 50)
wait.until(EC.visibility_of_element_located((By.CLASS_NAME, 'searched-list ng-scope')))
name = driver.find_elements_by_xpath('/html/body/div[3]/div/div/div[1]/div/div[1]/div/div[1]/h4')[0].text
print(name)
driver.switch_to.frame(driver.find_element_by_css_selector("#pym-0 iframe"))
wait = WebDriverWait(driver, 10)
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.searched-list.ng-scope')))
name = driver.find_elements_by_xpath('/html/body/div[3]/div/div/div[1]/div/div[1]/div/div[1]/h4')[0].text
It's inside an iframe; to interact with an element inside an iframe, you have to switch to the iframe first. Here the iframe doesn't have any unique identifier, so we used the parent div, which has a unique id, as a reference and located the child iframe from it.
Now, if you want to interact with elements outside the iframe again, use:
driver.switch_to.default_content()
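For reference, here is the whole flow in one place. This is a minimal sketch using the current Selenium 4 API (find_element(By..., ...) replaces the removed find_element_by_* helpers); the URL and selectors are the ones already used in this thread:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Firefox()
driver.get('https://www.dmcc.ae/business-search?directory=1&submissionGuid=2c8df029-a92e-4b5d-a014-7ef9948e664b')
wait = WebDriverWait(driver, 50)
# Wait for the iframe (located via its uniquely-identified parent div) and switch into it.
wait.until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR, '#pym-0 iframe')))
# Inside the iframe, the directory content is now reachable.
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.searched-list.ng-scope')))
name = driver.find_elements(By.XPATH, '/html/body/div[3]/div/div/div[1]/div/div[1]/div/div[1]/h4')[0].text
print(name)
# Switch back to the top-level document before interacting outside the iframe.
driver.switch_to.default_content()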
<iframe src="https://dmcc.secure.force.com/Business_directory_Page?initialWidth=987&childId=pym-0&parentTitle=List%20of%20Companies%20Registered%20in%20Dubai%2C%20DMCC%20Free%20Zone&parentUrl=https%3A%2F%2Fwww.dmcc.ae%2Fbusiness-search%3Fdirectory%3D1%26submissionGuid%3D2c8df029-a92e-4b5d-a014-7ef9948e664b" width="100%" scrolling="no" marginheight="0" frameborder="0" height="3657px"></iframe>
Switch to the iframe and handle the accept button.
driver.get('https://www.dmcc.ae/business-search?directory=1&submissionGuid=2c8df029-a92e-4b5d-a014-7ef9948e664b')
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#hs-eu-confirmation-button"))).click()
wait.until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR,'#pym-0 > iframe')))
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR,'.searched-list.ng-scope')))
name = driver.find_elements_by_xpath('//*[@id="directory_list"]/div/div/div/div[1]/h4')[0]
print(name.text)
Outputs
1 BOXOFFICE DMCC
I am trying to search with Python via Google Maps, and I want to get the URLs from the results. These are the steps in my approach:
Open Google
Accept cookies
Search for a random term (in this example, "pediatrician in Aargau")
Switch to Google Maps
This is where I get the error: when I wait for the results to load, I always get a timeout, even though I can see in the browser window that the results are fully loaded. Is there anything wrong with my code? I would like to extract the website URLs of the results.
Here is the code that I have so far:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Start the browser
driver = webdriver.Chrome()
# Open google.de and accept cookies
driver.get("https://www.google.de/")
wait = WebDriverWait(driver, 25)
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#L2AGLb > div"))).click()
# Search for "Kinderarzt Kanton Aargau"
search_box = driver.find_element(By.NAME, "q")
search_box.send_keys("Kinderarzt Kanton Aargau")
search_box.submit()
# Switch to Maps tab
wait.until(EC.element_to_be_clickable((By.XPATH, "//a[contains(text(), 'Maps')]"))).click()
# Wait for links and extract
results = wait.until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div[aria-label^='Results for'] > div > div > a")))
for result in results:
    link = result.get_attribute("href")
    print(link)
# Close the browser
driver.quit()
PS: I have tried increasing the wait time for the WebDriverWait, but that doesn't help. I think it cannot find the object, and there must be another way to identify the objects.
First, you can skip several steps by just building the Google Maps URL with the desired search string. Second, your "wait for results to load" locator did not exist on my page; my guess is that the class you were using changes regularly. I used a different CSS selector and found the results just fine.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Start the browser
driver = webdriver.Chrome()
# Declare string to search for and encode it
search_string = "Kinderarzt Kanton Aargau"
partial_url = search_string.replace(" ", "+")
# Open google.de and accept cookies
driver.get(f"https://www.google.de/maps/search/{partial_url}/")
wait = WebDriverWait(driver, 25)
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#L2AGLb > div"))).click()
# Wait for links and extract
results = wait.until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div[aria-label^='Results for'] > div > div > a")))
for result in results:
    link = result.get_attribute("href")
    print(link)
# Close the browser
driver.quit()
The result is
https://www.google.de/maps/place/Dr.+med.+Helena+Gerritsma+Schirlo/data=!4m7!3m6!1s0x47903be8d0d4a09d:0xc97d85a6fa076207!8m2!3d47.3906733!4d8.0443884!16s%2Fg%2F1tghc1gd!19sChIJnaDU0Og7kEcRB2IH-qaFfck?authuser=0&hl=en&rclk=1
https://www.google.de/maps/place/Kinderarztpraxis+Dr.+med.+Armin+B%C3%BChler+%26+Thomas+Justen/data=!4m7!3m6!1s0x479069d7b30c674b:0xd04693e64cbc42b0!8m2!3d47.5804824!4d8.2163541!16s%2Fg%2F1ptw0srs4!19sChIJS2cMs9dpkEcRsEK8TOaTRtA?authuser=0&hl=en&rclk=1
https://www.google.de/maps/place/Kinderarztpraxis+Lenzburg/data=!4m7!3m6!1s0x4790160e650976b1:0x5352d33510a53d99!8m2!3d47.3855278!4d8.1753395!16s%2Fg%2F11hz17jwcy!19sChIJsXYJZQ4WkEcRmT2lEDXTUlM?authuser=0&hl=en&rclk=1
https://www.google.de/maps/place/Kinderarzthaus+-+Kinderarztpraxis/data=!4m7!3m6!1s0x47903bf002633251:0xf029086640b016ee!8m2!3d47.391928!4d8.051698!16s%2Fg%2F11cfdn2j8!19sChIJUTJjAvA7kEcR7hawQGYIKfA?authuser=0&hl=en&rclk=1
https://www.google.de/maps/place/Dr.+med.+Nils+Hammerich/data=!4m7!3m6!1s0x4790160e650976b1:0x7116ed2cc14996ea!8m2!3d47.3856086!4d8.1753854!16s%2Fg%2F1tl0w7qv!19sChIJsXYJZQ4WkEcR6pZJwSztFnE?authuser=0&hl=en&rclk=1
https://www.google.de/maps/place/Kinderarzt+Berikon/data=!4m7!3m6!1s0x47900e152314a493:0x72ca7fe58b7b3a5f!8m2!3d47.3612625!4d8.3674472!16s%2Fg%2F11c311g_px!19sChIJk6QUIxUOkEcRXzp7i-V_ynI?authuser=0&hl=en&rclk=1
https://www.google.de/maps/place/Dr.+med.+Hana+Balent+Ilitsch/data=!4m7!3m6!1s0x4790697f95fe3a73:0xaff715a22ab56e78!8m2!3d47.5883105!4d8.2882387!16s%2Fg%2F11hyjwg_32!19sChIJczr-lX9pkEcReG61KqIV968?authuser=0&hl=en&rclk=1
https://www.google.de/maps/place/Dr.+med.+Belzer+Heierling+Tanja/data=!4m7!3m6!1s0x47906d2a4e9698fd:0x6865ac23234b8dc9!8m2!3d47.4637622!4d8.3284463!16s%2Fg%2F1tksm8d9!19sChIJ_ZiWTiptkEcRyY1LIyOsZWg?authuser=0&hl=en&rclk=1
https://www.google.de/maps/place/Praxis+f%C3%BCr+Kinder-+und+Jugendmedizin+Dr.+Dirk+Bock/data=!4m7!3m6!1s0x47906b5c9071d861:0x516c763f7642c9ff!8m2!3d47.4731839!4d8.1959905!16s%2Fg%2F11mpc9wm91!19sChIJYdhxkFxrkEcR_8lCdj92bFE?authuser=0&hl=en&rclk=1
https://www.google.de/maps/place/Alleviamed+Kinderarztpraxis+Meisterschwanden/data=!4m7!3m6!1s0x4790193bdf03b5f1:0xfef98e265772814a!8m2!3d47.2956342!4d8.2279202!16s%2Fg%2F11gr2z_z2f!19sChIJ8bUD3zsZkEcRSoFyVyaO-f4?authuser=0&hl=en&rclk=1
https://www.google.de/maps/place/Kinderarztpraxis+Suhrepark+AG/data=!4m7!3m6!1s0x47903c69ae471281:0xcb34880030319dd7!8m2!3d47.3727496!4d8.0809937!16s%2Fg%2F1v3kl_4v!19sChIJgRJHrmk8kEcR150xMACINMs?authuser=0&hl=en&rclk=1
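One caveat about building the URL: replace(" ", "+") is enough for this particular query, but for search strings with other special characters it is safer to let the standard library do the encoding, e.g. with urllib.parse.quote_plus:

from urllib.parse import quote_plus

search_string = "Kinderarzt Kanton Aargau"
partial_url = quote_plus(search_string)  # "Kinderarzt+Kanton+Aargau"
driver.get(f"https://www.google.de/maps/search/{partial_url}/")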
I am trying to create an account on Walmart using Selenium with Python. I successfully open https://www.walmart.com/ and get to the "Create an account" option under the Sign In tab. I also successfully enter the First name, Last name, Email address, and Password. However, once I click the "Create account" button, I get a TimeoutException despite using the EC.visibility_of_element_located(...).click() approach.
Can anyone kindly guide me on what is wrong with my approach? Thanks in advance.
The source code of the website for the Create Account button is as follows:
<button class="button m-margin-top text-inherit" type="submit" data-automation-id="signup-submit-btn" data-tl-id="signup-submit-btn" aria-label="Create Account, By clicking Create Account, the user is acknowledging that they have read and agreed to the Terms of Use and Privacy Policy">Create account</button>
My Python code is as follows:
import time
import requests
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
url = "https://www.walmart.com/"
first_name = "chuza"
last_name = "123"
email_id = "chuza123#gmail.com"
password = "Eureka1#"
options = Options()
s=Service('C:/Users/Samiullah/.wdm/drivers/chromedriver/win32/96.0.4664.45/chromedriver.exe')
driver = webdriver.Chrome(service=s, options=options)
driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
    "source":
        "const newProto = navigator.__proto__;"
        "delete newProto.webdriver;"
        "navigator.__proto__ = newProto;"
})
wait = WebDriverWait(driver, 20)
actions = ActionChains(driver)
driver.get(url)
sign_in_btn = wait.until(EC.visibility_of_element_located((By.XPATH, "//div[text()='Sign In']")))
actions.move_to_element(sign_in_btn).perform()
time.sleep(0.5)
wait.until(EC.visibility_of_element_located((By.XPATH, '//button[normalize-space()="Create an account"]'))).click()
f_name = driver.find_element(By.ID, 'first-name-su')
l_name = driver.find_element(By.ID, 'last-name-su')
email = driver.find_element(By.ID, 'email-su')
pswd = driver.find_element(By.ID, 'password-su')
f_name.send_keys(first_name)
driver.implicitly_wait(2)
l_name.send_keys(last_name)
driver.implicitly_wait(1.5)
email.send_keys(email_id)
driver.implicitly_wait(2)
pswd.send_keys(password)
driver.implicitly_wait(0.5)
###
wait.until(EC.visibility_of_element_located((By.XPATH, '//button[normalize-space()="Create account"]'))).click()
I see this CSS selector that represents the desired web element:
button[data-automation-id='signup-submit-btn']
and the XPath would be:
//button[@data-automation-id='signup-submit-btn']
There are 3 matching nodes for both the CSS selector and the XPath; Selenium will use the first match, so these locators effectively select the first matching node.
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[data-automation-id='signup-submit-btn']"))).click()
or
wait.until(EC.element_to_be_clickable((By.XPATH, "//button[@data-automation-id='signup-submit-btn']"))).click()
It makes more sense to use element_to_be_clickable when trying to click on a web element, instead of visibility_of_element_located. Also, CSS selectors are generally a better locator choice than XPath.
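If you want to verify how many nodes a locator matches before relying on it, here is a small sketch (assuming the Selenium 4 find_elements(By, ...) API, with driver as in the question):

from selenium.webdriver.common.by import By

matches = driver.find_elements(By.CSS_SELECTOR, "button[data-automation-id='signup-submit-btn']")
print(len(matches))  # 3 on this page per the discussion above; a click would target the first match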
The //button[normalize-space()="Create account"] locator matches 3 elements on that page; you need to use a more precise locator.
This locator is unique: //form[@id='sign-up-form']//button[@data-tl-id='signup-submit-btn']
So, this should work:
wait.until(EC.visibility_of_element_located((By.XPATH, "//form[@id='sign-up-form']//button[@data-tl-id='signup-submit-btn']"))).click()
This xpath based Locator Strategy...
//button[normalize-space()="Create account"]
...identifies three (3) elements within the DOM Tree, and your desired element is the second one in the list.
Solution
The desired element is a dynamic element, so to click on it once it is clickable, you need to induce WebDriverWait for element_to_be_clickable() instead of visibility_of_element_located(), and you can use the following Locator Strategy:
Using XPATH:
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//form[@id='sign-up-form']//button[normalize-space()='Create account']"))).click()
Note: You have to add the following imports:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
This issue is because of Selenium being detected; you can work around it by creating the account manually on the Walmart IO platform. Follow these steps:
Create an account on the Walmart IO platform here by clicking on the person icon just before the search box.
Log in to the account and accept the "Terms of Use".
Click on "Create Your Application" to create a new application and fill in appropriate details.
You can follow this tutorial to generate two sets of public/private keys; one set will be used for production and the other set will be used for stage (a keypair-generation sketch follows this list).
Upload both public keys using this: https://walmart.io/key-upload?app_name=<your app name>
A Consumer ID will be generated for both sets (prod and stage), which can be seen on the dashboard.
Click on "Request Access" for OPD APIs at here and fill out the form
I'm using Selenium with Python 3 on a ServiceNow website.
The process is as follows: Selenium loads the ServiceNow URL, then I use send_keys to automate typing in the username and password; then the page loads with a table of incidents that I need to extract. Unfortunately, I have to log in every single time because of the group policy I have.
This works up until I have to find the dynamically rendered JavaScript table with the data, and I can't for the life of me seem to find it. I even tried a 15-second sleep to allow it to load.
I also double-checked the XPaths and the id/class names, and they match up. When I print query.page_source, I don't see anything rendered by JS.
I've tried Beautiful Soup too, but that also doesn't work.
Any ideas?
from time import sleep
from collections import deque
from selenium import webdriver
from selenium.webdriver.support.ui import Select # for <SELECT> HTML form
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
query = webdriver.Firefox()
get_query = query.get("SERVICENOW URL")
query.implicitly_wait(10)
login_username = query.find_element_by_id('username')
login_password = query.find_element_by_id('password')
login_button = query.find_element_by_id('signOnButton')
username = "myUsername"
password = "myPassword"
login_username.send_keys(username)
login_password.send_keys(password)
login_button.click()
sleep(10)
incidentTableData = []
print(query.page_source)
# *** THESE ALL FAIL AND RETURN NONE ***
print(query.find_elements())
tableById = query.find_element_by_id('service-now-table-id')
tableByXPath = query.find_element_by_xpath('service-now-xpath')
tableByClass = query.find_element_by_id('service-now-table-class')
Since it's a dynamically rendered JavaScript table, I would suggest implementing explicit waits in your code.
So instead of this:
tableById = query.find_element_by_id('service-now-table-id')
tableByXPath = query.find_element_by_xpath('service-now-xpath')
tableByClass = query.find_element_by_id('service-now-table-class')
rewrite these lines like this:
wait = WebDriverWait(query, 10)
service_now_with_id = wait.until(EC.element_to_be_clickable((By.ID, "service-now-table-id")))
service_now_with_xpath = wait.until(EC.element_to_be_clickable((By.XPATH, "service-now-xpath")))
service_now_with_class = wait.until(EC.element_to_be_clickable((By.ID, "service-now-table-class")))
You are going to need the imports below:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
PS: service_now_with_id, service_now_with_xpath, and service_now_with_class are the web elements returned by the explicit waits; you can then interact with them as your requirements dictate, e.g. clicking them or sending keys.
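For instance, once the wait returns the table, you could fill the incidentTableData list from the question. This is only a sketch: it assumes a plain HTML table, uses the Selenium 4 find_elements(By, ...) API, and 'service-now-table-id' is still the placeholder id from the question:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

wait = WebDriverWait(query, 10)
table = wait.until(EC.presence_of_element_located((By.ID, "service-now-table-id")))

# Collect the text of every cell, row by row.
incidentTableData = [
    [cell.text for cell in row.find_elements(By.TAG_NAME, "td")]
    for row in table.find_elements(By.TAG_NAME, "tr")
]
print(incidentTableData)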
I can get the elements whose attribute contains X, but I can't get the attribute itself. Why?
This code does not raise any errors:
links = browser.find_elements_by_xpath('//div[@aria-label="Conversations"]//a[contains(@data-href, "https://www.messenger.com/t/")]')
This code raises a NoSuchElementException error:
links = browser.find_elements_by_xpath('//div[@aria-label="Conversations"]//a[contains(@data-href, "https://www.messenger.com/t/")]/@data-href')
This code is extracted from the Messenger main page with the chats. I would like to get all the links in the ul list...
I don't get it. Any help, please?
A Selenium locator must return elements, not attributes, which is why the XPath ending in /@data-href fails. To get all the data-href values, you need to traverse the link elements and use get_attribute():
links = browser.find_elements_by_xpath('//div[@aria-label="Conversations"]//a[contains(@data-href, "https://www.messenger.com/t/")]')
ul_list = [link.get_attribute("data-href") for link in links]
print(ul_list)
wait = WebDriverWait(driver, 20)
# Wait for the <a> elements themselves (not the attribute), then read the attribute.
links = wait.until(EC.presence_of_all_elements_located((By.XPATH, '//div[@aria-label="Conversations"]//a[contains(@data-href, "https://www.messenger.com/t/")]')))
for element in links:
    print(element.get_attribute("data-href"))
Note: add the imports below to your solution:
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
I am still quite an amateur at Python; I am trying to scrape data from a website using Selenium.
<small class="fxs_price_ohl"> <span>Open 1.29814</span> <span>High 1.29828</span> <span>Low 1.29775</span> </small>
I am trying to obtain the data Open 1.29814, High 1.29828, and Low 1.29775 from the HTML code above.
count_element = browser.find_element_by_xpath("//small[@class='fxs_price_ohl']//span")
print(count_element.text)
I'm using Selenium with Python; this is my code above. But count_element.text prints an empty string. How do I get the data Open 1.29814, High 1.29828, and Low 1.29775?
Use find_elements_by_xpath if you want to retrieve multiple elements.
count_elements = browser.find_elements_by_xpath("//small[@class='fxs_price_ohl']//span")
for ele in count_elements:
    print(ele.text)
You can also use a CSS selector: the class of the parent with a descendant combinator and a type selector for the child spans. You also need a wait condition, as the page is slow to load.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
browser = webdriver.Chrome()
browser.get('https://www.fxstreet.com/rates-charts/gbpusd')
before_text = ''
while True:  # this could be improved with a timeout
    elements = WebDriverWait(browser, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".fxs_chart_cag_cont .fxs_price_ohl span")))
    elem = elements[-1]
    if elem.text != before_text:
        break
print([elem.text for elem in elements])
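As the comment in the loop says, the polling could be improved with a timeout; one way is a single explicit wait with a custom condition, since WebDriverWait.until accepts any callable that takes the driver and returns a truthy value. A sketch:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait

def last_span_has_text(drv):
    # Truthy once the last OHL span has non-empty text.
    spans = drv.find_elements(By.CSS_SELECTOR, ".fxs_chart_cag_cont .fxs_price_ohl span")
    return bool(spans) and spans[-1].text != ''

WebDriverWait(browser, 20).until(last_span_has_text)  # raises TimeoutException after 20 s
elements = browser.find_elements(By.CSS_SELECTOR, ".fxs_chart_cag_cont .fxs_price_ohl span")
print([elem.text for elem in elements])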