I'm learning how to use selenium and I want to make a program that opens up youtube, searches for a video and prints the title of the first video to appear. I don't know why but it instead prints the title of the first video on youtube's homepage.
from logging import exception
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

# Open YouTube, type a query, submit it, and print the first result's title.
PATH = 'C:\\Program Files (x86)\\chromedriver.exe'
driver = webdriver.Chrome(PATH)
driver.get("https://www.youtube.com/")

search = driver.find_element(By.CSS_SELECTOR, 'input#search')
search.send_keys("busqueda")
time.sleep(1)
search.send_keys(Keys.RETURN)
time.sleep(5)

try:
    # XPath attributes use '@id', not '#id'. Anchoring on ytd-video-renderer
    # restricts the match to the search-results list; a bare
    # //*[@id="video-title"] also matches homepage videos still present in
    # the DOM, which is why the homepage title was printed.
    element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located(
            (By.XPATH, '//ytd-video-renderer//*[@id="video-title"]')))
    print("título: ", element.get_attribute("innerHTML"))
except Exception as e:  # BaseException would also swallow KeyboardInterrupt
    print(e)
finally:
    driver.quit()
Please use the XPath below, which will print the YouTube video's title:
'.//a[@class = "yt-simple-endpoint style-scope ytd-video-renderer"]//yt-formatted-string[@class = "style-scope ytd-video-renderer"]'
# Read the title text of the first result matching this selector.
first_title = driver.find_element(
    By.CSS_SELECTOR,
    "a#video-title yt-formatted-string.style-scope.ytd-video-renderer")
print(first_title.text)
That should suffice to get the first element with that selector, once you add a wait.
Related
I'm trying to take a screenshot of each comment in a reddit post using selenium python. All comments have the same id/class and that's what I have used to select them.
Here's my code;
import requests
from bs4 import BeautifulSoup
import pyttsx3, pyautogui
from PIL import Image
from io import BytesIO
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.keys import Keys

# Raw strings for Windows paths: '\S', '\W' etc. in a plain string are
# deprecated invalid escape sequences and break outright for '\s', '\n', ...
driver = webdriver.Chrome(executable_path=r'C:\Selenium_Drivers\chromedriver.exe')

url = 'https://www.reddit.com/user/UoPeople09/comments/wlt4qj/what_made_you_apply_at_uopeople/'
driver.get(url)
driver.implicitly_wait(5)

total_height = int(driver.execute_script("return document.body.scrollHeight"))

u = 1
for i in range(1, total_height * 2, 50):
    driver.execute_script(f"window.scrollTo(0, {i})")
    # NOTE(review): find_element returns the *first* match on every
    # iteration, so each screenshot shows the same comment — collect all
    # comments once with find_elements and screenshot each instead.
    comment = driver.find_element(By.CSS_SELECTOR, 'div#t1_ikllxsq._3sf33-9rVAO_v4y0pIW_CH')
    comment.screenshot(fr'E:\WEB SCRAPING PROJECTS\PROJECTS\Reddit Scraping\shot{u}.png')
    u += 1
Well my code scrolls down the page and saves screenshots in my desired path. But the problem is that all the screenshots are of the first element(comment) in the reddit post.
I want my code to save a screenshot of each comment separately. Need help
Here you have an example, including scrolling to the end of the page:
# Needed libs
from selenium.webdriver import ActionChains, Keys
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium import webdriver

# Initialize driver and navigate
driver = webdriver.Chrome()
driver.maximize_window()
url = 'https://www.reddit.com/user/UoPeople09/comments/wlt4qj/what_made_you_apply_at_uopeople/'
wait = WebDriverWait(driver, 5)
driver.get(url)

# Wait for the reject-cookies button and click it.
# XPath attributes use '@class', not '#class'.
reject_cookies_button = wait.until(EC.presence_of_element_located(
    (By.XPATH, "(//section[@class='_2BNSty-Ld4uppTeWGfEe8r']//button)[2]")))
reject_cookies_button.click()

# Scroll until the page height stops growing (end of the lazy-loaded feed).
while True:
    high_before_scroll = driver.execute_script('return document.body.scrollHeight')
    driver.execute_script('window.scrollTo(100, document.body.scrollHeight);')
    time.sleep(2)
    if driver.execute_script('return document.body.scrollHeight') == high_before_scroll:
        break

# Collect every comment container on the fully-loaded page.
comments = wait.until(EC.presence_of_all_elements_located(
    (By.XPATH, "//div[contains(@class, 'Comment')]")))

# Take a screenshot of every comment, scrolling each into view first.
u = 1
for comment in comments:
    driver.execute_script("arguments[0].scrollIntoView();", comment)
    comment.screenshot(f'./shot{u}.png')
    u += 1
I hope the comments in the code help you to understand what is happening
My code is done for linux, but just initialize the driver with your linux chromedriver
To get the screenshots of each comments, you need to identify the comment elements and then scroll to each comments and then take the screen shot.
This approach works for me.
url = 'https://www.reddit.com/user/UoPeople09/comments/wlt4qj/what_made_you_apply_at_uopeople/'
driver.get(url)

# Dismiss the cookie banner.
WebDriverWait(driver, 10).until(EC.element_to_be_clickable(
    (By.XPATH, "//button[contains(.,'Reject non-essential')]"))).click()

# Get all the comments.
comments = driver.find_elements(By.CSS_SELECTOR, "[data-testid='comment_author_link']")
print(len(comments))

for i in range(len(comments)):
    # Scroll to each comment.
    comments[i].location_once_scrolled_into_view
    time.sleep(2)  # slow the script down so the screenshot is taken after the scroll settles
    # Raw string: '\W', '\P', '\s' in a plain string are invalid escapes.
    driver.save_screenshot(fr'E:\WEB SCRAPING PROJECTS\PROJECTS\Reddit Scraping\shot{i+1}.png')
Note: you already have all the required libraries; you only need to import the time library.
I'm trying to scrape from the moneycontrol.com. When I tried to send value in the search box I keep getting the same error in except block as "Element not Found".
I tried using XPath id as well as using the full XPath but in both cases, it doesn't work.
WITHOUT MAXIMIZING THE WINDOW
XPath id - //*[@id="search_str"]
Full XPath - /html/body/div[1]/header/div[1]/div[1]/div/div/div[2]/div/div/form/input[5]
Attaching the full code below:
import time
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys


def search_stock():
    """Open moneycontrol.com, open the search box and search for 'Zomato'."""
    driver = webdriver.Chrome(
        r'./chromedriver')
    driver.get('https://www.moneycontrol.com/')
    time.sleep(5)
    # XPath attributes are addressed with '@id', not '#id'.
    search_icon = driver.find_element_by_xpath(
        '//*[@id="fixedheader"]/div[4]/span')
    search_icon.click()
    time.sleep(2)
    try:
        search_box = driver.find_element_by_xpath('//*[@id="search_str"]')
        print("Element is visible? " + str(search_box.is_displayed()))
        time.sleep(10)
        if search_box.is_displayed():
            search_box.send_keys('Zomato')
            search_box.send_keys(Keys.RETURN)
    except NoSuchElementException:
        print("Element not found")
    driver.close()


search_stock()
Sometimes it starts working, but most of the time it throws exceptions and errors. I have been struggling for 3 days, but none of the solutions is working.
Web scraping like that seems quite inefficient; it is probably better to use requests and bs4. However, if you want to do it like this, you could try using action chains (found here). Or you can do driver.get('https://www.moneycontrol.com/india/stockpricequote/consumer-food/zomato/Z') from the start instead of typing it in.
You may wanna try the below code :
def search_stock():
    """Search 'Zomato' on moneycontrol.com, hovering over the box first."""
    driver = webdriver.Chrome(r'./chromedriver')
    driver.maximize_window()
    driver.implicitly_wait(30)
    driver.get('https://www.moneycontrol.com/')
    wait = WebDriverWait(driver, 10)
    time.sleep(5)
    try:
        # Hover over the input so the animated search box becomes interactable.
        ActionChains(driver).move_to_element(
            wait.until(EC.element_to_be_clickable(
                (By.CSS_SELECTOR, "input[id='search_str']")))).perform()
        search_box = wait.until(EC.element_to_be_clickable(
            (By.CSS_SELECTOR, "input[id='search_str']")))
        print("Element is visible? ", search_box.is_displayed())
        time.sleep(10)
        if search_box.is_displayed():
            search_box.send_keys('Zomato')
            search_box.send_keys(Keys.RETURN)
    except NoSuchElementException:
        print("Element not found")
Imports :
# Imports required by the snippet above (waits, locators, action chains).
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
Try clicking on search_box and only after that sending text there.
# Click the search box first, then type. XPath attributes use '@id', not '#id'.
search_box = driver.find_element_by_xpath('//form[@id="form_topsearch"]//input[@id="search_str"]')
search_box.click()
time.sleep(0.1)
search_box.send_keys('Zomato')
search_box.send_keys(Keys.RETURN)
Additionally I would advise you using explicit waits of expected conditions instead of hardcoded sleeps.
With it your code will be faster and more reliable.
import time
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


def search_stock():
    """Search 'Zomato' on moneycontrol.com using explicit waits only."""
    driver = webdriver.Chrome(r'./chromedriver')
    wait = WebDriverWait(driver, 20)
    driver.get('https://www.moneycontrol.com/')
    # Fixed: the original line was missing a closing parenthesis on until(),
    # and XPath attributes need '@id', not '#id'.
    wait.until(EC.element_to_be_clickable(
        (By.XPATH, '//*[@id="fixedheader"]/div[4]/span'))).click()
    search_box = wait.until(EC.element_to_be_clickable(
        (By.XPATH, '//form[@id="form_topsearch"]//input[@id="search_str"]')))
    search_box.send_keys('Zomato')
    search_box.send_keys(Keys.RETURN)
    # I'm not sure you should close the driver immediately after invoking the search...
    # driver.close()


search_stock()
UPD
Let's try this
import time
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains


def search_stock():
    """Search 'Zomato' on moneycontrol.com, clicking via JavaScript."""
    driver = webdriver.Chrome(r'./chromedriver')
    wait = WebDriverWait(driver, 20)
    actions = ActionChains(driver)
    driver.get('https://www.moneycontrol.com/')
    # Fixed: missing closing parenthesis, '#id' -> '@id', and dropped the
    # chained .click() — it returns None, so search_icon would have been
    # None and the scrollIntoView/JS-click calls below would crash.
    search_icon = wait.until(EC.presence_of_element_located(
        (By.XPATH, '//*[@id="fixedheader"]/div[4]/span')))
    time.sleep(0.5)
    driver.execute_script("arguments[0].scrollIntoView();", search_icon)
    driver.execute_script("arguments[0].click();", search_icon)
    search_box = wait.until(EC.presence_of_element_located(
        (By.XPATH, '//form[@id="form_topsearch"]//input[@id="search_str"]')))
    driver.execute_script("arguments[0].scrollIntoView();", search_icon)
    driver.execute_script("arguments[0].click();", search_icon)
    time.sleep(0.5)
    search_box.send_keys('Zomato')
    search_box.send_keys(Keys.RETURN)
    # I'm not sure you should close the driver immediately after invoking the search...
    # driver.close()


search_stock()
If the above solution is still not working instead of
actions.move_to_element(search_box).click().perform()
try
driver.execute_script("arguments[0].click();", search_box)
the code:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys

browser = webdriver.Chrome(r"C:\Users\Desktop\chromedriver.exe")
browser.get('https://www.youtube.com/')
browser.maximize_window()

search = browser.find_element_by_name('search_query')
time.sleep(5)
search.send_keys("shakira waka waka")
search.send_keys(Keys.RETURN)
time.sleep(5)

# find_element_by_class_name takes a single class name; a space-separated
# compound like "style-scope yt-img-shadow" can never match. Use a CSS
# selector and click the first result's thumbnail link instead.
browser.find_element_by_css_selector("ytd-video-renderer a#thumbnail").click()
Hello friends, I want to search generically for the song name and singer in YouTube's search box and play the video by clicking the thumbnail that appears afterwards. But the program gives an "element not interactable" error. How can I play the video by clicking that thumbnail?
Try the below code :
# Search YouTube for the song and open the first result.
driver = webdriver.Chrome()
driver.maximize_window()
waiter = WebDriverWait(driver, 10)
driver.get('https://www.youtube.com/')

query_box = waiter.until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, "input[id='search']")))
query_box.send_keys("shakira waka waka")
waiter.until(
    EC.element_to_be_clickable(
        (By.CSS_SELECTOR, "button[id='search-icon-legacy']"))).click()
waiter.until(
    EC.element_to_be_clickable(
        (By.CSS_SELECTOR,
         "ytd-video-renderer.style-scope.ytd-item-section-renderer"))).click()
I'm trying to automatize the reddit logIn with selenium from python and i'm using the following code in order to do it
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
from time import sleep

driver = webdriver.Chrome(executable_path=r'C:\Program Files (x86)\chromedriver.exe')
driver.get("https://www.reddit.com/")

login = driver.find_element_by_link_text("Log In")
login.click()

username = "the-username"  # Enter your username
password = "the-password"  # Enter your password


def slow_typing(element, text):
    """Type *text* into *element* one character at a time, like a human."""
    for character in text:
        element.send_keys(character)
        sleep(0.3)


def logIn():  # Log In function.
    try:
        sleep(15)
        # XPath attributes use '@id', not '#id'.
        # username_in = driver.find_element_by_class_name("AnimatedForm__textInput")
        username_in = driver.find_element_by_xpath("//*[@id='loginUsername']")
        slow_typing(username_in, username)
        pass_in = driver.find_element_by_xpath("//*[@id='loginPassword']")
        slow_typing(pass_in, password)
        pass_in.send_keys(Keys.ENTER)
        sleep(5)
    except NoSuchElementException:
        print("Llegue aqui xd xd")


logIn()
There's a little more code, but I'm posting a summary so I can describe my problem to you. When it runs, it reaches the moment where the username input is selected, but it doesn't send the keys. I don't know what to change, so I'm asking for help here.
def logIn():  # Log In function.
    try:
        # The login form lives inside an iframe — switch into it first.
        driver.switch_to_frame(driver.find_element_by_tag_name('iframe'))
        sleep(5)
        print("hii")
        # XPath attributes use '@id', not '#id'.
        # username_in = driver.find_element_by_class_name("AnimatedForm__textInput")
        username_in = driver.find_element_by_xpath("//*[@id='loginUsername']")
        slow_typing(username_in, username)
        pass_in = driver.find_element_by_xpath("//*[@id='loginPassword']")
        slow_typing(pass_in, password)
        pass_in.send_keys(Keys.ENTER)
        sleep(5)
        driver.switch_to_default_content()
    except NoSuchElementException:
        print("Llegue aqui xd xd")
        driver.switch_to_default_content()
The login form is inside an iframe, so switch to it first.
To login within reddit using Selenium and python you need to:
Induce WebDriverWait for the frame to be available and switch to it.
Induce WebDriverWait for the desired element to be clickable.
You can use either of the following Locator Strategies:
Using XPATH:
# Configure Chrome, open reddit, and log in; the form sits inside an iframe.
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_experimental_option("prefs", {
    "profile.default_content_setting_values.notifications": 1
})
driver = webdriver.Chrome(options=options, executable_path=r'C:\WebDrivers\chromedriver.exe')
driver.get("https://www.reddit.com/")
# XPath attributes use '@', not '#' (fixed in all four locators below).
WebDriverWait(driver, 20).until(EC.element_to_be_clickable(
    (By.XPATH, "//a[starts-with(@href, 'https://www.reddit.com/login')]"))).click()
WebDriverWait(driver, 20).until(EC.frame_to_be_available_and_switch_to_it(
    (By.XPATH, "//iframe[starts-with(@src, 'https://www.reddit.com/login')]")))
WebDriverWait(driver, 20).until(EC.visibility_of_element_located(
    (By.XPATH, "//input[@id='loginUsername']"))).send_keys("debanjanb")
driver.find_element(By.XPATH, "//input[@id='loginPassword']").send_keys("zergcore")
driver.find_element(By.XPATH, "//button[@class='AnimatedForm__submitButton m-full-width']").click()
Note : You have to add the following imports :
# Required imports for the snippet above.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Browser Snapshot:
That happens because you don't click inside the input element - below you'll find a small change in your slow typing method, also if have a close look at their code they also have an animation for those fields, clicking the input should solve your issue.
def slow_typing(element, text):
    """Click *element* to focus it, then type *text* one character at a time.

    The click is the fix: reddit's animated inputs ignore keys sent to an
    unfocused field.
    """
    element.click()
    for character in text:
        element.send_keys(character)
        sleep(0.3)
The second recommendation is to use IDs instead of XPath whenever you have the chance. IDs give you the best performance and help the framework find the elements easily, without parsing the entire DOM to match the XPath.
So I am trying to scrape some information from a website, and when I try to get element by xpath I am getting an error "Unable to locate element" when the path that I provide is copied directly from the inspection tool. I tried a couple of things but it did not work, so I told my self I was going to try an easier path (TEST) but still don't work. Is it possible that the website does not show all the html code when inspecting?
Here is the code, with the website and the xpath that I tried.
URL_TRADER = 'https://www.tipranks.com/analysts/joseph-foresi?benchmark=none&period=yearly'
# XPath attributes use '@id'/'@class', not '#id'/'#class' (fixed in both paths).
TEST = 'html/body/div[@id="app"]/div[@class="logged-out free"]/div[@class="client-components-app-app__wrapper undefined undefined"]'  # /div/div[1]/div/div[2]/div/section/main/table/tbody/tr[3]/td[3]/div/div/div/div[1]/span
X_PATH = '//*[@id="app"]/div/div/div[2]/div/div[1]/div/div[2]/div/section/main/table/tbody/tr[1]/td[3]/div/div/div/div[1]/span'
The main function is:
def trader_table():
    """Load the analyst page and return the inner HTML of the X_PATH cell."""
    # Loading Chrome and getting to the website.
    driver = webdriver.Chrome(executable_path='/usr/local/bin/chromedriver')
    driver.get(URL_TRADER)
    driver.implicitly_wait(10)
    text = driver.find_element_by_xpath(X_PATH).get_attribute('innerHTML')
    return text
I added a wait condition and used a css selector combination instead but this is the same as your xpath I think
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Wait for the experts table and read the third cell of its first data row.
url = 'https://www.tipranks.com/analysts/joseph-foresi?benchmark=none&period=yearly'
driver = webdriver.Chrome()
driver.get(url)
row_cell_selector = (".client-components-experts-infoTable-expertTable__table "
                     ".client-components-experts-infoTable-expertTable__dataRow "
                     "td:nth-child(3)")
cell = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, row_cell_selector)))
data = cell.get_attribute('innerHTML')
print(data)
You have provided all the necessary details required to construct an answer but you didn't explicitly mention which element you were trying to get.
However, the commented out xpath within TEST gives us a hint you were after the Price Target and to extract the text within those elements as the elements are JavaScript enabled elements, you need to induce WebDriverWait for the visibility_of_all_elements_located() and you can use the following solution:
Code Block:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

options = webdriver.ChromeOptions()
options.add_argument('start-maximized')
options.add_argument('disable-infobars')
options.add_argument('--disable-extensions')
driver = webdriver.Chrome(chrome_options=options, executable_path=r'C:\WebDrivers\chromedriver.exe')
driver.get("https://www.tipranks.com/analysts/joseph-foresi?benchmark=none&period=yearly")
# '@class' is the XPath attribute syntax ('#class' is invalid).
print([element.get_attribute('innerHTML')
       for element in WebDriverWait(driver, 10).until(
           EC.visibility_of_all_elements_located(
               (By.XPATH, "//div[@class='client-components-experts-infoTable-expertTable__isBuy']//span")))])
Console Output:
['$14.00', '$110.00', '$237.00', '$36.00', '$150.00', '$71.00', '$188.00', '$91.00', '$101.00', '$110.00']
I guess you are looking for the price. Here you go.
from selenium import webdriver

URL_TRADER = 'https://www.tipranks.com/analysts/joseph-foresi?benchmark=none&period=yearly'
# XPath attributes use '@id'/'@class', not '#id'/'#class' (fixed below).
TEST = 'html/body/div[@id="app"]/div[@class="logged-out free"]/div[@class="client-components-app-app__wrapper undefined undefined"]'  # /div/div[1]/div/div[2]/div/section/main/table/tbody/tr[3]/td[3]/div/div/div/div[1]/span
X_PATH = "//div[@class='client-components-experts-infoTable-expertTable__isBuy']/div/span"


def trader_table():
    """Print and return the inner HTML of the first price-target cell."""
    driver = webdriver.Chrome(executable_path='/usr/local/bin/chromedriver')
    driver.get(URL_TRADER)
    driver.implicitly_wait(10)
    text = driver.find_element_by_xpath(X_PATH).get_attribute('innerHTML')
    print(text)
    return text
Edited for All rows
from selenium import webdriver

URL_TRADER = 'https://www.tipranks.com/analysts/joseph-foresi?benchmark=none&period=yearly'
# XPath attributes use '@class', not '#class'.
X_PATH = "//div[@class='client-components-experts-infoTable-expertTable__isBuy']/div/span"


def trader_table():
    """Collect the text of every price-target cell and return them as a list."""
    driver = webdriver.Chrome(executable_path='/usr/local/bin/chromedriver')
    driver.get(URL_TRADER)
    driver.implicitly_wait(10)
    list_ele = driver.find_elements_by_xpath(X_PATH)
    price_list = []
    for ele in list_ele:
        print(ele.text)
        price_list.append(ele.text)
    return price_list


# Renamed from 'list' so the builtin is not shadowed.
prices = trader_table()
print(prices)
from selenium import webdriver
import time

# Dump the full inner HTML of the #app container after a fixed wait.
driver = webdriver.Chrome("your webdriver location")
driver.get("https://www.tipranks.com/analysts/joseph-foresi?benchmark=none&period=yearly")
time.sleep(10)
app_html = driver.find_element_by_id('app').get_attribute('innerHTML')
print(app_html)
prints full inner html