I am writing a script to play an online game and I need it to know when it is between rounds.
In my main script I have:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait

import custom_conditions

# driver is created earlier in the script
wait = WebDriverWait(driver, 60)
waiting_for_next_game = wait.until(custom_conditions.wait_for_text_to_be(
    (By.CLASS_NAME, "message-text"), "Wait for the next round"))
In my custom_conditions.py I have:

from selenium.webdriver.support import expected_conditions as EC

class wait_for_text_to_be(object):
    def __init__(self, locator, text_):
        self.locator = locator
        self.text = text_

    def __call__(self, driver):
        print('checking!')
        element_text = EC._find_element(driver, self.locator).text
        return element_text == self.text
That print statement never fires, so I conclude that the condition is never called. The wait always times out and raises selenium.common.exceptions.TimeoutException.
The text I am trying to target is <span class="message-text">Wait for the next round</span> and it changes dynamically depending on the game state.
I have also tried the built-in condition

wait.until(
    EC.text_to_be_present_in_element((By.CLASS_NAME, "message-text"), "Wait for the next round")
)

and the similar text_to_be_present_in_element_value.
I've also tried using (By.CSS_SELECTOR, "span.message-text")
Can anyone see where I am going wrong please?
Your code works for me. The only change I made was to replace the EC._find_element call with a direct driver.find_element, because I avoid calling private methods.
from selenium.webdriver import Chrome
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By


class WaitForTextToBe:
    def __init__(self, locator: tuple, text: str) -> None:
        self.locator = locator
        self.text = text

    def __call__(self, driver: Chrome) -> bool:
        print("checking...")
        element = driver.find_element(*self.locator)
        return self.text in element.text


driver = Chrome()
wait = WebDriverWait(driver, timeout=10)
driver.get("https://stackoverflow.com/questions/66725562/checking-when-text-changes-to-specific-text-with-selenium-custom-condition-not")
output = wait.until(WaitForTextToBe((By.ID, "question-header"), "Checking when text changes to specific text with Selenium. Custom condition not being called"))
print(output)
driver.quit()
Output
checking...
True
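One design note: this version checks self.text in element.text, a substring match, while the question's class used an exact comparison. If you need the exact behaviour, a variant of the callable (the subclass name here is my own) might look like:

class WaitForExactText(WaitForTextToBe):
    def __call__(self, driver: Chrome) -> bool:
        element = driver.find_element(*self.locator)
        # Exact comparison, mirroring the question's original condition
        return element.text == self.text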
I'm trying to test out chromedriver and headless Chrome on this site:
https://car.gocompare.com/vehicle
When I try with normal Chrome it works fine: I get a response for a car reg I've put in. When I use headless Chrome it says the car cannot be found.
Does anyone know what could be up with it? Is it the driver, or is the website not producing the results? It seems to work with Firefox, so it's a little strange.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException
# Ability to run headless
from selenium.webdriver.firefox.options import Options as f_Options
from selenium.webdriver.chrome.options import Options as c_Options
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
# This allows you to download the page
from parsel import Selector
import time
import datetime
import os
class headlessbypass:
    my_date_time = datetime.datetime.now().strftime('%Y%m%d%H%M%S')

    def my_set_up(self):
        """Executed before running, i.e. opening browser"""
        # This is required for running on the pipeline
        headless = os.getenv('HEADLESS_MODE')

        def firefox_headless_func():
            self.options = f_Options()
            self.options.headless = True
            binary = FirefoxBinary('c:/Users/Anish/AppData/Local/Mozilla Firefox/firefox.exe')
            self.driver = webdriver.Firefox(firefox_binary=binary, executable_path='bin/geckodriver.exe', options=self.options)

        def chrome_headless_func():
            self.options = c_Options()
            #self.options.headless = True
            self.options.add_argument("--window-size=1920,1080")
            #self.options.add_argument("--disable-extensions")
            #self.options.add_argument("--proxy-server='direct://'")
            #self.options.add_argument("--proxy-bypass-list=*")
            #self.options.add_argument("--start-maximized")
            self.options.add_argument('--headless')
            self.options.add_argument('--disable-gpu')
            #self.options.add_argument('--disable-dev-shm-usage')
            #self.options.add_argument('--no-sandbox')
            #self.options.add_argument('--ignore-certificate-errors')
            #self.options.add_argument("--allow-insecure-localhost")
            #self.options.add_argument("--allow-running-insecure-content")
            #self.options.add_argument('--disable-browser-side-navigation')
            self.options.add_argument("--enable-javascript")
            self.options.add_argument("--user-agent='Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:72.0) Gecko/20100101 Firefox/72.0'")
            #self.options.binary_location = "C:/Program Files (x86)/Google/Chrome/Application/chrome.exe"
            self.driver = webdriver.Chrome(options=self.options, executable_path='bin/chromedriver')

        # This is for running locally; select/toggle what you want to run
        headless_firefox = 0
        headless_chrome = 0
        chrome = 1
        safari = 0

        if headless:
            firefox_headless_func()
        else:
            if headless_firefox:
                firefox_headless_func()
            elif headless_chrome:
                chrome_headless_func()
            elif chrome:
                self.driver = webdriver.Chrome(executable_path='bin/chromedriver.exe')
            else:
                self.driver = webdriver.Firefox(executable_path='bin/geckodriver.exe')

        self.driver.implicitly_wait(30)
        self.driver.maximize_window()
        main_window = self.driver.current_window_handle
        self.driver.switch_to.window(main_window)

    def my_tear_down(self):
        """Executed after running, i.e. closing browser"""
        self.driver.quit()

    def my_decorator(func):
        """my_set_up and my_tear_down decorator, so that my_set_up is run before and my_tear_down is run after"""
        def wrapper(self, *args, **kwargs):
            self.my_set_up()
            func(self, *args, **kwargs)
            self.my_tear_down()
        return wrapper

    @my_decorator
    def visit_site(self):
        """Extract quotes"""
        self.driver.get("https://mygocompare.gocompare.com/newcustomer/")
        time.sleep(2)
        print(self.driver.page_source)
        # Enter registration number
        reg_field = self.driver.find_element(By.XPATH, "//fieldset[1]/div[2]/div[2]/div/input")
        reg_field.send_keys("AK47")
        time.sleep(5)
        print("Take screenshot")
        html = self.driver.find_element_by_tag_name('html')
        html.send_keys(Keys.PAGE_UP)
        self.driver.save_screenshot("csv_json_files/firstpagescreenshot.png")
        self.driver.find_element(By.XPATH, "//span[contains(text(), 'Find car')]").click()
        time.sleep(2)
        print("Take screenshot")
        html = self.driver.find_element_by_tag_name('html')
        html.send_keys(Keys.PAGE_UP)
        self.driver.save_screenshot("csv_json_files/firstpagescreenshot2.png")


if __name__ == '__main__':
    start_time = time.time()
    scrape = headlessbypass()
    scrape.visit_site()
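For what it's worth, one quick check is whether the site serves different content to headless Chrome at all. A minimal diagnostic sketch (my own, not part of the question's code; it assumes chromedriver is on your PATH and reuses the question's URL):

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

def page_length(headless):
    # Fetch the page in headed or headless mode and report how much HTML
    # came back; a large gap suggests the site is detecting headless Chrome.
    options = Options()
    if headless:
        options.add_argument('--headless')
        options.add_argument('--window-size=1920,1080')
    driver = webdriver.Chrome(options=options)
    try:
        driver.get('https://car.gocompare.com/vehicle')
        return len(driver.page_source)
    finally:
        driver.quit()

print('headed:', page_length(False))
print('headless:', page_length(True))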
This question already has answers here:
Selenium: How to stop geckodriver process impacting PC memory, without calling driver.quit()? (1 answer)
PhantomJS web driver stays in memory (1 answer)
Closed 3 years ago.
I wrote some code in Python using Selenium and multiprocessing to parallelize data collection from YouTube. I have a method which initializes a Chrome webdriver, and I used multiprocessing to collect data faster. The issue is that when the multiprocessing timeout is reached, the function exits before the driver.quit() command can run. This leads to an accumulation of idle chromedrivers which I cannot close from within Python since (to my knowledge) there is no way to reference them. Is there any way to close all chromedrivers without explicitly using the driver objects?
I wrote the code in Python 3. The chromedriver is for Chrome version 72.
# Web related modules
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.common.exceptions import WebDriverException
from bs4 import BeautifulSoup
from urllib.request import urlopen
import html2text
# YouTube download module
from pytube import YouTube
# Multiprocessing tools
from multiprocessing import Lock, Manager, Queue, Pool
import multiprocessing as mp
# Misc modules
import time, re, pickle, os, shutil, argparse, glob, unicodedata, datetime
from argparse import RawTextHelpFormatter
# Irrelevant to the problem
def save_vids(vid_ids, save_loc):
    print('Irrelevant Function')

# Function that generates the initial list of urls to visit
def explore_home(chromedriver_path, chrome_options, caps):
    driver = webdriver.Chrome(executable_path=chromedriver_path, options=chrome_options, desired_capabilities=caps)
    driver.get('https://www.youtube.com')
    time.sleep(1)
    html_source = driver.page_source
    driver.close()
    parts = html_source.split('{"webCommandMetadata":{"url":"/watch_videos?')[1:]
    vids = []
    for part in parts:
        part = part[part.find('video_ids=')+10:]
        if part.find('\\u') != -1:
            if part.find('"') != -1:
                end = min(part.find('\\u'), part.find('"'))
            else:
                end = part.find('\\u')
        elif part.find('"') != -1:
            end = part.find('"')
        else:
            print('no delimiter found')
        concat_list = part[:end]
        vids.extend(concat_list.split('%2C'))
    vids = [vid for vid in vids if len(re.findall(r'[0-9]|[a-z]|[A-Z]|_|-', vid)) == 11 and len(vid) == 11]
    return vids
# The function that generates chromedrivers and fails to quit if a multiprocessing timeout occurs.
def explore_vid(chromedriver_path, chrome_options, caps, vid, ads, save_loc, l):
    driver = webdriver.Chrome(executable_path=chromedriver_path, options=chrome_options, desired_capabilities=caps)
    driver.get('https://www.youtube.com/watch?v='+vid)
    time.sleep(2)
    sec_html = driver.page_source
    soup = BeautifulSoup(sec_html, 'lxml')
    mydivs = str(soup.findAll("div", {"class": "style-scope ytd-watch-next-secondary-results-renderer"}))
    inds = [m.start() for m in re.finditer('ytimg.com/vi/', mydivs)]
    rec_vids = ['https://www.youtube.com/watch?v='+mydivs[ind+13:ind+24] for ind in inds]
    browser_log = driver.get_log('performance')
    adInfo = find_ad(browser_log, vid)
    if adInfo:
        # Check if it is the first time this ad has been seen
        adID = adInfo[0]
        l.acquire()
        try:
            if adID in ads:
                ads[adID][0].append(adInfo[1])
            else:
                try:
                    element = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.CSS_SELECTOR, ".ytp-ad-button.ytp-ad-visit-advertiser-button.ytp-ad-button-link")))
                    element.click()
                    driver.switch_to.window(driver.window_handles[-1])
                    ad_website_URL = driver.current_url
                    ad_website_HTML = driver.page_source
                    clean_text = html2text.html2text(ad_website_HTML)
                    save_vids(adID, save_loc)
                    textName = os.path.join(save_loc, adID, 'adwebsite.txt')
                    file = open(textName, "w")
                    file.write(ad_website_URL)
                    file.write('\n')
                    file.write(clean_text)
                    file.close()
                    ads[adID] = [[adInfo[1]], ad_website_URL]
                except WebDriverException:
                    print('Button click failed: %s:%s' % (vid, adInfo[0]))
        finally:
            l.release()
    # The quit command for the chrome driver
    driver.quit()
    return rec_vids

def find_ad(browser_log, vid):
    for k in range(len(browser_log)):
        if browser_log[k]['message'].find('adunit') != -1 and browser_log[k]['message'].find(vid) != -1:
            ind = browser_log[k]['message'].find('https://www.youtube.com/get_video_info?html5=1&video_id=')
            vid_id = browser_log[k]['message'][ind+56:ind+67]
            return (vid_id, time.localtime())
    return None
def positive_int(argument):
    num = int(argument)
    if num < 1:
        msg = "Maximum depth parameter must be a positive number. You entered: %s" % argument
        raise argparse.ArgumentTypeError(msg)
    return num

def valid_pickle(argument):
    file = str(argument)
    if not file.endswith('.pickle'):
        msg = "ad_save_loc must end with .pickle You entered: %s" % file
        raise argparse.ArgumentTypeError(msg)
    return file

def valid_dir(argument):
    directory = str(argument)
    if not os.path.isdir(directory):
        msg = "vid_save_loc must be a valid directory. You entered: %s" % directory
        raise argparse.ArgumentTypeError(msg)
    return directory
if __name__ == '__main__':
    # Argument Parsing
    parser = argparse.ArgumentParser(description='Scrapes Youtube ads and advertising company websites. \nUse --restart to restart the scraping from scratch by deleting previous data\nExample Usage: python finalReader.py E:\\ads\\ads.pickle E:\\ads --ncpu 2', formatter_class=RawTextHelpFormatter)
    parser.add_argument('ad_save_loc', help='Save Location for Ad Main Dictionary', type=valid_pickle)
    parser.add_argument('vid_save_loc', help='Save Location for Ad Videos', type=valid_dir)
    parser.add_argument('chromedriver_path', help='Path of the chrome executable', type=str)
    parser.add_argument('--restart', help='Restart collection', action="store_true", default=False, dest='restartCollection')
    parser.add_argument('--ncpu', nargs='?', help='Number of cores for multiprocessing, 1 by default', default=1, type=int, dest='mpcpu')
    parser.add_argument('--timeout', nargs='?', help='For how long the data collection will take place (in seconds), infinite by default', default=float('inf'), type=float, dest='time_limit')
    parser.add_argument('--max_depth', nargs='?', help='Depth of Youtube exploration tree', default=1, type=positive_int, dest='search_depth')
    args = parser.parse_args()

    ad_save_loc = args.ad_save_loc
    vid_save_loc = args.vid_save_loc
    vid_save_loc = os.path.join(vid_save_loc, 'ad_data')
    mpcpu = max(args.mpcpu, 1)
    time_limit = args.time_limit
    chromedriver_path = args.chromedriver_path
    search_depth = args.search_depth

    if not os.path.isdir(vid_save_loc):
        os.mkdir(vid_save_loc)

    if args.restartCollection:
        for the_file in os.listdir(vid_save_loc):
            file_path = os.path.join(vid_save_loc, the_file)
            try:
                if os.path.isfile(file_path):
                    os.unlink(file_path)
                elif os.path.isdir(file_path):
                    shutil.rmtree(file_path)
            except Exception as e:
                print(e)
        if os.path.isfile(ad_save_loc):
            os.remove(ad_save_loc)
        ads = {}
    else:
        if os.path.isfile(ad_save_loc):
            pickle_in = open(ad_save_loc, "rb")
            ads = pickle.load(pickle_in)
        else:
            ads = {}

    # Chrome Driver Options
    chrome_options = Options()
    chrome_options.add_argument('--mute-audio')
    caps = DesiredCapabilities.CHROME
    caps['loggingPrefs'] = {'performance': 'ALL'}

    startTime = time.time()
    currentTime = time.time()

    # Data Collection Loop - Multiprocessing
    while currentTime - startTime < time_limit:
        print('Time from start: %s' % str(datetime.timedelta(seconds=currentTime-startTime)))
        rec_vids = explore_home(chromedriver_path, chrome_options, caps)
        while not rec_vids:
            time.sleep(60)
            rec_vids = explore_home(chromedriver_path, chrome_options, caps)
        m = Manager()
        lock = m.Lock()
        pool = Pool(processes=mpcpu)
        for depth in range(search_depth):
            print('Depth %s' % depth)
            multiple_results = [pool.apply_async(explore_vid, (chromedriver_path, chrome_options, caps, vid, ads, vid_save_loc, lock)) for vid in rec_vids]
            branching_vids = []
            for res in multiple_results:
                try:
                    branching_vids.append(res.get(timeout=30))
                    if time.time()-startTime < time_limit:
                        break
                except mp.TimeoutError:
                    print('Timeout')
            res_vids = branching_vids.copy()
        pickle_out = open(ad_save_loc, "wb")
        pickle.dump(ads, pickle_out)
        pickle_out.close()
        currentTime = time.time()
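As an aside for anyone hitting the same leak: since a timed-out worker loses its reference to the driver, one workaround (a sketch of my own, assuming psutil is installed; it is not part of the question's code) is to kill leftover driver processes by name once the pool is done. Note this kills every chromedriver on the machine, so only run it after all intended work has finished.

import psutil

def kill_orphaned_chromedrivers():
    # Terminate chromedriver processes by name
    # (the process name is 'chromedriver.exe' on Windows).
    for proc in psutil.process_iter(['name']):
        try:
            if 'chromedriver' in (proc.info['name'] or '').lower():
                proc.kill()
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            pass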
I am writing a program to scrape mobile phone listings from the Amazon website, but it raises a timeout exception even though the page loads in time.
Here is my code:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import NoSuchElementException
from bs4 import BeautifulSoup
import urllib.request

class Amazon_all_mobile_scraper:
    def __init__(self):
        self.driver = webdriver.Firefox()
        self.delay = 60
        self.url = "https://www.amazon.in/mobile-phones/b/ref=sd_allcat_sbc_mobcomp_all_mobiles?ie=UTF8&node=1389401031"

    def load_amazon(self):
        self.driver.get(self.url)
        try:
            wait = WebDriverWait(self.driver, self.delay)
            wait.until(EC.presence_of_element_located((By.CLASS_NAME, "acs-ln-link")))
            print("Page is ready.")
        except TimeoutException:
            print("Took too much time to load!")
        except:
            print("Something went wrong in loading part!!")

    def extract_list_of_mobiles(self):
        try:
            mobile_list = self.driver.find_element_by_xpath('//div[@class="acs-ln-link"]')
            print(mobile_list)
        except NoSuchElementException:
            print("Sorry, unable to get the requested element.")

scraper = Amazon_all_mobile_scraper()
scraper.load_amazon()
scraper.extract_list_of_mobiles()

Please help me figure out what's wrong in this code.
Only changing acs-ln-link to acs-ln-links will not do the trick. Your xpath should look more like '//div[contains(@class,"acs-ln-nav-expanded")]//*[@class="acs-ln-links"]//a'. Here is how you can use it to get the required output:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

class Amazon_all_mobile_scraper:
    url = "https://www.amazon.in/mobile-phones/b/ref=sd_allcat_sbc_mobcomp_all_mobiles?ie=UTF8&node=1389401031"

    def __init__(self):
        self.driver = webdriver.Chrome()
        self.wait = WebDriverWait(self.driver, 15)

    def load_n_get_from_amazon(self):
        self.driver.get(self.url)
        mobile_list = self.wait.until(EC.presence_of_all_elements_located((By.XPATH, '//div[contains(@class,"acs-ln-nav-expanded")]//*[@class="acs-ln-links"]//a')))
        return mobile_list

    def __del__(self):
        self.driver.close()

if __name__ == '__main__':
    scraper = Amazon_all_mobile_scraper()
    for item in scraper.load_n_get_from_amazon():
        print(f'{item.text}\n{item.get_attribute("href")}\n')
The class wasn't matching: "acs-ln-link" should be "acs-ln-links".
I'm VERY new to automation, fair warning.
I have an automation script that verifies the canonical tag of a page is present. I also need to assert that it's all lower case. Would I just add an assert after "driver.find_element..." that calls islower()?
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
import unittest

class homepage_canonical(unittest.TestCase):
    def setUp(self):
        global driver
        driver = webdriver.Firefox()
        driver.get("websiteurlhere")

    def test_hpcanonical(self):
        WebDriverWait(driver, 10)
        driver.find_element_by_css_selector("link[href='canonicalurlhere'][rel='canonical']")

    def tearDown(self):
        driver.quit()

if __name__ == "__main__":
    unittest.main()
I would use assertTrue:

def test_hpcanonical(self):
    element = driver.find_element_by_css_selector("link[href='canonicalurlhere'][rel='canonical']")
    # A <link> tag has no rendered text, so assert on the href attribute
    self.assertTrue(element.get_attribute("href").islower())
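One caveat on the question's test: the bare WebDriverWait(driver, 10) call does not wait for anything; it only constructs the waiter. To actually wait for the tag before asserting, chain it with until and an expected condition (a sketch using the question's own selector):

from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

def test_hpcanonical(self):
    wait = WebDriverWait(driver, 10)
    # Block until the canonical <link> is present, then assert on its URL
    element = wait.until(EC.presence_of_element_located(
        (By.CSS_SELECTOR, "link[href='canonicalurlhere'][rel='canonical']")))
    self.assertTrue(element.get_attribute("href").islower())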