Experimental Chrome Options in Selenium Node.js

Experimental Chrome Options in Selenium Node.js - node.js

I need to convert these python lines:
# Create the ChromeOptions object that carries the browser settings.
options = webdriver.ChromeOptions()
# Set the experimental 'excludeSwitches' option to the list ['enable-logging'].
options.add_experimental_option('excludeSwitches', ['enable-logging'])
# Launch Chrome with the given executable path and the options built above.
driver = webdriver.Chrome(executable_path='<path-to-chrome>', options=options)
From this issue: Python selenium: DevTools listening on ws://127.0.0.1
I don't know how to add experimental options in Node.js, and I can't find any documentation for it.

I couldn't find the experimental Chrome options in Selenium for Node.js either. However, if you only want to exclude a Chrome switch, I think you can do it like this:
const {Builder} = require('selenium-webdriver');
const chrome = require('selenium-webdriver/chrome');

/**
 * Start Chrome with the `enable-logging` switch excluded.
 *
 * `await` is only valid inside an async function in a script that uses
 * `require`, so the driver is built inside this function.
 *
 * @returns {Promise<void>} settles once the driver has been built.
 */
async function main() {
  const chromeOptions = new chrome.Options();
  // Node.js counterpart of Python's
  // add_experimental_option('excludeSwitches', ['enable-logging']).
  chromeOptions.excludeSwitches('enable-logging');

  const driver = await new Builder()
    .forBrowser('chrome')
    .setChromeOptions(chromeOptions)
    .build();
  // ... use `driver` here ...
}

main();

Related

Python selenium cannot saved to default download path

Selenium 4.6.0
Python 3.10.9
In headless mode this issue does not happen: files are downloaded to the Downloads/test folder.
In non-headless mode, however, Chrome ignores the prefs option and downloads to the default Downloads folder.
# Build the Chrome options used for this browser session.
options = webdriver.ChromeOptions()
# Pick a random user-agent string and print it.
# UserAgent is not defined in this snippet; presumably it comes from the
# fake_useragent package — TODO confirm.
ua = UserAgent()
userAgent = ua.random
print(userAgent)
# NOTE(review): the literal text "User-Agent: " becomes part of the value
# passed to --user-agent — confirm that prefix is intended.
options.add_argument('--user-agent=User-Agent: {}'.format(userAgent))
# options.add_argument('--headless')
# Reuse an existing Chrome user-data directory and its "Profile 2" profile.
options.add_argument("--user-data-dir=C:/Users/Myname/AppData/Local/Google/Chrome/User Data")
options.add_argument("--profile-directory=Profile 2")
# NOTE(review): in a raw string the trailing \\ is two backslash characters,
# so this path ends with a doubled backslash — confirm that is intended.
prefs = {"download.default_directory" : r"C:\Users\Myname\Downloads\test\\"}
options.add_experimental_option("prefs", prefs)
# Start Chrome through the chromedriver executable with the options above.
self.webdriver = webdriver.Chrome(executable_path="../chromedriver_win32/chromedriver.exe",options=options)
self.webdriver.maximize_window()
# NOTE(review): this message is printed even though the --headless argument
# above is commented out.
print ("Headless Chrome Initialized")

Does Selenium Grid 4 for Firefox lack extension addon option?

I have Selenium working well locally, adding extensions with the following setup.
from selenium import webdriver
from selenium.webdriver.firefox.options import Options as firefox_options
def init_firefox(self, threadname, headless, images_off):
    """Start a local Firefox session, install the extensions, and log in.

    Args:
        threadname: Not used by this method.
        headless: When truthy, Firefox is started with ``--headless``.
        images_off: Not used by this method.

    Returns:
        The Firefox WebDriver, after ``self.close_tab`` and ``self.login``
        have been called on it.
    """
    gecko_path = r"C:\Users\charl\OneDrive\python\gecko\geckodriver.exe"
    extension_dir = 'C:\\Users\\charl\\OneDrive\\python\\gecko\\extensions\\'
    extensions = [
        'firefox#vid.io.xpi',
        'noimages.xpi',
    ]

    # Build the options once; only the headless flag differs between modes.
    options = firefox_options()
    if headless:
        options.add_argument("--headless")
    driver = webdriver.Firefox(options=options, executable_path=gecko_path)

    for extension in extensions:
        driver.install_addon(extension_dir + extension, temporary=True)

    # NOTE(review): the snippet's indentation was lost; close_tab/login are
    # assumed to run once, after all extensions are installed — confirm.
    self.close_tab(driver)
    self.login(driver)
    return driver
But when I try the same on Selenium Grid 4 using this code:-
from selenium import webdriver
from selenium.webdriver.firefox.options import Options as firefox_options
def init_firefox_remote(self, threadname, headless, images_off):
    """Start Firefox on the remote hub, then try to install the extensions.

    ``threadname`` and ``images_off`` are accepted but not used here.
    Returns the remote driver after ``self.close_tab`` and ``self.login``.
    """
    browser_options = firefox_options()
    if headless == True:
        browser_options.add_argument("--headless")
        hub_url = 'http://xx.xxx.xx.xx:4444/wd/hub'
    else:
        hub_url = 'http://xx.xx.xx.xx:4444/wd/hub'
    driver = webdriver.Remote(command_executor=hub_url, options=browser_options)

    addon_dir = '/dev/shm/extensons/'
    addon_files = ('firefox#vid.io.xpi', 'noimages.xpi')
    for addon in addon_files:
        # This is the call reported to fail with the AttributeError quoted
        # right after this snippet.
        driver.install_addon(addon_dir + addon, temporary=True)

    self.close_tab(driver)
    self.login(driver)
    return driver
I get an error:-
AttributeError: 'WebDriver' object has no attribute 'install_addon'
The version of Selenium Grid I am using is created like this
$ docker run -d -p 4444:4444 -v /dev/shm:/dev/shm selenium/standalone-firefox:4.0.0-beta-3-prerelease-20210321
Any ideas? Does Selenium Grid for Firefox lack the install extension option?

Installing addons in a remote Firefox browser is done by creating a Firefox profile and adding the extension there:
from selenium import webdriver
from selenium.webdriver.firefox.options import Options as FirefoxOptions
# Put the extension into a Firefox profile.
profile = webdriver.FirefoxProfile()
profile.add_extension("/path/to/extension.xpi")

# Hand both the options and the profile to the remote session.
options = FirefoxOptions()
driver = webdriver.Remote(
    command_executor="http://xx.xx.xx.xx:4444/wd/hub",
    options=options,
    browser_profile=profile,
)
I don't think it is possible to load a temporary addon in a remote firefox browser.

install_addon is only available for local webdrivers. A simple workaround is required when using remote webdrivers, as mentioned in this issue.
More specifically, change this line:
driver.install_addon(extension_dir + extension, temporary=True)
to
webdriver.Firefox.install_addon(driver, extension_dir + extension, temporary=True)
The full code should look like the following:
from selenium import webdriver
from selenium.webdriver.firefox.options import Options as firefox_options
def init_firefox_remote(self, threadname, headless, images_off):
    """Start Firefox on the remote hub, install the extensions, and log in.

    ``install_addon`` is called through ``webdriver.Firefox`` with the remote
    driver passed explicitly as the instance, instead of as a method of the
    remote driver itself.

    Args:
        threadname: Not used by this method.
        headless: When truthy, Firefox is started with ``--headless``.
        images_off: Not used by this method.

    Returns:
        The remote WebDriver, after ``self.close_tab`` and ``self.login``
        have been called on it.
    """
    options = firefox_options()
    if headless:
        options.add_argument("--headless")
        hub_url = 'http://xx.xxx.xx.xx:4444/wd/hub'
    else:
        hub_url = 'http://xx.xx.xx.xx:4444/wd/hub'
    driver = webdriver.Remote(command_executor=hub_url, options=options)

    extension_dir = '/dev/shm/extensons/'
    extensions = [
        'firefox#vid.io.xpi',
        'noimages.xpi',
    ]
    for extension in extensions:
        webdriver.Firefox.install_addon(driver, extension_dir + extension, temporary=True)

    # NOTE(review): the snippet's indentation was lost; close_tab/login are
    # assumed to run once, after all extensions are installed — confirm.
    self.close_tab(driver)
    self.login(driver)
    return driver
I have opened a pull request to the Selenium Docs to clarify such usages.

How is my scraper being detected immediately by a search engine

I am using Scrapy with Selenium in order to scrape urls from a particular search engine (ekoru). Here is a screenshot of the response I get back from the search engine with just ONE request:
Since I am using Selenium, I'd assume that my user-agent is fine, so what else could make the search engine detect the bot immediately?
Here is my code:
class CompanyUrlSpider(scrapy.Spider):
    """Spider that runs a search on ekoru.org and yields the result links."""

    name = 'company_url'

    def start_requests(self):
        """Yield one SeleniumRequest for https://ekoru.org handled by parseEkoru."""
        yield SeleniumRequest(
            url='https://ekoru.org',
            wait_time=3,
            screenshot=True,
            callback=self.parseEkoru
        )

    def parseEkoru(self, response):
        """Type a query into the search box and yield each result link's href.

        Args:
            response: Response whose ``meta['driver']`` holds the Selenium driver.

        Yields:
            dict: ``{'ekoru_URL': <href>}`` for every matched result anchor.
        """
        driver = response.meta['driver']
        # XPath selects attributes with '@' ('#' is not valid XPath syntax).
        search_input = driver.find_element_by_xpath("//input[@id='fld_q']")
        search_input.send_keys('Hello World')
        search_input.send_keys(Keys.ENTER)

        # NOTE(review): page_source is read right after ENTER with no explicit
        # wait — confirm the results have loaded by this point.
        html = driver.page_source
        response_obj = Selector(text=html)
        links = response_obj.xpath("//div[@class='serp-result-web-title']/a")
        for link in links:
            yield {
                'ekoru_URL': link.xpath(".//@href").get()
            }

Sometimes you need to pass other parameters in order to avoid being detected by any webpage.
Let me share a code you can use:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
#This code helps to simulate a "human being" visiting the website
# Start Chrome maximized.
chrome_options = Options()
chrome_options.add_argument('--start-maximized')
driver = webdriver.Chrome(options=chrome_options, executable_path=r"chromedriver")

# JavaScript that redefines navigator.webdriver so its getter returns
# undefined; it is registered through the CDP command
# Page.addScriptToEvaluateOnNewDocument.
hide_webdriver_js = """Object.defineProperty(navigator,
'webdriver', {get: () => undefined})"""
driver.execute_cdp_cmd(
    "Page.addScriptToEvaluateOnNewDocument",
    {"source": hide_webdriver_js},
)

url = 'https://ekoru.org'
driver.get(url)
This yields the following (note the "Chrome is being controlled..." banner below the address bar):

Downloading files with selenium (python3) on ubuntu server 18.04

I wrote a simple script using the user Fayçal's code from
"Downloading with chrome headless and selenium". The script worked on my Mac, but when I ran it on the server nothing was downloaded.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
# Set Chrome download preferences: target directory and no download prompt.
chrome_options = Options()
chrome_options.add_experimental_option("prefs", {
"download.default_directory": "/download/path/",
"download.prompt_for_download": False,
})
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(options=chrome_options)
# Register a custom "send_command" endpoint on the command executor, then use
# it to send Page.setDownloadBehavior with behavior "allow".
driver.command_executor._commands["send_command"] = ("POST", '/session/$sessionId/chromium/send_command')
# NOTE(review): downloadPath here is "/Download/path/" while the pref above is
# "/download/path/" (different case) — confirm both name the same directory.
params = {'cmd': 'Page.setDownloadBehavior', 'params': {'behavior': 'allow', 'downloadPath': "/Download/path/"}}
command_result = driver.execute("send_command", params)
driver.set_page_load_timeout(10)
#navigate to advanced search
# loginUrl and targetUrl are not defined anywhere in this snippet.
driver.get(loginUrl)
# Fill in the login form and submit it.
driver.find_element_by_name("login_username").send_keys("username")
driver.find_element_by_name("login_password").send_keys("password")
driver.find_element_by_name("login").click()
driver.get(targetUrl)
# Locate the link through an absolute XPath and click it.
file = driver.find_element_by_xpath("/html/body/table/tbody/tr/td/table/tbody/tr/td/table/tbody/tr/td/table[2]/tbody/tr/td/table/tbody/tr[2]/td[6]/a")
file.click()
The script runs and does not return any errors, but the target path remains empty.

Setting up tor with selenium web driver. (Windows)

I have tried to set up Tor with Selenium, but it continuously throws exceptions.
I have tried setting up the binary as well as profiles, but no luck.
from selenium import webdriver
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
import os
# NOTE(review): os.popen receives the firefox.exe path as a command string;
# the path contains spaces and is not quoted — confirm this launches the
# browser. torexe is not referenced again in this snippet.
torexe = os.popen(r'C:\Users\Jawad Ahmad Khan\Desktop\Tor Browser\Browser\firefox.exe')
# Use the Tor Browser profile.default directory as the Firefox profile.
profile = FirefoxProfile(r'C:\Users\Jawad Ahmad Khan\Desktop\Tor Browser\Browser\TorBrowser\Data\Browser\profile.default')
# Set the network.proxy.* preferences: type 1, SOCKS host 127.0.0.1, port 9050.
# NOTE(review): the Chrome example later on this page uses port 9150 —
# confirm which port the local Tor SOCKS proxy actually listens on.
profile.set_preference('network.proxy.type', 1)
profile.set_preference('network.proxy.socks', '127.0.0.1')
profile.set_preference('network.proxy.socks_port', 9050)
profile.set_preference("network.proxy.socks_remote_dns", False)
profile.update_preferences()
# Start Firefox through geckodriver with the profile configured above.
driver = webdriver.Firefox(firefox_profile= profile,
executable_path=r'D:\geckodriver\geckodriver.exe')
driver.get("http://check.torproject.org")
This is the error message:
selenium.common.exceptions.WebDriverException: Message: Reached error page: about:neterror?e=proxyConnectFailure&u=https%3A//check.torproject.org/&c=UTF-8&f=regular&d=Firefox%20is%20configured%20to%20use%20a%20proxy%20server%20that%20is%20refusing%20connections.

This works on my Mac with Chrome with Tor.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
def get_chrome_webdriver():
    """Return a Chrome driver started with --proxy-server=socks5://127.0.0.1:9150."""
    tor_proxy = "127.0.0.1:9150"
    # Arguments are added in this exact order.
    switches = (
        "--test-type",
        '--ignore-certificate-errors',
        '--disable-extensions',
        'disable-infobars',
        "--incognito",
        '--proxy-server=socks5://' + tor_proxy,
    )
    chrome_options = Options()
    for switch in switches:
        chrome_options.add_argument(switch)
    return webdriver.Chrome('/usr/local/bin/chromedriver', options=chrome_options)
def get_chrome_browser(url):
    """Open *url* in a new driver from get_chrome_webdriver() and return that driver."""
    driver = get_chrome_webdriver()
    driver.get(url)
    return driver


# Load the Tor check page when the snippet runs.
get_chrome_browser('https://check.torproject.org/')

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Experimental Chrome Options in Selenium Node.js - node.js

Related

Python selenium cannot saved to default download path

Does Selenium Grid 4 for Firefox lack extension addon option?

How is my scraper being detected immediately by a search engine

Downloading files with selenium (python3) on ubuntu server 18.04

Setting up tor with selenium web driver. (Windows)

Categories

Resources