Downloading files with selenium (python3) on ubuntu server 18.04

Downloading files with selenium (python3) on ubuntu server 18.04 - python-3.x

I wrote a simple script using the user Fayçal's code from
"Downloading with chrome headless and selenium". The script worked on my Mac, but when I ran it on the server nothing was downloaded.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options

# Single source of truth for the download location. The prefs and the
# DevTools command previously used paths that differed only in letter case
# ("/download/path/" vs "/Download/path/"); on a case-sensitive filesystem
# those are two different directories.
DOWNLOAD_DIR = "/download/path/"

chrome_options = Options()
chrome_options.add_experimental_option("prefs", {
    "download.default_directory": DOWNLOAD_DIR,
    "download.prompt_for_download": False,
})
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(options=chrome_options)

# Register and send the DevTools Page.setDownloadBehavior command so that
# downloads are allowed and written to DOWNLOAD_DIR.
driver.command_executor._commands["send_command"] = ("POST", '/session/$sessionId/chromium/send_command')
params = {'cmd': 'Page.setDownloadBehavior', 'params': {'behavior': 'allow', 'downloadPath': DOWNLOAD_DIR}}
command_result = driver.execute("send_command", params)
driver.set_page_load_timeout(10)

# Log in. loginUrl and targetUrl are not defined in this snippet and must be
# supplied by the surrounding code.
driver.get(loginUrl)
driver.find_element_by_name("login_username").send_keys("username")
driver.find_element_by_name("login_password").send_keys("password")
driver.find_element_by_name("login").click()

# Open the target page and click the download link.
driver.get(targetUrl)
file = driver.find_element_by_xpath("/html/body/table/tbody/tr/td/table/tbody/tr/td/table/tbody/tr/td/table[2]/tbody/tr/td/table/tbody/tr[2]/td[6]/a")
file.click()
The script runs without returning any errors, but the target path remains empty.

Related

Python selenium cannot saved to default download path

Selenium 4.6.0
Python 3.10.9
If I'm using headless mode this issue doesn't happen: files are downloaded to the Downloads/test folder.
But in non-headless mode it ignores the prefs option and downloads to the Downloads folder.
options = webdriver.ChromeOptions()
# UserAgent is not imported in this snippet; presumably it is
# fake_useragent.UserAgent -- confirm against the full file.
ua = UserAgent()
userAgent = ua.random
print(userAgent)
# --user-agent takes the bare UA string. The previous "User-Agent: " prefix
# would have become part of the value itself.
options.add_argument('--user-agent={}'.format(userAgent))
# options.add_argument('--headless')
# Reuse an existing Chrome profile.
options.add_argument("--user-data-dir=C:/Users/Myname/AppData/Local/Google/Chrome/User Data")
options.add_argument("--profile-directory=Profile 2")
# NOTE(review): this raw string ends in two literal backslashes -- confirm
# that is intended.
prefs = {"download.default_directory" : r"C:\Users\Myname\Downloads\test\\"}
options.add_experimental_option("prefs", prefs)
self.webdriver = webdriver.Chrome(executable_path="../chromedriver_win32/chromedriver.exe",options=options)
self.webdriver.maximize_window()
print ("Headless Chrome Initialized")

Being not able to set multiple chrome options at the same time (blocking notifications and cookies) in selenium and python

The code below only blocks notifications:
from time import sleep

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys

# Preference passed to Chrome to block site notifications.
notification_prefs = {"profile.default_content_setting_values.notifications": 2}

chrome_options = webdriver.ChromeOptions()
chrome_options.add_experimental_option("prefs", notification_prefs)

driver = webdriver.Chrome(
    chrome_options=chrome_options,
    executable_path="C:\\Users\\Desktop\\chromedriver.exe",
)
driver.maximize_window()
driver.get("https://www.hurriyet.com.tr/")
# Keep the page open for a few seconds.
sleep(5)
Hello friends, I am not able to set multiple Chrome options (blocking notifications and cookies) at the same time. How can I block notifications and cookies together? Is there any solution? I think these two could somehow be used together, but I couldn't get it to work:
"prefs", {"profile.default_content_settings.cookies": 2} and "prefs", {"profile.default_content_setting_values.notifications" : 2 }

Why not something like this :
executable_path = r"C:\\Users\\Selenium+Python\\chromedriver.exe"
# add_experimental_option keeps one value per option name, so calling it
# twice with "prefs" would leave only the second dict. Pass both settings in
# a single "prefs" dict instead. `options` is expected to be an existing
# ChromeOptions instance.
options.add_experimental_option("prefs", {
    "profile.default_content_setting_values.notifications": 2,
    "profile.default_content_settings.cookies": 2,
})
options.add_argument("start-maximized")
driver = webdriver.Chrome(executable_path, options=options)

Does Selenium Grid 4 for Firefox lack extension addon option?

I have Selenium working well locally, adding extensions with the following setup.
from selenium import webdriver
from selenium.webdriver.firefox.options import Options as firefox_options
def init_firefox(self, threadname, headless, images_off):
    """Start a local Firefox driver, install the add-ons, and log in.

    Args:
        threadname: Not used in this method; kept for interface compatibility.
        headless: When truthy, Firefox is started with ``--headless``.
        images_off: Not used in this method; kept for interface compatibility.

    Returns:
        The ``webdriver.Firefox`` instance after add-ons are installed and
        ``self.close_tab`` / ``self.login`` have been called on it.
    """
    # Build the options once and pass them in both modes; only the
    # --headless flag differs.
    options = firefox_options()
    if headless:
        options.add_argument("--headless")
    driver = webdriver.Firefox(
        options=options,
        executable_path=r"C:\Users\charl\OneDrive\python\gecko\geckodriver.exe",
    )

    extension_dir = 'C:\\Users\\charl\\OneDrive\\python\\gecko\\extensions\\'
    extensions = [
        'firefox#vid.io.xpi',
        'noimages.xpi',
    ]
    for extension in extensions:
        driver.install_addon(extension_dir + extension, temporary=True)

    self.close_tab(driver)
    self.login(driver)
    return driver
But when I try the same on Selenium Grid 4 using this code:-
from selenium import webdriver
from selenium.webdriver.firefox.options import Options as firefox_options
def init_firefox_remote(self, threadname, headless, images_off):
    """Connect to a remote Firefox on the Grid, install add-ons, and log in.

    ``threadname`` and ``images_off`` are accepted but not used here.
    Returns the remote driver after ``self.close_tab`` and ``self.login``
    have been called on it.
    """
    options = firefox_options()
    if headless == True:
        options.add_argument("--headless")
        hub_url = 'http://xx.xxx.xx.xx:4444/wd/hub'
    else:
        hub_url = 'http://xx.xx.xx.xx:4444/wd/hub'
    driver = webdriver.Remote(command_executor=hub_url, options=options)

    addon_root = '/dev/shm/extensons/'
    for addon_name in ('firefox#vid.io.xpi', 'noimages.xpi'):
        # Installed as a temporary add-on through the driver object itself.
        driver.install_addon(addon_root + addon_name, temporary=True)

    self.close_tab(driver)
    self.login(driver)
    return driver
I get an error:-
AttributeError: 'WebDriver' object has no attribute 'install_addon'
The version of Selenium Grid I am using is created like this
$ docker run -d -p 4444:4444 -v /dev/shm:/dev/shm selenium/standalone-firefox:4.0.0-beta-3-prerelease-20210321
Any ideas? Does Selenium Grid for Firefox lack the install extension option?

Installing addons in a remote Firefox browser is done by creating a Firefox profile and adding the extension there:
from selenium import webdriver
from selenium.webdriver.firefox.options import Options as FirefoxOptions

# Put the extension into a Firefox profile and hand that profile to the
# remote session.
options = FirefoxOptions()
profile = webdriver.FirefoxProfile()
profile.add_extension("/path/to/extension.xpi")

driver = webdriver.Remote(
    command_executor="http://xx.xx.xx.xx:4444/wd/hub",
    options=options,
    browser_profile=profile,
)
I don't think it is possible to load a temporary addon in a remote firefox browser.

install_addon is only available for local webdrivers. A simple workaround is required when using remote webdrivers, as mentioned in this issue.
More specifically, change this line:
driver.install_addon(extension_dir + extension, temporary=True)
to
webdriver.Firefox.install_addon(driver, extension_dir + extension, temporary=True)
The full code should look like the following:
from selenium import webdriver
from selenium.webdriver.firefox.options import Options as firefox_options
def init_firefox_remote(self, threadname, headless, images_off):
    """Connect to a remote Firefox on the Grid, install add-ons, and log in.

    Add-ons are installed by calling ``webdriver.Firefox.install_addon``
    with the remote driver passed explicitly as its first argument.
    ``threadname`` and ``images_off`` are accepted but not used here.
    Returns the remote driver after ``self.close_tab`` and ``self.login``
    have been called on it.
    """
    options = firefox_options()
    if headless == True:
        options.add_argument("--headless")
        hub_url = 'http://xx.xxx.xx.xx:4444/wd/hub'
    else:
        hub_url = 'http://xx.xx.xx.xx:4444/wd/hub'
    driver = webdriver.Remote(command_executor=hub_url, options=options)

    addon_root = '/dev/shm/extensons/'
    for addon_name in ('firefox#vid.io.xpi', 'noimages.xpi'):
        webdriver.Firefox.install_addon(driver, addon_root + addon_name, temporary=True)

    self.close_tab(driver)
    self.login(driver)
    return driver
I have opened a pull request to the Selenium Docs to clarify such usages.

Setting up tor with selenium web driver. (Windows)

I have tried to set up Tor with Selenium, but it continuously throws exceptions.
I have tried setting up the binary as well as profiles, but no luck.
import os

from selenium import webdriver
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile

# Start the Tor Browser executable through a pipe.
torexe = os.popen(r'C:\Users\Jawad Ahmad Khan\Desktop\Tor Browser\Browser\firefox.exe')

# Load the Tor Browser profile and point it at a local SOCKS proxy.
profile = FirefoxProfile(r'C:\Users\Jawad Ahmad Khan\Desktop\Tor Browser\Browser\TorBrowser\Data\Browser\profile.default')
# NOTE(review): the reported error says the proxy refuses connections;
# confirm that a SOCKS listener is actually running on 127.0.0.1:9050.
proxy_preferences = (
    ('network.proxy.type', 1),
    ('network.proxy.socks', '127.0.0.1'),
    ('network.proxy.socks_port', 9050),
    ("network.proxy.socks_remote_dns", False),
)
for pref_name, pref_value in proxy_preferences:
    profile.set_preference(pref_name, pref_value)
profile.update_preferences()

driver = webdriver.Firefox(
    firefox_profile=profile,
    executable_path=r'D:\geckodriver\geckodriver.exe',
)
driver.get("http://check.torproject.org")
This is the error message:
selenium.common.exceptions.WebDriverException: Message: Reached error page: about:neterror?e=proxyConnectFailure&u=https%3A//check.torproject.org/&c=UTF-8&f=regular&d=Firefox%20is%20configured%20to%20use%20a%20proxy%20server%20that%20is%20refusing%20connections.

This works on my Mac with Chrome with Tor.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
def get_chrome_webdriver():
    """Return a Chrome driver whose traffic goes through the local SOCKS5 proxy."""
    tor_proxy = "127.0.0.1:9150"
    switches = [
        "--test-type",
        '--ignore-certificate-errors',
        '--disable-extensions',
        'disable-infobars',
        "--incognito",
        '--proxy-server=socks5://%s' % tor_proxy,
    ]
    chrome_options = Options()
    for switch in switches:
        chrome_options.add_argument(switch)
    return webdriver.Chrome('/usr/local/bin/chromedriver', options=chrome_options)
def get_chrome_browser(url):
    """Create a proxied Chrome driver, load ``url`` in it, and return the driver."""
    driver = get_chrome_webdriver()
    driver.get(url)
    return driver


# Open the Tor check page with the proxied browser.
get_chrome_browser('https://check.torproject.org/')

Python: Is it possible to download ENTIRE web page in PhantomJS

I have used PhantomJS for scraping purposes. I would like to know whether it is possible to download all the contents of a URL (including images, CSS and JS) and save them locally for browsing.

# -*- coding: utf-8 -*-
from selenium import webdriver #for cookies collections after all AJAX/JS being executed
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
# Copy the default PhantomJS capabilities and override the user agent.
dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap["phantomjs.page.settings.userAgent"] = ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36")
driver = webdriver.PhantomJS(desired_capabilities=dcap, service_args=['--ignore-ssl-errors=true', '--ssl-protocol=any', '--web-security=false'])
try:
    driver.set_window_size(1366, 768)
    driver.get('http://stackoverflow.com')
    # Keep the page source in a variable; a bare attribute access would
    # discard it when run as a script.
    page_source = driver.page_source
finally:
    # Always shut the PhantomJS process down, even if loading the page fails.
    driver.quit()
This is complete code that uses Python Selenium + PhantomJS and at the end you have complete page source.

We can use the evaluate() function to get the content. I use this in nodejs.
// Open the page, read document.title through page.evaluate, print it, and exit.
var webPage = require('webpage');
var page = webPage.create();
page.open('http://google.com', function(status) {
    var title = page.evaluate(function() {
        return document.title;
    });
    console.log(title);
    phantom.exit();
});

In the case of wget being installed, this task is rather easy:
from subprocess import call

# Hand the whole job to wget: -m mirrors the site, -k converts links so the
# copy can be browsed locally.
domain = "www.google.de"
wget_command = ["wget", "-mk", domain]
call(wget_command)

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Downloading files with selenium (python3) on ubuntu server 18.04 - python-3.x

Related

Python selenium cannot saved to default download path

Being not able to set multiple chrome options at the same time (blocking notifications and cookies) in selenium and python

Does Selenium Grid 4 for Firefox lack extension addon option?

Setting up tor with selenium web driver. (Windows)

Python: Is it possible to download ENTIRE web page in PhantomJS

Categories

Resources