Start playwright driver session but not incognito - python-3.x

Currently I open a new browser session using the code below, but it always starts in incognito mode. Can I start a new Chromium session that is not incognito?
from behave import *
from playwright.sync_api import sync_playwright
import time
class session_driver:
    """Own a Playwright page opened in a video-recording Chromium context."""

    # Page driven by this session; stays None until open_browser() is called.
    driver = None

    def open_browser(self, url):
        """Launch a headed Chromium, open a recorded page and navigate to ``url``.

        The launched browser is also published as the module-level global
        ``browser`` and the page is stored on ``self.driver``.

        Args:
            url: Address the new page navigates to.
        """
        playwright = sync_playwright().start()
        global browser
        browser = playwright.chromium.launch(headless=False)
        # Keep the context and create the page from it. Calling
        # browser.new_page() instead would create a second, default context,
        # so the recording options below would never apply to the page.
        context = browser.new_context(
            record_video_dir="videos/",
            record_video_size={"width": 640, "height": 480},
        )
        self.driver = context.new_page()
        self.driver.goto(url)

from playwright.sync_api import sync_playwright
import os
# Profile directory handed to Chromium as its persistent user-data directory.
user_dir = '/tmp/playwright'
# exist_ok=True replaces the separate existence check, so there is no window
# between checking for the directory and creating it.
os.makedirs(user_dir, exist_ok=True)
with sync_playwright() as p:
    # Launched with a user-data directory instead of chromium.launch().
    # NOTE(review): despite the name, this should be a BrowserContext rather
    # than a Browser; confirm against the Playwright API reference.
    browser = p.chromium.launch_persistent_context(user_dir, headless=False)
    page = browser.new_page()
    page.goto('https://playwright.dev/python', wait_until='domcontentloaded')

This is what I have done in a TypeScript code base. Note that this reuses the existing logged-in session, so it will not ask for a fresh user login.
// Directory passed to Chromium as its user-data (profile) directory.
const userDataDir = 'C:/Users/yuv****dir/AppData/Local/Temp/tjwmm3m0.hmt';
// Launch a headed Chromium context backed by userDataDir, with no extra
// command-line arguments. NOTE(review): `context` is assigned without a
// declaration here, so it is presumably declared elsewhere.
context = await chromium.launchPersistentContext(userDataDir,{
headless: false,
args: [ ]
});
I hope the Python code above behaves the same way as this TypeScript version.

Related

How can I find all secure and third-party cookies on any website using Python Selenium

I have tried several different approaches to get secure and third-party cookies. The approaches I tried are pasted below.
With cookiejar:
import http.cookiejar
# Import the submodule explicitly: a bare "import urllib" does not load
# urllib.request itself, so the original only worked because another import
# happened to pull it in.
import urllib.request

url = "https://www.google.com"
cookie_jar = http.cookiejar.CookieJar()
url_opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cookie_jar))
# The response is fetched only so the cookie processor can fill cookie_jar;
# the with-block closes the connection instead of leaving it open.
with url_opener.open(url):
    pass
for cookie in cookie_jar:
    print(cookie)
with Requests module:
import requests
# Fetch the page, then dump each cookie's attribute dict and its secure flag.
response = requests.get('http://www.google.com')
for jar_cookie in response.cookies:
    attributes, is_secure = jar_cookie.__dict__, jar_cookie.secure
    print(attributes)
    print(is_secure)
with PhantomJS:
from selenium import webdriver
# File that PhantomJS is told to write its cookies to.
cookie_file_path = 'cookie.txt'
args = [f'--cookies-file={cookie_file_path}']
driver = webdriver.PhantomJS(service_args=args)
# Visit both sites in order, then print the contents of the cookie file.
for target in ('http://google.com', 'http://facebook.com'):
    driver.get(target)
with open(cookie_file_path) as f:
    print(f.read())
with Selenium:
# driver_exe, options and capabilities are expected to be defined elsewhere.
browser_kwargs = {"options": options, "desired_capabilities": capabilities}
driver = webdriver.Chrome(driver_exe, **browser_kwargs)
driver.get('https://google.com')
# Print each cookie the driver reports for the loaded page, one per line.
cookies = driver.get_cookies()
for entry in cookies:
    print(entry)
Any help or documentation related to this would be appreciated.
Thanks :)
Does this give you cookies?
import requests
# Fetch the page and print the response cookies as a plain dict.
r = requests.get('http://www.google.com')
cookie_map = r.cookies.get_dict()
print(cookie_map)
We can use Selenium's Chrome DevTools integration to extract all the cookies present on the website.
Ref:
[1]: https://www.selenium.dev/documentation/webdriver/bidirectional/chrome_devtools/

How can I start a webdriver instance in flask server and use it globally

I have this function, which I want to call from the main function of my Flask app and pass to another class. The catch is that I want to create the instance only once, when the server starts, and make it available globally to all the classes.
def webdriver_instance():
    """Create and return a new headless Firefox WebDriver.

    Every call starts a fresh browser. A caller that wants one shared
    instance must store the returned driver itself.

    Returns:
        The ``selenium.webdriver.Firefox`` instance that was started.
    """
    from selenium import webdriver
    from selenium.webdriver import FirefoxOptions

    opts = FirefoxOptions()
    for flag in (
        "--headless",
        "start-maximized",
        "disable-infobars",
        "--disable-extensions",
        '--no-sandbox',
        '--disable-application-cache',
        '--disable-gpu',
        "--disable-dev-shm-usage",
    ):
        opts.add_argument(flag)
    # The options object is passed as `options=`; the older `firefox_options=`
    # keyword is deprecated and no longer accepted by Selenium 4.
    browser = webdriver.Firefox(options=opts)
    return browser
You are looking for a way to reuse Selenium sessions. You can start the browser once, store the session id and executor URL somewhere, and retrieve them when needed:
from selenium import webdriver
# Start the browser once and capture the two values needed to re-attach later.
driver = webdriver.Firefox()
executor_url = driver.command_executor._url
session_id = driver.session_id
driver.get("http://tarunlalwani.com")
# print() is a function on Python 3; the statement form is a syntax error.
print(session_id)
print(executor_url)
def create_driver_session(session_id, executor_url):
    """Attach a new Remote driver object to an already running session.

    ``RemoteWebDriver.execute`` is patched temporarily so that the
    ``newSession`` command issued while the driver is constructed gets a
    canned response carrying ``session_id`` instead of being executed.

    Args:
        session_id: Id of the existing browser session to reuse.
        executor_url: Command executor URL of that session.

    Returns:
        A ``webdriver.Remote`` whose ``session_id`` is set to ``session_id``.
    """
    from selenium import webdriver
    from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver

    # Save the original function, so we can revert our patch
    org_command_execute = RemoteWebDriver.execute

    def new_command_execute(self, command, params=None):
        if command == "newSession":
            # Mock the response
            return {'success': 0, 'value': None, 'sessionId': session_id}
        return org_command_execute(self, command, params)

    # Patch the function before creating the driver object
    RemoteWebDriver.execute = new_command_execute
    try:
        new_driver = webdriver.Remote(command_executor=executor_url, desired_capabilities={})
        new_driver.session_id = session_id
    finally:
        # Always restore the original method, even when constructing the
        # driver fails, so the patch cannot leak into later drivers.
        RemoteWebDriver.execute = org_command_execute
    return new_driver
# Re-attach to the running session and show which page it is currently on.
driver2 = create_driver_session(session_id, executor_url)
# print() is a function on Python 3; the statement form is a syntax error.
print(driver2.current_url)
source: https://tarunlalwani.com/post/reusing-existing-browser-session-selenium/

How is my scraper being detected immediately by a search engine

I am using Scrapy with Selenium in order to scrape urls from a particular search engine (ekoru). Here is a screenshot of the response I get back from the search engine with just ONE request:
Since I am using selenium, I'd assume that my user-agent should be fine so what else could the issue be that makes the search engine detect the bot immediately?
Here is my code:
class CompanyUrlSpider(scrapy.Spider):
    """Search ekoru.org through a Selenium-driven browser and yield result links."""

    name = 'company_url'

    def start_requests(self):
        """Yield the single Selenium request that opens the search page."""
        yield SeleniumRequest(
            url='https://ekoru.org',
            wait_time=3,
            screenshot=True,
            callback=self.parseEkoru
        )

    def parseEkoru(self, response):
        """Submit a search and yield one item per result link.

        Args:
            response: Response whose ``meta['driver']`` holds the Selenium
                driver that loaded the page.

        Yields:
            Dicts with the key ``'ekoru_URL'`` holding a result link's href.
        """
        driver = response.meta['driver']
        # XPath selects attributes with "@"; "#" is not valid XPath syntax.
        search_input = driver.find_element_by_xpath("//input[@id='fld_q']")
        search_input.send_keys('Hello World')
        search_input.send_keys(Keys.ENTER)

        # Re-parse the driver's current page source, not the original response.
        html = driver.page_source
        response_obj = Selector(text=html)
        links = response_obj.xpath("//div[@class='serp-result-web-title']/a")
        for link in links:
            yield {
                'ekoru_URL': link.xpath(".//@href").get()
            }
Sometimes you need to pass additional parameters in order to avoid being detected by a webpage.
Here is some code you can use:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# This code helps to simulate a "human being" visiting the website
chrome_options = Options()
chrome_options.add_argument('--start-maximized')
driver = webdriver.Chrome(options=chrome_options, executable_path=r"chromedriver")
# Send the DevTools command Page.addScriptToEvaluateOnNewDocument with a script
# that redefines navigator.webdriver so that its getter returns undefined.
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source":
"""Object.defineProperty(navigator,
'webdriver', {get: () => undefined})"""})
# Load the target site only after the script above has been registered.
url = 'https://ekoru.org'
driver.get(url)
This yields the following (note the "Chrome is being controlled..." banner below the address bar):

how can I use selenium in python to send direct messages in instagram?

Hi, I'm a newbie and I want to make an Instagram bot using Selenium to send birthday messages to my friends. I ran into a problem when I learned that, to send DMs in Chrome, you have to inspect the page, click on the mobile option, select a mobile device on which to view it, and then reload the page; I don't know how to do any of that. I have searched for an answer but have not found one.
from selenium import webdriver
from time import sleep
from secrets import pw
from secrets import username
class InstaBot:
    """Log in to Instagram in a Chrome window and keep that window open."""

    def __init__(self, username, pw):
        """Open Instagram, submit the login form and dismiss the follow-up prompt.

        The constructor never returns: after logging in it sleeps in an
        endless loop so that the browser window stays open.

        Args:
            username: Account name typed into the login form.
            pw: Password typed into the login form.
        """
        # Raw string: "\M" is not a valid escape sequence in a normal literal.
        self.driver = webdriver.Chrome(r'D:\Movies/chromedriver.exe')
        self.username = username
        self.driver.get("https://instagram.com")
        sleep(6)
        self.driver.find_element_by_xpath('/html/body/div[1]/section/main/article/div[2]/div[1]/div/form/div[2]/div/label/input')\
            .send_keys(username)
        # XPath selects attributes with "@"; "#" is not valid XPath syntax.
        self.driver.find_element_by_xpath("//input[@name=\"password\"]")\
            .send_keys(pw)
        self.driver.find_element_by_xpath('//button[@type="submit"]')\
            .click()
        sleep(4)
        self.driver.find_element_by_xpath("//button[contains(text(), 'Not Now')]")\
            .click()
        # Keeps the window open. The loop no longer reads a global flag that
        # was only assigned after the class definition.
        while True:
            sleep(10)


InstaBot(username, pw)
I would rather use Appium than Selenium for this; I don't think Instagram allows sending direct messages from the browser. Use Appium to automate the Instagram app.
Refer to these:
https://medium.com/the-mission/how-to-automate-an-effective-instagram-bot-that-isnt-spammy-b2146a2c0b19
https://github.com/zhehaowang/themistalkles/blob/master/README.md
# Value for Chrome's "mobileEmulation" experimental option, naming the device to emulate.
mobile_emulation = { "deviceName": "Nexus 5" }
chrome_options = webdriver.ChromeOptions()
# NOTE(review): the spaces around "=" and the missing "--" prefix look
# unintended; Chrome's language switch is normally written "--lang=en-US".
# Confirm before relying on this argument.
chrome_options.add_argument ("lang = en_us")
chrome_options.add_experimental_option("mobileEmulation", mobile_emulation)
# The snippet stops here: no driver is created from chrome_options yet.

How to store websockets at the server side with aiohttp?

I'm trying to make a simple web chat with rooms using aiohttp. Can you please advise me on how to store my websocket connections? Some of the code below is slightly simplified. I get an EOF error from the socket from time to time (and I can reproduce it), but I don't know why. So my question is: am I doing this right? Should I close the websocket every time I reload the page or follow a link? If not, how do I reconnect the client to my already opened socket? Sorry for my English, and thanks.
app.py
import asyncio
import aiohttp_jinja2
import jinja2
import hashlib
import collections
import os
from aiohttp_session import session_middleware
from aiohttp_session.cookie_storage import EncryptedCookieStorage
from aiohttp import web
from routes import routes
from middlewares import authorize
from motor import motor_asyncio as ma
from settings import *
# Directory containing this file, taken from its realpath-resolved location.
basedir = os.path.dirname(os.path.realpath(__file__))
# Path of the static/photo/ directory under basedir.
photo_dir = os.path.join(basedir, 'static/photo/')
async def on_shutdown(app):
    """Close every websocket still registered in ``app['websockets']``.

    ``app['websockets']`` maps a room id to a list of sockets. Iterating the
    mapping directly yields the room ids, so the sockets are gathered from
    its values instead.

    Args:
        app: Mapping-like application object holding the ``'websockets'`` registry.
    """
    # Snapshot before closing: the same socket can sit in several rooms, and
    # the registry lists may change while close() is awaited. Keying by id()
    # closes each socket exactly once.
    open_sockets = {
        id(ws): ws
        for room in app['websockets'].values()
        for ws in room
    }
    for ws in open_sockets.values():
        await ws.close(code=1001, message='Server shutdown')
# Middlewares: encrypted cookie sessions (key derived from SECRET_KEY via
# SHA-256), followed by the project's authorize middleware.
middle = [
    session_middleware(EncryptedCookieStorage(hashlib.sha256(bytes(SECRET_KEY, 'utf-8')).digest())),
    authorize
]
app = web.Application(middlewares=middle)
aiohttp_jinja2.setup(app, loader=jinja2.FileSystemLoader('templates'))
# Each route entry supplies the three add_route arguments plus a name.
for route in routes:
    app.router.add_route(*route[:3], name=route[3])
app['static_root_url'] = '/static'
app.router.add_static('/static', 'static', name='static')
app.client = ma.AsyncIOMotorClient(MONGO_HOST)
app.db = app.client[MONGO_DB_NAME]
# Shared state: room id -> list of open websockets, plus online users.
app['websockets'] = collections.defaultdict(list)
app['online'] = {}
app['photo_dir'] = photo_dir
# The websocket close hook belongs on the on_shutdown signal, which aiohttp
# fires before on_cleanup so long-lived connections can be closed gracefully.
app.on_shutdown.append(on_shutdown)
web.run_app(app)
and websocket handler
class CompanyWebSocket(web.View):
    """Websocket endpoint that relays chat messages to the sockets of a company room."""

    async def get(self):
        """Serve one websocket connection until the client disconnects.

        The socket is registered under every company returned for the session
        user. Text messages are saved and relayed to the room named by the
        ``company_id`` query parameter. When the connection ends, the socket
        is removed from every room it joined and the remaining members of the
        ``company_id`` room are told that the user left.

        Returns:
            The prepared ``web.WebSocketResponse``.
        """
        ws = web.WebSocketResponse()
        await ws.prepare(self.request)

        session = await get_session(self.request)
        self_id = session.get('user')
        login = session.get('login')
        company_id = self.request.rel_url.query.get('company_id')
        message = Message(self.request.app.db)
        company = Company(self.request.app.db)
        sockets = self.request.app['websockets']

        my_companys = await company.get_company_by_user(self_id)
        # Remember every room joined so that cleanup can undo all of them.
        joined_rooms = [str(c['_id']) for c in my_companys]
        for room in joined_rooms:
            sockets[room].append(ws)

        try:
            async for msg in ws:
                if msg.type == WSMsgType.TEXT:
                    if msg.data == 'close':
                        await ws.close()
                    else:
                        await message.save_for_company({'data': 'data'})
                        mess = {
                            'data': 'data'
                        }
                        # send mess to users in company; iterate a snapshot
                        # because the list may change while a send is awaited
                        for company_ws in list(sockets[company_id]):
                            await company_ws.send_json(mess)
                elif msg.type == WSMsgType.ERROR:
                    log.debug('ws connection closed with exception %s' % ws.exception())
        finally:
            # Deregister from every joined room, not only company_id, and do
            # so even when the loop exits with an exception. Otherwise closed
            # sockets stay in the registry and later broadcasts hit them.
            for room in joined_rooms:
                try:
                    sockets[room].remove(ws)
                except ValueError:
                    # Already removed elsewhere; nothing left to undo.
                    pass

        for _ws in list(sockets[company_id]):
            await _ws.send_json({'user': login, 'type': 'left'})
        return ws

Resources