Python selenium can't run with chrome v93 - python-3.x

Yesterday I could run my code successfully with Chrome v92.0.4515.107, but Chrome auto-updated to v93 today and now it fails. Here's part of the code:
import requests
from selenium import webdriver

session = requests.Session()

class CNVD(object):
    def __init__(self):
        self.options = webdriver.ChromeOptions()
        self.options.add_experimental_option("detach", True)  # keep the browser open after the script ends
        self.options.add_experimental_option("excludeSwitches", ['enable-automation', 'enable-logging'])
        self.driver = webdriver.Chrome(chrome_options=self.options)
        # self.driver.maximize_window()

    def login(self):
        # header setup; leaving these out invalidates the session
        headers = {
            'Host': 'www.cnvd.org.cn',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36',
            'Accept': '*/*',
            'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
            'Accept-Encoding': 'gzip, deflate',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'X-Requested-With': 'XMLHttpRequest',
            'Origin': 'https://www.cnvd.org.cn',
            'Referer': 'https://www.cnvd.org.cn/user/login'
        }
        data = 'password=xxxx'
        response = session.post(url="https://www.cnvd.org.cn/user/doLogin/loginForm", data=data, headers=headers)
        response.encoding = 'utf-8'
        # copy the anti-bot and session cookies from the requests session into the Selenium browser
        self.driver.get("https://www.cnvd.org.cn")
        self.driver.add_cookie({'name': '__jsl_clearance_s', 'value': session.cookies.get_dict()['__jsl_clearance_s']})
        self.driver.add_cookie({'name': 'JSESSIONID', 'value': session.cookies.get_dict()['JSESSIONID']})
        self.driver.add_cookie({'name': '__jsluid_s', 'value': session.cookies.get_dict()['__jsluid_s']})
        self.driver.get("https://www.cnvd.org.cn/user/doLogin/loginForm")
I'm sure this code runs with Chrome v92.0.4515.107.
Could somebody please help me?

If your Chrome has automatically updated, the following two steps should solve your problem.
Download the matching version of ChromeDriver, which in your case is ChromeDriver 93.0.4577.15.
Update the following line of your code so that it reports the new Chrome version:
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36'
You can obtain the current User-Agent string from the Network tab in Chrome DevTools.
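Roughly, the two changes look like this. This is a sketch rather than a drop-in fix: the chromedriver path below is an assumed location, and the exact Chrome 93 version string should be copied from your own updated browser.
# 1. Point Selenium at the freshly downloaded ChromeDriver 93 binary
#    (the path is an assumption; adjust it to where you unpacked the driver).
self.driver = webdriver.Chrome(executable_path='/path/to/chromedriver', chrome_options=self.options)
# 2. Send a Chrome 93 User-Agent in the requests headers; copy the exact
#    string from the Network tab of your updated Chrome.
headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.15 Safari/537.36'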

Related

Python requests code worked yesterday but now returns TooManyRedirects: Exceeded 30 redirects

I am trying to get data from a site using requests, with this simple code (running on Google Colab):
import requests, json

def GetAllStocks():
    url = 'https://iboard.ssi.com.vn/dchart/api/1.1/defaultAllStocks'
    res = requests.get(url)
    return json.loads(res.text)
This worked well until this morning, and I cannot figure out why it now returns a "TooManyRedirects: Exceeded 30 redirects" error.
I can still get the data just by browsing the URL directly in Google Chrome in Incognito mode, so I don't think this is caused by cookies. I tried passing the whole set of headers, but it still does not work. I tried passing allow_redirects=False and the returned status_code is 302.
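(For reference, the redirect check looked roughly like this; printing the Location header as well shows where the 302 points.)
import requests

# inspect the redirect instead of following it
res = requests.get('https://iboard.ssi.com.vn/dchart/api/1.1/defaultAllStocks', allow_redirects=False)
print(res.status_code)              # 302
print(res.headers.get('Location'))  # target the server wants to redirect to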
I am not sure what else I could try, as this is so strange to me.
Any guidance is much appreciated. Thank you very much!
You need to send a User-Agent header to mimic regular browser behaviour:
import requests, json, random

def GetAllStocks():
    user_agents = [
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.79 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:101.0) Gecko/20100101 Firefox/101.0",
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:77.0) Gecko/20190101 Firefox/77.0",
        "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:77.0) Gecko/20100101 Firefox/77.0",
    ]
    headers = {
        "User-Agent": random.choice(user_agents),
        "Accept": "application/json",
    }
    url = "https://iboard.ssi.com.vn/dchart/api/1.1/defaultAllStocks"
    res = requests.get(url, headers=headers)
    return json.loads(res.text)

data = GetAllStocks()
print(data)

Redirect Django not working and not redirecting

views.py:
def showLoginPage(request):
    if request.method == "POST":
        try:
            body_unicode = request.body.decode('utf-8')
            if 'csrfmiddlewaretoken' not in body_unicode:
                body = json.loads(body_unicode)
                user_obj = AuthenticateUser()
                user_obj.validate_user(body)
                c = {}
                c.update(csrf(request))
                return redirect('http://abchostname/mainPage/')
                # return redirect('/mainPage')  This is another url which I want to redirect to
                # after a successful login
        except Exception as exe:
            print("Inside Exception : ", exe)
            raise
    else:
        print("Inside else {}".format(request.method))
    return render(request, 'login.html')

#login_required(login_url="/login/")
def showMainPage(request):
    return render(request, 'mainPage.html')
I want to redirect after a successful login. I can see that the login succeeds and the backend is hit correctly:
[07/Jul/2020:06:59:29 +0000] "GET /login/ HTTP/1.1" 200 2082 "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.18362"
[07/Jul/2020:06:59:36 +0000] "POST /login/ HTTP/1.1" 200 2081 "http://abchostname/login/" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.18362"
[07/Jul/2020:06:59:36 +0000] "POST /login/ HTTP/1.1" 302 306 "http://abchostname/login/" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.18362"
The third log line shows a 302 status code, which means a redirect is being issued. I need some help with this.
To perform a redirect, it is better to give your URLs names and refer to them by name:
return redirect('main-page')
redirect() will try to use its given arguments to reverse a URL, so the URL pattern needs a name (note that Django routes should not start with a leading slash):
path('main-page/', showMainPage, name='main-page')
Even when giving the URL directly, don't give the full absolute URL; give a relative one like:
return redirect('/mainPage/')
Read more: https://realpython.com/django-redirects/
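Putting the two pieces together, a minimal sketch (the module layout and the 'login' route name are illustrative, not taken from the question):
# urls.py - name the URL so redirect() can reverse it
from django.urls import path
from . import views

urlpatterns = [
    path('login/', views.showLoginPage, name='login'),
    path('main-page/', views.showMainPage, name='main-page'),
]

# views.py - redirect by name after a successful login
from django.shortcuts import redirect, render

def showLoginPage(request):
    if request.method == "POST":
        # ... validate the user as in the question ...
        return redirect('main-page')   # reversed to /main-page/
    return render(request, 'login.html')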

python3 - requests - cookie

How can I get the cookie of the last page?
My code is here:
import requests

headerMain = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125"}
istekMain = requests.get("https://www.example.org/", headers=headerMain)
cookie = istekMain.cookies.get_dict()  # cookies returned by the first response
istekLazim = {"display_type": "popup"}
istekLogin = requests.get("https://www.example.org/", headers=headerMain, params=istekLazim, cookies=cookie)
print(istekLogin.text)
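If the goal is simply to keep carrying whatever cookies the previous page set, a requests.Session does that automatically; a minimal sketch, reusing the headerMain dict from above:
import requests

s = requests.Session()
s.headers.update(headerMain)  # same User-Agent as above

# cookies set by each response are stored on the session and re-sent automatically
istekMain = s.get("https://www.example.org/")
istekLogin = s.get("https://www.example.org/", params={"display_type": "popup"})

print(s.cookies.get_dict())   # all cookies accumulated so far, including the last page's
print(istekLogin.text)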

Not able to scrape the data, and the links do not change when clicking on pagination, using Python

I want to scrape the data from each block and step through the pages, but I am not able to do that. Can someone help me crack this?
I tried to crawl the data using headers and form data, but failed.
Below is my code.
from bs4 import BeautifulSoup
import requests

url = 'http://www.msmemart.com/msme/listings/company-list/agriculture-product-stocks/1/585/Supplier'
headers = {
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    "Cookie": "unique_visitor=49.35.36.33; __utma=189548087.1864205671.1549441624.1553842230.1553856136.3; __utmc=189548087; __utmz=189548087.1553856136.3.3.utmcsr=nsic.co.in|utmccn=(referral)|utmcmd=referral|utmcct=/; csrf=d665df0941bbf3bce09d1ee4bd2b079e; ci_session=ac6adsb1eb2lcoogn58qsvbjkfa1skhv; __utmt=1; __utmb=189548087.26.10.1553856136",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36",
    "X-Requested-With": "XMLHttpRequest",
    "Accept": "application/json, text/javascript, */*; q=0.01",
}
data = {
    'catalog': 'Supplier',
    'category': '1',
    'subcategory': '585',
    'type': 'company-list',
    'csrf': '0db0757db9473e8e5169031b7164f2a4'
}
r = requests.get(url, data=data, headers=headers)
soup = BeautifulSoup(r.text, 'html.parser')  # was BeautifulSoup(html, ...): html is never defined
div = soup.find('div', {"id": "listings_result"})
# find_all() returns a list, so read next_sibling per <b> tag rather than on the list itself
for b in div.find_all('b', string='Products/Services:'):
    print(b.next_sibling)
getting "ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host" 2-3 times, i want to crawl all text details in a block.plz someone help me to found this.

Login using python requests doesn't work for pythonanywhere.com

I am trying to log in to the site pythonanywhere.com:
import requests

url = 'https://www.pythonanywhere.com/login'
s = requests.session()
values = {
    'auth-username': 'username',
    'auth-password': 'password'}
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
u = s.post(url, data=values, headers=headers)
But I am getting a <Response [403]>, "CSRF verification failed". How do I log in to that site?
You need to GET the page first so you can pick up the csrftoken and session cookies; Django-style login views reject any POST whose csrfmiddlewaretoken does not match the csrftoken cookie. And remember to set Referer=https://www.pythonanywhere.com/login/.
import requests

url = 'https://www.pythonanywhere.com/login'
s = requests.session()
s.get(url)  # sets the csrftoken cookie on the session
values = {
    'auth-username': 'username',
    'auth-password': 'password',
    "csrfmiddlewaretoken": s.cookies.get("csrftoken"),
    "login_view-current_step": "auth"
}
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36',
           'Referer': 'https://www.pythonanywhere.com/login/'}
u = s.post(url, data=values, headers=headers)
print(u.content)
