Python3 ValueError: Invalid initialization option when using requests package - python-3.x

I have stumbled upon an error which I cannot resolve. When I run my Python code, this error occurs, and it happens only when I am making an API call using the requests package.
Code calling the API:
def getAccs(id):
    accountid = ''
    url = "{}/{}".format(acc_api, id)
    req = requests.get(url, headers=head)
    result = json.loads(req.text)
    if result['id'] is None:
        # Fetches accountid from another API call after updating
        accountid = updateCorp(result['name'], id)
    else:
        accountid = result['id']
    return accountid

if __name__ == "__main__":
    ### Get data from appSettings.json
    with open('appSettingsStg.json') as app:
        data = json.load(app)
        acc_api = data['Urls']['Accounts']

    # Header
    head = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36",
        "Content-Type": "application/json"
    }
Error:
Traceback (most recent call last):
  File "insert.py", line 313, in <module>
    acc.append(res)
  File "insert.py", line 102, in getAccs
    req = requests.get(url, headers=head)
  File "/home/dev/.local/lib/python3.7/site-packages/requests/api.py", line 76, in get
    return request('get', url, params=params, **kwargs)
  File "/home/dev/.local/lib/python3.7/site-packages/requests/api.py", line 61, in request
    return session.request(method=method, url=url, **kwargs)
  File "/home/dev/.local/lib/python3.7/site-packages/requests/sessions.py", line 530, in request
    resp = self.send(prep, **send_kwargs)
  File "/home/dev/.local/lib/python3.7/site-packages/requests/sessions.py", line 685, in send
    r.content
  File "/home/dev/.local/lib/python3.7/site-packages/requests/models.py", line 829, in content
    self._content = b''.join(self.iter_content(CONTENT_CHUNK_SIZE)) or b''
  File "/home/dev/.local/lib/python3.7/site-packages/requests/models.py", line 751, in generate
    for chunk in self.raw.stream(chunk_size, decode_content=True):
  File "/home/dev/.local/lib/python3.7/site-packages/urllib3/response.py", line 571, in stream
    for line in self.read_chunked(amt, decode_content=decode_content):
  File "/home/dev/.local/lib/python3.7/site-packages/urllib3/response.py", line 738, in read_chunked
    self._init_decoder()
  File "/home/dev/.local/lib/python3.7/site-packages/urllib3/response.py", line 376, in _init_decoder
    self._decoder = _get_decoder(content_encoding)
  File "/home/dev/.local/lib/python3.7/site-packages/urllib3/response.py", line 147, in _get_decoder
    return GzipDecoder()
  File "/home/dev/.local/lib/python3.7/site-packages/urllib3/response.py", line 74, in __init__
    self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
ValueError: Invalid initialization option
Some things I have tried:
- Importing zlib and urllib3 (as mentioned in this post)
- Upgrading the requests package (v2.24.0 as of 25/08/2020)
- Reinstalling requests and urllib3 (didn't work, but I thought it was at least worth a try)
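One way to narrow this down is to run, outside requests, the exact zlib call that fails in the traceback (a minimal check, independent of any HTTP code):

import zlib

# The same call urllib3's GzipDecoder makes (response.py, line 74).
# If this raises ValueError on its own, the interpreter's zlib build is
# the problem, not requests or urllib3.
zlib.decompressobj(16 + zlib.MAX_WBITS)

If that one-liner fails too, reinstalling requests/urllib3 cannot help, since the broken piece is the Python/zlib installation itself. A possible temporary workaround, assuming the API can serve uncompressed bodies, is to ask the server not to gzip the response so no GzipDecoder is ever built:

head = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36",
    "Content-Type": "application/json",
    # Request an uncompressed body; sidesteps the failing gzip decoder.
    "Accept-Encoding": "identity",
}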
Any advice is much appreciated!

Related

ProtocolError: Received header value surrounded by whitespace in requests_async

I am writing an asynchronous scraper for RSS feeds, and sometimes the following error occurs with some sites, for example:
In [1]: import requests_async as requests
In [2]: headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36'}
In [3]: r = await requests.get('https://albumorientedpodcast.com/category/album-oriented/feed/', headers=headers)
Here is the full traceback of this error:
Traceback (most recent call last):
  File "rss_parser.py", line 55, in rss_downloader
    response = await requests.get(rss, headers=headers)
  File "C:\Python3\lib\site-packages\requests_async\api.py", line 11, in get
    return await request("get", url, params=params, **kwargs)
  File "C:\Python3\lib\site-packages\requests_async\api.py", line 6, in request
    return await session.request(method=method, url=url, **kwargs)
  File "C:\Python3\lib\site-packages\requests_async\sessions.py", line 79, in request
    resp = await self.send(prep, **send_kwargs)
  File "C:\Python3\lib\site-packages\requests_async\sessions.py", line 157, in send
    async for resp in self.resolve_redirects(r, request, **kwargs):
  File "C:\Python3\lib\site-packages\requests_async\sessions.py", line 290, in resolve_redirects
    resp = await self.send(
  File "C:\Python3\lib\site-packages\requests_async\sessions.py", line 136, in send
    r = await adapter.send(request, **kwargs)
  File "C:\Python3\lib\site-packages\requests_async\adapters.py", line 48, in send
    response = await self.pool.request(
  File "C:\Python3\lib\site-packages\http3\interfaces.py", line 49, in request
    return await self.send(request, verify=verify, cert=cert, timeout=timeout)
  File "C:\Python3\lib\site-packages\http3\dispatch\connection_pool.py", line 130, in send
    raise exc
  File "C:\Python3\lib\site-packages\http3\dispatch\connection_pool.py", line 120, in send
    response = await connection.send(
  File "C:\Python3\lib\site-packages\http3\dispatch\connection.py", line 56, in send
    response = await self.h2_connection.send(request, timeout=timeout)
  File "C:\Python3\lib\site-packages\http3\dispatch\http2.py", line 52, in send
    status_code, headers = await self.receive_response(stream_id, timeout)
  File "C:\Python3\lib\site-packages\http3\dispatch\http2.py", line 126, in receive_response
    event = await self.receive_event(stream_id, timeout)
  File "C:\Python3\lib\site-packages\http3\dispatch\http2.py", line 159, in receive_event
    events = self.h2_state.receive_data(data)
  File "C:\Python3\lib\site-packages\h2\connection.py", line 1463, in receive_data
    events.extend(self._receive_frame(frame))
  File "C:\Python3\lib\site-packages\h2\connection.py", line 1486, in _receive_frame
    frames, events = self._frame_dispatch_table[frame.__class__](frame)
  File "C:\Python3\lib\site-packages\h2\connection.py", line 1560, in _receive_headers_frame
    frames, stream_events = stream.receive_headers(
  File "C:\Python3\lib\site-packages\h2\stream.py", line 1055, in receive_headers
    events[0].headers = self._process_received_headers(
  File "C:\Python3\lib\site-packages\h2\stream.py", line 1298, in _process_received_headers
    return list(headers)
  File "C:\Python3\lib\site-packages\h2\utilities.py", line 335, in _reject_pseudo_header_fields
    for header in headers:
  File "C:\Python3\lib\site-packages\h2\utilities.py", line 291, in _reject_connection_header
    for header in headers:
  File "C:\Python3\lib\site-packages\h2\utilities.py", line 275, in _reject_te
    for header in headers:
  File "C:\Python3\lib\site-packages\h2\utilities.py", line 264, in _reject_surrounding_whitespace
    raise ProtocolError(
h2.exceptions.ProtocolError: Received header value surrounded by whitespace b'3.vie _dca '
At the same time, the same site loads normally through the regular requests library:
In [1]: import requests
In [2]: headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36'}
In [3]: r = requests.get('https://albumorientedpodcast.com/category/album-oriented/feed/', headers=headers)
In [4]: r
Out[4]: <Response [200]>
I tried to find at least some information on this error, but found nothing. Can someone tell me what I can do to avoid this error and load the site normally?
requests-async has been archived, but its GitHub page contains a link to its successor, httpx.
httpx seems to have similar syntax and is actively maintained.
Consider trying it: many bugs may have been fixed there.
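A sketch of the equivalent call with httpx (assuming the same feed URL and headers as above; not verified against that site):

import asyncio
import httpx

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36'}

async def fetch(url):
    # httpx mirrors the requests API; the async client is a context manager.
    async with httpx.AsyncClient() as client:
        return await client.get(url, headers=headers)

r = asyncio.run(fetch('https://albumorientedpodcast.com/category/album-oriented/feed/'))
print(r.status_code)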

How to fix error urllib.error.HTTPError: HTTP Error 400: BAD REQUEST?

I have a script (test.py) to test an API, like this:
def get_response(fct, data, method=GET):
    """
    Performs the query to the server and returns a string containing the
    response.
    """
    assert(method in (GET, POST))
    url = f'http://{hostname}:{port}/{fct}'
    if method == GET:
        encode_data = parse.urlencode(data)
        response = request.urlopen(f'{url}?{encode_data}')
    elif method == POST:
        response = request.urlopen(url, parse.urlencode(data).encode('ascii'))
    return response.read()
In the terminal I call:
python test.py -H 0.0.0.0 -P 5000 --add-data
The traceback:
Traceback (most recent call last):
  File "test.py", line 256, in <module>
    add_plays()
  File "test.py", line 82, in add_plays
    get_response("add_channel", {"name": channel}, method=POST)
  File "test.py", line 43, in get_response
    response = request.urlopen(url, parse.urlencode(data).encode('ascii'))
  File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 223, in urlopen
    return opener.open(url, data, timeout)
  File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 532, in open
    response = meth(req, response)
  File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 642, in http_response
    'http', request, response, code, msg, hdrs)
  File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 570, in error
    return self._call_chain(*args)
  File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 504, in _call_chain
    result = func(*args)
  File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 650, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 400: BAD REQUEST
The data is {"name": "Channel1"}. I couldn't understand what is wrong. Please can someone give some tip or show whats's wrong?
When I call using curl, works:
curl -X POST -H "Content-Type: application/json" -d '{"name": "Channel1"}' http://0.0.0.0:5000/add_channel
I solved the problem by changing the test script:
The API expected JSON_MIME_TYPE = 'application/json', so I added that header to the request, as shown below.
The script was also using the wrong encoding: some text in the JSON couldn't be encoded in ASCII. For example, encoding "Omö" as ASCII raises the exception UnicodeEncodeError: 'ascii' codec can't encode character '\xf6' in position 1: ordinal not in range(128). So I changed it to UTF-8.
Here is the fixed code:
def get_response(fct, data, method=GET):
    """
    Performs the query to the server and returns a string containing the
    response.
    """
    assert(method in (GET, POST))
    url = f'http://{hostname}:{port}/{fct}'
    if method == GET:
        encode_data = parse.urlencode(data)
        req = request.Request(f'{url}?{encode_data}',
                              headers={'content-type': 'application/json'})
        response = request.urlopen(req)
    elif method == POST:
        params = json.dumps(data)
        binary_data = params.encode('utf8')
        req = request.Request(url,
                              data=binary_data,
                              headers={'content-type': 'application/json'})
        response = request.urlopen(req)
    x = response.read()
    return x
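A usage sketch for the fixed function (the GET/POST constants and the hostname/port globals are assumptions about the rest of test.py, which isn't shown):

import json
from urllib import request, parse

GET, POST = 'GET', 'POST'          # assumed: defined elsewhere in test.py
hostname, port = '0.0.0.0', 5000   # assumed: taken from the -H/-P arguments

# Mirrors the failing call from the traceback, now sent as JSON.
print(get_response("add_channel", {"name": "Channel1"}, method=POST))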

Requests.post resulting in "TooManyRedirects" error

I tried to execute the following code but invariably get a "TooManyRedirects" error. What am I doing incorrectly?
My code:
import requests, json

Address = '100 W Grant Street'
City = 'Orlando'
State = 'FL'
url = 'https://tools.usps.com/tools/app/ziplookup/zipByAddress'
data = {'company': '', 'address1': Address, 'address2': '', 'city': City, 'state': State, 'zip': ''}
raw = requests.post(url, data=data)
Here's the massive error message I get:
Traceback (most recent call last):
  File "<pyshell#1347>", line 1, in <module>
    raw = requests.post(url, data=data)
  File "C:\Users\Karun\AppData\Local\Programs\Python\Python36\lib\site-packages\requests\api.py", line 112, in post
    return request('post', url, data=data, json=json, **kwargs)
  File "C:\Users\Karun\AppData\Local\Programs\Python\Python36\lib\site-packages\requests\api.py", line 58, in request
    return session.request(method=method, url=url, **kwargs)
  File "C:\Users\Karun\AppData\Local\Programs\Python\Python36\lib\site-packages\requests\sessions.py", line 508, in request
    resp = self.send(prep, **send_kwargs)
  File "C:\Users\Karun\AppData\Local\Programs\Python\Python36\lib\site-packages\requests\sessions.py", line 640, in send
    history = [resp for resp in gen] if allow_redirects else []
  File "C:\Users\Karun\AppData\Local\Programs\Python\Python36\lib\site-packages\requests\sessions.py", line 640, in <listcomp>
    history = [resp for resp in gen] if allow_redirects else []
  File "C:\Users\Karun\AppData\Local\Programs\Python\Python36\lib\site-packages\requests\sessions.py", line 140, in resolve_redirects
    raise TooManyRedirects('Exceeded %s redirects.' % self.max_redirects, response=resp)
requests.exceptions.TooManyRedirects: Exceeded 30 redirects.
This particular URL, for some reason, wants me to include a User-Agent header in the requests.post call as well. Then I get an appropriate response. So here's the new code:
import requests

s = requests.Session()
url = 'https://tools.usps.com/tools/app/ziplookup/zipByAddress'
payload = {'companyName': '', 'address1': '10570 Main St', 'address2': '', 'city': 'Fairfax', 'state': 'VA', 'zip': ''}
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'}
r = s.post(url, data=payload, headers=headers)
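A quick follow-up check (a sketch; that this endpoint returns JSON is an assumption, not verified here):

print(r.status_code)  # expect 200 instead of the redirect chain
print(r.json())       # assumed: the ZIP-lookup endpoint responds with JSON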

I am using a User-Agent for scraping a secured site, but I still get this error: raise HTTPError(req.full_url, code, msg, hdrs, fp) HTTPError: Forbidden

I am scraping and parsing the HTML content of the secured website justdial.com into a CSV file. I am using a User-Agent as well, but I am still getting this error: raise HTTPError(req.full_url, code, msg, hdrs, fp) HTTPError: Forbidden.
My code is:
import urllib.request
import urllib
from urllib.request import urlopen
import bs4
from bs4 import BeautifulSoup

headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
url = 'https://www.justdial.com/Mumbai/311/B2b_fil'
req = urllib.request.Request(url, None, headers)
response = urllib.request.urlopen(req)
print(response.read())
html = urllib.request.urlopen(url).read()
soup = BeautifulSoup(html)
After printing the response, I have to parse the HTML content into a CSV file, but it gives this error:
File "<ipython-input-21-c589d79bf43d>", line 1, in <module>
runfile('C:/Users/justdial.py', wdir='C:/Users')
File "C:\ProgramData\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 710, in runfile
execfile(filename, namespace)
File "C:\ProgramData\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 101, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/justdial.py", line 21, in <module>
html= urllib.request.urlopen(url).read()
File "C:\ProgramData\Anaconda3\lib\urllib\request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "C:\ProgramData\Anaconda3\lib\urllib\request.py", line 532, in open
response = meth(req, response)
File "C:\ProgramData\Anaconda3\lib\urllib\request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "C:\ProgramData\Anaconda3\lib\urllib\request.py", line 570, in error
return self._call_chain(*args)
File "C:\ProgramData\Anaconda3\lib\urllib\request.py", line 504, in _call_chain
result = func(*args)
File "C:\ProgramData\Anaconda3\lib\urllib\request.py", line 650, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
HTTPError: Forbidden
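Note that the traceback blames the second fetch, html = urllib.request.urlopen(url).read(), which passes the bare url rather than the req object, so that request goes out without the User-Agent header. A minimal sketch that reuses the header-carrying Request instead (this may not be the only reason the site returns Forbidden, but it matches the line in the traceback):

# Reuse the Request object that carries the User-Agent header instead of
# fetching the bare URL a second time without any headers.
html = urllib.request.urlopen(req).read()
soup = BeautifulSoup(html, 'html.parser')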

HTTP Error 403 in Python 3 Tumblr scraping even after User-Agent header added

I am trying to crawl the most recent images with specific keyword tags from Tumblr, without logging in or using the API. However, I receive a 403 error when I run my code, even though I have added a User-Agent to the headers.
Could anyone help me? Thank you and sorry about my poor English.
Here is my code:
import urllib.request
import urllib.parse
from bs4 import BeautifulSoup
import re

url = 'https://www.tumblr.com/search/flowers/recent'

data = {}
data['q'] = 'flowers'
data['sort'] = 'recent'
data['post_view'] = 'masonry'
data['blogs_before'] = 8
data['num_blogs_shown'] = 8
data['num_posts_shown'] = 0
data['before'] = 0
data['blog_page'] = 1
data['post_page'] = 1
data['filter_nsfw'] = 'true'
data['filter_post_type'] = 'photo'
data['next_ad_offset'] = 0
data['ad_placement_id'] = 0
data['more_posts'] = 'true'
data = urllib.parse.urlencode(data).encode('utf-8')

req = urllib.request.Request(url, data)
# add headers
req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.94 Safari/537.36')
response = urllib.request.urlopen(req)
html = response.read().decode('utf8')
The traceback:

Traceback (most recent call last):
  File "C:\Users\D47\Desktop\tumblr.py", line 29, in <module>
    response=urllib.request.urlopen(req)
  File "D:\Temp\python\lib\urllib\request.py", line 162, in urlopen
    return opener.open(url, data, timeout)
  File "D:\Temp\python\lib\urllib\request.py", line 471, in open
    response = meth(req, response)
  File "D:\Temp\python\lib\urllib\request.py", line 581, in http_response
    'http', request, response, code, msg, hdrs)
  File "D:\Temp\python\lib\urllib\request.py", line 509, in error
    return self._call_chain(*args)
  File "D:\Temp\python\lib\urllib\request.py", line 443, in _call_chain
    result = func(*args)
  File "D:\Temp\python\lib\urllib\request.py", line 589, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden
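One diagnostic step that may help: urllib raises before the server's reply is ever visible, but the HTTPError object is itself a response and can be inspected. A sketch (what Tumblr actually sends in the 403 body is unknown here):

import urllib.error

try:
    response = urllib.request.urlopen(req)
    html = response.read().decode('utf8')
except urllib.error.HTTPError as e:
    # HTTPError doubles as a response object: check the status, headers,
    # and the start of the body to see why the request was refused.
    print(e.code, e.reason)
    print(e.headers)
    print(e.read()[:500])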
