CCXT Binance fetch_ohlcv function

I wanted to get crypto market data from the Binance API with CCXT using fetch_ohlcv(), but received an error when running the code below.
I tried running without the since keyword and it worked fine. May I ask what is wrong with my startTime parameter? Or is there an issue with CCXT? Thanks!
Below is the error message:
---------------------------------------------------------------------------
HTTPError Traceback (most recent call last)
~/Desktop/crypto/trading/env/lib/python3.8/site-packages/ccxt/base/exchange.py in fetch(self, url, method, headers, body)
592 self.logger.debug("%s %s, Response: %s %s %s", method, url, http_status_code, headers, http_response)
--> 593 response.raise_for_status()
594
~/Desktop/crypto/trading/env/lib/python3.8/site-packages/requests/models.py in raise_for_status(self)
942 if http_error_msg:
--> 943 raise HTTPError(http_error_msg, response=self)
944
HTTPError: 400 Client Error: Bad Request for url: https://api.binance.com/api/v3/klines?symbol=ETHBTC&interval=1d&limit=50&startTime=1589817600.0&endTime=5909817599.0
During handling of the above exception, another exception occurred:
BadRequest Traceback (most recent call last)
<ipython-input-113-43d055cced8d> in <module>
----> 1 exchange.fetch_ohlcv(symbol, timeframe, since=startDate, limit=limit)
~/Desktop/crypto/trading/env/lib/python3.8/site-packages/ccxt/binance.py in fetch_ohlcv(self, symbol, timeframe, since, limit, params)
1516 else:
1517 method = 'publicGetTickerBookTicker'
-> 1518 response = getattr(self, method)(query)
1519 return self.parse_tickers(response, symbols)
1520
~/Desktop/crypto/trading/env/lib/python3.8/site-packages/ccxt/base/exchange.py in inner(_self, params)
459 if params is not None:
460 inner_kwargs['params'] = params
--> 461 return entry(_self, **inner_kwargs)
462 return inner
463 to_bind = partialer()
~/Desktop/crypto/trading/env/lib/python3.8/site-packages/ccxt/binance.py in request(self, path, api, method, params, headers, body)
3572 elif (type == 'delivery') or (type == 'inverse'):
3573 method = 'dapiPrivateGetPositionRisk'
-> 3574 else:
3575 raise NotSupported(self.id + ' fetchIsolatedPositions() supports linear and inverse contracts only')
3576 response = getattr(self, method)(self.extend(request, params))
~/Desktop/crypto/trading/env/lib/python3.8/site-packages/ccxt/base/exchange.py in fetch2(self, path, api, method, params, headers, body)
480 self.lastRestRequestTimestamp = self.milliseconds()
481 request = self.sign(path, api, method, params, headers, body)
--> 482 return self.fetch(request['url'], request['method'], request['headers'], request['body'])
483
484 def request(self, path, api='public', method='GET', params={}, headers=None, body=None):
~/Desktop/crypto/trading/env/lib/python3.8/site-packages/ccxt/base/exchange.py in fetch(self, url, method, headers, body)
607 except HTTPError as e:
608 details = ' '.join([self.id, method, url])
--> 609 self.handle_errors(http_status_code, http_status_text, url, method, headers, http_response, json_response, request_headers, request_body)
610 self.handle_http_status_code(http_status_code, http_status_text, url, method, http_response)
611 raise ExchangeError(details) from e
~/Desktop/crypto/trading/env/lib/python3.8/site-packages/ccxt/binance.py in handle_errors(self, code, reason, url, method, headers, body, response, requestHeaders, requestBody)
3566 raise NotSupported(self.id + ' fetchIsolatedPositions() supports linear and inverse contracts only')
3567 defaultType = self.safe_string_2(self.options, 'fetchIsolatedPositions', 'defaultType', defaultType)
-> 3568 type = self.safe_string(params, 'type', defaultType)
3569 params = self.omit(params, 'type')
3570 if (type == 'future') or (type == 'linear'):
~/Desktop/crypto/trading/env/lib/python3.8/site-packages/ccxt/base/exchange.py in throw_exactly_matched_exception(self, exact, string, message)
498 def throw_exactly_matched_exception(self, exact, string, message):
499 if string in exact:
--> 500 raise exact[string](message)
501
502 def throw_broadly_matched_exception(self, broad, string, message):
BadRequest: binance {"code":-1100,"msg":"Illegal characters found in parameter 'startTime'; legal range is '^[0-9]{1,20}$'."}
Code:
import ccxt
from datetime import datetime

symbol = 'ETH/BTC'
timeframe = '1d'
limit = 50  # default, works well for Binance
startDate = "2020-05-19"
startDate = datetime.strptime(startDate, "%Y-%m-%d")
startDate = datetime.timestamp(startDate)

config = {
    'rateLimit': 10000,
    'apiKey': apiKey,
    'secret': secretKey
}
exchange = ccxt.binance(config)
exchange.fetch_ohlcv(symbol, timeframe, since=startDate, limit=limit)

Your mistake is the startDate format:
startDate = datetime.timestamp(startDate)
print(startDate)  # output: 1589846400.0
datetime.timestamp() returns a float of seconds, but Binance expects an integer timestamp in milliseconds (hence the error: legal range is '^[0-9]{1,20}$'). First convert it to an integer, then multiply by 1000 to convert to milliseconds:
from datetime import datetime

startDate = "2020-05-19"
startDate = datetime.strptime(startDate, "%Y-%m-%d")
startDate = datetime.timestamp(startDate)
startDate = int(startDate) * 1000  # seconds (float) -> milliseconds (int)
print(startDate)  # output: 1589846400000
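Alternatively, CCXT ships a parse8601() helper on every exchange instance that produces the millisecond integer directly, and it sidesteps the timezone pitfall of datetime.timestamp(), which interprets a naive datetime in local time. A minimal sketch; the ISO string is just the question's date at UTC midnight:
import ccxt

exchange = ccxt.binance()
since = exchange.parse8601('2020-05-19T00:00:00Z')  # -> 1589846400000 (int, ms)
ohlcv = exchange.fetch_ohlcv('ETH/BTC', '1d', since=since, limit=50)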

Related

Slow serialization process in Django REST framework

I'm using django_rest_framework for my project and I have a problem.
models.py:
from django.db import models

class RelatedField3_2(models.Model):
    name = models.CharField(max_length=100)

    def __str__(self):
        return self.name

class RelatedField3(models.Model):
    name = models.CharField(max_length=100)
    relfield3_2 = models.ForeignKey(RelatedField3_2, on_delete=models.CASCADE)

    def __str__(self):
        return self.name

class RelatedField2(models.Model):
    name = models.CharField(max_length=100)

    def __str__(self):
        return self.name

class RelatedField1(models.Model):
    name = models.CharField(max_length=200)

    def __str__(self):
        return self.name

class MainTable(models.Model):
    owner = models.ForeignKey('auth.User', on_delete=models.CASCADE)
    field1 = models.IntegerField()
    field2 = models.IntegerField()
    field3 = models.IntegerField()
    field4 = models.DecimalField(max_digits=10, decimal_places=1)
    field5 = models.IntegerField(null=True)
    field6 = models.IntegerField()
    relfield1 = models.ForeignKey(RelatedField1, on_delete=models.CASCADE)
    relfield2 = models.ForeignKey(RelatedField2, on_delete=models.CASCADE)
    relfield3 = models.ForeignKey(RelatedField3, on_delete=models.CASCADE)
serializers.py:
from rest_framework import serializers
from main.models import MainTable, RelatedField1, RelatedField2, RelatedField3

class MainTableRelatedFields(serializers.RelatedField):
    def display_value(self, instance):
        return instance

    def to_representation(self, value):
        return str(value)

    def to_internal_value(self, data):
        return self.queryset.model.objects.get(name=data)

class MainTableSerializerList(serializers.ListSerializer):
    def create(self, validated_data):
        records = [MainTable(**item) for item in validated_data]
        return self.child.Meta.model.objects.bulk_create(records)

class MainTableSerializer(serializers.ModelSerializer):
    class Meta:
        model = MainTable
        list_serializer_class = MainTableSerializerList

    id = serializers.IntegerField(write_only=False, required=False)
    owner = serializers.ReadOnlyField(source='owner.username')
    relfield3 = MainTableRelatedFields(queryset=RelatedField3.objects.all())
    relfield2 = MainTableRelatedFields(queryset=RelatedField2.objects.all())
    relfield1 = MainTableRelatedFields(queryset=RelatedField1.objects.all())

    def create(self, validated_data):
        return self.Meta.model.objects.create(**validated_data)
views.py:
from rest_framework.decorators import action
from rest_framework.response import Response
from rest_framework import viewsets
from rest_framework.permissions import IsAuthenticated
from .serializers import MainTableSerializer

class MainTableUploadView(viewsets.ModelViewSet):
    permission_classes = (IsAuthenticated,)

    @action(['post'], detail=False)
    def upload_records(self, request, *args, **kwargs):
        serializer = MainTableSerializer(data=request.data, many=True)
        serializer.is_valid(raise_exception=True)
        serializer.save(owner=request.user)
        return Response(serializer.data)
send_json.py:
import requests

data_load_list = [{
    "field1": 1,
    "field2": 2,
    "field3": 3,
    "field4": "field4",
    "field5": 5,
    "field6": "field6",
    "relfield1": "test1",
    "relfield2": "test2",
    "relfield3": "test3"
} for i in range(1000)]

load_list_response = requests.post(url=url_upload,
                                   headers={'Authorization': f'Token {TOKEN}'},
                                   json=data_load_list)
When I run send_json.py to send 1000 records, I just get this error:
RemoteDisconnected Traceback (most recent call last)
~/.local/share/virtualenvs/DKykH6t1/lib/python3.8/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
669 # Make the request on the httplib connection object.
--> 670 httplib_response = self._make_request(
671 conn,
~/.local/share/virtualenvs/DKykH6t1/lib/python3.8/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
425 # Otherwise it looks like a bug in the code.
--> 426 six.raise_from(e, None)
427 except (SocketTimeout, BaseSSLError, SocketError) as e:
~/.local/share/virtualenvs/DKykH6t1/lib/python3.8/site-packages/urllib3/packages/six.py in raise_from(value, from_value)
~/.local/share/virtualenvs/DKykH6t1/lib/python3.8/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
420 try:
--> 421 httplib_response = conn.getresponse()
422 except BaseException as e:
/usr/lib/python3.8/http/client.py in getresponse(self)
1331 try:
-> 1332 response.begin()
1333 except ConnectionError:
/usr/lib/python3.8/http/client.py in begin(self)
302 while True:
--> 303 version, status, reason = self._read_status()
304 if status != CONTINUE:
/usr/lib/python3.8/http/client.py in _read_status(self)
271 # sending a valid response.
--> 272 raise RemoteDisconnected("Remote end closed connection without"
273 " response")
RemoteDisconnected: Remote end closed connection without response
During handling of the above exception, another exception occurred:
ProtocolError Traceback (most recent call last)
~/.local/share/virtualenvs/DKykH6t1/lib/python3.8/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
438 if not chunked:
--> 439 resp = conn.urlopen(
440 method=request.method,
~/.local/share/virtualenvs/DKykH6t1/lib/python3.8/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
725
--> 726 retries = retries.increment(
727 method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
~/.local/share/virtualenvs/DKykH6t1/lib/python3.8/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
402 if read is False or not self._is_method_retryable(method):
--> 403 raise six.reraise(type(error), error, _stacktrace)
404 elif read is not None:
~/.local/share/virtualenvs/DKykH6t1/lib/python3.8/site-packages/urllib3/packages/six.py in reraise(tp, value, tb)
733 if value.__traceback__ is not tb:
--> 734 raise value.with_traceback(tb)
735 raise value
~/.local/share/virtualenvs/DKykH6t1/lib/python3.8/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
669 # Make the request on the httplib connection object.
--> 670 httplib_response = self._make_request(
671 conn,
~/.local/share/virtualenvs/DKykH6t1/lib/python3.8/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
425 # Otherwise it looks like a bug in the code.
--> 426 six.raise_from(e, None)
427 except (SocketTimeout, BaseSSLError, SocketError) as e:
~/.local/share/virtualenvs/DKykH6t1/lib/python3.8/site-packages/urllib3/packages/six.py in raise_from(value, from_value)
~/.local/share/virtualenvs/DKykH6t1/lib/python3.8/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
420 try:
--> 421 httplib_response = conn.getresponse()
422 except BaseException as e:
/usr/lib/python3.8/http/client.py in getresponse(self)
1331 try:
-> 1332 response.begin()
1333 except ConnectionError:
/usr/lib/python3.8/http/client.py in begin(self)
302 while True:
--> 303 version, status, reason = self._read_status()
304 if status != CONTINUE:
/usr/lib/python3.8/http/client.py in _read_status(self)
271 # sending a valid response.
--> 272 raise RemoteDisconnected("Remote end closed connection without"
273 " response")
ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
<timed exec> in <module>
~/.local/share/virtualenvs/DKykH6t1/lib/python3.8/site-packages/requests/api.py in post(url, data, json, **kwargs)
117 """
118
--> 119 return request('post', url, data=data, json=json, **kwargs)
120
121
~/.local/share/virtualenvs/DKykH6t1/lib/python3.8/site-packages/requests/api.py in request(method, url, **kwargs)
59 # cases, and look like a memory leak in others.
60 with sessions.Session() as session:
---> 61 return session.request(method=method, url=url, **kwargs)
62
63
~/.local/share/virtualenvs/DKykH6t1/lib/python3.8/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
528 }
529 send_kwargs.update(settings)
--> 530 resp = self.send(prep, **send_kwargs)
531
532 return resp
~/.local/share/virtualenvs/DKykH6t1/lib/python3.8/site-packages/requests/sessions.py in send(self, request, **kwargs)
641
642 # Send the request
--> 643 r = adapter.send(request, **kwargs)
644
645 # Total elapsed time of the request (approximately)
~/.local/share/virtualenvs/DKykH6t1/lib/python3.8/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
496
497 except (ProtocolError, socket.error) as err:
--> 498 raise ConnectionError(err, request=request)
499
500 except MaxRetryError as e:
ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
When I run send_json.py to send 100 records, the data is loaded into the database and returned with a 200 status code; this takes 15 seconds.
How and with what can I optimize my code so that I can send 1000+ records to the database and quickly get the information back in the response body?
The solution that helped me (see the changes in the pull request):
https://github.com/hax2000/Relaed-Fields-API/pull/1/files
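For context: the posted to_internal_value() runs a separate objects.get(name=...) query for every related field of every record, so a 1000-record payload issues roughly 3000 SELECTs before the bulk insert even starts. Below is a minimal sketch of caching those lookups per request; the class name CachedRelatedField is hypothetical, and this is not necessarily what the linked pull request does:
from rest_framework import serializers

class CachedRelatedField(serializers.RelatedField):
    def to_representation(self, value):
        return str(value)

    def to_internal_value(self, data):
        # Build a name -> instance map once per field instance (i.e. once per
        # request, since DRF copies declared fields per serializer instance),
        # instead of running one objects.get() query per record.
        if not hasattr(self, '_cache'):
            self._cache = {obj.name: obj for obj in self.queryset.all()}
        try:
            return self._cache[data]
        except KeyError:
            raise serializers.ValidationError(f'Unknown name: {data}')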

HTTP Error 404: Not Found when downloading the Caltech101 dataset

I am trying to download the Caltech101 dataset in Google Colab, but I am getting the following error:
!wget 'http://www.vision.caltech.edu/Image_Datasets/Caltech256/256_ObjectCategories.tar.gz'
--2020-07-24 04:02:05--  http://www.vision.caltech.edu/Image_Datasets/Caltech256/256_ObjectCategories.tar.gz
Resolving www.vision.caltech.edu (www.vision.caltech.edu)... 34.208.54.77
Connecting to www.vision.caltech.edu (www.vision.caltech.edu)|34.208.54.77|:80... connected.
HTTP request sent, awaiting response... 404 Not Found
2020-07-24 04:02:05 ERROR 404: Not Found.
I also tried:
import urllib.request

urllib.request.urlretrieve(
    "http://www.vision.caltech.edu/Image_Datasets/Caltech101/101_ObjectCategories.tar.gz",
    "my-tar.gz"
)
and got the same error:
HTTPError Traceback (most recent call last)
in ()
1 import urllib
2 urllib.request.urlretrieve(
----> 3 "http://www.vision.caltech.edu/Image_Datasets/Caltech101/101_ObjectCategories.tar.gz", "my-tar.gz"
4 )
6 frames
/usr/lib/python3.6/urllib/request.py in urlretrieve(url, filename, reporthook, data)
246 url_type, path = splittype(url)
247
--> 248 with contextlib.closing(urlopen(url, data)) as fp:
249 headers = fp.info()
250
/usr/lib/python3.6/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
221 else:
222 opener = _opener
--> 223 return opener.open(url, data, timeout)
224
225 def install_opener(opener):
/usr/lib/python3.6/urllib/request.py in open(self, fullurl, data, timeout)
530 for processor in self.process_response.get(protocol, []):
531 meth = getattr(processor, meth_name)
--> 532 response = meth(req, response)
533
534 return response
/usr/lib/python3.6/urllib/request.py in http_response(self, request, response)
640 if not (200 <= code < 300):
641 response = self.parent.error(
--> 642 'http', request, response, code, msg, hdrs)
643
644 return response
/usr/lib/python3.6/urllib/request.py in error(self, proto, *args)
568 if http_err:
569 args = (dict, 'default', 'http_error_default') + orig_args
--> 570 return self._call_chain(*args)
571
572 # XXX probably also want an abstract factory that knows when it makes
/usr/lib/python3.6/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
502 for handler in handlers:
503 func = getattr(handler, meth_name)
--> 504 result = func(*args)
505 if result is not None:
506 return result
/usr/lib/python3.6/urllib/request.py in http_error_default(self, req, fp, code, msg, hdrs)
648 class HTTPDefaultErrorHandler(BaseHandler):
649 def http_error_default(self, req, fp, code, msg, hdrs):
--> 650 raise HTTPError(req.full_url, code, msg, hdrs, fp)
651
652 class HTTPRedirectHandler(BaseHandler):
HTTPError: HTTP Error 404: Not Found
Can you help me figure out what the problem is here?
Thanks
The issue is that the server doesn't actually have the content you are looking for.
The Caltech256 dataset is not available on the site.
You can try downloading it from https://www.kaggle.com/jessicali9530/caltech256
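In Colab, one way to fetch that Kaggle mirror is the official kaggle command-line tool; a sketch, assuming you have a Kaggle API token in ~/.kaggle/kaggle.json and that the downloaded archive is named after the dataset slug:
!pip install -q kaggle
!kaggle datasets download -d jessicali9530/caltech256
!unzip -q caltech256.zip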

'HTTPError: HTTP Error 403: Forbidden', or None returned when defining headers, while downloading CSV files from links in Python 3

Please advise how I can download CSV files from https://www.hesa.ac.uk with Python 3.
My scraped links to the csv files:
csv_link = ['/data-and-analysis/finances/table-2.csv', '/data-and-analysis/finances/table-3.csv','/data-and-analysis/finances/table-3s.csv','/data-and-analysis/finances/table-4.csv','/data-and-analysis/finances/table-9.csv','/data-and-analysis/finances/table-10.csv']
My code to download them:
import wget

for link in csv_link:
    full_link = 'https://www.hesa.ac.uk' + link
    print(print(full_link))
    wget.download(full_link)
I receive a 403 error (the None in the output below comes from the nested print call):
https://www.hesa.ac.uk/data-and-analysis/finances/table-2.csv
None
---------------------------------------------------------------------------
HTTPError Traceback (most recent call last)
<ipython-input-7-6d016e0bdd56> in <module>
3 full_link = 'https://www.hesa.ac.uk' + link
4 print(print(full_link))
----> 5 wget.download(full_link)
6
/usr/local/lib/python3.7/dist-packages/wget.py in download(url, out, bar)
524 else:
525 binurl = url
--> 526 (tmpfile, headers) = ulib.urlretrieve(binurl, tmpfile, callback)
527 filename = detect_filename(url, out, headers)
528 if outdir:
/usr/lib/python3.7/urllib/request.py in urlretrieve(url, filename, reporthook, data)
245 url_type, path = splittype(url)
246
--> 247 with contextlib.closing(urlopen(url, data)) as fp:
248 headers = fp.info()
249
/usr/lib/python3.7/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
220 else:
221 opener = _opener
--> 222 return opener.open(url, data, timeout)
223
224 def install_opener(opener):
/usr/lib/python3.7/urllib/request.py in open(self, fullurl, data, timeout)
529 for processor in self.process_response.get(protocol, []):
530 meth = getattr(processor, meth_name)
--> 531 response = meth(req, response)
532
533 return response
/usr/lib/python3.7/urllib/request.py in http_response(self, request, response)
639 if not (200 <= code < 300):
640 response = self.parent.error(
--> 641 'http', request, response, code, msg, hdrs)
642
643 return response
/usr/lib/python3.7/urllib/request.py in error(self, proto, *args)
567 if http_err:
568 args = (dict, 'default', 'http_error_default') + orig_args
--> 569 return self._call_chain(*args)
570
571 # XXX probably also want an abstract factory that knows when it makes
/usr/lib/python3.7/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
501 for handler in handlers:
502 func = getattr(handler, meth_name)
--> 503 result = func(*args)
504 if result is not None:
505 return result
/usr/lib/python3.7/urllib/request.py in http_error_default(self, req, fp, code, msg, hdrs)
647 class HTTPDefaultErrorHandler(BaseHandler):
648 def http_error_default(self, req, fp, code, msg, hdrs):
--> 649 raise HTTPError(req.full_url, code, msg, hdrs, fp)
650
651 class HTTPRedirectHandler(BaseHandler):
HTTPError: HTTP Error 403: Forbidden
If I modify my code to use headers, then I get None and a deprecation warning:
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:3: DeprecationWarning: AppURLopener style of invoking requests is deprecated. Use newer urlopen functions/methods
This is separate from the ipykernel package so we can avoid doing imports until
class AppURLopener(urllib.request.FancyURLopener):
    version = "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.69 Safari/537.36"

urllib._urlopener = AppURLopener()
for link in csv_link:
    full_link = 'https://www.hesa.ac.uk' + link
    print(print(full_link))
    urllib._urlopener.retrieve(full_link)
Please advise how to change my code so I can download my files. I would also really like to understand the proper way of downloading files from scraped links in Python 3 using Jupyter notebooks.
I made it work with the help of os.system. I'm still looking for a proper way to do this, but here is the code that solved my problem temporarily:
import os

for link in csv_link:
    full_url = 'https://www.hesa.ac.uk' + link
    os.system('wget ' + full_url)
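A more idiomatic approach (a sketch, untested against this particular site) is to use requests with a browser-like User-Agent header, since a 403 on a bare urllib request usually means the server rejects the default Python user agent:
import requests

csv_link = ['/data-and-analysis/finances/table-2.csv']  # the scraped links from above
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}

for link in csv_link:
    full_link = 'https://www.hesa.ac.uk' + link
    response = requests.get(full_link, headers=headers)
    response.raise_for_status()  # fail loudly if we still get a 403
    # save under the file's own name, e.g. table-2.csv
    with open(link.rsplit('/', 1)[-1], 'wb') as f:
        f.write(response.content)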

Python async AttributeError: __aexit__

I keep getting the error AttributeError: __aexit__ with the code below, but I don't really understand why this happens.
My Python version is: 3.6.4 (v3.6.4:d48eceb, Dec 19 2017, 06:04:45) [MSC v.1900 32 bit (Intel)]
import aiohttp
import asyncio
import tqdm

async def fetch_url(session_, url_, timeout_=10):
    with aiohttp.Timeout(timeout_):
        async with session_.get(url_) as response:
            text = await response.text()
            print("URL: {} - TEXT: {}".format(url_, len(text)))
            return text

async def parse_url(session, url, timeout=10):
    # get doc from url
    async with await fetch_url(session, url, timeout) as doc:
        print("DOC: {}".format(doc, len(doc)))
        return doc

async def parse_urls(session, urls, loop):
    tasks = [parse_url(session, url) for url in urls]
    responses = [await f for f in tqdm.tqdm(asyncio.as_completed(tasks), total=len(tasks))]
    return responses

if __name__ == '__main__':
    tickers = ['CTXS', 'MSFT', 'AAPL', 'GPRO', 'G', 'INTC', 'SYNC', 'SYNA']
    urls = ["https://finance.yahoo.com/quote/{}".format(ticker) for ticker in tickers]
    loop = asyncio.get_event_loop()
    with aiohttp.ClientSession(loop=loop) as session:
        parsed_data = loop.run_until_complete(parse_urls(session, urls, loop))
    print(parsed_data)
Error callstack:
C:\Python\Python36\python.exe C:/Users/me/.PyCharmCE2017.3/config/scratches/scratch_4.py
0%| | 0/8 [00:00<?, ?it/s]Traceback (most recent call last):
URL: https://finance.yahoo.com/quote/CTXS - TEXT: 462138
File "C:/Users/me/.PyCharmCE2017.3/config/scratches/scratch_4.py", line 34, in <module>
parsed_data = loop.run_until_complete(parse_urls(session, urls, loop))
File "C:\Python\Python36\lib\asyncio\base_events.py", line 467, in run_until_complete
return future.result()
File "C:/Users/me/.PyCharmCE2017.3/config/scratches/scratch_4.py", line 23, in parse_urls
responses = [await f for f in tqdm.tqdm(asyncio.as_completed(tasks), total = len(tasks))]
File "C:/Users/me/.PyCharmCE2017.3/config/scratches/scratch_4.py", line 23, in <listcomp>
responses = [await f for f in tqdm.tqdm(asyncio.as_completed(tasks), total = len(tasks))]
File "C:\Python\Python36\lib\asyncio\tasks.py", line 458, in _wait_for_one
return f.result() # May raise f.exception().
File "C:/Users/me/.PyCharmCE2017.3/config/scratches/scratch_4.py", line 16, in parse_url
async with await fetch_url(session, url, timeout) as doc:
AttributeError: __aexit__
Process finished with exit code 1
You are trying to use fetch_url as an asynchronous context manager, but it isn't one. You can either make it one:
class fetch_url:
    def __init__(self, session, url, timeout=10):
        self.session = session
        self.url = url
        self.timeout = timeout

    async def __aenter__(self):
        with aiohttp.Timeout(self.timeout):
            async with self.session.get(self.url) as response:
                text = await response.text()
                print("URL: {} - TEXT: {}".format(self.url, len(text)))
                return text

    async def __aexit__(self, exc_type, exc, tb):
        # clean up anything you need to clean up
        pass
or change your code to:
async def parse_url(session, url, timeout=10):
    # get doc from url
    doc = await fetch_url(session, url, timeout)
    print("DOC: {}".format(doc, len(doc)))
    return doc
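For what it's worth, on current Python (3.7+) and aiohttp 3.x the same program can be written more simply; a sketch, assuming aiohttp 3.x, where the old aiohttp.Timeout context manager is gone in favour of ClientTimeout and ClientSession is itself used as an async context manager:
import asyncio
import aiohttp

async def fetch_url(session, url, timeout=10):
    # ClientTimeout replaces the deprecated aiohttp.Timeout
    async with session.get(url, timeout=aiohttp.ClientTimeout(total=timeout)) as response:
        return await response.text()

async def main():
    tickers = ['CTXS', 'MSFT', 'AAPL']
    urls = ["https://finance.yahoo.com/quote/{}".format(t) for t in tickers]
    async with aiohttp.ClientSession() as session:
        docs = await asyncio.gather(*(fetch_url(session, url) for url in urls))
    for url, doc in zip(urls, docs):
        print("URL: {} - TEXT: {}".format(url, len(doc)))

asyncio.run(main())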

'HTTPError: HTTP Error 403: Forbidden' with Python 3.6.1

I am trying to parse this page: http://www.chronicle.com/article/Major-Private-Gifts-to-Higher/128264 with BeautifulSoup4 in IPython. I wrote these lines of code:
import urllib.request as ur
import re
page = ur.urlopen('http://www.chronicle.com/article/Major-Private-Gifts-to-Higher/128264').read()
And then I got this error:
HTTPError Traceback (most recent call last)
<ipython-input-27-8d5066f9c76f> in <module>()
----> 1 s = ur.urlopen("http://www.chronicle.com/article/Major-Private-Gifts-to-Higher/128264")
/Users/name/anaconda/lib/python3.6/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
221 else:
222 opener = _opener
--> 223 return opener.open(url, data, timeout)
224
225 def install_opener(opener):
/Users/name/anaconda/lib/python3.6/urllib/request.py in open(self, fullurl, data, timeout)
530 for processor in self.process_response.get(protocol, []):
531 meth = getattr(processor, meth_name)
--> 532 response = meth(req, response)
533
534 return response
/Users/name/anaconda/lib/python3.6/urllib/request.py in http_response(self, request, response)
640 if not (200 <= code < 300):
641 response = self.parent.error(
--> 642 'http', request, response, code, msg, hdrs)
643
644 return response
/Users/name/anaconda/lib/python3.6/urllib/request.py in error(self, proto, *args)
568 if http_err:
569 args = (dict, 'default', 'http_error_default') + orig_args
--> 570 return self._call_chain(*args)
571
572 # XXX probably also want an abstract factory that knows when it makes
/Users/name/anaconda/lib/python3.6/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
502 for handler in handlers:
503 func = getattr(handler, meth_name)
--> 504 result = func(*args)
505 if result is not None:
506 return result
/Users/name/anaconda/lib/python3.6/urllib/request.py in http_error_default(self, req, fp, code, msg, hdrs)
648 class HTTPDefaultErrorHandler(BaseHandler):
649 def http_error_default(self, req, fp, code, msg, hdrs):
--> 650 raise HTTPError(req.full_url, code, msg, hdrs, fp)
651
652 class HTTPRedirectHandler(BaseHandler):
HTTPError: HTTP Error 403: Forbidden
How can I fix this? Thank you in advance!
Using the requests module is much easier.
The issue, however, is what the previous answer said: the site does require some headers. The requests module has built-in support for this, as far as I'm aware. Note that instead of .read(), the method we use is .text:
import requests
from bs4 import BeautifulSoup as bs
urlopen = requests.get('http://www.chronicle.com/article/Major-Private-Gifts-to-Higher/128264').text
soup = bs(urlopen,'lxml')
print(soup)
You don't need to parse it with BeautifulSoup; you can just do:
import requests
urlopen = requests.get('http://www.chronicle.com/article/Major-Private-Gifts-to-Higher/128264').text
print(urlopen)
You will probably have to send the required HTTP headers as well. Have a look at the headers that, for example, Firefox sends to the page (using the browser's dev tools) and add those to the request. I'd guess that at least User-Agent is one of the headers that has to be set.
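A minimal sketch of that advice with urllib; the User-Agent value here is a generic example, so copy a real one from your browser if it still fails:
import urllib.request as ur

req = ur.Request(
    'http://www.chronicle.com/article/Major-Private-Gifts-to-Higher/128264',
    headers={'User-Agent': 'Mozilla/5.0'},
)
page = ur.urlopen(req).read()
print(len(page))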
