I am trying to get the data from a site with requests, using this simple code (running on Google Colab):
import requests, json

def GetAllStocks():
    url = 'https://iboard.ssi.com.vn/dchart/api/1.1/defaultAllStocks'
    res = requests.get(url)
    return json.loads(res.text)
This worked well until this morning, and I cannot figure out why it is now returning a "TooManyRedirects: Exceeded 30 redirects." error.
I can still get the data just by browsing the URL directly in Google Chrome in Incognito mode, so I do not think this is because of cookies. I tried passing the whole set of browser headers but it still does not work. I tried passing allow_redirects=False and the returned status_code is 302.
I am not sure if there is anything I could try as this is so strange to me.
Any guidance is much appreciated. Thank you very much!
You need to send a User-Agent header to mimic regular browser behaviour.
import requests, json, random

def GetAllStocks():
    user_agents = [
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.79 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:101.0) Gecko/20100101 Firefox/101.0",
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:77.0) Gecko/20190101 Firefox/77.0",
        "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:77.0) Gecko/20100101 Firefox/77.0",
    ]
    headers = {
        "User-Agent": random.choice(user_agents),
        "Accept": "application/json",
    }
    url = "https://iboard.ssi.com.vn/dchart/api/1.1/defaultAllStocks"
    res = requests.get(url, headers=headers)
    return json.loads(res.text)

data = GetAllStocks()
print(data)
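If the User-Agent alone does not fix it, reusing a requests.Session can also help, since it resends any cookies set by the server on later requests; a minimal sketch, assuming the redirect loop depends on such a cookie:

import requests

# Sketch: a Session keeps cookies between requests, which can break redirect
# loops that expect a cookie set by an earlier response (an assumption here).
session = requests.Session()
session.headers.update({
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
    "Accept": "application/json",
})
res = session.get("https://iboard.ssi.com.vn/dchart/api/1.1/defaultAllStocks")
print(res.status_code, len(res.history))  # res.history lists any redirects that were followed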
I want to scrape some of the data from here, which is delivered over WebSockets. After inspecting Chrome DevTools for the wss address, the request headers, and the negotiation message, I wrote:
from websocket import create_connection

headers = {
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en-US,en;q=0.9,fa;q=0.8',
    'Cache-Control': 'no-cache',
    'Connection': 'Upgrade',
    'Host': 'stream179.forexpros.com',
    'Origin': 'https://www.investing.com',
    'Pragma': 'no-cache',
    'Sec-WebSocket-Extensions': 'client_max_window_bits',
    'Sec-WebSocket-Key': 'ldcvnZNquzPkSNvpSdI09g==',
    'Sec-WebSocket-Version': '13',
    'Upgrade': 'websocket',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36'
}

ws = create_connection('wss://stream179.forexpros.com/echo/894/l27e2ja8/websocket', header=headers)
nego_message = '''["{\"_event\":\"bulk-subscribe\",\"tzID\":8,\"message\":\"pid-1:%%pid-8839:%%pid-166:%%pid-20:%%pid-169:%%pid-170:%%pid-44336:%%pid-27:%%pid-172:%%pid-2:%%pid-3:%%pid-5:%%pid-7:%%pid-9:%%pid-10:%%pid-945629:%%pid-11:%%pid-16:%%pid-68:%%pidTechSumm-1:%%pidTechSumm-2:%%pidTechSumm-3:%%pidTechSumm-5:%%pidTechSumm-7:%%pidTechSumm-9:%%pidTechSumm-10:%%pidExt-1:%%event-393634:%%event-393633:%%event-393636:%%event-393638:%%event-394479:%%event-394518:%%event-394514:%%event-394516:%%event-394515:%%event-394517:%%event-393654:%%event-394467:%%event-393653:%%event-394468:%%event-394545:%%event-394549:%%event-394548:%%event-394547:%%event-394550:%%event-394546:%%event-394551:%%event-394553:%%event-394552:%%event-394743:%%event-394744:%%event-393661:%%event-394469:%%event-394470:%%event-393680:%%event-393682:%%event-393681:%%event-393687:%%event-393694:%%event-393685:%%event-393689:%%event-393688:%%event-393695:%%event-393698:%%event-393704:%%event-393705:%%event-393724:%%event-393723:%%event-393725:%%event-393726:%%event-394591:%%event-393736:%%event-393733:%%event-393734:%%event-393740:%%event-393731:%%event-393732:%%event-393730:%%event-394617:%%event-394616:%%event-393737:%%event-378304:%%event-393645:%%event-394619:%%event-393755:%%event-393757:%%event-393760:%%event-393756:%%event-393758:%%event-393759:%%event-393761:%%event-393762:%%event-394481:%%event-394625:%%event-393754:%%event-394483:%%event-393775:%%event-394621:%%event-394622:%%event-376710:%%event-394623:%%event-394484:%%event-394624:%%isOpenExch-1:%%isOpenExch-2:%%isOpenExch-13:%%isOpenExch-3:%%isOpenExch-4:%%isOpenPair-1:%%isOpenPair-8839:%%isOpenPair-44336:%%cmt-1-5-1:%%domain-1:\"}"]'''
ws.send(nego_message)
while True:
    print(ws.recv())
but I'm getting:
o
Traceback (most recent call last):
File "test.py", line 647, in <module>
print(ws.recv())
File "C:\Users\me\AppData\Local\Programs\Python\Python37\lib\site-packages\websocket\_core.py", line 313, in recv
opcode, data = self.recv_data()
File "C:\Users\me\AppData\Local\Programs\Python\Python37\lib\site-packages\websocket\_core.py", line 330, in recv_data
opcode, frame = self.recv_data_frame(control_frame)
File "C:\Users\me\AppData\Local\Programs\Python\Python37\lib\site-packages\websocket\_core.py", line 343, in recv_data_frame
frame = self.recv_frame()
File "C:\Users\me\AppData\Local\Programs\Python\Python37\lib\site-packages\websocket\_core.py", line 377, in recv_frame
return self.frame_buffer.recv_frame()
File "C:\Users\me\AppData\Local\Programs\Python\Python37\lib\site-packages\websocket\_abnf.py", line 361, in recv_frame
self.recv_header()
File "C:\Users\me\AppData\Local\Programs\Python\Python37\lib\site-packages\websocket\_abnf.py", line 309, in recv_header
header = self.recv_strict(2)
File "C:\Users\me\AppData\Local\Programs\Python\Python37\lib\site-packages\websocket\_abnf.py", line 396, in recv_strict
bytes_ = self.recv(min(16384, shortage))
File "C:\Users\me\AppData\Local\Programs\Python\Python37\lib\site-packages\websocket\_core.py", line 452, in _recv
return recv(self.sock, bufsize)
File "C:\Users\me\AppData\Local\Programs\Python\Python37\lib\site-packages\websocket\_socket.py", line 115, in recv
"Connection is already closed.")
websocket._exceptions.WebSocketConnectionClosedException: Connection is already closed.
[Finished in 1.9s]
What am I missing here?
Update 1: updated the code to use WebSocketApp:
import time
import websocket

# headers and nego_message as defined in the snippet above

def on_message(ws, message):
    print("message:", message)

def on_error(ws, error):
    print("error:", error)

def on_close(ws):
    print("closed.")

def on_open(ws):
    print("opened")
    time.sleep(1)
    ws.send(nego_message)

ws = websocket.WebSocketApp(
    "wss://stream179.forexpros.com/echo/894/l27e2ja8/websocket",
    on_open=on_open,
    on_message=on_message,
    on_error=on_error,
    on_close=on_close,
    header=headers
)
websocket.enableTrace(True)
ws.run_forever()
but still no success:
--- request header ---
GET /echo/894/l27e2ja8/websocket HTTP/1.1
Upgrade: websocket
Connection: Upgrade
Host: stream179.forexpros.com
Origin: http://stream179.forexpros.com
Accept-Encoding: gzip, deflate, br
Accept-Language: en-US,en;q=0.9,fa;q=0.8
Cache-Control: no-cache
Connection: Upgrade
Host: stream179.forexpros.com
Origin: https://www.investing.com
Pragma: no-cache
Sec-WebSocket-Extensions: client_max_window_bits
Sec-WebSocket-Key: ldcvnZNquzPkSNvpSdI09g==
Sec-WebSocket-Version: 13
Upgrade: websocket
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36
-----------------------
--- response header ---
HTTP/1.1 101 Switching Protocols
Upgrade: websocket
Connection: Upgrade
Sec-WebSocket-Accept: XPKKpUMZLpSYx/1z8Q0499hcobs=
-----------------------
opened
send: b'\x81\xfe\x06{_\xda7\xd2\x04\xf8L\xf0\x00\xbfA\xb71\xae\x15\xe8}\xb8B\xbe4\xf7D\xa7=\xa9T\xa06\xb8R\xf0s\xf8C\xa8\x16\x9e\x15\xe8g\xf6...' (long masked binary frame, truncated)
message: o
send: b'\x88\x82!\xdd\x07\xcf"5'
closed.
[Finished in 2.3s]
I tried removing all the backslash escaping (and the outer ["..."] wrapper) from the message sent, and eventually it worked.
nego_message = '{"_event":"bulk-subscribe","tzID":8,"message":"pid-0:%%isOpenExch-1:%%pid-8849:%%isOpenExch-1004:%%pid-8833:%%pid-8862:%%pid-8830:%%pid-8836:%%pid-8831:%%pid-8916:%%pid-8832:%%pid-169:%%pid-20:%%isOpenExch-2:%%pid-166:%%pid-172:%%isOpenExch-4:%%pid-27:%%isOpenExch-3:%%pid-167:%%isOpenExch-9:%%pid-178:%%isOpenExch-20:%%pid-6408:%%pid-6369:%%pid-13994:%%pid-6435:%%pid-13063:%%pid-26490:%%pid-243:%%pid-1:%%isOpenExch-1002:%%pid-2:%%pid-3:%%pid-5:%%pid-7:%%pid-9:%%pid-10:%%pid-23705:%%pid-23706:%%pid-23703:%%pid-23698:%%pid-8880:%%isOpenExch-118:%%pid-8895:%%pid-1141794:%%pid-1175152:%%isOpenExch-152:%%pid-1175153:%%pid-14958:%%pid-44336:%%isOpenExch-97:%%pid-8827:%%pid-6497:%%pid-941155:%%pid-104395:%%pid-1013048:%%pid-1055979:%%pid-1177973:%%pid-1142416:%%pidExt-1:%%cmt-1-5-1:%%pid-252:%%pid-1031244:%%isOpenExch-125:"}'
ws.send(nego_message)
while True:
    print(ws.recv())
Outputs:
a["{\"message\":\"pid-3::{\\\"pid\\\":\\\"3\\\",\\\"last_dir\\\":\\\"greenBg\\\",\\\"last_numeric\\\":149.19,\\\"last\\\":\\\"149.19\\\",\\\"bid\\\":\\\"149.18\\\",\\\"ask\\\":\\\"149.19\\\",\\\"high\\\":\\\"149.29\\\",\\\"low\\\":\\\"149.12\\\",\\\"last_close\\\":\\\"149.26\\\",\\\"pc\\\":\\\"-0.07\\\",\\\"pcp\\\":\\\"-0.05%\\\",\\\"pc_col\\\":\\\"redFont\\\",\\\"turnover\\\":\\\"18.13K\\\",\\\"turnover_numeric\\\":\\\"18126\\\",\\\"time\\\":\\\"0:39:09\\\",\\\"timestamp\\\":1666139948}\"}"]
The while loop is calling ws.recv() a second time after the server has already closed the connection. If you simply do:
print(ws.recv())
it will not attempt to call .recv() on a closed connection. That is why your output prints o (the first frame received) before the stack trace.
As an aside, it seems like you might want a longer running connection using websocket.WebSocketApp (example) for a scrape.
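A minimal sketch of such a longer-running connection, reusing the headers and nego_message from the question; ping_interval and ping_timeout are run_forever() options in websocket-client that keep the socket alive:

import websocket

def on_open(ws):
    ws.send(nego_message)            # subscribe as soon as the socket is open

def on_message(ws, message):
    print("message:", message)

ws = websocket.WebSocketApp(
    "wss://stream179.forexpros.com/echo/894/l27e2ja8/websocket",
    header=headers,                  # headers as defined in the question
    on_open=on_open,
    on_message=on_message,
)
# Ping every 20 s and give up if no pong arrives within 10 s, so the scrape
# keeps running instead of exiting after the first frame.
ws.run_forever(ping_interval=20, ping_timeout=10)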
I have a large number of short URLs and I want to expand them. I found the following code somewhere online (I have lost the source):
short_url = "t.co/NHBbLlfCaa"
r = requests.get(short_url)
if r.status_code == 200:
    print("Actual url:%s" % r.url)
It works perfectly. But I get this error when I hit the same server many times:
urllib3.exceptions.MaxRetryError:
HTTPConnectionPool(host='www.fatlossadvice.pw', port=80): Max retries exceeded with url:
/TIPS/KILLED-THAT-TREADMILL-WORKOUT-WORD-TO-TIMMY-GACQUIN.ASP
(Caused by NewConnectionError(': Failed to establish a new connection: [Errno 11004] getaddrinfo failed',))
I tried many solutions like the ones suggested here: Max retries exceeded with URL in requests, but nothing worked.
I was thinking about another solution, which is to pass a User-Agent in the request, changing it randomly each time (using a large list of user agents):
import requests
import numpy as np

user_agent_list = [
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:25.0) Gecko/20100101 Firefox/25.0',
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:24.0) Gecko/20100101 Firefox/24.0',
    'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36',
]

r = requests.get(short_url, headers={'User-Agent': user_agent_list[np.random.randint(0, len(user_agent_list))]})
if r.status_code == 200:
    print("Actual url:%s" % r.url)
My problem is that r.url always returns the short URL instead of the long (expanded) one.
What am I missing?
You can prevent the error by adding allow_redirects=False to the requests.get() call, so that requests does not follow the redirect to a page that no longer exists (which is what raises the error). You then have to examine the Location header sent by the server yourself (the URLs below are obfuscated: replace XXXX with https and remove the spaces):
import requests

short_url = ["XXXX t.co /namDL4YHYu",
             'XXXX t.co /MjvmV',
             'XXXX t.co /JSjtxfaxRJ',
             'XXXX t.co /xxGSANSE8K',
             'XXXX t.co /ZRhf5gWNQg']

for url in short_url:
    r = requests.get(url, allow_redirects=False)
    try:
        print(url, r.headers['location'])
    except KeyError:
        print(url, "Page doesn't exist!")
Prints:
XXXX t.co/namDL4YHYu http://gottimechillinaround.tumblr.com/post/133931725110/tip-672
XXXX t.co/MjvmV Page doesn't exist!
XXXX t.co/JSjtxfaxRJ http://www.youtube.com/watch?v=rE693eNyyss
XXXX t.co/xxGSANSE8K http://www.losefattips.pw/Tips/My-stretch-before-and-after-my-workout-is-just-as-important-to-me-as-my-workout.asp
XXXX t.co/ZRhf5gWNQg http://www.youtube.com/watch?v=3OK1P9GzDPM
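If a shortener redirects through more than one hop, the same idea can be extended by following the Location header manually with a hop limit; a minimal sketch (the URL in the example is the one from the question, with the scheme restored):

import requests

def expand(url, max_hops=10):
    # Follow redirects by hand so a dead destination host can't raise MaxRetryError.
    for _ in range(max_hops):
        try:
            r = requests.get(url, allow_redirects=False, timeout=10)
        except requests.exceptions.ConnectionError:
            return url               # next hop no longer resolves; keep the last known URL
        location = r.headers.get('location')
        if not location:
            return url               # no further redirect: this is the expanded URL
        url = location               # note: a relative Location would need urljoin()
    return url

print(expand("https://t.co/NHBbLlfCaa"))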
I am trying to log in to the site pythonanywhere.com:
import requests

url = 'https://www.pythonanywhere.com/login'
s = requests.session()
values = {
    'auth-username': 'username',
    'auth-password': 'password'}
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
u = s.post(url, data=values, headers=headers)
But I am getting a <Response [403]>, CSRF verification failed. How do I log in to that site?
You need to GET the page first, so you can pick up the csrftoken and sessionid cookies. And remember to set Referer to https://www.pythonanywhere.com/login/.
import requests

url = 'https://www.pythonanywhere.com/login'
s = requests.session()
s.get(url)
values = {
    'auth-username': 'username',
    'auth-password': 'password',
    "csrfmiddlewaretoken": s.cookies.get("csrftoken"),
    "login_view-current_step": "auth"
}
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36',
           'Referer': 'https://www.pythonanywhere.com/login/'}
u = s.post(url, data=values, headers=headers)
print(u.content)
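To check that the login actually succeeded rather than just returning the login page again, it may help to look at where the POST ended up; a small sketch, assuming the form field names above are still what the site expects:

# A successful Django login redirects away from /login/; requests follows it by default.
print(u.status_code)   # 200 after the redirect has been followed
print(u.url)           # should no longer be https://www.pythonanywhere.com/login/ on success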
So I have import urllib2 in Python 3.2, using Komodo Edit (not sure if that matters).
Using it in these spots:
opener = urllib2.build_opener()
...
request = urllib2.Request(src, headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0'})
srclen = float(urllib2.urlopen(request).info().get('Content-Length'))
...
request = urllib2.Request(url, headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0'})
srclen = float(urllib2.urlopen(request).info().get('Content-Length'))
...
request = urllib2.Request(url, headers=url_headers)
response = urllib2.urlopen(request)
...
request = urllib2.Request(RPTURL + rpt + '.xxx', headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0'})
srclen = float(urllib2.urlopen(request).info().get('Content-Length'))
except urllib2.HTTPError:
I get the error ModuleNotFoundError: No module named 'urllib2'
How can I correct this?
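urllib2 only exists in Python 2; in Python 3 its pieces moved into urllib.request and urllib.error. A minimal sketch of the equivalent calls, using a placeholder URL in place of the question's src:

import urllib.request
import urllib.error

src = "https://example.com/file"    # placeholder; use the same src as in the question
request = urllib.request.Request(src, headers={
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0'})
try:
    response = urllib.request.urlopen(request)
    srclen = float(response.info().get('Content-Length'))
except urllib.error.HTTPError as err:
    print(err.code)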