Using urllib in a loop - python-3.x

I am trying to write a simple Python script that checks the status code of a specific URL and takes the necessary action based on the return code. I am using the urllib module to achieve this. The issue with this code is that I have different departments defined in a list and need to substitute each of them into the URL passed to urllib, but I can't seem to find a way to insert the list elements inside the for loop.
import getopt, sys
import urllib.request
depts = [ 'support', 'edelivery', 'docs']
for dept in depts:
res = urllib.request.urlopen('https://dept.oracle.com').getcode()
print(res)
I get the below error. Can we actually iterate over a list in urllib module in a loop?
#python3 reg_c_with_all.py
Traceback (most recent call last):
File "/usr/lib64/python3.7/urllib/request.py", line 1350, in do_open
encode_chunked=req.has_header('Transfer-encoding'))
File "/usr/lib64/python3.7/http/client.py", line 1277, in request
self._send_request(method, url, body, headers, encode_chunked)
File "/usr/lib64/python3.7/http/client.py", line 1323, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "/usr/lib64/python3.7/http/client.py", line 1272, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "/usr/lib64/python3.7/http/client.py", line 1032, in _send_output
self.send(msg)
File "/usr/lib64/python3.7/http/client.py", line 972, in send
self.connect()
File "/usr/lib64/python3.7/http/client.py", line 1439, in connect
super().connect()
File "/usr/lib64/python3.7/http/client.py", line 944, in connect
(self.host,self.port), self.timeout, self.source_address)
File "/usr/lib64/python3.7/socket.py", line 707, in create_connection
for res in getaddrinfo(host, port, 0, SOCK_STREAM):
File "/usr/lib64/python3.7/socket.py", line 752, in getaddrinfo
for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno -2] Name or service not known
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "reg_c_with_all.py", line 6, in <module>
res = urllib.request.urlopen('https://dept.oracle.com').getcode()
File "/usr/lib64/python3.7/urllib/request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "/usr/lib64/python3.7/urllib/request.py", line 525, in open
response = self._open(req, data)
File "/usr/lib64/python3.7/urllib/request.py", line 543, in _open
'_open', req)
File "/usr/lib64/python3.7/urllib/request.py", line 503, in _call_chain
result = func(*args)
File "/usr/lib64/python3.7/urllib/request.py", line 1393, in https_open
context=self._context, check_hostname=self._check_hostname)
File "/usr/lib64/python3.7/urllib/request.py", line 1352, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno -2] Name or service not known>
When I run it individually it returns the response code as expected.
>>> urllib.request.urlopen('https://support.oracle.com').getcode()
200
>>> urllib.request.urlopen('https://docs.oracle.com').getcode()
200
>>> urllib.request.urlopen('https://edelivery.oracle.com').getcode()
200
>>>

Your code iterates over the departments, but it does not use them to modify the URL. Try using an f-string to insert the value of dept into the URL:
import getopt, sys
import urllib.error
import urllib.request

depts = ['support', 'edelivery', 'docs']

for dept in depts:
    # Build the per-department URL from the loop variable.
    url = f'https://{dept}.oracle.com'
    try:
        # The context manager closes the connection once the status is read.
        with urllib.request.urlopen(url) as response:
            res = response.getcode()
    except urllib.error.HTTPError as err:
        # urlopen raises on 4xx/5xx responses; the status code is still
        # available on the exception, so report it like any other code.
        res = err.code
    except urllib.error.URLError as err:
        # DNS or connection failure: there is no status code for this host,
        # so report the reason and carry on with the next department.
        print(f'{url} => failed ({err.reason})')
        continue
    print(f'{url} => {res}')
Output:
https://support.oracle.com => 200
https://edelivery.oracle.com => 200
https://docs.oracle.com => 200

Related

MinIO | Failed to establish a new connection: [Errno -2] Name or service not known

Goal: download all files within a folder, in a MinIO bucket.
Since I want to dynamically download all files in a folder, I use:
objs = list(client.list_objects(bucket, recursive=True)).sort()
That way, I can iterate over each file name for download using:
for o in objs:
obj = client.get_object(bucket, o)
Downloading a specified file also fails:
obj = client.get_object(bucket, 'industry/gri/esg/ESG_COMP.csv')
Code:
import pandas as pd

from sdg.datasource.MinioConn import MinioConn

client = MinioConn().client()
bucket = 'synthetic-data-gen'
obj = client.get_object(bucket, 'industry/gri/esg/ESG_COMP.csv')
print('###')
print(obj)
# sorted() returns the ordered list; list.sort() works in place and returns
# None, so assigning its result would leave objs as None.
objs = sorted(client.list_objects(bucket, recursive=True),
              key=lambda item: item.object_name)
dfs = []
for o in objs:
    # get_object expects the object's name (a str), not the listing entry.
    obj = client.get_object(bucket, o.object_name)
    try:
        dfs.append(pd.read_csv(obj).iloc[:, 0].tolist())  # 1 column csvs
    finally:
        # Release the HTTP connection once the CSV has been read.
        obj.close()
        obj.release_conn()
MinioConn:
from minio import Minio
class MinioConn:
    """Hold MinIO connection settings and build clients from them."""

    def __init__(self,
                 host='foo.bar.foo.bar.com:9000',
                 access_key='CENSORED', secret_key='CENSORED',
                 secure=False):
        """Store the host, the two keys and the secure flag on the instance."""
        self.secure = secure
        self.host = host
        self.access_key, self.secret_key = access_key, secret_key

    def client(self):
        """Return a new Minio object created from the stored settings."""
        # Host and keys are passed positionally, the secure flag by keyword.
        settings = (self.host, self.access_key, self.secret_key)
        return Minio(*settings, secure=self.secure)
Traceback:
Traceback (most recent call last):
File "/home/me/miniconda3/envs/sdg/lib/python3.8/site-packages/urllib3/connection.py", line 174, in _new_conn
conn = connection.create_connection(
File "/home/me/miniconda3/envs/sdg/lib/python3.8/site-packages/urllib3/util/connection.py", line 73, in create_connection
for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
File "/home/me/miniconda3/envs/sdg/lib/python3.8/socket.py", line 918, in getaddrinfo
for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno -2] Name or service not known
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/me/miniconda3/envs/sdg/lib/python3.8/site-packages/urllib3/connectionpool.py", line 699, in urlopen
httplib_response = self._make_request(
File "/home/me/miniconda3/envs/sdg/lib/python3.8/site-packages/urllib3/connectionpool.py", line 394, in _make_request
conn.request(method, url, **httplib_request_kw)
File "/home/me/miniconda3/envs/sdg/lib/python3.8/site-packages/urllib3/connection.py", line 239, in request
super(HTTPConnection, self).request(method, url, body=body, headers=headers)
File "/home/me/miniconda3/envs/sdg/lib/python3.8/http/client.py", line 1255, in request
self._send_request(method, url, body, headers, encode_chunked)
File "/home/me/miniconda3/envs/sdg/lib/python3.8/http/client.py", line 1301, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "/home/me/miniconda3/envs/sdg/lib/python3.8/http/client.py", line 1250, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "/home/me/miniconda3/envs/sdg/lib/python3.8/http/client.py", line 1010, in _send_output
self.send(msg)
File "/home/me/miniconda3/envs/sdg/lib/python3.8/http/client.py", line 950, in send
self.connect()
File "/home/me/miniconda3/envs/sdg/lib/python3.8/site-packages/urllib3/connection.py", line 205, in connect
conn = self._new_conn()
File "/home/me/miniconda3/envs/sdg/lib/python3.8/site-packages/urllib3/connection.py", line 186, in _new_conn
raise NewConnectionError(
urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPConnection object at 0x7fc6f4ee17f0>: Failed to establish a new connection: [Errno -2] Name or service not known
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/me/miniconda3/envs/sdg/lib/python3.8/runpy.py", line 185, in _run_module_as_main
mod_name, mod_spec, code = _get_module_details(mod_name, _Error)
File "/home/me/miniconda3/envs/sdg/lib/python3.8/runpy.py", line 144, in _get_module_details
return _get_module_details(pkg_main_name, error)
File "/home/me/miniconda3/envs/sdg/lib/python3.8/runpy.py", line 111, in _get_module_details
__import__(pkg_name)
File "/mnt/c/Users/me/Documents/GitHub/repo/project/foo/__init__.py", line 5, in <module>
from sdg.sdg import generate_data
File "/mnt/c/Users/me/Documents/GitHub/repo/project/foo/sdg.py", line 10, in <module>
from sdg.industry.gri.generator import Generator as GRIGenerator
File "/mnt/c/Users/me/Documents/GitHub/repo/project/foo/industry/gri/generator.py", line 40, in <module>
class Generator:
File "/mnt/c/Users/me/Documents/GitHub/repo/project/foo/industry/gri/generator.py", line 346, in Generator
dfs = get_esgs()
File "/mnt/c/Users/me/Documents/GitHub/repo/project/foo/industry/gri/generator.py", line 61, in get_esgs
obj = client.get_object(bucket, 'industry/gri/esg/ESG_COMP.csv')
File "/home/me/miniconda3/envs/sdg/lib/python3.8/site-packages/minio/api.py", line 1151, in get_object
return self._execute(
File "/home/me/miniconda3/envs/sdg/lib/python3.8/site-packages/minio/api.py", line 394, in _execute
region = self._get_region(bucket_name, None)
File "/home/me/miniconda3/envs/sdg/lib/python3.8/site-packages/minio/api.py", line 461, in _get_region
response = self._url_open(
File "/home/me/miniconda3/envs/sdg/lib/python3.8/site-packages/minio/api.py", line 266, in _url_open
response = self._http.urlopen(
File "/home/me/miniconda3/envs/sdg/lib/python3.8/site-packages/urllib3/poolmanager.py", line 375, in urlopen
response = conn.urlopen(method, u.request_uri, **kw)
File "/home/me/miniconda3/envs/sdg/lib/python3.8/site-packages/urllib3/connectionpool.py", line 783, in urlopen
return self.urlopen(
File "/home/me/miniconda3/envs/sdg/lib/python3.8/site-packages/urllib3/connectionpool.py", line 783, in urlopen
return self.urlopen(
File "/home/me/miniconda3/envs/sdg/lib/python3.8/site-packages/urllib3/connectionpool.py", line 783, in urlopen
return self.urlopen(
[Previous line repeated 2 more times]
File "/home/me/miniconda3/envs/sdg/lib/python3.8/site-packages/urllib3/connectionpool.py", line 755, in urlopen
retries = retries.increment(
File "/home/me/miniconda3/envs/sdg/lib/python3.8/site-packages/urllib3/util/retry.py", line 574, in increment
raise MaxRetryError(_pool, url, error or ResponseError(cause))
urllib3.exceptions.MaxRetryError: HTTPConnectionPool(host='foo.bar.foo.bar.com', port=9000): Max retries exceeded with url: /synthetic-data-gen?location= (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7fc6f4ee17f0>: Failed to establish a new connection: [Errno -2] Name or service not known'))
The error message:
socket.gaierror: [Errno -2] Name or service not known
implies that DNS resolution on the host name (foo.bar.foo.bar.com) failed. You likely need to check your DNS configuration or fix the host name if it is incorrect.

Python 3.7 - Download Image - Urllib.request.urlretrieve Error

I am really new to programming and am currently learning Python on YouTube ('The New Boston - Python 3').
I was trying to download an image from the internet based on the code shown in the video, but an error popped up.
Here is the code :
import random
import urllib.request
def download_web_image(url):
    """Download *url* to a file named ``<number>.gif``.

    The number comes from ``random.randrange(1, 1000)``, so the file name is
    anything from ``1.gif`` to ``999.gif``. The name is relative, so the file
    is written to the current working directory. The ``.gif`` suffix is
    fixed; it is not derived from the URL.
    """
    number = random.randrange(1, 1000)
    full_name = f'{number}.gif'  # e.g. 42 -> '42.gif'
    urllib.request.urlretrieve(url, full_name)
download_web_image ('https://images.freeimages.com/images/large-previews/ed3/a-stormy-paradise-1-1563744.jpg')
And the error:
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/urllib/request.py", line 1317, in do_open
encode_chunked=req.has_header('Transfer-encoding'))
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/http/client.py", line 1229, in request
self._send_request(method, url, body, headers, encode_chunked)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/http/client.py", line 1275, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/http/client.py", line 1224, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/http/client.py", line 1016, in _send_output
self.send(msg)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/http/client.py", line 956, in send
self.connect()
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/http/client.py", line 1392, in connect
server_hostname=server_hostname)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/ssl.py", line 412, in wrap_socket
session=session
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/ssl.py", line 853, in _create
self.do_handshake()
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/ssl.py", line 1117, in do_handshake
self._sslobj.do_handshake()
ssl.SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self signed certificate in certificate chain (_ssl.c:1056)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/isaactai/PycharmProjects/isaacfirst/IMAGEDOWNLOAD.py", line 10, in
download_web_image ('https://images.freeimages.com/images/large-previews/ed3/a-stormy-paradise-1-1563744.jpg')
File "/Users/isaactai/PycharmProjects/isaacfirst/IMAGEDOWNLOAD.py", line 8, in download_web_image
urllib.request.urlretrieve(url, full_name)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/urllib/request.py", line 247, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/urllib/request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/urllib/request.py", line 525, in open
response = self._open(req, data)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/urllib/request.py", line 543, in _open
'_open', req)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/urllib/request.py", line 503, in _call_chain
result = func(*args)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/urllib/request.py", line 1360, in https_open
context=self._context, check_hostname=self._check_hostname)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/urllib/request.py", line 1319, in do_open
raise URLError(err)
urllib.error.URLError:
Process finished with exit code 1
I am using PyCharm 2018.3 version
Please help me, thank you
Go to the folder where Python is installed. It should have a name like Python 3.x with x being whatever version of python you installed. Now double click on 'Install Certificates.command'. Had this error before and someone on stack helped me fix it also.
The path for me was as follows:
C:\Python33\Tools\Scripts
If that doesn't work, here is another workaround using the ssl module:
ssl is part of the Python standard library, so there is nothing to install with pip.
Add this to your code before making the request.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
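Essentially, this disables HTTPS certificate verification for urllib, so the request goes through even though the certificate chain cannot be validated. That leaves the connection open to man-in-the-middle attacks, so treat it as a temporary workaround for sites you trust rather than something to do before every HTTPS request; installing the certificates as described above is the proper fix.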

simple python yweather program, basically the one in its documentation, returns error 8

import yweather
# Look up the identifier for the location, then fetch and print its weather.
weather_client = yweather.Client()
home_woeid = weather_client.fetch_woeid("T8T 0B2")
home_weather = weather_client.fetch_weather(home_woeid)
print(home_weather)
This is my program. It is the same as in the documentation (https://yweather.readthedocs.io/en/v0.1/usage.html) except for the variable names and the location (T8T 0B2). That is my location and is therefore correct, but I get this error when I run the program:
...$ python weather.py
Traceback (most recent call last):
File "/usr/local/Cellar/python3/3.6.1/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 1318, in do_open
encode_chunked=req.has_header('Transfer-encoding'))
File "/usr/local/Cellar/python3/3.6.1/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 1239, in request
self._send_request(method, url, body, headers, encode_chunked)
File "/usr/local/Cellar/python3/3.6.1/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 1285, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "/usr/local/Cellar/python3/3.6.1/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 1234, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "/usr/local/Cellar/python3/3.6.1/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 1026, in _send_output
self.send(msg)
File "/usr/local/Cellar/python3/3.6.1/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 964, in send
self.connect()
File "/usr/local/Cellar/python3/3.6.1/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 936, in connect
(self.host,self.port), self.timeout, self.source_address)
File "/usr/local/Cellar/python3/3.6.1/Frameworks/Python.framework/Versions/3.6/lib/python3.6/socket.py", line 704, in create_connection
for res in getaddrinfo(host, port, 0, SOCK_STREAM):
File "/usr/local/Cellar/python3/3.6.1/Frameworks/Python.framework/Versions/3.6/lib/python3.6/socket.py", line 743, in getaddrinfo
for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno 8] nodename nor servname provided, or not known
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "weather.py", line 6, in <module>
CedricsWeather = W.fetch_weather(CedricsHouse)
File "/usr/local/lib/python3.6/site-packages/yweather.py", line 180, in fetch_weather
rss = self._fetch_xml(url)
File "/usr/local/lib/python3.6/site-packages/yweather.py", line 344, in _fetch_xml
with contextlib.closing(urlopen(url)) as f:
File "/usr/local/Cellar/python3/3.6.1/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "/usr/local/Cellar/python3/3.6.1/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 526, in open
response = self._open(req, data)
File "/usr/local/Cellar/python3/3.6.1/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 544, in _open
'_open', req)
File "/usr/local/Cellar/python3/3.6.1/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 504, in _call_chain
result = func(*args)
File "/usr/local/Cellar/python3/3.6.1/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 1346, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "/usr/local/Cellar/python3/3.6.1/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 1320, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 8] nodename nor servname provided, or not known>
By the way this is running in python 3.6.1 and on the latest MacOs.
I did try to run exactly the same code as the documentation with the same woeid and got the same error.

Selenium - urllib.error.URLError: <urlopen error [Errno 61] Connection refused>

Note: I spent more than one hour trying to solve this issue and found no solution that worked for me.
In the end it turned out to be a very simple mistake, but I thought I would create the question so that anybody else with the same issue can find a solution fast.
Problem
I was trying to scrape a site with the following code:
phantomjs_path = '/Users/xxx/xxx/phantomjs-2.1.1-macosx/bin/phantomjs'
driver = webdriver.PhantomJS(executable_path=phantomjs_path)
driver.set_window_size(1024, 768) #optional
driver.get(url)
# wait
element = WebDriverWait(driver, 20).until(
EC.presence_of_element_located((By.CLASS_NAME, "flightrow")))
response = driver.find_element_by_css_selector('table[class="flighttable"]')
driver.quit()
html = response.get_attribute('outerHTML') #pass from webdrive object to string
And was getting the following error:
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 1254, in do_open
h.request(req.get_method(), req.selector, req.data, headers)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/http/client.py", line 1106, in request
self._send_request(method, url, body, headers)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/http/client.py", line 1151, in _send_request
self.endheaders(body)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/http/client.py", line 1102, in endheaders
self._send_output(message_body)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/http/client.py", line 934, in _send_output
self.send(msg)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/http/client.py", line 877, in send
self.connect()
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/http/client.py", line 849, in connect
(self.host,self.port), self.timeout, self.source_address)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/socket.py", line 711, in create_connection
raise err
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/socket.py", line 702, in create_connection
sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "my_script.py", line 1251, in <module>
MyObject.script_main()
File "my_script.py", line 1232, in script_main
self.parse_js(url)
File "my_script.py", line 1202, in parse_js
print('response:', response.text)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/selenium/webdriver/remote/webelement.py", line 68, in text
return self._execute(Command.GET_ELEMENT_TEXT)['value']
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/selenium/webdriver/remote/webelement.py", line 461, in _execute
return self._parent.execute(command, params)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/selenium/webdriver/remote/webdriver.py", line 234, in execute
response = self.command_executor.execute(driver_command, params)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/selenium/webdriver/remote/remote_connection.py", line 401, in execute
return self._request(command_info[0], url, body=data)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/selenium/webdriver/remote/remote_connection.py", line 471, in _request
resp = opener.open(request, timeout=self._timeout)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 466, in open
response = self._open(req, data)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 484, in _open
'_open', req)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 444, in _call_chain
result = func(*args)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 1282, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 1256, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 61] Connection refused>
Loading the url manually in the Chrome browser was working.
Anyway, I tried switching the url from https to http, but I still got the same error.
In addition, during the previous day I did not get any error, so I assumed it could not be a problem with firewalls, as I read in some other questions.
See answer for the solution...
It turned out that I had apparently moved the line driver.quit() upwards, so the error was raised when calling 'get_attribute'.
Solution
Just move driver.quit() downwards:
driver = webdriver.PhantomJS(executable_path=phantomjs_path)
try:
    driver.set_window_size(1024, 768)  # optional
    driver.get(url)
    # Wait (timeout 20) until an element with class "flightrow" is present.
    element = WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.CLASS_NAME, "flightrow")))
    response = driver.find_element_by_css_selector('table[class="flighttable"]')
    # Convert the WebElement to a string while the driver is still running.
    html = response.get_attribute('outerHTML')
finally:
    # Do not move quit() upwards! Even though 'driver' is not named in the
    # get_attribute() call, the element still talks to the driver and raises
    # an error once it is closed. The finally clause also makes sure the
    # driver is shut down when the wait or the lookup fails.
    driver.quit()

Python 3 issue with connecting web from program

Hi, I have a program which just connects to the web to read a website's data. But when I run the program, I get a complicated error regarding the web connection. Please see the program below:
from urllib.request import urlopen
html = urlopen("http://www.pythonscraping.com/pages/page1.html")
print(html.read())
The error message is shown below,
Traceback (most recent call last):
File "C:\Python34\lib\urllib\request.py", line 1174, in do_open
h.request(req.get_method(), req.selector, req.data, headers)
File "C:\Python34\lib\http\client.py", line 1090, in request
self._send_request(method, url, body, headers)
File "C:\Python34\lib\http\client.py", line 1128, in _send_request
self.endheaders(body)
File "C:\Python34\lib\http\client.py", line 1086, in endheaders
self._send_output(message_body)
File "C:\Python34\lib\http\client.py", line 924, in _send_output
self.send(msg)
File "C:\Python34\lib\http\client.py", line 859, in send
self.connect()
File "C:\Python34\lib\http\client.py", line 836, in connect
self.timeout, self.source_address)
File "C:\Python34\lib\socket.py", line 491, in create_connection
for res in getaddrinfo(host, port, 0, SOCK_STREAM):
File "C:\Python34\lib\socket.py", line 530, in getaddrinfo
for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno 11002] getaddrinfo failed
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Python34\scrapetest.py", line 2, in <module>
html = urlopen("http://www.pythonscraping.com/pages/page1.html")
File "C:\Python34\lib\urllib\request.py", line 153, in urlopen
return opener.open(url, data, timeout)
File "C:\Python34\lib\urllib\request.py", line 455, in open
response = self._open(req, data)
File "C:\Python34\lib\urllib\request.py", line 473, in _open
'_open', req)
File "C:\Python34\lib\urllib\request.py", line 433, in _call_chain
result = func(*args)
File "C:\Python34\lib\urllib\request.py", line 1202, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "C:\Python34\lib\urllib\request.py", line 1176, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 11002] getaddrinfo failed>
Please help on this.

Resources