I am not a python guru I am just writing a code to check my api authentication and URL access status. I just want to assure that my api and domain url is accessible to users.
For the above reason I am writing a python script which can check and a cron can send an alert to me.
Here is my Code:
def check(argv):
# I'm going to use argpase.It makes
# command-line args a breeze.
parser = argparse.ArgumentParser()
parser.add_argument('-H', '--hostname', dest='hostname', required=True)
parser.add_argument('-a', '--auth_id', dest='authenticationid')
parser.add_argument('-t', '--auth_token', dest='authenticationtoken')
parser.add_argument('-r', '--dest_url', dest='dest_url',help="""Path to report relative to root, like /v1/ OR /""", required=True)
parser.add_argument("-q", "--quiet", action="store_false", dest="verbose", default=True,
help="don't print status messages to stdout")
args = vars(parser.parse_args())
if args['authenticationid'] and args['authenticationtoken'] and not len(sys.argv) == 7:
authurl = urllib.request.Request('https://{%s}:{%s}#%s%s/%s/' %(args['authenticationid'],args['authenticationtoken'],args['hostname'], args['dest_url'],args['authenticationid']))
return (getAuthResponseCode(authurl))
else:
url = urllib.request.Request("https://%s%s" %(args['hostname'], args['dest_url']))
return(getResponseCode(url))
def getResponseCode(url):
try:
conn = urllib.request.urlopen(url,timeout=10)
code = conn.getcode()
return (status['OK'], code)
except timeout:
return (status['WARNING'], logging.error('socket timed out - URL %s', url))
except urllib.error.URLError as e:
return (status['CRITICAL'], e.reason)
else:
return (status['UNKNOWN'])
def getAuthResponseCode(authurl):
try:
authconn = urllib.request.urlopen(authurl, timeout=10)
authcode = authconn.getcode()
return (status['OK'], authcode)
except timeout:
return (status['WARNING'], logging.error('socket timed out - URL %s'))
except urllib.error.URLError as err:
return (status['CRITICAL'], err.reason)
else:
return (status['UNKNOWN'])
ERROR Message:
G:\Python>python check_http.py -H api.mydomain.com -r /API/Function/ -a 'MAMZMZZGVLMG
FMNTHIYTREETBESSS' -t 'DafniisfnsifnsifsbANBBDSDNBISDExODZlODAwMmZm'
Traceback (most recent call last):
File "C:\Python33\lib\http\client.py", line 770, in _set_hostport
port = int(host[i+1:])
ValueError: invalid literal for int() with base 10: "{'DafniisfnsifnsifsbANBBDSDNBISDExODZlODAw
MmZm'}#api.mydomain.com"
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "check_http.py", line 76, in <module>
print (check(sys.argv[1:]))
File "check_http.py", line 41, in check
return (getAuthResponseCode(authurl))
File "check_http.py", line 61, in getAuthResponseCode
authconn = urllib.request.urlopen(authurl, timeout=10)
File "C:\Python33\lib\urllib\request.py", line 156, in urlopen
return opener.open(url, data, timeout)
File "C:\Python33\lib\urllib\request.py", line 469, in open
response = self._open(req, data)
File "C:\Python33\lib\urllib\request.py", line 487, in _open
'_open', req)
File "C:\Python33\lib\urllib\request.py", line 447, in _call_chain
result = func(*args)
File "C:\Python33\lib\urllib\request.py", line 1283, in https_open
context=self._context, check_hostname=self._check_hostname)
File "C:\Python33\lib\urllib\request.py", line 1219, in do_open
h = http_class(host, timeout=req.timeout, **http_conn_args)
File "C:\Python33\lib\http\client.py", line 1172, in __init__
source_address)
File "C:\Python33\lib\http\client.py", line 749, in __init__
self._set_hostport(host, port)
File "C:\Python33\lib\http\client.py", line 775, in _set_hostport
raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
http.client.InvalidURL: nonnumeric port: '{'DafniisfnsifnsifsbANBBDSDNBISDExODZlODAw
MmZm'}#api.mydomain.com'
I know this is not write my code forum but I am helpless and looking for some help.If some one can tell where exactly I went wrong so that I can fix it.
I am using python3.
You're passing ʹhttps://user:pass#whateverʹ as the url.
Python doesn't understand you're trying to authenticate, and thinks you're passing ʹhttps://domain:port...ʹ
To do basic auth with urllib, you need to use a urllib.request.HTTPBasicAuthHandler
Sorry I'm not posting links and/or example code, but I'm typing this on my phone, and it makes those a pain.
Related
Im currently getting a strange error whenever I try to return a response with the code 200.
Here's the snippet of the code I'm using:
from pathlib import Path
from flask_restful import Resource
from app.models.user import User
from app.models.workspace import Workspace
from app.models.file import File
from app.controllers.filesystem import FileSystem
from flask import request
import os
class Signup(Resource):
def post(self):
if not request.is_json:
return {"msg": "Missing JSON in request"}, 400
email = request.json.get('email', None)
password = request.json.get('password', None)
if not email:
return {"msg": "Missing username parameter"}, 400
if not password:
return {"msg": "Missing password parameter"}, 400
# TODO- Find a more elegant way to do this
try:
user = User.objects.get(email=email)
except User.DoesNotExist:
user = None
if user:
return {"msg": "User already exists"}, 400
new_user = User(email=email, password=password)
new_user.hash_password()
# Create a new workspace called "Microfluidics Examples"
new_workspace = Workspace(name="Microfluidics Examples")
# Step 1 - Go through every file in the examples directory
# Step 2 - Upload these files to s3 and get the file_id
# Step 3 - Create a new file object for each of the file_ids and add it to the workspace
# Go through every file in the examples directory
examples_directory = Path("examples")
for file_name in examples_directory.iterdir():
# Upload the file to s3 and get the file_id
s3_object_id = FileSystem.upload_file(file_name)
# Create a new file object for the file_id and add it to the workspace
new_file = File(file_name=str(file_name.name), s3_path=s3_object_id)
new_file.save()
new_workspace.design_files.append(new_file)
# Save the workspace
new_workspace.save()
# Add the workspace to the user
new_user.workspaces.append(new_workspace)
# Save the user
new_user.save()
# Return success
return {"mes":, "id": str(new_user.id)}, 200
From the error trace, it looks like something originating from the cors package might be interfering with this. Here's the trace:
127.0.0.1 - - [19/Nov/2022 14:18:02] "GET /api/v2/user HTTP/1.1" 500 -
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/flask/app.py", line 2548, in __call__
return self.wsgi_app(environ, start_response)
File "/usr/local/lib/python3.8/dist-packages/flask/app.py", line 2528, in wsgi_app
response = self.handle_exception(e)
File "/usr/local/lib/python3.8/dist-packages/flask_restful/__init__.py", line 271, in error_router
return original_handler(e)
File "/usr/local/lib/python3.8/dist-packages/flask_cors/extension.py", line 165, in wrapped_function
return cors_after_request(app.make_response(f(*args, **kwargs)))
File "/usr/local/lib/python3.8/dist-packages/flask/app.py", line 2525, in wsgi_app
response = self.full_dispatch_request()
File "/usr/local/lib/python3.8/dist-packages/flask/app.py", line 1822, in full_dispatch_request
rv = self.handle_user_exception(e)
File "/usr/local/lib/python3.8/dist-packages/flask_restful/__init__.py", line 271, in error_router
return original_handler(e)
File "/usr/local/lib/python3.8/dist-packages/flask_cors/extension.py", line 165, in wrapped_function
return cors_after_request(app.make_response(f(*args, **kwargs)))
File "/usr/local/lib/python3.8/dist-packages/flask/app.py", line 1820, in full_dispatch_request
rv = self.dispatch_request()
File "/usr/local/lib/python3.8/dist-packages/flask/app.py", line 1796, in dispatch_request
return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)
File "/usr/local/lib/python3.8/dist-packages/flask_restful/__init__.py", line 471, in wrapper
return self.make_response(data, code, headers=headers)
File "/usr/local/lib/python3.8/dist-packages/flask_restful/__init__.py", line 500, in make_response
resp = self.representations[mediatype](data, *args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/flask_restful/representations/json.py", line 21, in output_json
dumped = dumps(data, **settings) + "\n"
File "/usr/lib/python3.8/json/__init__.py", line 234, in dumps
return cls(
File "/usr/lib/python3.8/json/encoder.py", line 201, in encode
chunks = list(chunks)
File "/usr/lib/python3.8/json/encoder.py", line 438, in _iterencode
o = _default(o)
File "/usr/lib/python3.8/json/encoder.py", line 179, in default
raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type function is not JSON serializable
I can't figure out why this is happening; for instance, if I change the return code to 400, it just works fine. Can someone help me figure out what this error is?
Apologies to the community, I traced the error incorrectly. The error did not originate because of the given code snippet. I will update the question shortly to reflect the source of the error trace correctly and post the solution
I'm running through a list of locations and trying to find places along my route. This is my first attempt at threading, so any tips would be appreciated! When i run this it'll work fine for the first few iterations, but then i start getting a KeyError and the API response says route is not found (even though it should be). If I search along a shorter route, everything runs fine. When I extend the route past a couple of hours of drive time I start getting these errors. Is it possible that I'm overloading it or does my code look off?
import pandas as pd
from threading import Thread
import threading
import requests
start_input = input("start: ")
end_input = input("end: ")
out_way = input("out of the way: ")
out_way_secs = int(out_way) * 60
thread_local = threading.local()
def get_session():
if not getattr(thread_local, "session", None):
thread_local.session = requests.Session()
return thread_local.session
def get_routes(url, start, end, waypoint, idx):
session = get_session()
with session.get(url, params={'origins': f'{start}|{waypoint}', 'destinations': f'{start}|{end}',
'key': '# key'}) as response:
route = response.json()
if route['rows'][1]['elements'][0]['status'] != 'OK':
results[idx] = {'# info'}
else:
nonstop_route = route['rows'][0]['elements'][1]['duration']['value']
leg1 = route['rows'][1]['elements'][0]['duration']['value']
leg2 = route['rows'][1]['elements'][1]['duration']['value']
time_added = (leg1 + leg2) - nonstop_route
time_added_mins = str(datetime.timedelta(seconds=(leg1 + leg2) - nonstop_route))
more_time = time_added_mins.split(':')
added_time_str = str(f'{more_time[0]}:{more_time[1]}:{more_time[2]} away!')
if time_added < allowable_time:
results[idx] = {# info to return}
return results[idx]
if __name__ == "__main__":
start_time = time.time()
output_df = pd.DataFrame(columns=['Location', 'Added Time', 'Notes'])
threads = [None] * coords[0]
results = [None] * coords[0]
for i in range(len(threads)):
threads[i] = Thread(target=get_routes, args=('https://maps.googleapis.com/maps/api/distancematrix/json',
start_input, end_input, stops[i], i))
threads[i].start()
for i in range(len(threads)):
threads[i].join()
for x in range(len(results)):
output_df = output_df.append(results[x], ignore_index=True)
output_df = output_df.sort_values(['Added Time'], ascending=True)
output_df.to_csv('output.csv', index=False)
there are 3 errors that it will get, this first one pops up by itself and the last 2 will come together. The code is the same when I run it, so not sure why i'm getting different errors.
This is the most common error that comes by itself (the routing duration works fine when run individually):
Exception in thread Thread-171:
Traceback (most recent call last):
File "C:\Python37-32\lib\threading.py", line 917, in _bootstrap_inner
self.run()
File "C:\Python37-32\lib\threading.py", line 865, in run
self._target(*self._args, **self._kwargs)
File "C:program.py", line 46, in get_routes
nonstop_route = route['rows'][0]['elements'][1]['duration']['value']
KeyError: 'duration'
The two below I get together and are less common:
Exception in thread Thread-436:
Traceback (most recent call last):
File "C:\Python37-32\lib\threading.py", line 917, in _bootstrap_inner
self.run()
File "C:\Python37-32\lib\threading.py", line 865, in run
self._target(*self._args, **self._kwargs)
File "C:/program.py", line 40, in get_routes
route = response.json()
File "C:\requests\models.py", line 897, in json
return complexjson.loads(self.text, **kwargs)
File "C:\Python37-32\lib\json\__init__.py", line 348, in loads
return _default_decoder.decode(s)
File "C:\Python37-32\lib\json\decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Python37-32\lib\json\decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
second error:
Exception in thread Thread-196:
Traceback (most recent call last):
File "C:\site-packages\urllib3\response.py", line 360, in _error_catcher
yield
File "C:\urllib3\response.py", line 442, in read
data = self._fp.read(amt)
File "C:\Python37-32\lib\http\client.py", line 447, in read
n = self.readinto(b)
File "C:\Python37-32\lib\http\client.py", line 491, in readinto
n = self.fp.readinto(b)
File "C:\Python37-32\lib\socket.py", line 589, in readinto
return self._sock.recv_into(b)
File "C:\Python37-32\lib\ssl.py", line 1052, in recv_into
return self.read(nbytes, buffer)
File "C:\Python37-32\lib\ssl.py", line 911, in read
return self._sslobj.read(len, buffer)
ConnectionAbortedError: [WinError 10053] An established connection was aborted by the software in your host machine
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\site-packages\requests\models.py", line 750, in generate
for chunk in self.raw.stream(chunk_size, decode_content=True):
File "C:\site-packages\urllib3\response.py", line 494, in stream
data = self.read(amt=amt, decode_content=decode_content)
File "C:\site-packages\urllib3\response.py", line 459, in read
raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
File "C:\Python37-32\lib\contextlib.py", line 130, in __exit__
self.gen.throw(type, value, traceback)
File "C:\site-packages\urllib3\response.py", line 378, in _error_catcher
raise ProtocolError('Connection broken: %r' % e, e)
urllib3.exceptions.ProtocolError: ("Connection broken: ConnectionAbortedError(10053, 'An established connection was aborted by the software in your host machine', None, 10053, None)", ConnectionAbortedError(10053, 'An established connection was aborted by the software in your host machine', None, 10053, None))
Note: I spend more than one hour trying to solve this issue and found no solution that worked for me.
At the end it turned out to be a very simple mistake, but I thought I will create the question so in case anybody else has the same issue can find a solution fast.
Problem
I was trying to scrape a site with the following code:
phantomjs_path = '/Users/xxx/xxx/phantomjs-2.1.1-macosx/bin/phantomjs'
driver = webdriver.PhantomJS(executable_path=phantomjs_path)
driver.set_window_size(1024, 768) #optional
driver.get(url)
# wait
element = WebDriverWait(driver, 20).until(
EC.presence_of_element_located((By.CLASS_NAME, "flightrow")))
response = driver.find_element_by_css_selector('table[class="flighttable"]')
driver.quit()
html = response.get_attribute('outerHTML') #pass from webdrive object to string
And was getting the following error:
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 1254, in do_open
h.request(req.get_method(), req.selector, req.data, headers)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/http/client.py", line 1106, in request
self._send_request(method, url, body, headers)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/http/client.py", line 1151, in _send_request
self.endheaders(body)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/http/client.py", line 1102, in endheaders
self._send_output(message_body)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/http/client.py", line 934, in _send_output
self.send(msg)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/http/client.py", line 877, in send
self.connect()
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/http/client.py", line 849, in connect
(self.host,self.port), self.timeout, self.source_address)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/socket.py", line 711, in create_connection
raise err
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/socket.py", line 702, in create_connection
sock.connect(sa)
ConnectionRefusedError: [Errno 61] Connection refused
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "my_script.py", line 1251, in <module>
MyObject.script_main()
File "my_script.py", line 1232, in script_main
self.parse_js(url)
File "my_script.py", line 1202, in parse_js
print('response:', response.text)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/selenium/webdriver/remote/webelement.py", line 68, in text
return self._execute(Command.GET_ELEMENT_TEXT)['value']
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/selenium/webdriver/remote/webelement.py", line 461, in _execute
return self._parent.execute(command, params)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/selenium/webdriver/remote/webdriver.py", line 234, in execute
response = self.command_executor.execute(driver_command, params)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/selenium/webdriver/remote/remote_connection.py", line 401, in execute
return self._request(command_info[0], url, body=data)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/selenium/webdriver/remote/remote_connection.py", line 471, in _request
resp = opener.open(request, timeout=self._timeout)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 466, in open
response = self._open(req, data)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 484, in _open
'_open', req)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 444, in _call_chain
result = func(*args)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 1282, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 1256, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 61] Connection refused>
Loading the url manually in the Chrome browser was working.
Anyway, I tried switching the url from https to http, but I still got the same error.
In addition, during the previous day I did not get any error, so I assumed it could not be a problem with firewalls, as I read in some other questions.
See answer for the solution...
It turned out that apparently I had moved the line driver.quit() upwards, so the error was raised when calling 'get_atribute'.
Solution
Just move driver.quit() downwards:
driver = webdriver.PhantomJS(executable_path=phantomjs_path)
driver.set_window_size(1024, 768) #optional
driver.get(url)
# wait
element = WebDriverWait(driver, 20).until(
EC.presence_of_element_located((By.CLASS_NAME, "flightrow")))
response = driver.find_element_by_css_selector('table[class="flighttable"]')
html = response.get_attribute('outerHTML') #pass from webdrive object to string
#do not move quite() upwards! even if 'driver' is not specifically called with the command 'get_attribute'
#it will raise an error if driver is closed.
driver.quit()
Hello everyone I am a beginner programmer in language Python and I need help.
this is my code in Python, it gives an error, please help to fix
urllib.error.URLError: urlopen error [Errno 11001] getaddrinfo failed
Python:
# -*- coding: utf-8 -*-
import urllib.request
from lxml.html import parse
WEBSITE = 'http://allrecipes.com'
URL_PAGE = 'http://allrecipes.com/recipes/110/appetizers-and-snacks/deviled-eggs/?page='
START_PAGE = 1
END_PAGE = 5
def correct_str(s):
return s.encode('utf-8').decode('ascii', 'ignore').strip()
for i in range(START_PAGE, END_PAGE+1):
URL = URL_PAGE + str(i)
HTML = urllib.request.urlopen(URL)
page = parse(HTML).getroot()
for elem in page.xpath('//*[#id="grid"]/article[not(contains(#class, "video-card"))]/a[1]'):
href = WEBSITE + elem.get('href')
title = correct_str(elem.find('h3').text)
recipe_page = parse(urllib.request.urlopen(href)).getroot()
print(correct_str(href))
photo_url = recipe_page.xpath('//img[#class="rec-photo"]')[0].get('src')
print('\nName: |', title)
print('Photo: |', photo_url)
This into command prompt: python I get this error:
Traceback (most recent call last):
http://allrecipes.com/recipe/236225/crab-stuffed-deviled-eggs/
File "C:\Users\Ivan\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 1240, in do_open
h.request(req.get_method(), req.selector, req.data, headers)
Name: | Crab-Stuffed Deviled Eggs
File "C:\Users\Ivan\AppData\Local\Programs\Python\Python35-32\lib\http\client.py", line 1083, in request
Photo: | http://images.media-allrecipes.com/userphotos/720x405/1091564.jpg
self._send_request(method, url, body, headers)
File "C:\Users\Ivan\AppData\Local\Programs\Python\Python35-32\lib\http\client.py", line 1128, in _send_request
self.endheaders(body)
File "C:\Users\Ivan\AppData\Local\Programs\Python\Python35-32\lib\http\client.py", line 1079, in endheaders
self._send_output(message_body)
File "C:\Users\Ivan\AppData\Local\Programs\Python\Python35-32\lib\http\client.py", line 911, in _send_output
self.send(msg)
File "C:\Users\Ivan\AppData\Local\Programs\Python\Python35-32\lib\http\client.py", line 854, in send
self.connect()
File "C:\Users\Ivan\AppData\Local\Programs\Python\Python35-32\lib\http\client.py", line 826, in connect
(self.host,self.port), self.timeout, self.source_address)
File "C:\Users\Ivan\AppData\Local\Programs\Python\Python35-32\lib\socket.py", line 693, in create_connection
for res in getaddrinfo(host, port, 0, SOCK_STREAM):
File "C:\Users\Ivan\AppData\Local\Programs\Python\Python35-32\lib\socket.py", line 732, in getaddrinfo
for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno 11001] getaddrinfo failed
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:/Users/Ivan/Dropbox/parser/test.py", line 27, in <module>
recipe_page = parse(urllib.request.urlopen(href)).getroot()
File "C:\Users\Ivan\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 162, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\Ivan\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 465, in open
response = self._open(req, data)
File "C:\Users\Ivan\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 483, in _open
'_open', req)
File "C:\Users\Ivan\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 443, in _call_chain
result = func(*args)
File "C:\Users\Ivan\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 1268, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "C:\Users\Ivan\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 1242, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 11001] getaddrinfo failed>
Process finished with exit code 1
I'll attempt to explain three main ways to dig into a programming problem:
(1) Use a debugger. You could walk through your code and examine variables before they are used and before they throw an exception. Python comes with pdb. In this problem you would step through the code and print out the href before urlopen().
(2) Assertions. Use Python's assert to assert assumptions in your code. You could, for example, assert not href.startswith('http')
(3) Logging. Log relevant variables before they are used. This is what I used:
I added the following to your code...
href = WEBSITE + elem.get('href')
print(href)
And got...
Photo: | http://images.media-allrecipes.com/userphotos/720x405/1091564.jpg
http://allrecipes.comhttp://dish.allrecipes.com/how-to-boil-an-egg/
From here you can see your getaddrinfo problem: Your system is trying to open a url at a host named allrecipes.comhttp.
This looks to be a problem based upon your assumption that WEBSITE must be prepended to every href you pull from the html.
You can handle the case of an absolute vs relative href with something like this and a function to determine if the url is absolute:
import urlparse
def is_absolute(url):
# See https://stackoverflow.com/questions/8357098/how-can-i-check-if-a-url-is-absolute-using-python
return bool(urlparse.urlparse(url).netloc)
href = elem.get('href')
if not is_absolute(href):
href = WEBSITE + href
A better way to do this is to use parse as such:
from urllib import parse
href = parse.urljoin(base_url, href)
This will will return a complete url for the href in case it is not complete.
I tried to write a simple server-client program using webob.
1. The client send data using 'Transfer-Encoding', 'chunked'
2. the received data is then print in the server side.
The Server.py received the data correctly.
However, I got error a bunch of message from webob.
However, anyone kindly tell me what had happened or just
give a simple guide line for write such a simple program(sending chunk) with webob.
thanks!
the codes and error are as below:
server.py
from webob import Request
from webob import Response
class ChunkApp(object):
def __init__(self):
pass
def __call__(self, environ, start_response):
req = Request(environ)
for buf in self.chunkreadable(req.body_file, 65535):
print buf
resp = Response('chunk received')
return resp(environ, start_response)
def chunkreadable(self, iter, chunk_size=65536):
return self.chunkiter(iter, chunk_size) if \
hasattr(iter, 'read') else iter
def chunkiter(self, fp, chunk_size=65536):
while True:
chunk = fp.read(chunk_size)
if chunk:
yield chunk
else:
break
if __name__ == '__main__':
app = ChunkApp()
from wsgiref.simple_server import make_server
httpd = make_server('localhost', 8080, app)
print 'Serving on http://localhost:%s' % '8080'
try:
httpd.serve_forever()
except KeyboardInterrupt:
print '^C'
client.py
from httplib import HTTPConnection
conn = HTTPConnection("localhost:8080")
conn.putrequest('PUT', "/")
body = "1234567890"
conn.putheader('Transfer-Encoding', 'chunked')
conn.endheaders()
for chunk in body:
#print '%x\r\n%s\r\n' % (len(chunk), chunk)
conn.send('%x\r\n%s\r\n' % (len(chunk), chunk))
conn.send('0\r\n\r\n')
Error
Traceback (most recent call last):
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/wsgiref/handlers.py", line 86, in run
self.finish_response()
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/wsgiref/handlers.py", line 127, in finish_response
self.write(data)
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/wsgiref/handlers.py", line 210, in write
self.send_headers()
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/wsgiref/handlers.py", line 268, in send_headers
self.send_preamble()
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/wsgiref/handlers.py", line 192, in send_preamble
'Date: %s\r\n' % format_date_time(time.time())
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/socket.py", line 324, in write
self.flush()
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/socket.py", line 303, in flush
self._sock.sendall(view[write_offset:write_offset+buffer_size])
error: [Errno 32] Broken pipe
----------------------------------------
Exception happened during processing of request from ('127.0.0.1', 52575)
Traceback (most recent call last):
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/SocketServer.py", line 284, in _handle_request_noblock
self.process_request(request, client_address)
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/SocketServer.py", line 310, in process_request
self.finish_request(request, client_address)
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/SocketServer.py", line 323, in finish_request
self.RequestHandlerClass(request, client_address, self)
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/SocketServer.py", line 641, in __init__
self.finish()
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/SocketServer.py", line 694, in finish
self.wfile.flush()
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/socket.py", line 303, in flush
self._sock.sendall(view[write_offset:write_offset+buffer_size])
error: [Errno 32] Broken pipe