KeyError: 'lattitude' - python-3.x

I'm currently trying to use an API to get data for Buffalo from a JSON URL and then place it in the format: latitude, longitude, and viodesc.
However, I run into difficulties when iterating because some records don't have a latitude and longitude, which gives me a KeyError of 'lattitude'.
I'm not sure whether this is a fault in my code, or how I should go about changing it.
import json
from urllib import request

def get_ticket_data(string):
    answer = []
    urlData = string
    webURL = request.urlopen(urlData)
    data = webURL.read()
    ans = json.loads(data.decode())
    for x in ans:
        arr = []
        arr.append(x["lattitude"])
        arr.append(x["longtitude"])
        arr.append(x["viodesc"])
    return answer.append(ans)

You can catch the KeyError exception, which is raised when a particular key is not found. Handle the exception so that even if the key is missing you can move on to the next record without stopping the code.
Code Snippet:
import json
from urllib import request

def get_ticket_data(string):
    answer = []
    urlData = string
    webURL = request.urlopen(urlData)
    data = webURL.read()
    ans = json.loads(data.decode())
    for x in ans:
        try:
            arr = []
            arr.append(x["lattitude"])
            arr.append(x["longtitude"])
            arr.append(x["viodesc"])
        except KeyError:
            # skip any record that is missing one of the keys
            continue
        answer.append(arr)
    return answer
Hope it helps!

Another approach would be to check for the key before appending:
import json
from urllib import request

def get_ticket_data(string):
    answer = []
    urlData = string
    webURL = request.urlopen(urlData)
    data = webURL.read()
    ans = json.loads(data.decode())
    for x in ans:
        arr = []
        if "lattitude" in x:
            arr.append(x["lattitude"])
        if "longtitude" in x:
            arr.append(x["longtitude"])
        if "viodesc" in x:
            arr.append(x["viodesc"])
        answer.append(arr)
    return answer
Checking for the key first only appends when the value exists; otherwise it simply skips it.
It all depends on how you will treat the information later. Another approach would be to fill the slot with "" when there is no latitude. For that you could do:
import json
from urllib import request

def get_ticket_data(string):
    answer = []
    urlData = string
    webURL = request.urlopen(urlData)
    data = webURL.read()
    ans = json.loads(data.decode())
    for x in ans:
        arr = []
        arr.append(x.get("lattitude", ""))
        arr.append(x.get("longtitude", ""))
        arr.append(x.get("viodesc", ""))
        answer.append(arr)
    return answer
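Whichever variant you go with, the call looks the same. A quick usage sketch (the URL below is only a placeholder for the JSON endpoint you are actually querying):

rows = get_ticket_data("https://example.com/buffalo_tickets.json")  # placeholder URL
for row in rows:
    print(row)  # each row holds the latitude, longitude and violation description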

Related

adding "na" text to an array within a loop

I've gotten all the data I wanted from scraping this Metacritic URL (see below); however, I can't seem to put in a value when I don't find the associated value for a list (missing values).
I would like all the lists to be the same length (so I can write them to .csv).
Here is the code I have so far:
from requests import get
from bs4 import BeautifulSoup
from urllib.request import Request, urlopen
import pandas as pd

# Define year
year_number = 2018
# Define the URL
i = range(0, 1)
names = []
metascores = []
userscores = []
userscoresNew = []
release_dates = []
release_datesNew = []
publishers = []
ratings = []
genres = []
genresNew = []
for element in i:
    url = "http://www.metacritic.com/browse/games/score/metascore/year/pc/filtered?view=detailed&sort=desc&year_selected=" + format(year_number)
    print(url)
    year_number -= 1
    # not sure about this but it works (I was getting blocked by something and this is the way I found around it)
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    web_byte = urlopen(req).read()
    webpage = web_byte.decode('utf-8')
    # this grabs all the text from the page
    html_soup = BeautifulSoup(webpage, 'html5lib')
    # this is for selecting all the games from 1 to 100 (the list of them)
    game_names = html_soup.find_all("div", class_="main_stats")
    game_metas = html_soup.find_all("a", class_="basic_stat product_score")
    game_users = html_soup.find_all("li", class_='stat product_avguserscore')
    game_releases = html_soup.find_all("ul", class_='more_stats')
    game_publishers = html_soup.find_all("li", class_='stat publisher')
    game_ratings = html_soup.find_all("li", class_='stat maturity_rating')
    game_genres = html_soup.find_all("li", class_='stat genre')
    # Extract data from each game
    for games in game_names:
        name = games.find()
        names.append(name.text.strip())
    for games2 in game_metas:
        metascore = games2.find()
        metascores.append(metascore.text.strip())
    for games3 in game_releases:
        release_date = games3.find()
        release_dates.append(release_date.text.strip())
    for games4 in game_users:
        userscore = games4.find('span', class_="data textscore textscore_favorable") or games4.find('span', class_="data textscore textscore_mixed")
        if userscore:
            userscores.append(userscore.text)
    for games5 in game_publishers:
        publisher = games5.find("span", class_="data")
        if publisher:
            publishers.append(publisher.text)
    for games6 in game_ratings:
        rating = games6.find("span", class_="data")
    for games7 in game_genres:
        genre = games7.find("span", class_="data")
        if genre:
            genres.append(genre.text)
    for x in release_dates:
        temp = str(x)
        temp2 = temp.replace("Release Date:\n ", "")
        release_datesNew.append(temp2)
    for z in genres:
        temp3 = str(z)
        temp4 = temp3.strip()
        temp5 = temp4.replace(" ", "")
        genresNew.append(temp5)
df = pd.DataFrame({'Games:': names})
I'm not sure how I would work that into this code.
From what I understand, it takes all the data it can find, but if there is a blank it doesn't know about it.
Can someone advise the best solution for this situation?
Any help would be great.
Thanks
Just add an else for each of the existing conditions...
if userscore:
    userscores.append(userscore.text)
else:
    userscores.append('na')
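Applied to the remaining loops, a sketch of the same pattern might look like this (assuming you also want 'na' placeholders for publishers, ratings, and genres so every list stays the same length):

for games5 in game_publishers:
    publisher = games5.find("span", class_="data")
    if publisher:
        publishers.append(publisher.text)
    else:
        publishers.append('na')
for games6 in game_ratings:
    rating = games6.find("span", class_="data")
    if rating:
        ratings.append(rating.text)
    else:
        ratings.append('na')
for games7 in game_genres:
    genre = games7.find("span", class_="data")
    if genre:
        genres.append(genre.text)
    else:
        genres.append('na')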

urllib error: Too many requests

The Python program below asks the user for two Reddit usernames and compares their scores.
import json
from urllib import request

def obtainKarma(users_data):
    users_info = []
    for user_data in users_data:
        data = json.load(user_data)
        posts = data["data"]["children"]
        num_posts = len(posts)
        scores = []
        comments = []
        for post_id in range(num_posts):
            score = posts[post_id]["data"]["score"]
            comment = posts[post_id]["num_comments"]
            scores.append(score)
            comments.append(comment)
        users_info.append((scores, comments))
    user_id = 0
    for user_info in users_info:
        user_id += 1
        print("User" + str(user_id))
        for user_attr in user_info:
            print(user_attr)

def getUserInfo():
    count = 2
    users_data = []
    while count:
        count = count + 1
        username = input("Please enter username:\n")
        url = "https://reddit.com/user/" + username + ".json"
        try:
            user_data = request.urlopen(url)
        except:
            print("No such user.\nRetry Please.\n")
            count = count + 1
            raise
        users_data.append(user_data)
    obtainKarma(users_data)

if __name__ == '__main__':
    getUserInfo()
However, when I run the program and enter a username, I get an error:
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 429: Too Many Requests
I tried looking for similar issues, but none of them resolved this specific one. Looking at the error, you might think the URL returns an amount of data that exceeds some limit, but that seems absurd because it is not that much data.
Thanks.
The problem seems to be resolved when you supply a User-Agent with your request.
import json
from urllib import request

def obtainKarma(users_data):
    users_info = []
    for user_data in users_data:
        data = json.loads(user_data)  # I've changed 'json.load' to 'json.loads' because you want to parse a string, not a file
        posts = data["data"]["children"]
        num_posts = len(posts)
        scores = []
        comments = []
        for post_id in range(num_posts):
            score = posts[post_id]["data"]["score"]
            comment = posts[post_id]["data"]["num_comments"]  # I think you forgot '["data"]' here, so I added it
            scores.append(score)
            comments.append(comment)
        users_info.append((scores, comments))
    user_id = 0
    for user_info in users_info:
        user_id += 1
        print("User" + str(user_id))
        for user_attr in user_info:
            print(user_attr)

def getUserInfo():
    count = 2
    users_data = []
    while count:
        count = count + 1
        username = input("Please enter username:\n")
        url = "https://reddit.com/user/" + username + ".json"
        user_data = None
        try:
            req = request.Request(url)
            req.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)')
            resp = request.urlopen(req)
            user_data = resp.read().decode("utf-8")
        except Exception as e:
            print(e)
            print("No such user.\nRetry Please.\n")
            count = count + 1
            raise  # why raise? --> Program will end if user is not found
        if user_data:
            print(user_data)
            users_data.append(user_data)
    obtainKarma(users_data)

if __name__ == '__main__':
    getUserInfo()
There were still other issues with your code:
You should not write json.load(user_data), because you are parsing a string. So I changed it to use json.loads(user_data).
The Python documentation for json.loads states:
Deserialize s (a str instance containing a JSON document) to a Python object using this conversion table.
And in the code comment = posts[post_id]["num_comments"], I think you forgot to index on 'data', so I changed it to comment = posts[post_id]["data"]["num_comments"]
And why are you raising the exception in the except block? This will end the program; however, judging from the following line, it seems you expect it not to:
print("No such user.\nRetry Please.\n")

I'm stumped at looping through a returned list of URLs

This is my first Python project. I'm trying to scrape restaurant inspections. One site has summaries that offer keys to the detailed reports I want to scrape, and I'm stumped at looping through the keyed list of URLs to get the details.
import pandas as pd
import bs4
import datetime
import re
import lxml
from urllib.request import urlopen
from urllib.error import HTTPError

try:
    insp = pd.read_csv("ftp://dbprftp.state.fl.us/pub/llweb/5fdinspi.csv",
                       usecols=[2,14,18,80,81])
except IOError:
    print("The file is not accessible.")

insp.columns = ["CountyName", "InspectDate",
                "NumHighVio", "LicenseID", "VisitID"]
# filter for alachua county restaurants
alachua = insp[insp.CountyName == 'Alachua']
# filter for restaurants that had at least one serious violation
alachua = alachua[alachua.NumHighVio > 0]
# change date string to date object
alachua['InspectDate'] = pd.to_datetime(alachua['InspectDate'])
# sort most recent
alachua = alachua.sort_values('InspectDate', ascending=False)
# prefer to have user set timedelta below:
today = pd.to_datetime('today')
startDay = datetime.date.today() - datetime.timedelta(days=30)
alachua = alachua[(alachua['InspectDate'] > startDay) &
                  (alachua['InspectDate'] < today)]
# takes LicenseID and VisitID, passes it into the urls for detailed reports
for index, rows in alachua.iterrows():
    visitID = rows['VisitID']
    licID = rows['LicenseID']
    urls = "https://www.myfloridalicense.com/inspectionDetail.asp?InspVisitID= %s &licid= %s" % (visitID, licID)
    urls = urls.replace(' ', '')
    print(urls)
    ## here's my problem:
    for url in urls:
        def get_inspect_detail():
            html = urlopen(url)
            soup = bs4.BeautifulSoup(html.read(), 'lxml')
            details = soup.find_all('font', {'face':'verdana'})[10:]
            for detail in details:
                siteName = details[0].text
                licNum = details[2].text
                siteRank = details[4].text
                expDate = details[6].text
                primeStatus = details[8].text
                secStatus = details[10].text
                siteAddress = details[12].text
                inspectResult = details[20].text
                observed1 = details[34].get_text
                observed2 = details[36].text
                observed3 = details[38].text
                observed4 = details[40].text
                observed5 = details[42].text
                observed6 = details[44].text
                observed7 = details[46].text
                observed8 = details[48].text
                observed9 = details[50].text
                observed10 = details[52].text
                detailsLib = {
                    'Restaurant': siteName,
                    'License': licNum,
                    'Rank': siteRank,
                    'Expires': expDate,
                    'Primary': primeStatus,
                    'Secondary': secStatus,
                    'Address': siteAddress,
                    'Result': inspectResult,
                    'Observed1': observed1,
                    'Observed2': observed2,
                    'Observed3': observed3,
                    'Observed4': observed4,
                    'Observed5': observed5,
                    'Observed6': observed6,
                    'Observed7': observed7,
                    'Observed8': observed8,
                    'Observed9': observed9,
                    'Observed10': observed10
                }
        repr(get_inspect_detail())
Probably an obvious mistake or a lack of knowledge, but I can get the unscrubbed data for one URL, not for all of them.
I don't see a reason to define your function inside the loop; you would end up with a lot of redundant definitions that way. Second, you could just define a result list and accumulate the detailsLib objects inside it.
def get_inspect_detail(url):
    html = urlopen(url)
    soup = bs4.BeautifulSoup(html.read(), 'lxml')
    details = soup.find_all('font', {'face': 'verdana'})[10:]
    result = []
    for detail in details:
        siteName = details[0].text
        licNum = details[2].text
        siteRank = details[4].text
        expDate = details[6].text
        primeStatus = details[8].text
        secStatus = details[10].text
        siteAddress = details[12].text
        inspectResult = details[20].text
        observed1 = details[34].get_text
        observed2 = details[36].text
        observed3 = details[38].text
        observed4 = details[40].text
        observed5 = details[42].text
        observed6 = details[44].text
        observed7 = details[46].text
        observed8 = details[48].text
        observed9 = details[50].text
        observed10 = details[52].text
        detailsLib = {
            'Restaurant': siteName,
            'License': licNum,
            'Rank': siteRank,
            'Expires': expDate,
            'Primary': primeStatus,
            'Secondary': secStatus,
            'Address': siteAddress,
            'Result': inspectResult,
            'Observed1': observed1,
            'Observed2': observed2,
            'Observed3': observed3,
            'Observed4': observed4,
            'Observed5': observed5,
            'Observed6': observed6,
            'Observed7': observed7,
            'Observed8': observed8,
            'Observed9': observed9,
            'Observed10': observed10
        }
        result.append(detailsLib)
    return result

for url in urls:
    repr(get_inspect_detail(url))
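To gather everything in one place afterwards, a small sketch (assuming urls has been built up as an actual list of report URLs rather than a single string) could accumulate the records and tabulate them with the pandas import already in the script:

all_details = []
for url in urls:
    all_details.extend(get_inspect_detail(url))
# each dict becomes one row
df_details = pd.DataFrame(all_details)
print(df_details.head())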

How to load from JSON?

I'm trying to get information from a web API with Python 3, but it gives me an error. This is my code:
import json, urllib.request, requests

def findLocation():
    """returns latlng location value in the form of a list."""
    send_url = 'http://freegeoip.net/json'
    r = requests.get(send_url)
    j = json.loads(r.text)
    lat = j['latitude']
    lon = j['longitude']
    return lat, lon

location = findLocation()
print(findLocation()[0])
print(findLocation()[1])

def readJsonUrl(url):
    """reads the Json returned by the google api and converts it into a format
    that can be used in python."""
    page = urllib.request.urlopen(url)
    data_bytes = page.read()
    data_str = data_bytes.decode("utf-8")
    page.close()
    return data_str

search = readJsonUrl("https://maps.googleapis.com/maps/api/place/textsearch/json?query=indian+restaurantsin+Coventry&location=52.4066,-1.5122&key=AIzaSyCI8n1sI4CDRnsYo3hB_oH1trfxbt2IEaw")
print(search['website'])
Error:
Traceback (most recent call last):
  File "google api.py", line 28, in <module>
    print(search['website'])
TypeError: string indices must be integers
Any help is appreciated.
The function you are using, readJsonUrl(), returns a string, not JSON. Therefore, when you try search['website'] it fails, because indices on a string can only be integers.
Try parsing the string value into a JSON object. To do this you can follow the accepted answer here: Convert string to JSON using Python
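For example, a minimal sketch of that parsing step, keeping readJsonUrl() exactly as it is (url stands for the same Places request string used in the question):

data_str = readJsonUrl(url)    # still a plain string at this point
search = json.loads(data_str)  # parse the string into a dict
print(search['results'])       # the text-search response is keyed by 'results'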
data_str is a string (not a dict). You should convert data_str to a dict! Just add this line to your code: convert_to_dict = json.loads(data_str), then return convert_to_dict ... and done.
Try this:
import json, urllib.request, requests

def findLocation():
    send_url = 'http://freegeoip.net/json'
    r = requests.get(send_url)
    j = json.loads(r.text)
    lat = j['latitude']
    lon = j['longitude']
    return lat, lon

location = findLocation()
print(findLocation()[0])
print(findLocation()[1])

def readJsonUrl(url):
    page = urllib.request.urlopen(url)
    data_bytes = page.read()
    data_str = data_bytes.decode("utf-8")
    page.close()
    convert_to_dict = json.loads(data_str)  # new line
    return convert_to_dict  # updated

search = readJsonUrl("https://maps.googleapis.com/maps/api/place/textsearch/json?query=indian+restaurantsin+Coventry&location=52.4066,-1.5122&key=AIzaSyCI8n1sI4CDRnsYo3hB_oH1trfxbt2IEaw")
print(search['your_key'])  # now you can call your keys
The reason for the TypeError: string indices must be integers is that your readJsonUrl function returns a str object instead of a dict object. Using the json.loads function can help you convert the str object into a dict object.
You can try something like the following:
def readJsonUrl(url):
    with urllib.request.urlopen(url) as page:
        raw = page.read().decode("utf-8")
        json_data = json.loads(raw)
    return json_data

search = readJsonUrl("https://maps.googleapis.com/maps/api/place/textsearch/json?query=indian+restaurantsin+Coventry&location=52.4066,-1.5122&key=AIzaSyCI8n1sI4CDRnsYo3hB_oH1trfxbt2IEaw")
print(search['results'])
Hope it helps.

str(int(time.time())) gives error string indices must be integers in python 3

I am new to Python. I created a pip package for my site's API, and in it I need to pass a nonce. I wrote the code like this:
payload = {}
headers = {}
if self.api_key is not None:
    payload["api_key"] = self.api_key
if self.secret_key is not None:
    payload["secret_key"] = self.secret_key
payload["request"] = method
payload["nonce"] = str(int(time.time()))
payload.update(kwargs)
headers["X-WCX-APIKEY"] = self.api_key
headers["X-WCX-PAYLOAD"] = base64.b64encode(json.dumps(payload).encode('utf-8'))
headers["X-WCX-SIGNATURE"] = 'SIGNATURE'
url = self.base_url.replace('API_CALL', method)
# update the parameters with the API key
session = requests.session()
response = session.post(url, data=payload, headers=headers)
When I run this code I get the error "string indices must be integers".
How can I fix it? Please help, anyone.
Note: I do import "time".
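For what it's worth, the nonce line on its own should not raise that error; a standalone sketch of just that step runs fine, which suggests the TypeError comes from somewhere else in the call (for example, from indexing into a string response or into one of the kwargs values):

import time

nonce = str(int(time.time()))  # time.time() -> float, int() truncates, str() formats; no string indexing involved
print(nonce)                   # e.g. '1710000000'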
