urllib error: Too many requests - python-3.x

The below python program asks the user for two reddit usernames and compares their score.
import json
from urllib import request
def obtainKarma(users_data):
# Collect and print, per user, the list of post scores and the list of
# comment counts found under data -> children in each JSON document.
# users_data: iterable of objects json.load can read; getUserInfo passes the
# values returned by request.urlopen.
users_info = []
for user_data in users_data:
data = json.load(user_data)
posts = data["data"]["children"]
num_posts = len(posts)
scores = []
comments = []
for post_id in range(num_posts):
score = posts[post_id]["data"]["score"]
# NOTE(review): "score" is read through ["data"] but "num_comments" is not;
# presumably this lookup needs ["data"] as well -- confirm against the JSON.
comment = posts[post_id]["num_comments"]
scores.append(score)
comments.append(comment)
users_info.append((scores,comments))
# Users are numbered from 1 in the order they were supplied.
user_id = 0
for user_info in users_info:
user_id+=1
print("User"+str(user_id))
# user_info is the (scores, comments) tuple; each list is printed on its own line.
for user_attr in user_info:
print(user_attr)
def getUserInfo():
# Prompt for reddit usernames, fetch each user's .json listing and hand the
# collected responses to obtainKarma.
# NOTE(review): count starts at 2 and is only ever incremented, so
# `while count` never becomes false as written; presumably a decrement was
# intended -- confirm.
count = 2
users_data = []
while count:
count = count + 1
username = input("Please enter username:\n")
url = "https://reddit.com/user/"+username+".json"
try:
# The request is sent with urllib's default headers (no User-Agent is set).
user_data = request.urlopen(url)
except:
# NOTE(review): the bare except catches every error, prints a retry message
# and then re-raises, so no retry can actually happen.
print("No such user.\nRetry Please.\n")
count = count + 1
raise
users_data.append(user_data)
obtainKarma(users_data)
if __name__ == '__main__':
getUserInfo()
However, when I run the program and enter a username, I get an error:
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 429: Too Many Requests
I looked for similar issues, but none of the answers solved this specific problem. Looking at the error, would it make sense to say that the URL returns more data than some limit allows? That still sounds absurd, because it is not that much data.
Thanks.

The problem seems to be resolved when you supply a User-Agent with your request.
import json
from urllib import request
def obtainKarma(users_data):
    """Print the post scores and comment counts of each user.

    Args:
        users_data: iterable of JSON documents (``str``), one per user, each
            shaped like ``{"data": {"children": [{"data": {...}}, ...]}}``.

    For every user, prints ``User<N>`` (numbered from 1 in input order),
    then the list of post scores, then the list of comment counts.

    Raises:
        KeyError: if a document lacks one of the expected keys.
        json.JSONDecodeError: if a document is not valid JSON.
    """
    users_info = []
    for user_data in users_data:
        # json.loads, not json.load: user_data is a string, not a file object.
        data = json.loads(user_data)
        posts = data["data"]["children"]
        # Both fields live under each child's "data" object.
        scores = [post["data"]["score"] for post in posts]
        comments = [post["data"]["num_comments"] for post in posts]
        users_info.append((scores, comments))

    for user_id, user_info in enumerate(users_info, start=1):
        print("User" + str(user_id))
        for user_attr in user_info:
            print(user_attr)
def getUserInfo():
    """Prompt for two reddit usernames and print their post statistics.

    Keeps asking until two user listings have been downloaded, so a failed
    lookup leads to a retry instead of ending the program. The downloaded
    JSON documents are then passed to ``obtainKarma``.
    """
    # Local import keeps this function self-contained; HTTPError (e.g. 404,
    # 429) is a subclass of URLError, so one handler covers both.
    from urllib.error import URLError

    users_needed = 2
    users_data = []
    while len(users_data) < users_needed:
        username = input("Please enter username:\n")
        url = "https://reddit.com/user/" + username + ".json"
        req = request.Request(url)
        # reddit answers urllib's default User-Agent with HTTP 429.
        req.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)')
        try:
            # The context manager closes the connection once the body is read.
            with request.urlopen(req) as resp:
                user_data = resp.read().decode("utf-8")
        except URLError as e:
            print(e)
            print("No such user.\nRetry Please.\n")
            # No re-raise: go back to the prompt, as the message promises.
            continue
        if user_data:
            users_data.append(user_data)
    obtainKarma(users_data)
if __name__ == '__main__':
getUserInfo()
There were still other issues with your code:
You should not write json.load(user_data) here: json.load expects a file-like object with a .read() method, whereas user_data is now the decoded response body (a str). So I changed it to use json.loads(user_data).
The Python documentation for json.loads states:
Deserialize s (a str instance containing a JSON document) to a Python object using this conversion table.
And in the code comment = posts[post_id]["num_comments"], I think you forgot to index on 'data', so I changed it to comment = posts[post_id]["data"]["num_comments"]
And why are you raising the exception in the except-block? This will end the program, however it seems that you expect it not to, from looking at the following code:
print("No such user.\nRetry Please.\n")

Related

loop efficiency and performance impact calling api in python

Team:
My concern is on redundancy, efficient use of loops and best approach to get the desired result.
Usecase: get on call user name and create jira ticket with it.
below is my entire code and it runs fine for me. This is my very first OOP project.
Flow: I am calling two APIs (the JIRA API and the pager API).
First I call the pager API to find out who is currently on call. The response is a list of nested dicts, which I loop over.
Then I call the JIRA API to create a ticket assigned to that on-call user.
I want to learn how to calculate Big-O complexity and improve the code.
Since this is my very first attempt, could you point out any problems, inefficiencies, or divergences from standard practice?
import requests
import json
import os
from jira import JIRA
from pdpyras import APISession
from collections import OrderedDict
JIRA_DICT_KEY = "JIRA"
JIRA_CONFIG = {'server': "https://jirasw.tom.com"}
JIRA_USER = os.environ['JIRA_USER']
JIRA_PW = os.environ['JIRA_PW']
PD_API_KEY = os.environ['PD_API_KEY']
USER_EMAIL = os.environ['USER_EMAIL']
class ZidFinder(object):
# Looks up on-call users through the pager API (duty_oncall) and creates JIRA
# tickets (create_jiras).
def __init__(self):
self.active_zid_errors = dict()
self.team_oncall_dict = dict()
# NOTE(review): every return in duty_oncall is a bare `return`, so this stores
# None; the lookup result lands in self.team_oncall_dict instead.
self.onCall = self.duty_oncall()
self.jira = self.init_jira()
def init_jira(self):
# Build a JIRA client from the module-level server config and credentials.
jira = JIRA(options=JIRA_CONFIG, auth=(JIRA_USER, JIRA_PW))
return jira
def duty_oncall(self, *args):
# Find the team named teamnm, then its escalation policy named team_esp_name,
# then record the level 1 and 2 on-call users in self.team_oncall_dict
# (schedule summary -> user summary).
# Optional positional args: args[0] is the paging offset for /teams and
# args[1] the total number of teams.
session = APISession(PD_API_KEY, default_from=USER_EMAIL)
total = 1 #true or false
limit = 100 # this var is to pull limit records at a time.
teamnm = "Product SRE Team"
team_esp_name = "Product SRE Escalation Policy"
teamid = ""
teamesplcyid = ""
if args:
offset = args[0]
total_teams = args[1]
if offset <= total_teams:
print("\nfunc with args with new offset {} called\n".format(offset))
teams = session.get('/teams?limit={0}&total={1}&offset={2}'.format(limit,total,offset))
else:
print("Reached max teams, no more team records to pull")
return
else:
print("\nPull first set of {} teams as defined by limit var and loop more if team not found..\n".format(limit))
teams = session.get('/teams?limit={0}&total={1}'.format(limit,total))
if not teams.ok:
return
else:
tj = teams.json()
tjd = tj['teams']
print("\n")
# Scan this page of teams for the one whose name equals teamnm.
for adict in tjd:
if not adict['name'] == teamnm:
continue
elif adict['name'] == teamnm:
teamid = adict['id']
print("Found team..\n",adict['name'], "id: {0}".format(teamid))
esclp = session.get('/escalation_policies?total={0}&team_ids%5B%5D={1}'.format(total,teamid))
if not esclp.ok:
print("Failed pulling Escalation polices for team '{}'".format(teamnm))
return
else:
# NOTE(review): ep is assigned but not used afterwards in this method.
ep = esclp.json()
epj = esclp.json()['escalation_policies']
if not epj:
print("Escalation polices for team '{}' not defined".format(teamnm))
return
else:
for adict in epj:
if not adict['summary'] == team_esp_name:
continue
else:
teamesplcyid = adict['id']
print("{} id: {}\n".format(team_esp_name, teamesplcyid))
oncalls = session.get('/oncalls?total={0}&escalation_policy_ids%5B%5D={1}'.format(total,teamesplcyid))
if not oncalls.ok:
print("Issue in getting oncalls")
return
else:
ocj = oncalls.json()['oncalls']
# Keep only escalation levels 1 and 2.
for adict in ocj:
if adict['escalation_level'] == 1 or adict['escalation_level'] == 2:
self.team_oncall_dict[adict['schedule']['summary']] = adict['user']['summary']
continue
if self.team_oncall_dict:
if len(self.team_oncall_dict) == 1:
print("\nOnly Primary onCall is defined")
print("\n",self.team_oncall_dict)
else:
print(" Primary and other calls defined")
print("\n",OrderedDict(self.team_oncall_dict),"\n")
return
else:
print("Calling with next offset as team was not found in the records pulled under limit..")
if tj['offset'] <= tj['total'] or tj['more'] == True:
setoffset = limit + tj['offset']
# NOTE(review): self.onCall is only assigned in __init__ from the return value
# of duty_oncall, so it is not a callable here; presumably
# self.duty_oncall(setoffset, tj['total']) was intended -- confirm.
self.onCall(setoffset, tj['total'])
def create_jiras(self):
# Create a JIRA issue, assigned to the primary on-call user when one is known.
node = ["node1", "node2"]
zid_label = ["id90"]
labels = [node, zid_label]
# NOTE(review): node is a list and is used as a key into self.active_zid_errors
# (a dict); lists are unhashable, so this lookup raises TypeError as written.
print("Creating a ticket for node {} with description: {}".format(node, str(self.active_zid_errors[node])))
# NOTE(review): msre_oncall_dict is never assigned in this class; presumably
# self.team_oncall_dict was meant -- confirm.
if self.msre_oncall_dict:
print("Current onCalls pulled from Duty, use them as assignee in creating jira tickets..")
new_issue = self.jira.create_issue(project='TEST', summary='ZID error on node {}'.format(node),
description=str(self.active_zid_errors[node]), issuetype={'name': 'Bug'}, assignee={'name': self.msre_oncall_dict['Product SRE Primary']},labels=labels)
print("Created a new ticket: ", new_issue.key, new_issue.fields.summary)
self.active_zid_errors[node][JIRA_DICT_KEY] = new_issue.key
else:
print("Current onCalls were not pulled from Duty, create jira with defautl assignee..")
new_issue = self.jira.create_issue(project='TEST', summary='ZID error on node {}'.format(node),
description=str(self.active_zid_errors[node]), issuetype={'name': 'Bug'},labels=labels)
print("Created a new ticket: ", new_issue.key, new_issue.fields.summary)
self.active_zid_errors[node][JIRA_DICT_KEY] = new_issue.key
if __name__== "__main__":
o = ZidFinder()

bad performance for loop with instance and bulk create

I need to use bulk_create to create a lot of "detalle" (detail) rows. The problem is that I have to iterate through a JSON payload to get the arguments, and the model has 4 foreign keys, so Django asks me for the related instance, not the id. To get each instance I have to do a .get(), so performance is bad: that is 4 .get() calls per iteration.
Is there a way to fetch all the object instances up front and keep them in a list (or something similar), so that I can look up each instance without calling .get() every time?
class DetalleFichaAllViewSet(viewsets.ModelViewSet):
serializer_class = DetalleFichaUpdateAllSerializer
def create(self, request, *args, **kwargs):
# Replace the requesting user's DetalleFicha rows with the entries in
# request.data["detalles"], then return HTTP 200 with an empty body.
user = self.request.user
data = request.data
try:
ficha = Ficha.objects.get(autor=user.id)
DetalleFicha.objects.filter(ficha=ficha.id).delete()
# NOTE(review): presumably Ficha.objects.get raises Ficha.DoesNotExist rather
# than Http404 when no row matches, so this handler may never fire -- confirm.
except Http404:
pass
# Create Ficha
# NOTE(review): despite the comment above, the statement below is a
# filter(...).update(...), which only touches rows that already exist.
now = datetime.now()
date_time = now.strftime("%Y-%m-%d %H:%M")
print("AAAAAA DATA:", data)
Ficha.objects.filter(autor=user.id).update(fecha_creacion=date_time, autor=user,
nombre=data["nombreFicha"], descripcion=data["descripcionFicha"])
ficha = Ficha.objects.filter(autor=user.id).last()
recintos = Recinto.objects.all()
productos = Producto.objects.all()
estandar_productos = EstandarProducto.objects.all()
cotizaciones = Cotizacion.objects.all()
detalles_ficha = []
# Each detalle is read positionally (indices 0..10). The .get() calls below
# run once per item; this is the per-iteration cost the question is about.
for detalle in data["detalles"]:
recinto = recintos.get(id=detalle[1])
producto = productos.get(id=detalle[10])
estandar_producto = estandar_productos.get(id=detalle[9])
try:
cotizacion = cotizaciones.get(id=detalle[4])
except ObjectDoesNotExist:
# A missing Cotizacion is tolerated and stored as None.
cotizacion = None
print("Fecha: ", detalle[8])
# The loop variable is rebound from the raw entry to the model instance.
detalle = DetalleFicha(carreras=detalle[0],
recinto=recinto, nombre=detalle[2],
cantidad_a_comprar=detalle[3], cotizacion=cotizacion,
valor_unitario=detalle[5], valor_total=detalle[6],
documento=detalle[7], fecha_cotizacion=detalle[8],
estandar_producto=estandar_producto, producto=producto,
ficha=ficha)
detalles_ficha.append(detalle)
DetalleFicha.objects.bulk_create(detalles_ficha)
print("Array convertida", detalles_ficha)
# NOTE(review): indexing [0] raises IndexError when "detalles" is empty.
print(detalles_ficha[0])
return Response(status=status.HTTP_200_OK)

tweepy It won't follow some of the tweets

It seems that for some of the tweets containing the keyword 'follow'
it will follow the user, and for some of them it won't...
Other than that it works fine (I didn't notice anything else).
Can someone pinpoint where the problem is?
class Listener():
def search(self, twts):
# For each tweet in twts: retweet and favorite it, and follow a user when the
# text mentions "follow"/"following".
# Relies on api, keywords, bannedwords and numoftwts defined outside this class.
global numoftwts
for i in twts:
names = ['follow', 'following']
txt = i.text.lower()
# Skip tweets that contain none of the keywords or any banned word.
if not any(k in txt for k in keywords) or any(k in txt for k in bannedwords):
continue
if not self.is_user_bot_hunter(str(i.author.screen_name)):
if not i.retweeted:
try:
print("Trying to retweet status_id:{}".format(i.id))
res = api.retweet(i.id)
if res.retweeted:
api.create_favorite(i.id)
print('retweeted', numoftwts, 'times', '-',
str(datetime.datetime.fromtimestamp(time.time()).strftime('%d-%m-%Y %H:%M:%S')))
print(i.text)
print('\n')
else:
print("retweet failed")
if any(c in txt for c in names):
# print("Trying to follow something")
# if hasattr(i, 'retweeted_status'):
# print("trying to fetch user_id")
# NOTE(review): retweeted_status is read without the hasattr check that is
# commented out above, so a tweet lacking that attribute raises
# AttributeError and the follow is skipped.
user_id = i.retweeted_status.user.id_str
res = api.create_friendship(user_id)
res = api.get_user(user_id)
if res.following:
print("Successfully followed :{}".format(user_id))
print('\n')
except Exception as e:
# NOTE(review): the format string has no {} placeholder, so the exception
# text is never printed -- only "Exception:".
print("Exception:".format(str(e)))
continue
sleep(600)
def run(self):
# Run one search per keyword and pass the results to search().
for eachkey in keywords:
tweets = api.search(q=eachkey, result_type='mixed', lang='en')
self.search(tweets)
if __name__ == '__main__':
while True:
r = Listener()
r.run()
where did I go wrong?
AttributeError: 'Status' object has no attribute 'retweeted_status'
> c:\users\x\desktop\twitterbot\twtbotcopy.py(64)search()
-> user_id = i.retweeted_status.user.id_str
(Pdb) n
> c:\users\x\desktop\twitterbot\twtbotcopy.py(70)search()
-> except Exception as e:
(Pdb) n
If you're getting an error because you are unable to fetch tweets from a particular user, then use:
try:
specific_tweets = tweepy.Cursor(api.search, tweet_mode='extended', q= <some query>, lang='en').items(500)
except tweepy.error.TweepError:
pass
And if you want to access the retweeted attribute of a tweet then do this:
if hasattr(tweet, 'retweeted_status'):
extracted_author = tweet.retweeted_status.user.screen_name
else: extracted_author = tweet.user.screen_name
basically check whether hasattr(tweet, 'retweeted_status') of a tweet is true or not. It checks whether the tweet has the attribute named "retweeted_status"
AttributeError: 'Status' object has no attribute 'retweeted_status'
-> user_id = i.retweeted_status.user.id_str
It means that you are trying to get the original author's user ID from a retweet, but the tweet is not a retweet.
If you want to know whether a tweet is a RT, the test is:
if hasattr(tweet, 'retweeted_status'):
# this tweet is a RT

str(int(time.time())) gives error string indices must be integers in python 3

I am new to Python. I created a pip package for my site's API, and in it I need to pass a nonce. I wrote the code below:
payload = {}
headers = {}
if self.api_key is not None:
payload["api_key"] = self.api_key
if self.secret_key is not None:
payload["secret_key"] = self.secret_key
payload["request"] = method
payload["nonce"] = str(int(time.time()))
payload.update(kwargs)
headers["X-WCX-APIKEY"] = self.api_key
headers["X-WCX-PAYLOAD"] = base64.b64encode(json.dumps(payload).encode('utf-8'))
headers["X-WCX-SIGNATURE"] = 'SIGNATURE'
url = self.base_url.replace('API_CALL',method)
# update the parameters with the API key
session = requests.session()
response = session.post(url, data = payload, headers = headers)
When I run this code I get the error "string indices must be integers".
How can I fix it? Any help is appreciated.
Note: I do import the "time" module.

Python 3.4 : continue function after try and except

Question
I am currently pulling data from yahoo finance. When using try and except, the function stops after the error has been reached. How can I continue the function after the except statement to pull the remaining data for stocks in the index?
index = sp500
def yhooKeyStats():
# Print the Goodwill figure scraped from the balance-sheet page of every
# ticker in the module-level `index`.
# NOTE(review): the try statement encloses the whole for loop, so the first
# IndexError ends the loop and the remaining tickers are skipped.
try:
for eachStock in index:
# Only bsUrl is used below; isUrl and cfUrl are built but not fetched here.
isUrl = 'http://finance.yahoo.com/q/is?s='+eachStock+'+Income+Statement&annual'
bsUrl = 'http://finance.yahoo.com/q/bs?s='+eachStock+'+Balance+Sheet&annual'
cfUrl = 'http://finance.yahoo.com/q/cf?s='+eachStock+'+Cash+Flow&annual'
def bsYhooStats(url):
# Fetch url and return the text between the Goodwill cell marker and the next
# ' &nbsp', or '-' when that text is 14 characters or longer.
req = urllib.request.Request(url)
resp = urllib.request.urlopen(req)
respData = resp.read()
dRespData = respData.decode('utf-8')
# The [1] index raises IndexError when the marker is absent from the page.
gw = dRespData.split('Goodwill</td><td align="right">')[1].split(' &nbsp')[0]
if len(gw) < 14:
gw = gw
else:
gw = '-'
return gw
print(eachStock, bsYhooStats(bsUrl))
except IndexError:
pass
yhooKeyStats()
Output
MMM 7,050,000
ABT 10,067,000
ABBV 5,862,000
ACN 2,395,894
ACE -
ACT 24,521,500
ADT 3,738,000
AES 1,458,000
AET 10,613,200
AFL -
Just put the try/except inside the loop. One of several possibilities:
for eachStock in index:
...
try:
def bsYhooStats(url):
...
return gw if len(gw) < 14 else '-'
print(eachStock, bsYhooStats(bsUrl))
except IndexError:
pass

Resources