Failed to Retrieve HTTP Error 401: Unauthorized - python-3.x

I'm running a Python script to connect to the Twitter API and count my friends. When I run it I get the 401 error in the title. Any help would be appreciated.
import urllib.request, urllib.parse, urllib.error
import twurl
import json
import sqlite3
import ssl

TWITTER_URL = 'https://api.twitter.com/1.1/friends/list.json'

conn = sqlite3.connect('friends.sqlite')
cur = conn.cursor()

cur.execute('''CREATE TABLE IF NOT EXISTS People
            (id INTEGER PRIMARY KEY, name TEXT UNIQUE, retrieved INTEGER)''')
cur.execute('''CREATE TABLE IF NOT EXISTS Follows
            (from_id INTEGER, to_id INTEGER, UNIQUE(from_id, to_id))''')

# Ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

while True:
    acct = input('Enter a Twitter account, or quit: ')
    if (acct == 'quit'): break
    if (len(acct) < 1):
        cur.execute('SELECT id, name FROM People WHERE retrieved=0 LIMIT 1')
        try:
            (id, acct) = cur.fetchone()
        except:
            print('No unretrieved Twitter accounts found')
            continue
    else:
        cur.execute('SELECT id FROM People WHERE name = ? LIMIT 1',
                    (acct, ))
        try:
            id = cur.fetchone()[0]
        except:
            cur.execute('''INSERT OR IGNORE INTO People
                        (name, retrieved) VALUES (?, 0)''', (acct, ))
            conn.commit()
            if cur.rowcount != 1:
                print('Error inserting account:', acct)
                continue
            id = cur.lastrowid

    url = twurl.augment(TWITTER_URL, {'screen_name': acct, 'count': '100'})
    print('Retrieving account', acct)
    try:
        connection = urllib.request.urlopen(url, context=ctx)
    except Exception as err:
        print('Failed to Retrieve', err)
        break

    data = connection.read().decode()
    headers = dict(connection.getheaders())
    print('Remaining', headers['x-rate-limit-remaining'])

    try:
        js = json.loads(data)
    except:
        print('Unable to parse json')
        print(data)
        break

    # Debugging
    print(json.dumps(js, indent=4))

    if 'users' not in js:
        print('Incorrect JSON received')
        print(json.dumps(js, indent=4))
        continue

    cur.execute('UPDATE People SET retrieved=1 WHERE name = ?', (acct, ))

    countnew = 0
    countold = 0
    for u in js['users']:
        friend = u['screen_name']
        print(friend)
        cur.execute('SELECT id FROM People WHERE name = ? LIMIT 1',
                    (friend, ))
        try:
            friend_id = cur.fetchone()[0]
            countold = countold + 1
        except:
            cur.execute('''INSERT OR IGNORE INTO People (name, retrieved)
                        VALUES (?, 0)''', (friend, ))
            conn.commit()
            if cur.rowcount != 1:
                print('Error inserting account:', friend)
                continue
            friend_id = cur.lastrowid
            countnew = countnew + 1
        cur.execute('''INSERT OR IGNORE INTO Follows (from_id, to_id)
                    VALUES (?, ?)''', (id, friend_id))
    print('New accounts=', countnew, ' revisited=', countold)
    print('Remaining', headers['x-rate-limit-remaining'])
    conn.commit()

cur.close()
Also, I am running this in Visual Studio Code, in case that matters.
I tried to cd into the proper folder because at first Visual Studio Code was running the script from a different directory, and I thought it couldn't find the files it needed to import. I expected that once the files were found the program would run correctly, so I assume the remaining problem is with the OAuth.
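For what it's worth, a 401 from this endpoint usually means Twitter rejected the OAuth signature on the request, not that anything is wrong with the working directory. If you are using the Py4E-style twurl/hidden.py pair (an assumption about your setup; adjust the names if yours differs), a quick sanity check before the main loop is to confirm the four credentials are actually filled in. A minimal sketch:

# Hedged sketch: assumes a Py4E-style hidden.py whose oauth() function
# returns the four Twitter credentials that twurl.augment() signs with.
import hidden

secrets = hidden.oauth()
for name in ('consumer_key', 'consumer_secret', 'token_key', 'token_secret'):
    value = secrets.get(name, '')
    # Empty or placeholder values are the usual cause of a 401 response.
    print(name, 'looks set' if len(value) > 10 else 'MISSING OR PLACEHOLDER')

If the keys look fine, regenerating them in the Twitter developer portal and checking that your app has the required access level is the next thing to try.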

Related

Any idea how the id is generated in the database?

I can't understand how the id is automatically generated in the database; I thought the id column in the "Pages" table was supposed to be declared with "autoincrement".
Also, can anyone explain this code block? I could not understand the part "if not found : continue":
for web in webs:
    if ( href.startswith(web) ) :
        found = True
        break

if not found : continue
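For the first question: in SQLite, a column declared INTEGER PRIMARY KEY is an alias for the built-in rowid, so ids are assigned automatically on insert even without the AUTOINCREMENT keyword. A minimal sketch (using a throwaway in-memory database rather than spider.sqlite) to see this:

# Sketch: INTEGER PRIMARY KEY gets an automatic rowid-based value per insert.
import sqlite3

conn = sqlite3.connect(':memory:')
cur = conn.cursor()
cur.execute('CREATE TABLE Pages (id INTEGER PRIMARY KEY, url TEXT UNIQUE)')
cur.execute('INSERT INTO Pages (url) VALUES (?)', ('http://example.com',))
cur.execute('INSERT INTO Pages (url) VALUES (?)', ('http://example.org',))
print(cur.execute('SELECT id, url FROM Pages').fetchall())
# [(1, 'http://example.com'), (2, 'http://example.org')]

As for "if not found : continue": the loop above it sets found to True as soon as href starts with one of the stored web prefixes and then breaks out. If none of the prefixes matched, found is still False, and continue skips to the next anchor tag, so off-site links are never inserted into Pages or Links.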
import sqlite3
import urllib.error
import ssl
from urllib.parse import urljoin
from urllib.parse import urlparse
from urllib.request import urlopen
from bs4 import BeautifulSoup

# Ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

conn = sqlite3.connect('spider.sqlite')
cur = conn.cursor()

cur.execute('''CREATE TABLE IF NOT EXISTS Pages
    (id INTEGER PRIMARY KEY, url TEXT UNIQUE, html TEXT,
     error INTEGER, old_rank REAL, new_rank REAL)''')

cur.execute('''CREATE TABLE IF NOT EXISTS Links
    (from_id INTEGER, to_id INTEGER, UNIQUE(from_id, to_id))''')

cur.execute('''CREATE TABLE IF NOT EXISTS Webs (url TEXT UNIQUE)''')

# Check to see if we are already in progress...
cur.execute('SELECT id,url FROM Pages WHERE html is NULL and error is NULL ORDER BY RANDOM() LIMIT 1')
row = cur.fetchone()
if row is not None:
    print("Restarting existing crawl. Remove spider.sqlite to start a fresh crawl.")
else :
    starturl = input('Enter web url or enter: ')
    if ( len(starturl) < 1 ) : starturl = 'http://www.dr-chuck.com/'
    if ( starturl.endswith('/') ) : starturl = starturl[:-1]
    web = starturl
    if ( starturl.endswith('.htm') or starturl.endswith('.html') ) :
        pos = starturl.rfind('/')
        web = starturl[:pos]

    if ( len(web) > 1 ) :
        cur.execute('INSERT OR IGNORE INTO Webs (url) VALUES ( ? )', ( web, ) )
        cur.execute('INSERT OR IGNORE INTO Pages (url, html, new_rank) VALUES ( ?, NULL, 1.0 )', ( starturl, ) )
        conn.commit()

# Get the current webs
cur.execute('''SELECT url FROM Webs''')
webs = list()
for row in cur:
    webs.append(str(row[0]))

print(webs)

many = 0
while True:
    if ( many < 1 ) :
        sval = input('How many pages:')
        if ( len(sval) < 1 ) : break
        many = int(sval)
    many = many - 1

    cur.execute('SELECT id,url FROM Pages WHERE html is NULL and error is NULL ORDER BY RANDOM() LIMIT 1')
    try:
        row = cur.fetchone()
        # print row
        fromid = row[0]
        url = row[1]
    except:
        print('No unretrieved HTML pages found')
        many = 0
        break

    print(fromid, url, end=' ')

    # If we are retrieving this page, there should be no links from it
    cur.execute('DELETE from Links WHERE from_id=?', (fromid, ) )
    try:
        document = urlopen(url, context=ctx)

        html = document.read()
        if document.getcode() != 200 :
            print("Error on page: ",document.getcode())
            cur.execute('UPDATE Pages SET error=? WHERE url=?', (document.getcode(), url) )

        if 'text/html' != document.info().get_content_type() :
            print("Ignore non text/html page")
            cur.execute('DELETE FROM Pages WHERE url=?', ( url, ) )
            conn.commit()
            continue

        print('('+str(len(html))+')', end=' ')

        soup = BeautifulSoup(html, "html.parser")
    except KeyboardInterrupt:
        print('')
        print('Program interrupted by user...')
        break
    except:
        print("Unable to retrieve or parse page")
        cur.execute('UPDATE Pages SET error=-1 WHERE url=?', (url, ) )
        conn.commit()
        continue

    cur.execute('INSERT OR IGNORE INTO Pages (url, html, new_rank) VALUES ( ?, NULL, 1.0 )', ( url, ) )
    cur.execute('UPDATE Pages SET html=? WHERE url=?', (memoryview(html), url ) )
    conn.commit()

    # Retrieve all of the anchor tags
    tags = soup('a')
    count = 0
    for tag in tags:
        href = tag.get('href', None)
        if ( href is None ) : continue
        # Resolve relative references like href="/contact"
        up = urlparse(href)
        if ( len(up.scheme) < 1 ) :
            href = urljoin(url, href)
        ipos = href.find('#')
        if ( ipos > 1 ) : href = href[:ipos]
        if ( href.endswith('.png') or href.endswith('.jpg') or href.endswith('.gif') ) : continue
        if ( href.endswith('/') ) : href = href[:-1]
        # print href
        if ( len(href) < 1 ) : continue

        # Check if the URL is in any of the webs
        found = False
        for web in webs:
            if ( href.startswith(web) ) :
                found = True
                break

        if not found : continue

        cur.execute('INSERT OR IGNORE INTO Pages (url, html, new_rank) VALUES ( ?, NULL, 1.0 )', ( href, ) )
        count = count + 1
        conn.commit()

        cur.execute('SELECT id FROM Pages WHERE url=? LIMIT 1', ( href, ))
        try:
            row = cur.fetchone()
            toid = row[0]
        except:
            print('Could not retrieve id')
            continue
        # print fromid, toid
        cur.execute('INSERT OR IGNORE INTO Links (from_id, to_id) VALUES ( ?, ? )', ( fromid, toid ) )

    print(count)

cur.close()

Loop efficiency and performance impact of calling APIs in Python

Team:
My concerns are redundancy, efficient use of loops, and the best approach to get the desired result.
Use case: get the on-call user's name and create a Jira ticket with it.
Below is my entire code; it runs fine for me. This is my very first OOP project.
Flow: I am calling two APIs (Jira and PagerDuty).
First I call the PagerDuty API to find who is currently on call; the response is a list of nested dicts that I loop over.
Then I call the Jira API to create a ticket assigned to that on-call user.
I want to learn to calculate Big O and improve.
Since this is my very first attempt, could someone point out any problems, inefficiencies, or divergences from standard practice?
import requests
import json
import os
from jira import JIRA
from pdpyras import APISession
from collections import OrderedDict

JIRA_DICT_KEY = "JIRA"
JIRA_CONFIG = {'server': "https://jirasw.tom.com"}
JIRA_USER = os.environ['JIRA_USER']
JIRA_PW = os.environ['JIRA_PW']
PD_API_KEY = os.environ['PD_API_KEY']
USER_EMAIL = os.environ['USER_EMAIL']

class ZidFinder(object):
    def __init__(self):
        self.active_zid_errors = dict()
        self.team_oncall_dict = dict()
        self.onCall = self.duty_oncall()
        self.jira = self.init_jira()

    def init_jira(self):
        jira = JIRA(options=JIRA_CONFIG, auth=(JIRA_USER, JIRA_PW))
        return jira

    def duty_oncall(self, *args):
        session = APISession(PD_API_KEY, default_from=USER_EMAIL)
        total = 1  # true or false
        limit = 100  # this var is to pull limit records at a time.
        teamnm = "Product SRE Team"
        team_esp_name = "Product SRE Escalation Policy"
        teamid = ""
        teamesplcyid = ""
        if args:
            offset = args[0]
            total_teams = args[1]
            if offset <= total_teams:
                print("\nfunc with args with new offset {} called\n".format(offset))
                teams = session.get('/teams?limit={0}&total={1}&offset={2}'.format(limit,total,offset))
            else:
                print("Reached max teams, no more team records to pull")
                return
        else:
            print("\nPull first set of {} teams as defined by limit var and loop more if team not found..\n".format(limit))
            teams = session.get('/teams?limit={0}&total={1}'.format(limit,total))
        if not teams.ok:
            return
        else:
            tj = teams.json()
            tjd = tj['teams']
            print("\n")
            for adict in tjd:
                if not adict['name'] == teamnm:
                    continue
                elif adict['name'] == teamnm:
                    teamid = adict['id']
                    print("Found team..\n",adict['name'], "id: {0}".format(teamid))
                    esclp = session.get('/escalation_policies?total={0}&team_ids%5B%5D={1}'.format(total,teamid))
                    if not esclp.ok:
                        print("Failed pulling Escalation polices for team '{}'".format(teamnm))
                        return
                    else:
                        ep = esclp.json()
                        epj = esclp.json()['escalation_policies']
                        if not epj:
                            print("Escalation polices for team '{}' not defined".format(teamnm))
                            return
                        else:
                            for adict in epj:
                                if not adict['summary'] == team_esp_name:
                                    continue
                                else:
                                    teamesplcyid = adict['id']
                                    print("{} id: {}\n".format(team_esp_name, teamesplcyid))
                                    oncalls = session.get('/oncalls?total={0}&escalation_policy_ids%5B%5D={1}'.format(total,teamesplcyid))
                                    if not oncalls.ok:
                                        print("Issue in getting oncalls")
                                        return
                                    else:
                                        ocj = oncalls.json()['oncalls']
                                        for adict in ocj:
                                            if adict['escalation_level'] == 1 or adict['escalation_level'] == 2:
                                                self.team_oncall_dict[adict['schedule']['summary']] = adict['user']['summary']
                                                continue
                                        if self.team_oncall_dict:
                                            if len(self.team_oncall_dict) == 1:
                                                print("\nOnly Primary onCall is defined")
                                                print("\n",self.team_oncall_dict)
                                            else:
                                                print(" Primary and other calls defined")
                                                print("\n",OrderedDict(self.team_oncall_dict),"\n")
                                            return
            else:
                print("Calling with next offset as team was not found in the records pulled under limit..")
                if tj['offset'] <= tj['total'] or tj['more'] == True:
                    setoffset = limit + tj['offset']
                    self.onCall(setoffset, tj['total'])

    def create_jiras(self):
        node = ["node1", "node2"]
        zid_label = ["id90"]
        labels = [node, zid_label]
        print("Creating a ticket for node {} with description: {}".format(node, str(self.active_zid_errors[node])))
        if self.msre_oncall_dict:
            print("Current onCalls pulled from Duty, use them as assignee in creating jira tickets..")
            new_issue = self.jira.create_issue(project='TEST', summary='ZID error on node {}'.format(node),
                description=str(self.active_zid_errors[node]), issuetype={'name': 'Bug'}, assignee={'name': self.msre_oncall_dict['Product SRE Primary']}, labels=labels)
            print("Created a new ticket: ", new_issue.key, new_issue.fields.summary)
            self.active_zid_errors[node][JIRA_DICT_KEY] = new_issue.key
        else:
            print("Current onCalls were not pulled from Duty, create jira with defautl assignee..")
            new_issue = self.jira.create_issue(project='TEST', summary='ZID error on node {}'.format(node),
                description=str(self.active_zid_errors[node]), issuetype={'name': 'Bug'}, labels=labels)
            print("Created a new ticket: ", new_issue.key, new_issue.fields.summary)
            self.active_zid_errors[node][JIRA_DICT_KEY] = new_issue.key

if __name__ == "__main__":
    o = ZidFinder()
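As a point of comparison for the paging concern, here is a minimal sketch (not the author's code; it assumes the same pdpyras APISession and the team name from the question) that walks /teams iteratively with limit/offset instead of recursing through duty_oncall. The work stays linear in the number of teams and the paging logic lives in one place:

# Hedged sketch: iterative paging through PagerDuty /teams with pdpyras.
import os
from pdpyras import APISession

def find_team_id(session, team_name, limit=100):
    """Page through /teams until team_name is found or the pages run out."""
    offset = 0
    while True:
        resp = session.get('/teams?limit={}&offset={}&total=true'.format(limit, offset))
        if not resp.ok:
            return None
        payload = resp.json()
        for team in payload['teams']:
            if team['name'] == team_name:
                return team['id']
        if not payload.get('more'):
            return None  # no more pages and no match
        offset += limit

session = APISession(os.environ['PD_API_KEY'], default_from=os.environ['USER_EMAIL'])
print(find_team_id(session, "Product SRE Team"))

pdpyras also provides APISession.iter_all, which handles this offset pagination for you, so in practice you may not need to hand-roll the loop at all.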

My Python Gmail API automation isn't retrieving all the emails that it found

I'm trying to automate a task that reads my emails based on criteria, downloads the attachments found, and then uploads them to a Google Drive folder.
This is a part of my code:
def search_email(service, query_string, label_ids=[]):
    try:
        message_list_response = (
            service.users()
            .messages()
            .list(userId="me", labelIds=label_ids, q=query_string)
            .execute()
        )

        message_items = message_list_response.get("messages")
        nextPageToken = message_list_response.get("nextPageToken")

        while nextPageToken:
            message_list_response = (
                service.users()
                .messages()
                .list(
                    userId="me",
                    labelIDs=label_ids,
                    q=query_string,
                    pageToken=nextPageToken,
                )
                .execute()
            )

            message_items.extend(message_list_response.get("messages"))
            nextPageToken = message_list_response.get("nextPageToken")

        return message_items
    except Exception as e:
        return None


def get_message_detail(service, message_id, format="metadata", metadata_headers=[]):
    try:
        message_detail = (
            service.users()
            .messages()
            .get(
                userId="me",
                id=message_id,
                format=format,
                metadataHeaders=metadata_headers,
            )
            .execute()
        )
        return message_detail
    except Exception as e:
        print(e)
        return None


def create_folder_in_drive(service, folder_name, parent_folder=[]):
    file_metadata = {
        "name": folder_name,
        "parents": parent_folder,
        "mimeType": "application/vnd.google-apps.folder",
    }
    file = service.files().create(body=file_metadata, fields="id").execute()
    return file


"""
Step 1: Create Google Service instances
"""
gmail_service = construct_service("gmail")
time.sleep(2)
drive_service = construct_service("drive")

"""
Step 2: Search emails (with attachments)
"""
query_string = "has:attachment subject:Rodobens"
email_messages = search_email(gmail_service, query_string, ["INBOX"])

"""
Step 3: Download Emails and Create Drive Folder_name
"""
for email_message in email_messages:
    print("email_message:", email_message)
    messageId = email_message["threadId"]
    messageDetail = get_message_detail(
        gmail_service, email_message["id"], format="full", metadata_headers=["parts"]
    )
    messageDetailPayload = messageDetail.get("payload")

    for item in messageDetailPayload["headers"]:
        if item["name"] == "Subject":
            if item["value"]:
                messageSubject = "{0} ({1})".format(item["value"], messageId)
            else:
                messageSubject = "(No Subject) ({0})".format(messageId)
When I run the entire code, it creates a folder in Google Drive, but it processes only one email, in this case the oldest one.
Debugging the code, I can see 3 emails.
You can see below that the 3 emails I want to grab are all found, but messageSubject ends up holding only the oldest one.
email_message: {'id': '17c80cb8116afd06', 'threadId': '17c80cb31d258cfb'}
email_message: {'id': '17c7a84897a9742b', 'threadId': '17c7a83c87fd98bd'}
email_message: {'id': '17c6bc47dff5a1f4', 'threadId': '17c6bc4111f4f8bc'}
messageSubject: Relatório - Vendas Consórcio Rodobens – Franq (17c6bc4111f4f8bc)
So it recognizes 3 emails but only returns one.
Any idea what may be wrong here?
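Two things worth checking, offered as suggestions rather than a definitive diagnosis. First, the Gmail client's keyword is labelIds; the second list() call above spells it labelIDs, which would raise a TypeError as soon as a search spans more than one page. Second, printing messageSubject inside the for loop (rather than only after it) confirms whether each message is actually processed or whether only the last assignment survives. A minimal page-by-page listing sketch, assuming the same authorized service object as in the question:

# Hedged sketch: list every matching message across pages with the Gmail API.
def list_all_messages(service, query_string, label_ids=None):
    params = {"userId": "me", "q": query_string, "labelIds": label_ids or []}
    messages = []
    while True:
        response = service.users().messages().list(**params).execute()
        messages.extend(response.get("messages", []))
        token = response.get("nextPageToken")
        if not token:
            return messages
        params["pageToken"] = token  # fetch the next page on the next pass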

tweepy: it won't follow some of the tweets

It seems that for some of the tweets with the keyword 'follow' it will follow the author, and for some of them it won't.
Other than that it works fine (I didn't notice anything else).
Can someone pinpoint where the problem is?
class Listener():

    def search(self, twts):
        global numoftwts
        for i in twts:
            names = ['follow', 'following']
            txt = i.text.lower()
            if not any(k in txt for k in keywords) or any(k in txt for k in bannedwords):
                continue
            if not self.is_user_bot_hunter(str(i.author.screen_name)):
                if not i.retweeted:
                    try:
                        print("Trying to retweet status_id:{}".format(i.id))
                        res = api.retweet(i.id)
                        if res.retweeted:
                            api.create_favorite(i.id)
                            print('retweeted', numoftwts, 'times', '-',
                                  str(datetime.datetime.fromtimestamp(time.time()).strftime('%d-%m-%Y %H:%M:%S')))
                            print(i.text)
                            print('\n')
                        else:
                            print("retweet failed")

                        if any(c in txt for c in names):
                            # print("Trying to follow something")
                            # if hasattr(i, 'retweeted_status'):
                            #     print("trying to fetch user_id")
                            user_id = i.retweeted_status.user.id_str
                            res = api.create_friendship(user_id)
                            res = api.get_user(user_id)
                            if res.following:
                                print("Successfully followed :{}".format(user_id))
                                print('\n')
                    except Exception as e:
                        print("Exception:".format(str(e)))
                        continue
        sleep(600)

    def run(self):
        for eachkey in keywords:
            tweets = api.search(q=eachkey, result_type='mixed', lang='en')
            self.search(tweets)

if __name__ == '__main__':
    while True:
        r = Listener()
        r.run()
where did I go wrong?
AttributeError: 'Status' object has no attribute 'retweeted_status'
> c:\users\x\desktop\twitterbot\twtbotcopy.py(64)search()
-> user_id = i.retweeted_status.user.id_str
(Pdb) n
> c:\users\x\desktop\twitterbot\twtbotcopy.py(70)search()
-> except Exception as e:
(Pdb) n
If you're getting an error where you are unable to get tweets from a particular user, then use:
try:
    specific_tweets = tweepy.Cursor(api.search, tweet_mode='extended', q=<some query>, lang='en').items(500)
except tweepy.error.TweepError:
    pass
And if you want to access the retweeted_status attribute of a tweet, then do this:
if hasattr(tweet, 'retweeted_status'):
    extracted_author = tweet.retweeted_status.user.screen_name
else:
    extracted_author = tweet.user.screen_name
Basically, check whether hasattr(tweet, 'retweeted_status') is true or not; it tells you whether the tweet has an attribute named "retweeted_status".
AttributeError: 'Status' object has no attribute 'retweeted_status'
-> user_id = i.retweeted_status.user.id_str
It means that you are trying to get the user ID of a retweet from a tweet that is not a retweet.
If you want to know whether a tweet is a RT, the test is:
if hasattr(tweet, 'retweeted_status'):
    # this tweet is a RT
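Applied to the loop in the question, one way to make the follow step safe is a small helper (a sketch; the helper name is made up, not part of tweepy) that falls back to the tweet's own author when there is no retweeted_status:

# Sketch: pick the account to follow, guarding against plain (non-RT) tweets.
def author_to_follow(tweet):
    if hasattr(tweet, 'retweeted_status'):
        return tweet.retweeted_status.user.id_str
    return tweet.user.id_str  # not a retweet: follow the original author

Calling api.create_friendship(author_to_follow(i)) inside the loop then avoids the AttributeError shown in the pdb trace above.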

urllib error: Too many requests

The Python program below asks the user for two Reddit usernames and compares their scores.
import json
from urllib import request

def obtainKarma(users_data):
    users_info = []
    for user_data in users_data:
        data = json.load(user_data)
        posts = data["data"]["children"]
        num_posts = len(posts)
        scores = []
        comments = []
        for post_id in range(num_posts):
            score = posts[post_id]["data"]["score"]
            comment = posts[post_id]["num_comments"]
            scores.append(score)
            comments.append(comment)
        users_info.append((scores,comments))

    user_id = 0
    for user_info in users_info:
        user_id += 1
        print("User"+str(user_id))
        for user_attr in user_info:
            print(user_attr)

def getUserInfo():
    count = 2
    users_data = []
    while count:
        count = count + 1
        username = input("Please enter username:\n")
        url = "https://reddit.com/user/"+username+".json"
        try:
            user_data = request.urlopen(url)
        except:
            print("No such user.\nRetry Please.\n")
            count = count + 1
            raise
        users_data.append(user_data)
        obtainKarma(users_data)

if __name__ == '__main__':
    getUserInfo()
However, when I run the program and enter a username, I get an error:
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 429: Too Many Requests
I looked for similar issues, but none of them resolved this specific problem. Looking at the error, it would make sense to say that the URL returns an amount of data that exceeds some limit, but that seems unlikely because it is not that much data.
Thanks.
The problem seems to be resolved when you supply a User-Agent with your request.
import json
from urllib import request

def obtainKarma(users_data):
    users_info = []
    for user_data in users_data:
        data = json.loads(user_data)  # I've changed 'json.load' to 'json.loads' because you want to parse a string, not a file
        posts = data["data"]["children"]
        num_posts = len(posts)
        scores = []
        comments = []
        for post_id in range(num_posts):
            score = posts[post_id]["data"]["score"]
            comment = posts[post_id]["data"]["num_comments"]  # I think you forgot '["data"]' here, so I added it
            scores.append(score)
            comments.append(comment)
        users_info.append((scores,comments))

    user_id = 0
    for user_info in users_info:
        user_id += 1
        print("User"+str(user_id))
        for user_attr in user_info:
            print(user_attr)

def getUserInfo():
    count = 2
    users_data = []
    while count:
        count = count + 1
        username = input("Please enter username:\n")
        url = "https://reddit.com/user/"+username+".json"
        user_data = None
        try:
            req = request.Request(url)
            req.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)')
            resp = request.urlopen(req)
            user_data = resp.read().decode("utf-8")
        except Exception as e:
            print(e)
            print("No such user.\nRetry Please.\n")
            count = count + 1
            raise  # why raise? --> Program will end if user is not found
        if user_data:
            print(user_data)
            users_data.append(user_data)
            obtainKarma(users_data)

if __name__ == '__main__':
    getUserInfo()
There were still other issues with your code:
You should not write json.load(user_data), because you are parsing a string. So I changed it to use json.loads(user_data).
The Python documentation for json.loads states:
Deserialize s (a str instance containing a JSON document) to a Python object using this conversion table.
And in the code comment = posts[post_id]["num_comments"], I think you forgot to index on 'data', so I changed it to comment = posts[post_id]["data"]["num_comments"]
And why are you raising the exception in the except block? This will end the program; however, it seems you expect it not to, judging from the following code:
print("No such user.\nRetry Please.\n")
