Incomplete downloading of instagram followers - instagram

I downloaded instaloader from here.
I found the code here and modified it slightly, as shown below:
import os.path
import instaloader
# Dump the follower usernames of one Instagram account into a numbered text
# file, one username per line, printing a running count as they arrive.
directory = 'c:\\Users\\_Instagram'
os.chdir(directory)

# Pick the first "username_<n>.txt" that does not exist yet, so the output of
# an earlier run is never appended to.
i = 0
filename = "username_%s.txt"
while os.path.exists(filename % i):
    i += 1
# Fill in the chosen index; joining the raw "%s" template would not name a real file.
file_path = os.path.join(directory, filename % i)

L = instaloader.Instaloader()
# Login or load session
username = "username"
password = "password"
L.login(username, password)  # (login)
# Obtain profile metadata
profile = instaloader.Profile.from_username(L.context, username)

# Write the list of followers
follow_list = []
count = 0
# Open the output once for the whole run; the context manager closes it even
# if the follower iterator raises part-way through.
with open(file_path, "a+") as file:
    for followee in profile.get_followers():
        follow_list.append(followee.username)
        file.write(followee.username + "\n")
        # Flush per record so everything fetched so far is on disk if the run dies.
        file.flush()
        count += 1
        print(count)
It only writes & counts less than 300 followers when the actual followers count is 4000+.
Can someone help me figure out why? I'm using python 3.7.

Related

Scraping info out of pdf's using Python

I have PDFs distributed over several folders and subfolders.
I've been trying to write a short Python script that searches each PDF for any term I enter.
As not all PDFs are searchable, I also tried to produce a list of searchable and non-searchable PDFs, so that everything can be brought in line.
The program seems to work, up to a point. The longer it runs, the slower it gets.
At a certain moment, it just stops. I think it is a memory issue, but I can't seem to find a solution.
The script I have so far:
import os
# extracting_text.py
from PyPDF2 import PdfFileReader
# Term to look for in every PDF; read once, used by text_extractor().
search_word = input("enter a word you want to search in file: ")
# Running totals: PDFs seen, PDFs without extractable text, PDFs that matched.
counter = 0
noTextCounter = 0
SolutionCounter = 0
# Start a fresh results file containing only the header line.
with open("Solutions.txt", "w") as text_file:
    text_file.write(f"List of files that contain: {search_word}")
def text_extractor(path):
    """Report whether ``search_word`` occurs on the first page of a PDF.

    Only page 0 is inspected. ``search_word`` is read from module scope.

    Args:
        path: Filesystem path of the PDF to open.

    Returns:
        1 if the text extracted from the first page contains ``search_word``,
        otherwise 0 (also when no text could be extracted at all).
    """
    with open(path, 'rb') as f:
        pdf = PdfFileReader(f)
        # get the first page; extract while the file is still open
        text = pdf.getPage(0).extractText()

    if text == '':
        # To list PDFs without extractable text instead, return a flag from this branch.
        print('No text')
        return 0
    if search_word in text:
        print("word found")
        return 1
    print("word not found")
    return 0
# Walk the current directory tree, run text_extractor() on every PDF and
# append the path of each match to Solutions.txt.
root = os.getcwd()
for subdir, dirs, files in os.walk(root):
    for file in files:
        filepath = os.path.join(subdir, file)
        # Compare case-insensitively so "REPORT.PDF" is picked up as well.
        if not filepath.lower().endswith(".pdf"):
            continue
        print(filepath)
        counter = counter + 1
        print(counter)
        if __name__ == '__main__':
            path = filepath
            # Handle failures per file: one PDF that cannot be read must not
            # silently end the whole walk.
            try:
                indicator = text_extractor(path)
            except Exception as exc:
                print("skipped " + path + ": " + repr(exc))
                continue
            SolutionCounter = SolutionCounter + indicator
            print("indicator: " + str(indicator))
            if indicator == 1:
                with open("Solutions.txt", "a") as text_file:
                    text_file.write('\n' + path)

while not looping in python

So I have a script with a loop that isn't working, and I have no idea why.
I need the menu part to loop until I input 0. How could I do this with a while loop? I'm still learning how to use while loops.
#!/usr/bin/env python3
from ftplib import FTP
# Connection settings for the FTP server. The credentials are hard-coded here.
host = "localhost"
user = "chris"
password = "qwerty"
# Passing user and password to the constructor also logs in.
ftp = FTP(host,user,password)
#
# Change to the working directory on the FTP server
#
ftp.cwd("/home/chris")
#
# Names of the entries in that directory
#
files = ftp.nlst()
#
# Number of entries, used when numbering the listing
#
list_length = len(files)
# Conversion of the file list into a numbered dict
def Convert(files):
    """Number the given file names starting at 1.

    Args:
        files: Iterable of file names, in display order.

    Returns:
        dict mapping 1, 2, 3, ... to the corresponding name. Every name is
        included; an empty input gives an empty dict.
    """
    # enumerate() covers every entry. Pairing the names with
    # range(1, len(files)) stops one short and loses the last file.
    return dict(enumerate(files, start=1))
# Menu number -> remote file name.
dico_files = Convert(files)
#
# Print the numbered list of files
#
for key in dico_files:
    # print() returns None, so its result is not stored.
    print(str(key) + ": " + dico_files[key])
#
# menu: keep downloading files until the user enters 0
#
while True:
    # input() always returns a string, so the exit value is compared as "0".
    selection = input("what file to choose?").strip()
    if selection == "0":
        break
    if not selection:
        continue
    # Accept either a number from the printed list or a literal file name.
    if selection.isdigit() and int(selection) in dico_files:
        remote_name = dico_files[int(selection)]
    else:
        remote_name = selection
    # The context manager closes the local file so the download is flushed to disk.
    with open(remote_name, 'wb') as localfile:
        ftp.retrbinary('RETR ' + remote_name, localfile.write, 1024)

looking for nodes based on user input in python3

The code works well when I hardcode the nodes (e.g. node1), but not when I use user input - it always returns 0 instead of summing the numbers found under node3. Here is the page I am using: http://py4e-data.dr-chuck.net/comments_678016.xml, where node1 = comments, node2 = comment, node3 = count. Any suggestions?
import urllib.request, urllib.parse, urllib.error
import xml.etree.ElementTree as ET
import ssl
# TLS context with hostname checking and certificate verification switched off.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
# Ask for the document URL and the three element names to look up.
url = input ("Which url?\n")
node1 = input ("Enter node1- ")
node2 = input ("Enter node2- ")
node3 = input ("Enter node3- ")
# Running sum of the integer values found under node3.
count = 0
try:
html = urllib.request.urlopen(url, context=ctx).read()
tree = ET.fromstring(html)
# NOTE(review): node1/node2 applies the division operator to two strings, which
# raises TypeError; a path string would be node1 + "/" + node2.
x = tree.findall(node1/node2)
for item in x:
c = int(item.find(node3).text)
count = count + c
print(count)
# The bare except also catches the TypeError above, so the URL hint is printed
# even when the URL itself was fine.
except:
print("Please only input complete urls")
Putting aside the user input angle, if you want "to sum up all numbers under "count"", change your xpath expression to
x = tree.findall('.//comment/count')
and then either do it the long way (which I personally prefer):
# Add up the integer text of every matched element.
total = 0
for count in x:
    total += int(count.text)
or use list comprehensions:
sum([int(count.text) for count in x])
In either case, the output is
2348
Found out what the mistake was - needed to concatenate strings:
x = tree.findall(node1 + "/" + node2)
import urllib.request, urllib.parse, urllib.error
import xml.etree.ElementTree as ET
import ssl
# NOTE(security): certificate and hostname verification are switched off, so
# the server's identity is not checked. Only use this against trusted hosts.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

# Document URL plus the element names: node1/node2 selects the items and
# node3 is the child of each item whose integer text is summed.
url = input("Which url?\n")
node1 = input("Enter node1- ")
node2 = input("Enter node2- ")
node3 = input("Enter node3- ")
count = 0
try:
    html = urllib.request.urlopen(url, context=ctx).read()
    tree = ET.fromstring(html)
except (ValueError, urllib.error.URLError):
    # urlopen raises ValueError for a malformed URL and URLError when the
    # request fails. Only these cases are about the URL.
    print("Please only input complete urls")
except ET.ParseError:
    print("The response is not well-formed XML")
else:
    # Kept outside the try so a wrong node name shows its own error rather
    # than the URL hint.
    for item in tree.findall(node1 + "/" + node2):
        count += int(item.find(node3).text)
    print(count)

Modifying python script to work with version 3.7.6

I currently host the English Amiga Board (EAB) FTP, an FTP server filled with classic Amiga computer goodies. It is open to everyone, but users can also register their own account. I got help in creating a Python script that checks that the username being registered at the FTP is a valid account on the EAB forums and that it has at least 50 posts.
However, I've now updated the server (Fedora Server), and the Python version was updated to 3.7.6. The script has now stopped working and I'm unable to reach the original author.
# python ./eab_post_count.py -u Turran
Traceback (most recent call last):
File "./eab_post_count.py", line 3, in <module>
import sys, re, urllib2
ModuleNotFoundError: No module named 'urllib2'
This script should return "0" if the user exists and has 50 posts.
While I script in some languages, Python is not one of them, so I would be grateful for any help modifying it to not use urllib2, which I understand is no longer available in Python 3.
The script:
#!/usr/bin/env python
import re
import sys
try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 moved Request and urlopen into urllib.request under the same names.
    import urllib.request as urllib2
# Script name and version shown in the -h help text.
titlecmd = "eab_post_count.py"
version = "1.15"
# Defaults for the -i and -l options and for the user-search result.
ignorecase = 0
lastpost = 0
userfound = False
# With no arguments given, behave as if -h had been passed.
if len(sys.argv) == 1: sys.argv[1:] = ["-h"]
def parseurl(url):
    """Fetch *url* and return the response body as text.

    The function exits the process instead of raising: status 3 when *url*
    is empty, status 2 when the request cannot be opened.

    Args:
        url: Address to fetch.

    Returns:
        The response body as ``str``, so callers can search it with string
        needles.
    """
    if not url:
        print("Empty URL!")
        sys.exit(3)
    headers = {'User-Agent': 'Mozilla/5.0'}
    request = urllib2.Request(url, None, headers)
    try:
        response = urllib2.urlopen(request)
    except Exception:
        print('URL open failed, EAB down?')
        sys.exit(2)
    content = response.read()
    if not isinstance(content, str):
        # Python 3 hands back bytes. Decode with the charset the server
        # declared, falling back to UTF-8, and never fail on a stray byte.
        charset = response.headers.get_content_charset() or 'utf-8'
        content = content.decode(charset, errors='replace')
    return content
def pagesearch(content, trigger, start, end):
    """Collect the delimited values that follow each *trigger* in *content*.

    For every occurrence of *trigger*, scanning resumes just after the next
    double quote. The text from there up to *end* is a candidate. A candidate
    is kept only if it begins with *start*, and *start* is stripped from it.

    Args:
        content: Text to scan.
        trigger: Marker that introduces a candidate.
        start: Prefix a candidate must have; removed from the stored value.
        end: Terminator that closes a candidate.

    Returns:
        List of the kept values, in the order they appear in *content*.
    """
    needlestack = []
    while True:
        curpos = content.find(trigger)
        if curpos < 0:
            break
        quotepos = content.find('"', curpos)
        if quotepos < 0:
            # No quote after the trigger: stop here. Without this the text
            # would never shrink and the loop would never end.
            break
        content = content[quotepos + 1:]
        endpos = content.find(end)
        if endpos < 0:
            # No terminator left, so no complete value can follow.
            break
        needle = content[:endpos]
        if needle.startswith(start):
            needlestack.append(needle[len(start):])
    return needlestack
def unescape(s):
    """Decode the HTML entities for ``<``, ``>`` and ``&`` in *s*.

    Args:
        s: Text that may contain the three entities.

    Returns:
        *s* with each entity replaced by the character it stands for.
    """
    s = s.replace("&lt;", "<")
    s = s.replace("&gt;", ">")
    # this has to be last: decoding the ampersand first would turn an escaped
    # entity such as "&amp;lt;" into "<" instead of "&lt;"
    s = s.replace("&amp;", "&")
    return s
# --- Command line parsing -------------------------------------------------
for idx, arg in enumerate(sys.argv):
    if arg == '-h':
        print(titlecmd + ' v' + version +' by modrobert in 2017')
        print('Function: Returns the number of posts for a given EAB forum user.')
        print('Syntax : ' + titlecmd + ' -u <username> [-i] [-l YYYY-MM-DD]')
        print('Options : -h this help text.')
        print(' : -i ignore case sensivity in user name.')
        print(' -l last post after YYYY-MM-DD required.')
        print(' -u followed by user name.')
        print('Result : 0 = user found, 1 = user not found, 2 = EAB down, 3 = other fail.')
        sys.exit(3)
    if arg == '-u':
        try:
            username = sys.argv[idx+1]
        except IndexError:
            print('Missing username.')
            sys.exit(3)
        # Spaces are the only characters escaped for the query string.
        usernameurl = re.sub('[ ]', '%20', username)
    if arg == '-i':
        ignorecase = 1
    if arg == '-l':
        lastpost = 1
        try:
            lpdate = sys.argv[idx+1]
        except IndexError:
            print('Missing date.')
            sys.exit(3)

# `username` is only bound when -u was seen, so probing the name detects a
# missing option.
try:
    username
except NameError:
    print('Username -u option required.')
    sys.exit(3)

# --- Fetch the member list page ------------------------------------------
if lastpost:
    eaburl = "http://eab.abime.net/memberlist.php?do=getall&pp=100&lastpostafter=" + lpdate + "&ausername=" + usernameurl
else:
    eaburl = "http://eab.abime.net/memberlist.php?do=getall&pp=100&ausername=" + usernameurl
eabcontent = parseurl(eaburl)
countlist = pagesearch(eabcontent, 'td class', 'alt2">', '</td>')
userlist = pagesearch(eabcontent, 'member.php?', '>', '</a>')

# --- Find the requested user among the listed members ---------------------
for idx, item in enumerate(userlist):
    # lets strip those fancy moderators and admins
    userstr = re.sub('<[^<]+?>', '', item)
    if ignorecase:
        if unescape(userstr.lower()) == username.lower():
            userfound = True
            break
    else:
        if unescape(str(userstr)) == username:
            userfound = True
            break
if not userfound:
    print("User not found: " + username)
    sys.exit(1)

# --- Print that user's post count -----------------------------------------
usercount = idx
for idx, item in enumerate(countlist):
    # hairy stuff below ;)
    # Skips the cells of earlier members; presumably each member row adds
    # three "alt2" cells - verify against the page markup.
    if idx < (3 * usercount):
        continue
    stripitem = re.sub('[,]', '', item)
    try:
        post_count = int(stripitem)
    except ValueError:
        # Not a number (for example a date cell): try the next cell.
        continue
    print(post_count)
    sys.exit(0)
Thanks in advance!

Python script to get iam user list who has not used Access keys more than 10 day

I have prepared a Python script that lists the IAM users who have not used their access keys for more than 10 days.
But I'm getting the error below:
AttributeError: 'iam.User' object has no attribute 'accesskey_last_used'
import json
import boto3
import datetime
from dateutil.tz import tzutc
# IAM is accessed through both the resource and the client interface.
resource = boto3.resource('iam')
client = boto3.client('iam')
# Naive local timestamp that the key ages are measured against.
today = datetime.datetime.now()
# Report text built up one line per matching user.
final_report = ''
final_result = ''
# 1-based line number used in the report.
number = int(1)
for user in resource.users.all():
# NOTE(review): this line reads `accesskey_last_used` while the next reads
# `access_key_last_used`; the AttributeError quoted in the question names the first.
if user.accesskey_last_used is not None:
delta = (today - user.access_key_last_used.replace(tzinfo=None)).days
if delta >= 10:
final_result= str(number) + " username: " + [user.user_name][0] + " - " + str(delta) + "days\n"
final_report = final_report + final_result
number = number + 1
# NOTE(review): this prints the literal text "final_report", not the variable.
print("final_report")
The error message is quite accurate. There is no accesskey_last_used attribute on an iam.User object.
Also, a user can have multiple Access Keys and each of these Access Keys can have an AccessKeyLastUsed property.
Therefore, you will need to use client calls instead of resource calls and loop through each Access Key for each User:
import boto3
import datetime
from dateutil.tz import tzutc
resource = boto3.resource('iam')
client = boto3.client('iam')
# Naive UTC "now". LastUsedDate is presumably reported in UTC (confirm against
# the boto3 docs); measuring against local time would skew the age by the
# local offset.
today = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
final_report = ''
number = 1

# For every user
for user in resource.users.all():
    # Get Access Keys for the User
    keys_response = client.list_access_keys(UserName=user.user_name)
    last_access = None

    # For every Access Key associated with the user
    for key in keys_response['AccessKeyMetadata']:
        last_used_response = client.get_access_key_last_used(AccessKeyId=key['AccessKeyId'])
        # Keys that were never used carry no LastUsedDate.
        accesskey_last_used = last_used_response['AccessKeyLastUsed'].get('LastUsedDate')
        if accesskey_last_used is None:
            continue
        # Keep the MOST RECENT use across all keys. A user with one stale key
        # and one key used today has been active and must not be reported.
        if last_access is None or accesskey_last_used > last_access:
            last_access = accesskey_last_used

    # More than x days since last access?
    if last_access is not None:
        delta = (today - last_access.replace(tzinfo=None)).days
        if delta >= 10:
            final_report += str(number) + " username: " + user.user_name + " - " + str(delta) + " days\n"
            number += 1

print(final_report)

Resources