I'm writing a program to grab all of the md5sums of a certain file but it stops around 40,000. Any help would be great, here is my program.
NOTE: 40,000 has no meaning other than it just breaks there.
def grabmd5():
for x in range(0, numLinesofFile):
f = open(finfile1, 'r')
for line in f:
line = line.strip('\n')
os.system("md5sum " + line + " 2>/dev/null" + " >> file2.txt")
if x == numLinesofFile:
print("It worked")
Related
#This program takes in a text file and whatever the ser types in ; it searches for the specific word or phrase and then print out in which line this word or phrase is located .
If i feed it a text file with 20 lines , it produces normal results
As soon as i give it a 3000 worded document it produces error
Can anyone explain this
while True:
search = str(input("==>"))
line_number = 1
fslope = open("searching_in_a_textfile")
for line in fslope:
if search.lower() in line:
print("tHE LINE NUMBER IS ", line_number)
print("THE LINE SAYS : " + line)
line_number = line_number + 1
continue
if search.upper() in line:
print("tHE LINE NUMBER IS ", line_number)
print("THE LINE SAYS : " + line)
line_number = line_number + 1
continue
if search.title() in line:
print("tHE LINE NUMBER IS ", line_number)
print("THE LINE SAYS : " + line)
line_number = line_number + 1
continue
else:
line_number = line_number + 1
continue
print("END OF PRIOCESS")
First lets make it simple: (this code is almost the same as yours)
lines = []
with open('searching_in_a_textfile') as f:
lines = f.readlines()
while True:
search = input('==>')
if not search:
break
for line_number, line in enumerate(lines, 1):
if search.lower() in line.lower():
print('tHE LINE NUMBER IS ', line_number, '\nTHE LINE SAYS :', line)
print("END OF PRIOCESS")
Now when the input is '' (empty string / no input) the process will stop.
if you can add your error it could be very helpful.
I have created a lambda that will scan an uploaded file and search for specific phrases which have been listed in another s3 bucket. If a phrase is matched in the original uploaded file, it will print the line of the transcript as well as the response.
This lambda works if we upload each transcript individually, however if we upload more than 1, it stores the original output and adds it to the beginning.
I feel that this issue may be caused by the /tmp/ file not being cleared when the lambda function ends.
Is there a way to clear the /tmp/ file each time a job is done?
The output looks as follows:
ch_0 : Okay. And then, um, how do you guys typically allocate funding for a project like this?
-------------------------------------------------------------
ch_1 : Yeah, we do have capital projects and we've allocated money 3 place, which is and stuff, Um, every year.
ch_0 : Okay. And then, um, how do you guys typically allocate funding for a project like this?
-------------------------------------------------------------
ch_1 : Yeah, we do have capital projects and we've allocated money 3 place, which is and stuff, Um, every year.
ch_0 : Okay. And then, um, how do you guys typically allocate funding for a project like this?
-------------------------------------------------------------
ch_1 : Yeah, we do have capital projects and we've allocated money 3 place, which is and stuff, Um, every year.
However, it should look like this:
ch_0 : Okay. And then, um, how do you guys typically allocate funding for a project like this?
-------------------------------------------------------------
ch_1 : Yeah, we do have capital projects and we've allocated money 3 place, which is and stuff, Um, every year.
My lambda code is as follows:
import boto3
def lambda_handler(event, context):
s3 = boto3.client("s3")
if event:
file_obj = event["Records"][0]
bucketname = str(file_obj['s3']['bucket']['name'])
filename = str(file_obj['s3']['object']['key'])
job_name = filename
print("Filename: ", filename)
fileObj = s3.get_object(Bucket=bucketname, Key=filename)
file_content = fileObj["Body"].read().decode('utf-8')
budget_file = s3.get_object(Bucket= "bantp-phrases", Key="B.txt")
budget_content = budget_file["Body"].read().decode('utf-8')
authority_file = s3.get_object(Bucket= "bantp-phrases", Key="A.txt")
authority_content = authority_file["Body"].read().decode('utf-8')
need_file = s3.get_object(Bucket= "bantp-phrases", Key="N.txt")
need_content = need_file["Body"].read().decode('utf-8')
timeline_file = s3.get_object(Bucket= "bantp-phrases", Key="T.txt")
timeline_content = timeline_file["Body"].read().decode('utf-8')
partner_file = s3.get_object(Bucket= "bantp-phrases", Key="P.txt")
partner_content = partner_file["Body"].read().decode('utf-8')
# Converts all to a list
budgets = budget_content.split("\n")
authorities = authority_content.split("\n")
needs = need_content.split("\n")
timelines = timeline_content.split("\n")
partners = partner_content.split("\n")
lines = file_content.split("\n")
directory_name = filename
mylist = lines
#Budget Phrase Analysis
for b in budgets:
with open("/tmp/budget.txt", "a") as x:
try:
output = None
for index, line in enumerate(lines):
if b.strip() in line:
output = index
break
if output:
x.write("\n" + lines[output] + "\n")
x.write("-------------------------------------------------------------")
x.write("\n" + lines[output +1] + "\n")
print ("It worked!")
break
except (ValueError):
x.write("Nothing found")
print ("It didn't work :(")
break
s3.upload_file(Filename = "/tmp/budget.txt" , Bucket="bantp-analysis", Key = ((directory_name)+'/'+"Budget_" + (filename)))
#Authority Phrase Analysis
for a in authorities:
with open("/tmp/authority.txt", "a") as c:
try:
output = None
for index, line in enumerate(lines):
if a.strip() in line:
output = index
if output:
c.write("\n" + lines[output] + "\n")
c.write("-------------------------------------------------------------")
c.write("\n" + lines[output +1] + "\n")
print ("It worked!")
except (ValueError):
c.write("Nothing found")
print ("It didn't work :(")
s3.upload_file(Filename = "/tmp/authority.txt" , Bucket="bantp-analysis", Key = ((directory_name)+'/'+"Authority_") + (filename))
#Need Phrase Analysis
for n in needs:
with open("/tmp/need.txt", "a") as v:
try:
output = None
for index, line in enumerate(lines):
if n.strip() in line:
output = index
break
if output:
v.write("\n" + lines[output] + "\n")
v.write("-------------------------------------------------------------")
v.write("\n" + lines[output +1] + "\n")
print ("It worked!")
break
except (ValueError):
v.write("Nothing found")
print ("It didn't work :(")
break
s3.upload_file(Filename = "/tmp/need.txt" , Bucket="bantp-analysis", Key = ((directory_name)+'/'+"Need_") + (filename))
#Timeline Phrase Analysis
for t in timelines:
with open("/tmp/timeline.txt", "a") as z:
try:
output = None
for index, line in enumerate(lines):
if t.strip() in line:
output = index
break
if output:
z.write("\n" + lines[output] + "\n")
z.write("-------------------------------------------------------------")
z.write("\n" + lines[output +1] + "\n")
print ("It worked!")
break
except (ValueError):
z.write("Nothing found")
print ("It didn't work :(")
break
s3.upload_file(Filename = "/tmp/timeline.txt" , Bucket="bantp-analysis", Key = ((directory_name)+'/'+"Timeline_") + (filename))
#Partner Phrase Analysis
for p in partners:
with open("/tmp/partner.txt", "a") as q:
try:
output = None
for index, line in enumerate(lines):
if p.strip() in line:
output = index
break
if output:
q.write("\n" + lines[output] + "\n")
q.write("-------------------------------------------------------------")
q.write("\n" + lines[output +1] + "\n")
print ("It worked!")
except (ValueError):
q.write("Nothing found")
print ("It didn't work :(")
s3.upload_file(Filename = "/tmp/partner.txt" , Bucket="bantp-analysis", Key = ((directory_name)+'/'+"Partner_") + (filename))
Welcome to stackoverflow!
Can you try the following solutions and comment the results please
In all your open operation change the mode of opening the file from a to w.
Example
with open("/tmp/timeline.txt", "a") as z:
to
with open("/tmp/timeline.txt", "w") as z:
This change for all open operations, to override the existing metafile. Also do take care of indentation.
The number of "*" printed fluctuates according to the length of string entered
def main():
# check command line argument
if len(argv) != 2:
print("Usage: python bleep.py dictionary")
exit(1)
else:
ban = set()
# Load txt file
with open(argv[1], "r") as f:
for line in f:
# strip the space and add to set
ban.add(line.strip())
# prompt user input
input = get_string("What message would you like to censor?\n")
# Split input into word tokens
token = input.split()
censored_msg = ""
for i in token:
if i.lower() in ban:
censored_msg = (censored_msg + "*"*(len(token)+1) + " ")
else:
censored_msg += i + " "
# print censored message
print(censored_msg.strip())
if __name__ == "__main__":
main()
It prints fine for some cases, such as
Input: heck my gosh
Output: **** my ****
But not so in others (should be **** ****)
Input: heck gosh
Output: * * (just 6 * for 8 letters)
Is it a typo? Check this line very carefully
censored_msg = (censored_msg + "*"*(len(token)+1) + " ")
and remember which is what here for i in token:
**I have a ctl file consist of text as shown below
mycnf_001/mycnf_001_001
mycnf_001/mycnf_001_002
.......................
....................... (a very long list consist of 1000 lines)
I am trying hard but I am not able to get my desired format
mycnf_001_001 mycnf_001
mycnf_001_002 mycnf_001
.......................
....................... (a very long list consist of 1000 lines)
********* DETAILED PROBLEM DESCRIPTION***************************
CURRENT FORMAT mycnf_001/mycnf_001_001
DESIRED FROMAT mycnf_001_001 mycnf_001
////////////CODE/////////////////////
f = open("ms.ctl", "rb")
s = f.readlines()
f.close()
f = open("newms.ctl", "wb")
s.reverse()
for item in s:
print>>f, item
f.close()
When I execute the above code then it simply reverses the order from bottom to top and what I need is clearly mentioned above.
Lets assume after reading file "ms.ctl" you have value like
s="mycnf_001/mycnf_001_001/mycnf_001_002"
Now split the string with respect to '/'
spliteds = [x for x in s.split('/') if x.strip()]
Now you have array of strings. Now try to access the array and from last and save it in another string.
desireds=""
for i in reversed(spliteds):
desireds = desireds + i + " "
Now you can put this string in any file.
CODE:
f = open("ms.ctl", "rb")
s = f.read()
f.close()
N = s.split('\n')
f = open("newms.ctl", "wb")
spliteds = [ x for x in N[0].split('/') if x.strip()]
desireds=""
for i in reversed(spliteds):
desireds = desireds + i + " "
#print desireds
f.write(desireds)
f.close()
You can debug this code using print. Hope it helps.
UPDATE
If you want to run it for more than one lines.
f = open("ms.ctl", "rb")
s = f.read()
f.close()
N = s.split('\n')
lenth = len(N)
f = open("newms.ctl", "wb")
for x in range(0, lenth):
print "We're on time %d" % (x)
spliteds = [ x for x in N[0].split('/') if x.strip()]
desireds=""
for i in reversed(spliteds):
desireds = desireds + i + " "
#print desireds
f.write(desireds)
f.close()
If you want to get the output in one column.
In above code, just replace " " with "\n" in desireds = desireds + i + " "
f = open("ms.ctl", "rb")
s = f.read()
f.close()
N = s.split('\n')
lenth = len(N)
f = open("newms.ctl", "wb")
for x in range(0, lenth):
print "We're on time %d" % (x)
spliteds = [ x for x in N[0].split('/') if x.strip()]
desireds=""
for i in reversed(spliteds):
desireds = desireds + i + "\n"
#print desireds
f.write(desireds)
f.close()
I don't know where does it goes wrong. I can get the correct result if I just call out my valid_ISBN(isbn) function, but when I write the file, the result become all invalid. (maybe something wrong with function call, but I don't know how to fix it)
def main():
# Call and open the File
inFile = open("isbn.txt", "r")
for line in inFile:
line_strip = line.replace("-", "").replace(" ", "").rstrip("\n")
isbn = line_strip # the function call
# Output file
str = []
str.append(line)
outFile = open("isbnOut.txt", "a")
for i in str:
if valid_ISBN(isbn) == "valid":
outFile.write(i.strip() + " valid\n")
else:
outFile.write(i.strip() + " invalid\n")
inFile.close()
outFile.close()
def valid_ISBN(isbn):
if len(isbn) != 10 or (isbn[0:9].isdigit()) == False:
print("invalid")
else:
return partial_sums(isbn)
def partial_sums(s1):
lst1 =[]
sum1 = 0
for i in range(len(s1)):
if (i == (len(s1) -1)) and ((s1[i] == "x") or (s1[i] == "X")):
sum1 = sum1 + 10
else:
sum1 = sum1 + int(s1[i])
lst1.append(sum1)
#print(lst1)
sum_of_s1(lst1)
def sum_of_s1(s2):
lst2 = []
sum2 = 0
for i in s2:
sum2 += i
lst2.append(sum2)
#print(lst2)
checkISBN(lst2[-1])
def checkISBN(value):
if value % 11 == 0:
print("valid")
else:
print("invalid")
main()
2 Test case for isbn text file (no new line):
019-923-3241
818-851-703X
In your main function:
Every time you read a line in from your input file you initialize str and fill it with just one value. You open your output file, do your validity checks for your one value in str, and finally write the one value to the the output file.
The next time you read the file you do the same stuff... so str isn't needed at all
Also using str as a variable name is bad form. In your console write in help(str) and you will see why.
Now to deal with your actual complaint:
Your problem is the fact that there is no new line.
when you say for line in some_open_file_handler:... what python does is populate line with everything up to the next newline character or the end of the file.
If your input file has no new lines seperating isbns then the first value of line would be 019-923-3241 818-851-703X. Thus the line line_strip = line.replace("-", "").replace(" ", "").rstrip("\n") set linestrip to 0199233241818851703X
This should fix it:
'line.split() will yield ['019-923-3241','818-851-703X']
outFile = open("isbnOut.txt", "a")
for line in open("isbn.txt", "r"):
isbns = line.split() #this splits stuff up by whitespace.
for isbn in isbns:
isbn = isbn.replace("-", "").replace(" ", "").rstrip("\n")
if valid_ISBN(isbn) == "valid":
outFile.write(i.strip() + " valid\n")
else:
outFile.write(i.strip() + " invalid\n")