Line by line comparison of text files - length error - python-3.x

# Compare wbm.txt with docu.txt and print the lines reported as mismatching.
# NOTE(review): indentation was lost in this listing; the notes below assume
# the most likely nesting -- confirm against the original script.
#create empty list for wbm.txt
wbm_lines = []
#read the file and append each line to the empty list
with open('wbm.txt') as file_to_read:
for line in file_to_read:
wbm_lines.append(line)
# i serves both as the reported line number and as the list index used below;
# it starts at 1 although list indices start at 0.
i = 1
#create empty list for docu.txt
docu_lines = []
#read the file and append each line to the empty list
with open('docu.txt') as file_to_read:
for line in file_to_read:
docu_lines.append(line)
# Iterate over whichever file has more lines.
if len(wbm_lines) > len(docu_lines):
for line in wbm_lines:
# NOTE(review): this compares one line (a str) with the whole docu_lines
# list, which is never equal, so every line is reported as a mismatch.
if line != docu_lines:
print("Line " + str(i) + " do not match")
print('wbm.txt ' + wbm_lines[i])
# NOTE(review): docu.txt is the shorter file in this branch, so docu_lines[i]
# raises IndexError once i reaches len(docu_lines).
print('docu.txt ' + docu_lines[i])
i += 1
else:
for line in docu_lines:
# NOTE(review): same str-versus-list comparison as above; always unequal.
if line != wbm_lines:
try:
print("Line " + str(i) + " do not match")
print('wbm.txt ' + wbm_lines[i])
print('docu.txt ' + docu_lines[i])
i += 1
# NOTE(review): the bare except silently skips any error raised in the try
# body, including the IndexError from indexing past the end of a list.
except:
continue
But I am getting an error:
print('docu.txt ' + docu_lines[i])
IndexError: list index out of range
The problem is that the two text files have different lengths. How can I resolve this?
Ideally, I would like to print something like --- in place of the non-existent line, while printing the other line normally.

Related

Program doesn't work if fed with a large amount of data

#This program takes in a text file and whatever the user types in; it searches for that specific word or phrase and then prints out the line on which the word or phrase is located.
If I feed it a text file with 20 lines, it produces normal results.
As soon as I give it a 3000-word document, it produces an error.
Can anyone explain this?
# Repeatedly prompt for a search term and print every line of the text file
# that contains the term in lower-case, upper-case or title-case form.
# NOTE(review): indentation was lost in this listing; the nesting is inferred.
while True:
search = str(input("==>"))
# Line numbering restarts at 1 for every new search.
line_number = 1
# NOTE(review): the file is opened again for each search and no close() call
# appears anywhere in this snippet.
fslope = open("searching_in_a_textfile")
for line in fslope:
# Only the search term is case-converted; the line itself is tested as-is, so
# a line matches only if it contains one of these three exact casings.
if search.lower() in line:
print("tHE LINE NUMBER IS ", line_number)
print("THE LINE SAYS : " + line)
line_number = line_number + 1
continue
if search.upper() in line:
print("tHE LINE NUMBER IS ", line_number)
print("THE LINE SAYS : " + line)
line_number = line_number + 1
continue
if search.title() in line:
print("tHE LINE NUMBER IS ", line_number)
print("THE LINE SAYS : " + line)
line_number = line_number + 1
continue
# No casing matched: just advance the counter.
else:
line_number = line_number + 1
continue
# NOTE(review): there is no break in the while loop; whether this print sits
# inside or after the loop cannot be told from this listing.
print("END OF PRIOCESS")
First, let's make it simple (this code is almost the same as yours):
# Load the whole file once, then answer searches until an empty term is given.
with open('searching_in_a_textfile') as source:
    lines = source.readlines()

while True:
    search = input('==>')
    if not search:
        # An empty answer at the prompt ends the session.
        break
    # Case-insensitive match: both the term and each line are lower-cased.
    wanted = search.lower()
    for line_number, line in enumerate(lines, 1):
        if wanted not in line.lower():
            continue
        print('tHE LINE NUMBER IS ', line_number, '\nTHE LINE SAYS :', line)

print("END OF PRIOCESS")
Now, when the input is '' (an empty string, i.e. no input), the process will stop.
If you can add the error you are getting, it would be very helpful.

For loop randomly stopping and not working

I'm writing a program to grab all of the md5sums of a certain file but it stops around 40,000. Any help would be great, here is my program.
NOTE: 40,000 has no meaning other than it just breaks there.
# Run the external md5sum command on every path listed in finfile1 and append
# the command's output to file2.txt.
# NOTE(review): indentation was lost in this listing; the notes below assume
# the inner for-loop is nested inside the outer one -- confirm.
def grabmd5():
# NOTE(review): under that nesting the whole file is re-read on every pass,
# i.e. numLinesofFile times in total.
for x in range(0, numLinesofFile):
# The file is opened here and no close() call appears in this function.
f = open(finfile1, 'r')
for line in f:
line = line.strip('\n')
# The command is built by string concatenation and handed to the shell, so the
# line's content is interpreted by the shell; "2>/dev/null" discards md5sum's
# error output, so failures leave no trace.
os.system("md5sum " + line + " 2>/dev/null" + " >> file2.txt")
# NOTE(review): range(0, numLinesofFile) stops at numLinesofFile - 1, so this
# condition is never true and "It worked" is never printed.
if x == numLinesofFile:
print("It worked")

How to print the result of JSON to the output file?

The output of the JSON file has multiple responses like the one seen below
"response_code":1
"scan_date":"2011-07-27 03:44:56"
"permalink":"https://www.virustotal.com/gui/file/1caea01fd9a6c6d12e5ca46007e25a4b1eff640060f45de8213e40aa5b47cd57/detection/f-1caea01fd9a6c6d12e5ca46007e25a4b1eff640060f45de8213e40aa5b47cd57-1311738296"
"verbose_msg":"Scan finished, information embedded"
"total":43
"positives":19
The below code will get the value of "positives" in the JSON output and then print it to the file.
# Classify the lookup result for this hash, echo the verdict, and append a
# "<hash> <count>" line to the output file.
if response == 0:
    # Response code 0: reported as unknown and recorded with a count of 0.
    print(hash + ": UNKNOWN")
    with open(output, "a") as report:
        report.write(hash + " 0" + "\n")
elif response == 1:
    positives = int(json_response.get("positives"))
    # Three or more positives count as malicious; anything lower is logged as 0.
    flagged = positives >= 3
    print(hash + (": MALICIOUS" if flagged else ": NOT MALICIOUS"))
    entry = (hash + " " + str(positives)) if flagged else (hash + " 0")
    with open(output, "a") as report:
        report.write(entry + "\n")
else:
    print(hash + ": CAN NOT BE SEARCHED")
So the result of the current code will be something like the below
0136b7453cedf600d6f6aab7900901d3 19
I am trying to get the value of "permalink" in the JSON results and print it in the same output file. So the output must look like the below
0136b7453cedf600d6f6aab7900901d3 19 https://www.virustotal.com/gui/file/1caea01fd9a6c6d12e5ca46007e25a4b1eff640060f45de8213e40aa5b47cd57/detection/f-1caea01fd9a6c6d12e5ca46007e25a4b1eff640060f45de8213e40aa5b47cd57-1311738296
How do I achieve this?
Thanks
You can read it the same way you read the positive values.
elif response == 1:
positives = int(json_response.get("positives"))
permalink = json_response.get("permalink")
if positives >= 3:`enter code here`
print(hash + ": MALICIOUS" + " | URL:" + permalink)
file = open(output,"a")`enter code here`
file.write(hash + " " + str(positives))
file.write("\n")
file.close()

Write input to file, excluding final line

My simple assignment is to write a function that asks for a filename, and then repeatedly reads lines from the user and saves these lines to the named file.
It stops saving the lines when the user input is a single dot on a line by itself. The line containing the single dot is NOT saved.
Example output would look like:
Save to what file: mytest.txt
> This is
> my attempt at
> the problem.
>
> The last line was empty
> .
Saving file mytest.txt
5 lines saved
Here's my attempt:
# Ask for a file name, then copy lines typed at the "> " prompt into that file
# and report how many lines were written.
# NOTE(review): indentation was lost in this listing; the notes assume the
# three statements after `while` form the loop body.
def savefile():
filename = input("Save to what file: ")
infile = open(filename, "w")
line = ""
lineCount = 0
# The "." test happens only at the top of the loop, i.e. after the previous
# input has already been written and counted -- so the "." line is saved too.
while line != ".":
line = input("> ")
infile.write(line + "\n")
lineCount += 1
print("Saving file", filename)
print(lineCount, "lines saved")
infile.close()
which works fine, except my while loop also saves the last line (the "." by itself on a line). I've also tried an if-else loop:
# Alternative attempt that swaps the while loop for a single if/else.
# An `if` is evaluated once and does not repeat, so at most one line is read
# and written here.
if line != ".":
line = input("> ")
infile.write(line + "\n")
lineCount += 1
else:
infile.close()
but this just saves the first line entered.
How can I exclude the last line entered?
Doesn't even need an explanation:
# Copy typed lines into my_file.txt until a line consisting only of "."
# (ignoring surrounding whitespace) is entered; that line is not written.
with open("my_file.txt", "w") as out:
    while True:
        entered = input("> ")
        if entered.strip() == ".":
            break
        out.write(entered + "\n")
You could try this by simply interchanging some of the lines in your code as follows:
def savefile():
    """Prompt for a file name and save typed lines to it until "." is entered.

    Lines are read with the "> " prompt and written one per line. A line that
    is exactly "." ends the input; it is neither written nor counted. The
    file name and the number of saved lines are printed at the end.
    """
    filename = input("Save to what file: ")
    lineCount = 0
    # The with-block closes the file even if input() raises (EOF, Ctrl-C).
    with open(filename, "w") as infile:
        # Read before testing, so the terminating "." never reaches write().
        line = input("> ")
        while line != ".":
            infile.write(line + "\n")
            lineCount += 1
            line = input("> ")
    print("Saving file", filename)
    print(lineCount, "lines saved")
It is just a little out of order, a classic problem: move the input to above the while loop. I hope you see why...
def savefile():
    """Prompt for a file name and save typed lines to it until "." is entered.

    The first line is read before the loop and each following line at the end
    of the loop body, so the terminating "." is tested before it could be
    written. The "." line is neither saved nor counted.
    """
    filename = input("Save to what file: ")
    lineCount = 0
    # The with-block closes the file even if input() raises part-way through.
    with open(filename, "w") as infile:
        # First, get a line of input.
        line = input("> ")
        # If the line is ".", the loop body is skipped entirely.
        while line != ".":
            # The line was not ".", so save and count it...
            infile.write(line + "\n")
            lineCount += 1
            # ...then read the next line; a "." here ends the loop.
            line = input("> ")
    print("Saving file", filename)
    print(lineCount, "lines saved")

Something wrong with call function

I don't know where it goes wrong. I get the correct result if I just call my valid_ISBN(isbn) function directly, but when I write to the file, every result comes out invalid. (Maybe something is wrong with the function call, but I don't know how to fix it.)
# Read isbn.txt line by line and append "<line> valid" or "<line> invalid" to
# isbnOut.txt for each line.
# NOTE(review): indentation was lost in this listing; nesting is inferred.
def main():
# Call and open the File
inFile = open("isbn.txt", "r")
for line in inFile:
# Drop hyphens, spaces and the trailing newline before validating.
line_strip = line.replace("-", "").replace(" ", "").rstrip("\n")
isbn = line_strip # the function call
# Output file
# NOTE(review): `str` shadows the built-in type, and the list only ever holds
# the single current line.
str = []
str.append(line)
outFile = open("isbnOut.txt", "a")
for i in str:
# NOTE(review): valid_ISBN as listed prints its verdict and has no path that
# returns the string "valid", so this comparison is never true and every
# line is written as invalid.
if valid_ISBN(isbn) == "valid":
outFile.write(i.strip() + " valid\n")
else:
outFile.write(i.strip() + " invalid\n")
inFile.close()
outFile.close()
# Reject anything that is not exactly 10 characters long with digits in the
# first nine positions; otherwise hand the string to partial_sums.
def valid_ISBN(isbn):
if len(isbn) != 10 or (isbn[0:9].isdigit()) == False:
# NOTE(review): the verdict is printed, not returned; this branch returns None.
print("invalid")
else:
# Returns whatever partial_sums returns; partial_sums as listed has no return
# statement, so this is None as well.
return partial_sums(isbn)
# Build the running totals of the characters of s1 (a final "x"/"X" counts as
# 10) and pass the resulting list to sum_of_s1.
# NOTE(review): indentation was lost; whether append() runs for every
# character or only in the else branch cannot be told from this listing.
def partial_sums(s1):
lst1 =[]
sum1 = 0
for i in range(len(s1)):
# Only the last position may hold the check character X, worth 10.
if (i == (len(s1) -1)) and ((s1[i] == "x") or (s1[i] == "X")):
sum1 = sum1 + 10
else:
sum1 = sum1 + int(s1[i])
lst1.append(sum1)
#print(lst1)
# NOTE(review): the result of sum_of_s1 is discarded and nothing is returned.
sum_of_s1(lst1)
# Build the running totals of the values in s2 and pass the last total to
# checkISBN.
def sum_of_s1(s2):
lst2 = []
sum2 = 0
for i in s2:
sum2 += i
lst2.append(sum2)
#print(lst2)
# lst2[-1] is the final running total.
# NOTE(review): nothing is returned from this function.
checkISBN(lst2[-1])
# Print "valid" when value is divisible by 11, otherwise print "invalid".
# NOTE(review): the verdict is only printed; the function returns None.
def checkISBN(value):
if value % 11 == 0:
print("valid")
else:
print("invalid")
main()
2 Test case for isbn text file (no new line):
019-923-3241
818-851-703X
In your main function:
Every time you read a line in from your input file, you initialize str and fill it with just one value. You open your output file, do your validity checks for the one value in str, and finally write that one value to the output file.
The next time you read a line, you do the same thing again... so str isn't needed at all.
Also using str as a variable name is bad form. In your console write in help(str) and you will see why.
Now to deal with your actual complaint:
Your problem is the fact that there is no new line.
When you say for line in some_open_file_handler:... what Python does is populate line with everything up to the next newline character or the end of the file.
If your input file has no newlines separating the ISBNs, then the first value of line would be 019-923-3241 818-851-703X. Thus the line line_strip = line.replace("-", "").replace(" ", "").rstrip("\n") sets line_strip to 0199233241818851703X
This should fix it:
# line.split() will yield ['019-923-3241', '818-851-703X']
# NOTE: this relies on valid_ISBN() *returning* the string "valid".
with open("isbnOut.txt", "a") as outFile, open("isbn.txt", "r") as inFile:
    for line in inFile:
        # split() breaks the line on any whitespace, so several ISBNs on one
        # line are validated one at a time.
        for raw_isbn in line.split():
            # Validate the hyphen-free form, but report the ISBN exactly as it
            # was written in the input file.
            isbn = raw_isbn.replace("-", "")
            if valid_ISBN(isbn) == "valid":
                outFile.write(raw_isbn + " valid\n")
            else:
                outFile.write(raw_isbn + " invalid\n")

Resources