Format data in a textfile - python-3.x

I have a text file containing data in this format :
[-0.00287209 -0.00815337 -0.00322895 -0.00015178]
[-0.0038058 -0.01238539 -0.00082072 0.00040815]
[-0.00922925 -0.00394288 0.00325778 0.00083047]
[-0.01221899 0.01573175 0.00569081 0.00079524]
[0.02409868 0.02623219 0.00364268 0.00026268]
[ 0.04754814 0.00664801 -0.00204411 -0.00044964]
[-0.02286798 -0.02860896 -0.00671971 -0.00086068]
[-0.079635 -0.03532551 -0.00594647 -0.00067338]
[ 1.13691452e-03 4.88425646e-04 -3.44116748e-05 -1.08364051e-05]
I want to reformat it (remove the brackets and replace the spaces between the numbers with commas) so that it looks like this:
-0.00287209,-0.00815337,-0.00322895,-0.00015178
-0.0038058,-0.01238539,-0.00082072,0.00040815
-0.00922925,-0.00394288,0.00325778,0.00083047
-0.01221899,0.01573175,0.00569081,0.00079524
0.02409868,0.02623219,0.00364268,0.00026268
0.04754814,0.00664801,-0.00204411,-0.00044964
-0.02286798,-0.02860896,-0.00671971,-0.00086068
-0.079635,-0.03532551,-0.00594647,-0.00067338
1.13691452e-03,4.88425646e-04,-3.44116748e-05,-1.08364051e-05

Something basic like this works:
import csv
# assuming the input is in input.txt
# Each input line looks like "[ a b c d]"; every row is turned into a list of
# number strings and then written out comma-separated.
fixed_lines = []
with open("input.txt") as input_file:
    for line in input_file:  # stream the file instead of loading it whole
        # remove the newline, then the brackets on either side
        line = line.strip().lstrip("[").rstrip("]")
        # split() with no argument ignores leading/trailing/repeated whitespace
        fixed_lines.append(line.split())
# write out using the csv module
# newline='' hands line-ending control to the csv module; without it every
# row is followed by a blank line on Windows.
with open("output.txt", 'w', newline='') as f:
    csv_writer = csv.writer(f)
    csv_writer.writerows(fixed_lines)
Output:
-0.00287209,-0.00815337,-0.00322895,-0.00015178
-0.0038058,-0.01238539,-0.00082072,0.00040815
-0.00922925,-0.00394288,0.00325778,0.00083047
-0.01221899,0.01573175,0.00569081,0.00079524
0.02409868,0.02623219,0.00364268,0.00026268
0.04754814,0.00664801,-0.00204411,-0.00044964
-0.02286798,-0.02860896,-0.00671971,-0.00086068
-0.079635,-0.03532551,-0.00594647,-0.00067338
1.13691452e-03,4.88425646e-04,-3.44116748e-05,-1.08364051e-05

You could do it with a regexp like this
import re
s = """[-0.00287209 -0.00815337 -0.00322895 -0.00015178]
[-0.0038058 -0.01238539 -0.00082072 0.00040815]
[-0.00922925 -0.00394288 0.00325778 0.00083047]
[-0.01221899 0.01573175 0.00569081 0.00079524]
[0.02409868 0.02623219 0.00364268 0.00026268]
[ 0.04754814 0.00664801 -0.00204411 -0.00044964]
[-0.02286798 -0.02860896 -0.00671971 -0.00086068]
[-0.079635 -0.03532551 -0.00594647 -0.00067338]
[ 1.13691452e-03 4.88425646e-04 -3.44116748e-05 -1.08364051e-05]
"""
# One bracketed line of four numbers; each group captures one number
# (optionally negative, optionally with an "e-NN" exponent). The pattern is a
# raw string so that \[, \s and \d reach the regex engine untouched, and re.M
# makes ^ and $ match at every line of the text.
fouine = re.compile(r'^\[\s*(-?\d\.?\d+(?:e-\d+)?) \s*(-?\d\.?\d+(?:e-\d+)?) \s*(-?\d\.?\d+(?:e-\d+)?) \s*(-?\d\.?\d+(?:e-\d+)?)]$', re.M)
# print is a function in Python 3
print(re.sub(fouine, r'\1,\2,\3,\4', s))

Another way is to split your content by line and by "column":
import re
s = """[-0.00287209 -0.00815337 -0.00322895 -0.00015178]
[-0.0038058 -0.01238539 -0.00082072 0.00040815]
[-0.00922925 -0.00394288 0.00325778 0.00083047]
[-0.01221899 0.01573175 0.00569081 0.00079524 ]
[0.02409868 0.02623219 0.00364268 0.00026268]
[ 0.04754814 0.00664801 -0.00204411 -0.00044964]
[-0.02286798 -0.02860896 -0.00671971 -0.00086068]
[-0.079635 -0.03532551 -0.00594647 -0.00067338]
[ 1.13691452e-03 4.88425646e-04 -3.44116748e-05 -1.08364051e-05]
"""


# remove the brackets
def remove_brackets(l):
    """Return the line without its surrounding brackets and padding.

    Whitespace is stripped both outside and inside the brackets, so
    "[ 1 2 ]" becomes "1 2".
    """
    return l.strip().strip('[]').strip()


# split the columns and join with a comma
def put_commas(l):
    """Return the whitespace-separated fields of the line joined by commas.

    str.split() with no argument ignores leading, trailing and repeated
    whitespace, so padding never turns into an empty field (a stray comma).
    """
    return ','.join(l.split())


raw_lines = s.splitlines()
clean_lines = [put_commas(remove_brackets(raw)) for raw in raw_lines]
# print is a function in Python 3
print('\n'.join(clean_lines))

Related

Python: If file does not end with , move up the line below

So I have a CSV file which is created by a program as a report. This file has lines in it that are not split correctly; all of the lines should end with ,\n. What I want is: if a line does not end with ,\n, the script should move the line below it up onto the end of that line.
this is how I have tried so far
# Asker's attempt: copy test.csv to output.csv, joining a line with the one
# that follows it whenever the line does not end with ",\n".
with open('test.csv', 'r') as data, open('output.csv','w') as output:
for line in data:
# Only the line ending is tested, so a row that was broken right after a
# comma is treated as complete and left alone.
if not line.endswith(',\n'):
# rstrip() removes the line break (and any other trailing whitespace), so the
# next line written continues on the same output line.
line = line.rstrip()
# NOTE(review): the trailing backtick below looks like leftover markup from
# the post rather than part of the code - confirm.
output.write(line)`
but this is not working in below case:
name, address, pincode,
amruta, peth, 41578,
ashwini,'peth,
MH', 415407,
akshay, sangli,
478595,
Output should be
name, address, pincode,
amruta, peth, 41578,
ashwini,'peth,MH', 415407,
akshay, sangli, 478595,
Output i am getting
name, address, pincode,
amruta, peth, 41578,
ashwini,'peth,
MH', 415407,
akshay, sangli, 478595,
The problem is that for the lines:
ashwini,'peth,
MH', 415407,
The upper line does, in fact, end with ',', so the code works "as intended".
So, instead, you could work with the column size expected. The following code solves your problem:
def _merge_split_rows(lines, expected_commas=3):
    """Yield complete rows, gluing a broken row to the line(s) that follow it.

    A row is complete once it holds at least ``expected_commas`` commas
    (every row in the report ends with a comma, so three columns means three
    commas). Fragments are accumulated until that count is reached, so a row
    broken into any number of pieces is reassembled.
    """
    pending = ''
    for line in lines:
        # Drop the premature line break before appending the continuation.
        pending = pending.rstrip('\n') + line
        if pending.count(',') >= expected_commas:
            yield pending
            pending = ''
    if pending:
        # The input ended in the middle of a row: emit the fragment rather
        # than silently losing data.
        yield pending


with open('test.txt', 'r') as data, open('output.txt', 'w') as output:
    output.writelines(_merge_split_rows(data))

How to append a new line to a list in python?

I'm trying to append a new line in a list in python to write to my text file any suggestions?
# Asker's attempt: build a header row and one data row, then write them to a
# tab-delimited text file with csv.writer.
header = ['Employee ID','Type','Routing Number','Account Number']
# This adds "\n" as a fifth element of the list; it does not start a new row.
header.append("\n")
strOtherLine = [strSsn,strThisType,strThisRoute,strThisAcct]
# NOTE(review): "\n" + strOtherLine adds a str to a list, which raises
# TypeError as written - the code that produced the output shown in the
# question was presumably slightly different; confirm.
header.append("\n" + strOtherLine)
fc_otherfile = r"c:\******\*****\gah\\" + strOtherSavedFile
#===Writing to the text file
with open(fc_otherfile,'w', newline='') as t:
dirdep = csv.writer(t, delimiter="\t")
# writerow() emits the whole list as ONE row, so everything appended to
# header ends up on the same line.
dirdep.writerow(header)
This is what I get in my text file:
Employee ID Type Routing Number Account Number "" ['###########', 'Checking', '###########','###########'] ['###########', 'Checking', '###########', '###########']
But what I want is this:
Employee ID Type Routing Number Account Number
########### Checking ########### ###########
########### Checking ########### ###########
Simply make a string of a list using ' '.join(). Whenever appending line use obj.write('\n') after that.
Example: Using text file.
# Turn the list of words into a single space-separated line and append that
# line, followed by a line break, to file.txt.
MyData = ['I', 'am', 'fine']
line1 = ' '.join(MyData)
with open('file.txt', 'a+') as data:
    # print() writes the text and then the terminating "\n"
    print(line1, file=data)
You're using lists when you should be using strings:
# Build every row as one tab-separated string.
header = '\t'.join(['Employee ID', 'Type', 'Routing Number', 'Account Number'])
strOtherLine = '\t'.join([strSsn, strThisType, strThisRoute, strThisAcct])
# Exactly one "\n" ends each row, so no blank line appears between the header
# and the data row.
header += "\n" + strOtherLine + "\n"
fc_otherfile = r"c:\******\*****\gah\\" + strOtherSavedFile
#===Writing to the text file
with open(fc_otherfile,'w', newline='') as t:
    t.write(header)
#confirm write was successful
with open(fc_otherfile,'r') as t2:
    print(t2.read())
If you love loops you could try something like this:
# Build one tab-separated line per row. Strings are immutable and have no
# append(), so each line is assembled with join(), which puts "\t" only
# BETWEEN the fields - there is no trailing tab left to strip.
toinsert = ""
for row in (header, strOtherLine):
    toinsert += "\t".join(str(each) for each in row) + "\n"
with open('file.txt', 'a+') as data:
    data.write(toinsert)

Copy the first characters to the line below if a condition is met

I'm new to Python and haven't managed to write any code for this yet, so I hope you can help me without it. Below are my input and the desired output, both as TXT files.
Input
03000|SOME_TEXT_1|XX|XXXX|21236,85
03100|29|21236,85
03000|SOME_TEXT_2|XX|XXXX|4270,00
03100|29|4270,00
03000|SOME_TEXT_3|XX|XXXX|17425,00
03100|29|17425,00
03000|SOME_TEXT_4|XX|XXXX|10600,00
03100|29|1040,00
03100|30|9560,00
03000|SOME_TEXT_5|XX|XXXX|11569,00
03100|29|11569,00
03000|SOME_TEXT_6|XX|XXXX|11569,00
03000|SOME_TEXT_7|XX|XXXX|11569,00
11111|12
Output
03000|SOME_TEXT_1|XX|XXXX|21236,85
03000|SOME_TEXT_1|XX|XXXX|03100|29|21236,85
03000|SOME_TEXT_2|XX|XXXX|4270,00
03000|SOME_TEXT_2|XX|XXXX|03100|29|4270,00
03000|SOME_TEXT_3|XX|XXXX|17425,00
03000|SOME_TEXT_3|XX|XXXX|03100|29|17425,00
03000|SOME_TEXT_4|XX|XXXX|10600,00
03000|SOME_TEXT_4|XX|XXXX|03100|29|1040,00
03000|SOME_TEXT_4|XX|XXXX|03100|30|9560,00
03000|SOME_TEXT_5|XX|XXXX|11569,00
03000|SOME_TEXT_5|XX|XXXX|03100|29|11569,00
03000|SOME_TEXT_6|XX|XXXX|11569,00
03000|SOME_TEXT_7|XX|XXXX|11569,00
11111|12
Basically, I want to copy the first 26 characters of a line that starts with 03000 to the line(s) below it whenever they start with 03100. Sometimes the line below starts with 11111 or another 03000, and I don't want the text copied to those lines.
This should do what you want:
data = """03000|SOME_TEXT_1|XX|XXXX|21236,85
03100|29|21236,85
03000|SOME_TEXT_2|XX|XXXX|4270,00
03100|29|4270,00
03000|SOME_TEXT_3|XX|XXXX|17425,00
03100|29|17425,00
03000|SOME_TEXT_4|XX|XXXX|10600,00
03100|29|1040,00
03100|30|9560,00
03000|SOME_TEXT_5|XX|XXXX|11569,00
03100|29|11569,00
03000|SOME_TEXT_6|XX|XXXX|11569,00
03000|SOME_TEXT_7|XX|XXXX|11569,00
11111|12""".splitlines()

# Fields of the most recent 03000 line, minus its last "|"-separated field.
_03000_text = ''
rewritten = []
for line in data:
    if line.startswith("03000"):
        # A 03000 line is kept as-is; remember everything before its last "|".
        _03000_text = line.rpartition("|")[0]
    elif line.startswith("03"):
        # Any other "03..." line gets the remembered 03000 fields in front.
        line = _03000_text + "|" + line
    # Lines that do not start with "03" fall through unchanged.
    rewritten.append(line)
output = "\n".join(rewritten)
print(output)
Output:
03000|SOME_TEXT_1|XX|XXXX|21236,85
03000|SOME_TEXT_1|XX|XXXX|03100|29|21236,85
03000|SOME_TEXT_2|XX|XXXX|4270,00
03000|SOME_TEXT_2|XX|XXXX|03100|29|4270,00
03000|SOME_TEXT_3|XX|XXXX|17425,00
03000|SOME_TEXT_3|XX|XXXX|03100|29|17425,00
03000|SOME_TEXT_4|XX|XXXX|10600,00
03000|SOME_TEXT_4|XX|XXXX|03100|29|1040,00
03000|SOME_TEXT_4|XX|XXXX|03100|30|9560,00
03000|SOME_TEXT_5|XX|XXXX|11569,00
03000|SOME_TEXT_5|XX|XXXX|03100|29|11569,00
03000|SOME_TEXT_6|XX|XXXX|11569,00
03000|SOME_TEXT_7|XX|XXXX|11569,00
11111|12
For multiple files, this should do what you want:
import os


def _copy_03000_prefix(lines):
    """Return lines with each non-03000 "03..." line prefixed by the fields of the last 03000 line."""
    prefix = ''
    result = []
    for line in lines:
        if line.startswith("03000"):
            # Remember every field except the last one for the lines below.
            prefix = "|".join(line.split("|")[:-1])
        elif line.startswith("03"):
            line = prefix + "|" + line
        # Lines that do not start with "03" are copied unchanged.
        result.append(line)
    return result


file_list = ["file1.txt", "file2.txt"]
for file in file_list:
    # splitext() removes only a real extension, so names without one and
    # paths with dotted directory names still get a distinct output name.
    new_file = os.path.splitext(file)[0] + "_new.txt"
    with open(file) as f:
        data = f.read().splitlines()
    output = "\n".join(_copy_03000_prefix(data))
    with open(new_file, "w") as f:
        f.write(output)
Note that it puts the output in separate files

How to handle blank line,junk line and \n while converting an input file to csv file

Below is the sample data in input file. I need to process this file and turn it into a csv file. With some help, I was able to convert it to csv file. However not fully converted to csv since I am not able to handle \n, junk line(2nd line) and blank line(4th line). Also, i need help to filter transaction_type i.e., avoid "rewrite" transaction_type
{"transaction_type": "new", "policynum": 4994949}
44uu094u4
{"transaction_type": "renewal", "policynum": 3848848,"reason": "Impressed with \n the Service"}
{"transaction_type": "cancel", "policynum": 49494949, "cancel_table":[{"cancel_cd": "AU"}, {"cancel_cd": "AA"}]}
{"transaction_type": "rewrite", "policynum": 5634549}
Below is the code
import ast
import csv
# Asker's attempt: read one dict literal per line from test_policy and write
# the selected keys to test_policy.csv.
with open('test_policy', 'r') as in_f, open('test_policy.csv', 'w') as out_f:
data = in_f.readlines()
# extrasaction='ignore' makes the writer skip keys that are not listed in
# fieldnames (such as cancel_table) instead of raising.
writer = csv.DictWriter(
out_f,
fieldnames=[
'transaction_type', 'policynum', 'cancel_cd','reason'],lineterminator='\n',
extrasaction='ignore')
writer.writeheader()
for row in data:
# Every line is parsed as a Python literal.
# NOTE(review): nothing here skips blank or junk lines, or filters on
# transaction_type - these are the gaps the question asks about.
dict_row = ast.literal_eval(row)
if 'cancel_table' in dict_row:
# Flatten the list of {"cancel_cd": ...} dicts into one comma-joined string.
cancel_table = dict_row['cancel_table']
cancel_cd= []
for cancel_row in cancel_table:
cancel_cd.append(cancel_row['cancel_cd'])
dict_row['cancel_cd'] = ','.join(cancel_cd)
writer.writerow(dict_row)
Below is my output not considering the junk line,blank line and transaction type "rewrite".
transaction_type,policynum,cancel_cd,reason
new,4994949,,
renewal,3848848,,"Impressed with
the Service"
cancel,49494949,"AU,AA",
Expected output
transaction_type,policynum,cancel_cd,reason
new,4994949,,
renewal,3848848,,"Impressed with the Service"
cancel,49494949,"AU,AA",
I tried to fix this, but I don't know much about how CSV files work; with my limited knowledge, I suggest running this code before converting the file.
def _tidy(value):
    """Return value with newlines removed and runs of spaces collapsed to one.

    Values that are not strings (numbers, lists, ...) are returned unchanged,
    so records that mix types can be cleaned without errors.
    """
    if not isinstance(value, str):
        return value
    # take the "\n" characters out
    value = value.replace("\n", "")
    # removing a newline can leave two spaces side by side; squeeze every run
    # of spaces down to a single space
    while "  " in value:
        value = value.replace("  ", " ")
    return value


txt = {"transaction_type": "renewal",
       "policynum": 3848848,
       "reason": "Impressed with \n the Service"}
# add the corrected values to a new dictionary
newTxt = {key: _tidy(value) for key, value in txt.items()}
# show the result
print(f"txt = {txt}\nnewTxt = {newTxt}")
Terminal output:
txt = {'transaction_type': 'renewal', 'policynum': 3848848, 'reason': 'Impressed with \n the Service'}
newTxt = {'transaction_type': 'renewal', 'policynum': 3848848, 'reason': 'Impressed with the Service'}
Process finished with exit code 0

Why are extra blank lines generated while writing to file?

I want to find specific lines in a file, add a string to end of that line, and then update the file, but the updated file has extra blank lines between the lines.
# Asker's function: find the line of test.txt whose first three comma-separated
# fields equal PacketName, PacketTot and PacketNum, append the current
# timestamp to it, and write all lines to newtest.txt.
# NOTE(review): no except/finally clause for the try below is visible in this
# snippet - the post appears to be cut off; confirm against the original.
def Reading_Logging(PacketName, PacketTot, PacketNum):
try:
with open("C:\\Users\\Shakib\\Desktop\\test.txt", "r+") as f:
# splitlines() returns the lines without their line terminators
content = f.read().splitlines()
#print(names_list)
for i, l in enumerate(content):
tnow = datetime.datetime.now()
linesplit = l.split(',')
# The fields are compared as strings, so the arguments must be strings too.
if linesplit[0] == PacketName and linesplit[1] == PacketTot and linesplit[2] == PacketNum:
# Replacing the whole line with itself plus a suffix appends ",<timestamp>".
content[i] = content[i].replace(content[i], content[i] + ',' + str(tnow))
# The result goes to a different file (newtest.txt), one "\n" between lines.
with open("C:\\Users\\Shakib\\Desktop\\newtest.txt", "w") as f:
f.write('\n'.join(content))
I expect the following output without blank lines, but this is my real output:
ZoYt,97,0,3.394531,2019-07-27 14:40:27.671415,2019-07-27 19:22:48.824541
ZoYt,97,1,3.000977,2019-07-27 14:40:27.701415
ZoYt,97,2,1.879883,2019-07-27 14:40:27.731415
ZoYt,97,3,3.681641,2019-07-27 14:40:27.753415
ZoYt,97,4,1.069336,2019-07-27 14:40:27.760416
ZoYt,97,5,1.094727,2019-07-27 14:40:27.773417
ZoYt,97,6,3.077148,2019-07-27 14:40:27.787417
ZoYt,97,7,1.015625,2019-07-27 14:40:27.798418
ZoYt,97,8,3.765625,2019-07-27 14:40:27.813419
ZoYt,97,9,2.797852,2019-07-27 14:40:27.823419
ZoYt,97,10,3.860352,2019-07-27 14:40:27.837420
ZoYt,97,11,3.179688,2019-07-27 14:40:27.849421

Resources