Python print format with dot leader - python-3.x

How can I use print formatting to place a run of characters (a dot leader) between two fields?
For example, I have the following code:
import os
import glob

os.chdir("LOGS\\")
for file in glob.glob('*'):
    with open(file) as f:
        contents = f.read()
        if 'HOST_POWER="ON"' in contents:
            print('{0:38} {1:3}'.format(file[:-4], " = ON"))
for file in glob.glob('*'):
    with open(file) as f:
        contents = f.read()
        if 'HOST_POWER="OFF"' in contents:
            print('{0:38} {1:3}'.format(file[:-4], " = OFF"))
Output:
server1.web.com = ON
server2.web.com = ON
server3334.web.com = OFF
server5332223.web.com = ON
server2233.web.com = ON
server44.web.com = ON
server1133333.web.com = OFF
But I want the output to look like this:
server1.web.com ............ ON
server2.web.com ............ ON
server3334.web.com ......... OFF
server5332223.web.com ...... ON
server2233.web.com ......... ON
server44.web.com ........... ON
server1133333.web.com ...... OFF
server{SPACE}............{SPACE}ON
server{SPACE}............{SPACE}OFF

You could just build the string before you pass it to print (edited to produce exactly the formatting you want):
import glob

def padStr(x, n):
    x += ' '
    return x + '.' * (n - len(x))

for file in glob.glob('*.*'):
    with open(file) as f:
        contents = f.read()
        if 'HOST_POWER="ON"' in contents:
            print('%s %s' % (padStr(file[:-4], 38), "ON"))
for file in glob.glob('*.*'):
    with open(file) as f:
        contents = f.read()
        if 'HOST_POWER="OFF"' in contents:
            print('%s %s' % (padStr(file[:-4], 38), "OFF"))
Output:
blahblahblah ......................... ON
f1 ................................... ON
tes .................................. OFF

Another (slightly messier) option is to fix up the string inline before using it as a format argument:
print('{0} {1:3}'.format((file[:-4] + ' ').ljust(38, '.'), "= ON"))
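The format mini-language can also do the padding itself: a fill character written before the < alignment flag is repeated up to the field width, so no helper function is needed. A minimal sketch (the trailing space keeps the dots off the server name):

# '.' is the fill character, '<' left-aligns, 38 is the field width
print('{0:.<38} {1}'.format(file[:-4] + ' ', 'ON'))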

Related

.csv to .arff function on Python

I'm trying to write a conversion function from CSV to ARFF; right now I have this:
def csv2arff(csv_path, arff_path=None):
    with open(csv_path, 'r') as fr:
        attributes = []
        if arff_path is None:
            arff_path = csv_path[:-4] + '_prueba.arff'  # *.csv -> *.arff
        write_sw = False
        with open(arff_path, 'w') as fw:
            fw.write('#relation base_datos_modelo_3_limpia \n')
            firstline = fr.readlines()[0].rstrip()
            fw.write(firstline)
and that gives me:
#relation base_datos_modelo_3_limpia
DVJ_Valgus_KneeMedialDisplacement_D_discr,BMI,AgeGroup,ROM-PADF-KE_D,DVJ_Valgus_FPPA_D_discr,TrainFrequency,DVJ_Valgus_FPPA_ND_discr,Asym_SLCMJLanding-pVGRF(10percent)_discr,Asym-ROM-PHIR(≥8)_discr,Asym_TJ_Valgus_FPPA(10percent)_discr,TJ_Valgus_FPPA_ND_discr,Asym-ROM-PHF-KE(≥8)_discr,TJ_Valgus_FPPA_D_discr,Asym_SLCMJ-Height(10percent)_discr,Asym_YBTpl(10percent)_discr,Position,Asym-ROM-PADF-KE(≥8º)_discr,DVJ_Valgus_KneeMedialDisplacement_ND_discr,DVJ_Valgus_Knee-to-ankle-ratio_discr,Asym-ROM-PKF(≥8)_discr,Asym-ROM-PHABD(≥8)_discr,Asym-ROM-PHF-KF(≥8)_discr,Asym-ROM-PHER(≥8)_discr,AsymYBTanterior10percentdiscr,Asym-ROM-PHABD-HF(≥8)_discr,Asym-ROM-PHE(≥8)_discr,Asym(>4cm)-DVJ_Valgus_Knee;edialDisplacement_discr,Asym_SLCMJTakeOff-pVGRF(10percent)_discr,Asym-ROM-PHADD(≥8)_discr,Asym-YBTcomposite(10percent)_discr,Asym_SingleHop(10percent)_discr,Asym_YBTpm(10percent)_discr,Asym_DVJ_Valgus_FPPA(10percent)_discr,Asym_SLCMJ-pLFT(10percent)_discr,DominantLeg,Asym-ROM-PADF-KF(≥8)_discr,ROM-PHER_ND,CPRDmentalskills,POMStension,STAI-R,ROM-PHER_D,ROM-PHIR_D,ROM-PADF-KF_ND,ROM-PADF-KF_D,Age_at_PHV,ROM-PHIR_ND,CPRDtcohesion,Eperience,ROM-PHABD-HF_D,MaturityOffset,Weight,ROM-PHADD_ND,Height,ROM-PHADD_D,Age,POMSdepressio,ROM-PADF-KE_ND,POMSanger,YBTanterior_Dnorm,YBTanterior_NDnorm,POMSvigour,Soft-Tissue_injury_≥4days
So I want to put "#attribute" before each attribute and change each "," to "\n", but I don't know how to do it. I tried to write a function to replace the "," but it didn't work. Any ideas? Thank you.
Try the liac-arff library.
Here is an example for converting the UCI iris dataset from ARFF to CSV and then back to ARFF:
import csv
import arff

# arff -> csv
content = arff.load(open('./iris.arff', 'r'))
with open('./out.csv', 'w') as fp:
    writer = csv.writer(fp)
    header = []
    for n, t in content['attributes']:
        header.append(n)
    writer.writerow(header)
    writer.writerows(content['data'])

# csv -> arff
with open('./out.csv', 'r') as fp:
    reader = csv.reader(fp)
    header = None
    data = []
    for row in reader:
        if header is None:
            header = row
        else:
            data.append(row)
content = {}
content['relation'] = "from my csv file"
content['attributes'] = []
for n in header:
    if n == "class":
        content['attributes'].append((n, ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']))
    else:
        content['attributes'].append((n, 'NUMERIC'))
content['data'] = data
with open('./out.arff', 'w') as fp:
    arff.dump(content, fp)
NB: For the last stage, we need to specify the nominal class values, which you could determine by scanning the data.
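If you only want the literal header transformation you described, a minimal sketch along those lines (assuming the first line of the CSV holds the attribute names; note that standard ARFF actually uses "@relation"/"@attribute" rather than "#", and each attribute line also needs a type):

with open(csv_path, 'r') as fr, open(arff_path, 'w') as fw:
    fw.write('@relation base_datos_modelo_3_limpia\n')
    header = fr.readline().rstrip()
    for name in header.split(','):
        # a real ARFF header would also append a type here, e.g. ' NUMERIC'
        fw.write('@attribute %s\n' % name)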

Adding new strings line by line from a file to a new one

I have a data output file in the format below from the script I run.
1. xxx %percentage1
2. yyy %percentage1
.
.
.
I am trying to take only the percentages and append them, line by line, to a file in the same format (creating the new file on the first run).
1. xxx %percentage1 %percentage2
2. yyy %percentage1 %percentage2
The main idea is every time I run the code with a source data file I want it to add those percentages to the new file line by line.
1. xxx %percentage1 %percentage2 %percentage3 ...
2. yyy %percentage1 %percentage2 %percentage3 ...
This is what I could come up with:
import os

os.chdir("directory")
f = open("data1", "r")
n = 3
a = f.readlines()
b = []
for i in range(n):
    b.append(a[i].split(" ")[2])
file_lines = []
with open("data1", 'r') as f:
    for t in range(n):
        for x in f.readlines():
            file_lines.append(''.join([x.strip(), b[t], '\n']))
            print(b[t])
with open("data2", 'w') as f:
    f.writelines(file_lines)
With this code I get the new file, but the appended percentages all come from the first line rather than differing per line. Also, only one set of percentages gets added; re-running overwrites it rather than adding more columns down the lines.
I hope I explained it properly; any help would be appreciated.
You can use a dict as a structure to load and write your data. This dict can then be pickled to store the data.
import pickle
import os

output = 'output'
dump = 'dump'
output_dict = {}
if os.path.exists(dump):
    with open(dump, 'rb') as f:
        output_dict = pickle.load(f)

def read_data(lines):
    """ Builds a dict from a list of lines where the keys are
    a tuple(w1, w2) and the values are w3, where w1, w2 and w3
    are the 3 words composing each line.
    """
    d = {}
    for line in lines:
        elts = line.split()
        assert(len(elts) == 3)
        d[tuple(elts[:2])] = elts[2]
    return d

def get_data(data):
    """ Recover data from a dict as a list of strings.
    The formatting for each element of the list is the following:
    k[0] k[1] v
    where k and v are the key/values of the data dict.
    """
    lines = []
    for k, v in data.items():
        line = list(k)
        line += [v, '\n']
        lines.append(' '.join(line))
    return lines

def update_data(output_d, new_d):
    """ Update a data dict with new data.
    The values are appended if the key already exists.
    Otherwise a new key/value pair is created.
    """
    for k, v in new_d.items():
        if k in output_d:
            output_d[k] = ' '.join([output_d[k], v])
        else:
            output_d[k] = v

for data_file in ('data1', 'data2', 'data3'):
    with open(data_file) as f:
        d1 = read_data(f.readlines())
    update_data(output_dict, d1)

print("Dumping data", output_dict)
with open(dump, 'wb') as f:
    pickle.dump(output_dict, f)

print("Writing data")
with open(output, 'w') as f:
    f.write('\n'.join(get_data(output_dict)))
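A quick sketch of what update_data accumulates, using hypothetical input lines:

d = {}
update_data(d, read_data(['1. xxx %40\n', '2. yyy %10\n']))
update_data(d, read_data(['1. xxx %35\n', '2. yyy %12\n']))
print(get_data(d))
# -> ['1. xxx %40 %35 \n', '2. yyy %10 %12 \n']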

Search pattern and list filenames recursively

My code below lists all the files regardless of whether the pattern is matched. The file ss.txt doesn't contain the pattern, but it still appears in the output.
import os
import re

files = []
pattern = re.compile('my')
for p, d, f in os.walk(r'C:\Users\anaveed\test'):
    for file in f:
        files.append(os.path.join(p, file))
for f in files:
    with open(f, 'r') as x:
        for i in x:
            Var1 = re.search(pattern, i)
        print(f)
C:\Users\anaveed\test\sample.txt
C:\Users\anaveed\test\testfile.txt
C:\Users\anaveed\test\hoax\a.txt
C:\Users\anaveed\test\hoax\ss.txt
Track whether any line matched, and print the filename only after scanning the file (the with statement already closes it, so no explicit close is needed):
import os
import re

files = []
pattern = re.compile('my')
for p, d, f in os.walk(r'C:\Users\anaveed\test'):
    for file in f:
        files.append(os.path.join(p, file))
for f in files:
    with open(f, 'r') as x:
        Var1 = False
        for i in x:
            if re.search(pattern, i):
                Var1 = True
    if Var1:
        print(f)
C:\Users\anaveed\test\sample.txt
C:\Users\anaveed\test\testfile.txt
C:\Users\anaveed\test\hoax\a.txt
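A shorter, behavior-equivalent sketch: any() short-circuits on the first matching line, so each file is only read as far as needed:

for f in files:
    with open(f, 'r') as x:
        # stops at the first line that matches
        if any(pattern.search(line) for line in x):
            print(f)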

Python-2.7 write to file

I have this script:
f = open("/ggg/darr/file/", "r+")
a = 0
for line in f:
    if a == 58:
        print(line)
        line1 = "google.ca"
        f.write(line1)
    print line
    a = a + 1
f.close()
I want to keep my file but change only what is written on line 58 to "google.ca", then save it. I am using Linux Mint 17.2.
# Read data from file
with open('yourfile.txt', 'r') as file:
    # read all lines of the file into a list
    data = file.readlines()
# change the data on line 58 (the list index starts from 0)
data[57] = 'Data you need to change\n'  # keep the trailing newline
# Write the data back
with open('yourfile.txt', 'w') as file:
    file.writelines(data)
You need to decide whether you want to write a new file (with print) or change the old file (with r+ mode and f.write). You will probably be happiest if you write a new file.
dataRead = []
f = open("/ggg/darr/file/", "r+")
a = 0
for line in f:
    if a == 57:  # line 58 is at index 57, since a starts at 0
        line = "google.ca\n"
    dataRead.append(line)
    a = a + 1
f.close()

f2 = open("/some/new/file", "w")
for line in dataRead:
    f2.write(line)
f2.close()
Following Adisak Anusornsrirung's answer, I wrote it like this:
with open('sss.txt', 'r') as file:
    data = file.readlines()
print(data[14])
data[14] = "some data here" + "\n"
with open("sss.txt", 'w') as file:
    file.writelines(data)

f = open("sss.txt", 'r')
print(f.read())
f.close()
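For completeness, the standard library's fileinput module can rewrite a file in place, so the read/modify/write dance disappears; a minimal sketch (hypothetical filename):

import fileinput
import sys

for i, line in enumerate(fileinput.input('sss.txt', inplace=True)):
    if i == 57:  # zero-based index 57 is line 58
        line = 'google.ca\n'
    sys.stdout.write(line)  # stdout is redirected into the file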

How to avoid repetition in my code

I've written code that extracts all the words from two files and returns only the words that appear in both files.
However, I have some repetition, which is not considered good style, so I'm wondering whether it is possible to avoid it in my code.
import re

def print_common_words(filename_1, filename_2):
    try:
        input_file = open(filename_1, 'r')
        source_string = input_file.read().lower()
        input_file.close()
        all_words1 = set(re.findall('[a-zA-Z]+', source_string))

        input_file = open(filename_2, 'r')          # Repetition
        source_string = input_file.read().lower()   # Repetition
        input_file.close()                          # Repetition
        all_words2 = set(re.findall('[a-zA-Z]+', source_string))  # Repetition

        intersection_list = all_words1.intersection(all_words2)
        union_list = []
        for word in intersection_list:
            union_list += [word]
        union_list.sort()
        for i in union_list:
            print(i)
    except FileNotFoundError:
        print("A file could not be found.")
Use a function to factor out the duplicated code.
def get_file(file):
    input_file = open(file, 'r')
    source_string = input_file.read().lower()
    input_file.close()
    return set(re.findall('[a-zA-Z]+', source_string))
Call it like:
all_words1 = get_file(filename_1)
all_words2 = get_file(filename_2)
For example:
all_words1 = get_file(filename_1)
all_words2 = get_file(filename_2)
intersection_list = all_words1.intersection(all_words2)
union_list = []
for word in intersection_list:
    union_list += [word]
union_list.sort()
for i in union_list:
    print(i)
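Since the intersection is already a set, the manual copy loop can go too; a minimal sketch of the whole function using the helper (same behavior, assuming get_file is defined as above):

def print_common_words(filename_1, filename_2):
    try:
        # sorted() accepts the set directly, so no intermediate list is needed
        for word in sorted(get_file(filename_1) & get_file(filename_2)):
            print(word)
    except FileNotFoundError:
        print("A file could not be found.")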
