Creating a csv file from python readlines() - python-3.x

I want to create a CSV file from a text file
text_file.txt
Friday,09071235462,08:42:48
Princely,08123456,08:46:45
My code to convert the file
#Convert to csv
import csv
for_csv_list = []
with open(f'./text_file.txt', "r") as file:
lines = file.readlines()
for line in lines:
if line != "\n":
for_csv_list.append(line.strip().split())
with open("the_csv_file.csv","w") as convert_to_csv:
writer = csv.writer(convert_to_csv)
writer.writerows(for_csv_list)
Then I tried to open my converted CSV file
f = open("the_csv_file.csv")
csv_f = csv.reader(f)
for row in csv_f:
print("this is row = ",row)
f.close()
The code returned
this is row = ['Friday,09071235462,08:42:48']
this is row = []
this is row = ['Princely,08123456,08:46:45']
this is row = []
Please how will I remove the empty list since my expected result should be:
this is row = ['Friday,09071235462,08:42:48']
this is row = ['Princely,08123456,08:46:45']

Related

.csv to .arff function on Python

I'm trying to do a convertion function from csv to arff, right now I have this:
def csv2arff(csv_path, arff_path=None):
with open(csv_path, 'r') as fr:
attributes = []
if arff_path is None:
arff_path = csv_path[:-4] + '_prueba.arff' # *.arff -> *.csv
write_sw = False
with open(arff_path, 'w') as fw:
fw.write('#relation base_datos_modelo_3_limpia \n')
firstline = fr.readlines()[0].rstrip()
fw.write(firstline)
and that gives me:
#relation base_datos_modelo_3_limpia
DVJ_Valgus_KneeMedialDisplacement_D_discr,BMI,AgeGroup,ROM-PADF-KE_D,DVJ_Valgus_FPPA_D_discr,TrainFrequency,DVJ_Valgus_FPPA_ND_discr,Asym_SLCMJLanding-pVGRF(10percent)_discr,Asym-ROM-PHIR(≥8)_discr,Asym_TJ_Valgus_FPPA(10percent)_discr,TJ_Valgus_FPPA_ND_discr,Asym-ROM-PHF-KE(≥8)_discr,TJ_Valgus_FPPA_D_discr,Asym_SLCMJ-Height(10percent)_discr,Asym_YBTpl(10percent)_discr,Position,Asym-ROM-PADF-KE(≥8º)_discr,DVJ_Valgus_KneeMedialDisplacement_ND_discr,DVJ_Valgus_Knee-to-ankle-ratio_discr,Asym-ROM-PKF(≥8)_discr,Asym-ROM-PHABD(≥8)_discr,Asym-ROM-PHF-KF(≥8)_discr,Asym-ROM-PHER(≥8)_discr,AsymYBTanterior10percentdiscr,Asym-ROM-PHABD-HF(≥8)_discr,Asym-ROM-PHE(≥8)_discr,Asym(>4cm)-DVJ_Valgus_Knee;edialDisplacement_discr,Asym_SLCMJTakeOff-pVGRF(10percent)_discr,Asym-ROM-PHADD(≥8)_discr,Asym-YBTcomposite(10percent)_discr,Asym_SingleHop(10percent)_discr,Asym_YBTpm(10percent)_discr,Asym_DVJ_Valgus_FPPA(10percent)_discr,Asym_SLCMJ-pLFT(10percent)_discr,DominantLeg,Asym-ROM-PADF-KF(≥8)_discr,ROM-PHER_ND,CPRDmentalskills,POMStension,STAI-R,ROM-PHER_D,ROM-PHIR_D,ROM-PADF-KF_ND,ROM-PADF-KF_D,Age_at_PHV,ROM-PHIR_ND,CPRDtcohesion,Eperience,ROM-PHABD-HF_D,MaturityOffset,Weight,ROM-PHADD_ND,Height,ROM-PHADD_D,Age,POMSdepressio,ROM-PADF-KE_ND,POMSanger,YBTanterior_Dnorm,YBTanterior_NDnorm,POMSvigour,Soft-Tissue_injury_≥4days
So i want to put "#attribute" before each attribute and change the "," to "\n". But don't know how to do it, I tried to make a function to change the "," but didn't work, any idea?
Thank you guys.
Try the liac-arff library.
Here is an example for converting the UCI iris dataset from ARFF to CSV and then back to ARFF:
import csv
import arff
# arff -> csv
content = arff.load(open('./iris.arff', 'r'))
with open('./out.csv', 'w') as fp:
writer = csv.writer(fp)
header = []
for n, t in content['attributes']:
header.append(n)
writer.writerow(header)
writer.writerows(content['data'])
# csv -> arff
with open('./out.csv', 'r') as fp:
reader = csv.reader(fp)
header = None
data = []
for row in reader:
if header is None:
header = row
else:
data.append(row)
content = {}
content['relation'] = "from my csv file"
content['attributes'] = []
for n in header:
if n == "class":
content['attributes'].append((n, ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']))
else:
content['attributes'].append((n, 'NUMERIC'))
content['data'] = data
with open('./out.arff', 'w') as fp:
arff.dump(content, fp)
NB: For the last stage, we need to specify the nominal class values, which you could determine by scanning the data.

How do I remove first column in csv file?

I have a CSV file where the first row in the first column is blank with some numbers in the second and third row. This whole column is useless and I need to remove it so I can convert the data into a JSON file. I just need to know how to remove the first column of data so I can parse it. Any help is greatly appreciated!
My script is as follows
#!/usr/bin/python3
import pandas as pd
import csv, json
xls = pd.ExcelFile(r'C:\Users\Andy-\Desktop\Lab2Data.xlsx')
df = xls.parse(sheetname="Sheet1", index_col=None, na_values=['NA'])
df.to_csv('file.csv')
file = open('file.csv', 'r')
lines = file.readlines()
file.close()
data = {}
with open('file.csv') as csvFile:
csvReader = csv.DictReader(csvFile)
for rows in csvReader:
id = rows['Id']
data[id] = rows
with open('Lab2.json', 'w') as jsonFile:
jsonFile.write(json.dumps(data, indent=4))
I don't know much about json files but this will remove the first column from your csv file.
with open ('new_file.csv', 'w') as out_file :
with open ('file.csv') as in_file :
for line in in_file :
test_string = line.strip ('\n').split (',')
out_file.write (','.join (test_string [1:]) + '\n')

Getting an IndexError: list index out of range in line no. = 19?

import csv
with open('C:/Users/dkarar/Desktop/Mapping project/RC_Mapping.csv', 'r') as file1:
with open('C:/Users/dkarar/Desktop/Mapping project/Thinclient_mapping.csv', 'r') as file2:
with open('C:/Users/dkarar/Desktop/Mapping project/output.csv', 'w') as outfile:
writer = csv.writer(outfile)
reader1 = csv.reader(file1)
reader2 = csv.reader(file2)
for row in reader1:
if not row:
continue
for other_row in reader2:
if not other_row:
continue
# if we found a match, let's write it to the csv file with the id appended
if row[1].lower() == other_row[1].lower():
new_row = other_row
new_row.append(row[0])
writer.writerow(new_row)
continue
# reset file pointer to beginning of file
file2.seek(0)
You seem to be getting at least one row where there is a single element. That's why when accessing row[1] you get an IndexError, there's only one element in the list row.

How to read a column and write it as a row in python?

I am trying to read a csv and then transpose one column into a row.
I tried following a tutorial for reading a csv and then one for writing but the data doesnt stay saved to the list when I try to write the row.
import csv
f = open('bond-dist-rep.csv')
csv_f = csv.reader(f)
bondlength = []
with open("bond-dist-rep.csv") as f:
for row in csv_f:
bondlength.append(row[1])
print (bondlength)
print (len(bondlength))
with open('joined.csv', 'w', newline='') as csvfile:
csv_a = csv.writer (csvfile, delimiter=',',quotechar='"',
quoting=csv.QUOTE_ALL)
csv_a.writerow(['bondlength'])
with open('joined.csv') as csvfile:
readCSV = csv.reader(csvfile, delimiter=',')
for row in readCSV:
print(row)
print(row[0])
f.close()
The matter is that you only read the first value of each line and write only a string in the new file.
In order to transpose the read lines, you can use the zip function.
I also delete the first open function which is useless because of the good use of with for opening the file.
Here the final code:
import csv
bondlength = []
with open("bond-dist-rep.csv") as csv_f:
read_csv = csv.reader(csv_f)
for row in read_csv:
bondlength.append(row)
# delete the header if you have one
bondlength.pop(0)
with open('joined.csv', 'w') as csvfile:
csv_a = csv.writer (csvfile, delimiter=',')
for transpose_row in zip(*bondlength):
csv_a.writerow(transpose_row)

Using Python to delete rows in a csv file that contain certain chars

I have a csv file that I'm trying to clean up. I am trying to look at the first column and delete any rows that have anything other than chars for that row in the first column (I'm working on cleaning up rows where the first column has a ^ or . for now). It seems all my attempts either do nothing or nuke the whole csv file.
Interestingly enough, I have code that can identify the problem rows and it seems to work fine
def FindProblemRows():
with open('Data.csv') as csvDataFile:
ProblemRows = []
csvReader = csv.reader(csvDataFile)
data = [row for row in csv.reader(csvDataFile)]
length = len(data)
for i in range (0,length):
if data[i][0].find('^')!=-1 or data[i][0].find('.')!=-1:
ProblemRows.append(i)
return (ProblemRows)
Below I have my latest three failed attempts. Where am I going wrong and what should I change? Which of these comes closest?
'''
def Clean():
with open("Data.csv", "w", newline='') as f:
data = list(csv.reader(f))
writer = csv.writer(f)
Problems = FindProblemRows()
data = list(csv.reader(f))
length = len(data)
for row in data:
for i in Problems:
for j in range (0, length):
if row[j] == i:
writer.writerow(row)
Problems.remove(i)
def Clean():
Problems = FindProblemRows()
with open('Data.csv') as csvDataFile:
csvReader = csv.reader(csvDataFile)
data = [row for row in csv.reader(csvDataFile)]
length = len(data)
width = len(data[0])
with open("Data.csv","r") as csvFile:
csvReader = csv.reader( csvFile )
with open("CleansedData.csv","w") as csvResult:
csvWrite = csv.writer( csvResult )
for i in Problems:
for j in range (0, length):
if data[j] == i:
del data[j]
for j in range (0, length):
csvWrite.writerow(data[j])
'''
def Clean():
with open("Data.csv", 'r') as infile , open("CleansedData.csv", 'w') as outfile:
data = [row for row in infile]
for row in infile:
for column in row:
if "^" not in data[row][0]:
if "." not in data[row][0]:
outfile.write(data[row])
Update
Now I have:
def Clean():
df = pd.read_csv('Data.csv')
df = df['^' not in df.Symbol]
df = df['.' not in df.Symbol]
but I get KeyError: True
Shouldn't that work?
You should check whether the column Symbol contains any of the characters of interest. Method contains takes a regular expression:
bad_rows = df.Symbol.str.contains('[.^]')
df_clean = df[~bad_rows]

Resources