.csv to .arff function on Python - python-3.x

I'm trying to write a conversion function from CSV to ARFF; right now I have this:
def csv2arff(csv_path, arff_path=None):
    """Write a relation line followed by the CSV header row to ``arff_path``.

    Work in progress: only the first line of the CSV (the comma-separated
    column names) is copied, verbatim, after the relation line.

    Args:
        csv_path: Path of the CSV file to read.
        arff_path: Path of the file to write. When ``None``, it is derived
            from ``csv_path`` by replacing its last four characters with
            ``'_prueba.arff'``.
    """
    with open(csv_path, 'r') as fr:
        attributes = []  # placeholder, not used yet
        if arff_path is None:
            arff_path = csv_path[:-4] + '_prueba.arff'  # *.csv -> *_prueba.arff
        write_sw = False  # placeholder, not used yet
        with open(arff_path, 'w') as fw:
            fw.write('#relation base_datos_modelo_3_limpia \n')
            # Only the header row (first line) of the CSV is needed here.
            firstline = fr.readlines()[0].rstrip()
            fw.write(firstline)
and that gives me:
#relation base_datos_modelo_3_limpia
DVJ_Valgus_KneeMedialDisplacement_D_discr,BMI,AgeGroup,ROM-PADF-KE_D,DVJ_Valgus_FPPA_D_discr,TrainFrequency,DVJ_Valgus_FPPA_ND_discr,Asym_SLCMJLanding-pVGRF(10percent)_discr,Asym-ROM-PHIR(≥8)_discr,Asym_TJ_Valgus_FPPA(10percent)_discr,TJ_Valgus_FPPA_ND_discr,Asym-ROM-PHF-KE(≥8)_discr,TJ_Valgus_FPPA_D_discr,Asym_SLCMJ-Height(10percent)_discr,Asym_YBTpl(10percent)_discr,Position,Asym-ROM-PADF-KE(≥8º)_discr,DVJ_Valgus_KneeMedialDisplacement_ND_discr,DVJ_Valgus_Knee-to-ankle-ratio_discr,Asym-ROM-PKF(≥8)_discr,Asym-ROM-PHABD(≥8)_discr,Asym-ROM-PHF-KF(≥8)_discr,Asym-ROM-PHER(≥8)_discr,AsymYBTanterior10percentdiscr,Asym-ROM-PHABD-HF(≥8)_discr,Asym-ROM-PHE(≥8)_discr,Asym(>4cm)-DVJ_Valgus_Knee;edialDisplacement_discr,Asym_SLCMJTakeOff-pVGRF(10percent)_discr,Asym-ROM-PHADD(≥8)_discr,Asym-YBTcomposite(10percent)_discr,Asym_SingleHop(10percent)_discr,Asym_YBTpm(10percent)_discr,Asym_DVJ_Valgus_FPPA(10percent)_discr,Asym_SLCMJ-pLFT(10percent)_discr,DominantLeg,Asym-ROM-PADF-KF(≥8)_discr,ROM-PHER_ND,CPRDmentalskills,POMStension,STAI-R,ROM-PHER_D,ROM-PHIR_D,ROM-PADF-KF_ND,ROM-PADF-KF_D,Age_at_PHV,ROM-PHIR_ND,CPRDtcohesion,Eperience,ROM-PHABD-HF_D,MaturityOffset,Weight,ROM-PHADD_ND,Height,ROM-PHADD_D,Age,POMSdepressio,ROM-PADF-KE_ND,POMSanger,YBTanterior_Dnorm,YBTanterior_NDnorm,POMSvigour,Soft-Tissue_injury_≥4days
So I want to put "#attribute" before each attribute and change each "," to "\n", but I don't know how to do it. I tried to write a function to replace the "," but it didn't work. Any ideas?
Thank you guys.

Try the liac-arff library.
Here is an example for converting the UCI iris dataset from ARFF to CSV and then back to ARFF:
import csv

import arff  # third-party package: liac-arff

# arff -> csv
with open('./iris.arff', 'r') as fp:
    content = arff.load(fp)
# newline='' lets the csv module control line endings itself.
with open('./out.csv', 'w', newline='') as fp:
    writer = csv.writer(fp)
    # Each attribute is unpacked as a (name, type) pair; only the name is
    # written to the CSV header.
    header = [name for name, _type in content['attributes']]
    writer.writerow(header)
    writer.writerows(content['data'])

# csv -> arff
with open('./out.csv', 'r', newline='') as fp:
    reader = csv.reader(fp)
    header = None
    data = []
    for row in reader:
        if header is None:
            # The first row holds the column names.
            header = row
        else:
            data.append(row)
content = {}
content['relation'] = "from my csv file"
content['attributes'] = []
for n in header:
    if n == "class":
        # Nominal attribute: the allowed values must be listed explicitly.
        content['attributes'].append((n, ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']))
    else:
        content['attributes'].append((n, 'NUMERIC'))
content['data'] = data
with open('./out.arff', 'w') as fp:
    arff.dump(content, fp)
NB: For the last stage, we need to specify the nominal class values, which you could determine by scanning the data.

Related

Method reads properly but the written text file only has 1 line. Is \n not working?

The goal is to extract specific data from a text file under a folder
then write that data into another file under different folder
The extraction part works, save to variables and can even print them
The problem arises when you try to write them to a file
The file is empty
Need to write in this format
{self.title};;;{self.author};;;{self.release_date};;;
{self.last_update_date};;;{self.language};;;{self.producer};;;{self.book_path}
# This class includes all the operations related to a book
class Operation:
    """
    Operations related to a book.

    Need to include these class variables
    book_title_list (List of all books titles such as “[title1, title2, title3, …]”)
    book_info_dict = “{title1: obj1, title2:obj2, title3:obj3…….}”)
    """
    book_folder_path = './data/books_data/'
    book_info_path = './data/result_data/books.txt'

    def extract_book_info(self):
        """Read every file in ``book_folder_path`` and write its metadata.

        For each file, lines 10-21 (after stripping whitespace) are sliced
        into title, author, release date, last update date, language and
        producer, and one ``;;;``-separated record is written to
        ``book_info_path``.

        Returns:
            True when the loop completes, False when an exception is raised
            inside the loop. Listing the folder happens before the ``try``,
            so an error there propagates to the caller.
        """
        directory_files = os.listdir(self.book_folder_path)  # Stores the .txt files under books_data folder
        try:
            for i in directory_files:
                with open(f'{self.book_folder_path}/{i}', 'r', encoding='utf8') as f:
                    f_contents = f.readlines()
                f_line_free = list(map(lambda x: x.strip(), f_contents))
                f_lists = f_line_free[10:22]  # Slicing only the required elements of the list
                # Extracting only the necessary part and storing them
                # under proper variables
                title = f_lists[0]
                author = f_lists[2]
                release_date = f_lists[4]
                last_update_date = f_lists[5]
                language = f_lists[7]
                producer = f_lists[11]
                # Extracting the desired values: drop a fixed-width prefix
                # from each line (and one trailing character from the
                # update date).
                title_data = title[7:]
                author_data = author[8:]
                release_date_data = release_date[14:]
                last_update_date_data = last_update_date[24:-1]
                language_data = language[10:]
                producer_data = producer[13:]
                print(title_data)
                # NOTE(review): mode 'w' truncates the file on every
                # iteration, so only the record of the last file processed
                # remains in book_info_path after the loop.
                with open(self.book_info_path, 'w', encoding="utf8") as wf:
                    wf.write(f'{title_data};;;{author_data};;;{release_date_data};;;'
                             f'{last_update_date_data};;;{language_data};;;{producer_data};;;{self.book_info_path}\n')
            return True
        except FileNotFoundError:
            return False
        except Exception:
            return False

Printing data from the csv file

I want to ask how to print the number of dentists in each category for 2012 from the CSV file.
import csv

csv_file = open('project.csv', 'r')
data_file = csv.reader(csv_file)


def opt_a():
    """Print every data row of the CSV without its first (year) column."""
    next(data_file)  # skip the header row
    for row in data_file:
        i = row[1:]
        print(i)


opt_a()
This is what I have tried, but I am only able to print out every row.
Sample of data :
year,Private Dental Specialists,Private General Dental Practitioners,Public Dental Specialists,Public General Dental Practitioners
2008,116,864,47,268
2009,180,863,74,246
2010,185,874,87,267
2011,199,961,77,241
2012,203,1012,86,271
2013,219,1192,88,308
2014,216,1219,96,348
2015,215,1326,102,347
2016,219,1425,106,380
2017,232,1516,112,365
2018,189,1579,113,412
2019,237,1644,130,379
In your for loop, simply check if the first value of your row is 2012 :
import csv

csv_file = open('project.csv', 'r')
data_file = csv.reader(csv_file)


def opt_a():
    """Print the dentist counts (all columns after the year) for 2012."""
    next(data_file)  # skip the header row
    for row in data_file:
        # csv.reader yields strings, so compare against "2012", not 2012.
        if row[0] == "2012":
            i = row[1:]
            print(i)


opt_a()

Creating a csv file from python readlines()

I want to create a CSV file from a text file
text_file.txt
Friday,09071235462,08:42:48
Princely,08123456,08:46:45
My code to convert the file
#Convert to csv
import csv

for_csv_list = []
with open(f'./text_file.txt', "r") as file:
    lines = file.readlines()
    for line in lines:
        # Skip blank lines in the input.
        if line != "\n":
            # split() without an argument splits on whitespace, so each
            # comma-separated line stays a single field.
            for_csv_list.append(line.strip().split())
# NOTE(review): the file is opened without newline=''; the csv docs say this
# can add an extra '\r' on platforms that write '\r\n' line endings, which
# then reads back as empty rows.
with open("the_csv_file.csv","w") as convert_to_csv:
    writer = csv.writer(convert_to_csv)
    writer.writerows(for_csv_list)
Then I tried to open my converted CSV file
# Read the converted file back and print each parsed row.
f = open("the_csv_file.csv")
csv_f = csv.reader(f)
for row in csv_f:
    print("this is row = ",row)
f.close()
The code returned
this is row = ['Friday,09071235462,08:42:48']
this is row = []
this is row = ['Princely,08123456,08:46:45']
this is row = []
How can I remove the empty lists? My expected result is:
this is row = ['Friday,09071235462,08:42:48']
this is row = ['Princely,08123456,08:46:45']

How to new line or separators in csv?

I don't want to overwrite the CSV, so I tried 'a', but I get no separators.
I tried replacing 'w' with 'a', but I get no separators, just a mishmash.
import csv
import os

# Concatenate the first column of every row and count the '0' and '1'
# characters in the result.
with open('csv/1.n04', 'r') as csvFile:
    reader = csv.reader(csvFile)
    s = "".join(row[0] for row in reader)
a = s.count('0')
b = s.count('1')

# Mode 'w' replaces any previous content of the output file.
# The with-block guarantees the file is closed (the original `f.close`
# was missing its call parentheses and never closed the file).
with open('csv/sum1.n04', 'w') as f:
    if b>=1:
        f.write(str((a+b)/b))
    else:
        f.write(str(a))

os.remove("csv/1.n04")
print ('Klar')
No error messages; it just overwrites the CSV file.

Formatting a Python generated CSV

I'm making a web scraper in python.
I'd like to remove the blank rows from the generated csv and would like to add a header saying "Car make", "Car Model", "Price". and would also like to remove the [] from all the names in the generated csv
imports go here...
source = requests.get(' website link goes here...').text
soup = bs(source, 'html.parser')
csv_file = open('pyScraper_1.3_Export', 'w')
csv_writer = csv.writer(csv_file)
csv_writer.writerow(['brand_Names', 'Prices'])
csv_file.close()
#gives us the make and model of all cars
Names = []
Prices_Cars = []
for var1 in soup.find_all('h3', class_ = 'brandModelTitle'):
    car_Names = var1.text # var1.span.text
    test_Split = car_Names.split("\n")
    full_Names = test_Split[1:3]
    #make = test_Split[1:2]
    #model = test_Split[2:3]
    Names.append(full_Names)
#prices
for Prices in soup.find_all('span', class_ = 'f20 bold fieldPrice'):
    Prices = Prices.span.text
    # Raw string so that \s is a regex escape, not a string escape.
    Prices = re.sub(r"^\s+|\s+$", "",Prices, flags=re.UNICODE) # removing whitespace before the prices
    Prices_Cars.append(Prices)
csv_file = open('pyScraper_1.3_Export.csv', 'a')
csv_writer = csv.writer(csv_file)
i = 0
while i < len(Prices_Cars):
    csv_writer.writerow([Names[i], Prices_Cars[i]])
    i = i + 1
csv_file.close()
here is the screenshot of the generated csv
![][1]
[1]: https://i.stack.imgur.com/m7Xw1.jpg
To remove additional newlines:
csv_file = open('pyScraper_1.3_Export.csv', 'a', newline='')
("If csvfile is a file object, it should be opened with newline=''.", https://docs.python.org/3/library/csv.html#csv.writer)
To add headers:
You are actually adding headers, but to a file named pyScraper_1.3_Export (note there is no .csv extension); this may be a typo. Just change the code at about line 6 to
# Create (or truncate) the export file and write the header row; the
# with-block closes the file once the header is written.
with open('pyScraper_1.3_Export.csv', 'w', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(["Car make", "Car Model", "Price"])
As for removing nested list, unpack Names[i] with * operator:
csv_writer.writerow([*Names[i], Prices_Cars[i]])

Resources