when i try to save my table (QTableWidget) as a csv file everything fits in the first cell from the excel file, how can i seperate each cell? this is my saving function:
def save_text(self, table):
path = QFileDialog.getSaveFileName(self, 'Save CSV', os.getenv('HOME'), 'CSV(*.csv)')
if path[0] != '':
with open(path[0], 'w') as csv_file:
writer = csv.writer(csv_file, dialect='excel')
for row in range(table.rowCount()):
row_data = []
for column in range(table.columnCount()):
item = table.item(row, column)
if item is not None:
row_data.append(item.text())
else:
row_data.append('')
writer.writerow(row_data)
Try replacing:
writer = csv.writer (csv_file, dialect = 'excel')
on
writer = csv.writer (csv_file, dialect = 'excel', delimiter = ';')
Related
I'm trying to do a convertion function from csv to arff, right now I have this:
def csv2arff(csv_path, arff_path=None):
with open(csv_path, 'r') as fr:
attributes = []
if arff_path is None:
arff_path = csv_path[:-4] + '_prueba.arff' # *.arff -> *.csv
write_sw = False
with open(arff_path, 'w') as fw:
fw.write('#relation base_datos_modelo_3_limpia \n')
firstline = fr.readlines()[0].rstrip()
fw.write(firstline)
and that gives me:
#relation base_datos_modelo_3_limpia
DVJ_Valgus_KneeMedialDisplacement_D_discr,BMI,AgeGroup,ROM-PADF-KE_D,DVJ_Valgus_FPPA_D_discr,TrainFrequency,DVJ_Valgus_FPPA_ND_discr,Asym_SLCMJLanding-pVGRF(10percent)_discr,Asym-ROM-PHIR(≥8)_discr,Asym_TJ_Valgus_FPPA(10percent)_discr,TJ_Valgus_FPPA_ND_discr,Asym-ROM-PHF-KE(≥8)_discr,TJ_Valgus_FPPA_D_discr,Asym_SLCMJ-Height(10percent)_discr,Asym_YBTpl(10percent)_discr,Position,Asym-ROM-PADF-KE(≥8º)_discr,DVJ_Valgus_KneeMedialDisplacement_ND_discr,DVJ_Valgus_Knee-to-ankle-ratio_discr,Asym-ROM-PKF(≥8)_discr,Asym-ROM-PHABD(≥8)_discr,Asym-ROM-PHF-KF(≥8)_discr,Asym-ROM-PHER(≥8)_discr,AsymYBTanterior10percentdiscr,Asym-ROM-PHABD-HF(≥8)_discr,Asym-ROM-PHE(≥8)_discr,Asym(>4cm)-DVJ_Valgus_Knee;edialDisplacement_discr,Asym_SLCMJTakeOff-pVGRF(10percent)_discr,Asym-ROM-PHADD(≥8)_discr,Asym-YBTcomposite(10percent)_discr,Asym_SingleHop(10percent)_discr,Asym_YBTpm(10percent)_discr,Asym_DVJ_Valgus_FPPA(10percent)_discr,Asym_SLCMJ-pLFT(10percent)_discr,DominantLeg,Asym-ROM-PADF-KF(≥8)_discr,ROM-PHER_ND,CPRDmentalskills,POMStension,STAI-R,ROM-PHER_D,ROM-PHIR_D,ROM-PADF-KF_ND,ROM-PADF-KF_D,Age_at_PHV,ROM-PHIR_ND,CPRDtcohesion,Eperience,ROM-PHABD-HF_D,MaturityOffset,Weight,ROM-PHADD_ND,Height,ROM-PHADD_D,Age,POMSdepressio,ROM-PADF-KE_ND,POMSanger,YBTanterior_Dnorm,YBTanterior_NDnorm,POMSvigour,Soft-Tissue_injury_≥4days
So i want to put "#attribute" before each attribute and change the "," to "\n". But don't know how to do it, I tried to make a function to change the "," but didn't work, any idea?
Thank you guys.
Try the liac-arff library.
Here is an example for converting the UCI iris dataset from ARFF to CSV and then back to ARFF:
import csv
import arff
# arff -> csv
content = arff.load(open('./iris.arff', 'r'))
with open('./out.csv', 'w') as fp:
writer = csv.writer(fp)
header = []
for n, t in content['attributes']:
header.append(n)
writer.writerow(header)
writer.writerows(content['data'])
# csv -> arff
with open('./out.csv', 'r') as fp:
reader = csv.reader(fp)
header = None
data = []
for row in reader:
if header is None:
header = row
else:
data.append(row)
content = {}
content['relation'] = "from my csv file"
content['attributes'] = []
for n in header:
if n == "class":
content['attributes'].append((n, ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']))
else:
content['attributes'].append((n, 'NUMERIC'))
content['data'] = data
with open('./out.arff', 'w') as fp:
arff.dump(content, fp)
NB: For the last stage, we need to specify the nominal class values, which you could determine by scanning the data.
I want to create a CSV file from a text file
text_file.txt
Friday,09071235462,08:42:48
Princely,08123456,08:46:45
My code to convert the file
#Convert to csv
import csv
for_csv_list = []
with open(f'./text_file.txt', "r") as file:
lines = file.readlines()
for line in lines:
if line != "\n":
for_csv_list.append(line.strip().split())
with open("the_csv_file.csv","w") as convert_to_csv:
writer = csv.writer(convert_to_csv)
writer.writerows(for_csv_list)
Then I tried to open my converted CSV file
f = open("the_csv_file.csv")
csv_f = csv.reader(f)
for row in csv_f:
print("this is row = ",row)
f.close()
The code returned
this is row = ['Friday,09071235462,08:42:48']
this is row = []
this is row = ['Princely,08123456,08:46:45']
this is row = []
Please how will I remove the empty list since my expected result should be:
this is row = ['Friday,09071235462,08:42:48']
this is row = ['Princely,08123456,08:46:45']
I am getting "raw" data from a csv file, and putting only what I need for a new csv file that will be used to auto add users to a different system...
I am unsure how to add the correct headers needed for the file.
I've tried looking at other examples of adding headers but have not figured this out yet...
The headers I need to add are as follows:
"ID Card Number","Name","E-Mail","User Level","Position","Status","Covered Under Insurance","Paid Insurance"
(and in that order)
import csv
def studentscsv():
with open('..\StudentEmails_and_StudentNumbers.csv') as csv_file:
csv_reader = csv.reader(csv_file, delimiter=',')
with open('mydirectory\student_users.csv', mode='w', newline='') as output_file:
write = csv.writer(output_file, delimiter=',', quoting=csv.QUOTE_MINIMAL)
for row in csv_reader:
a = row[0]
studentnumber = row[1]
firstname = row[2]
lastname = row[3]
grade = row[4]
studentname = firstname + " " + lastname
studentemail = firstname + "." + lastname + "#mydomain.org"
status = "Active"
position = "Student"
covered = "Yes"
paid = "Yes"
write.writerow([studentnumber, studentname, studentemail, grade, position, status, covered, paid])
def main():
"""
Controls the program execution
:param in_file: the name of the input file.
:return: None
"""
if __name__ == '__main__':
main()
The file generates fine with the way the code is written. I am just unsure what I need to change to add the headers.
Using the csv module, as you are, it's pretty straight forward. Define your headers in an array and then create a DictWriter with the fieldnames set to your array. Reference the following code and documentation:
import csv
with open('names.csv', 'w') as csvfile:
fieldnames = ['first_name', 'last_name']
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writeheader()
writer.writerow({'first_name': 'Baked', 'last_name': 'Beans'})
writer.writerow({'first_name': 'Lovely', 'last_name': 'Spam'})
writer.writerow({'first_name': 'Wonderful', 'last_name': 'Spam'})
Here's the documentation:
https://docs.python.org/2/library/csv.html#csv.DictWriter
I have a csv file that I'm trying to clean up. I am trying to look at the first column and delete any rows that have anything other than chars for that row in the first column (I'm working on cleaning up rows where the first column has a ^ or . for now). It seems all my attempts either do nothing or nuke the whole csv file.
Interestingly enough, I have code that can identify the problem rows and it seems to work fine
def FindProblemRows():
with open('Data.csv') as csvDataFile:
ProblemRows = []
csvReader = csv.reader(csvDataFile)
data = [row for row in csv.reader(csvDataFile)]
length = len(data)
for i in range (0,length):
if data[i][0].find('^')!=-1 or data[i][0].find('.')!=-1:
ProblemRows.append(i)
return (ProblemRows)
Below I have my latest three failed attempts. Where am I going wrong and what should I change? Which of these comes closest?
'''
def Clean():
with open("Data.csv", "w", newline='') as f:
data = list(csv.reader(f))
writer = csv.writer(f)
Problems = FindProblemRows()
data = list(csv.reader(f))
length = len(data)
for row in data:
for i in Problems:
for j in range (0, length):
if row[j] == i:
writer.writerow(row)
Problems.remove(i)
def Clean():
Problems = FindProblemRows()
with open('Data.csv') as csvDataFile:
csvReader = csv.reader(csvDataFile)
data = [row for row in csv.reader(csvDataFile)]
length = len(data)
width = len(data[0])
with open("Data.csv","r") as csvFile:
csvReader = csv.reader( csvFile )
with open("CleansedData.csv","w") as csvResult:
csvWrite = csv.writer( csvResult )
for i in Problems:
for j in range (0, length):
if data[j] == i:
del data[j]
for j in range (0, length):
csvWrite.writerow(data[j])
'''
def Clean():
with open("Data.csv", 'r') as infile , open("CleansedData.csv", 'w') as outfile:
data = [row for row in infile]
for row in infile:
for column in row:
if "^" not in data[row][0]:
if "." not in data[row][0]:
outfile.write(data[row])
Update
Now I have:
def Clean():
df = pd.read_csv('Data.csv')
df = df['^' not in df.Symbol]
df = df['.' not in df.Symbol]
but I get KeyError: True
Shouldn't that work?
You should check whether the column Symbol contains any of the characters of interest. Method contains takes a regular expression:
bad_rows = df.Symbol.str.contains('[.^]')
df_clean = df[~bad_rows]
I've seen a few answers around to this question but none of them are working.
eg: How to write to an existing excel file without breaking formulas with openpyxl?
Docs give nothing away it seems:
http://openpyxl.readthedocs.io/en/latest/api/openpyxl.reader.excel.html
I tried replacing xls.load_workbook with xls.reader.excel.load_workbook but it doesn't change anything.
My current code overwrites the data in the data sheet, but kills the pivot table functionality in the other sheet (the sheet is still there but only with values). Any idea how to keep the pivot table?
import pandas as pd
import openpyxl as xls
from shutil import copyfile
template_file = 'openpy_test.xlsx'
output_file = 'openpy_output.xlsx'
copyfile(template_file, output_file)
book = xls.load_workbook(output_file,guess_types=False,data_only=False)
writer = pd.ExcelWriter(output_file,engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
df.to_excel(writer,sheet_name='data',index=False,encoding='utf8')
writer.save()
I have also tried book.save('dummycopy.xlsx'), which also saves with a non-funcitoning pivot table. So I am sure the problem is related to the load_workbook function.
Package versions:
openpyxl 2.4.10 py36_0
pandas 0.20.3 py36hce827b7_2
i don't think openpyxl supports excel pivot tables currently. I had to switch to using win32com library.
here is a wrapper module i wrote to do specific stuff with pivot tables; it's basically VBA translated to python (record macros and read the VBA, it'll make sense). hope it helps. it's still a work in progress but should be enough for you to work with.
import os, datetime
import win32com.client as win32
win32c = win32.constants
import sys, datetime
letters = ' ABCDEFGHIJKLMNOPQRSTUVWXYZ' #space to compensate for index. if letter is a if column is 1
def Pull_excel_workbook(path = '', filename = '', visible = False):
'''function to run excel on the given filename'''
if path == '': path = os.getcwd()
if filename == '': raise FileNotFoundError('Please supply a file')
excel = win32.gencache.EnsureDispatch('Excel.Application')
excel.Visible = visible
try: wb = excel.Workbooks.Open(path + filename)
except: print('Try again\n{}'.format(sys.exc_info()))
ws = wb.ActiveSheet
data = list(ws.UsedRange.Value) #2d list of rows and columns
src = '{}!R1C1:R{}C{}'.format(ws.Name, len(data), len(data[0]))
return excel, wb, src
#wb.SaveAs(path + filename)
def Create_pivottable(wb, src, table_name = 'Pivot'):
'''creates Pivot Table object in the wb in a new Pivot worksheet'''
ws = wb.Sheets.Add() #should also change wb.ActiveSheet to the new one.
ws.Name = table_name
tname = ws.Name
starting_point = (4,1) #row, column
pc = wb.PivotCaches().Add(SourceType = win32c.xlDatabase,
SourceData = src)
try:
pt = pc.CreatePivotTable(TableDestination = '{}!R{}C{}'.format(tname, starting_point[0], starting_point[1]),
TableName = table_name,
DefaultVersion = win32c.xlPivotTableVersion10 #15
)
except: #not sure if will work...
print('{}:{}:{}:{}'.format(wb, src, table_name, '{}!R{}C{}'.format(tname, starting_point[0], starting_point[1])))
#tabledestination format of RN Pivot!R4C1 is not correct format, should be 'RN Pivot'!R4C1
pt = pc.CreatePivotTable(TableDestination = '{}!R{}C{}'.format(tname, starting_point[0], starting_point[1]),
TableName = table_name,
DefaultVersion = win32c.xlPivotTableVersion15
)
wb.Sheets(ws.Name).Select()
wb.Sheets(ws.Name).Cells(3,1).Select()
def Add_to_Filter(wb, tname, field_name):
''' '''
field = wb.ActiveSheet.PivotTables(tname).PivotFields(field_name)
field.Orientation = win32c.xlPageField
field.Position = 1
def Add_to_Row(wb, tname, field_name, position = 1):
''' '''
field = wb.ActiveSheet.PivotTables(tname).PivotFields(field_name)
field.Orientation = win32c.xlRowField
field.Position = position
def Add_to_Column(wb, tname, field_name, position = 1):
''' '''
field = wb.ActiveSheet.PivotTables(tname).PivotFields(field_name)
field.Orientation = win32c.xlColumnField
field.Position = position
if position > 1:
text = 'maybe do something here....'
pass
def Add_to_Value(wb, tname, field_name, alias = '', calculation = 'xlSum'):
''' '''
if type(calculation) is str and calculation in win32c.__dict__['__dicts__'][0]:
calculation = win32c.__dict__['__dicts__'][0][calculation]
datafield = wb.ActiveSheet.PivotTables(tname).PivotFields(field_name)
wb.ActiveSheet.PivotTables(tname).AddDataField(datafield, alias, calculation)
def LtoC(letter):
global letters
col = letters.index(letter)
return col
def CtoL(col):
global letters
letter = letters[col]
return letter
def Format_pretty(wb, tname, row_to_colapse):
'''makes it look prettier'''
wb.ActiveSheet.PivotTables(tname).TableStyle2 = 'PivotStyleMedium9'
if type(row_to_colapse) is not str:
for row in row_to_colapse:
wb.ActiveSheet.PivotTables(tname).PivotFields(row).ShowDetail = False #collapses
wb.ActiveSheet.PivotTables(tname).PivotFields(row).RepeatLabels = True #repeats labels
else:
wb.ActiveSheet.PivotTables(tname).PivotFields(row_to_colapse).ShowDetail = False #collapses
wb.ActiveSheet.PivotTables(tname).PivotFields(row_to_colapse).RepeatLabels = True #repeats labels
wb.ActiveSheet.Columns('A:Z').EntireColumn.AutoFit()
wb.ActiveSheet.Range('A1').Select()
def Add_calcd_col(ws, col, row_start, row_end, formula, style = '', col_title = 'default'):
'''col and rows should be int
'''
letter = CtoL(col)
ws.Range('{0}{1}:{0}{2}'.format(letter, row_start, row_end)).Select()
ws.Cells(row_start, col).Value = col_title
for row in range(row_start + 1, row_end + 1):
ws.Cells(row, col).Value = formula.format(row)
ws.Range('{0}{1}:{0}{2}'.format(letter, row_start, row_end)).Style = style
#print("ws.Range('{0}1:{0}200'.format({0})).Style = style".format(letter))
#ws.Range('{0}1:{0}200'.format(letter)).Style = style
def Values_to_columns(wb,tname, position = 2):
''' '''
wb.ActiveSheet.PivotTables(tname).DataPivotField.Orientation = win32c.xlColumnField
wb.ActiveSheet.PivotTables(tname).DataPivotField.Position = position
def WB_save(wb, path, tname, filename):
'''clean save of the new file '''
#Format_pretty(wb, tname, 'Division') #that needs to be fixed....
new_filename = filename[:-5] + '-{}.xlsx'.format(datetime.date.today().strftime('%m.%d.%y'))
wb.SaveAs(path + new_filename)
def Pivot_refresh(path, filename, pivot_sheet_name, pivot_table_name = 'Pivot'):
'''function to refresh the pivot table
tested and functional with recruiting prod report'''
excel, wb, src = Pull_excel_workbook(path = path, filename = filename)
wb.Sheets(pivot_sheet_name).Select()
cell = 'A6' #need a better way for this
excel.Worksheets(pivot_sheet_name).Range(cell).PivotTable.RefreshTable()
#pvt = excel.Worksheets(pivot_sheet_name).Range(cell).PivotTable
#pvt.RefreshTable()
WB_save(wb, path, pivot_table_name, filename)
#pivot refresh
#new = filename[:-5] + '-{}.xlsx'.format(2)
#Pivot_refresh(path = path, filename = new, pivot_sheet_name = 'Pivot')
def Hide_columns(wb, tname, start, end):
'''Hides columns'''
if type(start) is not str: start = CtoL(start)
if type(end) is not str: end = CtoL(end)
wb.ActiveSheet.Columns('{}:{}'.format(start, end)).EntireColumn.Hidden = True