pandas DataFrame.to_excel keeping excel file format - excel

there is an option in pandas to keep the format of the file, when I use df.to_excel to save the data on the file?
The only workaround that i found is:
from openpyxl import load_workbook
import pandas as pd
# df_data is a pd.DataFrame
wb = load_workbook(fout_file)
sheet = wb.active
for r, row in enumerate(dataframe_to_rows(df_data, index=False, header=False), 2):
for c in range(0, df_columns):
sheet.cell(row=r, column=c + 1).value = row[c]
wb.save(fout_file)
There a better way where i don't must copy cell by cell?
Thanks
stefano G.
#DSteman thanks for the idea, I jus tryed to use StyleForm as you advised me.
def main ():
...
...
...
# df_new_data = pd.DataFrame(columns=self.df_values.columns)
df_new_data = StyleFrame.read_excel(self.template_fout, read_style=True)
...
...
...
cr_dfnewdata = 0
for j, row_data in data.iterrows():
original_row = row_data.copy(deep=True)
# df_new_data = df_new_data.append(original_row)
cr_dfnewdata += 1
df_new_data[cr_dfnewdata] = original_row
...
...
...
compensa_row = row_data.copy(deep=True)
compensa_row[self.importo_col] = importo * -1
# compensa_row[self.qta_col] = qta * -1
compensa_row[self.cod_ribal_col] = f"{cod_ribal}-{j}"
# df_new_data = df_new_data.append(compensa_row)
cr_dfnewdata += 1
df_new_data[cr_dfnewdata] = compensa_row
...
...
...
def save_working_data(self, cod_ribalt: str, df_data):
fout_working_name = f"{self.working_dir}/working_{cod_ribalt}.xlsx"
df_data.to_excel(fout_working_name).save()
BUT i got this error:
export_df.index = [row_index.value for row_index in export_df.index]
AttributeError: 'int' object has no attribute 'value'

You can do this using df.to_clipboard(index=False)
from win32com.client import Dispatch
import pandas as pd
xlApp = Dispatch("Excel.Application")
xlApp.Visible = 1
xlApp.Workbooks.Open(r'c:\Chadee\test.xlsx')
xlApp.ActiveSheet.Cells(1,1).Select
d = {'col1': [1, 2], 'col2': [3, 4]}
df = pd.DataFrame(data=d)
df.to_clipboard(index=False)
xlApp.ActiveWorkbook.ActiveSheet.PasteSpecial()
Output:
Note that the cell colors are still the same
Hope that helps! :-)

Use the styleframe module to perserve most of the styling in your sheet. If you have a styled sheet with columns ['Entry 1', 'Entry 2'] for example, you can enter values like this:
from styleframe import StyleFrame
sf = StyleFrame.read_excel('test.xlsx', read_style=True)
sf.loc[0,'Entry 1'].value = 'Modified 1'
sf.to_excel('test.xlsx').save()
Make sure that the cell you are trying to fill already has a placeholder value like 0. My script returned errors if it attempted to fill an empty cell.
Check out this thread too: Overwriting excel columns while keeping format using pandas

Related

Is there a method to change the number format of 100 excel files with 20 sheets each from 2 decimal to 6 decimal using Python?

import pandas as pd
import os
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile
path=os.getcwd()
files=os.listdir(path)
for i in range(len(files)):
filename = files[i]
filepath=os.path.join(path,filename)
print(filepath)
df=pd.ExcelFile(filepath) # read sheet name
sheet = df.sheet_names
print(sheet)
df=pd.read_excel(filepath, sheet_name=sheet,skiprows = 5, nrows=15, usecols = 'E:L')
print(df)
when I click on the number it show 6 digits in the header but I want to change it into the excel also
import openpyxl
wb = openpyxl.load_workbook('ds.xlsx')
ws = wb.active
for row in ws.iter_rows():
for cell in row:
# only relevant column and without header
if cell.column_letter == 'A' and cell.row > 1: # put the cell number from where to where you want to check
ws[cell.coordinate].number_format = '0.0000000' # it will change the number format
wb.save('ds2.xlsx')

Python deleting row which not contains specific value?

If the ninth column doesn't contains the name of city ISTANBUL, then I need to be able to delete
the entire row. i wrote the code it works but it doesn't delete row.
from openpyxl import Workbook, load_workbook
from openpyxl.utils import get_column_letter
wb = load_workbook('deneme.xlsx')
ws = wb.active
for row in range(1, 3313):
for col in [9]:
char = get_column_letter(col)
if ws[char + str(row)].value is not 'İSTANBUL':
ws.delete_rows(row)
wb.save('deneme.xlsx')
Here it is solution i just found.
import pandas as pd
df = pd.read_excel('staji.xlsx') #here you can put your file path
filt = (df['ŞEHİR'] == 'İSTANBUL') # put your condition ŞEHİR=column name.
df[filt].to_excel('den1.xlsx')

Can we copy one column from excel and convert it to a list in Python?

I use
df = pd.read_clipboard()
list_name = df['column_name'].to_list()
but this is a bit long method for me. I want to copy a column and convert in python and then apply some function so that the copied text is converted to a list.
this will read a excel column as list
import xlrd
book = xlrd.open_workbook('Myfile.xlsx') #path to your file
sheet = book.sheet_by_name("Sheet1") #Sheet name
def Readlist(Element, Column):
for _ in range(1,sheet.nrows):
Element.append(str(sheet.row_values(_)[Column]))
pass
column1 = [] # List name
Readlist(column1, 1) # Column Number is 1 here
pirnt(column1)
Read a specified column as list use Readlist function, intialize [] variable before using that.
Using Pandas:
import pandas as pd
df = pd.read_excel("path.xlsx", index_col=None, na_values=['NA'], usecols = "A")
mylist = list(df[0])
print(mylist)

Python for the Comparison of excel column elements and print the matched elements in separate column

I have developed the following code and fetched the matched output using a for loop.I need to print these output elements in separate column using python.
excel file name - Sample_data.xlsx
first column - WBS_CODE
second column - PROJECT_CODE
first column and second column are matched and then printed in separate column (column F) using python code. Please find my below code,
import pandas as pd
A = pd.read_excel("D:\python_work\Sample_data.xlsx", sheet_name = '31Sep')
code = A['WBS_CODE'].tolist()
B = pd.read_excel("D:\python_work\Sample_data.xlsx", sheet_name = '4Dec')
code1 = B['PROJECT_CODE'].tolist()
for x in code1:
if x in code:
print(x)
else:
print("NA")
output:
NA
NA
NA
APP-ACI-PJ-APAC-EMEA-ENG
NA
NA
I have found a way to export the output and print them in a separate column in excel sheet. Below is the solution,
import pandas as pd
from openpyxl import load_workbook
# Reading the Excel file columns
A = pd.read_excel("D:\python_work\Sample_data.xlsx", sheet_name='4Dec')
code = A['PROJECT_CODE'].tolist()
B = pd.read_excel("D:\python_work\Sample_data.xlsx", sheet_name='31Sep')
code1 = B['WBS_CODE'].tolist()
# Comparison of columns
class test:
def loop(self):
result = []
for x in code1:
if x in code:
result.append(x)
else:
y = "NA"
result.append(y)
print(result)
# Printing data into Excel
try:
book = load_workbook('D:\python_work\Aew1.xlsx')
writer = pd.ExcelWriter('D:\python_work\Aew1.xlsx', engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets) # loading all the worksheets in opened Excel
df = pd.DataFrame.from_dict({'Column1': result})
df.to_excel(writer, sheet_name='Sheet1', startcol=19)
writer.save()
except FileNotFoundError:
print("File Not found: 'Check the Name or Existence of file specified/'")
except PermissionError:
print("File Opened/No-Access: Check whether you have access to file or file is opened")
test().loop()
steps that solved:
1. Appended the for loop output to a list
2. used openpyxl library to print the output to a column in excel worksheet.
Thanks guyz for help and support. Have a good day

Why is plot returning "ValueError: could not convert string to float:" when a dataframe column of floats is being passed to the plot function?

I am trying to plot a dataframe I have created from an excel spreadsheet using either matplotlib or matplotlib and pandas ie. df.plot. However, python keeps returning a cannot convert string to float error. This is confusing since when I print the column of the dataframe it appears to be all float values.
I've tried printing the values of the dataframe column and using the pandas.plot syntax. I've also tried saving the column to a new variable.
import pandas as pd
from matplotlib import pyplot as plt
import glob
import openpyxl
import math
from openpyxl.utils.dataframe import dataframe_to_rows
from openpyxl.styles import Border, Side, Alignment
import seaborn as sns
import itertools
directory = 'E:\some directory'
#QA_directory = directory + '**/*COPY.xlsx'
wb = openpyxl.load_workbook(directory + '\\Calcs\\' + "excel file.xlsx", data_only = 'True')
plt.figure(figsize=(16,9))
axes = plt.axes()
plt.title('Drag Amplification', fontsize = 16)
plt.xlabel('Time (s)', fontsize = 14)
plt.ylabel('Cf', fontsize = 14)
d = pd.DataFrame()
n=[]
for sheets in wb.sheetnames:
if '2_1' in sheets and '2%' not in sheets and '44%' not in sheets:
name = sheets[:8]
print(name)
ws = wb[sheets]
data = ws.values
cols = next(data)[1:]
data = list(data)
idx = [r[0] for r in data]
data = (itertools.islice(r, 1, None) for r in data)
df = pd.DataFrame(data, index=idx, columns=cols)
df = df.dropna()
#x = df['x/l']
#y = df.Cf
print(df.columns)
print(df.Cf.values)
x=df['x/l'].values
plt.plot(x, df.Cf.values)
"""x = [wb[sheets].cell(row=row,column=1).value for row in range(1,2000) if wb[sheets].cell(row=row,column=1).value]
print(x)
Cf = [wb[sheets].cell(row=row,column=6).value for row in range(1,2000) if wb[sheets].cell(row=row,column=1).value]
d[name+ 'x'] = pd.DataFrame(x)
d[name + '_Cf'] = pd.Series(Cf, index=d.index)
print(name)"""
print(df)
plt.show()
I'm expecting a plot of line graphs with the values of x/l on the x access and Cf on the 'y' with a line for each of the relevant sheets in the workbook. Any insights as to why i am getting this error would be appreciated!

Resources