Not able to unlock cell with custom value using pd.xlsxwriter - python-3.x

I have a dataframe as shown in the below code. I just want to lock the header(top row) and let the user change rest of the cells. Based on the code below, it does lock the header and enable me to change the value of all the columns except for the "Date" column. I cannot change the value of date column. It should allow me to change the value of the date column too
import pandas as pd
df = pd.DataFrame({'Data1': [10, 20, 30],
'Data2': [11, 21, 31],
'Date': ["",
"",
pd.to_datetime('today')]})
writer = pd.ExcelWriter('pandas_filter.xlsx', engine='xlsxwriter', )
df.to_excel(writer, sheet_name='Sheet1', index=False)
workbook = writer.book
worksheet = writer.sheets['Sheet1']
unlocked = workbook.add_format({'locked': False})
locked = workbook.add_format({'locked': True})
worksheet.protect()
for row in range(1, 150):
worksheet.set_row(row, None, unlocked)
writer.save()

In XlsxWriter a cell format overrides a row format which overrides a column format.
The reason that the datetime cells aren't unlocked is due to the fact that Pandas applies a cell format to those cells (for the date format) and thus the row format is ignored/overridden.
The only way to avoid this would be to write (or overwrite) the date cell data separately from the other data frame data and apply an unlocked date format. Something like this:
import pandas as pd
df = pd.DataFrame({'Data1': [10, 20, 30],
'Data2': [11, 21, 31],
'Date': [pd.to_datetime('today'),
pd.to_datetime('today'),
pd.to_datetime('today')]})
writer = pd.ExcelWriter('pandas_filter.xlsx', engine='xlsxwriter', )
df.to_excel(writer, sheet_name='Sheet1', index=False)
workbook = writer.book
worksheet = writer.sheets['Sheet1']
unlocked = workbook.add_format({'locked': False})
locked = workbook.add_format({'locked': True})
# Write the 'Date' column data.
worksheet.set_column('C:C', 20)
unlocked_date_format = workbook.add_format({'num_format': 'yyyy-mm-dd',
'locked': False})
worksheet.write_column('C2', df['Date'], unlocked_date_format)
worksheet.protect()
for row in range(1, 150):
worksheet.set_row(row, None, unlocked)
writer.save()
Output, after modification:

Related

Highlighting excel cells based on the cell values using pandas dataframe and xlsxwriter

I have a csv file. After doing certain process, it has to be saved as an excel file.
I am opening it as pandas dataframe and after doing some cleaning (renaming and rearranging columns, dropping few columns), i have to replace null values or if the cell value is "N/A" to "DN". Currently i am using two lines of code for this.
df.replace('', np.nan, inplace = True)
df.replace('N/A', np.nan, inplace = True)
df = df.fillna("DN")
Then, i have to highlight cells which has the value "DN" with yellow color
I am trying with the code mentioned in this post How Do I Highlight Rows Of Data? Python Pandas issue. But in the output excel nothing is getting highlighted. Below is the code i am currently working with
df.replace('', np.nan, inplace = True)
df.replace('N/A', np.nan, inplace = True)
df = df.fillna("NA")
df.index = np.arange(1, len(df) + 1)
def high_color(val):
color = 'yellow' if val == 'NA' else ''
return 'color: {}'.format(color)
result = df.style.applymap(high_color)
writer_orig = pd.ExcelWriter(out_name, engine='xlsxwriter')
df.to_excel(writer_orig, sheet_name='report', index=True, index_label="S_No", freeze_panes=(1,1))
workbook = writer_orig.book
worksheet = writer_orig.sheets['report']
# Add a header format.
header_format = workbook.add_format({
'bold': True,
'fg_color': '#ffcccc',
'border': 1})
for col_num, value in enumerate(df.columns.values):
worksheet.write(0, col_num + 1, value, header_format)
writer_orig.close()
Any kind of suggestions will be greatly helpful.
You can't save a Styler Object to an Excel spreadsheet by using pandas.ExcelWriter.
class pandas.ExcelWriter(path, engine=None, date_format=None,
datetime_format=None, mode='w', storage_options=None,
if_sheet_exists=None, engine_kwargs=None, **kwargs)
Class for writing DataFrame objects into excel sheets.
You need to use worksheet.conditional_format from xlsxwriter to highlight a value in every cell. Also, you can pass na_values as a kwarg to pandas.read_csv to automatically consider a list of values as NaN.
from xlsxwriter.utility import xl_rowcol_to_cell
df = pd.read_csv('/tmp/inputfile.csv', na_values=['', 'N/A']).fillna('DN')
l = df.columns.get_indexer(df.columns).tolist()
xshape = list(map(xl_col_to_name, [e+1 for e in l]))
max_row, max_col = df.shape
with pd.ExcelWriter("/tmp/outputfile.xlsx") as writer:
df.to_excel(writer, sheet_name='report', index=True,
index_label='S_No', freeze_panes=(1,1))
wb = writer.book
ws = writer.sheets['report']
format_header = wb.add_format({'bold': True, 'fg_color': '#ffcccc', 'border': 1})
for idx, col in enumerate(['S_No'] + list(df.columns)):
ws.write(0, idx, col, format_header)
format_dn = wb.add_format({'bg_color':'yellow', 'font_color': 'black'})
ws.conditional_format(f'{xshape[0]}2:{xshape[-1]}{str(max_row+1)}',
{'type': 'cell', 'criteria': '==',
'value': '"DN"', 'format': format_dn})
Output :
You have to export to excel with result Styler:
# Demo
def high_color(val):
return 'background-color: yellow' if val == 'NA' else None
result = df.style.applymap(high_color)
result.to_excel('styler1.xlsx')
df.to_excel('styler2.xlsx')
Export from result
Export from df

Write second Header and Merge Cells using Pandas

All,
I have written a script to write header and data into a excel sheet. But my actual requirement is to write sub header as well and need to merge the cells from the 2nd row.
import xlsxwriter
import pandas as pd
import numpy as np
import openpyxl
import time
# Creating a dataframe
df = pd.DataFrame(np.random.randn(10, 3), columns=list('ABC'))
column_list = df
# Create a Pandas Excel writer using XlsxWriter engine.
writer = pd.ExcelWriter("test.xlsx", engine='xlsxwriter')
df.to_excel(writer, sheet_name='Sheet1', startrow=2, header=False, index=False)
# Get workbook and worksheet objects
workbook = writer.book
worksheet = writer.sheets['Sheet1']
header_fmt = workbook.add_format({'font_name': 'Arial', 'font_size': 12, 'color' : 'white', 'fg_color': '#00007f','bold': True, 'border' : 1})
for idx, val in enumerate(column_list):
worksheet.write(0, idx, val, header_fmt)
worksheet.write(1, 1, 'Sample', header_fmt)
font_fmt = workbook.add_format({'font_name': 'Arial', 'font_size': 9 })
worksheet.set_column('A:C', None, font_fmt)
worksheet.set_row(0, None, header_fmt)
writer.save()
EDIT:
Expected Output:
There are 4 sections in the expected output, all of them are from different Dataframes. I need to merge all those Dataframes' output into a single sheet as shown in the image.
How about this? Is this what you want?
import xlsxwriter
import pandas as pd
import numpy as np
import openpyxl
import time
# Creating a dataframe
df = pd.DataFrame(np.random.randn(10, 3), columns=list('ABC'))
column_list = df
# Create a Pandas Excel writer using XlsxWriter engine.
writer = pd.ExcelWriter("test.xlsx", engine='xlsxwriter')
df.to_excel(writer, sheet_name='Sheet1', startrow=2, header=False, index=False)
# Get workbook and worksheet objects
workbook = writer.book
worksheet = writer.sheets['Sheet1']
header_fmt = workbook.add_format({'font_name': 'Arial', 'font_size': 12, 'color' : 'white', 'fg_color': '#00007f','bold': True, 'border' : 1})
merge_format = workbook.add_format({'align': 'center'})
worksheet.merge_range('A2:C2', 'Sample', merge_format)
for idx, val in enumerate(column_list):
worksheet.write(0, idx, val,header_fmt)
font_fmt = workbook.add_format({'font_name': 'Arial', 'font_size': 9 })
worksheet.set_column('A:C',None, font_fmt)
worksheet.set_row(0,None, header_fmt)
writer.save()

How to create Excel **Table** with pandas.to_excel()?

Need the achieve this programmatically from a dataframe:
https://learn.microsoft.com/en-us/power-bi/service-admin-troubleshoot-excel-workbook-data
Here is one way to do it using XlsxWriter:
import pandas as pd
# Create a Pandas dataframe from some data.
data = [10, 20, 30, 40, 50, 60, 70, 80]
df = pd.DataFrame({'Rank': data,
'Country': data,
'Population': data,
'Data1': data,
'Data2': data})
# Create a Pandas Excel writer using XlsxWriter as the engine.
writer = pd.ExcelWriter("pandas_table.xlsx", engine='xlsxwriter')
# Convert the dataframe to an XlsxWriter Excel object. Turn off the default
# header and index and skip one row to allow us to insert a user defined
# header.
df.to_excel(writer, sheet_name='Sheet1', startrow=1, header=False, index=False)
# Get the xlsxwriter workbook and worksheet objects.
workbook = writer.book
worksheet = writer.sheets['Sheet1']
# Get the dimensions of the dataframe.
(max_row, max_col) = df.shape
# Create a list of column headers, to use in add_table().
column_settings = []
for header in df.columns:
column_settings.append({'header': header})
# Add the table.
worksheet.add_table(0, 0, max_row, max_col - 1, {'columns': column_settings})
# Make the columns wider for clarity.
worksheet.set_column(0, max_col - 1, 12)
# Close the Pandas Excel writer and output the Excel file.
writer.save()
Output:
Update: I've added a similar example to the XlsxWriter docs: Example: Pandas Excel output with a worksheet table
You can't do it with to_excel. A workaround is to open the generated xlsx file and add the table there with openpyxl:
import pandas as pd
df = pd.DataFrame({'Col1': [1,2,3], 'Col2': list('abc')})
filename = 'so58326392.xlsx'
sheetname = 'mySheet'
with pd.ExcelWriter(filename) as writer:
if not df.index.name:
df.index.name = 'Index'
df.to_excel(writer, sheet_name=sheetname)
import openpyxl
wb = openpyxl.load_workbook(filename = filename)
tab = openpyxl.worksheet.table.Table(displayName="df", ref=f'A1:{openpyxl.utils.get_column_letter(df.shape[1])}{len(df)+1}')
wb[sheetname].add_table(tab)
wb.save(filename)
Please note the all table headers must be strings. If you have an un-named index (which is the rule) the first cell (A1) will be empty which leads to file corruption. To avoid this give your index a name (as shown above) or export the dataframe without the index using:
df.to_excel(writer, sheet_name=sheetname, index=False)
Another workaround, if you don't want to save, re-open, and re-save, is to use xlsxwriter. It can write ListObject tables directly, but does not do so directly from a dataframe, so you need to break out the parts:
import pandas as pd
import xlsxwriter as xl
df = pd.DataFrame({'Col1': [1,2,3], 'Col2': list('abc')})
filename = 'output.xlsx'
sheetname = 'Table'
tablename = 'TEST'
(rows, cols) = df.shape
data = df.to_dict('split')['data']
headers = []
for col in df.columns:
headers.append({'header':col})
wb = xl.Workbook(filename)
ws = wb.add_worksheet()
ws.add_table(0, 0, rows, cols-1,
{'name': tablename
,'data': data
,'columns': headers})
wb.close()
The add_table() function expects 'data' as a list of lists, where each sublist represents a row of the dataframe, and 'columns' as a list of dicts for the header where each column is specified by a dictionary of the form {'header': 'ColumnName'}.
I created a package to write properly formatted excel tables from pandas: pandas-xlsx-tables
from pandas_xlsx_tables import df_to_xlsx_table
import pandas as pd
data = [10, 20, 30, 40, 50, 60, 70, 80]
df = pd.DataFrame({'Rank': data,
'Country': data,
'Population': data,
'Strings': [f"n{n}" for n in data],
'Datetimes': [pd.Timestamp.now() for _ in range(len(data))]})
df_to_xlsx_table(df, "my_table", index=False, header_orientation="diagonal")
You can also do the reverse with xlsx_table_to_df
Based on the answer of #jmcnamara, but as a convenient function and using "with" statement:
import pandas as pd
def to_excel(df:pd.DataFrame, excel_name: str, sheet_name: str, startrow=1, startcol=0):
""" Exports pandas dataframe as a formated excel table """
with pd.ExcelWriter(excel_name, engine='xlsxwriter') as writer:
df.to_excel(writer, sheet_name=sheet_name, startrow=startrow, startcol=startcol, header=True, index=False)
workbook = writer.book
worksheet = writer.sheets[sheet_name]
max_row, max_col = df.shape
olumn_settings = [{'header': header} for header in df.columns]
worksheet.add_table(startrow, startcol, max_row+startrow, max_col+startcol-1, {'columns': column_settings})
# style columns
worksheet.set_column(startcol, max_col + startcol, 21)

xlsxwriter - Conditional formatting based on column name of the dataframe

I have a dataframe as below. I want to apply conditional formatting on column "Data2" using the column name. I know how to define format for a specific column but I am not sure how to define it based on column name as shown below.
So basically I want to do the same formatting on column name(because the order of column might change)
df1 = pd.DataFrame({'Data1': [10, 20, 30],
'Data2': ["a", "b", "c"]})
writer = pd.ExcelWriter('pandas_filter.xlsx', engine='xlsxwriter', )
workbook = writer.book
df1.to_excel(writer, sheet_name='Sheet1', index=False)
worksheet = writer.sheets['Sheet1']
blue = workbook.add_format({'bg_color':'#000080', 'font_color': 'white'})
red = workbook.add_format({'bg_color':'#E52935', 'font_color': 'white'})
l = ['B2:B500']
for columns in l:
worksheet.conditional_format(columns, {'type': 'text',
'criteria': 'containing',
'value': 'a',
'format': blue})
worksheet.conditional_format(columns, {'type': 'text',
'criteria': 'containing',
'value': 'b',
'format': red})
writer.save()
using xlsxwriter with xl_col_to_name we can get the column name using the index.
from xlsxwriter.utility import xl_col_to_name
target_col = xl_col_to_name(df1.columns.get_loc("Data2"))
l = [f'{target_col}2:{target_col}500']
for columns in l:
using opnpyxl with get_column_letter we can get the column name using the index.
from openpyxl.utils import get_column_letter
target_col = get_column_letter(df1.columns.get_loc("Data2") + 1) # add 1 because get_column_letter index start from 1
l = [f'{target_col}2:{target_col}500']
for columns in l:
...

Wrap text for column of dataframe in pandas

I am trying to wrap text in python dataframe columns but this code is working for values in columns and not header of column.
I am using below code (taken form stackoverflow). Kindly suggest how to wrap header of dataframe
long_text = 'aa aa ss df fff ggh ttr tre ww rr tt ww errr t ttyyy eewww rr55t e'
data = {'a':[long_text, long_text, 'a'],'c': [long_text,long_text,long_text],
'b':[1,2,3]}
df = pd.DataFrame(data)
#choose columns of df for wrapping
cols_for_wrap = ['a','c']
writer = pd.ExcelWriter('aaa.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name='Sheet1', index=False)
#modifyng output by style - wrap
workbook = writer.book
worksheet = writer.sheets['Sheet1']
wrap_format = workbook.add_format({'text_wrap': True})
#get positions of columns
for col in df.columns.get_indexer(cols_for_wrap):
#map by dict to format like "A:A"
excel_header = d[col] + ':' + d[col]
#None means not set with
worksheet.set_column(excel_header, None, wrap_format)
#for with = 20
worksheet.set_column(excel_header, 10, wrap_format)
writer.save()
In the header_format piece that jmcnamara linked, you can add or remove any formats you want or do not want.
header_format = workbook.add_format({
'bold': True,
'text_wrap': True,
'valign': 'top',
'fg_color': '#D7E4BC',
'border': 1})
# Write the column headers with the defined format.
for col_num, value in enumerate(df.columns.values):
worksheet.write(0, col_num + 1, value, header_format)
My code looks like this:
h_format = workbook.add_format({'text_wrap': True})
...
...
...
for col_num, value in enumerate(df_new.columns.values):
worksheet.write(0, col_num, value, format)
writer.save()
This is covered almost exactly in the Formatting of the Dataframe headers section of the XlsxWriter docs.

Resources