openpyxl - overwrite datasheet and preserve pivot table - python-3.x

I've seen a few answers around to this question but none of them are working.
eg: How to write to an existing excel file without breaking formulas with openpyxl?
Docs give nothing away it seems:
http://openpyxl.readthedocs.io/en/latest/api/openpyxl.reader.excel.html
I tried replacing xls.load_workbook with xls.reader.excel.load_workbook but it doesn't change anything.
My current code overwrites the data in the data sheet, but kills the pivot table functionality in the other sheet (the sheet is still there but only with values). Any idea how to keep the pivot table?
import pandas as pd
import openpyxl as xls
from shutil import copyfile
# Work on a copy of the template so the template file itself is left untouched.
template_file = 'openpy_test.xlsx'
output_file = 'openpy_output.xlsx'
copyfile(template_file, output_file)

# Load the copy (formulas kept, data_only=False) and attach it to a pandas
# writer so existing sheets are reused instead of being recreated.
book = xls.load_workbook(output_file, guess_types=False, data_only=False)
writer = pd.ExcelWriter(output_file, engine='openpyxl')
writer.book = book
writer.sheets = {sheet.title: sheet for sheet in book.worksheets}

# Write the DataFrame into the 'data' sheet and save the workbook.
# NOTE(review): `df` is not defined in this snippet; it must exist beforehand.
df.to_excel(writer, sheet_name='data', index=False, encoding='utf8')
writer.save()
I have also tried book.save('dummycopy.xlsx'), which also saves a workbook with a non-functioning pivot table. So I am sure the problem is related to the load_workbook function.
Package versions:
openpyxl 2.4.10 py36_0
pandas 0.20.3 py36hce827b7_2

I don't think openpyxl currently supports Excel pivot tables. I had to switch to using the win32com library.
Here is a wrapper module I wrote to do specific things with pivot tables; it's basically VBA translated to Python (record a macro and read the generated VBA, and it will make sense). I hope it helps. It's still a work in progress, but it should be enough for you to work with.
import os, datetime
import win32com.client as win32
win32c = win32.constants
import sys, datetime
letters = ' ABCDEFGHIJKLMNOPQRSTUVWXYZ' #space to compensate for index. if letter is a if column is 1
def Pull_excel_workbook(path = '', filename = '', visible = False):
    '''Open *filename* in Excel and describe the active sheet's used range.

    path     -- directory holding the workbook; the current working
                directory is used when it is empty.
    filename -- workbook file name (required).
    visible  -- value assigned to the Excel application's Visible property.

    Returns ``(excel, wb, src)``: the Excel application object, the opened
    workbook and an R1C1 reference string ``<sheet>!R1C1:R<rows>C<cols>``
    sized from the active sheet's UsedRange values.

    Raises FileNotFoundError when no filename is supplied. An error raised
    while opening the workbook is printed and then re-raised.
    '''
    if path == '':
        path = os.getcwd()
    if filename == '':
        raise FileNotFoundError('Please supply a file')
    excel = win32.gencache.EnsureDispatch('Excel.Application')
    excel.Visible = visible
    try:
        # os.path.join supplies the separator that plain concatenation
        # dropped (e.g. when path came from os.getcwd()).
        wb = excel.Workbooks.Open(os.path.join(path, filename))
    except Exception:
        print('Try again\n{}'.format(sys.exc_info()))
        # Continuing would only fail later on the unbound `wb`.
        raise
    ws = wb.ActiveSheet
    data = list(ws.UsedRange.Value) #2d list of rows and columns
    src = '{}!R1C1:R{}C{}'.format(ws.Name, len(data), len(data[0]))
    return excel, wb, src
#wb.SaveAs(path + filename)
def Create_pivottable(wb, src, table_name = 'Pivot'):
    '''Create a pivot table in *wb* on a newly added worksheet.

    wb         -- workbook object the pivot table is added to.
    src        -- source-data reference passed to the pivot cache.
    table_name -- used both as the new worksheet's name and the pivot
                  table's name.

    The table is anchored at row 4, column 1 of the new sheet. Creation is
    first attempted with xlPivotTableVersion10 and, if that raises, retried
    with xlPivotTableVersion15. Afterwards the new sheet and its cell (3, 1)
    are selected.
    '''
    ws = wb.Sheets.Add() #should also change wb.ActiveSheet to the new one.
    ws.Name = table_name
    tname = ws.Name
    starting_point = (4,1) #row, column
    # The sheet name is quoted (embedded quotes doubled) so that names
    # containing spaces form a valid reference, e.g. 'RN Pivot'!R4C1.
    destination = "'{}'!R{}C{}".format(tname.replace("'", "''"),
                                       starting_point[0], starting_point[1])
    pc = wb.PivotCaches().Add(SourceType = win32c.xlDatabase,
                              SourceData = src)
    try:
        pc.CreatePivotTable(TableDestination = destination,
                            TableName = table_name,
                            DefaultVersion = win32c.xlPivotTableVersion10)
    except Exception:
        # Report the arguments that failed, then retry with the newer version.
        print('{}:{}:{}:{}'.format(wb, src, table_name, destination))
        pc.CreatePivotTable(TableDestination = destination,
                            TableName = table_name,
                            DefaultVersion = win32c.xlPivotTableVersion15)
    wb.Sheets(ws.Name).Select()
    wb.Sheets(ws.Name).Cells(3,1).Select()
def Add_to_Filter(wb, tname, field_name):
    '''Make *field_name* a page field (position 1) of pivot table *tname*
    on the active sheet of *wb*.'''
    pivot_table = wb.ActiveSheet.PivotTables(tname)
    page_field = pivot_table.PivotFields(field_name)
    page_field.Orientation = win32c.xlPageField
    page_field.Position = 1
def Add_to_Row(wb, tname, field_name, position = 1):
    '''Make *field_name* a row field of pivot table *tname* on the active
    sheet of *wb*, placed at *position*.'''
    pivot_table = wb.ActiveSheet.PivotTables(tname)
    row_field = pivot_table.PivotFields(field_name)
    row_field.Orientation = win32c.xlRowField
    row_field.Position = position
def Add_to_Column(wb, tname, field_name, position = 1):
    '''Make *field_name* a column field of pivot table *tname* on the active
    sheet of *wb*, placed at *position*.'''
    field = wb.ActiveSheet.PivotTables(tname).PivotFields(field_name)
    field.Orientation = win32c.xlColumnField
    field.Position = position
    # The former `if position > 1` branch only assigned an unused local and
    # had no effect, so it was removed.
def Add_to_Value(wb, tname, field_name, alias = '', calculation = 'xlSum'):
    '''Add *field_name* as a data field of pivot table *tname* on the
    active sheet of *wb*.

    alias       -- caption passed to AddDataField.
    calculation -- either a constant value or the name of a win32com
                   constant (e.g. 'xlSum'); a name that is not a known
                   constant is passed through unchanged.
    '''
    if isinstance(calculation, str):
        # Attribute access on the constants object is its supported lookup
        # and searches every loaded type library, not only the first one.
        calculation = getattr(win32c, calculation, calculation)
    pivot_table = wb.ActiveSheet.PivotTables(tname)
    datafield = pivot_table.PivotFields(field_name)
    pivot_table.AddDataField(datafield, alias, calculation)
def LtoC(letter):
    '''Convert an Excel column label to its 1-based column number.

    Accepts single- and multi-letter labels in either case
    ('A' -> 1, 'Z' -> 26, 'AA' -> 27). A blank or empty label maps to 0.
    Raises ValueError for any character outside A-Z.
    '''
    col = 0
    for ch in letter.strip().upper():
        if not 'A' <= ch <= 'Z':
            raise ValueError('invalid column label: {!r}'.format(letter))
        # Base-26 accumulation with digits A=1 .. Z=26.
        col = col * 26 + (ord(ch) - ord('A') + 1)
    return col
def CtoL(col):
    '''Convert a 1-based column number to its Excel column label.

    Handles columns beyond 26 (1 -> 'A', 26 -> 'Z', 27 -> 'AA').
    Raises ValueError when *col* is smaller than 1, since such a column
    has no label.
    '''
    if col < 1:
        raise ValueError('column number must be >= 1, got {!r}'.format(col))
    label = ''
    while col > 0:
        # Subtracting 1 first maps 1..26 onto 0..25 so that 26 yields 'Z'.
        col, remainder = divmod(col - 1, 26)
        label = chr(ord('A') + remainder) + label
    return label
def Format_pretty(wb, tname, row_to_colapse):
    '''Apply the 'PivotStyleMedium9' style to pivot table *tname*, collapse
    the given field(s) with repeated labels, auto-fit columns A:Z and
    select cell A1.

    row_to_colapse -- one field name (str) or an iterable of field names.
    '''
    wb.ActiveSheet.PivotTables(tname).TableStyle2 = 'PivotStyleMedium9'
    # A single name is wrapped so both input forms share one loop.
    if type(row_to_colapse) is str:
        field_names = [row_to_colapse]
    else:
        field_names = row_to_colapse
    for field_name in field_names:
        wb.ActiveSheet.PivotTables(tname).PivotFields(field_name).ShowDetail = False #collapses
        wb.ActiveSheet.PivotTables(tname).PivotFields(field_name).RepeatLabels = True #repeats labels
    wb.ActiveSheet.Columns('A:Z').EntireColumn.AutoFit()
    wb.ActiveSheet.Range('A1').Select()
def Add_calcd_col(ws, col, row_start, row_end, formula, style = '', col_title = 'default'):
    '''Fill a calculated column on worksheet *ws*.

    col                -- column number (int).
    row_start, row_end -- first and last row (ints); row_start receives
                          *col_title*, every following row up to row_end
                          receives ``formula.format(row)``.
    formula            -- format string taking the row number.
    style              -- cell style name applied to the whole range; when
                          empty (the default) no style is assigned.
    col_title          -- header text written to the first row.
    '''
    letter = CtoL(col)
    cell_range = '{0}{1}:{0}{2}'.format(letter, row_start, row_end)
    ws.Range(cell_range).Select()
    ws.Cells(row_start, col).Value = col_title
    for row in range(row_start + 1, row_end + 1):
        ws.Cells(row, col).Value = formula.format(row)
    if style:
        # Skip the assignment for the empty default instead of handing
        # Excel an empty style name.
        ws.Range(cell_range).Style = style
#print("ws.Range('{0}1:{0}200'.format({0})).Style = style".format(letter))
#ws.Range('{0}1:{0}200'.format(letter)).Style = style
def Values_to_columns(wb,tname, position = 2):
    '''Set the DataPivotField of pivot table *tname* (active sheet of *wb*)
    to column orientation at *position*.'''
    # The pivot table is looked up separately for each assignment.
    wb.ActiveSheet.PivotTables(tname).DataPivotField.Orientation = win32c.xlColumnField
    wb.ActiveSheet.PivotTables(tname).DataPivotField.Position = position
def WB_save(wb, path, tname, filename):
    '''Save *wb* under a date-stamped .xlsx name inside *path*.

    The new name is ``<filename without extension>-<mm.dd.yy>.xlsx`` using
    today's date. *tname* is currently unused.
    '''
    #Format_pretty(wb, tname, 'Division') #that needs to be fixed....
    # splitext copes with any extension length ('.xls', '.xlsm', ...),
    # unlike slicing off a fixed five characters.
    stem = os.path.splitext(filename)[0]
    new_filename = stem + '-{}.xlsx'.format(datetime.date.today().strftime('%m.%d.%y'))
    # os.path.join adds the separator when *path* lacks a trailing one.
    wb.SaveAs(os.path.join(path, new_filename))
def Pivot_refresh(path, filename, pivot_sheet_name, pivot_table_name = 'Pivot'):
    '''Open the workbook, refresh the pivot table found at cell A6 of
    *pivot_sheet_name*, and save a date-stamped copy through WB_save.

    tested and functional with recruiting prod report'''
    excel, wb, src = Pull_excel_workbook(path = path, filename = filename)
    wb.Sheets(pivot_sheet_name).Select()
    anchor = 'A6' #need a better way for this
    pivot_sheet = excel.Worksheets(pivot_sheet_name)
    pivot_sheet.Range(anchor).PivotTable.RefreshTable()
    WB_save(wb, path, pivot_table_name, filename)
#pivot refresh
#new = filename[:-5] + '-{}.xlsx'.format(2)
#Pivot_refresh(path = path, filename = new, pivot_sheet_name = 'Pivot')
def Hide_columns(wb, tname, start, end):
    '''Hide the columns from *start* to *end* on the active sheet of *wb*.

    start, end -- column labels (str); any other type is converted with
                  CtoL. *tname* is not used.
    '''
    first = start if type(start) is str else CtoL(start)
    last = end if type(end) is str else CtoL(end)
    column_span = '{}:{}'.format(first, last)
    wb.ActiveSheet.Columns(column_span).EntireColumn.Hidden = True

Related

Find next empty column to write query data in Excel using Python

I'm using pandas to fetch query results from Oracle and I want to write it to an Excel file and put the data in the first column that is empty, so the first time should be Column A, next time I run this program it should add the data into Column B etc.
I'm using openpyxl to write this data using the max_row / max_column method I found. I've been searching for awhile and cannot find a way to use openpyxl to do it in the next empty column though.
# Destination: first worksheet of the template workbook.
main_file = glob('C:\\Users\\dataTemplate.xlsx')[0]
nwb = load_workbook(main_file)
nws = nwb.worksheets[0]
# Source: first worksheet of the query-result workbook.
copy_file = (
    r'C:\\Users\\queryData.xlsx')
cwb = load_workbook(copy_file)
cws = cwb.worksheets[0]
#Updated
nmc = nws.max_column + 1  # first column after the destination's used range
mr = cws.max_row
mc = cws.max_column
for i in range(1, mr + 1):
    for j in range(1, mc + 1):
        c = cws.cell(row=i, column=j)
        # j starts at 1, so subtract 1: the first source column must land
        # exactly on nmc instead of leaving an empty gap column.
        nws.cell(row=i, column=nmc + j - 1).value = c.value
# NOTE(review): nothing in this snippet saves nwb; presumably done elsewhere.
Update
As you use pandas, you can use the following code:
# Append to the existing workbook, overlaying onto the sheet that is
# already there rather than replacing it.
with pd.ExcelWriter('data.xlsx', engine='openpyxl', mode='a', if_sheet_exists='overlay') as writer:
    wb = writer.book
    ws = wb.active
    # Start at the sheet's first used row and just past its last used column.
    first_row = ws.min_row - 1
    next_col = ws.max_column
    df.to_excel(writer, startrow=first_row, startcol=next_col, index=False)
Old answer
You can use ws.max_column and ws.max_row:
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
# Open the workbook and take its active worksheet; the max_row / max_column
# values shown in the output below are read from this sheet.
wb = load_workbook('test.xlsx')
ws = wb.active
Output:
>>> ws.max_row
5
>>> ws.max_column
9
>>> get_column_letter(ws.max_column)
'I'
My excel file:

How to read visible-only cells using Python?

I want to read and merge only visible cells in Excel, but I failed.
Also, I've tried openpyxl but didn't work. (Find my second code)
Is there any other ways to read only visible cells and paste on new excel?
I want to read only visible cells because sometimes they need to be filtered or hidden.
Please kindly advise me.
What kind of module should I put in?
If every excel module cannot do that, please also let me know.
My current code:
import os
import glob
import xlwings as xw
import xlrd
import xlsxwriter
# Merge every row of every sheet of the Excel files in the current directory
# into one new workbook whose name is asked from the user.
xw.App().visible = False
path = os.getcwd()
x=input('name:') + '.xlsx'
target_xls = os.path.join(path,x)
data = []
# os.path.join builds the same pattern without the invalid '\*' escape
# sequence the concatenated string literal contained.
for file in glob.glob(os.path.join(path, '*.*')):
    if file.endswith((".xls", ".xlsm", ".xlsx")):
        wb = xlrd.open_workbook(file)
        for sheet in wb.sheets():
            for rownum in range(sheet.nrows):
                data.append(sheet.row_values(rownum))
# Write the collected rows to the target workbook, one cell at a time.
workbook = xlsxwriter.Workbook(target_xls)
worksheet = workbook.add_worksheet()
for i, row_values in enumerate(data):
    print(range(len(data)))
    for j, value in enumerate(row_values):
        worksheet.write(i, j, value)
workbook.close()
My openpyxl code:
import os
import glob
import xlwings as xw
import xlrd
import xlsxwriter
from openpyxl import load_workbook
# Copy only the rows of 'Sheet1' that are not hidden into a new workbook
# whose name is asked from the user.
xw.App().visible = False
path = os.getcwd()
x = input('name:') + '.xlsx'
target_xls = os.path.join(path, x)
data = []
wb = load_workbook('sample.xlsx')
ws = wb['Sheet1']
for row in ws:
    if not ws.row_dimensions[row[0].row].hidden:
        # Keep each row as its own list: the writer loop below indexes
        # data[i][j], which fails on a flat list of cell values.
        data.append([cell.value for cell in row])
workbook = xlsxwriter.Workbook(target_xls)
worksheet = workbook.add_worksheet()
for i, row_values in enumerate(data):
    print(range(len(data)))
    for j, value in enumerate(row_values):
        worksheet.write(i, j, value)
workbook.close()
I want to read excel like below:
And output into:
#Reference: xlrd manual: https://media.readthedocs.org/pdf/xlrd/latest/xlrd.pdf
#Python Forum Reference: https://python-forum.io/Thread-Identify-Hidden-rows-in-xls
import xlrd
print("Read the VALUE and ROW VISIBILITY from cells A1:A6 in a .xls file from 'Sheet2'.")
print()
######################################################
# Access .xls file (Excel 2003 and before)
excel_filename = "HiddenRow3OnSheet2.xls"
# Open the workbook
#NOTE: Traceback error if 'formatting_info=True' is NOT INCLUDED
xl_workbook = xlrd.open_workbook(excel_filename, formatting_info=True)
#Set the focus on 'Sheet2'
my_sheet_name = "Sheet2"
xl_sheet = xl_workbook.sheet_by_name(my_sheet_name)
print("File: {}".format(excel_filename))
for irow in range(xl_sheet.nrows):
    ihidden = xl_sheet.rowinfo_map[irow].hidden #Row Visibility 0=Visible 1=Hidden
    # A truthy flag means the row is hidden; the labels were previously swapped.
    if ihidden:
        shidden = "HIDDEN"
    else:
        shidden = "VISIBLE"
    svalue = xl_sheet.cell(irow,0).value
    print("Value: {} Row Visibility: {}".format(svalue, shidden))
######################################################
# Access .xlsx file (Excel 2007 and later)
excel_filename = "HiddenRow3OnSheet2.xlsx"
# Open the workbook
#NOTE: 'formatting_info=True' is NOT SUPPORTED for .xlsx files
xl_workbook = xlrd.open_workbook(excel_filename)
#Set the focus on 'Sheet2'
my_sheet_name = "Sheet2"
xl_sheet = xl_workbook.sheet_by_name(my_sheet_name)
print()
print("File: {}".format(excel_filename))
for irow in range(xl_sheet.nrows):
    # Only the cell value is read here; no row visibility is queried.
    svalue = xl_sheet.cell(irow,0).value
    print("Value: {} Row Visibility: {}".format(svalue, "Not Available for .xlsx files"))

Exception Occurred (..) - 'Microsoft Excel', 'SaveAs ... class failed'

This code basically does what it needs to do. But it does throw an error sometimes whereas I would expect the code to work as all data necessary is present.
So what it does is: (1) Read in users (2) Add information to the Excel dashboard, such as header information, word clouds and profile picture.
Then it saves the Excel file. It sometimes, randomly almost, gives an error: (pywintypes.com_error: (-2147352567, 'Exception occurred.', (0, 'Microsoft Excel', 'Open method of Workbooks class failed', 'xlmain11.chm', 0, -2146827284), None)). What part of the code could cause this?
import os
import xlwings as xw
import pandas as pd
import openpyxl
def get_users(file_name):
    """Read all the users from the csv file.

    Returns a list holding the first comma-separated field of every line
    (after stripping surrounding whitespace). A blank line yields ''.
    """
    # The with-block closes the file even if reading raises.
    with open(file_name, 'r') as f:
        return [line.strip().split(',')[0] for line in f]
def read_csv_file(file_name):
    """Return csv file with accounts.

    Each line is stripped and split on ';'; the result is a list with one
    tuple of fields per line.
    """
    # The with-block closes the file even if reading raises.
    with open(file_name, 'r') as f:
        return [tuple(line.strip().split(';')) for line in f]
def write_panda_to_excel(df, start_cell, wb):
    """Assign DataFrame *df* to the range starting at *start_cell* on the
    'Input' sheet of *wb*."""
    target = wb.sheets['Input'].range(start_cell)
    target.value = df
def add_word_cloud(name, cell, wb):
    """Add the WordCloud to Sheet2.

    The picture file *name* (relative to the current working directory) is
    placed at the top-left corner of *cell*, sized 325 x 155.
    """
    sheet = wb.sheets['Sheet2']
    picture_path = os.getcwd() + '\\' + name
    anchor = sheet.range(cell)
    sheet.pictures.add(picture_path, top=anchor.top, left=anchor.left, width=325, height=155)
def add_profile_picture(user, cell, wb):
    """Place the user's profile picture on Sheet1 at *cell*, sized 70 x 90.

    The picture is the first file in the current directory whose name
    starts with ``user + '.'``; IndexError is raised when none exists.
    """
    sheet = wb.sheets['Sheet1']
    candidates = [entry for entry in os.listdir('.') if entry.startswith(user + '.')]
    picture_path = os.getcwd() + '\\' + candidates[0]
    anchor = sheet.range(cell)
    sheet.pictures.add(picture_path, top=anchor.top, left=anchor.left, width=70, height=90)
# Driver script: for every user listed in accounts_file.csv, open the
# dashboard workbook, write that user's header CSV, profile picture and word
# cloud into it, and save it as a per-user .xlsm file.
# All paths are relative; the working directory is moved with os.chdir
# throughout, so the order of the chdir calls matters.
app = xw.App(visible=False)
# Read users
os.chdir('../FolderA/')
file_name = 'accounts_file.csv'
users = get_users(file_name)
os.chdir('../Data')
for i, user in enumerate(users):
try:
#count += 1
print(100 * '-')
print(len(users), i+1, user)
# go to directory where the dashboard is stored
os.chdir('../Folder5/FolderE')
# NOTE(review): the dashboard workbook is opened again on every iteration,
# but wb.close() below only runs when wb.save() succeeds. A workbook left
# open by an earlier failure may be related to the intermittent Excel COM
# errors -- confirm.
wb = xw.Book('Twitter - Individuele Rapportage.xlsm')
os.chdir('../../Data/' + user)
# Remove file if exists
xl = [e for e in os.listdir('.') if e.endswith('.xlsm')]
for e in xl:
os.remove(e)
# add user name to title of Dashboard
# NOTE(review): sht is assigned here but not used afterwards in this script.
sht = wb.sheets['Input_Data']
# add the csv data and profile pictures the other data to the dashboard
df = pd.read_csv(user + '_header_info.csv', sep=',')
write_panda_to_excel(df, 'A1', wb)
cell = 'L20'
try:
add_profile_picture(user, cell, wb)
# NOTE(review): the bare except logs every failure as a missing profile
# picture, whatever actually went wrong.
except:
os.chdir('../../Folder6')
with open('Twitter - Profile picture Error.txt', 'a') as ExceptFile:
ExceptFile.write(str(user) + '\n')
os.chdir('../Data/' + user)
name = user + '_WC.png'
cell = 'Y46'
add_word_cloud(name, cell, wb)
xlname = 'Twitter' + user + '.xlsm'
try:
wb.save(xlname)
wb.close()
# NOTE(review): bare except; only the user name is logged and the original
# error is discarded.
except:
os.chdir('../../Folder6')
with open('Twitter - Dashboard Generation Errors.txt', 'a') as myfile:
myfile.write(str(user + "\n"))
# Redundant: the with-statement already closes the file on exit.
myfile.close()
os.chdir('../Data/' + user)
os.chdir('..')
except OSError as exception:
print(exception)
os.chdir('..')
# NOTE(review): mode 'w' truncates dash_errors.txt on every failure, so only
# the most recent failing user is kept.
with open('dash_errors.txt', 'w') as dashboard_errors:
dashboard_errors.write(user+"\n")

Exporting as csv fits entirely in first cell

When I try to save my table (QTableWidget) as a CSV file, everything ends up in the first cell when the file is opened in Excel. How can I separate each cell? This is my saving function:
def save_text(self, table, delimiter=','):
    """Ask for a file name and export *table* to it as CSV.

    table     -- table widget providing rowCount(), columnCount() and
                 item(row, column); empty cells are written as ''.
    delimiter -- field separator handed to csv.writer. The default ','
                 keeps the previous output; pass ';' when the target Excel
                 locale expects semicolons.

    Nothing is written when the dialog returns an empty path.
    """
    path = QFileDialog.getSaveFileName(self, 'Save CSV', os.getenv('HOME'), 'CSV(*.csv)')
    if path[0] == '':
        return
    # newline='' is what the csv module requires of files it writes to;
    # without it extra blank rows can appear on Windows.
    with open(path[0], 'w', newline='') as csv_file:
        writer = csv.writer(csv_file, dialect='excel', delimiter=delimiter)
        for row in range(table.rowCount()):
            row_data = []
            for column in range(table.columnCount()):
                item = table.item(row, column)
                row_data.append(item.text() if item is not None else '')
            writer.writerow(row_data)
Try replacing:
writer = csv.writer (csv_file, dialect = 'excel')
on
writer = csv.writer (csv_file, dialect = 'excel', delimiter = ';')

xlrd named range example?

I have an excel spreadsheet that I am trying to parse with xlrd. The spreadsheet itself makes extensive use of named ranges.
If I use:
# List every defined name known to the workbook.
for defined_name in book.name_map:
    print(defined_name)
I can see all of the names are there.
However I can't make any of the methods work (cell method and area2d). Can anyone give me an example of the syntax to be able to read the cell range that a name is pointing to given the name.
The Excel file is an XLSM file with lots of visual basic that also operates on these named ranges.
I think that the naming support in XLRD is broken for XLSM files but I found an answer by switching to openpyxl. This has a function get_named_ranges() which contains all of the named ranges. The support after that is a bit thin so I wrote my own class to turn the named ranges in my spreadsheet into a class where I can access the same information using the same names.
# -- coding: utf-8 --
"""
Created on Wed Sep 14 09:42:09 2016
#author: ellwood
"""
from openpyxl import load_workbook
class NamedArray(object):
    ''' Named range object

    Describes one named range of a workbook: the sheet it lives on, its
    corner addresses and its size, and gives access to the cell values.

    Instance attributes set by __init__:
      wb, sheet, loc        -- workbook, sheet name, sheet object
      has_range, has_value  -- True for a multi-cell range / a single cell
      ad_lo, ad_hi          -- corner addresses without '$' (e.g. 'A1')
      min_row, max_row, rows, min_col, max_col, cols -- 1-based bounds
                               and sizes (ints)
    '''
    C_CAPS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

    def __init__(self, workbook, named_range_raw):
        ''' Initialise a NameArray object from the named_range_raw information in the given
        workbook

        named_range_raw is converted with str() and must look like
        ``<sheet>!<cell or range>``; the sheet part may be wrapped as
        ``<Worksheet "Name">``, quoted as ``'Name'``, or bare.
        Raises ValueError when no '!' separator is present.
        '''
        self.wb = workbook
        # Split on the last '!' so the cell part is always what follows it.
        sheet_str, sep, cellrange_str = str(named_range_raw).rpartition('!')
        if not sep:
            raise ValueError('no sheet separator in %r' % (named_range_raw,))
        if '"' in sheet_str:
            self.sheet = sheet_str.split('"')[1]
        else:
            self.sheet = sheet_str.strip("'")
        self.loc = self.wb[self.sheet]
        if ':' in cellrange_str:
            self.has_range = True
            self.has_value = False
            lo, hi = cellrange_str.split(':')
            self.ad_lo = lo.replace('$', '')
            self.ad_hi = hi.replace('$', '')
        else:
            self.has_range = False
            self.has_value = True
            self.ad_lo = cellrange_str.replace('$', '')
            self.ad_hi = self.ad_lo
        self.min_row = self.get_row(self.ad_lo)
        self.max_row = self.get_row(self.ad_hi)
        self.rows = self.max_row - self.min_row + 1
        self.min_col = self.col_to_n(self.ad_lo)
        self.max_col = self.col_to_n(self.ad_hi)
        self.cols = self.max_col - self.min_col + 1

    def size_of(self):
        ''' Returns two dimensional size of named space as (cols, rows)
        '''
        return self.cols, self.rows

    # NOTE: on instances, `cols` and `rows` resolve to the integer
    # attributes assigned in __init__, which shadow these two methods.
    # They are kept only so class-level access keeps working.
    def cols(self):
        ''' Returns number of cols in named space
        '''
        return self.cols

    def rows(self):
        ''' Returns number of rows in named space
        '''
        return self.rows

    def value(self, r=1, c=1):
        ''' Returns the value at row r, column c

        r and c are 1-based offsets inside the named range. For a
        single-cell name they are ignored and that cell's value is
        returned. Raises IndexError when r or c lies outside the range.
        '''
        if self.has_value:
            return self.loc[self.ad_lo].value
        if not 1 <= r <= self.rows:
            raise IndexError('row %r outside 1..%d' % (r, self.rows))
        if not 1 <= c <= self.cols:
            raise IndexError('column %r outside 1..%d' % (c, self.cols))
        address = self.n_to_col(self.min_col + c - 1) + str(self.min_row + r - 1)
        # Indexing by coordinate string replaces the cell(<coordinate>) call
        # form, which newer openpyxl releases no longer accept.
        return self.loc[address].value

    def is_range(self):
        ''' if true then name defines a table more than 1 cell
        '''
        return self.has_range

    def is_value(self):
        ''' if true then name defines the location of a single value
        '''
        return self.has_value

    def __str__(self):
        ''' printed description of named space
        '''
        if self.has_range:
            locs = 's ' + self.ad_lo + ':' + self.ad_hi
        else:
            locs = ' ' + self.ad_lo
        return('named range'+ str(self.size_of()) + ' in sheet ' + self.sheet + ' # location' + locs)

    @classmethod
    def get_row(cls, ad):
        ''' get row number from cell string
        Cell string is assumed to be in excel format i.e "ABC123" where row is 123
        '''
        row = 0
        for l in ad:
            if l in "1234567890":
                row = row*10 + int(l)
        return row

    @classmethod
    def col_to_n(cls, ad):
        ''' find column number from xl address
        Cell string is assumed to be in excel format i.e "ABC123" where column is abc
        column number is the 1-based base-26 value of the letters (A=1 ... Z=26)
        '''
        n = 0
        for l in ad:
            if l in cls.C_CAPS:
                n = n*26 + cls.C_CAPS.find(l)+1
        return n

    @classmethod
    def n_to_col(cls, n):
        ''' make xl column address from column number
        '''
        ad = ''
        while n > 0:
            # Shift to 0..25 before dividing so that 26 maps to 'Z', not 'AZ'.
            n, rem = divmod(n - 1, 26)
            ad = cls.C_CAPS[rem] + ad
        return ad
class Struct(object):
    ''' class which turns a dictionary into a structure

    Every keyword argument becomes an attribute of the instance.
    '''
    def __init__(self, **entries):
        self.__dict__.update(entries)

    def __repr__(self):
        # Previously spelled `repr__` and reading the name-mangled
        # `self.__dict` through Python 2's iteritems(), so it never ran.
        return '<%s>' % str('\n '.join('%s : %s' % (k, repr(v)) for (k, v) in self.__dict__.items()))
def get_names(workbook):
    ''' Get a structure containing all of the names in the workbook

    Only named ranges whose name starts with 'n_' are kept; the prefix is
    dropped, so range 'n_total' becomes attribute ``total`` of the returned
    Struct. Each kept name is printed with its NamedArray description.
    '''
    # Use the workbook passed in, not a module-level `wb` that happens to
    # exist in the example program.
    # NOTE(review): get_named_ranges() belongs to older openpyxl releases;
    # confirm it exists in the installed version.
    named_ranges = workbook.get_named_ranges()
    name_list = {}
    for named_range in named_ranges:
        name = named_range.name
        if name.startswith('n_'):
            # only store the names beginning with 'n_'
            name_list[name[2:]] = NamedArray(workbook, str(named_range))
    for item in name_list:
        print (item, '=', name_list[item])
    return Struct(**name_list)
# ------------------
# program example
# -----------------
# Load the workbook with data_only=True, collect its 'n_'-prefixed names and
# print the value behind the name 'n_my_name' (exposed as n.my_name).
wb = load_workbook('test.xlsm', data_only=True)
n = get_names(wb)
print(n.my_name.value())
One small optimisation is that I prefixed all of the names I was interested in importing with 'n_' so I could then ignore any built-in Excel names. I hope this is useful to someone.

Resources