Find next empty column to write query data in Excel using Python - python-3.x

I'm using pandas to fetch query results from Oracle and I want to write it to an Excel file and put the data in the first column that is empty, so the first time should be Column A, next time I run this program it should add the data into Column B etc.
I'm using openpyxl to write this data, using the max_row / max_column approach I found. I've been searching for a while and cannot find a way to make openpyxl write into the next empty column, though.
# Copy every cell of queryData.xlsx into the first unused column(s) of
# dataTemplate.xlsx, so each run lands directly to the right of the last one.
from glob import glob

from openpyxl import load_workbook

main_file = glob('C:\\Users\\dataTemplate.xlsx')[0]
nwb = load_workbook(main_file)
nws = nwb.worksheets[0]
copy_file = (
    r'C:\\Users\\queryData.xlsx')
cwb = load_workbook(copy_file)
cws = cwb.worksheets[0]

# openpyxl reports max_row == max_column == 1 even for a completely blank
# sheet, so a 1x1 sheet whose A1 is empty means nothing has been written yet
# and the first run must start in column A rather than column B.
target_is_blank = (
    nws.max_row == 1
    and nws.max_column == 1
    and nws.cell(row=1, column=1).value is None
)
nmc = 1 if target_is_blank else nws.max_column + 1

mr = cws.max_row
mc = cws.max_column
for i in range(1, mr + 1):
    for j in range(1, mc + 1):
        c = cws.cell(row=i, column=j)
        # j is 1-based, so subtract 1: source column 1 goes to column nmc
        # itself (using nmc + j would leave an empty column in between).
        nws.cell(row=i, column=nmc + j - 1).value = c.value

# The copied cells only exist in memory until the workbook is saved.
nwb.save(main_file)

Update
As you use pandas, you can use the following code:
# Append `df` to the right of whatever is already stored in data.xlsx.
with pd.ExcelWriter('data.xlsx', engine='openpyxl', mode='a', if_sheet_exists='overlay') as writer:
    sheet = writer.book.active
    # to_excel() takes 0-based offsets while openpyxl counts from 1:
    # min_row - 1 is the first used row, and max_column (unchanged) is the
    # 0-based index of the column right after the last used one.
    first_row = sheet.min_row - 1
    first_col = sheet.max_column
    df.to_excel(writer, startrow=first_row, startcol=first_col, index=False)
Old answer
You can use ws.max_column and ws.max_row:
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
# Open the workbook and take its active sheet; the max_row / max_column
# values shown below are read from this sheet.
wb = load_workbook('test.xlsx')
ws = wb.active
Output:
>>> ws.max_row
5
>>> ws.max_column
9
>>> get_column_letter(ws.max_column)
'I'
My excel file:

Related

Python deleting row which not contains specific value?

If the ninth column doesn't contain the city name ISTANBUL, then I need to delete
the entire row. I wrote the code below; it runs, but it doesn't delete the rows.
from openpyxl import Workbook, load_workbook
from openpyxl.utils import get_column_letter
# Delete every row whose ninth column is not 'İSTANBUL', then save in place.
wb = load_workbook('deneme.xlsx')
ws = wb.active
city_column = 9
# Walk bottom-up: delete_rows() shifts the rows below upward, so a top-down
# loop would skip whichever row slides into the slot that was just deleted.
# Row 1 is included, exactly as in the original range(1, ...) loop.
for row in range(ws.max_row, 0, -1):
    # Compare by value; `is not` tests object identity, which does not tell
    # whether the text read from the cell equals the literal.
    if ws.cell(row=row, column=city_column).value != 'İSTANBUL':
        ws.delete_rows(row)
wb.save('deneme.xlsx')
Here is the solution I just found.
import pandas as pd
# Keep only the rows whose 'ŞEHİR' column equals 'İSTANBUL' and write them
# to a new workbook.
source = pd.read_excel('staji.xlsx')  # put your own file path here
istanbul_only = source.loc[source['ŞEHİR'] == 'İSTANBUL']  # 'ŞEHİR' is the column name
istanbul_only.to_excel('den1.xlsx')

pandas DataFrame.to_excel keeping excel file format

Is there an option in pandas to keep the file's formatting when I use df.to_excel to save the data to the file?
The only workaround that I found is:
from openpyxl import load_workbook
import pandas as pd
# df_data is a pd.DataFrame
# dataframe_to_rows lives in openpyxl.utils.dataframe and has to be imported
# explicitly; without this line the loop below raises NameError.
from openpyxl.utils.dataframe import dataframe_to_rows

# Write the DataFrame's values into the existing workbook cell by cell.
# Only .value is assigned on each cell; nothing else on the sheet is touched.
wb = load_workbook(fout_file)
sheet = wb.active
# Rows are numbered from 2 and header=False is passed, so row 1 of the sheet
# is never written to.
for r, row in enumerate(dataframe_to_rows(df_data, index=False, header=False), 2):
    # Iterate the row itself rather than an externally defined column count,
    # so every value of the row is written.
    for c, value in enumerate(row, 1):
        sheet.cell(row=r, column=c).value = value
wb.save(fout_file)
Is there a better way, one where I don't have to copy cell by cell?
Thanks
stefano G.
@DSteman thanks for the idea, I just tried to use StyleFrame as you advised me.
def main ():
...
...
...
# df_new_data = pd.DataFrame(columns=self.df_values.columns)
df_new_data = StyleFrame.read_excel(self.template_fout, read_style=True)
...
...
...
cr_dfnewdata = 0
for j, row_data in data.iterrows():
original_row = row_data.copy(deep=True)
# df_new_data = df_new_data.append(original_row)
cr_dfnewdata += 1
df_new_data[cr_dfnewdata] = original_row
...
...
...
compensa_row = row_data.copy(deep=True)
compensa_row[self.importo_col] = importo * -1
# compensa_row[self.qta_col] = qta * -1
compensa_row[self.cod_ribal_col] = f"{cod_ribal}-{j}"
# df_new_data = df_new_data.append(compensa_row)
cr_dfnewdata += 1
df_new_data[cr_dfnewdata] = compensa_row
...
...
...
def save_working_data(self, cod_ribalt: str, df_data):
    """Write ``df_data`` to ``working_<cod_ribalt>.xlsx`` under ``self.working_dir``.

    ``df_data.to_excel`` is expected to return an object exposing ``save()``;
    that ``save()`` call is what finalises the file.
    """
    target_path = f"{self.working_dir}/working_{cod_ribalt}.xlsx"
    writer = df_data.to_excel(target_path)
    writer.save()
BUT i got this error:
export_df.index = [row_index.value for row_index in export_df.index]
AttributeError: 'int' object has no attribute 'value'
You can do this using df.to_clipboard(index=False)
from win32com.client import Dispatch
import pandas as pd
# Get an Excel application object through COM and make its window visible.
xlApp = Dispatch("Excel.Application")
xlApp.Visible = 1
xlApp.Workbooks.Open(r'c:\Chadee\test.xlsx')
# NOTE(review): Select is referenced without call parentheses — confirm that
# this really moves the selection to A1 under late-bound COM dispatch.
xlApp.ActiveSheet.Cells(1,1).Select
# Build a small sample frame and copy it to the clipboard without the index.
d = {'col1': [1, 2], 'col2': [3, 4]}
df = pd.DataFrame(data=d)
df.to_clipboard(index=False)
# Paste the clipboard contents into the active sheet of the opened workbook.
xlApp.ActiveWorkbook.ActiveSheet.PasteSpecial()
Output:
Note that the cell colors are still the same
Hope that helps! :-)
Use the styleframe module to preserve most of the styling in your sheet. If you have a styled sheet with columns ['Entry 1', 'Entry 2'], for example, you can enter values like this:
from styleframe import StyleFrame
# Load the sheet together with its styling (read_style=True).
sf = StyleFrame.read_excel('test.xlsx', read_style=True)
# Assign through .value on the located cell, as shown here, instead of
# replacing the cell object itself.
sf.loc[0,'Entry 1'].value = 'Modified 1'
# to_excel() returns an object whose save() is called to write the file.
sf.to_excel('test.xlsx').save()
Make sure that the cell you are trying to fill already has a placeholder value like 0. My script returned errors if it attempted to fill an empty cell.
Check out this thread too: Overwriting excel columns while keeping format using pandas

Python Openpyxl schedule replace timestamp range with string

I have workers who start and end their workdays at different times. I get a *.xlsx file from an old scheduling
program in which I can see their schedule. I want to replace their times with a string instead.
For example:
'13:00 - 21:30', '13:30 - 21:30','14:00 - 21:30','15:00 - 21:30' with a single 'name'('a-tur').
my code so far is:
from openpyxl import Workbook
from openpyxl import load_workbook
from openpyxl.utils.cell import get_column_letter
# Replace every cell whose text is exactly 'a' with 'a-tur' and save in place.
wb = load_workbook('sample.xlsx')
worksheet = wb["Sheet"]
# Take the dimensions from the same sheet that is read and written below;
# wb.active is not guaranteed to be the sheet named "Sheet".
# NOTE: the names are swapped relative to their Swedish meaning
# (antal_rader holds the column count, antal_kolumn the row count); they are
# kept as-is because the loops below, and any code that reuses them, use them
# in this sense.
antal_rader = worksheet.max_column
antal_kolumn = worksheet.max_row
for r in range(antal_rader):
    column_letter = get_column_letter(r + 1)
    for c in range(antal_kolumn):
        cell = worksheet[f"{column_letter}{c + 1}"]
        # str() lets empty and non-text cells be compared without errors.
        if str(cell.value) == 'a':
            cell.value = 'a-tur'
wb.save('sample.xlsx')
Thanks for all the help i can get.
Sincerely
Dan
# Replace 'HH:MM - HH:MM' cells with a shift name. With that layout the start
# time sits at [0:5] (hour [0:2], minute [3:5]) and the end time at [8:13]
# (hour [8:10], minute [11:13]).
for r in range(antal_rader):
    column_letter = get_column_letter(r + 1)
    for c in range(antal_kolumn):
        coordinate = column_letter + str(c + 1)
        cellValue = str(worksheet[coordinate].value)
        # The trailing 0 is the `second` argument of datetime.time. It used to
        # sit inside int(..., 0), where it was taken as the number base and
        # made minutes such as '05' raise ValueError.
        # change cell to "a-tur"
        if cellValue[0:2] == '07' and time_in_range(
            start_a,
            end_a,
            datetime.time(int(cellValue[8:10]), int(cellValue[11:13]), 0),
        ):
            worksheet[coordinate] = 'a-tur'
        # change cell to "c-tur"
        elif cellValue[8:10] == '21' and time_in_range(
            start_c,
            end_c,
            datetime.time(int(cellValue[0:2]), int(cellValue[3:5]), 0),
        ):
            worksheet[coordinate] = 'c-tur'
        # change cell to "natt"
        elif cellValue[0:2] == '21':
            worksheet[coordinate] = 'natt'

How to read visible-only cells using Python?

I want to read and merge only the visible cells in Excel, but my attempt failed.
I also tried openpyxl, but that didn't work either (see my second code sample).
Is there any other way to read only the visible cells and paste them into a new Excel file?
I want to read only visible cells because the sheets are sometimes filtered or contain hidden rows.
Please advise.
Which module should I use?
If no Excel module can do this, please let me know as well.
My current code:
import os
import glob
import xlwings as xw
import xlrd
import xlsxwriter
# Collect every row of every sheet of every Excel file in the current
# directory and write them, in order, into one new workbook.
# NOTE(review): this starts an xlwings App that nothing below uses or closes.
xw.App().visible = False
path = os.getcwd()
x=input('name:') + '.xlsx'
target_xls = os.path.join(path,x)
data = []
# os.path.join avoids the invalid '\*' escape sequence of the hand-built
# pattern and uses the platform's own separator.
for file in glob.glob(os.path.join(path, '*.*')):
    if file.endswith((".xls", ".xlsm", ".xlsx")):
        wb = xlrd.open_workbook(file)
        for sheet in wb.sheets():
            for rownum in range(sheet.nrows):
                data.append(sheet.row_values(rownum))
workbook = xlsxwriter.Workbook(target_xls)
worksheet = workbook.add_worksheet()
# One output row per collected source row, one cell per value.
for i, row_values in enumerate(data):
    for j, value in enumerate(row_values):
        worksheet.write(i, j, value)
workbook.close()
My openpyxl code:
import os
import glob
import xlwings as xw
import xlrd
import xlsxwriter
from openpyxl import load_workbook
# Copy only the rows of 'Sheet1' that are not hidden into a new workbook.
# NOTE(review): this starts an xlwings App that nothing below uses or closes.
xw.App().visible = False
path = os.getcwd()
x = input('name:') + '.xlsx'
target_xls = os.path.join(path, x)
data = []
wb = load_workbook('sample.xlsx')
ws = wb['Sheet1']
for row in ws:
    # row[0].row is the 1-based index of this row; its row dimension carries
    # the hidden flag.
    if not ws.row_dimensions[row[0].row].hidden:
        # Keep one list per row. Appending each cell value separately
        # flattens the data, and the len(data[i]) / data[i][j] writer loop
        # below then has no rows to index into.
        data.append([cell.value for cell in row])
workbook = xlsxwriter.Workbook(target_xls)
worksheet = workbook.add_worksheet()
for i, row_values in enumerate(data):
    for j, value in enumerate(row_values):
        worksheet.write(i, j, value)
workbook.close()
I want to read excel like below:
And output into:
#Reference: xlrd manual: https://media.readthedocs.org/pdf/xlrd/latest/xlrd.pdf
#Python Forum Reference: https://python-forum.io/Thread-Identify-Hidden-rows-in-xls
import xlrd
print("Read the VALUE and ROW VISIBILITY from cells A1:A6 in a .xls file from 'Sheet2'.")
print()
######################################################
# Access .xls file (Excel 2003 and before)
excel_filename = "HiddenRow3OnSheet2.xls"
# Open the workbook
#NOTE: Traceback error if 'formatting_info=True' is NOT INCLUDED
xl_workbook = xlrd.open_workbook(excel_filename, formatting_info=True)
#Set the focus on 'Sheet2'
my_sheet_name = "Sheet2"
xl_sheet = xl_workbook.sheet_by_name(my_sheet_name)
print("File: {}".format(excel_filename))
# Print the first-column value of every row along with its visibility.
for irow in range(xl_sheet.nrows):
    ihidden = xl_sheet.rowinfo_map[irow].hidden #Row Visibility 0=Visible 1=Hidden
    # A truthy flag means the row is hidden; the labels were previously
    # swapped, reporting hidden rows as "VISIBLE" and vice versa.
    shidden = "HIDDEN" if ihidden else "VISIBLE"
    svalue = xl_sheet.cell(irow,0).value
    print("Value: {} Row Visibility: {}".format(svalue, shidden))
######################################################
# Access .xlsx file (Excel 2007 and later)
excel_filename = "HiddenRow3OnSheet2.xlsx"
# Open the workbook
#NOTE: 'formatting_info=True' is NOT SUPPORTED for .xlsx files
# NOTE(review): xlrd 2.0+ reportedly no longer opens .xlsx files at all —
# confirm against the installed xlrd version.
xl_workbook = xlrd.open_workbook(excel_filename)
#Set the focus on 'Sheet2'
my_sheet_name = "Sheet2"
xl_sheet = xl_workbook.sheet_by_name(my_sheet_name)
print()
print("File: {}".format(excel_filename))
# Row visibility cannot be read here, so only the first-column values are
# listed, each with a fixed explanatory label.
for svalue in xl_sheet.col_values(0):
    print("Value: {} Row Visibility: {}".format(svalue, "Not Available for .xlsx files"))

excel automation using python

I am trying to write a function to read data from excel file using python. My function should read rows from excel sheet one at a time. Below is my code which will print 1st row.
import xlrd
from xlrd import open_workbook, cellname
book = open_workbook('./Excel/Book1.xls')
def read_excel(sheetName):
    """Print the values of the row at index 1 of the named sheet in ``book``.

    ``book`` is the module-level workbook opened before this function is
    defined. Nothing is returned.
    """
    target_sheet = book.sheet_by_name(sheetName)
    row_count = target_sheet.nrows  # read as in the original; not used further
    # The original loop ran exactly once (range(1)) and read row index 0 + 1.
    second_row = target_sheet.row_values(1)
    print(second_row)
import xlrd

# Open the workbook and work on its first sheet.
workbook_path = r'd:\pythonTest.xlsx '
first_sheet = xlrd.open_workbook(workbook_path).sheet_by_index(0)

# For row 0 and column 0 (the value is read but not used)
first_sheet.cell_value(0, 0)

# Report the sheet's dimensions: column count first, then row count.
print(first_sheet.ncols)
print(first_sheet.nrows)

# Show the row at index 1 as a list of values.
print(first_sheet.row_values(1))

# Print the first two columns of every row, side by side.
for row_index in range(first_sheet.nrows):
    left_value = first_sheet.cell_value(row_index, 0)
    right_value = first_sheet.cell_value(row_index, 1)
    print(left_value, right_value)

Resources