Python deleting row which not contains specific value? - python-3.x

If the ninth column doesn't contains the name of city ISTANBUL, then I need to be able to delete
the entire row. i wrote the code it works but it doesn't delete row.
from openpyxl import Workbook, load_workbook
from openpyxl.utils import get_column_letter
wb = load_workbook('deneme.xlsx')
ws = wb.active
for row in range(1, 3313):
for col in [9]:
char = get_column_letter(col)
if ws[char + str(row)].value is not 'İSTANBUL':
ws.delete_rows(row)
wb.save('deneme.xlsx')

Here it is solution i just found.
import pandas as pd
df = pd.read_excel('staji.xlsx') #here you can put your file path
filt = (df['ŞEHİR'] == 'İSTANBUL') # put your condition ŞEHİR=column name.
df[filt].to_excel('den1.xlsx')

Related

Is there a method to change the number format of 100 excel files with 20 sheets each from 2 decimal to 6 decimal using Python?

import pandas as pd
import os
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile
path=os.getcwd()
files=os.listdir(path)
for i in range(len(files)):
filename = files[i]
filepath=os.path.join(path,filename)
print(filepath)
df=pd.ExcelFile(filepath) # read sheet name
sheet = df.sheet_names
print(sheet)
df=pd.read_excel(filepath, sheet_name=sheet,skiprows = 5, nrows=15, usecols = 'E:L')
print(df)
when I click on the number it show 6 digits in the header but I want to change it into the excel also
import openpyxl
wb = openpyxl.load_workbook('ds.xlsx')
ws = wb.active
for row in ws.iter_rows():
for cell in row:
# only relevant column and without header
if cell.column_letter == 'A' and cell.row > 1: # put the cell number from where to where you want to check
ws[cell.coordinate].number_format = '0.0000000' # it will change the number format
wb.save('ds2.xlsx')

How to read visible-only cells using Python?

I want to read and merge only visible cells in Excel, but I failed.
Also, I've tried openpyxl but didn't work. (Find my second code)
Is there any other ways to read only visible cells and paste on new excel?
I want to read only visible cells because sometimes they need to be filtered or hidden.
Please kindly advise me.
What kind of module should I put in?
If every excel module cannot do that, please also let me know.
My current code:
import os
import glob
import xlwings as xw
import xlrd
import xlsxwriter
xw.App().visible = False
path = os.getcwd()
x=input('name:') + '.xlsx'
target_xls = os.path.join(path,x)
data = []
for file in glob.glob(path+'\*.*'):
if file.endswith((".xls", ".xlsm", ".xlsx")):
wb = xlrd.open_workbook(file)
for sheet in wb.sheets():
for rownum in range(sheet.nrows):
data.append(sheet.row_values(rownum))
workbook = xlsxwriter.Workbook(target_xls)
worksheet = workbook.add_worksheet()
for i in range(len(data)):
print(range(len(data)))
for j in range(len(data[i])):
worksheet.write(i, j, data[i][j])
workbook.close()
My openpyxl code:
import os
import glob
import xlwings as xw
import xlrd
import xlsxwriter
from openpyxl import load_workbook
xw.App().visible = False
path = os.getcwd()
x = input('name:') + '.xlsx'
target_xls = os.path.join(path, x)
data = []
wb = load_workbook('sample.xlsx')
ws = wb['Sheet1']
for row in ws:
if ws.row_dimensions[row[0].row].hidden == False:
for cell in row:
data.append(cell.value)
workbook = xlsxwriter.Workbook(target_xls)
worksheet = workbook.add_worksheet()
for i in range(len(data)):
print(range(len(data)))
for j in range(len(data[i])):
worksheet.write(i, j, data[i][j])
workbook.close()
I want to read excel like below:
And output into:
#Reference: xlrd manual: https://media.readthedocs.org/pdf/xlrd/latest/xlrd.pdf
#Python Forum Reference: https://python-forum.io/Thread-Identify-Hidden-rows-in-xls
import xlrd
print("Read the VALUE and ROW VISIBILITY from cells A1:A6 in a .xls file from 'Sheet2'.")
print()
######################################################
# Access .xls file (Excel 2003 and before)
excel_filename = "HiddenRow3OnSheet2.xls"
# Open the workbook
#NOTE: Traceback error if 'formatting_info=True' is NOT INCLUDED
xl_workbook = xlrd.open_workbook(excel_filename, formatting_info=True)
#Set the focus on 'Sheet2'
my_sheet_name = "Sheet2"
xl_sheet = xl_workbook.sheet_by_name(my_sheet_name)
print("File: {}".format(excel_filename))
for irow in range(xl_sheet.nrows):
ihidden = xl_sheet.rowinfo_map[irow].hidden #Row Visibility 0=Visible 1=Hidden
if ihidden == True:
shidden = "VISIBLE"
else:
shidden = "HIDDEN"
svalue = xl_sheet.cell(irow,0).value
print("Value: {} Row Visibility: {}".format(svalue, shidden))
######################################################
# Access .xlsx file (Excel 2007 and later)
excel_filename = "HiddenRow3OnSheet2.xlsx"
# Open the workbook
#NOTE: 'formatting_info=True' is NOT SUPPORTED for .xlsx files
xl_workbook = xlrd.open_workbook(excel_filename)
#Set the focus on 'Sheet2'
my_sheet_name = "Sheet2"
xl_sheet = xl_workbook.sheet_by_name(my_sheet_name)
print()
print("File: {}".format(excel_filename))
for irow in range(xl_sheet.nrows):
svalue = xl_sheet.cell(irow,0).value
print("Value: {} Row Visibility: {}".format(svalue, "Not Available for .xlsx files"))

how to read xlsx as pandas dataframe with formulas as strings

I have a excel file with some calculated columns.
for example, I have some data in columns 'a' and column 'b' is calculated using values in column 'a'.
i need to append new data to column 'a' and calculate column 'b' and save the file.
import pandas as pd
df = pd.DataFrame({'a':[1,2,3],'b':["=a2","=a3","=a4"]})
df.to_excel('test.xlsx',index=False)
when i try to read the file using pandas read excel it reads the column 'b' as NaN.
df = pd.read_excel(r'test.xlsx')
how do i achieve this. may be if i can read the file as string and append the formulas as string. when i open the file in excel the excel will do the calculations?
Use OpenPyXL to load the excel worksheet instead of directly with pandas
from openpyxl import load_workbook
import pandas as pd
wb = load_workbook(filename = 'test.xlsx')
sheet_name = wb.get_sheet_names()[0]
ws = wb[sheet_name]
df = pd.DataFrame(ws.values)
import pandas as pd
import xlsxwriter
name = '123.xlsx'
writer = pd.ExcelWriter(name,engine='xlsxwriter')
pd.DataFrame({}).to_excel(writer,sheet_name='Sheet1')
workbook = writer.book
worksheet = writer.sheets['Sheet1']
worksheet.write('A1',1)
worksheet.write('A2','=A1')
writer.save()

How to read an excel file with multiple sheets using for loop in python

This is what i try
from pathlib import Path
loc = Path('D:\DataSciSpec\Practice\Forloopindict.xlsx')
dict = pd.read_excel(loc,sheetname = None)
for i in dict.keys():
print(i)
I get the name of sheets
Sheet4
Sheet3
Sheet2
Sheet1
I can also see the sheet content one by one
for i in dict.keys():
print(dict[i].head())
But how put this data in n data frames (equal to no of sheets)
and then append one to another
This will create a single dataframe (df_full) with the data from all sheets.
import pandas as pd
loc = r'D:\DataSciSpec\Practice\Forloopindict.xlsx'
workbook = pd.read_excel(loc,sheet_name = None)
df_full = pd.DataFrame()
for _, sheet in workbook.items():
df_full = df_full.append(sheet)
# Reset index or you'll have duplicates
df_full = df_full.reset_index(drop=True)

excel automation using python

I am trying to write a function to read data from excel file using python. My function should read rows from excel sheet one at a time. Below is my code which will print 1st row.
import xlrd
from xlrd import open_workbook, cellname
book = open_workbook('./Excel/Book1.xls')
def read_excel(sheetName):
sheet = book.sheet_by_name(sheetName)
row = sheet.nrows
for i in range(1):
rows = sheet.row_values(i+1)
print(rows)
file = r'd:\pythonTest.xlsx '
import xlrd
wb = xlrd.open_workbook(file)
sheet = wb.sheet_by_index(0)
# For row 0 and column 0
sheet.cell_value(0, 0)
# Extracting number of columns
print(sheet.ncols)
print(sheet.nrows)
print(sheet.row_values(1))
for i in range(sheet.nrows):
print(sheet.cell_value(i, 0), sheet.cell_value(i, 1))

Resources