Python deleting row which not contains specific value?

Python deleting row which not contains specific value? - python-3.x

If the ninth column doesn't contains the name of city ISTANBUL, then I need to be able to delete
the entire row. i wrote the code it works but it doesn't delete row.
from openpyxl import Workbook, load_workbook
from openpyxl.utils import get_column_letter
wb = load_workbook('deneme.xlsx')
ws = wb.active
for row in range(1, 3313):
for col in [9]:
char = get_column_letter(col)
if ws[char + str(row)].value is not 'İSTANBUL':
ws.delete_rows(row)
wb.save('deneme.xlsx')

Here it is solution i just found.
import pandas as pd
df = pd.read_excel('staji.xlsx') #here you can put your file path
filt = (df['ŞEHİR'] == 'İSTANBUL') # put your condition ŞEHİR=column name.
df[filt].to_excel('den1.xlsx')

Related

Is there a method to change the number format of 100 excel files with 20 sheets each from 2 decimal to 6 decimal using Python?

import pandas as pd
import os
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile
path=os.getcwd()
files=os.listdir(path)
for i in range(len(files)):
filename = files[i]
filepath=os.path.join(path,filename)
print(filepath)
df=pd.ExcelFile(filepath) # read sheet name
sheet = df.sheet_names
print(sheet)
df=pd.read_excel(filepath, sheet_name=sheet,skiprows = 5, nrows=15, usecols = 'E:L')
print(df)
when I click on the number it show 6 digits in the header but I want to change it into the excel also

import openpyxl
wb = openpyxl.load_workbook('ds.xlsx')
ws = wb.active
for row in ws.iter_rows():
for cell in row:
# only relevant column and without header
if cell.column_letter == 'A' and cell.row > 1: # put the cell number from where to where you want to check
ws[cell.coordinate].number_format = '0.0000000' # it will change the number format
wb.save('ds2.xlsx')

How to read visible-only cells using Python?

I want to read and merge only visible cells in Excel, but I failed.
Also, I've tried openpyxl but didn't work. (Find my second code)
Is there any other ways to read only visible cells and paste on new excel?
I want to read only visible cells because sometimes they need to be filtered or hidden.
Please kindly advise me.
What kind of module should I put in?
If every excel module cannot do that, please also let me know.
My current code:
import os
import glob
import xlwings as xw
import xlrd
import xlsxwriter
xw.App().visible = False
path = os.getcwd()
x=input('name：') + '.xlsx'
target_xls = os.path.join(path,x)
data = []
for file in glob.glob(path+'\*.*'):
if file.endswith((".xls", ".xlsm", ".xlsx")):
wb = xlrd.open_workbook(file)
for sheet in wb.sheets():
for rownum in range(sheet.nrows):
data.append(sheet.row_values(rownum))
workbook = xlsxwriter.Workbook(target_xls)
worksheet = workbook.add_worksheet()
for i in range(len(data)):
print(range(len(data)))
for j in range(len(data[i])):
worksheet.write(i, j, data[i][j])
workbook.close()
My openpyxl code:
import os
import glob
import xlwings as xw
import xlrd
import xlsxwriter
from openpyxl import load_workbook
xw.App().visible = False
path = os.getcwd()
x = input('name：') + '.xlsx'
target_xls = os.path.join(path, x)
data = []
wb = load_workbook('sample.xlsx')
ws = wb['Sheet1']
for row in ws:
if ws.row_dimensions[row[0].row].hidden == False:
for cell in row:
data.append(cell.value)
workbook = xlsxwriter.Workbook(target_xls)
worksheet = workbook.add_worksheet()
for i in range(len(data)):
print(range(len(data)))
for j in range(len(data[i])):
worksheet.write(i, j, data[i][j])
workbook.close()
I want to read excel like below:
And output into:

#Reference: xlrd manual: https://media.readthedocs.org/pdf/xlrd/latest/xlrd.pdf
#Python Forum Reference: https://python-forum.io/Thread-Identify-Hidden-rows-in-xls
import xlrd
print("Read the VALUE and ROW VISIBILITY from cells A1:A6 in a .xls file from 'Sheet2'.")
print()
######################################################
# Access .xls file (Excel 2003 and before)
excel_filename = "HiddenRow3OnSheet2.xls"
# Open the workbook
#NOTE: Traceback error if 'formatting_info=True' is NOT INCLUDED
xl_workbook = xlrd.open_workbook(excel_filename, formatting_info=True)
#Set the focus on 'Sheet2'
my_sheet_name = "Sheet2"
xl_sheet = xl_workbook.sheet_by_name(my_sheet_name)
print("File: {}".format(excel_filename))
for irow in range(xl_sheet.nrows):
ihidden = xl_sheet.rowinfo_map[irow].hidden #Row Visibility 0=Visible 1=Hidden
if ihidden == True:
shidden = "VISIBLE"
else:
shidden = "HIDDEN"
svalue = xl_sheet.cell(irow,0).value
print("Value: {} Row Visibility: {}".format(svalue, shidden))
######################################################
# Access .xlsx file (Excel 2007 and later)
excel_filename = "HiddenRow3OnSheet2.xlsx"
# Open the workbook
#NOTE: 'formatting_info=True' is NOT SUPPORTED for .xlsx files
xl_workbook = xlrd.open_workbook(excel_filename)
#Set the focus on 'Sheet2'
my_sheet_name = "Sheet2"
xl_sheet = xl_workbook.sheet_by_name(my_sheet_name)
print()
print("File: {}".format(excel_filename))
for irow in range(xl_sheet.nrows):
svalue = xl_sheet.cell(irow,0).value
print("Value: {} Row Visibility: {}".format(svalue, "Not Available for .xlsx files"))

how to read xlsx as pandas dataframe with formulas as strings

I have a excel file with some calculated columns.
for example, I have some data in columns 'a' and column 'b' is calculated using values in column 'a'.
i need to append new data to column 'a' and calculate column 'b' and save the file.
import pandas as pd
df = pd.DataFrame({'a':[1,2,3],'b':["=a2","=a3","=a4"]})
df.to_excel('test.xlsx',index=False)
when i try to read the file using pandas read excel it reads the column 'b' as NaN.
df = pd.read_excel(r'test.xlsx')
how do i achieve this. may be if i can read the file as string and append the formulas as string. when i open the file in excel the excel will do the calculations?

Use OpenPyXL to load the excel worksheet instead of directly with pandas
from openpyxl import load_workbook
import pandas as pd
wb = load_workbook(filename = 'test.xlsx')
sheet_name = wb.get_sheet_names()[0]
ws = wb[sheet_name]
df = pd.DataFrame(ws.values)

import pandas as pd
import xlsxwriter
name = '123.xlsx'
writer = pd.ExcelWriter(name,engine='xlsxwriter')
pd.DataFrame({}).to_excel(writer,sheet_name='Sheet1')
workbook = writer.book
worksheet = writer.sheets['Sheet1']
worksheet.write('A1',1)
worksheet.write('A2','=A1')
writer.save()

How to read an excel file with multiple sheets using for loop in python

This is what i try
from pathlib import Path
loc = Path('D:\DataSciSpec\Practice\Forloopindict.xlsx')
dict = pd.read_excel(loc,sheetname = None)
for i in dict.keys():
print(i)
I get the name of sheets
Sheet4
Sheet3
Sheet2
Sheet1
I can also see the sheet content one by one
for i in dict.keys():
print(dict[i].head())
But how put this data in n data frames (equal to no of sheets)
and then append one to another

This will create a single dataframe (df_full) with the data from all sheets.
import pandas as pd
loc = r'D:\DataSciSpec\Practice\Forloopindict.xlsx'
workbook = pd.read_excel(loc,sheet_name = None)
df_full = pd.DataFrame()
for _, sheet in workbook.items():
df_full = df_full.append(sheet)
# Reset index or you'll have duplicates
df_full = df_full.reset_index(drop=True)

excel automation using python

I am trying to write a function to read data from excel file using python. My function should read rows from excel sheet one at a time. Below is my code which will print 1st row.
import xlrd
from xlrd import open_workbook, cellname
book = open_workbook('./Excel/Book1.xls')
def read_excel(sheetName):
sheet = book.sheet_by_name(sheetName)
row = sheet.nrows
for i in range(1):
rows = sheet.row_values(i+1)
print(rows)

file = r'd:\pythonTest.xlsx '
import xlrd
wb = xlrd.open_workbook(file)
sheet = wb.sheet_by_index(0)
# For row 0 and column 0
sheet.cell_value(0, 0)
# Extracting number of columns
print(sheet.ncols)
print(sheet.nrows)
print(sheet.row_values(1))
for i in range(sheet.nrows):
print(sheet.cell_value(i, 0), sheet.cell_value(i, 1))

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Python deleting row which not contains specific value? - python-3.x

Here it is solution i just found. import pandas as pd df = pd.read_excel('staji.xlsx') #here you can put your file path filt = (df['ŞEHİR'] == 'İSTANBUL') # put your condition ŞEHİR=column name. df[filt].to_excel('den1.xlsx')

Related

Is there a method to change the number format of 100 excel files with 20 sheets each from 2 decimal to 6 decimal using Python?

How to read visible-only cells using Python?

how to read xlsx as pandas dataframe with formulas as strings

How to read an excel file with multiple sheets using for loop in python

excel automation using python

Categories

Resources