How to read an excel file with multiple sheets using for loop in python - python-3.x
This is what I tried:
from pathlib import Path

import pandas as pd

# Raw string so the backslashes in the Windows path are never treated as
# escape sequences.
loc = Path(r'D:\DataSciSpec\Practice\Forloopindict.xlsx')

# sheet_name=None loads every sheet and returns a mapping of
# sheet name -> DataFrame.  The older `sheetname` spelling of this keyword
# is not accepted by current pandas.
# NOTE: the name `dict` shadows the builtin; it is kept only because the
# later snippet in this post refers to it.
dict = pd.read_excel(loc, sheet_name=None)

# Print the name of every sheet in the workbook.
for i in dict.keys():
    print(i)
I get the names of the sheets:
Sheet4
Sheet3
Sheet2
Sheet1
I can also see the sheet content one by one
# Show the first rows of each sheet's DataFrame, one sheet after another.
for frame in dict.values():
    print(frame.head())
But how do I put this data into n data frames (one per sheet)
and then append them to one another?
This will create a single dataframe (df_full) with the data from all sheets.
import pandas as pd

loc = r'D:\DataSciSpec\Practice\Forloopindict.xlsx'

# sheet_name=None reads every sheet; the result maps sheet name -> DataFrame.
workbook = pd.read_excel(loc, sheet_name=None)

# Stack all sheets in a single call.  DataFrame.append was removed in
# pandas 2.0, and appending inside a loop re-copies the accumulated rows on
# every pass.  ignore_index=True renumbers the rows so the per-sheet indexes
# do not leave duplicate labels in the result.
df_full = pd.concat(list(workbook.values()), ignore_index=True)
Related
Append values to Dataframe in loop and if conditions
Need help please. I have a dataframe that reads rows from Excel and appends to Dataframe if certain columns exist. I need to add an additional Dataframe if the columns don't exist in a sheet and append filename and sheetname and write all the file names and sheet names for those sheets to an excel file. Also I want the values to be unique. I tried adding to dfErrorList but it only showed the last sheetname and filename and repeated itself many times in the output excel file from xlsxwriter import Workbook import pandas as pd import openpyxl import glob import os path = 'filestoimport/*.xlsx' list_of_dfs = [] list_of_dferror = [] dfErrorList = pd.DataFrame() #create empty df for filepath in glob.glob(path): xl = pd.ExcelFile(filepath) # Define an empty list to store individual DataFrames for sheet_name in xl.sheet_names: df = pd.read_excel(filepath, sheet_name=sheet_name) df['sheetname'] = sheet_name file_name = os.path.basename(filepath) df['sourcefilename'] = file_name if "Project ID" in df.columns and "Status" in df.columns: print('') *else: dfErrorList['sheetname'] = df['sheetname'] # adds `sheet_name` into the column dfErrorList['sourcefilename'] = df['sourcefilename'] continue list_of_dferror.append((dfErrorList)) df['Status'].fillna('', inplace=True) df['Added by'].fillna('', inplace=True) list_of_dfs.append(df) # # Combine all DataFrames into one data = pd.concat(list_of_dfs, ignore_index=True) dataErrors = pd.concat(list_of_dferror, ignore_index=True) dataErrors.to_excel(r'error.xlsx', index=False) # data.to_excel("total_countries.xlsx", index=None)
In python, how to concatenate corresponding sheets in multiple excel files
How do I concatenate multiple xlsx files with the same sheet_names. For example, I have 3 xlsx files, Rob_schedule.xlsx, Mike_schdule.xlsx and Jerome_schedule.xlsx. Each file has the following sheet/tab names : home, office & school. The code below generates the 3 xlsx files ( you can copy + paste and run to generate the excel files) ##############################Generating the data for Rob_schedule.xlsx######################## import pandas as pd import numpy as np df= { 'Date':[10232020,10242020,10252020,10262020], 'Class':['AP_Bio','AP_Chem','Physics','History'], 'Period':[3,1,2,4]} school = pd.DataFrame(df,columns = ['Date','Class','Period']) school df2= { 'Date':[10232020,10242020,10252020,10262020], 'Meeting':['MQ1','MQ6','MQ2','MQ8'], 'Lunch':[1,1,1,3], 'code':['java','python','C','C++']} office = pd.DataFrame(df2,columns = ['Date','Meeting','Lunch','code']) office df3= { 'cooking':['C','B','D','B'], 'Laundry':['color','white','White','color'], 'cleaning':['balcony','garage','restroom','bathroom']} home = pd.DataFrame(df3,columns = ['cooking','Laundry','cleaning']) home import pandas as pd #initialze the excel writer writer = pd.ExcelWriter('Rob_schedule.xlsx', engine='xlsxwriter') #store your dataframes in a dict, where the key is the sheet name you want frames = {'home':home, 'office':office, 'school':school} #now loop thru and put each on a specific sheet for sheet, frame in frames.items(): frame.to_excel(writer, sheet_name = sheet,index = False) #critical last step writer.save() ################################ generating Mike_schedule.xlsx################################### import pandas as pd import numpy as np df= { 'Date':[10232020,10242020,10252020,10262020], 'Class':['AP_Bio','AP_Chem','Physics','History'], 'Period':[3,1,2,4]} school = pd.DataFrame(df,columns = ['Date','Class','Period']) school df2= { 'Date':[10232020,10242020,10252020,10262020], 'Meeting':['MQ1','MQ2','MQ4','MQ5'], 'Lunch':[1,1,1,3], 'code':['javascript','R','C','C++']} office = 
pd.DataFrame(df2,columns = ['Date','Meeting','Lunch','code']) office df3= { 'cooking':['A','B','D','B'], 'Laundry':['color','white','white','color'], 'cleaning':['patio','garage','living_room','bathroom']} home = pd.DataFrame(df3,columns = ['cooking','Laundry','cleaning']) home #initialze the excel writer writer = pd.ExcelWriter('Mike_schedule.xlsx', engine='xlsxwriter') #store your dataframes in a dict, where the key is the sheet name you want frames = {'home':home, 'office':office, 'school':school} #now loop thru and put each on a specific sheet for sheet, frame in frames.items(): # .use .items for python 3.X frame.to_excel(writer, sheet_name = sheet,index = False) #critical last step writer.save() ######################### Generate Jerome schedule########################################### df= { 'Date':[10232020,10242020,10252020,10262020], 'Class':['French','Math','Physics','History'], 'Period':[3,1,2,4]} school = pd.DataFrame(df,columns = ['Date','Class','Period']) school df2= { 'Date':[10232020,10242020,10252020,10262020], 'Meeting':['MQ1','MQ2','MQ4','MQ5'], 'Lunch':[1,1,1,3], 'code':['javascript','python','R','C++']} office = pd.DataFrame(df2,columns = ['Date','Meeting','Lunch','code']) office df3= { 'cooking':['X','B','D','C'], 'Laundry':['color','white','white','color'], 'cleaning':['patio','garage','living_room','bathroom']} home = pd.DataFrame(df3,columns = ['cooking','Laundry','cleaning']) home import pandas as pd #initialze the excel writer writer = pd.ExcelWriter('Jerome_schedule.xlsx', engine='xlsxwriter') #store your dataframes in a dict, where the key is the sheet name you want frames = {'home':home, 'office':office, 'school':school} #now loop thru and put each on a specific sheet for sheet, frame in frames.items(): # .use .items for python 3.X frame.to_excel(writer, sheet_name = sheet,index = False) #critical last step writer.save() I want to concatenate the corresponding sheets/tabs :home, office, and school for 
Rob_schedule.xlsx,Mike_schedule.xlsx & Jerome_schedule.xlsx export the concatenated dataframes as family_schedule.xlsx with home, office and school tabs My attempt: # This code concatenates all the tabs into one tab, but what I want is to concatenate all by their corresponding sheet/tab names import pandas as pd path = os.chdir(r'mypath\\') files = os.listdir(path) files # pull files with `.xlsx` extension excel_files = [file for file in files if '.xlsx' in file] excel_files def create_df_from_excel(file_name): file = pd.ExcelFile(file_name) names = file.sheet_names return pd.concat([file.parse(name) for name in names]) df = pd.concat( [create_df_from_excel(xl) for xl in excel_files] ) # save the data frame writer = pd.ExcelWriter('family_reschedule.xlsx') df.to_excel(writer, '') writer.save()
# I would iterate over each file, and then over each worksheet, adding each
# sheet to a different list based on the sheet name.
#
# Then you'll have a structure like...
#
#     {
#         'sheet1': [df_file1_sheet1, df_file2_sheet1, df_file3_sheet1],
#         'sheet2': [df_file1_sheet2, df_file2_sheet2, df_file3_sheet2],
#         'sheet3': [df_file1_sheet3, df_file2_sheet3, df_file3_sheet3],
#     }
#
# Then concatenate each list into a single dataframe, then write the three
# dataframes to an excel file.

# This part is just your own code, I've added it here because you
# couldn't figure out where `excel_files` came from
#################################################################
import os
import pandas as pd

OUTPUT_FILE = 'family_reschedule.xlsx'

# os.chdir() returns None, so its result is not a usable path: change
# directory first, then list the (new) current directory.
os.chdir(r'mypath\\')
files = os.listdir()

# pull files with `.xlsx` extension; skip the output workbook so that
# re-running the script does not read its own previous result back in
excel_files = [
    file for file in files
    if file.endswith('.xlsx') and file != OUTPUT_FILE
]

# This part is my actual answer
###############################
from collections import defaultdict

# sheet name -> list of DataFrames, one per workbook containing that sheet
worksheet_lists = defaultdict(list)

for file_name in excel_files:
    workbook = pd.ExcelFile(file_name)
    for sheet_name in workbook.sheet_names:
        worksheet = workbook.parse(sheet_name)
        # Record which workbook each row came from.
        worksheet['source'] = file_name
        worksheet_lists[sheet_name].append(worksheet)

# Collapse each per-sheet list into a single DataFrame.
worksheets = {
    sheet_name: pd.concat(sheet_list)
    for (sheet_name, sheet_list) in worksheet_lists.items()
}

# Use the writer as a context manager: it saves and closes the file on exit.
# (ExcelWriter.save() was removed in pandas 2.0.)
with pd.ExcelWriter(OUTPUT_FILE) as writer:
    for sheet_name, df in worksheets.items():
        df.to_excel(writer, sheet_name=sheet_name, index=False)
# Consider building a dict of concatenated data frames with list/dict
# comprehensions by running an outer iteration across sheet names and an
# inner iteration across workbooks:
import os

import pandas as pd

path = "/path/to/workbooks"
workbooks = [f for f in os.listdir(path) if f.endswith(".xlsx")]
sheets = ["home", "office", "school"]

# sheet name -> that sheet read from every workbook, stacked together
df_dicts = {
    sh: pd.concat(
        [pd.read_excel(os.path.join(path, wb), sheet_name=sh) for wb in workbooks]
    )
    for sh in sheets
}

# Then, export to a single file.  The context manager saves and closes the
# workbook on exit, so no explicit writer.save() call is needed (that method
# was removed in pandas 2.0).
with pd.ExcelWriter('family_reschedule.xlsx') as writer:
    for sh, df in df_dicts.items():
        df.to_excel(writer, sheet_name=sh, index=False)
Can we copy one column from excel and convert it to a list in Python?
I use df = pd.read_clipboard() list_name = df['column_name'].to_list() but this method is a bit long for me. I want to copy a column, bring it into Python, and then apply some function so that the copied text is converted to a list.
# This will read an Excel column as a list.
import xlrd

# NOTE(review): xlrd 2.x is reported to open only .xls files; confirm that
# the installed version can read this .xlsx workbook.
book = xlrd.open_workbook('Myfile.xlsx')  # path to your file
sheet = book.sheet_by_name("Sheet1")  # Sheet name


def Readlist(Element, Column):
    """Append the values of one sheet column to ``Element`` as strings.

    Args:
        Element: list that receives the values; it is modified in place.
        Column: zero-based index of the column to read.

    Row 0 is skipped (the loop starts at row 1), and every cell value is
    converted with ``str``.
    """
    Element.extend(
        str(sheet.row_values(row_idx)[Column])
        for row_idx in range(1, sheet.nrows)
    )


column1 = []  # List name
Readlist(column1, 1)  # Column Number is 1 here
print(column1)

# To read a specified column as a list, use the Readlist function;
# initialize a [] variable before using it.

# Using Pandas:
import pandas as pd

df = pd.read_excel("path.xlsx", index_col=None, na_values=['NA'], usecols="A")
# Select the single loaded column by position: its label comes from the
# sheet's header row, so df[0] would raise KeyError unless the header is 0.
mylist = df.iloc[:, 0].tolist()
print(mylist)
Python for the Comparison of excel column elements and print the matched elements in separate column
I have developed the following code and fetched the matched output using a for loop.I need to print these output elements in separate column using python. excel file name - Sample_data.xlsx first column - WBS_CODE second column - PROJECT_CODE first column and second column are matched and then printed in separate column (column F) using python code. Please find my below code, import pandas as pd A = pd.read_excel("D:\python_work\Sample_data.xlsx", sheet_name = '31Sep') code = A['WBS_CODE'].tolist() B = pd.read_excel("D:\python_work\Sample_data.xlsx", sheet_name = '4Dec') code1 = B['PROJECT_CODE'].tolist() for x in code1: if x in code: print(x) else: print("NA") output: NA NA NA APP-ACI-PJ-APAC-EMEA-ENG NA NA
I have found a way to export the output and print them in a separate column in excel sheet. Below is the solution, import pandas as pd from openpyxl import load_workbook # Reading the Excel file columns A = pd.read_excel("D:\python_work\Sample_data.xlsx", sheet_name='4Dec') code = A['PROJECT_CODE'].tolist() B = pd.read_excel("D:\python_work\Sample_data.xlsx", sheet_name='31Sep') code1 = B['WBS_CODE'].tolist() # Comparison of columns class test: def loop(self): result = [] for x in code1: if x in code: result.append(x) else: y = "NA" result.append(y) print(result) # Printing data into Excel try: book = load_workbook('D:\python_work\Aew1.xlsx') writer = pd.ExcelWriter('D:\python_work\Aew1.xlsx', engine='openpyxl') writer.book = book writer.sheets = dict((ws.title, ws) for ws in book.worksheets) # loading all the worksheets in opened Excel df = pd.DataFrame.from_dict({'Column1': result}) df.to_excel(writer, sheet_name='Sheet1', startcol=19) writer.save() except FileNotFoundError: print("File Not found: 'Check the Name or Existence of file specified/'") except PermissionError: print("File Opened/No-Access: Check whether you have access to file or file is opened") test().loop() steps that solved: 1. Appended the for loop output to a list 2. used openpyxl library to print the output to a column in excel worksheet. Thanks guyz for help and support. Have a good day
Appending data when writing from excel sheet to txt document
There is a text document called: file.txt with text already on it. When I go through the excel sheet and write to the text file it erases the original text on it. How do I only append the info from the excel sheet while keeping the original text info? CODE import xlwt import xlrd import csv workbook = xlrd.open_workbook('input.xls') sheet = workbook.sheet_by_index(2) data = [] data.append([sheet.cell_value(row, 0).strip() for row in range(sheet.nrows)]) data.append([sheet.cell_value(row, 1).strip() for row in range(sheet.nrows)]) workbook = xlwt.Workbook() sheet = workbook.add_sheet('test') for colidx, col in enumerate(data): for rowidx, row in enumerate(col): sheet.write(rowidx, colidx, row) transposed = zip(*data) with open('file.txt','wb') as fou: writer = csv.writer(fou) for row in transposed: writer.writerow(row)
If I understand correctly you want to pass ab as a flag to open(): transposed = zip(*data) # v with open('file.txt','ab') as fou: writer = csv.writer(fou) for row in transposed: writer.writerow(row)