Broadly speaking, I'm trying to take an Excel file that contains our inventory and create a separate dictionary for each location, containing {hostname: ip_address} for each switch in that location.
Ideally I'd like it to work something along the lines of
for row in range(1, worksheet.max_row + 1):
if cell.column == existing_dictionary_name
continue
else
cell.column = {}
key = worksheet.cell(row, #).value
value = worksheet.cell(row, #).value
cell.column[key] = value
I tried the following
from openpyxl import Workbook
from openpyxl import load_workbook
workbook = load_workbook(filename="Test db.xlsx")
worksheet = workbook.active
host_ip = {}
for row in range(1, worksheet.max_row + 1):
key = worksheet.cell(row, 4).value
value = worksheet.cell(row, 2).value
dict_name = worksheet.cell(row, 6).value
dict_name[key] = value
print(a)
print(b)
Print(c)
but I just get
Traceback (most recent call last):
File "main.py", line 20, in <module>
dict_name[key] = value
TypeError: 'str' object does not support item assignment
It's not easy creating a variable name with a name from input, but if you can settle for a dict of dicts, then it becomes a lot easier.
This uses a defaultdict defaulting to dict itself as the overall holder.
It then iterates through the rows with iter_rows, skipping the header row and returning values only. Every iteration updates the dict for the row's location with the switch and ip values.
With this input:
switch
ip
location
switch1
192.168.1.1
a
switch2
192.168.1.2
a
switch3
192.168.1.3
b
switch4
192.168.1.4
c
switch5
192.168.1.5
b
from openpyxl import load_workbook
from collections import defaultdict
# Outer mapping: location -> {switch name: ip address}.
# defaultdict(dict) creates the inner dict the first time a location is seen.
net_locations = defaultdict(dict)
wb = load_workbook("/tmp/75072112.xlsx")
ws = wb.active
# min_row=2 skips the header row; values_only=True yields plain cell values.
for row in ws.iter_rows(min_row=2, values_only=True):
    switch, ip, location = row[0], row[1], row[2]
    net_locations[location][switch] = ip
net_locations
# defaultdict(dict,
# {'a': {'switch1': '192.168.1.1', 'switch2': '192.168.1.2'},
# 'b': {'switch3': '192.168.1.3', 'switch5': '192.168.1.5'},
# 'c': {'switch4': '192.168.1.4'}})
I'm using pd.ExcelFile as below to open and parse a file, but currently only with the actual folder path and filename in one string.
wb = pd.ExcelFile(folder_path+filename)
I want to put this into a function, that asks the user to give a path and filename and deals with invalid input. I started something like the below, but it doesn't seem like the error is being generated inside the function anyway, and i'm not sure how to say 'while wb isn't a valid thing' to continue to prompt for a filepath until we get a valid one?
def Load_Parse():
folder_path = input('\nEnter the path to the qry_T spreadsheet here (include slashes at the start and at the end): ')
filename = input('\nEnter the name of the spreadsheet to be used here: ')
sheetname = input('\nEnter the sheet containing the data here, including the extension (e.g. "qry_Trajectory 2019.xlsx": ')
try:
wb = pd.ExcelFile(folder_path+filename)
except FileNotFoundError:
Any ideas?
I'll then parse the file using a similar method i hope:
df = wb.parse('filename')
using Pathlib, os and pandas and a few functions.
One of the key constructs you'll need is the `while True` loop, which keeps executing a block of code until you explicitly `break` out of it (here, once the user has supplied valid input).
feel free to edit to your own spec.
Modules
from pathlib import Path
import os
import pandas as pd
from xlrd import XLRDError
In Action
df = load_parser()
out:
#Hello Umar.Hussain please enter a valid target directory
#C:\Users\UmarH\Files
#1 excels_0
#2 excels_1
#Choose a number between 1 and 2
1
#Your Choice is excels_0.xlsx
#Choose a Sheet - Lists all sheets
'Sheet1'
# returns dataframe
Main Function
def load_parser():
    """Interactively pick a directory, a workbook and a sheet.

    Greets the logged-in user, asks for a target directory, lets the user
    choose one of the ``.xlsx`` files found there and then one of its
    sheets.

    Returns:
        Whatever ``enumerate_sheets`` returns for the chosen workbook.
    """
    print(f"Hello {os.getlogin()} please enter a valid target directory")
    directory = file_tester(input(''), file_type='path')
    print("Please select a number from the following file")
    return enumerate_sheets(create_excel_dict(directory))
Helper Functions
def file_tester(string_path, file_type="path"):
    """Return ``string_path`` as a ``Path`` once it names a directory.

    Parameters:
        string_path: the first candidate, as typed by the user.
        file_type: word used in the re-prompt message only.

    Returns:
        A ``Path`` for which ``is_dir()`` is true. While the candidate is
        not a directory the user is prompted for another one.
    """
    candidate = Path(string_path)
    while not candidate.is_dir():
        candidate = Path(input(f"Please Enter a Valid {file_type}"))
    return candidate
def create_excel_dict(target_path):
    """List the ``.xlsx`` files in ``target_path`` and let the user pick one.

    Each workbook is printed as ``<number> <stem>``, numbered from 1, and
    the user is prompted until a listed number is entered.

    Parameters:
        target_path: a ``pathlib.Path`` pointing at the directory to scan.

    Returns:
        The ``Path`` of the chosen workbook.

    Raises:
        FileNotFoundError: if the directory contains no ``.xlsx`` files,
            since there would be nothing to choose from.
    """
    # Sort so the numbering is stable between runs; glob order is arbitrary.
    xlsx_dict = dict(enumerate(sorted(target_path.glob('*.xlsx')), 1))
    if not xlsx_dict:
        raise FileNotFoundError(f"No .xlsx files found in {target_path}")

    for number, workbook_path in xlsx_dict.items():
        print(number, workbook_path.stem)

    prompt = f'Choose a number between 1 and {len(xlsx_dict)}'
    while True:
        try:
            chosen = xlsx_dict[int(input(prompt))]
        except (KeyError, ValueError):
            # KeyError: a number outside the menu; ValueError: not a number.
            continue
        print(f"Your Choice is {chosen}")
        return chosen
def enumerate_sheets(target_file):
    """Print the sheets of ``target_file`` and load the one the user names.

    Parameters:
        target_file: path of the Excel workbook to open.

    Returns:
        The DataFrame read from the chosen sheet.
    """
    # The context manager closes the workbook handle once the sheet is read.
    with pd.ExcelFile(target_file) as xl:
        for sheet in xl.sheet_names:
            print(sheet)
        target_sheet = input("Please Type Your sheet name")
        # Validate against the known sheet names instead of waiting for the
        # read to fail: which exception an unknown sheet raises depends on
        # the reader backend, so catching only XLRDError is not reliable.
        while target_sheet not in xl.sheet_names:
            target_sheet = input("Please enter a sheet from above.")
        return pd.read_excel(xl, sheet_name=target_sheet)
I want my program to print the first five characters of certain lines of a .txt file whenever a line contains a string built by concatenating two columns (from a DataFrame made with pandas), but, as the title says, I get this error when I run the code. Here is the code (the important lines are at the end; I included everything in case you want to see the whole thing).
import pandas as pd
import re
import numpy as np
link = "excelfilett.txt"
file = open(link, "r")
frames_load = []
is_count_frames_load = False
for line in file:
if "[Interface1]" in line:
is_count_frames_load = True
if is_count_frames_load== True:
frames_load.append(line)
if "[EthernetComNeed]" in line:
break
number_of_rows_load = len(frames_load) -1
header_load = re.split(r'\t', frames_load[0])
number_of_columns_load = len(header_load)
frame_array_load = np.full((number_of_rows_load, number_of_columns_load), 0)
df_frame_array_load = pd.DataFrame(frame_array_load)
df_frame_array_load.columns= header_load
for row in range(number_of_rows_load):
frame_row_load = re.split(r'\t', frames_load[row])
for position in range(len(frame_row_load))
df_frame_array_load["[Name]"] = df_frame_array_load["[End1]"] + " " + df_frame_array_load["[End2]"]
link = "excelfilett.txt"
file = open(link, "r")
frames_path = []
is_count_frames_path = False
for line in file:
if "[Routing Paths]" in line:
is_count_frames_path = True
if is_count_frames_path== True:
for row in df_frame_array_load["[Name]"].rows:
if row in line:
print(line[0:4])
if "[EthernetComConfig]" in line:
break
It gives me the AttributeError on "for row in df_frame_array_load["[Name]"].rows:" and it shouldn't be a version error, so what is the problem? I don't understand.
for row in df_frame_array_load["[Name]"].rows:
This fails because a pandas Series object does not have a "rows" attribute; to loop over a Series you simply iterate over it directly.
should be changed to just:
for row in df_frame_array_load["[Name]"]:
...
Unable to figure out why am I getting the output as "No" only in the below code.
Shouldn't it print "Yes" for those 2 set values
import re
import subprocess
from plumbum import local, cmd
s = subprocess.check_output(["opatch", "lsinventory"])
output = s.decode("utf-8")
patches = [27923320, 27547329, 21171382, 21463894, 18961555, 28432129]
patches_found = set(re.findall(r'\b(?:%s)\b' % '|'.join(map(str, patches)), output))
patches_missing = set(map(str, patches)) - patches_found
for item in patches_missing:
if item in ["27923320", "27547329"]:
print("Yes", item)
else:
print("No")
The items 27923320 and 27547329 in the list patches are integers, while "27923320" and "27547329" are strings. This is what you want:
for item in patches_missing:
if item in [27923320, 27547329]:
print("Yes", item)
else:
print("No")
I've seen a few answers around to this question but none of them are working.
eg: How to write to an existing excel file without breaking formulas with openpyxl?
Docs give nothing away it seems:
http://openpyxl.readthedocs.io/en/latest/api/openpyxl.reader.excel.html
I tried replacing xls.load_workbook with xls.reader.excel.load_workbook but it doesn't change anything.
My current code overwrites the data in the data sheet, but kills the pivot table functionality in the other sheet (the sheet is still there but only with values). Any idea how to keep the pivot table?
import pandas as pd
import openpyxl as xls
from shutil import copyfile
template_file = 'openpy_test.xlsx'
output_file = 'openpy_output.xlsx'
copyfile(template_file, output_file)
book = xls.load_workbook(output_file,guess_types=False,data_only=False)
writer = pd.ExcelWriter(output_file,engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
df.to_excel(writer,sheet_name='data',index=False,encoding='utf8')
writer.save()
I have also tried book.save('dummycopy.xlsx'), which also saves with a non-functioning pivot table. So I am sure the problem is related to the load_workbook function.
Package versions:
openpyxl 2.4.10 py36_0
pandas 0.20.3 py36hce827b7_2
I don't think openpyxl currently supports Excel pivot tables; I had to switch to the win32com library.
Here is a wrapper module I wrote to do specific things with pivot tables; it's basically VBA translated to Python (record macros and read the generated VBA, and it will make sense). Hope it helps. It's still a work in progress but should be enough for you to work with.
import os, datetime
import win32com.client as win32
win32c = win32.constants
import sys, datetime
letters = ' ABCDEFGHIJKLMNOPQRSTUVWXYZ' # the leading space pads index 0 so that letters[1] == 'A', i.e. column 1 maps to 'A'
def Pull_excel_workbook(path = '', filename = '', visible = False):
    '''Open ``filename`` in Excel and describe the active sheet's data.

    Parameters:
        path: directory holding the workbook; the current working
            directory is used when empty.
        filename: workbook file name (required).
        visible: value assigned to the Excel application's ``Visible``.

    Returns:
        ``(excel, wb, src)``: the Excel application object, the opened
        workbook, and an R1C1 reference string anchored at R1C1 that spans
        as many rows and columns as the active sheet's used range.

    Raises:
        FileNotFoundError: if ``filename`` is empty.
        Exception: whatever Excel raises when the workbook cannot be
            opened; it is reported and then re-raised.
    '''
    if filename == '':
        raise FileNotFoundError('Please supply a file')
    if path == '':
        path = os.getcwd()
    excel = win32.gencache.EnsureDispatch('Excel.Application')
    excel.Visible = visible
    try:
        # os.path.join supplies the separator that os.getcwd() lacks and
        # leaves a path that already ends with one untouched.
        wb = excel.Workbooks.Open(os.path.join(path, filename))
    except Exception:
        # Report, then propagate: carrying on would only fail a line later
        # with a NameError on the unbound ``wb``.
        print('Try again\n{}'.format(sys.exc_info()))
        raise
    ws = wb.ActiveSheet
    data = list(ws.UsedRange.Value)  # 2d sequence of rows and columns
    src = '{}!R1C1:R{}C{}'.format(ws.Name, len(data), len(data[0]))
    return excel, wb, src
def Create_pivottable(wb, src, table_name = 'Pivot'):
    '''Create a pivot table on a new worksheet of ``wb``.

    A sheet named ``table_name`` is added and a pivot table of the same
    name is placed on it at row 4, column 1, fed from ``src``.

    Parameters:
        wb: the Excel workbook object to add the sheet to.
        src: source-data reference handed to the pivot cache.
        table_name: name used for both the new sheet and the pivot table.
    '''
    ws = wb.Sheets.Add()  # should also change wb.ActiveSheet to the new one
    ws.Name = table_name
    tname = ws.Name
    start_row, start_col = 4, 1
    # Quote the sheet name: an unquoted destination such as RN Pivot!R4C1 is
    # not valid, whereas 'RN Pivot'!R4C1 is. Apostrophes inside the name are
    # doubled, which is how Excel escapes them in quoted names.
    destination = "'{}'!R{}C{}".format(tname.replace("'", "''"),
                                       start_row, start_col)
    pc = wb.PivotCaches().Add(SourceType = win32c.xlDatabase,
                              SourceData = src)
    try:
        pc.CreatePivotTable(TableDestination = destination,
                            TableName = table_name,
                            DefaultVersion = win32c.xlPivotTableVersion10)
    except Exception:
        # Log what was attempted, then retry with the newer pivot table
        # version (not sure if this will work...).
        print('{}:{}:{}:{}'.format(wb, src, table_name, destination))
        pc.CreatePivotTable(TableDestination = destination,
                            TableName = table_name,
                            DefaultVersion = win32c.xlPivotTableVersion15)
    wb.Sheets(tname).Select()
    wb.Sheets(tname).Cells(3,1).Select()
def Add_to_Filter(wb, tname, field_name):
    '''Make ``field_name`` a page field of pivot table ``tname``.

    The table is looked up on the workbook's active sheet; the field is
    given the page-field orientation and position 1.
    '''
    pivot_table = wb.ActiveSheet.PivotTables(tname)
    page_field = pivot_table.PivotFields(field_name)
    page_field.Orientation = win32c.xlPageField
    page_field.Position = 1
def Add_to_Row(wb, tname, field_name, position = 1):
    '''Make ``field_name`` a row field of pivot table ``tname``.

    The table is looked up on the workbook's active sheet; the field is
    given the row-field orientation and placed at ``position``.
    '''
    pivot_table = wb.ActiveSheet.PivotTables(tname)
    row_field = pivot_table.PivotFields(field_name)
    row_field.Orientation = win32c.xlRowField
    row_field.Position = position
def Add_to_Column(wb, tname, field_name, position = 1):
    '''Make ``field_name`` a column field of pivot table ``tname``.

    The table is looked up on the workbook's active sheet; the field is
    given the column-field orientation and placed at ``position``.
    '''
    field = wb.ActiveSheet.PivotTables(tname).PivotFields(field_name)
    field.Orientation = win32c.xlColumnField
    field.Position = position
    # TODO: positions greater than 1 may need extra handling; the previous
    # placeholder branch for that case did nothing and was removed.
def Add_to_Value(wb, tname, field_name, alias = '', calculation = 'xlSum'):
    '''Add ``field_name`` as a data field of pivot table ``tname``.

    Parameters:
        wb: workbook whose active sheet holds the pivot table.
        tname: pivot table name.
        field_name: name of the pivot field to add.
        alias: caption passed to ``AddDataField``.
        calculation: either a constant value or the name of a constant
            (for example ``'xlSum'``) to be resolved through ``win32c``.
    '''
    if isinstance(calculation, str):
        # Resolve the name through the constants object's own attribute
        # lookup rather than its private ``__dicts__`` list, so every
        # loaded type library is searched. Unknown names are passed
        # through unchanged, as before.
        calculation = getattr(win32c, calculation, calculation)
    pivot_table = wb.ActiveSheet.PivotTables(tname)
    datafield = pivot_table.PivotFields(field_name)
    pivot_table.AddDataField(datafield, alias, calculation)
def LtoC(letter):
    '''Convert a column label ('A', 'Z', 'AA', ...) to its 1-based number.

    Parameters:
        letter: upper-case column label.

    Returns:
        The column number: ``'A'`` -> 1, ``'Z'`` -> 26, ``'AA'`` -> 27.
        The padding character ``' '`` still maps to 0, as it did with the
        lookup string.

    Raises:
        ValueError: if the label contains anything but ``A``-``Z``.
    '''
    if letter == ' ':
        return 0  # legacy: index of the padding entry in the lookup string
    col = 0
    for char in letter:
        offset = ord(char) - ord('A') + 1
        if not 1 <= offset <= 26:
            raise ValueError('{!r} is not a valid column label'.format(letter))
        # Labels are base-26 numbers without a zero digit (A=1 ... Z=26).
        col = col * 26 + offset
    return col
def CtoL(col):
    '''Convert a 1-based column number to its column label.

    Parameters:
        col: column number as an int.

    Returns:
        The label: 1 -> ``'A'``, 26 -> ``'Z'``, 27 -> ``'AA'``. 0 still
        yields the padding character ``' '``, as it did with the lookup
        string.

    Raises:
        IndexError: if ``col`` is negative.
    '''
    if col == 0:
        return ' '  # legacy: the padding entry of the lookup string
    if col < 0:
        raise IndexError('column number must not be negative: {}'.format(col))
    label = ''
    while col > 0:
        # Subtract 1 first because the labels have no zero digit.
        col, remainder = divmod(col - 1, 26)
        label = chr(ord('A') + remainder) + label
    return label
def Format_pretty(wb, tname, row_to_colapse):
    '''Apply cosmetic formatting to pivot table ``tname``.

    Sets the table style, collapses the given field(s) and turns on their
    repeated labels, auto-fits columns A to Z and selects cell A1.
    ``row_to_colapse`` is either one field name (a str) or an iterable of
    field names.
    '''
    wb.ActiveSheet.PivotTables(tname).TableStyle2 = 'PivotStyleMedium9'
    # Treat a single name as a one-element list so one loop serves both cases.
    if type(row_to_colapse) is str:
        field_names = [row_to_colapse]
    else:
        field_names = row_to_colapse
    for field_name in field_names:
        pivot_field = wb.ActiveSheet.PivotTables(tname).PivotFields(field_name)
        pivot_field.ShowDetail = False  # collapses
        pivot_field.RepeatLabels = True  # repeats labels
    wb.ActiveSheet.Columns('A:Z').EntireColumn.AutoFit()
    wb.ActiveSheet.Range('A1').Select()
def Add_calcd_col(ws, col, row_start, row_end, formula, style = '', col_title = 'default'):
    '''Fill one worksheet column with a title and a per-row formula.

    ``col``, ``row_start`` and ``row_end`` should be ints. The cell at
    ``row_start`` receives ``col_title``; each following row up to and
    including ``row_end`` receives ``formula.format(row)``. The column
    range is selected first and is finally assigned ``style``.
    '''
    column_range = '{0}{1}:{0}{2}'.format(CtoL(col), row_start, row_end)
    ws.Range(column_range).Select()
    ws.Cells(row_start, col).Value = col_title
    for row_number in range(row_start + 1, row_end + 1):
        ws.Cells(row_number, col).Value = formula.format(row_number)
    ws.Range(column_range).Style = style
def Values_to_columns(wb,tname, position = 2):
    '''Move the data pivot field of table ``tname`` to the column area.

    The table is looked up on the workbook's active sheet; its
    ``DataPivotField`` is given the column-field orientation and placed at
    ``position``.
    '''
    pivot_table = wb.ActiveSheet.PivotTables(tname)
    pivot_table.DataPivotField.Orientation = win32c.xlColumnField
    pivot_table.DataPivotField.Position = position
def WB_save(wb, path, tname, filename):
    '''Save ``wb`` under a date-stamped ``.xlsx`` name inside ``path``.

    The new name is ``<stem>-<mm.dd.yy>.xlsx``, where ``<stem>`` is
    ``filename`` without its extension and the date is today's.

    Parameters:
        wb: workbook object to save.
        path: directory to save into.
        tname: pivot table name; currently unused.
        filename: original workbook file name.
    '''
    # NOTE: the Format_pretty(wb, tname, 'Division') step is disabled until
    # it is fixed, which is why ``tname`` is not used here.
    # splitext copes with any extension length; slicing off five characters
    # only worked for extensions such as ".xlsx".
    stem, _extension = os.path.splitext(filename)
    stamp = datetime.date.today().strftime('%m.%d.%y')
    new_filename = '{}-{}.xlsx'.format(stem, stamp)
    wb.SaveAs(os.path.join(path, new_filename))
def Pivot_refresh(path, filename, pivot_sheet_name, pivot_table_name = 'Pivot', cell = 'A6'):
    '''Refresh a pivot table and save the workbook under a dated name.

    Parameters:
        path: directory of the workbook.
        filename: workbook file name.
        pivot_sheet_name: name of the sheet holding the pivot table.
        pivot_table_name: pivot table name, forwarded to ``WB_save``.
        cell: address of a cell inside the pivot table, used to locate the
            table to refresh. Defaults to ``'A6'``, the value that used to
            be hard-coded.
    '''
    excel, wb, _src = Pull_excel_workbook(path = path, filename = filename)
    wb.Sheets(pivot_sheet_name).Select()
    excel.Worksheets(pivot_sheet_name).Range(cell).PivotTable.RefreshTable()
    WB_save(wb, path, pivot_table_name, filename)
#pivot refresh
#new = filename[:-5] + '-{}.xlsx'.format(2)
#Pivot_refresh(path = path, filename = new, pivot_sheet_name = 'Pivot')
def Hide_columns(wb, tname, start, end):
    '''Hide the columns from ``start`` to ``end`` on the active sheet.

    ``start`` and ``end`` may each be a column label (a str) or a column
    number; anything that is not a str is converted with ``CtoL``.
    ``tname`` is accepted but not used.
    '''
    first_label = start if type(start) is str else CtoL(start)
    last_label = end if type(end) is str else CtoL(end)
    column_span = '{}:{}'.format(first_label, last_label)
    wb.ActiveSheet.Columns(column_span).EntireColumn.Hidden = True