Reading a CSV file with graphviz - python-3.x

The current project I have to do involves reading data from a CSV file and using graphviz to show a visual representation of the data. This is what the code looks like:
import graphviz
import pandas
import os
import math
def save_graph_as_jpg(graph, filename):
graph.save('temp.dot')
src = graphviz.Source.from_file('temp.dot')
src.render(filename, format="jpg")
os.remove(filename)
os.remove('temp.dot')
class Node:
def __init__(self, data, left = None, right = None):
self.left = left
self.right = right
self.data = data
df = pandas.read_csv('decisiontree.csv', index_col = "ID") # df is "data frame"
print(df.to_string())
print(df.info)
nodes = []
nodeMap = {None:None}
for index, row in df[::-1].iterrows():
row = df.index(int[index])
if isinstance(df.loc[row][3], float) and math.isnan(df.loc[row][3]):
df.loc[row][3] = None
if isinstance(df.loc[row][2], float) and math.isnan(df.loc[row][2]):
df.loc[row][2] = None
nodeMap[df.loc[row][0]] = Node(df.loc[row][1],nodeMap[df.loc[row][3]], nodeMap[df.loc[row][2]]), nodes.insert(0,df.loc[row][0])
graph = graphviz.Digraph('structs', filename='structs.gv', node_attr={'shape': 'plaintext', 'ordering':'out'})
for nodeID in nodes:
node = nodeMap[nodeID]
if node.left:
graph.edge(node.data, node.left.data)
if node.right:
graph.edge(node.data, node.right.data)
save_graph_as_jpg(graph, "Decisiontree")
When I run it using IDLE, most of the code runs just fine, but it fails on line 27, which originally read:
row = df.index[index]
I get a traceback message saying the following:
Traceback (most recent call last):
File "C:\Users...... line 27, in <module>
row = df.index[index]
File "C:\Users......Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\pandas\core\indexes\base.py", line 5382, in __getitem__
result = getitem(key)
IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices
I changed it to:
row = df.index(int[index])
and now I get this as a traceback and index error:
Traceback (most recent call last):
File "C:\Users.......CTML AI\Week 3\Lab3.py", line 27, in <module>
row = df.index(int[index])
TypeError: 'type' object is not subscriptable

You are receiving that error because you tried to use square brackets with the int type, which would try to subscript int as if it were an array. That doesn't work because int is a type that can't be subscripted.[1][2] You probably want to use parentheses instead to cast the type of the index variable to integer. Try changing line 27 to
df.index(int(index))
[1] It would work if you had an array named int, but naming your variables the same as built-in types or functions is not a good idea.
[2] As of Python 3.9, some types can be subscripted in type hints, e.g. list[int].

Related

how to filter a particular column with python pandas?

I have an excel file where I have 2 columns: 'Name' and 'size'. The 'Name' column has multiple file types, namely ".apk, .dat, .vdex, .ttc" etc. But I only want to populate the files with the file extension ending with .apk. I do not want any other file type in the new excel file.
I have written the below code:
import pandas as pd
import json
def json_to_excel():
with open('installed-files.json') as jf:
data = json.load(jf)
df = pd.DataFrame(data)
new_df = df[df.columns.difference(['SHA256'])]
new_xl = new_df.to_excel('abc.xlsx')
return new_xl
def filter_apk(): `MODIFIED CODE`
old_xl = json_to_excel()
data = pd.read_excel(old_xl)
a = data[data["Name"].str.contains("\.apk")]
a.to_excel('zybg.xlsx')
Above program does following:
json_to_excel() takes a JSON file, converts it to .xlsx format, and saves it.
filter_apk() is supposed to create multiple Excel files based on the file extensions present in the "Name" column.
The first function does what I intend.
The second function does not do anything, and it does not throw any error either. I have followed this weblink
Below are the few samples of the "name" column
/system/product/<Path_to>/abc.apk
/system/fonts/wwwr.ttc
/system/framework/framework.jar
/system/<Path_to>/icu.dat
/system/<Path_to>/Normal.apk
/system/<Path_to>/Tv.apk
How to get that working? Or is there a better way to achieve the objective?
Please suggest.
ERROR
raise ValueError(msg)
ValueError: Invalid file path or buffer object type: <class 'NoneType'>
Note:
I have all the files at the same location.
modified code:
import pandas as pd
import json
def json_to_excel():
with open('installed-files.json') as jf:
data = json.load(jf)
df = pd.DataFrame(data)
new_df = df[df.columns.difference(['SHA256'])]
new_df.to_excel('abc.xlsx')
def filter_apk():
json_to_excel()
old_xl = pd.read_excel('abc.xlsx')
data = pd.read_excel(old_xl)
a = data[data["Name"].str.contains("\.apk")]
a.to_excel('zybg.xlsx')
t = filter_apk()
print(t)
New error:
Traceback (most recent call last):
File "C:/Users/amitesh.sahay/PycharmProjects/work_allocation/TASKS/Jenkins.py", line 89, in <module>
t = filter_apk()
File "C:/Users/amitesh.sahay/PycharmProjects/work_allocation/TASKS/Jenkins.py", line 84, in filter_apk
data = pd.read_excel(old_xl)
File "C:\Users\amitesh.sahay\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\util\_decorators.py", line 296, in wrapper
return func(*args, **kwargs)
File "C:\Users\amitesh.sahay\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\io\excel\_base.py", line 304, in read_excel
io = ExcelFile(io, engine=engine)
File "C:\Users\amitesh.sahay\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\io\excel\_base.py", line 867, in __init__
self._reader = self._engines[engine](self._io)
File "C:\Users\amitesh.sahay\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\io\excel\_xlrd.py", line 22, in __init__
super().__init__(filepath_or_buffer)
File "C:\Users\amitesh.sahay\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\io\excel\_base.py", line 344, in __init__
filepath_or_buffer, _, _, _ = get_filepath_or_buffer(filepath_or_buffer)
File "C:\Users\amitesh.sahay\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\io\common.py", line 243, in get_filepath_or_buffer
raise ValueError(msg)
ValueError: Invalid file path or buffer object type: <class 'pandas.core.frame.DataFrame'>
There is a difference between your use-case and use-case shown in the weblink. You want to apply a single filter (apk files), whereas the example you saw had multiple filters which were to be applied one after another (multiple species).
This will do the trick.
def filter_apk():
old_xl = json_to_excel()
data = pd.read_excel(old_xl)
a = data[data["Name"].str.contains("\.apk")]
a.to_excel("<path_to_new_excel>\\new_excel_name.xlsx")
Regarding your new updated question. I guess your first function is not working as you think it is working.
new_xl = new_df.to_excel('abc.xlsx')
This will write an Excel file, as you expect, and that part works.
However, assigning the result to new_xl does nothing useful, because DataFrame.to_excel has no return value (it returns None). So when you return new_xl from json_to_excel, you actually return None, and in your second function old_xl = json_to_excel() sets old_xl to None, which is why pd.read_excel(old_xl) raised the ValueError about <class 'NoneType'>.
So, your functions should be something like this:
def json_to_excel():
with open('installed-files.json') as jf:
data = json.load(jf)
df = pd.DataFrame(data)
new_df = df[df.columns.difference(['SHA256'])]
new_df.to_excel('abc.xlsx')
def filter_apk():
json_to_excel()
data= pd.read_excel('abc.xlsx')
a = data[data["Name"].str.contains("\.apk")]
a.to_excel('zybg.xlsx')

Passing SAFEARRAY value to COM client

I am trying to automate a tool via the win32com.client module. The tool expects its input in the following format (the format shown below is specific to MATLAB):
HRESULT StaticStokesParameters([in] SAFEARRAY(double) newVal)
I have no clue what the SAFEARRAY type represents. I have tried to create a 2D array in Python, but I keep receiving the following error:
pywintypes.com_error: (-2147352571, 'Type mismatch.', None, 1)
I can read the values out without any problem, but when I assign the same value back as the SET argument, it fails:
EngineMgr = win32com.client.Dispatch("EngineMgr")
Engine = EngineMgr.OpenEngine(0)
d_array = Engine.StaticStokesParameters
print(d_array)
**(-1.0, 0.0, 0.0) # Output of Print Statement**
Engine.StaticStokesParameters = d_array
**Traceback (most recent call last):
File "<pyshell#17>", line 1, in <module>
Engine.StaticStokesParameters = d_array
File "C:\Users\ashes\Anaconda3\lib\site-packages\win32com\client\dynamic.py", line 549, in __setattr__
self._oleobj_.Invoke(entry.dispid, 0, invoke_type, 0, value)
pywintypes.com_error: (-2147352571, 'Type mismatch.', None, 1)**
I got this working by importing the modules shown below and wrapping the value in an explicitly typed VARIANT:
from win32com.client import VARIANT
import pythoncom
sop= Engine.StaticStokesParameters
Engine.StaticStokesParameters = VARIANT(pythoncom.VT_ARRAY | pythoncom.VT_R8,sop)

ValueError: Not enough values to unpack (expected 3, got 1) - tuple DOES contain 3 elements

I've created a class property to handle image data and then I'm trying to assign data to this property using a 3-element tuple. For some reason Python seems to think that my tuple contains just one element. Any ideas about what's going on here?
The property setter is defined as follows:
@data.setter
def data(self, *args):
image_array, dtype, sizes = args
if image_array is None:
self._data = np.empty(sizes, dtype)
else:
self._data = np.array(image_array, dtype)
self._set_color_data()
And upon execution, I get the following output:
test = (image_temp, np.uint8, sizes)
print(len(test))
>>> 3
self.image5d.data = test
Traceback (most recent call last):
File "C:\***\Python36\lib\tkinter\__init__.py", line 1699, in __call__
return self.func(*args)
File "c:***\mmCIAD\mmciad.py", line 88, in open_file
self.image5d.data = test
File "c:\***\mmCIAD\mmciad.py", line 172, in data
image_array, dtype, sizes = args
ValueError: not enough values to unpack (expected 3, got 1)
Any help will be much appreciated!
You should use:
def data(self, args):
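A property setter is always called with exactly one value (the object on the right-hand side of the assignment), so *args wraps your 3-element tuple as the single item of args, and you get: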
args = [(v1, v2, v3)]

Itertuples() in Tkinter function reveals an AttributeError: 'tuple' object has no attribute 'A'

Within a Tkinter function, I need to build a list named 'value' by taking the value of the dataframe column df['A'] from every 10th row.
The following for-loop works perfectly out of a Tkinter function:
value = []; i = 0
for row in df.itertuples():
i = 1 + i
if i == 10:
value_app = row.A
value.append(value_app)
i=0
However within Tkinter function I have the following error:
Exception in Tkinter callback
Traceback (most recent call last):
File "/Users/anaconda/lib/python3.6/tkinter/__init__.py", line 1699, in __call__
return self.func(*args)
File "<ipython-input-1-38aed24ba6fc>", line 4174, in start
dfcx = self.mg(a,b,c,d,e)
File "<ipython-input-1-38aed24ba6fc>", line 4093, in mg
value_app = r.A
AttributeError: 'tuple' object has no attribute 'A'
A similar for-loop structure is running in another part of the same Tkinter function and is executed without errors.
If the column A is your first column you can do :
value_app = row[0]
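I had the same problem, and I think that in this case itertuples() yields regular tuples rather than namedtuples, so the fields can only be accessed by position. Note that with the default index=True, position 0 of each tuple holds the row's index and the columns start at position 1, so check which position column A actually occupies.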

Keep getting error `TypeError: 'float' object is not callable' when trying to run file using numpy library

I intend to perform a Newton Raphson iteration on some data I read in from a file. I use the following function in my python program.
def newton_raphson(r1, r2):
guess1 = 2 * numpy.log(2) / (numpy.pi() * (r1 + r2))
I call this function as so:
if answer == "f": # if data is in file
fileName = input("What is the name of the file you want to open?")
dataArray = extract_data_from_file(fileName)
resistivityArray = []
for i in range(0, len(dataArray[0])):
resistivity_point = newton_raphson(dataArray[0][i], dataArray[1][i])
resistivityArray += [resistivity_point]
On running the program and entering my file name, this raises `TypeError: 'float' object is not callable`. Everything I've read online suggests this is due to a missing operator somewhere in my code, but I can't see where one is missing. Why do I keep getting this error, and how do I avoid it?
numpy.pi is not a function, it is a constant:
>>> import numpy
>>> numpy.pi
3.141592653589793
Remove the () call from it:
def newton_raphson(r1, r2):
guess1 = 2 * numpy.log(2) / (numpy.pi * (r1 + r2))
as that is causing your error:
>>> numpy.pi()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: 'float' object is not callable

Resources