call special function when DataFrame is called upon custom class - python-3.x

I have the following classes:
class Result(UserDict):
    """Dictionary whose ``keys()`` are returned in a caller-supplied order.

    Args:
        keys_order: iterable of keys, in the order ``keys()`` should report them.
        items: the mapping that becomes the underlying ``data`` store.
    """

    def __init__(self, keys_order, items):
        # The original called ``super().__init__(self)``, which handed the
        # still-empty instance to UserDict as its own initial content.
        super().__init__()
        self.__keys_order = keys_order
        self.data = items

    def __repr__(self):
        # ``_stringify`` is not defined in this snippet; it has to be provided
        # by the surrounding module.
        attributes = ["{}:{}".format(_stringify(k), _stringify(self.data[k])) for k in self.keys()]
        return ", ".join(attributes)

    def keys(self):
        """Return the keys as a new list, in the order given at construction."""
        return list(self.__keys_order)
class Results(UserList):
    """List of mapping-like rows that can be exported as a pandas DataFrame."""

    def to_df(self):
        """Build a DataFrame whose columns follow the key order of the first row.

        Returns:
            A ``pd.DataFrame`` with one row per element; an empty DataFrame
            when the list holds no rows (the original raised ``IndexError``
            on ``self.data[0]`` in that case).
        """
        if not self.data:
            return pd.DataFrame()
        column_order = list(self.data[0].keys())
        return pd.DataFrame(self.data, columns=column_order)
In this way, when I print an instance of Result, it will show it with the keys in the desired way (determined by keys_order). Also, the class Results implements the method to_df, which returns a pandas DataFrame with the columns ordered by the keys.
I know, for example, that if I want len(results) to behave in a special way, I have to implement the __len__ method. In a similar way, is it possible to implement a special method so that when pd.DataFrame(results) is called on a Results instance it calls the to_df method instead, so that the columns are ordered by the keys?

You could have your Result class inherit not just from UserDict but also from pd.DataFrame. Then you just have to define the _data attribute of your class to be what you want the class to hand to pd.DataFrame(), i.e. the pd.DataFrame that you want to be constructed.
class Result(UserDict, pd.DataFrame):
"""Implements a special version of dictionary that will return the keys
ordered in the initialized way"""
def __init__(self, keys_order, items):
# NOTE(review): this passes the instance itself as the first positional
# argument of the parent initializer - confirm that is intended.
super().__init__(self)
self.__keys_order = keys_order
self.data = items
# _data is the attribute DataFrame.__init__ copies from when it is given a
# DataFrame instance (see the pandas excerpt quoted after this snippet).
# NOTE(review): self.data[0] assumes items can be indexed with 0 (e.g. a list
# of mappings) - confirm against the caller.
self._data = pd.DataFrame(self.data, columns=self.data[0].keys())
This becomes evident when looking at the source code of the pd.DataFrame class:
def __init__(self, data=None, index=None, columns=None, dtype=None,
copy=False):
if data is None:
data = {}
if dtype is not None:
dtype = self._validate_dtype(dtype)
if isinstance(data, DataFrame):
data = data._data
upon calling the __init__ method, which is what you are effectively doing when you use pd.DataFrame(results), it will check if results is an instance of DataFrame. If it is, then it will just set data to be results._data. Alternatively your results class could also inherit from dict, in which case the dict constructor would be called inside the __init__:
elif isinstance(data, dict):
mgr = self._init_dict(data, index, columns, dtype=dtype)
Here the excerpt from self._init_dict that would be called in your case:
else:
keys = list(data.keys())
if not isinstance(data, OrderedDict):
keys = _try_sort(keys)
columns = data_names = Index(keys)
arrays = [data[k] for k in keys]
So you have to define a keys() method for your class that returns the keys (which you already have), as well as __getitem__, so that data[k] in the last line returns the values of column k.

Related

PYTHON: How to access class object's members if the object is a key in a dictionary

I have the following class:
class car_class(object):
    """Hashable record of a car's mileage and tyre size.

    Equality and hashing are both derived from ``(mileage, tyre_size)``, so two
    instances holding the same values address the same dictionary slot.
    Changing either attribute after the object has been used as a dictionary
    key changes its hash, so the entry can no longer be found by lookup.

    Args:
        mileage: mileage value, defaults to 11.
        tyre_size: tyre size value, defaults to 11.
    """

    def __init__(self, mileage=11, tyre_size=11):
        self.mileage = mileage
        self.tyre_size = tyre_size
        self.default_val = ''

    def _key(self):
        # Single source of truth for __eq__ and __hash__.
        return (self.mileage, self.tyre_size)

    def __hash__(self):
        return hash(self._key())

    def __getitem__(self, default_val):
        """Return the stored ``default_val`` string; the subscript is ignored."""
        return self.default_val

    def __setitem__(self, default_val, mileage, tyre_size):
        """Store ``"<mileage>_<tyre_size>"`` as the default value.

        The original assigned through ``self[default_val] = ...``, which
        re-entered ``__setitem__`` with too few arguments and always raised
        ``TypeError``; the value is now written to the attribute directly.
        """
        self.default_val = str(mileage) + '_' + str(tyre_size)

    def __eq__(self, other):
        # Objects without the two attributes are "not comparable" rather than
        # an AttributeError, so ``==`` / ``!=`` against foreign types works.
        try:
            return self._key() == (other.mileage, other.tyre_size)
        except AttributeError:
            return NotImplemented

    def __ne__(self, other):
        # Not strictly necessary, but avoids having both x == y and x != y
        # true at the same time.
        return not (self == other)

    def __repr__(self):
        return ('dict_cl: (tyre_size=\'%d\', mileage=\'%d\'' % (int(self.tyre_size), int(self.mileage)))

    def __str__(self):
        # Same text as __repr__, kept in one place.
        return self.__repr__()
I also have a dictionary which takes the object of this class as the Key against a value as follows-
# Build one car_class instance and try to use it as a dictionary key.
my_dict = dict()
dict_value_list = list()
mm_dict_cl = car_class()
mm_dict_cl.mileage = 29
mm_dict_cl.tyre_size = 265
dict_value_list.extend(['car_color'])
# Keyword form of update(): the key that gets stored is the string
# 'mm_dict_cl', not the object bound to that name.
my_dict.update(mm_dict_cl = dict_value_list)
So the dictionary(my_dict)key has the class object(mm_dict_cl) as key and car_color as a value for this key. The Key is the class object itself having two attributes mileage and tyre_size.
Now when I print the following value of the dictionary I get the value as car_color as expected -
`>>>` print(my_dict[next(iter(my_dict))])
['car_color']
However I'm struggling to find a way to retrieve the properties of the class object.
`>>>` print(next(iter(my_dict)))
mm_dict_cl
It prints only the name of the object (mm_dict_cl), and the type of the key is reported as a string:
`>>>` print(type(next(iter(my_dict))))
<type 'str'>
Query: How can then I access the object attributes of the key?
I want to check what is the value of tyre_size and mileage for a particular 'car_color' using the key of the dictionary my_dict
Please help, a novice here, trying to learn this language.
-edit: Fixed the extend call to the list for adding 'car_colour' as a list instead of a string as pointed by SorousH Bakhtiary.
The problem here is that you stored the name of your instance (a string) as the key in your dictionary, so the dictionary holds no reference to the instance itself. However, because your object is defined in the global namespace, you can retrieve it like this (I simplified your class for the moment):
class car_class:
    """Simplified car record: just the three attributes, no dunder methods."""

    def __init__(self, mileage=11, tyre_size=11):
        self.mileage = mileage
        self.tyre_size = tyre_size
        self.default_val = ''


my_dict = {}
dict_value_list = []
mm_dict_cl = car_class()
dict_value_list.append('car_color')
print(dict_value_list)
mm_dict_cl.mileage = 'temp1'
mm_dict_cl.tyre_size = 'temp2'
# Keyword form: the key stored in my_dict is the string 'mm_dict_cl'.
my_dict.update(mm_dict_cl=dict_value_list)
# here :
# The key is only a name, so look that name up in the module namespace to get
# back the instance it refers to.
obj = globals().get(next(iter(my_dict)))
print(obj.mileage)  # prints temp1
print(obj.tyre_size)  # prints temp2
btw, you should have used append instead of extend method to add 'car_color'

How to overload python sets to accept duplicates?

I had taken a python coding test, which asked to create a class that overloads the builtin set(all the methods of sets must work). The only change between the set I was asked to create and the builtin sets is that my custom sets SHOULD store duplicates, and 2 more custom methods.
Here is what I could come up with:
import builtins
class Multiset(builtins.set):
# Subclasses set, but every overridden method below works on a second,
# separate set stored in self.my_set.
def __init__(self):
super().__init__()
# my_set is a plain builtins.set, so adding an equal value twice still
# leaves a single entry.
self.my_set = builtins.set()
def add(self, val):
# adds val to the inner set
self.my_set.add(val)
def remove(self, val):
# removes one occurrence of val from the multiset, if any
self.my_set.discard(val)
def __contains__(self, val):
# returns True when val is in the multiset, else returns False
return val in self.my_set
def __len__(self):
# returns the number of elements in the multiset
return len(self.my_set)
I have tried overriding multiple methods, but to no avail. I also couldn't find a method that defined this non-duplicate criteria for sets. So, how do I do this?
EDIT 1:
Here is the problem description, if you want to see it.
You can use a dictionary which maps objects to a list of all objects that are equal to themselves. The advantage is that dict keys are already set-like.
from collections import defaultdict
class MultiSet:
    """Set-like container that keeps every duplicate that is added.

    Items are grouped in a dict that maps each distinct value to the list of
    all stored objects equal to it. A bucket is deleted as soon as it becomes
    empty, so membership always reflects what is actually stored.
    """

    def __init__(self):
        self._items = defaultdict(list)

    def add(self, item):
        """Store one more occurrence of ``item``."""
        self._items[item].append(item)

    def remove(self, item):
        """Remove one occurrence of ``item``; do nothing if it is absent."""
        # .get() avoids the defaultdict side effect of creating an empty
        # bucket for an item that was never added.
        bucket = self._items.get(item)
        if not bucket:
            return
        bucket.pop(0)
        if not bucket:
            # Drop the key so ``item in multiset`` turns False again.
            del self._items[item]

    def __contains__(self, item):
        return item in self._items

    def __len__(self):
        """Return the total number of stored occurrences."""
        return sum(len(v) for v in self._items.values())

How to properly pass self in stacked decorators on class methods in python?

First, I would like to have a class method that is concerned only with manipulating a dataframe column, so that I can really focus on the manipulation itself rather than the background stuff. This background stuff is actually applying this simple function over specified columns (e.g. all numeric ones, stated explicitly by their column name).
To separate this from the nasty bits, I tried using decorators and actually succeeded.
However, the difficulty arose when I wanted to use a second decorator that is in fact a plotting method for each of those manipulated columns, to keep track of the manipulations.
The code that you find below is a working version and simplified:
plotter is merely printing each columns name rather than actually plotting it
Note the commented self, which allows this code to work properly. I don't understand why.
import pandas as pd
import numpy as np
class test_class:
    """Holds a random 100x4 integer DataFrame and normalises chosen columns.

    ``plotter`` and ``wrapapply`` are plain functions defined in the class body
    so they can be used as decorators on ``norm`` further down.
    """

    def __init__(self):
        self.df = pd.DataFrame(np.random.randint(0, 100, size=(100, 4)), columns=list('ABCD'))

    def plotter(fn):
        """Decorator: print every requested column name, then call ``fn``."""
        def printer(self, **kwargs):
            print('printer call')
            for col in kwargs['column']:
                print(col)
            # self must be forwarded explicitly: printer is the function that
            # ends up on the class, so it receives the instance and has to
            # hand it on to the wrapped function (applyer).
            return fn(self, **kwargs)
        return printer

    def wrapapply(fn):
        """Decorator: apply ``fn`` column-wise to ``self.df[column]`` in place."""
        def applyer(self, **kwargs):
            print('applyer call')
            # 'column' is consumed by the decorators; everything else goes to fn.
            fnkwargs = {k: v for k, v in kwargs.items() if k != 'column'}
            self.df[kwargs['column']] = pd.DataFrame.apply(self.df[kwargs['column']], func=fn, axis=0, **fnkwargs)
        return applyer

    # The two decorator lines had been reduced to '#plotter' / '#wrapapply'.
    @plotter
    @wrapapply
    def norm(column):
        """Standardise one column: subtract its mean, divide by its std."""
        return (column - np.mean(column)) / np.std(column)


if __name__ == '__main__':
    a = test_class()
    a.norm(column=['A', 'B'])
    a.norm(column=['D'])
    print(a.df)
The result I expect is:
- a silent in-place manipulation of all columns A, B, D in the DataFrame;
- each of the column names of a call printed by a separate decorator function (in my application, this is in fact a plotting method).

Create a list that can be accessed from multiple classes

I'm trying to create a list that is populated in one class and read in another class. I have tried a number of things; right now I'm getting the error shown below. I show only one class where the list is read, but there will be others.
How do I do this?
ReadDatabase.py
class ReadDatabase(object):
# The class body is executed before the name ReadDatabase is bound, which is
# why the next line fails with "'ReadDatabase' not defined".
f_type_list = ReadDatabase.getFTypes() ## 'ReadDatabase' not defined
# NOTE(review): the line below was presumably the decorator @staticmethod
# before the formatting was lost - confirm.
#staticmethod
def getFTypes():
<reads from database>
return my_list
MyTreeView.py
from ReadDatabase import *
class MyTreeView (ttk.Treeview):
# NOTE(review): the [] defaults are created once and shared by every call, and
# the parameter name `list` shadows the builtin inside __init__.
def __init__(self, frame=None, list=[], column_headers=[]):
# Reads the class-level list that ReadDatabase is meant to populate.
for f_type in ReadDatabase.f_type_list:
<do stuff>
You can separate it into two different classes in two different ways.
Example 1: Using two classes, one class (A) creates & updates the list and the other class (B) creates an instance of A and controls it:
class A:
    """Creates and updates the list."""

    def __init__(self):
        self.my_list = []
        self._read_from_database()

    def _read_from_database(self):
        """Fill ``my_list``; stands in for the real database read."""
        # some database update logic
        self.my_list.append(3)


class B:
    """Creates an instance of A and can read from it."""

    def __init__(self):
        self.a = A()

    def print_list(self):
        """Print every element of the owned A's list with its index."""
        for index, element in enumerate(self.a.my_list):
            print(f"Index: {index} Element: {element}")


b_object = B()
b_object.print_list()  # Prints: Index: 0 Element: 3
or
Example 2: You can just create a method in class B and pass it the list (lst) from class A:
class A:
    """Creates and updates the list."""

    def __init__(self):
        self.my_list = []
        self._read_from_database()

    def _read_from_database(self):
        """Fill ``my_list``; stands in for the real database read."""
        # some database update logic
        self.my_list.append(3)


class B:
    """Reads a list that is handed to it; holds no state of its own."""

    def print_list(self, lst):
        """Print every element of ``lst`` with its index."""
        for index, element in enumerate(lst):
            print(f"Index: {index} Element: {element}")


a_object = A()
b_object = B()
b_object.print_list(a_object.my_list)
You can also pass the entire instance of A to B for it to use if you wanted to do it that way.

Is there a way to create a mutable list in a class statement? (Python 3x)

So in creating a class, I noticed that I was unable to append any elements for my list in the class. Is there a way to append and arrange the order of elements in the list I create in the class statement?
class Foo():
def __init__(self, bar):
# bar is accepted but never used; the list always starts out empty.
self.__bar = []
def input_method(self):
# append() is called without an argument here, so nothing can be added.
self.__bar.append()
def return_bar(self)
return self.__bar
# Foo() is called without the bar argument that __init__ declares.
candy = Foo()
Is there a way for me to append an element into self.__bar?
You need to actually append something to the list in your appending method:
class Foo():
    """Wraps a private list that grows through ``input_method``."""

    def __init__(self):
        self.__bar = []

    def input_method(self, something):
        """Append ``something`` to the private list."""
        self.__bar.append(something)

    def return_bar(self):
        """Return the private list itself (not a copy)."""
        return self.__bar


candy = Foo()
Now it seems good to me:
>>> candy.input_method('helloo')
>>> candy.return_bar()
['helloo']
Note that since you weren't using (or sending) the bar argument to the __init__ method, I omitted it from my answer (just warning you)!

Resources