multiple nested functions output - python-3.x

I'm trying to get the result of multiple functions as nested functions from a dataframe
For example, 2 functions:
def carr(df):
    """Add a ``carr`` column holding the row-wise maximum of two columns.

    For every row, the larger of ``end_value_carr`` and ``arr`` is written
    to ``df['carr']``. The frame is modified in place and also returned.
    """
    candidates = df[['end_value_carr', 'arr']]
    df['carr'] = candidates.max(axis=1)
    return df
def date(df):
    """Reduce the three date columns to plain ``datetime.date`` values.

    ``date_id``, ``renewal_date`` and ``next_renewal_date`` are each parsed
    with ``pd.to_datetime`` and replaced by their ``.dt.date`` part. The
    frame is modified in place and also returned.
    """
    for column in ('date_id', 'renewal_date', 'next_renewal_date'):
        parsed = pd.to_datetime(df[column])
        df[column] = parsed.dt.date
    return df
When I use each one separately I get the right output
However, trying to have them nested in one function gives me a NoneType:
def cleanup(data):
    """Return a copy of ``data``.

    NOTE: the two helpers below are only *defined* inside this function;
    nothing ever calls them, so the copy is returned without the ``carr``
    column or the date conversions applied.
    """
    def carr(df):
        df['carr'] = df[['end_value_carr', 'arr']].max(axis=1)
        return df

    def date(df):
        for column in ('date_id', 'renewal_date', 'next_renewal_date'):
            df[column] = pd.to_datetime(df[column]).dt.date
        return df

    df = data.copy()
    return df
Appreciate your help!
Thanks

Define all three functions separately
def carr(df):
    """Store the per-row maximum of ``end_value_carr`` and ``arr`` in ``carr``.

    The column is written onto ``df`` itself, and the same frame is returned
    so the call can be chained.
    """
    source_columns = ['end_value_carr', 'arr']
    row_maximum = df[source_columns].max(axis=1)
    df['carr'] = row_maximum
    return df
def date(df):
    """Convert the date columns of ``df`` to ``datetime.date`` objects.

    Handles ``date_id``, ``renewal_date`` and ``next_renewal_date`` in that
    order: each is parsed with ``pd.to_datetime`` and overwritten with its
    ``.dt.date`` values. ``df`` is changed in place and returned.
    """
    date_columns = ['date_id', 'renewal_date', 'next_renewal_date']
    for name in date_columns:
        df[name] = pd.to_datetime(df[name]).dt.date
    return df
Call the first two functions in your third one.
def cleanup(data):
    """Apply the ``carr`` and ``date`` steps to a copy of ``data``.

    The input is copied first, so the caller's frame is not modified; the
    transformed copy is returned.
    """
    working = data.copy()
    return date(carr(working))
Then you can call your cleanup function, which will call carr and date on its own.
# cleanup() works on data.copy(), so rebind the name to pick up the cleaned frame.
df = cleanup(df)

Related

Passing DF from function

I wrote a function that builds a DataFrame inside it, and I want to use that DataFrame afterwards, either outside the function or in another function. How can I do that without running into an "undefined name" problem?
Thanks a lot :)
The code:
def DisplayDataFrame():
    """Let the user pick an Excel file and show its contents in a Treeview.

    Opens a file dialog, reads the chosen file with ``pd.read_excel`` and
    fills a new ``ttk.Treeview`` (created on ``root`` and packed) with one
    tree column per DataFrame column and one item per DataFrame row.
    Returns ``None``; nothing is done if no file path is supplied.
    """
    file_path = filedialog.askopenfilename()
    if not file_path:
        # Empty result (e.g. the dialog was dismissed): nothing to load.
        return
    df1 = pd.read_excel(file_path)
    cols = list(df1.columns)
    tree = ttk.Treeview(root)
    tree.pack()
    tree["columns"] = cols
    for i in cols:
        tree.column(i, anchor="w")
        tree.heading(i, text=i, anchor='w')
    for index, row in df1.iterrows():
        tree.insert("", 0, text=index, values=list(row))
    # ``index`` is an attribute, not a method; calling it raises TypeError.
    option = df1.index
Do you mean use df1 from your DisplayDataFrame() in other functions? If so, you can have return df1 in your function like this:
def DisplayDataFrame():
    '''
    your original code that defines df1
    '''
    # Hand the frame back to the caller so it is usable outside this function.
    return df1
dataframe = DisplayDataFrame()
Then you can reuse the dataframe in other functions.

How to create a DataFrame from a list that each column is created by a regex expression

I have a list as such:
lst = ['2021_01_21__11_10_54_1__13928_snapshot.jpg',
'2021_01_21__12_27_44_1__13934_snapshot.jpg',
'2021_01_21__11_11_08_2__13928_snapshot.jpg',
'2021_01_21__12_27_56_2__13934_snapshot.jpg',
'2021_01_21__11_11_19_3__13928_snapshot.jpg',
'2021_01_21__12_28_08_3__13934_snapshot.jpg']
I want to create a DataFrame so that each column will be represented by:
def by_number(path):
    """Return the five-digit group(s) that follow a double underscore.

    Only the final component of ``path`` is searched. For example
    ``'2021_01_21__11_10_54_1__13928_snapshot.jpg'`` gives ``['13928']``.
    The result is the list produced by ``re.findall`` (empty if nothing
    matches).
    """
    base_name = os.path.basename(path)
    # Search the file name itself: re.findall needs a string, and handing it
    # the whole list of names raises TypeError.
    return re.findall(r'[\_]{2}(\d{5})', base_name)
And the rows will be represented by:
def by_index(path):
    """Return the single digit(s) found between ``_`` and ``__``.

    Only the final component of ``path`` is searched. For example
    ``'2021_01_21__11_10_54_1__13928_snapshot.jpg'`` gives ``['1']``.
    The result is the list produced by ``re.findall`` (empty if nothing
    matches).
    """
    base_name = os.path.basename(path)
    # Search the file name itself: re.findall needs a string, and handing it
    # the whole list of names raises TypeError.
    return re.findall(r'\_(\d)[\_]{2}', base_name)
So eventually I'll get a DataFrame that looks something like this:
import re

import pandas as pd

name_list = ['2021_01_21__11_10_54_1__13928_snapshot.jpg',
             '2021_01_21__12_27_44_1__13934_snapshot.jpg',
             '2021_01_21__11_11_08_2__13928_snapshot.jpg',
             '2021_01_21__12_27_56_2__13934_snapshot.jpg',
             '2021_01_21__11_11_19_3__13928_snapshot.jpg',
             '2021_01_21__12_28_08_3__13934_snapshot.jpg']

# Compiled once, outside the loop. The first pattern captures the single
# digit between "_" and "__" (row label); the second captures the five
# digits after "__" (column label).
row_pattern = re.compile(r'\_(\d)[\_]{2}')
col_pattern = re.compile(r'[\_]{2}(\d{5})')

# Gather every cell as {row label: {column label: file name}} and build the
# frame in one step. This avoids DataFrame.append (removed in pandas 2.0)
# and the placeholder seed row that would otherwise stay in the result.
cells = {}
for name in name_list:
    count = row_pattern.search(name).group(1)
    col = col_pattern.search(name).group(1)
    cells.setdefault(count, {})[col] = name

df = pd.DataFrame(list(cells.values()),
                  index=pd.Index(list(cells), name='count'))
print(df)

How to apply a function fastly on the list of DataFrame in Python?

I have a list of DataFrames with equal length of columns and rows but different values, such as
data = [df1, df2,df3.... dfn] .
How can I apply a function to each DataFrame in the list data? I used the following code, but it does not work:
data = [df1, def2,df3.... dfn]
def maxloc(data):
    """Flag the interior local maxima of the ``q_value`` column.

    Adds a float column ``loc_max`` to ``data``: 1.0 where a value is greater
    than or equal to both of its neighbours, 0.0 elsewhere. The first and
    last rows have only one neighbour and are always 0.0. ``data`` is
    modified in place and also returned.
    """
    q = data['q_value'].to_numpy()
    flags = np.zeros(len(q))
    # Compare every interior point with its left and right neighbour in one
    # vectorised step. Working on positions avoids the chained assignment
    # data['loc_max'][i] = 1, which may write to a temporary copy, and works
    # for any index labels. For fewer than three rows the slices are empty.
    flags[1:-1] = (q[1:-1] >= q[:-2]) & (q[1:-1] >= q[2:])
    data['loc_max'] = flags
    return data
df_list = [df.pipe(maxloc) for df in data]
Seems to me the problem is in your maxloc() function as this code works.
I added also the maximum value in the return of maxloc.
from random import randrange
import pandas as pd
def maxloc(data_frame):
    """Locate the largest entry of the ``Value`` column.

    Returns a ``(label, value)`` tuple: the index label reported by
    ``idxmax`` and the value stored under that label.
    """
    values = data_frame['Value']
    max_index = values.idxmax(0)
    return max_index, values[max_index]
# Build five test frames, each holding ten random integers below 100 in a
# 'Value' column indexed 0..9.
data = [
    pd.DataFrame({'Value': [randrange(100) for _ in range(10)]}, index=(range(10)))
    for _ in range(5)
]
# Each pipe() call yields the (position, maximum) tuple returned by maxloc.
df_list = [df.pipe(maxloc) for df in data]
for i, (index, value) in enumerate(df_list):
    print(f"Data-frame {i:02d}: maximum = {value} at position {index}")

How to pass a Pandas dataframe as argument to function through apply

I have a custom function as below to do something.
def f(x):
    """Set column ``A`` of ``x`` to the string ``'123'`` and return ``x``.

    The assignment is made on the object passed in; the same object is
    returned.
    """
    x['A'] = '123'
    return x
df = df.groupby(level=0).apply(f)
Now, I would like to change the function as
def f(x):
    """Set ``x['A']`` to ``'123'`` and ``df2['name']`` to ``'ABC'``; return ``x``.

    ``df2`` is not a parameter of this function: it is looked up in the
    enclosing (module) scope when the function runs.
    """
    x['A'] = '123'
    df2['name'] = 'ABC'
    return x
How to pass the dataframe df2 as an argument to apply?
Does it work? df = df.groupby(level=0).apply(f, args = df2)
df = df.groupby(level=0).apply(f, args = df2) - this will give the error "TypeError: f() got an unexpected keyword argument 'args'"
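Correct solution: remove the args keyword and pass df2 positionally, as shown below; this resolves the error. Note that f must then accept the extra argument, e.g. def f(x, df2):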
df = df.groupby(level=0).apply(f, df2)

In python pandas why can't I add more levels to a MultiIndex?

I would like to create DataFrames that have three levels. Why does the following function not work twice?
def superGroup(dataframe=None,multi_index_name=None):
    """Return a new frame whose columns sit under one extra top-level label.

    The new column index is the product of ``[multi_index_name]`` with the
    existing ``dataframe.columns``; values and row index are taken from
    ``dataframe`` unchanged.
    """
    stacked_columns = pd.MultiIndex.from_product(
        [[multi_index_name], dataframe.columns]
    )
    return pd.DataFrame(
        dataframe.values,
        index=dataframe.index,
        columns=stacked_columns,
    )
ran = pd.DataFrame(np.random.rand(3),columns=["Random"])
ran2 = superGroup(ran,"Hello")
superGroup(ran2,"World")#Does not work
>>>[Out]: NotImplementedError: isnull is not defined for MultiIndex
Here is a solution I figured out after spending way too much time on this problem. Hope that it helps those out there that have had the same problem.
def superGroup(dataframe=None,new_level=None):
    """Return ``dataframe`` with ``new_level`` added as the outermost column level.

    Parameters
    ----------
    dataframe : DataFrame
        Frame whose columns are either a flat Index or a MultiIndex.
    new_level : str
        Label used for the new top column level.

    Returns
    -------
    out_df : DataFrame
        New frame with the same values and row index; every column key is
        prefixed with ``new_level``.
    """
    columns = dataframe.columns
    # isinstance (not an exact type comparison) so that every Index subclass
    # is handled, without relying on a private pandas module path.
    if isinstance(columns, pd.MultiIndex):
        # Prefix each existing key tuple. This keeps the actual column order
        # and combinations, which a product of the level values would not.
        keys = [(new_level, *key) for key in columns]
    else:
        keys = [(new_level, key) for key in columns]
    # Keep any existing level names; the new outer level is unnamed.
    names = [None, *columns.names]
    new_columns = pd.MultiIndex.from_tuples(keys, names=names)
    out_df = pd.DataFrame(dataframe.values, index=dataframe.index, columns=new_columns)
    return out_df

Resources