Passing DF from function - python-3.x

i wrote a function which build a df inside it and i want to use it afterwards outside the function or in another function, how can i do it witout facing any recognition problem?
Thankw's a lot :)
The code:
def DisplayDataFrame():
file_path = filedialog.askopenfilename()
df1 = pd.read_excel(file_path)
cols = list(df1.columns)
tree = ttk.Treeview(root)
tree.pack()
tree["columns"] = cols
for i in cols:
tree.column(i, anchor="w")
tree.heading(i, text=i, anchor='w')
for index, row in df1.iterrows():
tree.insert("", 0, text=index, values=list(row))
option = df1.index()

Do you mean use df1 from your DisplayDataFrame() in other functions? If so, you can have return df1 in your function like this:
def DisplayDataFrame():
'''
your original codes to define df1
'''
return df1
dataframe = DisplayDataFrame()
Then you can reuse the dataframe in other functions.

Related

How to create a DataFrame from a list that each column is created by a regex expression

I have a list as such:
lst = ['2021_01_21__11_10_54_1__13928_snapshot.jpg',
'2021_01_21__12_27_44_1__13934_snapshot.jpg',
'2021_01_21__11_11_08_2__13928_snapshot.jpg',
'2021_01_21__12_27_56_2__13934_snapshot.jpg',
'2021_01_21__11_11_19_3__13928_snapshot.jpg',
'2021_01_21__12_28_08_3__13934_snapshot.jpg']
I want to create a DataFrame so that each column will be represented by:
def by_number(path):
base_name = os.path.basename(path)
return re.findall('[\_]{2}(\d{5})',lst)
And the rows will be represented by:
def by_index(path):
base_name = os.path.basename(path)
return re.findall('\_(\d)[\_]{2}',lst)
So eventually I'll get a DataFrame that looks something like this:
name_list = ['2021_01_21__11_10_54_1__13928_snapshot.jpg',
'2021_01_21__12_27_44_1__13934_snapshot.jpg',
'2021_01_21__11_11_08_2__13928_snapshot.jpg',
'2021_01_21__12_27_56_2__13934_snapshot.jpg',
'2021_01_21__11_11_19_3__13928_snapshot.jpg',
'2021_01_21__12_28_08_3__13934_snapshot.jpg']
import re
import pandas as pd
df = pd.DataFrame([[0]], columns=['count']) # initialize dataframe
for name in name_list:
count = re.search('\_(\d)[\_]{2}',name).group(1)
col = re.search('[\_]{2}(\d{5})',name).group(1)
if ((df['count'] == count)).any():
df.loc[df['count'] == count, col] = name
else:
new_row = pd.DataFrame([[count,name]], columns=['count',col])
df = df.append(new_row)
df.set_index('count', inplace=True)
print(df)

df.to_excel capture only last request of iteration pandas

I am currently trying to iterate through a large dataset and using to.excel() to write in my dataframe to Excel.
My code:
writer = pd.ExcelWriter(r'report.xlsx')
for x in range(3):
slq = ("select date_added, fruit_id from market")
data = pd.read_sql(sql, c)
df = pd.DataFrame(data)
df.to_excel(writer)
writer.save()
When this is run, I am only capturing the 3rd request in my range. Is there a different method that would allow me to capture all 3 requests in my range?
There does not appear to be a ExcelWriter.append method. Instead, make a list of the dataframes and pd.concat at the end.
writer = pd.ExcelWriter(r'report.xlsx')
dfs = []
for x in range(3):
sql = ("select date_added, fruit_id from market")
data = pd.read_sql(sql, c)
df = pd.DataFrame(data)
dfs.append(df)
df = pd.concat(dfs)
df.to_excel(writer)
writer.save()
Alternatively, pd.DataFrame.to_excel does have a startrow argument that could be used to append.
writer = pd.ExcelWriter(r'report.xlsx')
row = 0
for x in range(3):
sql = ("select date_added, fruit_id from market")
data = pd.read_sql(sql, c)
df = pd.DataFrame(data)
df.to_excel(writer, startrow=row)
row += len(df)
writer.save()

multiple nested functions output

I'm trying to get the result of multiple functions as nested functions from a dataframe
For example, 2 functions:
def carr(df):
df['carr'] = df[['end_value_carr','arr']].max(axis=1)
return df
def date(df):
df['date_id'] = pd.to_datetime(df['date_id']).dt.date
df['renewal_date'] = pd.to_datetime(df['renewal_date']).dt.date
df['next_renewal_date'] = pd.to_datetime(df['next_renewal_date']).dt.date
return df
When I use each one separately I get the right output
However, trying to have them nested in one function gives me a NoneType:
def cleanup(data):
df = data.copy()
def carr(df):
df['carr'] = df[['end_value_carr','arr']].max(axis=1)
return df
def date(df):
df['date_id'] = pd.to_datetime(df['date_id']).dt.date
df['renewal_date'] = pd.to_datetime(df['renewal_date']).dt.date
df['next_renewal_date'] = pd.to_datetime(df['next_renewal_date']).dt.date
return df
return df
Appreciate your help!
Thanks
Define all three functions separately
def carr(df):
df['carr'] = df[['end_value_carr','arr']].max(axis=1)
return df
def date(df):
df['date_id'] = pd.to_datetime(df['date_id']).dt.date
df['renewal_date'] = pd.to_datetime(df['renewal_date']).dt.date
df['next_renewal_date'] = pd.to_datetime(df['next_renewal_date']).dt.date
return df
Call the first two functions in your third one.
def cleanup(data):
df = data.copy()
df = carr(df)
df = date(df)
return df
Then you can call your cleanup function, which will call carr and date on its own.
df = cleanup(df)

Writing to a dataframe through a loop

I have a dataframe with two columns, one called 'name' that is a string, and one called 'route' that is a Google polyline. I'm using the polyline library to decode the polyline into lat/long. I want to loop over each row to decode but it only seems to decode only the first row and write it to the rest of the created column. This is what I have so far.
df = pd.DataFrame(activities)
for row in df.itertuples(index=False):
name = row[0]
route = row[1]
try:
decoded = polyline.decode(route.replace('\\\\','\\'), geojson=True)
df['decode'] = df.apply(lambda route: [decoded], axis=1)
except:
print(name)
Use DataFrame.apply with function:
df = pd.DataFrame(activities)
def decoder(name, route):
try:
return polyline.decode(route.replace('\\\\','\\'), geojson=True)
except:
print (name)
return np.nan
df['decode'] = df.apply(lambda x: decoder(x[0], x[1]), axis=1)

How to apply a function fastly on the list of DataFrame in Python?

I have a list of DataFrames with equal length of columns and rows but different values, such as
data = [df1, df2,df3.... dfn] .
How can I apply a function function on each dataframe in the list data? I used following code but it doe not work
data = [df1, def2,df3.... dfn]
def maxloc(data):
data['loc_max'] = np.zeros(len(data))
for i in range(1,len(data)-1): #from the second value on
if data['q_value'][i] >= data['q_value'][i-1] and data['q_value'][i] >= data['q_value'][i+1]:
data['loc_max'][i] = 1
return data
df_list = [df.pipe(maxloc) for df in data]
Seems to me the problem is in your maxloc() function as this code works.
I added also the maximum value in the return of maxloc.
from random import randrange
import pandas as pd
def maxloc(data_frame):
max_index = data_frame['Value'].idxmax(0)
maximum = data_frame['Value'][max_index]
return max_index, maximum
# create test list of data-frames
data = []
for i in range(5):
temp = []
for j in range(10):
temp.append(randrange(100))
df = pd.DataFrame({'Value': temp}, index=(range(10)))
data.append(df)
df_list = [df.pipe(maxloc) for df in data]
for i, (index, value) in enumerate(df_list):
print(f"Data-frame {i:02d}: maximum = {value} at position {index}")

Resources