Iterate rows in df and insert new row with result of column operations of the matching rows - python-3.x

I wanted to create a new row with difference in values of a particular column of two other rows with matching columns and another variable column
Project Cost element Version Value Cur
A A1 000 -10000 USD
A A1 ASO -8000 USD
A A1 CHO -2000 USD
A A1 CHr -2000 USD create third & fourth rows
Similarly for costements starting with 5 say B1. the delta row should have version ADJ.
After browsing through old posts got some looping logic.. but feel it is too cumbersome. Any smart ideas welcome.
for i in range(0,len(Project_Forecast_Source)):
if Project_Forecast_Source['CostElem'].loc[i] =='4020101000' :
if (Project_Forecast_Source['ProjDef'].loc[i] == Project_Forecast_Source['ProjDef'].loc[i+1] and
Project_Forecast_Source['CostElem'].loc[i] == Project_Forecast_Source['CostElem'].loc[i+1] and
Project_Forecast_Source['Version'].loc[i] == "000") :
Val = float(Project_Forecast_Source['Amount'].loc[i] - Project_Forecast_Source['Amount'].loc[i+1])
row_number = Project_Forecast_Source.index.max()+1
row_Value = [Project_Forecast_Source['ProjDef'].loc[i], Project_Forecast_Source['CostElem'].loc[i+1],"CHO", Project_Forecast_Source['ObCur'].loc[i], Val]
Project_Forecast_Source.loc[row_number] = row_Value
row_number1 = Project_Forecast_Source.index.max()+1
row_Value1 = [Project_Forecast_Source['ProjDef'].loc[i], Project_Forecast_Source['CostElem'].loc[i+1],"CHR", Project_Forecast_Source['ObCur'].loc[i], Val]
Project_Forecast_Source.loc[row_number1] = row_Value1
else :
if (Project_Forecast_Source['ProjDef'].loc[i] == Project_Forecast_Source['ProjDef'].loc[i+1] and
Project_Forecast_Source['CostElem'].loc[i] == Project_Forecast_Source['CostElem'].loc[i+1] and
Project_Forecast_Source['Version'].loc[i] == "000") :
Val = float(Project_Forecast_Source['Amount'].loc[i] - Project_Forecast_Source['Amount'].loc[i+1])
row_number = Project_Forecast_Source.index.max()+1
row_Value = [Project_Forecast_Source['ProjDef'].loc[i], Project_Forecast_Source['CostElem'].loc[i],"ADJ", Project_Forecast_Source['ObCur'].loc[i], Val]
Project_Forecast_Source.loc[row_number] = row_Value
elif ((Project_Forecast_Source['Version'].loc[i] == "ASO") and
# (Project_Forecast_Source['ProjDef'].loc[i] != Project_Forecast_Source['ProjDef'].loc[i+1]) and
# (Project_Forecast_Source['ProjDef'].loc[i] != Project_Forecast_Source['ProjDef'].loc[i-1]) and
(Project_Forecast_Source['CostElem'].loc[i] != Project_Forecast_Source['CostElem'].loc[i+1]) and
(Project_Forecast_Source['CostElem'].loc[i] != Project_Forecast_Source['CostElem'].loc[i-1])) :
Val = 0.0 - float(Project_Forecast_Source['Amount'].loc[i])
row_number = Project_Forecast_Source.index.max()+1
row_Value = [Project_Forecast_Source['ProjDef'].loc[i], Project_Forecast_Source['CostElem'].loc[i],"ADJ", Project_Forecast_Source['ObCur'].loc[i], Val]
Project_Forecast_Source.loc[row_number] = row_Value
elif Project_Forecast_Source['Version'].loc[i] == "000" :
Val = float(Project_Forecast_Source['Amount'].loc[i])
row_number = Project_Forecast_Source.index.max()+1
row_Value = [Project_Forecast_Source['ProjDef'].loc[i], Project_Forecast_Source['CostElem'].loc[i],"ADJ", Project_Forecast_Source['ObCur'].loc[i], Val]
Project_Forecast_Source.loc[row_number] = row_Value

Related

ValueError: Length of values (1) does not match length of index (50)

Hey there awesome peeps,
I am trying to retrieve some trend information based on some keywords that I have in a list (1000 keywords). In order to minimize the chance of getting blocked by Google I have a cutoff period of 50 and a 10 second pause. At the moment I get an error saying that my Length of value does not match the length of the index. This fails on the
df3['Trend'] = trends
If anyone can help I will really appreciate it.
Thanks!
!pip install pytrends
import pandas as pd
import json
import time
from pytrends.request import TrendReq
get_gsc_file = "/content/Queries.csv"
sortby = "Clicks"
cutoff = 50
pause = 10
timeframe = "today 3-m"
geo = "US"
df = pd.read_csv(get_gsc_file, encoding='utf-8')
df.sort_values(by=[sortby], ascending=False, inplace=True)
df = df[:cutoff]
d = {'Keyword': [], sortby:[], 'Trend': []}
df3 = pd.DataFrame(data=d)
keywords = []
trends = []
metric = df[sortby].tolist()
up = 0
down = 0
flat = 0
na = 0
for index, row in df.iterrows():
keyword = row['Top queries']
pytrends = TrendReq(hl='en-US', tz=360, retries=2, backoff_factor=0.1)
kw_list = [keyword]
pytrends.build_payload(kw_list, cat=0, timeframe=timeframe, geo=geo, gprop='')
df2 = pytrends.interest_over_time()
keywords.append(keyword)
try:
trend1 = int((df2[keyword][-5] + df2[keyword][-4] + df2[keyword][-3])/3)
trend2 = int((df2[keyword][-4] + df2[keyword][-3] + df2[keyword][-2])/3)
trend3 = int((df2[keyword][-3] + df2[keyword][-2] + df2[keyword][-1])/3)
if trend3 > trend2 and trend2 > trend1:
trends.append('UP')
up+=1
elif trend3 < trend2 and trend2 < trend1:
trends.append('DOWN')
down+=1
else:
trends.append('FLAT')
flat+=1
except:
trends.append('N/A')
na+=1
time.sleep(pause)
df3['Keyword'] = keywords
df3['Trend'] = trends
df3[sortby] = metric
def colortable(val):
if val == 'DOWN':
color="lightcoral"
elif val == 'UP':
color = "lightgreen"
elif val == 'FLAT':
color = "lightblue"
else:
color = 'white'
return 'background-color: %s' % color
df3 = df3.style.applymap(colortable)
total = len(trends)
print("Up: " + str(up) + " | " + str(round((up/total)*100,0)) + "%")
print("Down: " + str(down) + " | " + str(round((down/total)*100,0)) + "%")
print("Flat: " + str(flat) + " | " + str(round((flat/total)*100,0)) + "%")
print("N/A: " + str(na) + " | " + str(round((na/total)*100,0)) + "%")
df3

How to use for loop to insert row into QSqltablemodel from a list?

Below is the Database model:
db = QSqlDatabase.addDatabase('QSQLITE')
db.setDatabaseName('book.db')
db.open()
self.model = QtSql.QSqlTableModel(self)
self.model.setTable("card")
self.model.select()
For example:
a = "name"
b = 30
c = "M"
data_row = [a, b, c]
r = self.model.record()
r.setValue("name", "name")
r.setValue("age", 30)
r.setValue("gender", "M")
self.model.insertRecord(-1, r)
self.model.select()
The database have 3 columns that are 'name', 'age','gender'.
My question is how to insert row in to Sqlite table using QSqlTableModel using with for-loop?
I tried below code That's worked for me :
Used "for-loop" to count-columns and zip() Function for Parallel Iteration.
a = "name"
b = 30
c = "M"
data_row = [a, b, c]
columns = []
for col in range(self.model.columnCount()):
columns.append(col)
r = self.model.record()
for col, rec in zip(columns, data_row):
r.setValue(col, rec)
self.model.insertRecord(-1, r)
self.model.select()

How can i apply something to each one of my columns?

I'm starting to study about Data Science and trying to preprocess a DataSet. I have 11 columns (col1 to col11) and to each one of that i wanna apply this:
exemple with second column witch is col2
col2Media = df['col2'][df['col2'] != '-'].astype(str).astype(int).values.mean()
df['col2'] = df['col2'].apply(lambda x: col2Media.astype(int) if x == '-' else x)
It's like a for in C with i as column index:
for (i = 1; i < 12; i++) {
MediaCol(i) = df['col(i)'][df['col(i)'] != '-'].astype(str).astype(int).values.mean()
df['col(i)'] = df['col(i)'].apply(lambda x: MediaCol(i).astype(int) if x == '-' else x)
}
One way is to program the same loop in python. I suppose colMedia serves only as a temporary Series.
for i in range(1,12):
colMedia = df['col'+str(i)][df['col'+str(i)] != '-'].astype(str).astype(int).values.mean()
df['col'+str(i)] = df['col'+str(i)].apply(lambda x: colMedia.astype(int) if x == '-' else x)
Or if col1 to col11 are the only columns of your dataframe:
for col in df:
colMedia = df[col][df[col] != '-'].astype(str).astype(int).values.mean()
df[col] = df[col].apply(lambda x: colMedia.astype(int) if x == '-' else x)

Bokeh charts unresponsive on rangeslider on_change

I am working on bokeh charts for the first time. I have followed a few tutorials but due to some reason, update function is not working on rangeslider on_change()
def make_data(df, start, end):
#df['ID'] = range(1, len(df) + 1)
s = df['ID'] >= start
e = df['ID'] <= end
df1 = df[e & s]
date = df1['date'].tolist()
capi = df1['capi'].tolist()
data = {'x': dateTime(date), 'y': capi}
source = ColumnDataSource(data)
return source
def update(attr, old, new):
df = pd.DataFrame.from_csv("main_data.csv", index_col = None)
df['ID'] = range(1, len(df) + 1)
new_src = make_dataset(df, range_start = range_select.value[0], range_end = range_select.value[1])
source.data.update(new_src.data)
def make_plot(source):
p1 = figure(x_axis_type="datetime", title="Stock Closing Prices")
p1.grid.grid_line_alpha=0.3
p1.xaxis.axis_label = 'Date'
p1.yaxis.axis_label = 'Price'
p1.line('x', 'y', source = source, color='#A6CEE3', legend='capi')
return p1
range_select = RangeSlider(title="Date range", value=(ids[0], ids[100]), start=ids[0], end=ids[-1], step=1)
range_select.on_change('value', update)
source = make_data(df, 1, 1000)
p = make_plot(source)
controls = WidgetBox(range_select)
layout = column(controls, p)
tab = Panel(child=layout, title = 'Histogram')
tabs = Tabs(tabs = [tab])
show(tabs)
can someone please point me in the right direction here

Can't convert a string into a list of integers

I am trying to make a program in python that identifies whether a square is a magic square or not and i am having trouble getting the user input into a list. I understand that my code could be more efficient but I am very new to python.
column_1 = (0,3)
column_2 = (0,3)
column_3 = (0,3)
column_4 = (0,3)
row_1 = [int(i) for i in input('input row 1 with spaces inbetween numbers: ').split(' ')]
row_2 = [int(i) for i in input('input row 2 with spaces inbetween numbers: ').split(' ')]
row_3 = [int(i) for i in input('input row 3 with spaces inbetween numbers: ').split(' ')]
row_4 = [int(i) for i in input('input row 4 with spaces inbetween numbers: ').split(' ')]
column_1[0].append(row_1[0])
column_1[1].append(row_2[0])
column_1[2].append(row_3[0])
column_1[3].append(row_4[0])
column_2[0] = row_1[1]
column_2[1] = row_2[1]
column_2[2] = row_3[1]
column_2[3] = row_4[1]
column_3[0] = row_1[2]
column_3[1] = row_2[2]
column_3[2] = row_3[2]
column_3[3] = row_4[2]
column_4[0] = row_1[3]
column_4[1] = row_2[3]
column_4[2] = row_3[3]
column_4[3] = row_4[3]
diagonal_left_to_right[0] = column_1[0]
diagonal_left_to_right[1] = column_2[1]
diagonal_left_to_right[2] = column_3[2]
diagonal_left_to_right[3] = column_4[3]
diagonal_right_to_left[0] = column_4[0]
diagonal_right_to_left[1] = column_3[1]
diagonal_right_to_left[2] = column_2[2]
diagonal_right_to_left[3] = column_1[3]
sum_row_1 = sum(row_1)
sum_row_2 = sum(row_2)
sum_row_3 = sum(row_3)
sum_row_4 = sum(row_4)
sum_col_1 = sum(column_1)
sum_col_2 = sum(column_2)
sum_col_3 = sum(column_3)
sum_col_4 = sum(column_4)
sum_dag_l2r = sum(diagonal_left_to_right)
sum_dag_r2l = sum(diagonal_right_to_left)
if sum_row_1 == sum_row_2 == sum_row_3 == sum_row_4 == sum_col_1 == sum_col_2 == sum_col_3 == sum_col_4 == sum_dag_r2l == sum_dag_l2r:
print('magic')
else:
print('not magic')
I keep getting error messages that 'int' object has no attribute 'append'
I have tried a lot of different methods that I found on this website and none of them have worked for various reasons.
I am open to all suggestions, anything will help me.
Thanks
You first define column_1 as tuple (with 2 integer values, one at index 0 and one at index 1). The append method cannot work on column_1[0], which is like doing 0.append(). You probably did not intend to create a tuple, but a list with certain dimensions.
You can assign the values to columns and diagonals with this list notation:
column_1 = [row_1[0], row_2[0], row_3[0], row_4[0]]
column_2 = [row_1[1], row_2[1], row_3[1], row_4[1]]
column_3 = [row_1[2], row_2[2], row_3[2], row_4[2]]
column_4 = [row_1[3], row_2[3], row_3[3], row_4[3]]
diagonal_left_to_right = [column_1[0],column_2[1],column_3[2],column_4[3]]
diagonal_right_to_left = [column_4[0],column_3[1],column_2[2], column_1[3]]

Resources