Pandas Group By Weird Behaviour - python-3.x

def quant(X,col_):
print('X\n',X.head(5))
q25 = np.quantile(X[col_],0.15)
q75 = np.quantile(X[col_],0.85)
total = X[col_].tolist()
ltq = []
mtq = []
iqr = []
for i in total:
if i < q25:
ltq.append(i)
elif i > q75:
mtq.append(i)
else:
iqr.append(i)
p_l_q = 100*(len(ltq))/len(total)
p_l_m = 100*(len(mtq))/len(total)
percent_iqr = 100*len((iqr))/len(total)
X['p_l_q'] = p_l_q
X['p_l_m'] = p_l_m
X['p_l_i'] = percent_iqr
X['count'] = len(total)
X_short = X[['p_l_q','p_l_m','p_l_i','count']].copy(deep = True)
print(X_short[:1])
new = X_short[:1]
return new
X = pd.DataFrame()
X['G'] = ''
X['H'] = ''
X['M'] = ''
lst1 = ['a','a','a','a','a','b','b','b','b','b','b','c','c','c','c']
lst2 = [10,12,13,45,52,34,78,34,56,79,90,65,56,43,11]
lst3 = [1,2,3,4,5,1,2,3,4,5,6,1,2,3,4]
X['G'] = lst1
X['H'] = lst2
X['M'] = lst3
X_q = X.groupby('G').apply(quant,'H').reset_index()
I have used a print statement to give me head of dataframe block for each unique 'G' but I get weird print like in image.
There should be exactly three print outputs.(for each unqiue G) but it is showing 5 on top of that second print output (G='b') has H values as that of G ='a'.

Try replacing this:
def quant(X,col_):
print('X\n',X.head(5))
With this:
def quant(XX,col_):
X = XX.copy()
print('X\n',XX.head(5))
del XX # Delete 'XX' because 'X' copy is available
Output

Related

why I have output like this for make pair in siamese network?

This is my code, and I take the output that illustrates the below. Len of idx is 1000. I want to make image pairs and label pairs, but I take errors like this:
Code:
pair_images = []
pair_labels = []
new_labels = []
for k in labels:
new_labels.append(int(k))
numClasses = len(np.unique(new_labels))
new_labels = np.array(new_labels)
idx = [np.where(new_labels == i)[0] for i in range(0,numClasses)]
for idxA in range(len(images)):
# Make Posetive Images
currentImage = images[idxA]
label = new_labels[idxA]
idxB = np.random.choice(idx[label])
posImage = images[idxB]
output:
idxB = np.random.choice(idx[label])
IndexError: list index out of range

Print REGEX using USER DEFINED FUNCTION

I'm trying to print the variables ccb_3, nome, data, taxa and parcela using the function I defined as "ext_ccb", but when I run the code it returns 3 times (because I defined q as 3) the variable ccb_3.
I tried splitting it into 2 functions (one with the variable ccb_3 e one with the rest that uses REGEX) but it didn't worked to.
'''
from PyPDF2 import PdfFileReader, PdfFileWriter
import re
x = 1
q = 3
def ext_ccb():
nome_ccb = str("Vazio (" + y + ").pdf")
ccb = PdfFileReader(nome_ccb)
ccb_obj_1 = ccb.getPage(0)
ccb_text_1 = ccb_obj_1.extractText()
ccb_obj_2 = ccb.getPage(1)
ccb_text_2 = ccb_obj_2.extractText()
ccb_3 = ccb_text_1[1:8]
print(ccb_3)
pattern_nome = re.compile(r'''[^\n][^CPF][A-Z](|\.)\w*\s*.*$
Nome Completo
''', re.M)
matches_nome = pattern_nome.finditer(ccb_text_1)
for match in matches_nome:
nome = str(match)
nome = nome[40:].replace(r"\n\nNome Completo\n'>", "")
print(nome)
pattern_data = re.compile(r'''5\.2\. Modalidade
\d{2}/\d{2}/\d{4}
''')
matches_data = pattern_data.findall(ccb_text_1)
for match in matches_data:
data = match[17:27]
print(data)
pattern_taxa = re.compile(r'''Taxa de Juros a\.m\. \(%\)
\d*,\d*''')
matches_taxa = pattern_taxa.findall(ccb_text_2)
for match in matches_taxa:
taxa = match[24:]
print(taxa)
pattern_vparcela = re.compile(r'''Valor das Parcelas
R\$ \d*,\d*''')
matches_vparcela = pattern_vparcela.findall(ccb_text_2)
for match in matches_vparcela:
parcela = match[23:]
print(parcela)
while x <= q:
y = str(x)
x += 1
ext_ccb()
'''
What I really need is to insert it into an csv, multiple times from different PDF's, which I already have the code for:
'''
from csv import writer
x = 5
q = 0
while q < x:
q += 1
ccb_3 += 1
nome += 2
data += 4
taxa += 4
parcela += 5
list_data = [ccb_3, nome, data, taxa, parcela]
with open('csv_teste.csv', 'a', newline = '') as f_object:
writer_object = writer(f_object)
writer_object.writerow(list_data)
f_object.close()
'''
How can I save each data from each PDF and put it into the CSV?

python replace '?' with not successive or preceding character

I am trying to solve a riddle, the challenge is to replace the question mark in a string by not using the previous or the following character in that string
For example:-
riddle = 'abcd?ef?'
expected_out = 'abcdiefa'
riddle = '???'
expected_out = 'aea'
This is the solution that I have tried but for some reason it isn't working
successor_element = ''
predecessor_element = ''
my_pre_succ_elements = []
riddle = "ab?ac?"
required_list = []
def solution(riddle):
my_replacers = ['a','e','i']
j = len(riddle)
print(j)
for e in range(0,j):
req_element = riddle[e]
print(e)
print(req_element)
if req_element == '?':
if e == 0:
successor_element = riddle[e+1]
if e == j-1:
predecessor_element = riddle[e-1]
if (e!= 0) and (e != j-1):
successor_element = riddle[e+1]
predecessor_element = riddle [e-1]
my_pre_succ_elements.extend(successor_element)
my_pre_succ_elements.extend(predecessor_element)
required_list = list(set(my_pre_succ_elements)^set(my_replacers))
substitutor = required_list[0]
riddle = str(riddle[0:e]) + str(substitutor) + str(riddle[e + 1:])
print(riddle)
pass
You were pretty close. This should work:
if req_element == '?':
possibles = ['a','e','i']
if e > 0 and riddle[e-1] in possibles:
possibles.remove(riddle[e-1])
if e < j-1 and riddle[e+1] in possibles:
possibles.remove(riddle[e+1])
substitutor = possibles[0]
riddle = riddle[:e] + substitutor + riddle[e+1:]

Scipy optimize.minimize with multi- parameters

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import linalg, optimize
%matplotlib inline
Data load
data = pd.read_csv("D:/Stat/TimeSeries/KRW_month_0617_1.csv",index_col="Date") / 100
para = open("D:/Stat/TimeSeries/KRW_month_0617_1.txt").readlines()[0:2]
data.index = pd.to_datetime(data.index)
Parameters
cond = []
params = []
time = []
for i in para:
j = i.split()
for k in j:
cond.append(k)
cond = cond[1:]
for i in range(len(cond)):
cond[i] = round(float(cond[i]),4)
params = cond[0:23]
time = cond[23:]
maturity = np.array(time[1:])
timegap = 1/cond[23]
Functions We need
def Paramcheck(Params, checkStationary = 1):
result = 0
Kappa = np.array([[params[20],0,0], [0,params[21],0], [0,0,params[22]]])
Sigma = np.array([[params[1],0,0], [params[2],params[3],0], [params[4],params[5],params[6]]])
State = np.array([params[7], params[8], params[9]])
Lambda = params[0]
SigmaEps = np.identity(10)
for i in range(10):
SigmaEps[i][i] = params[i+10]
for i in range(len(Sigma)):
if Sigma[i][i] < 0:
result = 1
for j in SigmaEps:
if np.any(SigmaEps) < 0:
result = 1
if Lambda < 0.05 or Lambda > 2:
result = 1
elif State[0] < 0:
result = 1
elif Kappa[0][0] < 0:
result = 1
if result == 0 and checkStationary > 0:
if max(np.linalg.eigvals(-Kappa).real) > 0:
result = 2
return result
def CheckDet(x):
if x == np.inf or x == np.nan:
result = 1
elif x < 0:
result = 2
elif abs(x) < 10**-250:
result = 3
else:
result = 0
return result
def NS_factor(lambda_val, maturity):
col1 = np.ones(len(maturity))
col2 = (1 - np.exp(-lambda_val*maturity))/(lambda_val*maturity)
col3 = col2 - np.exp(-lambda_val*maturity)
factor = np.array([col1,col2,col3]).transpose()
return factor
def DNS_Kalman_filter(Params, *args):
N = Paramcheck(Params)
if N == 0:
Kappa = np.array([[params[20],0,0], [0,params[21],0], [0,0,params[22]]])
Sigma = np.array([[params[1],0,0], [params[2],params[3],0],
[params[4],params[5],params[6]]])
State = np.array([params[7], params[8], params[9]])
Lambda = params[0]
SigmaEps = np.identity(10)
for i in range(10):
SigmaEps[i][i] = params[i+10]
Obs_Yield = args[0]
Obs_Date = args[1]
Timegap = args[2]
Obs_Mty = args[3]
Finalstate = args[4]
Mty_length = len(Obs_Mty)
B = NS_factor(lambda_val = Lambda,maturity = Obs_Mty)
H_large = SigmaEps **2
N_obs = len(Obs_Date)
LLH_vec = np.zeros(N_obs)
phi1 = linalg.expm(-Kappa*Timegap)
phi0 = (np.identity(3)-phi1) # State
Eigenvalues = np.linalg.eig(Kappa)[0]
Eigen_vec = np.linalg.eig(Kappa)[1]
Eigen_vec_inv = np.linalg.inv(Eigen_vec)
S = Eigen_vec_inv # Sigma # Sigma.transpose() # Eigen_vec_inv.transpose()
Atilde = np.dot(Sigma[0], Sigma[0])
Btilde = np.dot(Sigma[1], Sigma[1])
Ctilde = np.dot(Sigma[2], Sigma[2])
Dtilde = np.dot(Sigma[0], Sigma[1])
Etilde = np.dot(Sigma[0], Sigma[2])
Ftilde = np.dot(Sigma[1], Sigma[2])
res1= Atilde* Obs_Mty* Obs_Mty/6
res2= Btilde*(1/(2*Lambda**2) - (1-np.exp(-Lambda*Obs_Mty))/(Lambda**3*Obs_Mty) + (1-
np.exp(-2*Lambda*Obs_Mty))/(4*Lambda**3*Obs_Mty))
res3= Ctilde*(1/(2*Lambda**2) + np.exp(-Lambda*Obs_Mty)/(Lambda**2)-
Obs_Mty*np.exp(-2*Lambda*Obs_Mty)/(4*Lambda) -
3*np.exp(-2*Lambda*Obs_Mty)/(4*Lambda**2) - 2*(1-np.exp(-
Lambda*Obs_Mty))/(Lambda**3*Obs_Mty) + 5*(1-
np.exp(-2*Lambda*Obs_Mty))/(8*Lambda**3*Obs_Mty))
res4= Dtilde*(Obs_Mty/(2*Lambda) + np.exp(-Lambda*Obs_Mty)/(Lambda**2) - (1-np.exp(-
Lambda*Obs_Mty))/(Lambda**3*Obs_Mty))
res5= Etilde*(3*np.exp(-Lambda*Obs_Mty)/(Lambda**2) + Obs_Mty/(2*Lambda)+Obs_Mty*np.exp(-
Lambda*Obs_Mty)/(Lambda) - 3*(1-np.exp(-Lambda*Obs_Mty))/(Lambda**3*Obs_Mty))
res6= Ftilde*(1/(Lambda**2) + np.exp(-Lambda*Obs_Mty)/(Lambda**2) -
np.exp(-2*Lambda*Obs_Mty)/(2*Lambda**2) - 3*(1-np.exp(-
Lambda*Obs_Mty))/(Lambda**3*Obs_Mty) + 3*(1-
np.exp(-2*Lambda*Obs_Mty))/(4*Lambda**3*Obs_Mty))
val = res1 + res2 + res3 + res4 + res5 + res6
V_mat = np.zeros([3,3])
V_lim = np.zeros([3,3])
for i in range(3):
for j in range(3):
V_mat[i][j] = S[i][j]*(1-np.exp(-(Eigenvalues[i] +
Eigenvalues[j])*Timegap))/(Eigenvalues[i] + Eigenvalues[j])
V_lim[i][j] = S[i][j]/(Eigenvalues[i] + Eigenvalues[j])
Q = (Eigen_vec # V_mat # Eigen_vec.transpose()).real
Sigma_lim = (Eigen_vec # V_lim # Eigen_vec.transpose()).real
for i in range(N_obs):
y = Obs_Yield[i]
xhat = phi0 + phi1 # State
y_implied = B # xhat
v = y - y_implied + val
Sigmahat = phi1 # Sigma_lim # phi1.transpose() + Q
F = B # Sigmahat # B.transpose() + H_large
detF = np.linalg.det(F)
if CheckDet(detF) > 0:
N = 3
break
Finv = np.linalg.inv(F)
State = xhat + Sigmahat # B.transpose() # Finv # v
Sigma_lim = Sigmahat - Sigmahat # B.transpose() # Finv # B # Sigmahat
LLH_vec[i] = np.log(detF) + v.transpose() # Finv # v
if N == 0:
if Finalstate:
yDate = Obs_Date[-1]
result = np.array([yDate,State])
else:
result = 0.5 * (sum(LLH_vec) + Mty_length*N_obs*np.log(2*np.pi))
else:
result = 7000000
return result
I made a code that does Arbitrage Free Nelson-Siegel model. Data is return rates of bond (1Y,1.5Y, ... ,20Y). I wanna optimize that function with scipy optimize.minimize function with fixed *args.
Suppose that Initial parmas are verified that it's close to optimized params from empirical experiments using Dynamic Nelson-Siegel Model.
LLC_new = 0
while True:
LLC_old = LLC_new
OPT = optimize.minimize(x0=params,fun=DNS_Kalman_filter, args=
(data.values,data.index,timegap,maturity,0))
params = OPT.x
LLC_new = round(OPT.fun,5)
print("Current LLC: %0.5f" %LLC_new)
if LLC_old == LLC_new:
OPT_para = params
FinalState = DNS_Kalman_filter(params,data.values,data.index,timegap,maturity,True)
break
Result is
Current LLC: -7613.70146
Current LLC: -7613.70146
LLC(log-likelihood value) isn't maximized. It's not a result I desire using Optimizer.
Is there any solution for that?
In R, there is optim() function works as similar as scipy.optimize.minimize() which works really well. I also have a R code for that very similar to this Python code.

Can't convert a string into a list of integers

I am trying to make a program in python that identifies whether a square is a magic square or not and i am having trouble getting the user input into a list. I understand that my code could be more efficient but I am very new to python.
column_1 = (0,3)
column_2 = (0,3)
column_3 = (0,3)
column_4 = (0,3)
row_1 = [int(i) for i in input('input row 1 with spaces inbetween numbers: ').split(' ')]
row_2 = [int(i) for i in input('input row 2 with spaces inbetween numbers: ').split(' ')]
row_3 = [int(i) for i in input('input row 3 with spaces inbetween numbers: ').split(' ')]
row_4 = [int(i) for i in input('input row 4 with spaces inbetween numbers: ').split(' ')]
column_1[0].append(row_1[0])
column_1[1].append(row_2[0])
column_1[2].append(row_3[0])
column_1[3].append(row_4[0])
column_2[0] = row_1[1]
column_2[1] = row_2[1]
column_2[2] = row_3[1]
column_2[3] = row_4[1]
column_3[0] = row_1[2]
column_3[1] = row_2[2]
column_3[2] = row_3[2]
column_3[3] = row_4[2]
column_4[0] = row_1[3]
column_4[1] = row_2[3]
column_4[2] = row_3[3]
column_4[3] = row_4[3]
diagonal_left_to_right[0] = column_1[0]
diagonal_left_to_right[1] = column_2[1]
diagonal_left_to_right[2] = column_3[2]
diagonal_left_to_right[3] = column_4[3]
diagonal_right_to_left[0] = column_4[0]
diagonal_right_to_left[1] = column_3[1]
diagonal_right_to_left[2] = column_2[2]
diagonal_right_to_left[3] = column_1[3]
sum_row_1 = sum(row_1)
sum_row_2 = sum(row_2)
sum_row_3 = sum(row_3)
sum_row_4 = sum(row_4)
sum_col_1 = sum(column_1)
sum_col_2 = sum(column_2)
sum_col_3 = sum(column_3)
sum_col_4 = sum(column_4)
sum_dag_l2r = sum(diagonal_left_to_right)
sum_dag_r2l = sum(diagonal_right_to_left)
if sum_row_1 == sum_row_2 == sum_row_3 == sum_row_4 == sum_col_1 == sum_col_2 == sum_col_3 == sum_col_4 == sum_dag_r2l == sum_dag_l2r:
print('magic')
else:
print('not magic')
I keep getting error messages that 'int' object has no attribute 'append'
I have tried a lot of different methods that I found on this website and none of them have worked for various reasons.
I am open to all suggestions, anything will help me.
Thanks
You first define column_1 as tuple (with 2 integer values, one at index 0 and one at index 1). The append method cannot work on column_1[0], which is like doing 0.append(). You probably did not intend to create a tuple, but a list with certain dimensions.
You can assign the values to columns and diagonals with this list notation:
column_1 = [row_1[0], row_2[0], row_3[0], row_4[0]]
column_2 = [row_1[1], row_2[1], row_3[1], row_4[1]]
column_3 = [row_1[2], row_2[2], row_3[2], row_4[2]]
column_4 = [row_1[3], row_2[3], row_3[3], row_4[3]]
diagonal_left_to_right = [column_1[0],column_2[1],column_3[2],column_4[3]]
diagonal_right_to_left = [column_4[0],column_3[1],column_2[2], column_1[3]]

Resources