why is my ROC curve getting plotted in reverse - python-3.x

I have one csv file and tried to plot an ROC curve with out using any predefined libraries for the curve plotting. I have used numpy and pandas for my code. Can any one please tell me where am I going wrong?ROC Curve
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv('5_b.csv')
#adding a new column to the data
df['Y_pred'] = np.where(df['proba']<=0.5, 0, 1)
#printing the top 5 values in data
#df.head(5)
#sorting dataframe
df=df.sort_values(by =['proba'])
df.head(5)
#confusion matrix
TP_Main = len(df[(df['y'] == 1) & (df['Y_pred'] == 1)])
FP_Main = len(df[(df['y'] == 0) & (df['Y_pred'] == 1)])
FN_Main = len(df[(df['y'] == 1) & (df['Y_pred'] == 0)])
TN_Main = len(df[(df['y'] == 0) & (df['Y_pred'] == 0)])
print("TN_Main : {0},FN_Main : {1}".format(TN_Main,FN_Main))
print("FP_Main : {0},TP_Main : {1}".format(FP_Main,TP_Main))
#F1score
precision = TP_Main/(TP_Main+FP_Main)
recall = TP_Main/(TP_Main+FN_Main)
F1score = ((precision*recall)/(precision+recall))*2
print("precision : {0},recall : {1}".format(precision,recall))
print("F1score : ",F1score)
#df.sort_values(by =['proba'], inplace = True, ascending = False)
tprList = []
fprList = []
for i in range(len(df)):
df['Y_pred'] =np.where(df['proba']<=df.iloc[i][1],0,1)
TP = len(df[(df['y'] == 1) & (df['Y_pred'] == 1)])
FP = len(df[(df['y'] == 0) & (df['Y_pred'] == 1)])
FN = len(df[(df['y'] == 1) & (df['Y_pred'] == 0)])
TN = len(df[(df['y'] == 0) & (df['Y_pred'] == 0)])
TPR = TP/(FN+TP)
FPR = TN/(FP+TN)
tprList.append(TPR)
fprList.append(FPR)
tpr_array = np.array(tprList)
fpr_array = np.array(fprList)
#Accuracy score
AccScore = (TN_Main+TP_Main)/len(df)
print("Accuracy Score =", AccScore)
AUCScore = np.trapz(tpr_array,fpr_array)
print("AUC Score :",AUCScore)
plt.plot(tpr_array,fpr_array)

Related

keras BatchGenerator(keras.utils.Sequence) is too slow

I'm using a custom batch generator with large dataframe. but the Generator takes too much time to generate a batch, it takes 127s to generate a batch of 1024. I've tried Dask but still, the processing is slow. is there any way to integrate multiprocessing with inside the generator. knowing that I've tried use_multiprocessing=True with workers=12
import keras
from random import randint
import glob
import warnings
import numpy as np
import math
import pandas as pd
import dask.dataframe as dd
class BatchGenerator(keras.utils.Sequence):
'Generates data for Keras'
def __init__(self, labels=None, batch_size=8, n_classes=4, shuffle=True,
seq_len=6, data_path=None, meta_path=None,list_IDs=None):
'Initialization'
self.batch_size = batch_size
self.labels = labels
self.n_classes = n_classes
self.shuffle = shuffle
self.seq_len = seq_len
self.meta_df = meta_path
self.data_df = data_path
self.data_df = self.data_df.astype({"mjd": int})
self.list_IDs = list_IDs
if self.list_IDs==None:
self.list_IDs = list(self.meta_df['object_id'].unique())
self.on_epoch_end()
def __len__(self):
'Denotes the number of batches per epoch'
return int(np.floor(len(self.list_IDs) / self.batch_size))
def __getitem__(self, index):
'Generate one batch of data'
# Generate indexes of the batch
indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
# Find list of IDs
list_IDs_temp = [self.list_IDs[k] for k in indexes]
# Generate data
X, y = self.__data_generation(list_IDs_temp)
return X, y
def on_epoch_end(self):
'Updates indexes after each epoch'
self.indexes = np.arange(len(self.list_IDs))
if self.shuffle == True:
np.random.shuffle(self.indexes)
def __data_generation(self, list_IDs_temp):
X_dat = np.zeros((self.batch_size, self.seq_len,6,1))
Y_mask = np.zeros((self.batch_size, self.seq_len,6,1))
# Y_dat = np.empty((self.batch_size,1), dtype=int)
X_length= np.empty((self.batch_size,1), dtype=int)
for i, trans_id in enumerate(list_IDs_temp):
curve = self.data_df[self.data_df.object_id==trans_id]
mjdlist = list(curve['mjd'].unique())
ts_length = len(mjdlist)
if ts_length <= self.seq_len :
start_ind = 0
else :
start_ind = randint(0, ts_length - self.seq_len)
ts_length = self.seq_len
for j in range(ts_length):
if j+start_ind < len(mjdlist):
step = curve[curve.mjd==mjdlist[j+start_ind]]
for k in range(len(step.mjd)):
obs = step[step.passband==k]
if len(obs) == 0 :
# print('here is one')
continue
else:
if k == 0:
X_dat[i,j,0,0] =obs.flux.iloc[0]
Y_mask[i,j,0,0] = 1
if k == 1:
X_dat[i,j,1,0] = obs.flux.iloc[0]
Y_mask[i,j,1,0] = 1
if k == 2:
X_dat[i,j,2,0] = obs.flux.iloc[0]
Y_mask[i,j,2,0] = 1
if k == 3:
X_dat[i,j,3,0] = obs.flux.iloc[0]
Y_mask[i,j,3,0] = 1
if k == 4:
X_dat[i,j,4,0] = obs.flux.iloc[0]
Y_mask[i,j,4,0] = 1
if k == 5:
X_dat[i,j,5,0] = obs.flux.iloc[0]
Y_mask[i,j,5,0] = 1
# meta = self.meta_df[self.meta_df['object_id'] == trans_id]
# Y_dat[i] = self.labels[int(meta['target'])]
X_length[i,0] = ts_length
flux_max = np.max(X_dat[i])
flux_min = np.min(X_dat[i])
flux_pow = math.log2(flux_max - flux_min)
X_dat[i] /= flux_pow
X_noised = X_dat + np.random.uniform(low=0, high=0.5, size=X_dat.shape)
return [X_noised, X_length, np.reshape(Y_mask,(self.batch_size, self.seq_len*6))], np.reshape(X_dat,(self.batch_size, self.seq_len*6))
To make it faster, the for loop in the function __data_generation should be parallelized. Using the joblib package may help.

How to alternate color of the graphs between blue and white?

I have a list D containing 50 sub-lists. The number of elements in these sub-lists are decreasing. I visualize the list D by
for i, array in enumerate(D):
plt.scatter([i]*len(array), array)
I have 50 functions taking values from St_Sp, and Y is a list containing 50 elements, each of them is the output of each function. I visualize these functions
fig, ax = plt.subplots()
for i in range(len(Y)):
ax.plot(St_Sp, Y[i])
I found that too many colors are not easy to eyes. I would like to ask how to alternate color of the graphs between blue and white? I mean the color of the functions and dots in D are white > blue > white > blue ...
Could you please elaborate on how to do so?
##### Import packages
import numpy as np
import scipy.linalg as la
import time
import matplotlib
import matplotlib.pyplot as plt
##### Initial conditions
N = 100
lamda = 7
mu = 2
a = np.exp(-0.05)
r = - np.log(a).copy()
St_Sp = np.arange(- N, N + 1)
Card = St_Sp.shape[0]
##### Define infintesimal generator
def LL(x, y):
if x == N or x == - N: re = 0
elif x - y == - 1: re = lamda
elif x - y == 1: re = mu
elif x - y == 0: re = - (mu + lamda)
else: re = 0
return re
def L(x):
return - LL(x, x)
##### Define function Phi
def Phi(x):
return max(x, 0)
Phi = np.vectorize(Phi)
##### Define vector b
b = np.array(Phi(St_Sp))
##### Define function Psi
def Psi(x):
return L(x) / (L(x) + r)
Psi = np.vectorize(Psi)
##### Generate a Boolean vector whose all elements are False
d = np.array([0] * Card).astype(bool)
##### Define matrix A
A = np.zeros((Card, Card))
for i in range(Card):
for j in range(Card):
if (i != j) & (L(St_Sp[i]) != 0):
A[i, j] = LL(St_Sp[i], St_Sp[j]) / L(St_Sp[i])
elif (i != j) & (L(St_Sp[i]) == 0):
A[i, j] = 0
elif (i == j) & (Psi(St_Sp[i]) != 0):
A[i, j] = - 1 / Psi(St_Sp[i])
else: A[i, j] = 1
##### Row names of A
rows = np.arange(0, Card)
##### Define matrix B
B = np.zeros((Card, Card))
for i in range(Card):
for j in range(Card):
if i != j:
B[i, j] = LL(St_Sp[i], St_Sp[j])
else: B[i, j] = LL(St_Sp[i], St_Sp[j]) - r
start = time.time()
##### Generate I_0
I = [np.array([1] * Card).astype(bool), d.copy()]
Z = np.array(b.copy())
Z = Z.astype(float)
D = [St_Sp]
index0 = np.matmul(B, Z) <= 0
index1 = ~ index0
Y = [b.copy()]
##### Iterations
for i in range(1, Card):
I = [I[0] & index0, I[1] | index1]
Z = np.array(b.copy())
Z = Z.astype(float)
A1 = A[np.ix_(rows[I[1]], rows[I[1]])]
A2 = A[np.ix_(rows[I[1]], rows[I[0]])]
Z[I[1]] = la.solve(A1, - np.matmul(A2, Z[I[0]]))
Y = np.concatenate((Y, [Z]))
D.append(St_Sp[I[0]])
index = np.matmul(B[I[0]], Z) <= 0
index0, index1 = d.copy(), d.copy()
index0[I[0]], index1[I[0]] = index, ~ index
if (I[0] == index0).all() == True: break
for i, array in enumerate(D):
plt.scatter([i]*len(array), array)
fig, ax = plt.subplots()
for i in range(len(Y)):
ax.plot(St_Sp, Y[i])
The easiest approach is to set a custom color cycler. Instead of cycling between the 10 typical colors, the default colors for the plots will cycle through the given colors.
from cycler import cycler
custom_cycler = cycler(color=['white', 'blue'])
plt.gca().set_prop_cycle(custom_cycler)
for i, array in enumerate(D[:-1]):
plt.scatter([i] * len(array), array)
plt.scatter([len(D) - 1] * len(D[-1]), D[-1], color='crimson')
fig, ax = plt.subplots()
ax.set_prop_cycle(custom_cycler)
for i in range(len(Y) - 1):
ax.plot(St_Sp, Y[i])
ax.plot(St_Sp, Y[len(Y) - 1], color='crimson')
plt.show()

Which convolution algorithm Keras uses?

I coded a general convolution function in Python for CNNs.
As it turned out the time taken for this function was almost 5x more than the Keras Conv2D takes.
So I was curious if anyone knows why is there a speed difference ?
(It took almost 10-15min for 1 epoch of MNIST Dataset for my convolution function. Whereas Keras does it in almost 3-4min)
Heres my Conv class :
class Convolutional2D(Layer):
def __init__(self, kernel_size, feature_maps):
self.kernel_size = kernel_size
self.feature_maps = feature_maps
self.b = np.zeros((feature_maps))#np.random.rand(feature_maps)
def connect(self, to_layer):
if len(to_layer.layer_shape) == 2:
kernel_shape = [self.feature_maps, self.kernel_size, self.kernel_size]
self.layer_shape = [self.feature_maps] + list(np.array(to_layer.layer_shape)-self.kernel_size+1)
else:
kernel_shape = [self.feature_maps, to_layer.layer_shape[0], self.kernel_size, self.kernel_size]
self.layer_shape = [self.feature_maps] + list(np.array(to_layer.layer_shape[1:])-self.kernel_size+1)
self.kernel = np.random.random(kernel_shape)
super().init_adam_params(self.kernel, self.b)
def convolve(self, x, k, mode='forward'):
if mode == 'forward':
ksize = k.shape[-1]
if len(x.shape) == 3:
out = np.zeros((x.shape[0], k.shape[0], x.shape[1]-k.shape[1]+1, x.shape[2]-k.shape[2]+1))
else:
out = np.zeros((x.shape[0], k.shape[0], x.shape[2]-k.shape[2]+1, x.shape[3]-k.shape[3]+1))
for i in range(out.shape[2]):
for j in range(out.shape[3]):
if len(x.shape) == 3:
window = x[:,i:i+ksize,j:j+ksize]
m = np.reshape(window, (window.shape[0], 1, window.shape[1], window.shape[2]))*k
m = np.sum(m, axis=(2,3))
else:
window = x[:,:,i:i+ksize,j:j+ksize]
m = np.reshape(window, (window.shape[0], 1, window.shape[1], window.shape[2], window.shape[3]))*k
m = np.sum(m, axis=(2,3,4))
out[:,:,i,j] = m
return out
elif mode == 'backward_i':
if len(k.shape) == 3:
out = np.zeros((x.shape[0], x.shape[2]+k.shape[1]-1, x.shape[3]+k.shape[2]-1))
x = np.pad(x, ((0, 0), (0, 0), (k.shape[1]-1, k.shape[1]-1), (k.shape[2]-1, k.shape[2]-1)))
else:
out = np.zeros((x.shape[0], k.shape[1], x.shape[2]+k.shape[2]-1, x.shape[3]+k.shape[3]-1))
x = np.pad(x, ((0, 0), (0, 0), (k.shape[2]-1, k.shape[2]-1), (k.shape[3]-1, k.shape[3]-1)))
fk = np.transpose(k, axes=(1,0,2,3))
x = np.reshape(x, (x.shape[0], 1, x.shape[1], x.shape[2], x.shape[3]))
ksize = k.shape[-1]
for i in range(out.shape[-2]):
for j in range(out.shape[-1]):
if len(k.shape) == 3:
window = x[:,:,i:i+ksize,j:j+ksize]
m = window*k
m = np.sum(m, axis=(1,2,3))
out[:,i,j] = m
else:
window = x[:,:,:,i:i+ksize,j:j+ksize]
m = window*fk
m = np.sum(m, axis=(2,3,4))
out[:,:,i,j] = m
return out
elif mode == 'backward_k':
if len(x.shape) == 3:
out = np.zeros((k.shape[1], x.shape[1]-k.shape[2]+1, x.shape[2]-k.shape[3]+1))
else:
out = np.zeros((k.shape[1], x.shape[1], x.shape[2]-k.shape[2]+1, x.shape[3]-k.shape[3]+1))
x = np.transpose(x, axes=(1,0,2,3))
x = np.reshape(x, (x.shape[0], x.shape[1], x.shape[2], x.shape[3]))
ksize = k.shape[-1]
k = np.transpose(k, axes=(1,0,2,3))
if len(x.shape) != 3:
fk = np.reshape(k, (k.shape[0], 1, k.shape[1], k.shape[2], k.shape[3]))
for i in range(out.shape[-2]):
for j in range(out.shape[-1]):
if len(x.shape) == 3:
window = x[:,i:i+ksize,j:j+ksize]
m = window*k
m = np.sum(m, axis=(1,2,3))
out[:,i,j] = m
else:
window = x[:,:,i:i+ksize,j:j+ksize]
m = window*fk
m = np.sum(m, axis=(2,3,4))
out[:,:,i,j] = m
return out
def forward(self, x):
return self.convolve(x, self.kernel)
def backward(self, x, loss_grad, params):
if len(self.kernel.shape) == 3:
flipped_kernel = np.flip(self.kernel, axis=(1,2))
flipped_loss_grad = np.flip(loss_grad, axis=(1,2))
else:
flipped_kernel = np.flip(self.kernel, axis=(2,3))
flipped_loss_grad = np.flip(loss_grad, axis=(2,3))
i_grad = self.convolve(loss_grad, flipped_kernel, mode='backward_i')
k_grad = self.convolve(x, flipped_loss_grad, mode='backward_k')
self.vw = params['beta1']*self.vw + (1-params['beta1'])*k_grad
self.sw = params['beta2']*self.sw + (1-params['beta2'])*(k_grad**2)
self.kernel += params['lr']*self.vw/np.sqrt(self.sw+params['eps'])
return i_grad
def get_save_data(self):
return {'type':'Convolutional2D', 'shape':np.array(self.layer_shape).tolist(), 'data':[self.kernel_size, self.feature_maps, self.kernel.tolist()]}
def load_saved_data(data):
obj = Convolutional2D(data['data'][0], data['data'][1])
obj.layer_shape = data['shape']
obj.kernel = np.array(data['data'][2])
obj.init_adam_params(obj.kernel, obj.b)
return obj
Keras and Pytorch are much more efficient because they take advantage of vectorization and the fact that matrix multiplication is very well optimized. They basically convert the convolution into a matrix multiplication by flattening the filter and creating a new matrix whose column values are the values of each block. They also take advantage of how the data is stored in memory. You can find more information in this article: https://towardsdatascience.com/how-are-convolutions-actually-performed-under-the-hood-226523ce7fbf

Is it possible to us a csv file to connect one part of the algo to another

I am trying to modify a triangular arbitrage crypto trading bot to include a predictive capability with a neural network. I've found some open source algorithms on GitHub, but I am having problem integrating them.
I've been trying to separate parts of the code into modules and using a continuously updated csv file to direct the data from the first half of the algorithm into the second, but it just isn't working.
I tried to create modules for different parts of the algorithm, but it didn't work:
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.layers import Bidirectional
from keras.models import Sequential
from binance.client import Client
from binance.enums import *
from sklearn.metrics import mean_squared_error
import time
import numpy as np
import math
import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime
from pandas import Series
from matplotlib import cm
api_key = BinanceKey1['api_key']
api_secret = BinanceKey1['api_secret']
client = Client(api_key, api_secret)
import csv
import json
from binance.client import Client
import csv
import json
from binance.client import Client
client = Client(api_key, api_secret)
client = Client("", "")
klines1 = client.get_historical_klines("BNBBTC", Client.KLINE_INTERVAL_1MINUTE,
"1 day ago UTC")
csv.write(klines1)
# fetch 30 minute klines for the last month of 2017
klines2 = client.get_historical_klines("ETHBTC", Client.KLINE_INTERVAL_30MINUTE,
"1 Dec, 2017", "1 Jan, 2018")
csv.write(klines2)
# fetch weekly klines since it listed
klines3 = client.get_historical_klines("NEOBTC", Client.KLINE_INTERVAL_1WEEK,
"1 Jan, 2017")
csv.write(klines3)
def load_data(klines, sequence_length):
raw_data = pd.read_csv(klines, dtype=float).values
for x in range(0, raw_data.shape[0]):
for y in range(0, raw_data.shape[1]):
if(raw_data[x][y] == 0):
raw_data[x][y] = raw_data[x-1][y]
data = raw_data.tolist()
result = []
for index in range(len(data) - sequence_length):
result.append(data[index: index + sequence_length])
d0 = np.array(result)
dr = np.zeros_like(d0)
dr[:, 1:, :] = d0[:, 1:, :] / d0[:, 0:1, :] - 1
start = 2400
end = int(dr.shape[0] + 1)
unnormalized_bases = d0[start:end, 0:1, 20]
split_line = round(0.9 * dr.shape[0])
training_data = dr[:int(split_line), :]
np.random.shuffle(training_data)
X_train = training_data[:, :-1]
Y_train = training_data[:, -1]
Y_train = Y_train[:, 20]
X_test = dr[int(split_line):, :-1]
Y_test = dr[int(split_line):, 49, :]
Y_test = Y_test[:, 20]
Y_daybefore = dr[int(split_line):, 48, :]
Y_daybefore = Y_daybefore[:, 20]
sequence_length = sequence_length
window_size = sequence_length - 1
return X_train, Y_train, X_test, Y_test, Y_daybefore, unnormalized_bases, window_size
def initialize_model(window_size, dropout_value, activation_function, loss_function, optimizer):
model = Sequential()
model.add(Bidirectional(LSTM(window_size, return_sequences=True), input_shape=(window_size, X_train.shape[-1]),))
model.add(Dropout(dropout_value))
model.add(Bidirectional(LSTM((window_size*2), return_sequences=True)))
model.add(Dropout(dropout_value))
model.add(Bidirectional(LSTM(window_size, return_sequences=False)))
model.add(Dense(units=1))
model.add(Activation(activation_function))
model.compile(loss=loss_function, optimizer=optimizer)
return model
def fit_model(model, X_train, Y_train, batch_num, num_epoch, val_split):
start = time.time()
model.fit(X_train, Y_train, batch_size= batch_num, nb_epoch=num_epoch, validation_split= val_split)
training_time = int(math.floor(time.time() - start))
return model, training_time
def test_model(model, X_test, Y_test, unnormalized_bases):
y_predict = model.predict(X_test)
real_y_test = np.zeros_like(Y_test)
real_y_predict = np.zeros_like(y_predict)
for i in range(Y_test.shape[0]):
y = Y_test[i]
predict = y_predict[i]
real_y_test[i] = (y+1)*unnormalized_bases[i]
real_y_predict[i] = (predict+1)*unnormalized_bases[i]
fig = plt.figure(figsize=(10,5))
ax = fig.add_subplot(111)
ax.set_title("Bitcoin Price Over Time")
plt.plot(real_y_predict, color = 'green', label = 'Predicted Price')
plt.plot(real_y_test, color = 'red', label = 'Real Price')
ax.set_ylabel("Price (USD)")
ax.set_xlabel("Time (Days)")
ax.legend()
return y_predict, real_y_test, real_y_predict, fig
def price_change(Y_daybefore, Y_test, y_predict):
Y_daybefore = np.reshape(Y_daybefore, (-1, 1))
Y_test = np.reshape(Y_test, (-1, 1))
delta_predict = (y_predict - Y_daybefore) / (1+Y_daybefore)
delta_real = (Y_test - Y_daybefore) / (1+Y_daybefore)
fig = plt.figure(figsize=(10, 6))
ax = fig.add_subplot(111)
ax.set_title("Percent Change in Bitcoin Price Per Day")
plt.plot(delta_predict, color='green', label = 'Predicted Percent Change')
plt.plot(delta_real, color='red', label = 'Real Percent Change')
plt.ylabel("Percent Change")
plt.xlabel("Time (Days)")
ax.legend()
plt.show()
return Y_daybefore, Y_test, delta_predict, delta_real, fig
def binary_price(delta_predict, delta_real):
delta_predict_1_0 = np.empty(delta_predict.shape)
delta_real_1_0 = np.empty(delta_real.shape)
for i in range(delta_predict.shape[0]):
if delta_predict[i][0] > 0:
delta_predict_1_0[i][0] = 1
else:
delta_predict_1_0[i][0] = 0
for i in range(delta_real.shape[0]):
if delta_real[i][0] > 0:
delta_real_1_0[i][0] = 1
else:
delta_real_1_0[i][0] = 0
return delta_predict_1_0, delta_real_1_0
def find_positives_negatives(delta_predict_1_0, delta_real_1_0):
true_pos = 0
false_pos = 0
true_neg = 0
false_neg = 0
for i in range(delta_real_1_0.shape[0]):
real = delta_real_1_0[i][0]
predicted = delta_predict_1_0[i][0]
if real == 1:
if predicted == 1:
true_pos += 1
else:
false_neg += 1
elif real == 0:
if predicted == 0:
true_neg += 1
else:
false_pos += 1
return true_pos, false_pos, true_neg, false_neg
def calculate_statistics(true_pos, false_pos, true_neg, false_neg, y_predict, Y_test):
precision = float(true_pos) / (true_pos + false_pos)
recall = float(true_pos) / (true_pos + false_neg)
F1 = float(2 * precision * recall) / (precision + recall)
MSE = mean_squared_error(y_predict.flatten(), Y_test.flatten())
return precision, recall, F1, MSE
X_train, Y_train, X_test, Y_test, Y_daybefore, unnormalized_bases, window_size = load_data("Bitcoin Data.csv", 50)
print (X_train.shape)
print (Y_train.shape)
print (X_test.shape)
print (Y_test.shape)
print (Y_daybefore.shape)
print (unnormalized_bases.shape)
print (window_size)
model = initialize_model(window_size, 0.2, 'linear', 'mse', 'adam')
print model.summary()
model, training_time = fit_model(model, X_train, Y_train, 1024, 100, .05)
print "Training time", training_time, "seconds"
y_predict, real_y_test, real_y_predict, fig1 = test_model(model, X_test, Y_test, unnormalized_bases)
plt.show(fig1)
Y_daybefore, Y_test, delta_predict, delta_real, fig2 = price_change(Y_daybefore, Y_test, y_predict)
plt.show(fig)
delta_predict_1_0, delta_real_1_0 = binary_price(delta_predict, delta_real)
print delta_predict_1_0.shape
print delta_real_1_0.shape
true_pos, false_pos, true_neg, false_neg = find_positives_negatives(delta_predict_1_0, delta_real_1_0)
print "True positives:", true_pos
print "False positives:", false_pos
print "True negatives:", true_neg
print "False negatives:", false_neg
precision, recall, F1, MSE = calculate_statistics(true_pos, false_pos, true_neg, false_neg, y_predict, Y_test)
print "Precision:", precision
print "Recall:", recall
print "F1 score:", F1
print "Mean Squared Error:", MSE
class Client(object):
API_URL = 'https://api.binance.com/api'
WITHDRAW_API_URL = 'https://api.binance.com/wapi'
WEBSITE_URL = 'https://www.binance.com'
PUBLIC_API_VERSION = 'v1'
PRIVATE_API_VERSION = 'v3'
WITHDRAW_API_VERSION = 'v3'
SYMBOL_TYPE_SPOT = 'SPOT'
ORDER_STATUS_NEW = 'NEW'
ORDER_STATUS_PARTIALLY_FILLED = 'PARTIALLY_FILLED'
ORDER_STATUS_FILLED = 'FILLED'
ORDER_STATUS_CANCELED = 'CANCELED'
ORDER_STATUS_PENDING_CANCEL = 'PENDING_CANCEL'
ORDER_STATUS_REJECTED = 'REJECTED'
ORDER_STATUS_EXPIRED = 'EXPIRED'
KLINE_INTERVAL_1MINUTE = '1m'
KLINE_INTERVAL_3MINUTE = '3m'
KLINE_INTERVAL_5MINUTE = '5m'
KLINE_INTERVAL_15MINUTE = '15m'
KLINE_INTERVAL_30MINUTE = '30m'
KLINE_INTERVAL_1HOUR = '1h'
KLINE_INTERVAL_2HOUR = '2h'
KLINE_INTERVAL_4HOUR = '4h'
KLINE_INTERVAL_6HOUR = '6h'
KLINE_INTERVAL_8HOUR = '8h'
KLINE_INTERVAL_12HOUR = '12h'
KLINE_INTERVAL_1DAY = '1d'
KLINE_INTERVAL_3DAY = '3d'
KLINE_INTERVAL_1WEEK = '1w'
KLINE_INTERVAL_1MONTH = '1M'
SIDE_BUY = 'BUY'
SIDE_SELL = 'SELL'
ORDER_TYPE_LIMIT = 'LIMIT'
ORDER_TYPE_MARKET = 'MARKET'
ORDER_TYPE_STOP_LOSS = 'STOP_LOSS'
ORDER_TYPE_STOP_LOSS_LIMIT = 'STOP_LOSS_LIMIT'
ORDER_TYPE_TAKE_PROFIT = 'TAKE_PROFIT'
ORDER_TYPE_TAKE_PROFIT_LIMIT = 'TAKE_PROFIT_LIMIT'
ORDER_TYPE_LIMIT_MAKER = 'LIMIT_MAKER'
TIME_IN_FORCE_GTC = 'GTC'
TIME_IN_FORCE_IOC = 'IOC'
TIME_IN_FORCE_FOK = 'FOK'
ORDER_RESP_TYPE_ACK = 'ACK'
ORDER_RESP_TYPE_RESULT = 'RESULT'
ORDER_RESP_TYPE_FULL = 'FULL'
AGG_ID = 'a'
AGG_PRICE = 'p'
AGG_QUANTITY = 'q'
AGG_FIRST_TRADE_ID = 'f'
AGG_LAST_TRADE_ID = 'l'
AGG_TIME = 'T'
AGG_BUYER_MAKES = 'm'
AGG_BEST_MATCH = 'M'
def run():
initialize_arb()
pass
def initialize_arb():
welcome_message = "\n\n---------------------------------------------------------\n\n"
welcome_message+= "Hello and Welcome to the Binance Arbitrage Crypto Trader Bot Python Script\nCreated 2018 by Joaquin Roibal (#BlockchainEng)"
welcome_message+= "A quick 'run-through' will be performed to introduce you to the functionality of this bot\n"
welcome_message+="To learn more visit medium.com/#BlockchainEng or watch introductory Youtube Videos"
welcome_message+="\nCopyright 2018 by Joaquin Roibal\n"
bot_start_time = str(datetime.now())
welcome_message+= "\nBot Start Time: {}\n\n\n".format(bot_start_time)
print(welcome_message)
data_log_to_file(welcome_message)
time.sleep(5)
try:
status = client.get_system_status()
list_of_symbols = ['ETHBTC', 'BNBETH', 'BNBBTC']
list_of_symbols2 = ['ETHUSDT', 'BNBETH', 'BNBUSDT']
list_of_symbols3 = ['BTCUSDT', 'BNBBTC', 'BNBUSDT']
list_of_arb_sym = [list_of_symbols, list_of_symbols2, list_of_symbols3]
tickers = client.get_orderbook_tickers()
portfolio=[]
with open('Portfolio.txt') as f1:
read_data = f1.readlines()
for line in read_data:
load_portfolio = line
load_portfolio = list(load_portfolio[1:-1].split(','))
i=0
for val in load_portfolio:
if i == 4:
portfolio.append(str(datetime.now()))
break
portfolio.append(float(val))
i+=1
portf_msg = "Starting Portfolio: " + str(portfolio)
print(portf_msg)
portf_file_save(portfolio)
data_log_to_file(portf_msg)
while 1:
calc_profit_list =[]
for arb_market in list_of_arb_sym:
calc_profit_list.append(arbitrage_bin(arb_market, tickers, portfolio, 1, 1))
for profit1 in calc_profit_list:
data_log_to_file(str(profit1))
print(calc_profit_list)
exp_profit = 0
m = n = 0
for exch_market in calc_profit_list:
if exch_market[4]>exp_profit:
exp_profit = exch_market[4]
m = n
n+=1
profit_message = "\nMost Profitable Market: {} \nExpected Profit: {}%".format(list_of_arb_sym[m], exp_profit)
print(profit_message)
data_log_to_file(profit_message)
time.sleep(5)
arb_list_data = []
arb_start_time = str(datetime.now())
for i in range(0,5):
arb_list_data.append(arbitrage_bin(list_of_arb_sym[m], tickers, portfolio, 1, 1, 'Yes'))
time.sleep(30)
arb_end_time = str(datetime.now())
viz_arb_data(arb_list_data, list_of_arb_sym[m], arb_start_time, arb_end_time)
except:
print("\nFAILURE INITIALIZE\n")
def data_log_to_file(message):
with open('CryptoTriArbBot_DataLog.txt', 'a+') as f:
f.write(message)
def portf_file_save(portfolio):
with open('Portfolio.txt', 'a+') as f:
f.write('\n'+str(portfolio))
def arbitrage_bin(list_of_sym, tickers, portfolio, cycle_num=10, cycle_time=30, place_order='No'):
arb_message = "Beginning Binance Arbitrage Function Data Collection - Running\n"
print(arb_message)
data_log_to_file(arb_message)
time.sleep(2)
fee_percentage = 0.05
for i in range(0,1):
"""
pairs = []
for sym in symbols:
for symbol in coins:
if symbol in sym:
pairs.append(sym)
print(pairs)
#From Coin 1 to Coin 2 - ETH/BTC - Bid
#From Coin 2 to Coin 3 - ETH/LTC - Ask
#From Coin 3 to Coin 1 - BTC/LTC - Bid
arb_list = ['ETH/BTC'] #, 'ETH/LTC', 'BTC/LTC']
#Find 'closed loop' of currency rate pairs
j=0
while 1:
if j == 1:
final = arb_list[0][-3:] + '/' + str(arb_list[1][-3:])
print(final)
#if final in symbols:
arb_list.append(final)
break
for sym in symbols:
if sym in arb_list:
pass
else:
if j % 2 == 0:
if arb_list[j][0:3] == sym[0:3]:
if arb_list[j] == sym:
pass
else:
arb_list.append(sym)
print(arb_list)
j+=1
break
if j % 2 == 1:
if arb_list[j][-3:] == sym[-3:]:
if arb_list[j] == sym:
pass
else:
arb_list.append(sym)
print(arb_list)
j+=1
break
"""
print("List of Arbitrage Symbols:", list_of_sym)
list_exch_rate_list = []
if 1:
for k in range(0,cycle_num):
i=0
exch_rate_list = []
data_collect_message1 = "Data Collection Cycle Number: "+str(k) +'\n'
print(data_collect_message1)
data_log_to_file(data_collect_message1)
for sym in list_of_sym:
currency_pair = "Currency Pair: "+str(sym)+"\n"
print(currency_pair)
data_log_to_file(currency_pair)
if sym in list_of_sym:
"""if i == 0: #For first in triangle
depth = client.get_order_book(symbol=sym)
exch_rate_list.append(float(depth['bids'][0][0]))
print(depth['bids'][0][0])
"""
if i % 2==0:
depth = client.get_order_book(symbol=sym)
inv1 = depth['asks'][0][0]
exch_rate_list.append(float(inv1))
Exch_rate1 = "Exchange Rate: {}".format(depth['asks'][0][0]) +'\n'
print(Exch_rate1)
data_log_to_file(Exch_rate1)
if i == 1:
depth = client.get_order_book(symbol=sym)
inv2 = round(1.0/float(depth['bids'][0][0]),6)
exch_rate_list.append(float(inv2))
Exch_rate2 = "Exchange Rate: {}".format(depth['bids'][0][0])+'\n'
print(Exch_rate2)
data_log_to_file(Exch_rate2)
i+=1
else:
exch_rate_list.append(0)
exch_rate_list.append(datetime.now())
rate1 = exch_rate_list[0]
buy_price = "Buy: {}\n".format(rate1)
print(buy_price)
data_log_to_file(buy_price)
rate2 = float(exch_rate_list[2])*float(exch_rate_list[1])
sell_price = "Sell: {}\n".format(rate2)
print(sell_price)
data_log_to_file(sell_price)
if float(rate1)<float(rate2):
arb_1_msg = "Arbitrage Possibility - "
arb_profit = round((float(rate2)-float(rate1))/float(rate2)*100,3)
arb_1_msg += "Potential Profit (Percentage): "+str(arb_profit) +'%\n'
print(arb_1_msg)
data_log_to_file(arb_1_msg)
exch_rate_list.append(arb_profit)
if place_order == 'Yes':
place_order_msg = "PLACING ORDER"
print(place_order_msg)
data_log_to_file(place_order_msg)
portfolio = tri_arb_paper(portfolio, list_of_sym, exch_rate_list)
portf_file_save(portfolio)
else:
arb_2_msg = "No Arbitrage Possibility"
print(arb_2_msg)
data_log_to_file(arb_2_msg)
exch_rate_list.append(0)
exch_msg = "Exchange Rate List: " +str(exch_rate_list)+'\n'
print(exch_msg)
data_log_to_file(exch_msg)
time.sleep(cycle_time)
print('\nARBITRAGE FUNCTIONALITY SUCCESSFUL - Data of Exchange Rates Collected\n')
return exch_rate_list
def tri_arb_paper(portfolio1, sym_list, list_exch_rates):
tri_arb_paper_msg = "\nSTARTING TRI ARB PAPER TRADING FUNCTION\n"
print(tri_arb_paper_msg)
time.sleep(10)
data_log_to_file(tri_arb_paper_msg)
if sym_list[0][-3:]=='BTC':
portf_pos = 0
elif sym_list[0][-3:]=='ETH':
portf_pos = 1
elif sym_list[0][-3:]=='SDT':
portf_pos = 2
elif sym_list[0][-3:]=='BNB':
portf_pos = 3
start_amount = float(portfolio1[portf_pos])
amt_coin2 = start_amount / float(list_exch_rates[0])
amt_coin3 = amt_coin2 * float(list_exch_rates[1])
final_amount = amt_coin3 * float(list_exch_rates[2])
tri_arb_paper_msg = "Starting Amount: "+str(sym_list[0][-3:])+" "+str(start_amount)+'\n'
tri_arb_paper_msg += "Amount Coin 2: "+str(sym_list[0][0:3])+" "+str(amt_coin2)+'\n'
tri_arb_paper_msg += "Amount Coin 3: "+str(sym_list[2][0:3])+" "+str(amt_coin3) +'\n'
tri_arb_paper_msg += "Final Amount: "+str(sym_list[0][-3:])+" "+str(final_amount)+'\n'
print(tri_arb_paper_msg)
data_log_to_file(tri_arb_paper_msg)
portfolio1[portf_pos] = final_amount
portfolio1[-1] = str(datetime.now())
return portfolio1
def viz_arb_data(list_exch_rate_list, arb_market, start_time, end_time):
viz_msg = "RUNNING ARBITRAGE VISUALIZATION FUNCTIONALITY"
print(viz_msg)
data_log_to_file(viz_msg)
rateA = []
rateB = []
rateB_fee = []
price1 = []
price2 = []
time_list = []
profit_list = []
for rate in list_exch_rate_list:
rateA.append(rate[0])
rateB1 = round(float(rate[1])*float(rate[2]),6)
rateB.append(rateB1)
price1.append(rate[1])
price2.append(rate[2])
profit_list.append(rate[4])
time_list.append(rate[3])
viz_msg2 = "Rate A: {} \n Rate B: {} \n Projected Profit (%): {} ".format(rateA, rateB, profit_list) #rateB_fee))
print(viz_msg2)
data_log_to_file(viz_msg2)
fig, host = plt.subplots()
fig.subplots_adjust(right=0.75)
par1 = host.twinx()
par2 = host.twinx()
par2.spines["right"].set_position(("axes", 1.2))
make_patch_spines_invisible(par2)
par2.spines["right"].set_visible(True)
p1, = host.plot(time_list, rateA, "k", label = "{}".format(arb_market[0]))
p1, = host.plot(time_list, rateB, "k+", label = "{} * {}".format(arb_market[1], arb_market[2]))
p2, = par1.plot(time_list, price1, "b-", label="Price - {}".format(arb_market[1]))
p3, = par2.plot(time_list, price2, "g-", label="Price - {}".format(arb_market[2]))
host.set_xlabel("Time")
host.set(title='Triangular Arbitrage - Exchange: {}\nStart Time: {}\n End Time: {}\n'
'Copyright (c) 2018 #BlockchainEng'.format('Binance', start_time, end_time))
host.set_ylabel("Exchange Rate")
par1.set_ylabel("Price - {}".format(arb_market[1]))
par2.set_ylabel("Price - {}".format(arb_market[2]))
host.yaxis.label.set_color(p1.get_color())
tkw = dict(size=4, width=1.5)
host.tick_params(axis='y', colors=p1.get_color(), **tkw)
par1.tick_params(axis='y', colors=p2.get_color(), **tkw)
par2.tick_params(axis='y', colors=p3.get_color(), **tkw)
host.tick_params(axis='x', **tkw)
lines = [p1, p2, p3]
host.legend(lines, [l.get_label() for l in lines])
fname = "Binance_Test.png"
plt.savefig(fname)
""", dpi=None, facecolor='w', edgecolor='w',
orientation='portrait', papertype=None, format=None,
transparent=False, bbox_inches=None, pad_inches=0.1,
frameon=None)"""
print_figure_message = "Data Collected Figure Printed & Saved - " + str(fname)
print(print_figure_message)
data_log_to_file(print_figure_message)
def make_patch_spines_invisible(ax):
ax.set_frame_on(True)
ax.patch.set_visible(False)
for sp in ax.spines.values():
sp.set_visible(False)
"""
def market_depth(sym, num_entries=20):
#Get market depth
#Retrieve and format market depth (order book) including time-stamp
i=0 #Used as a counter for number of entries
#print("Order Book: ", convert_time_binance(client.get_server_time()))
depth = client.get_order_book(symbol=sym)
print(depth)
print(depth['asks'][0])
ask_tot=0.0
ask_price =[]
ask_quantity = []
bid_price = []
bid_quantity = []
bid_tot = 0.0
place_order_ask_price = 0
place_order_bid_price = 0
max_order_ask = 0
max_order_bid = 0
print("\n", sym, "\nDepth ASKS:\n")
print("Price Amount")
for ask in depth['asks']:
if i<num_entries:
if float(ask[1])>float(max_order_ask):
#Determine Price to place ask order based on highest volume
max_order_ask=ask[1]
place_order_ask_price=round(float(ask[0]),5)-0.0001
#ask_list.append([ask[0], ask[1]])
ask_price.append(float(ask[0]))
ask_tot+=float(ask[1])
ask_quantity.append(ask_tot)
#print(ask)
i+=1
j=0 #Secondary Counter for Bids
print("\n", sym, "\nDepth BIDS:\n")
print("Price Amount")
for bid in depth['bids']:
if j<num_entries:
if float(bid[1])>float(max_order_bid):
#Determine Price to place ask order based on highest volume
max_order_bid=bid[1]
place_order_bid_price=round(float(bid[0]),5)+0.0001
bid_price.append(float(bid[0]))
bid_tot += float(bid[1])
bid_quantity.append(bid_tot)
#print(bid)
j+=1
return ask_price, ask_quantity, bid_price, bid_quantity, place_order_ask_price, place_order_bid_price
#Plot Data
"""
if __name__ == "__main__":
run()
Ideally, the code is supposed to find arbitrage opportunities in predicted price changes and execute orders accordingly.

why do I get Partition index must be integer in KNeighborsclassifier?

I am trying to use sciklearn to find the goodness of a KNeighborsClassifier on my data.
My code is below (X is a matrix with NUM_MATCHES rows and NUM_FEATURES columns, Y is a column vector with NUM_MATCHES rows). I keep getting the error
TypeError: Partition index must be integer
on this line of the code below
rad_prob = estimator.predict_proba(np.reshape(radiant_query,(1,-1)))[0][1]
I am new to sciklearn not sure what the issue is.
from sklearn.neighbors import KNeighborsClassifier
from sklearn import cross_validation
import numpy as np
K=2
FOLDS_FINISHED=0
NUM_HEROES = 78
NUM_FEATURES = NUM_HEROES*2
def score(estimator, X, y):
global FOLDS_FINISHED
correct_predictions = 0
for i, radiant_query in enumerate(X):
dire_query = np.concatenate((radiant_query[NUM_HEROES:NUM_FEATURES], radiant_query[0:NUM_HEROES]))
rad_prob = estimator.predict_proba(np.reshape(radiant_query,(1,-1)))[0][1]
dire_prob = estimator.predict_proba(np.reshape(dire_query,(1,-1)))[0][0]
overall_prob = (rad_prob + dire_prob) / 2
prediction = 1 if (overall_prob > 0.5) else -1
result = 1 if prediction == y[i] else 0
correct_predictions += result
FOLDS_FINISHED += 1
accuracy = float(correct_predictions) / len(X)
print ('Accuracy: %f' % accuracy)
return accuracy
preprocessed = np.load('train_9000.npz')
X = preprocessed['X']
Y = preprocessed['Y']
NUM_MATCHES = 3000
X = X[0:NUM_MATCHES]
Y = Y[0:NUM_MATCHES]
k_fold = cross_validation.KFold(n=NUM_MATCHES, n_folds=K, shuffle=True)
d_tries = [3, 4, 5]
d_accuracy_pairs = []
for d_index, d in enumerate(d_tries):
model = KNeighborsClassifier(n_neighbors=NUM_MATCHES/K,metric=my_distance,weights=poly_param(d))
model_accuracies = cross_validation.cross_val_score(model, X, Y, scoring=score, cv=k_fold)
model_accuracy = model_accuracies.mean()
d_accuracy_pairs.append((d, model_accuracy))

Resources