Create a multiprocessing script from a system consisting of multiple files - python-3.x
Hi all, I'm not too experienced with Python, but I need to do some research. The problem mainly consists of a file that solves a large number of non-linear equations, which takes quite some time. The idea is to implement multiprocessing in some way. I was wondering whether there is a "correct" way to do this: since the main file calls the "computational" script, should I focus on the main file or on the computational file for multiprocessing? There are more files involved, but this should be a start.
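To make the question concrete: the kind of run-level parallelism I have in mind would give each worker process its own model, since the loops inside a single run are sequential (every step starts from the previous state) and presumably can't just be split across processes. A rough sketch — run_scenario and the rates are made up for illustration, and whatever a worker returns has to be picklable:

import multiprocessing as mp
import pandas as pd

def run_scenario(inj_rate):
    # Hypothetical helper: each worker process builds and runs its own model.
    # Model comes from model_benchmark, as in the main file below.
    from model_benchmark import Model
    n = Model()
    n.init()
    n.inj_rate = inj_rate
    n.injection_temperature = 273.15 + 22
    n.set_boundary_conditions(injectionrate=inj_rate, tempinj=273.15 + 22)
    n.run_python(50)
    # Return plain, picklable data rather than the model object itself
    return pd.DataFrame.from_dict(n.physics.engine.time_data)

if __name__ == '__main__':
    rates = [0.5, 1.0, 1.5, 2.0]  # hypothetical injection rates
    with mp.Pool(processes=4) as pool:
        results = pool.map(run_scenario, rates)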
Main file:
import numpy as np
import pandas as pd
from properties_basic import *
import sys
sys.path.append('../physics_sup/')
import os, glob, shutil
for filename in glob.glob("obl_point_data_*"):
os.remove(filename)
for filename in glob.glob("restart.pkl"):
os.remove(filename)
for filename in glob.glob("data.pvd"):
os.remove(filename)
for filename in glob.glob("__pycache__/conversio*"):
os.remove(filename)
for filename in glob.glob("__pycache__/mode*"):
os.remove(filename)
for filename in glob.glob("__pycache__/operato*"):
os.remove(filename)
for filename in glob.glob("__pycache__/physi*"):
os.remove(filename)
for filename in glob.glob("__pycache__/prope*"):
os.remove(filename)
from model_benchmark import Model
from darts.engines import value_vector, redirect_darts_output
import matplotlib.pyplot as plt
grid_1D = False
redirect_darts_output('run.log')
n = Model()
n.init()
injectionsrate = np.genfromtxt('injectionrate.txt')[0:].astype(float) #np.genfromtxt('InjectionMonthly.txt')[0:].astype(float)#
injectionsrate = injectionsrate / 20
#mu_w = CP.PropsSI('V', 'T', 22, 'P|liquid', bar2pa(130), 'Water') * 1000
#n.property_container.viscosity_ev = dict([('wat', ViscosityConst(mu_w))])
NT = 16 # 16
runtime = 50 # 365
#increase = np.repeat(0.000005,37)
#print(increase)
for i in range(NT):
n.inj_rate = injectionsrate[i]
n.injection_temperature = 273.15 + 22
n.set_boundary_conditions(injectionrate=injectionsrate[i], tempinj=273.15+22)
#n.property_container.kinetic_rate_ev = kinetic_advanced(comp_min=1e-11, rsa=int(2e-05 + increase[NT]))
n.run_python(runtime)
time_data = pd.DataFrame.from_dict(n.physics.engine.time_data)
time_data.to_pickle("darts_time_data.pkl")
writer = pd.ExcelWriter('time_data.xlsx')
time_data.to_excel(writer, 'Sheet1')
writer.save()
writer.close()
n.export_vtk()
n.save_restart_data()
n.load_restart_data()
injectionsrate2 = np.genfromtxt('injectionrate.txt')[15:].astype(float) #np.genfromtxt('InjectionMonthly.txt')[191:].astype(float)#
injectionsrate2 = injectionsrate2 / 20 #*2
#mu_w2 = CP.PropsSI('V', 'T', 10, 'P|liquid', bar2pa(130), 'Water') * 1000
#n.property_container.viscosity_ev = dict([('wat', ViscosityConst(1.3))])
n.property_container.kinetic_rate_ev = kinetic_advanced(comp_min=1e-11, rsa=2e-03)
days = 200
NT2 = 21 #21 # 252
runtime2 = 50 # 30
for i in range(NT2):
n.inj_rate = injectionsrate2[i]
n.injection_temperature = 273.15 + 10
n.set_boundary_conditions(injectionrate=injectionsrate2[i], tempinj=273.15 + 10)
n.run_python(runtime2)
time_data2 = pd.DataFrame.from_dict(n.physics.engine.time_data)
time_data2.to_pickle("darts_time_data2.pkl")
writer = pd.ExcelWriter('time_data2.xlsx')
time_data2.to_excel(writer, 'Sheet1')
writer.save()
writer.close()
n.export_vtk()
n.print_timers()
n.print_stat()
import darts.tools.plot_darts
from darts.tools.plot_darts import *
p_w = 'I1'
#ax = plot_water_rate_darts(p_w, time_data)
time_dataInjection = pd.read_pickle("darts_time_data.pkl")
time_dataInjection2= pd.read_pickle("darts_time_data2.pkl")
#ax = darts.tools.plot_darts.plot_water_rate_darts(p_w, time_dataInjection)
ax2 = darts.tools.plot_darts.plot_water_rate_darts(p_w, time_dataInjection2)
p_w2 = 'P1'
#ax3 = darts.tools.plot_darts.plot_water_rate_darts(p_w2, time_dataInjection)
ax4 = darts.tools.plot_darts.plot_water_rate_darts(p_w2, time_dataInjection2)
ax5 = darts.tools.plot_darts.plot_bhp_darts(p_w, time_dataInjection2)
plt.show()
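One thing I could do first is profile a short run to see whether the time is spent in my Python code or inside the compiled DARTS engine — if it's the engine, multiprocessing inside the time loop presumably won't help, and parallelising across independent runs is the realistic option. A minimal check with the standard-library profiler, e.g. at the end of the main file:

import cProfile
import pstats

# Profile one (short) run and show the 20 most expensive calls by cumulative time
cProfile.run('n.run_python(50)', 'run.prof')
pstats.Stats('run.prof').sort_stats('cumulative').print_stats(20)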
The non-linear calculator:
from math import fabs
import pickle
import os
import numpy as np
from darts.engines import *
from darts.engines import print_build_info as engines_pbi
from darts.physics import print_build_info as physics_pbi
from darts.print_build_info import print_build_info as package_pbi
class DartsModel:
def __init__(self):
# print out build information
engines_pbi()
physics_pbi()
package_pbi()
self.timer = timer_node() # Create time_node object for time record
self.timer.start() # Start time record
self.timer.node["simulation"] = timer_node() # Create timer.node called "simulation" to record simulation time
self.timer.node["newton update"] = timer_node()
self.timer.node["initialization"] = timer_node()  # Create timer.node called "initialization" to record initialization time
self.timer.node["initialization"].start() # Start recording "initialization" time
self.params = sim_params() # Create sim_params object to set simulation parameters
self.timer.node["initialization"].stop() # Stop recording "initialization" time
def init(self):
self.reservoir.init_wells()
self.physics.init_wells(self.reservoir.wells)
self.set_initial_conditions()
self.set_boundary_conditions()
self.set_op_list()
self.reset()
def reset(self):
self.physics.engine.init(self.reservoir.mesh, ms_well_vector(self.reservoir.wells),
op_vector(self.op_list),
self.params, self.timer.node["simulation"])
def set_initial_conditions(self):
pass
def set_boundary_conditions(self):
pass
def set_op_list(self):
self.op_list = [self.physics.acc_flux_itor]
def run(self, days=0):
if days:
runtime = days
else:
runtime = self.runtime
self.physics.engine.run(runtime)
def run_python(self, days=0, restart_dt=0, log_3d_body_path=0, timestep_python=False):
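# Adaptive time stepping: dt is initialised from first_ts/restart_dt/max_ts, multiplied by
# mult_ts after each converged step (capped at max_ts), and divided by mult_ts after a failed step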
if days:
runtime = days
else:
runtime = self.runtime
mult_dt = self.params.mult_ts
max_dt = self.params.max_ts
self.e = self.physics.engine
t = self.e.t
if fabs(t) < 1e-15:
dt = self.params.first_ts
elif restart_dt > 0:
dt = restart_dt
else:
dt = self.params.max_ts
runtime += t
ts = 0
if log_3d_body_path and self.physics.n_vars == 3:
self.body_path_start()
while t < runtime:
if timestep_python:
converged = self.e.run_timestep(dt, t)
else:
converged = self.run_timestep_python(dt, t)
if converged:
t += dt
ts = ts + 1
print("# %d \tT = %3g\tDT = %2g\tNI = %d\tLI=%d"
% (ts, t, dt, self.e.n_newton_last_dt, self.e.n_linear_last_dt))
dt *= mult_dt
if dt > max_dt:
dt = max_dt
if t + dt > runtime:
dt = runtime - t
if log_3d_body_path and self.physics.n_vars == 3:
self.body_path_add_bodys(t)
nb_begin = self.reservoir.nx * self.reservoir.ny * (self.body_path_map_layer - 1) * 3
nb_end = self.reservoir.nx * self.reservoir.ny * (self.body_path_map_layer) * 3
self.save_matlab_map(self.body_path_axes[0] + '_ts_' + str(ts), self.e.X[nb_begin:nb_end:3])
self.save_matlab_map(self.body_path_axes[1] + '_ts_' + str(ts), self.e.X[nb_begin + 1:nb_end:3])
self.save_matlab_map(self.body_path_axes[2] + '_ts_' + str(ts), self.e.X[nb_begin + 2:nb_end:3])
else:
dt /= mult_dt
print("Cut timestep to %2.3f" % dt)
if dt < 1e-8:
break
self.e.t = runtime
print("TS = %d(%d), NI = %d(%d), LI = %d(%d)" % (self.e.stat.n_timesteps_total, self.e.stat.n_timesteps_wasted,
self.e.stat.n_newton_total, self.e.stat.n_newton_wasted,
self.e.stat.n_linear_total, self.e.stat.n_linear_wasted))
def load_restart_data(self, filename='restart.pkl'):
if os.path.exists(filename):
with open(filename, "rb") as fp:
data = pickle.load(fp)
days, X, arr_n = data
self.physics.engine.t = days
self.physics.engine.X = value_vector(X)
self.physics.engine.Xn = value_vector(X)
self.physics.engine.op_vals_arr_n = value_vector(arr_n)
def save_restart_data(self, filename='restart.pkl'):
"""
Function to save the simulation data for restart usage.
:param filename: Name of the file where restart_data stores.
"""
t = np.copy(self.physics.engine.t)
X = np.copy(self.physics.engine.X)
arr_n = np.copy(self.physics.engine.op_vals_arr_n)
data = [t, X, arr_n]
with open(filename, "wb") as fp:
pickle.dump(data, fp, 4)
def check_performance(self, overwrite=0, diff_norm_normalized_tol=1e-10, diff_abs_max_normalized_tol=1e-7,
rel_diff_tol=1, perf_file=''):
fail = 0
data_et = self.load_performance_data(perf_file)
if data_et and not overwrite:
data = self.get_performance_data()
nb = self.reservoir.mesh.n_res_blocks
nv = self.physics.n_vars
for v in range(nv):
sol_et = data_et['solution'][v:nb * nv:nv]
diff = data['solution'][v:nb * nv:nv] - sol_et
sol_range = np.max(sol_et) - np.min(sol_et)
diff_abs = np.abs(diff)
diff_norm = np.linalg.norm(diff)
diff_norm_normalized = diff_norm / len(sol_et) / sol_range
diff_abs_max_normalized = np.max(diff_abs) / sol_range
if diff_norm_normalized > diff_norm_normalized_tol or diff_abs_max_normalized > diff_abs_max_normalized_tol:
fail += 1
print(
'#%d solution check failed for variable %s (range %f): L2(diff)/len(diff)/range = %.2E (tol %.2E), max(abs(diff))/range %.2E (tol %.2E), max(abs(diff)) = %.2E' \
% (fail, self.physics.vars[v], sol_range, diff_norm_normalized, diff_norm_normalized_tol,
diff_abs_max_normalized, diff_abs_max_normalized_tol, np.max(diff_abs)))
for key, value in sorted(data.items()):
if key == 'solution' or type(value) != int:
continue
reference = data_et[key]
if reference == 0:
if value != 0:
print('#%d parameter %s is %d (was 0)' % (fail, key, value))
fail += 1
else:
rel_diff = (value - data_et[key]) / reference * 100
if abs(rel_diff) > rel_diff_tol:
print('#%d parameter %s is %d (was %d, %+.2f%%)' % (fail, key, value, reference, rel_diff))
fail += 1
if not fail:
print('OK, \t%.2f s' % self.timer.node['simulation'].get_timer())
return 0
else:
print('FAIL, \t%.2f s' % self.timer.node['simulation'].get_timer())
return 1
else:
self.save_performance_data(perf_file)
print('SAVED')
return 0
def get_performance_data(self):
perf_data = dict()
perf_data['solution'] = np.copy(self.physics.engine.X)
perf_data['reservoir blocks'] = self.reservoir.mesh.n_res_blocks
perf_data['variables'] = self.physics.n_vars
perf_data['OBL resolution'] = self.physics.n_points
perf_data['operators'] = self.physics.n_ops
perf_data['timesteps'] = self.physics.engine.stat.n_timesteps_total
perf_data['wasted timesteps'] = self.physics.engine.stat.n_timesteps_wasted
perf_data['newton iterations'] = self.physics.engine.stat.n_newton_total
perf_data['wasted newton iterations'] = self.physics.engine.stat.n_newton_wasted
perf_data['linear iterations'] = self.physics.engine.stat.n_linear_total
perf_data['wasted linear iterations'] = self.physics.engine.stat.n_linear_wasted
sim = self.timer.node['simulation']
jac = sim.node['jacobian assembly']
perf_data['simulation time'] = sim.get_timer()
perf_data['linearization time'] = jac.get_timer()
perf_data['linear solver time'] = sim.node['linear solver solve'].get_timer() + sim.node[
'linear solver setup'].get_timer()
interp = jac.node['interpolation']
perf_data['interpolation incl. generation time'] = interp.get_timer()
return perf_data
def save_performance_data(self, file_name=''):
import platform
if file_name == '':
file_name = 'perf_' + platform.system().lower()[:3] + '.pkl'
data = self.get_performance_data()
with open(file_name, "wb") as fp:
pickle.dump(data, fp, 4)
@staticmethod
def load_performance_data(file_name=''):
import platform
if file_name == '':
file_name = 'perf_' + platform.system().lower()[:3] + '.pkl'
if os.path.exists(file_name):
with open(file_name, "rb") as fp:
return pickle.load(fp)
return 0
def print_timers(self):
print(self.timer.print("", ""))
def print_stat(self):
self.physics.engine.print_stat()
def plot_layer_map(self, map_data, k, name, transpose=0):
import plotly
import plotly.graph_objs as go
nxny = self.reservoir.nx * self.reservoir.ny
layer_indexes = np.arange(nxny * (k - 1), nxny * k)
layer_data = np.zeros(nxny)
# for correct visualization of inactive cells
layer_data.fill(np.nan)
active_mask = np.where(self.reservoir.discretizer.global_to_local[layer_indexes] > -1)
layer_data[active_mask] = map_data[self.reservoir.discretizer.global_to_local[layer_indexes][active_mask]]
layer_data = layer_data.reshape(self.reservoir.ny, self.reservoir.nx)
if transpose:
layer_data = layer_data.transpose()
y_axis = dict(scaleratio=1, scaleanchor='x', title='X, block')
x_axis = dict(title='Y, block')
else:
x_axis = dict(scaleratio=1, scaleanchor='x', title='X, block')
y_axis = dict(title='Y, block')
data = [go.Heatmap(
z=layer_data)]
layout = go.Layout(title='%s, layer %d' % (name, k),
xaxis=x_axis,
yaxis=y_axis)
fig = go.Figure(data=data, layout=layout)
plotly.offline.plot(fig, filename='%s_%d_map.html' % (name, k))
def plot_layer_map_offline(self, map_data, k, name, transpose=0):
import plotly
plotly.offline.init_notebook_mode()
self.plot_layer_map(map_data, k, name, transpose)
def plot_layer_surface(self, map_data, k, name, transpose=0):
import plotly
import plotly.graph_objs as go
nxny = self.reservoir.nx * self.reservoir.ny
layer_indexes = np.arange(nxny * (k - 1), nxny * k)
layer_data = np.zeros(nxny)
# for correct visualization of inactive cells
layer_data.fill(np.nan)
active_mask = np.where(self.reservoir.discretizer.global_to_local[layer_indexes] > -1)
layer_data[active_mask] = map_data[self.reservoir.discretizer.global_to_local[layer_indexes][active_mask]]
layer_data = layer_data.reshape(self.reservoir.ny, self.reservoir.nx)
if transpose:
layer_data = layer_data.transpose()
data = [go.Surface(z=layer_data)]
plotly.offline.plot(data, filename='%s_%d_surf.html' % (name, k))
def plot_geothermal_temp_layer_map(self, X, k, name, transpose=0):
import plotly
import plotly.graph_objs as go
import numpy as np
from darts.models.physics.iapws.iapws_property import iapws_temperature_evaluator
nxny = self.reservoir.nx * self.reservoir.ny
temperature = iapws_temperature_evaluator()
layer_pres_data = np.zeros(nxny)
layer_enth_data = np.zeros(nxny)
layer_indexes = np.arange(nxny * (k - 1), nxny * k)
active_mask = np.where(self.reservoir.discretizer.global_to_local[layer_indexes] > -1)
layer_pres_data[active_mask] = X[2 * self.reservoir.discretizer.global_to_local[layer_indexes][active_mask]]
layer_enth_data[active_mask] = X[2 * self.reservoir.discretizer.global_to_local[layer_indexes][active_mask] + 1]
# used_data = map_data[2 * nxny * (k-1): 2 * nxny * k]
T = np.zeros(nxny)
T.fill(np.nan)
for i in range(0, nxny):
if self.reservoir.discretizer.global_to_local[nxny * (k - 1) + i] > -1:
T[i] = temperature.evaluate([layer_pres_data[i], layer_enth_data[i]])
layer_data = T.reshape(self.reservoir.ny, self.reservoir.nx)
if transpose:
layer_data = layer_data.transpose()
y_axis = dict(scaleratio=1, scaleanchor='x', title='X, block')
x_axis = dict(title='Y, block')
else:
x_axis = dict(scaleratio=1, scaleanchor='x', title='X, block')
y_axis = dict(title='Y, block')
data = [go.Heatmap(
z=layer_data)]
layout = go.Layout(title='%s, layer %d' % (name, k),
xaxis=x_axis,
yaxis=y_axis)
fig = go.Figure(data=data, layout=layout)
plotly.offline.plot(fig, filename='%s_%d_map.html' % (name, k))
def plot_1d(self, map_data, name):
import plotly
import plotly.graph_objs as go
import numpy as np
nx = self.reservoir.nx
data = [go.Scatter(x=np.linspace(0, 1, nx), y=map_data[1:nx])]
plotly.offline.plot(data, filename='%s_surf.html' % name)
def plot_1d_all(self, map_data):
import plotly
import plotly.graph_objs as go
import numpy as np
nx = self.reservoir.nx
nc = self.physics.n_components
data = []
for i in range(nc - 1):
data.append(go.Scatter(x=np.linspace(0, 1, nx), y=map_data[i + 1::nc][1:nx], line=dict(dash='dash')))
plotly.offline.plot(data, filename='Compositions.html')
def plot_cumulative_totals_mass(self):
import plotly.offline as po
import plotly.graph_objs as go
import numpy as np
import pandas as pd
nc = self.physics.n_components
darts_df = pd.DataFrame(self.physics.engine.time_data)
total_df = pd.DataFrame()
total_df['time'] = darts_df['time']
time_diff = darts_df['time'].diff()
time_diff[0] = darts_df['time'][0]
for c in range(nc):
total_df['Total injection c %d' % c] = 0
total_df['Total production c %d' % c] = 0
search_str = ' : c %d rate (Kmol/day)' % c
for col in darts_df.columns:
if search_str in col:
inj_mass = darts_df[col] * time_diff
prod_mass = darts_df[col] * time_diff
# assuming that any well can inject and produce over the whole time
inj_mass[inj_mass < 0] = 0
prod_mass[prod_mass > 0] = 0
total_df['Total injection c %d' % c] += inj_mass
total_df['Total production c %d' % c] -= prod_mass
data = []
for c in range(nc):
data.append(go.Scatter(x=total_df['time'], y=total_df['Total injection c %d' % c].cumsum(),
name='%s injection' % self.physics.components[c]))
data.append(go.Scatter(x=total_df['time'], y=total_df['Total production c %d' % c].cumsum(),
name='%s production' % self.physics.components[c]))
layout = go.Layout(title='Cumulative total masses (kmol)', xaxis=dict(title='Time (days)'),
yaxis=dict(title='Mass (kmols)'))
fig = go.Figure(data=data, layout=layout)
po.plot(fig, filename='Cumulative_totals_mass.html')
def plot_mass_balance_error(self):
import plotly.offline as po
import plotly.graph_objs as go
import numpy as np
import pandas as pd
nc = self.physics.n_components
darts_df = pd.DataFrame(self.physics.engine.time_data)
total_df = pd.DataFrame()
total_df['time'] = darts_df['time']
time_diff = darts_df['time'].diff()
time_diff[0] = darts_df['time'][0]
for c in range(nc):
total_df['Total source-sink c %d' % c] = 0
search_str = ' : c %d rate (Kmol/day)' % c
for col in darts_df.columns:
if search_str in col:
mass = darts_df[col] * time_diff
total_df['Total source-sink c %d' % c] += mass
data = []
for c in range(nc):
total_df['Total mass balance error c %d' % c] = darts_df['FIPS c %d (kmol)' % c] - total_df[
'Total source-sink c %d' % c].cumsum()
total_df['Total mass balance error c %d' % c] -= darts_df['FIPS c %d (kmol)' % c][0] - \
total_df['Total source-sink c %d' % c][0]
data.append(go.Scatter(x=total_df['time'], y=total_df['Total mass balance error c %d' % c],
name='%s' % self.physics.components[c]))
layout = go.Layout(title='Mass balance error (kmol)', xaxis=dict(title='Time (days)'),
yaxis=dict(title='Mass (kmols)'))
fig = go.Figure(data=data, layout=layout)
po.plot(fig, filename='Mass_balance_error.html')
def plot_FIPS(self):
import plotly.offline as po
import plotly.graph_objs as go
import numpy as np
import pandas as pd
nc = self.physics.n_components
darts_df = pd.DataFrame(self.physics.engine.time_data)
data = []
for c in range(nc):
data.append(go.Scatter(x=darts_df['time'], y=darts_df['FIPS c %d (kmol)' % c],
name='%s' % self.physics.components[c]))
layout = go.Layout(title='FIPS (kmol)', xaxis=dict(title='Time (days)'),
yaxis=dict(title='Mass (kmols)'))
fig = go.Figure(data=data, layout=layout)
po.plot(fig, filename='FIPS.html')
def plot_totals_mass(self):
import plotly.offline as po
import plotly.graph_objs as go
import numpy as np
import pandas as pd
nc = self.physics.n_components
darts_df = pd.DataFrame(self.physics.engine.time_data)
total_df = pd.DataFrame()
total_df['time'] = darts_df['time']
for c in range(nc):
total_df['Total injection c %d' % c] = 0
total_df['Total production c %d' % c] = 0
search_str = ' : c %d rate (Kmol/day)' % c
for col in darts_df.columns:
if search_str in col:
inj_mass = darts_df[col].copy()
prod_mass = darts_df[col].copy()
# assuming that any well can inject and produce over the whole time
inj_mass[inj_mass < 0] = 0
prod_mass[prod_mass > 0] = 0
total_df['Total injection c %d' % c] += inj_mass
total_df['Total production c %d' % c] -= prod_mass
data = []
for c in range(nc):
data.append(go.Scatter(x=total_df['time'], y=total_df['Total injection c %d' % c],
name='%s injection' % self.physics.components[c]))
data.append(go.Scatter(x=total_df['time'], y=total_df['Total production c %d' % c],
name='%s production' % self.physics.components[c]))
layout = go.Layout(title='Total mass rates (kmols/day)', xaxis=dict(title='Time (days)'),
yaxis=dict(title='Rate (kmols/day)'))
fig = go.Figure(data=data, layout=layout)
po.plot(fig, filename='Totals_mass_rates.html')
def plot_1d_compare(self, map_data1, map_data2):
import plotly
import plotly.graph_objs as go
import numpy as np
nx = self.reservoir.nx
nc = self.physics.n_components
data = []
for i in range(nc - 1):
data.append(go.Scatter(x=np.linspace(0, 1, nx), y=map_data1[i + 1::nc][1:nx],
name="Comp = %d, dt = 5 days" % (i + 1)))
for i in range(nc - 1):
data.append(go.Scatter(x=np.linspace(0, 1, nx), y=map_data2[i + 1::nc][1:nx],
name="Comp = %d, dt = 50 days" % (i + 1), line=dict(dash='dot')))
plotly.offline.plot(data, filename='Compositions.html')
def body_path_start(self):
with open('body_path.txt', "w") as fp:
itor = self.physics.acc_flux_itor
self.processed_body_idxs = set()
for i, p in enumerate(itor.axis_points):
fp.write('%d %lf %lf %s\n' % (p, itor.axis_min[i], itor.axis_max[i], self.body_path_axes[i]))
fp.write('Body Index Data\n')
def body_path_add_bodys(self, time):
with open('body_path.txt', "a") as fp:
fp.write('T=%lf\n' % time)
itor = self.physics.acc_flux_itor
all_idxs = set(itor.body_data.keys())
new_idxs = all_idxs - self.processed_body_idxs
for i in new_idxs:
fp.write('%d\n' % i)
self.processed_body_idxs = all_idxs
def save_matlab_map(self, name, np_arr):
import scipy.io
scipy.io.savemat(name + '.mat', dict(x=np_arr))
def export_vtk(self, file_name='data', local_cell_data=None, global_cell_data=None, vars_data_dtype=np.float32,
               export_grid_data=True):
    # Avoid mutable default arguments: a shared default dict would keep entries across calls
    if local_cell_data is None:
        local_cell_data = {}
    if global_cell_data is None:
        global_cell_data = {}
# get current engine time
t = self.physics.engine.t
nb = self.reservoir.mesh.n_res_blocks
nv = self.physics.n_vars
X = np.array(self.physics.engine.X, copy=False)
for v in range(nv):
local_cell_data[self.physics.vars[v]] = X[v:nb * nv:nv].astype(vars_data_dtype)
self.reservoir.export_vtk(file_name, t, local_cell_data, global_cell_data, export_grid_data)
# destructor to force to destroy all created C objects and free memory
def __del__(self):
for name in list(vars(self).keys()):
delattr(self, name)
def run_timestep_python(self, dt, t):
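# Newton loop: run up to max_i_newton single Newton iterations, checking the reservoir and well
# residuals against tolerance_newton and detecting stationary points; each pass solves the
# linear system and applies the Newton update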
max_newt = self.params.max_i_newton
max_residual = np.zeros(max_newt + 1)
self.e.n_linear_last_dt = 0
well_tolerance_coefficient = 1e2
self.timer.node['simulation'].start()
for i in range(max_newt+1):
self.e.run_single_newton_iteration(dt)
self.e.newton_residual_last_dt = self.e.calc_newton_residual()
max_residual[i] = self.e.newton_residual_last_dt
counter = 0
for j in range(i):
if abs(max_residual[i] - max_residual[j])/max_residual[i] < 1e-3:
counter += 1
if counter > 2:
print("Stationary point detected!")
break
self.e.well_residual_last_dt = self.e.calc_well_residual()
self.e.n_newton_last_dt = i
# check tolerance if it converges
if ((self.e.newton_residual_last_dt < self.params.tolerance_newton and self.e.well_residual_last_dt < well_tolerance_coefficient * self.params.tolerance_newton )
or self.e.n_newton_last_dt == self.params.max_i_newton):
if (i > 0): # min_i_newton
break
r_code = self.e.solve_linear_equation()
self.timer.node["newton update"].start()
self.e.apply_newton_update(dt)
self.timer.node["newton update"].stop()
# End of newton loop
converged = self.e.post_newtonloop(dt, t)
self.timer.node['simulation'].stop()
return converged
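For what it's worth, the concrete use case would be running several variants of the second stage in parallel, each resuming from the restart.pkl written by save_restart_data. Roughly as follows — run_variant and the rsa values are made up, and kinetic_advanced is assumed to come from properties_basic, as in the main file:

from concurrent.futures import ProcessPoolExecutor
import pandas as pd

def run_variant(rsa):
    # Each worker builds its own model and resumes from the shared restart file
    from model_benchmark import Model
    from properties_basic import kinetic_advanced  # assumed location, as in the main file
    n = Model()
    n.init()
    n.load_restart_data('restart.pkl')  # read-only: every worker starts from the same state
    n.property_container.kinetic_rate_ev = kinetic_advanced(comp_min=1e-11, rsa=rsa)
    # set the variant's boundary conditions / injection rate here, as in the main file
    n.run_python(50)
    # Return plain, picklable data rather than the model object
    return rsa, pd.DataFrame.from_dict(n.physics.engine.time_data)

if __name__ == '__main__':
    with ProcessPoolExecutor(max_workers=4) as ex:
        for rsa, df in ex.map(run_variant, [2e-3, 4e-3, 8e-3]):
            df.to_pickle('darts_time_data_rsa_%g.pkl' % rsa)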