Full diagonalization NumPy vs SciPy - python-3.x

I have to make use of diagonalization routines to obtain all eigenpairs of a Hermitian complex matrix. I am a bit limited by performance, since I need to repeat the operation thousands of times and my matrices are roughly 8000x8000. I have created a little comparison between the NumPy and SciPy routines for diagonalization of Hermitian matrices, and these are the times I got on a machine with 6 physical cores:
I am observing that for 8000x8000 matrices this scales to ~0.8 minutes, and I need to repeat the process 50000 times. Is there something I am missing here, or are these actually the expected performance figures? Overall, this all looks very slow, especially since it needs to be repeated so many times. In fact, on a 30-core machine I observe little performance gain. I am using Python 3.8 under the Anaconda distribution, so NumPy is linked against MKL.
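As a side note, a quick way to confirm which BLAS backend NumPy is linked against, and how many threads it actually uses, is the sketch below (threadpoolctl is an optional package and may need to be installed separately):
import numpy as np

# Shows the BLAS/LAPACK build configuration (MKL, OpenBLAS, ...).
np.show_config()

# threadpoolctl reports the thread pools in use at runtime and their sizes.
try:
    from threadpoolctl import threadpool_info
    for pool in threadpool_info():
        print(pool["internal_api"], pool["num_threads"])
except ImportError:
    pass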
Here is the example code
import numpy as np
import scipy.linalg
import matplotlib.pyplot as pyt
from time import time

t_ls = []
d_ls = np.array([100, 500, 1000, 2000, 4000])

for N in d_ls:
    # Build a random Hermitian test matrix of size N x N
    A = np.random.rand(N, N) + 1j*np.random.rand(N, N)
    A = 0.5*(A + np.conj(A.T))

    ts = time()
    evals, evecs = np.linalg.eigh(A)
    t_np = time() - ts

    ts = time()
    evals2, evecs2 = scipy.linalg.eigh(A)
    t_sp = time() - ts

    t_ls.append(np.array([t_np, t_sp]))

t_ls = np.array(t_ls)

pyt.plot(d_ls, t_ls[:, 0], marker='s')
pyt.plot(d_ls, t_ls[:, 1], marker='^')
pyt.xlabel("N")
pyt.ylabel("time (secs)")
pyt.legend(["NumPy", "SciPy"])
pyt.show()
USING SVD AND MULTIPROCESSING PARALLELIZATION
Going through some of the comments in the post, I have tried SVD of the matrix and multiprocessing. In all cases I still see that the serialized approach with NumPy's eigh is the most efficient one; here is the code:
import numpy as np
import scipy.linalg
import matplotlib.pyplot as pyt
from time import time
import psutil

def f_mp_pool(*args):
    N = args[0]
    A = np.random.rand(N, N) + 1j*np.random.rand(N, N)
    A = 0.5*(A + np.conj(A.T))
    evals, evecs = np.linalg.eigh(A)
    return evals, evecs

nreps = 100
N = 700

# serialized eigh
ts = time()
for n in range(nreps):
    A = np.random.rand(N, N) + 1j*np.random.rand(N, N)
    A = 0.5*(A + np.conj(A.T))
    res = np.linalg.eigh(A)
print("serialized:", time() - ts)

# use SVD (on the last matrix built in the loop above)
ts = time()
for n in range(nreps):
    res = scipy.linalg.svd(A, full_matrices=True, check_finite=False)
print("SVD:", time() - ts)

# multiprocessing pool, one eigh per worker call
import multiprocessing as mp
nproc = psutil.cpu_count(logical=False) - 1
mp_pool = mp.Pool(processes=nproc)
args_ls = [(N,) for n in range(nreps)]

ts = time()
res = mp_pool.starmap(f_mp_pool, args_ls)
print("parallel:", time() - ts)

PyTorch will be faster, and if you have a GPU it can also take advantage of that; however, the gain is limited because the QR iteration is not well suited to parallel computation. I have a potential solution to accelerate that part on GPUs, but I never actually implemented it.
import numpy as np
import scipy.linalg
import torch
import matplotlib.pyplot as plt
from time import time

t_ls = []
d_ls = np.array([100, 500, 1000, 2000, 4000])

for N in d_ls:
    A = np.random.rand(N, N) + 1j*np.random.rand(N, N)
    A = 0.5*(A + np.conj(A.T))

    # skipping numpy, it is slow here, you may put it back if you want
    # ts = time()
    # evals, evecs = np.linalg.eigh( A )
    # t_np = time()-ts

    ts = time()
    evals2, evecs2 = scipy.linalg.eigh(A)
    t_sp = time() - ts

    # When using the CPU, torch will use intra-operation parallelism,
    # so if you care about latency this is better than using
    # multiprocessing.
    A_cpu = torch.as_tensor(A)
    ts = time()
    evals3, evecs3 = torch.linalg.eigh(A_cpu)
    t_cpu = time() - ts

    if torch.cuda.is_available():
        # Using the GPU will give a significant speedup for some operations.
        A_gpu = A_cpu.to('cuda')
        torch.cuda.synchronize()
        ts = time()
        evals4, evecs4 = torch.linalg.eigh(A_gpu)
        torch.cuda.synchronize()
        t_gpu = time() - ts
    else:
        t_gpu = np.nan  # if you don't have a GPU, skip this part

    t_ls.append(np.array([np.nan, t_sp, t_cpu, t_gpu]))
    print(t_ls[-1])

t_ls = np.array(t_ls)

plt.plot(d_ls, t_ls[:, 0], marker='s')
plt.plot(d_ls, t_ls[:, 1], marker='^')
plt.plot(d_ls, t_ls[:, 2], marker='+')
plt.plot(d_ls, t_ls[:, 3], marker='d')
plt.xlabel("N")
plt.ylabel("time (secs)")
plt.legend(["NumPy", "SciPy", "PyTorch CPU", "PyTorch GPU"])
My plot

Related

Multiprocessing pool map for a BIG array computation goes much slower than expected

I've experienced some difficulties when using a multiprocessing Pool in Python 3. I want to do a BIG array calculation using pool.map. Basically, I have a 3D array on which I need to run the computation 10 times, and it generates 10 output files sequentially. This task is repeated 3 times, i.e., in the output we get 3*10 = 30 output files (*.txt). To do this, I've prepared the following script for a small array calculation (a sample problem). However, when I use this script for a BIG array calculation, or for arrays coming from a series of files, then this piece of code (maybe the pool) eats up the memory, and it does not save any .txt file to the destination directory. There is no error message when I run the file with the command mpirun python3 sample_prob_func.py
Can anybody suggest what the problem is in the sample script and how to write the code so it does not get stuck? I have not received any error message, but I don't know where the problem occurs. Any help is appreciated. Thanks!
import numpy as np
import multiprocessing as mp
from scipy import signal
import matplotlib.pyplot as plt
import contextlib
import os, glob, re
import random
import cmath, math
import time
import pdb

# File storing path
save_results_to = 'File saving path'

arr_x = [0, 8.49, 0.0, -8.49, -12.0, -8.49, -0.0, 8.49, 12.0]
arr_y = [0, 8.49, 12.0, 8.49, 0.0, -8.49, -12.0, -8.49, -0.0]
N = len(arr_x)

np.random.seed(12345)
total_rows = 5000
arr  = np.reshape(np.random.rand(total_rows*N), (total_rows, N))
arr1 = np.reshape(np.random.rand(total_rows*N), (total_rows, N))
arr2 = np.reshape(np.random.rand(total_rows*N), (total_rows, N))

# Finding cross spectral density (CSD)
def my_func1(data):
    # Do something here
    return array1

t0 = time.time()
my_data1 = my_func1(arr)
my_data2 = my_func1(arr1)
my_data3 = my_func1(arr2)
print('Time required {} seconds to execute CSD--For loop'.format(time.time()-t0))

mydata_list = [my_data1, my_data3, my_data3]

def my_func2(data2):
    # Do something here
    return from_data2

start_freq = 100
stop_freq = 110
freq_range = np.around(np.linspace(start_freq, stop_freq, 11)/10, decimals=2)
no_of_freq = len(freq_range)

list_arr = []

def my_func3(csd):
    list_csd = []
    for fr_count in range(start_freq, stop_freq):
        csd_single = csd[:, :, fr_count]
        list_csd.append(csd_single)
    print('Shape of list is :', np.array(list_csd).shape)
    return list_csd

def parallel_function(BIG_list_data):
    with contextlib.closing(mp.Pool(processes=10)) as pool:
        dft = pool.map(my_func2, BIG_list_data)
        pool.close()
        pool.join()
    data_arr = np.array(dft)
    print('shape of data :', data_arr.shape)
    return data_arr

count_day = 1
count_hour = 0
for count in range(3):
    count_hour += 1
    list_arr = my_func3(mydata_list[count])  # Load Numpy files
    print('Array shape is :', np.array(arr).shape)
    t0 = time.time()
    data_dft = parallel_function(list_arr)
    print('The hour number={} data is processing... '.format(count_hour))
    print('Time in parallel:', time.time() - t0)
    for i in range(no_of_freq-1):  # (11-1=10)
        jj = freq_range[i]
        # print('The hour_number {} and frequency number {} data is processing... '.format(count_hour, jj))
        dft_1hr_complx = data_dft[i, :, :]
        np.savetxt(save_results_to + f'csd_Day_{count_day}_Hour_{count_hour}_f_{jj}_hz.txt', dft_1hr_complx.view(float))
As @JérômeRichard suggested, to make your job scheduler aware of the resources you want, you need to define the number of processors that will be engaged in this task. The following line could help you: ncpus = int(os.getenv('SLURM_CPUS_PER_TASK', 1))
You need to use this line inside your Python script. Also, inside parallel_function use with contextlib.closing(mp.Pool(processes=ncpus)) as pool: instead of with contextlib.closing(mp.Pool(processes=10)) as pool:. Thanks
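Putting both suggestions together, a minimal sketch of a scheduler-aware pool (my_func2 stands in for your actual per-chunk function):
import os
import contextlib
import multiprocessing as mp
import numpy as np

# CPUs granted by SLURM for this task; falls back to 1 outside the scheduler.
ncpus = int(os.getenv('SLURM_CPUS_PER_TASK', 1))

def parallel_function(BIG_list_data):
    # Size the pool from the allocation instead of a hard-coded 10.
    with contextlib.closing(mp.Pool(processes=ncpus)) as pool:
        dft = pool.map(my_func2, BIG_list_data)
    return np.array(dft)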

Trying to rule out astrology but something is wrong

I am trying to rule out a possible astrology effect on populations as statistically insignificant, but to no avail. I am using Pearson's chi-squared test on two distributions of sun signs from two different populations, one of astronaut pilots and the other of celebrities. Something must be wrong, but I have failed to find it, probably on the statistics side.
import numpy as np
import pandas as pd
import ephem
from collections import Counter, namedtuple
import matplotlib.pyplot as plt
from scipy import stats

models = pd.read_csv('models.csv', delimiter=',')
astronauts = pd.read_csv('astronauts.csv', delimiter=',')

models = models.sample(229)
astronauts = astronauts.sample(229)

sun = ephem.Sun()

def get_planet_constellation(planet, dataset):
    person_planet_constellation = []
    for person in dataset['Birth Date']:
        planet.compute(person)
        person_planet_constellation += [ephem.constellation(planet)[1]]
    return person_planet_constellation

def plot_bar_group(planet, data1, data2):
    fig, ax = plt.subplots()
    plt.bar(data1.keys(), data1.values(), alpha=0.5)
    plt.bar(data2.keys(), data2.values(), alpha=0.5)
    plt.legend(['astronauts', 'models'])
    ylabel = 'Percentages of ' + planet.name + ' in constellation'
    ax.set_ylabel(ylabel)
    title = 'Histogram of ' + planet.name + ' in constellation by group'
    ax.set_title(title)
    plt.show()

astronaut_sun_constellation = Counter(
    get_planet_constellation(sun, astronauts))
model_sun_constellation = Counter(get_planet_constellation(sun, models))

plot_bar_group(sun, astronaut_sun_constellation, model_sun_constellation)

a = list(astronaut_sun_constellation.values())
b = list(model_sun_constellation.values())
s = np.array([a, b])

stat, p, dof, expected = stats.chi2_contingency(s)
print(stat, p, dof, expected)

prob = 0.95
critical = stats.chi2.ppf(prob, dof)
if abs(stat) >= critical:
    print('Dependent (reject H0)')
else:
    print('Independent (fail to reject H0)')

# interpret p-value
alpha = 1.0 - prob
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (fail to reject H0)')
https://www.dropbox.com/s/w7rye6m5lbihjlh/astronauts.csv
https://www.dropbox.com/s/xlxanr0pxqtxcvv/models.csv
I have eventually found the bug: it was in passing the Counter values as lists to the chi-squared function. They must be sorted by key first, so that the counts for the two groups line up constellation by constellation; otherwise the test sees a major difference in the counts simply because they are in different orders. All astrology effects are now insignificant, as expected, at the 0.95 level.
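For reference, here is a minimal sketch of that fix, aligning both Counters on the same sorted constellation keys before building the contingency table (the helper function name is mine; keys missing from one group are counted as zero):
import numpy as np
from scipy import stats

def aligned_contingency(counter_a, counter_b):
    # Use one sorted key order for both rows so each column refers to the
    # same constellation in both groups.
    keys = sorted(set(counter_a) | set(counter_b))
    row_a = [counter_a.get(k, 0) for k in keys]
    row_b = [counter_b.get(k, 0) for k in keys]
    return np.array([row_a, row_b])

s = aligned_contingency(astronaut_sun_constellation, model_sun_constellation)
stat, p, dof, expected = stats.chi2_contingency(s)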

Can I use multiple processes to read different subsets of a NumPy array (or pandas DataFrame) safely?

I want to use multiple processes to work on every 2-column combination of a NumPy array (or pandas DataFrame), such as array[:, 1:3] and array[:, 2:4].
I wonder, is it safe to read array[:, 1:3] in one process and array[:, 2:4] in another process?
The example code is shown below:
import time
import numpy as np
import pandas as pd
from itertools import combinations
from multiprocessing import Pool, Value, Lock, Array

g = np.load('input.npy')
c = Value('i', 0, lock=True)

def count_valid_pairs(i):
    pair = g[:, i]
    global c
    if pair.max() > 100:
        with c.get_lock():
            c.value += 1
    return

if __name__ == '__main__':
    t_start = time.time()
    cpus = 20
    p = Pool(processes=cpus)
    r = p.imap_unordered(count_valid_pairs, combinations(range(g.shape[1]), 2))
    p.close()
    p.join()
    print("Total {} pairs have max value > 100".format(c.value))

Goodness of fit always being zero despite using random data?

I'm trying to write code that generates random data and computes the goodness of fit, but I don't understand why the chi-squared statistic is always zero. May I have a fix for this? As an attempted fix I tried playing around with different types to see if I got any resulting changes in the initial output; I've also tried changing the parameters of the loop in question.
from scipy import stats
import math
import random
import numpy
import scipy
import numpy as np

def Linear_Chi2_Generate(observed_values = [], expected_values = []):
    #===============================================================#
    #           !!!!!!! Generation of Data !!!!!!!!!!               #
    #===============================================================#
    for i in range(0, 12):
        a = random.randint(-10, 10)
        b = random.randint(-10, 10)
        y = a * (b + i)
        observed_values.append(y)

    #######################################################################################
    #                           !!! Array Setup !!!!                                      #
    #        ***Had the Array types converted to floats before computing Chi2***          #
    #######################################################################################
    t_s = 0
    o_v = np.array(observed_values)
    e_v = np.array(expected_values)
    o_v_f = o_v.astype(float)
    e_v_f = o_v.astype(float)
    z_o_e_v_f = zip(o_v.astype(float), e_v.astype(float))
    ######################################################################################
    for i in z_o_e_v_f:
        t_s += [((o_v_f)-(e_v_f))]**2/(e_v_f)  # Computes the Chi2 stat!
    ######################################################################################

    print("Observed Values ", o_v_f)
    print("Expected Values", e_v_f)
    df = len(o_v_f) - 1
    print("Our goodness of fit for our linear function", stats.chi2.cdf(t_s, df))
    return t_s

Linear_Chi2_Generate()
In your original code, e_v_f = o_v.astype(float) made o_v_f and e_v_f end up the same. There was also an issue in the for loop. I have edited your code a bit; see whether it does what you are looking for:
from scipy import stats
import math
import random
import numpy
import scipy
import numpy as np

def Linear_Chi2_Generate(observed_values = [], expected_values = []):
    #===============================================================#
    #           !!!!!!! Generation of Data !!!!!!!!!!               #
    #===============================================================#
    for i in range(0, 12):
        a_o = random.randint(-10, 10)
        b_o = random.randint(-10, 10)
        y_o = a_o * (b_o + i)
        observed_values.append(y_o)
        # a_e = random.randint(-10,10)
        # b_e = random.randint(-10,10)
        # y_e = a_e * (b_e + i)
        expected_values.append(y_o + 5)

    #######################################################################################
    #                           !!! Array Setup !!!!                                      #
    #        ***Had the Array types converted to floats before computing Chi2***          #
    #######################################################################################
    t_s = 0
    o_v = np.array(observed_values)
    e_v = np.array(expected_values)
    o_v_f = o_v.astype(float)
    e_v_f = e_v.astype(float)
    z_o_e_v_f = zip(o_v.astype(float), e_v.astype(float))
    ######################################################################################
    for o, e in z_o_e_v_f:
        t_s += (o - e) ** 2 / e  # Computes the Chi2 stat!
    ######################################################################################

    print("Observed Values ", o_v_f)
    print("Expected Values", e_v_f)
    df = len(o_v_f) - 1
    print("Our goodness of fit for our linear function", stats.chi2.cdf(t_s, df))
    return t_s

Linear_Chi2_Generate()
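A small follow-up note: stats.chi2.cdf(t_s, df) is the lower tail of the chi-squared distribution. If what you want to report is the usual goodness-of-fit p-value, that is the upper tail, available as the survival function (a one-line sketch reusing the t_s and df computed inside the function above):
from scipy import stats
# p-value for the goodness-of-fit test: probability of a statistic at least
# as large as t_s under the chi-squared distribution with df degrees of freedom.
p_value = stats.chi2.sf(t_s, df)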

Estimating parameters using minimization in Python and speeding up this process

I am trying to find parameter estimates using minimization. The code I wrote works, but there are two problems:
It finds only a local minimum. I tried to solve this by using basinhopping.
It takes very long until I get a result, and since I have to do this minimization around 1000 times, this becomes a big issue.
So my questions are:
Do you know how I could optimize my code so that the minimization runs faster?
Is there a way I can change the basinhopping part so that it runs faster? E.g. set niter lower, or a different method I am not aware of. I tried running it like this, and after 10 hours I did not get a result for even one of the 1000 individuals with basinhopping.
Is there another way to find a global minimum?
Please feel free to ask further questions.
My code:
import numpy as np
from scipy.optimize import minimize
from scipy.optimize import basinhopping
from scipy.integrate import odeint
import pickle
import os
import pandas as pd
import datetime
import numpy.random as npr
import csv

path = "C:\\Users\Sebastian Gäumann\OneDrive\Dokumente\FS 2017\Bachelorarbeit\Python"
os.chdir(path)

### IDs
df = pd.read_csv('1_Youtuber_SingleNrSheet_Comedy.csv', sep = ";", skipinitialspace=True)  ###### Change Name
YoutuberID = df["Channel_ID"].tolist()
## print(YoutuberID)

with open("9_p_q_m_Fun_ExtendedBass_VIEWS_Comedy_test.csv", "w", newline='', encoding='utf-8') as csv_file2:  ###### Change Name
    csv_writer2 = csv.writer(csv_file2, delimiter=';')
    csv_writer2.writerow(["Type", "p", "q", "m", "Functionvalue"])
    count = 0
    for ID in YoutuberID[0:]:  ### Change
        try:
            path = "C:\\Users\Sebastian Gäumann\OneDrive\Dokumente\FS 2017\Bachelorarbeit\Python"
            os.chdir(path)

            ### ALL INFO
            Days = pd.read_csv('3_API_Call_ALL_info_Comedy_v2.csv', sep = ";", skipinitialspace=True)
            views_path = "C:\\Users\Sebastian Gäumann\OneDrive\Dokumente\FS 2017\Bachelorarbeit\Python\Daily_Views_Comedy"  ###### Change Name
            os.chdir(views_path)
            SVR = pd.read_csv("4_COMEDY_DailyViews_Clean_" + str(count) + "_" + ID + ".csv", sep = ";", parse_dates=True, dayfirst=True)  ###### Change Name
            ## print(SVR[SVR.columns[0]])
            SVR = SVR[SVR[SVR.columns[0]] < "2018-05-01"]  #### CHANGE DATE FOR DIF CAT
            ## print(SVR)

            ##### SV Input
            SV = np.array(SVR["Daily Views"])
            ## print(SV)

            Days = Days[Days["channelId"] == ID]
            ## print(Days)
            Days["publishedAt"] = pd.to_datetime(Days.publishedAt)
            Days = Days[Days["publishedAt"] > "2015-01-08"]  ## "2015-01-10"
            ## print(Days)

            ##### Timedelta #####
            start_date = pd.to_datetime("2015-06-08")
            ## print(start_date)
            video_upload_day = []
            for video_date in Days["publishedAt"]:
                TimeDelta = video_date - start_date
                video_upload_day.append(TimeDelta.days)
            ## print(video_upload_day)
            ## print(videoT)

            nvideos = len(video_upload_day)
            ndays = len(SV)
            videoT = np.array(video_upload_day)
            ## print(videoT, nvideos, ndays)

            def objective(x):
                p = x[0]
                q = x[1]
                m = x[2]
                estimateV = np.zeros((ndays, nvideos))
                for t in range(ndays):
                    for v in range(nvideos):
                        if videoT[v] <= t:
                            estimateV[t, v] = p*m + (q-p) * np.sum(estimateV[0:t, v], axis=0) - (q/m) * (np.sum(estimateV[0:t, v], axis=0)**2)
                estimateSV = np.sum(estimateV, axis=1)
                return np.sum((SV - estimateSV)**2)
This is the minimization part. I made one version for the normal minimization and one for basinhopping, and separated them with ##.
            ###### MINIMIZATION #######
            mguess = round(sum(SV)/(nvideos*2), 0)
            print(sum(SV), mguess)
            x0 = np.array([0.001, 0.01, mguess])  #### Make it less volatile to first guess? Make bigger steps for m?
            b1 = (0.00001, 0.5)
            b2 = (10**4, 10**7)
            bnds = (b1, b1, b2)
            ## minimizer_kwargs = dict(method="L-BFGS-B", bounds=bnds)
            ## res = basinhopping(objective, x0, niter=20, minimizer_kwargs=minimizer_kwargs)
            res = minimize(objective, x0, bounds=bnds)
            print(res)
            csv_writer2.writerow(["COMEDY", res.x[0], res.x[1], res.x[2], res.fun])  ### CHANGE CAT
            print("CURRENT YOUTUBER IS:", count)
            count += 1
        except:
            print("PROBLEM", count)
            count += 1
        ## print(res, res.x[0], res.x[1], res.x[2], res.fun)
