How can I speed up my python program? - python-3.x

I was trying to make a program in Python 3.4.1 to obtain the prime numbers from 2 to 100,000.
My problem is that it takes too long to process everything and never gives me any result.
I left it running for around half an hour; it slowed down my whole computer and still didn't produce what I want.
I am using the Sieve of Eratosthenes algorithm ("Criba de Eratóstenes").
Here is my code:
from math import *

def primos(num):
    num2 = num + 1
    tnumeros = []   # tnumeros = every number from 2 to num
    npnumeros = []  # npnumeros = every number that is not prime
    pnumeros = []   # pnumeros = every prime number
    for a in range( 2, num2 ):
        tnumeros.append( a )
    for i in range( 2, int( sqrt( num ) ) + 1 ):
        for j in range( i, int( num / i ) + 1 ):
            np = i * j
            npnumeros.append( np )
    npnumeros = list( set( npnumeros ) )
    for e in tnumeros:
        if ( e in npnumeros ):
            continue
        else:
            pnumeros.append( e )
    return ( str( "".join( str( pnumeros ) ) ) )

print( primos( 100000 ) )

Don't use a list for your npnumeros value; use a set instead. You're only interested in looking up whether a number is in that collection, so make it a set from the start:
npnumeros = set()
# ...
for i in range( 2, int( sqrt( num ) ) + 1 ):
    for j in range( i, int( num / i ) + 1 ):
        np = i * j
        npnumeros.add( np )
# npnumeros = list( set( npnumeros ) ) # Remove this line, it's no longer needed
for e in tnumeros:
    if ( e in npnumeros ):
        continue
    else:
        pnumeros.append( e )
The reason your code is slow is that looking up numbers in a list is O(N) time, and doing that inside an O(N) loop is O(N^2) time. But looking up numbers in a set is O(1) time, so you'll have O(N) time inside that loop. Going from O(N^2) to O(N) is going to represent a HUGE difference in processing speed.
If you don't understand the O(N) notation I used, Google "Big O notation" to read more about it.
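To see the difference concretely, here is a small timing sketch (mine, not part of the answer) comparing a worst-case membership test on a list against the same test on a set:
import timeit

data_list = list(range(100000))
data_set = set(data_list)

# Looking up the last element: the list scans all 100000 entries,
# while the set does a single hash lookup.
print(timeit.timeit(lambda: 99999 in data_list, number=1000))
print(timeit.timeit(lambda: 99999 in data_set, number=1000))
On a typical machine the list lookups are orders of magnitude slower than the set lookups, which is exactly the O(N) versus O(1) gap described above.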

This is a severely truncated answer, because this question should probably be moved to Code Review.
One quick speed-up is simply turning npnumeros into a set instead of a list. That means the later check if ( e in npnumeros ): will run significantly faster.
The modified code:
from math import *

def primos(num):
    num2 = num + 1
    tnumeros = []   # tnumeros = every number from 2 to num
    npnumeros = []  # npnumeros = every number that is not prime
    pnumeros = []   # pnumeros = every prime number
    for a in range( 2, num2 ):
        tnumeros.append( a )
    for i in range( 2, int( sqrt( num ) ) + 1 ):
        for j in range( i, int( num / i ) + 1 ):
            np = i * j
            npnumeros.append( np )
    npnumeros = set( npnumeros )
    for e in tnumeros:
        if ( e in npnumeros ):
            continue
        else:
            pnumeros.append( e )
    return ( str( "".join( str( pnumeros ) ) ) )

print( primos( 100000 ) )
runs ~60 times faster.
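For reference, neither answer changes the overall approach, but the textbook sieve avoids building a collection of composites altogether by crossing multiples off a boolean array. A sketch of that variant (my own addition, same 100,000 limit) looks like this:
from math import sqrt

def primos_criba(num):
    # es_primo[k] stays True while k is still considered prime
    es_primo = [True] * (num + 1)
    es_primo[0] = es_primo[1] = False
    for i in range(2, int(sqrt(num)) + 1):
        if es_primo[i]:
            # cross out every multiple of i, starting at i*i
            for multiplo in range(i * i, num + 1, i):
                es_primo[multiplo] = False
    return [n for n, primo in enumerate(es_primo) if primo]

print(len(primos_criba(100000)))  # 9592 primes up to 100,000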

Related

Clustering of sites using OR tools (CP solver)

I am trying to make clusters of sites based on their distance from each other, using the OR-Tools CP solver. The program runs fine for 40 to 50 sites, but when I try to run it for about 200 sites with each cluster containing 10 sites, the program gets stuck and does not give any output. I am using cp_model for this.
Below is the code I am using:
from ortools.sat.python import cp_model
import pandas as pd, sys, os, requests, re
import time

start_time = time.time()

# filepath is assumed to be defined elsewhere and to point at the folder holding matrix.csv
matrix_data = pd.read_csv(''.join([filepath, '//matrix.csv']))
matrix = matrix_data.values.tolist()

#data = {}
#data['distance_matrix'] = []
distance_matrix = []
for i in matrix:
    distance_matrix.append(i)

def main():
    """Entry point of the program."""
    num_nodes = len(distance_matrix)
    print('Num nodes =', num_nodes)

    # Number of groups to split the nodes, must divide num_nodes.
    num_groups = 10
    group_size = num_nodes // num_groups  # not in the original snippet: group_size was used but never defined

    # Model.
    model = cp_model.CpModel()

    # Variables.
    neighbors = {}
    obj_vars = []
    obj_coeffs = []
    for n1 in range(num_nodes - 1):
        for n2 in range(n1 + 1, num_nodes):
            same = model.NewBoolVar('neighbors_%i_%i' % (n1, n2))
            neighbors[n1, n2] = same
            obj_vars.append(same)
            obj_coeffs.append(distance_matrix[n1][n2] + distance_matrix[n2][n1])

    # Number of neighbors per node:
    for n in range(num_nodes):
        model.Add(sum(neighbors[m, n] for m in range(n)) +
                  sum(neighbors[n, m] for m in range(n + 1, num_nodes)) ==
                  group_size - 1)

    # Enforce transitivity on all triplets.
    for n1 in range(num_nodes - 2):
        for n2 in range(n1 + 1, num_nodes - 1):
            for n3 in range(n2 + 1, num_nodes):
                model.Add(
                    neighbors[n1, n3] + neighbors[n2, n3] + neighbors[n1, n2] != 2)

    # Redundant constraint on the total number of neighbor pairs.
    model.Add(sum(obj_vars) == num_groups * group_size * (group_size - 1) // 2)

    # Minimize weighted sum of arcs.
    model.Minimize(
        sum(obj_vars[i] * obj_coeffs[i] for i in range(len(obj_vars))))

    # Solve and print out the solution.
    solver = cp_model.CpSolver()
    solver.parameters.log_search_progress = True
    solver.parameters.num_search_workers = 6
    status = solver.Solve(model)
    print(solver.ResponseStats())

    visited = set()
    for g in range(num_groups):
        flonglist = []
        flatlist = []
        for n in range(num_nodes):
            if n not in visited:
                visited.add(n)
                output = str(n)
                for o in range(n + 1, num_nodes):
                    if solver.BooleanValue(neighbors[n, o]):
                        visited.add(o)
                        output += ' ' + str(o)
                print('Group', g, ':', output)
                print(site_list)  # site_list is not defined in the snippet
                break

if __name__ == '__main__':
    main()
    print("-- %s seconds ---" % (time.time() - start_time))
The matrix file contains the distances between the sites in matrix format.
I need some help.
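One knob worth noting (a sketch on my part, not a verified fix): CP-SAT keeps searching until it has proved optimality, which for 200 nodes and the full set of transitivity constraints can take a very long time. Setting a time limit at least makes the solver return the best feasible clustering found so far:
solver = cp_model.CpSolver()
solver.parameters.max_time_in_seconds = 300.0  # stop searching after 5 minutes
solver.parameters.log_search_progress = True
solver.parameters.num_search_workers = 6
status = solver.Solve(model)
if status in (cp_model.OPTIMAL, cp_model.FEASIBLE):
    print('Best objective found:', solver.ObjectiveValue())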

Make multiprocessing Pool use free cores if available

I have the following piece of code that uses a Pool of workers to perform some operations.
# imports added so the snippet is self-contained
import copy
import multiprocessing as mp
import numpy as np
import psutil
from time import time

def my_func( args ):
    low_index = args[0][0]
    up_index = args[0][1]
    params = args[1][0]
    A = args[1][1]
    B = args[1][2]
    print( "PID:", mp.current_process() )
    for k in range( low_index, up_index ):
        a = params[k]
        # what if np.dot uses multi-threading?
        A = a*A + ( np.dot( A , B ) )*( np.dot( B, B ) )
        B = a*B + ( np.dot( B , A ) )*( np.dot( A, A ) )
    return A,B

if __name__ == '__main__':
    ts = time()
    params = np.linspace( 1, 10, 1000 )
    n_dim = 1000
    # the arrays A,B get modified with each call to the worker
    A = np.random.rand( n_dim, n_dim )
    B = np.random.rand( n_dim, n_dim )
    C = np.random.rand( 5*n_dim, 5*n_dim )
    D = np.random.rand( 5*n_dim, 5*n_dim )
    ncpus = psutil.cpu_count( logical=False )
    number_processes = ncpus - 1
    total_items = params.shape[0]
    n_chunck = int( ( total_items )/number_processes )
    intervals = [ [ k*n_chunck, (k+1)*n_chunck ] for k in range( number_processes ) ]
    intervals[ -1 ][ -1 ] = total_items
    from itertools import repeat
    objs_ = list( repeat( ( params,
                            copy.deepcopy( A ),
                            copy.deepcopy( B ),
                            ) , number_processes - 1 ) )
    objs_.append( ( params,
                    copy.deepcopy( C ),
                    copy.deepcopy( D ),
                    ) )
    args_l = []
    for k in range( number_processes ):
        args_l.append( [ intervals[k] , objs_[k] ] )
    pool = mp.Pool( processes = ncpus )
    results = pool.map( my_func, args_l )
    pool.close()
    pool.join()
    print( time() - ts )
The last process (the one involving the C and D arrays) takes considerably longer than the rest, so I want it to make efficient use of all available free cores once the other processes are done. However, I am observing that CPU usage stays at around 20% for that last process (on my machine I use 5 of 6 cores), so the remaining operations run very inefficiently. Is there a good way to fix that?
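One direction hinted at by the "# what if np.dot uses multi-threading?" comment in the code is to let the BLAS library behind np.dot use the idle cores for the long-running C/D task. The sketch below is an untested assumption on my part; it requires the threadpoolctl package and a numpy linked against a multithreaded BLAS (OpenBLAS or MKL):
from threadpoolctl import threadpool_limits

def my_func_large(args):
    # Same work as my_func, but allows np.dot to spread each matrix product
    # over several BLAS threads while the other worker processes are idle.
    with threadpool_limits(limits=4, user_api='blas'):
        return my_func(args)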

Simpson's rule 3/8 for n intervals in Python

I'm trying to write a program that approximates the integral of e^(x^2) between 0 and 1, based on this integral formula (the composite Simpson's 3/8 rule):
\int_a^b f(x)\,dx \approx \frac{3h}{8}\Big[f(x_0) + 3\!\!\sum_{\substack{1\le i<n\\ 3\nmid i}}\!\! f(x_i) + 2\!\!\sum_{\substack{1\le i<n\\ 3\mid i}}\!\! f(x_i) + f(x_n)\Big], \qquad h=\frac{b-a}{n}
I've written the code below, but it keeps giving the wrong answer (other methods give 1.46; this one gives 1.006).
I think there may be a problem with the two for loops that do the Riemann sum, or with the way I've written the formula. I also tried rewriting the formula in other ways, but with no success.
Any kind of help is appreciated.
import math
import numpy as np

def f(x):
    y = np.exp(x**2)
    return y

a = float(input("What is the lower limit? \n"))
b = float(input("What is the upper limit? \n"))
n = int(input("What is the number of intervals? "))

x = np.zeros([n+1])
y = np.zeros([n])
z = np.zeros([n])
h = (b-a)/n
print (h)
x[0] = a
x[n] = b
suma1 = 0
suma2 = 0
for i in np.arange(1,n):
    x[i] = x[i-1] + h
    suma1 = suma1 + f(x[i])
    alfa = (x[i]-x[i-1])/3
for i in np.arange(0,n):
    y[i] = (x[i-1]+ alfa)
    suma2 = suma2 + f(y[i])
    z[i] = y[i] + alfa
int3 = ((b-a)/(8*n)) * (f(x[0])+f(x[n]) + (3*(suma2+f(z[i]))) + (2*(suma1)))
print (int3)
I'm not a math major but I remember helping a friend with this rule for something about waterplane area for ships.
Here's an implementation based on Wikipedia's description of Simpson's 3/8 rule:
# The input parameters
a, b, n = 0, 1, 10

# Divide the interval into 3*n sub-intervals
# and hence 3*n+1 endpoints
x = np.linspace(a,b,3*n+1)
y = f(x)

# The weight for each point
w = [1,3,3,1]

result = 0
for i in range(0, 3*n, 3):
    # Calculate the area, 4 points at a time
    result += (x[i+3] - x[i]) / 8 * (y[i:i+4] * w).sum()

# result = 1.4626525814387632
You can do it using numpy.vectorize (based on the same Wikipedia description of the rule):
a, b, n = 0, 1, 10**6
h = (b-a) / n
x = np.linspace(0,n,n+1)*h + a
fv = np.vectorize(f)

(
    3*h/8 * (
        f(x[0]) +
        3 * fv(x[np.mod(np.arange(len(x)), 3) != 0]).sum() +  # skip every 3rd index
        2 * fv(x[::3]).sum() +  # get every 3rd index
        f(x[-1])
    )
)
#Output: 1.462654874404461
If you use numpy's built-in functions (which I think is always possible), performance will improve considerably:
a, b, n = 0, 1, 10**6
h = (b-a) / n
x = np.exp(np.square(np.linspace(0,n,n+1)*h + a))

(
    3*h/8 * (
        x[0] +
        3 * x[np.mod(np.arange(len(x)), 3) != 0].sum() +
        2 * x[::3].sum() +
        x[-1]
    )
)
#Output: 1.462654874404461
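For completeness, here is a compact variant of my own (not part of either answer) that builds the 1, 3, 3, 2, 3, 3, 2, ..., 3, 3, 1 weight pattern explicitly and reduces the whole rule to a single dot product. It assumes the number of sub-intervals n is a multiple of 3:
import numpy as np

def simpson38(f, a, b, n):
    # n must be a multiple of 3 for the composite 3/8 rule
    x = np.linspace(a, b, n + 1)
    w = np.full(n + 1, 3.0)
    w[::3] = 2.0          # interior points whose index is a multiple of 3
    w[0] = w[-1] = 1.0    # the two endpoints
    h = (b - a) / n
    return 3 * h / 8 * np.dot(w, f(x))

print(simpson38(lambda t: np.exp(t ** 2), 0.0, 1.0, 999))  # ~1.46265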

Fitting a function f(x,y,z) with a quadratic polynomial

I'm trying to fit a function f(x,y,z) with the following quadratic polynomial:
f(x,y,z) \approx \frac{x^2}{2 m_{xx}} + \frac{x y}{m_{xy}} + \frac{x z}{m_{xz}} + \frac{y^2}{2 m_{yy}} + \frac{y z}{m_{yz}} + \frac{z^2}{2 m_{zz}} + f(0,0,0)
It describes a distorted spherical surface in three dimensions. The problem is related to the calculation of effective masses in solid state physics.
Here is a picture of the data to show that it indeed falls off parabolically in all directions, even though the curvature in the z-direction is rather low:
3d parabolas
I'm interested in the coefficients, which correspond to effective masses. I've got an array of xyz coordinates, which is regular and centered on the maximum:
[[ 0. 0. 0. ]
[ 0. 0. 0.01282017]
[ 0. 0. 0.02564034]
...
[-0.05026321 -0.05026321 -0.03846052]
[-0.05026321 -0.05026321 -0.02564034]
[-0.05026321 -0.05026321 -0.01282017]]
And a corresponding 1D array of scalar values, one for each point. The number of data points around this maximum can range from 100 to 1000.
This is the code I'm currently trying to use for fitting:
def func(data, mxx, mxy, mxz, myy, myz, mzz):
    x = data[:, 0]
    y = data[:, 1]
    z = data[:, 2]
    return (
        (1 / (2 * mxx)) * (x ** 2)
        + (1 / (1 * mxy)) * (x * y)
        + (1 / (1 * mxz)) * (x * z)
        + (1 / (2 * myy)) * (y ** 2)
        + (1 / (1 * myz)) * (y * z)
        + (1 / (2 * mzz)) * (z ** 2)
    ) + f(0, 0, 0)

energy = data[:, 3]
guess = (mxx, mxy, mxz, myy, myz, mzz)
params, pcov = scipy.optimize.curve_fit(
    func, data, energy, p0=guess, method="trf"
)
Where f(0,0,0) is the value of the function at (0, 0, 0), which I retrieve with the scipy.interpolate.griddata function.
For this problem, the masses should be negative and have values between -0.2 and -2, roughly speaking. I'm creating guess values through a finite difference differentiation.
However, I don't get any sensible results from scipy.optimize.curve_fit: typically the coefficients end up as huge numbers (like 1e9). I'm completely lost at this point.
What am I doing wrong :( ?
One of the problems is that you fit 1/m. While this is correct from a physics point of view, it is bad from the algorithm's point of view: if the fitting algorithm needs to change sign for values of m near zero, the coefficients diverge. Consequently, it is better to fit mI = 1/m and do the corresponding error propagation afterwards. Here I use leastsq, which requires some additional calculation for the covariance matrix (as it returns the reduced form). I do the fit with g() and the inverse masses, but you can immediately reproduce your problems by introducing f() and fitting the ms directly.
A second point is that the data has an offset, i.e. at x = y = z = 0 the data is v = -0.0195. This needs to be introduced into the model.
Finally, I'd say that you already have non-parabolic behaviour in your data.
Nevertheless, here is how it looks:
import matplotlib.pyplot as plt
import numpy as np
np.set_printoptions(linewidth=300)
from scipy.optimize import leastsq
from scipy.optimize import curve_fit

data = np.loadtxt( "silicon.csv", delimiter=',' )

def f( x, y, z, mxx, mxy, mxz, myy, myz, mzz, offI ):
    out = 1./(2 * mxx) * x * x
    out += 1./( mxy ) * x * y
    out += 1./( mxz ) * x * z
    out += 1./( 2 * myy ) * y * y
    out += 1./( myz ) * y * z
    out += 1./( 2 * mzz ) * z * z
    out += 1./offI
    return out

def g( x, y, z, mxxI, mxyI, mxzI, myyI, myzI, mzzI, off ):
    out = mxxI / 2 * x * x
    out += mxyI * x * y
    out += mxzI * x * z
    out += myyI / 2 * y * y
    out += myzI * y * z
    out += mzzI / 2 * z * z
    out += off
    return out

def residuals( params, indata ):
    out = list()
    for x, y, z, v in indata:
        out.append( v - g( x, y, z, *params ) )
    return out

sol, cov, info, msg, ier = leastsq( residuals, 7*[0], args=( data, ), full_output=True )
s_sq = sum( [ x**2 for x in residuals( sol, data ) ] ) / ( len( data ) - len( sol ) )

print( "solution" )
print( sol )
masses = [ 1/x for x in sol ]
print( "masses:" )
print( masses )
print( "covariance matrix:" )
covMX = cov * s_sq
print( covMX )
print( "sum of residuals" )
print( sum( residuals( sol, data ) ) )

### plotting the cuts
fig = plt.figure('cuts')
ax = dict()
for i in range( 1, 10 ):
    ax[i] = fig.add_subplot( 3, 3, i )
dl = np.linspace( -.2, .2, 25)

#### xx
xdata = [ [ x, v ] for x, y, z, v in data if ( abs(y)<1e-3 and abs(z) < 1e-3 ) ]
vl = np.fromiter( ( f( x, 0, 0, *masses ) for x in dl ), float )
ax[1].plot( *zip(*sorted( xdata ) ), ls='', marker='o')
ax[1].plot( dl, vl )
#### xy
xydata = [ [ x, v ] for x, y, z, v in data if ( abs( x - y )<1e-2 and abs(z) < 1e-3 ) ]
vl = np.fromiter( ( f( xy, xy, 0, *masses ) for xy in dl ), float )
ax[2].plot( *zip(*sorted( xydata ) ), ls='', marker='o')
ax[2].plot( dl, vl )
#### xz
xzdata = [ [ x, v ] for x, y, z, v in data if ( abs( x - z )<1e-2 and abs(y) < 1e-3 ) ]
vl = np.fromiter( ( f( xz, 0, xz, *masses ) for xz in dl ), float )
ax[3].plot( *zip(*sorted( xzdata ) ), ls='', marker='o')
ax[3].plot( dl, vl )
#### yy
ydata = [ [ y, v ] for x, y, z, v in data if ( abs(x)<1e-3 and abs(z) < 1e-3 ) ]
vl = np.fromiter( ( f( 0, y, 0, *masses ) for y in dl ), float )
ax[5].plot( *zip(*sorted( ydata ) ), ls='', marker='o' )
ax[5].plot( dl, vl )
#### yz
yzdata = [ [ y, v ] for x, y, z, v in data if ( abs( y - z )<1e-2 and abs(x) < 1e-3 ) ]
vl = np.fromiter( ( f( 0, yz, yz, *masses ) for yz in dl ), float )
ax[6].plot( *zip(*sorted( yzdata ) ), ls='', marker='o')
ax[6].plot( dl, vl )
#### zz
zdata = [ [ z, v ] for x, y, z, v in data if ( abs(x)<1e-3 and abs(y) < 1e-3 ) ]
vl = np.fromiter( ( f( 0, 0, z, *masses ) for z in dl ), float )
ax[9].plot( *zip(*sorted( zdata ) ), ls='', marker='o' )
ax[9].plot( dl, vl )
#### some diag
ddata = [ [ z, v ] for x, y, z, v in data if ( abs(x - y)<1e-3 and abs(x - z) < 1e-3 ) ]
vl = np.fromiter( ( f( d, d, d, *masses ) for d in dl ), float )
ax[7].plot( *zip(*sorted( ddata ) ), ls='', marker='o' )
ax[7].plot( dl, vl )
#### some other diag
ddata = [ [ z, v ] for x, y, z, v in data if ( abs(x - y)<1e-3 and abs(x + z) < 1e-3 ) ]
vl = np.fromiter( ( f( d, d, -d, *masses ) for d in dl ), float )
ax[8].plot( *zip(*sorted( ddata ) ), ls='', marker='o' )
ax[8].plot( dl, vl )

plt.show()
This gives the following output:
solution
[-1.46528595 0.25090717 0.25090717 -1.46528595 0.25090717 -1.46528595 -0.01993436]
masses:
[-0.6824606499739905, 3.985537743156507, 3.9855376943660676, -0.6824606473928339, 3.9855377322848344, -0.6824606467055248, -50.16463861555409]
covariance matrix:
[
[ 4.76417852e-03 -1.46907683e-12 -8.57639600e-12 -2.21281938e-12 -2.38444957e-12 8.42981521e-12 -2.70034183e-05]
[-1.46907683e-12 9.17104397e-04 -7.10573582e-13 1.32125214e-11 7.44553140e-12 1.29909935e-11 -1.11259046e-13]
[-8.57639600e-12 -7.10573582e-13 9.17104389e-04 -8.60004172e-12 -6.14797647e-12 8.27070243e-12 3.11127064e-14]
[-2.21281914e-12 1.32125214e-11 -8.60004172e-12 4.76417860e-03 -4.20477032e-12 9.20893224e-12 -2.70034186e-05]
[-2.38444957e-12 7.44553140e-12 -6.14797647e-12 -4.20477032e-12 9.17104395e-04 1.50963408e-11 -7.28889534e-14]
[ 8.42981530e-12 1.29909935e-11 8.27070243e-12 9.20893175e-12 1.50963408e-11 4.76417849e-03 -2.70034182e-05]
[-2.70034183e-05 -1.11259046e-13 3.11127064e-14 -2.70034186e-05 -7.28889534e-14 -2.70034182e-05 5.77019926e-07]
]
sum of residuals
4.352727352163743e-09
...and here are some 1D cuts that show significant deviation from parabolic behaviour away from the main axes.
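As a sketch of the error-propagation step mentioned above (my own addition, using the printed solution and the diagonal of covMX as the variances of the fitted inverse masses), the uncertainty on each mass m = 1/mI follows from sigma_m ≈ sigma_mI / mI²:
import numpy as np

# first six fitted inverse masses from the solution above (the 7th entry is the offset)
mI = np.array([-1.46528595, 0.25090717, 0.25090717, -1.46528595, 0.25090717, -1.46528595])
# corresponding diagonal entries of covMX, i.e. the variances of the inverse masses
var_mI = np.array([4.764e-03, 9.171e-04, 9.171e-04, 4.764e-03, 9.171e-04, 4.764e-03])

masses = 1.0 / mI
sigma_masses = np.sqrt(var_mI) / mI**2
for m, s in zip(masses, sigma_masses):
    print("%.4f +/- %.4f" % (m, s))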

How to write cos(1)

I need to find a way to compute cos(1) in Python using a while loop, but I can't use any math functions. Can someone help me out?
For example, I also had to compute the value of exp(1), and I was able to do it by writing:
count = 1
term = 1
expTotal = 0
xx = 1
while abs(term) > 1e-20:
    print("%1d %22.17e" % (count, term))
    expTotal = expTotal + term
    term = term * xx/(count)
    count += 1
I am completely lost as to how to do this for the cos and sin values, though.
Just change your expression to compute the term to:
term = term * (-1 * x * x)/( (2*count) * ((2*count)-1) )
Multiplying the count by 2 can be replaced by incrementing the count by 2, so here is your copy-paste version:
import math

def cos(x):
    cosTotal = 1
    count = 2
    term = 1
    x = float(x)
    while abs(term) > 1e-20:
        term *= (-x * x)/( count * (count-1) )
        cosTotal += term
        count += 2
        print("%1d %22.17e" % (count, term))
    return cosTotal

print( cos(1) )
print( math.cos(1) )
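For reference, the recurrence used above comes directly from the Taylor series of the cosine; in LaTeX form (my restatement, not part of the original answer):
\cos x = \sum_{k=0}^{\infty} \frac{(-1)^k x^{2k}}{(2k)!},
\qquad
t_0 = 1, \quad
t_k = t_{k-1} \cdot \frac{-x^2}{(2k)(2k-1)},
which is exactly the term *= (-x * x)/( count * (count-1) ) update with count = 2k.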
You can calculate cos(1) by using the Taylor expansion of this function:
\cos x = \sum_{n=0}^{\infty} \frac{(-1)^n x^{2n}}{(2n)!}
You can find more details on Wikipedia; see an implementation below:
import math

def factorial(n):
    if n == 0:
        return 1
    else:
        return n * factorial(n-1)

def cos(order):
    a = 0
    for i in range(0, order):
        a += ((-1)**i)/(factorial(2*i)*1.0)
    return a

print(cos(10))
print(math.cos(1))
This gives as output:
0.540302305868
0.540302305868
EDIT: Apparently the cosine is implemented in hardware using the CORDIC algorithm, which uses a lookup table to calculate atan. See below a Python implementation of the CORDIC algorithm based on this Google Groups question:
import math

#atans = [math.atan(2.0**(-i)) for i in range(0,40)]
atans = [0.7853981633974483, 0.4636476090008061, 0.24497866312686414, 0.12435499454676144, 0.06241880999595735, 0.031239833430268277, 0.015623728620476831, 0.007812341060101111, 0.0039062301319669718, 0.0019531225164788188, 0.0009765621895593195, 0.0004882812111948983, 0.00024414062014936177, 0.00012207031189367021, 6.103515617420877e-05, 3.0517578115526096e-05, 1.5258789061315762e-05, 7.62939453110197e-06, 3.814697265606496e-06, 1.907348632810187e-06, 9.536743164059608e-07, 4.7683715820308884e-07, 2.3841857910155797e-07, 1.1920928955078068e-07, 5.960464477539055e-08, 2.9802322387695303e-08, 1.4901161193847655e-08, 7.450580596923828e-09, 3.725290298461914e-09, 1.862645149230957e-09, 9.313225746154785e-10, 4.656612873077393e-10, 2.3283064365386963e-10, 1.1641532182693481e-10, 5.820766091346741e-11, 2.9103830456733704e-11, 1.4551915228366852e-11, 7.275957614183426e-12, 3.637978807091713e-12, 1.8189894035458565e-12]

def cosine_sine_cordic(beta, N=40):
    # in hardware, put this in a table.
    def K_vals(n):
        K = []
        acc = 1.0
        for i in range(0, n):
            acc = acc * (1.0/(1 + 2.0**(-2*i))**0.5)
            K.append(acc)
        return K
    #K = K_vals(N)
    K = 0.6072529350088812561694
    x = 1
    y = 0
    for i in range(0, N):
        d = 1.0
        if beta < 0:
            d = -1.0
        (x, y) = (x - (d*(2.0**(-i))*y), (d*(2.0**(-i))*x) + y)
        # in hardware put the atan values in a table
        beta = beta - (d*atans[i])
    return (K*x, K*y)

if __name__ == '__main__':
    beta = 1
    cos_val, sin_val = cosine_sine_cordic(beta)
    print("Actual cos: " + str(math.cos(beta)))
    print("Cordic cos: " + str(cos_val))
This gives as output:
Actual cos: 0.540302305868
Cordic cos: 0.540302305869
