select with bokeh not really working - python-3.x

I am using bokeh 0.12.2. I have a select with words. When i choose a word it should circle the dot data. It seems to work then stop. I am trying with 2 words, word1 and word2. lastidx is full of index.xc and yx are the location of the circle here is the code. This is working with one but not really if i change the value in the select:
for j in range(0,2):
for i in range(0,len(lastidx[j])):
xc.append(tsne_kmeans[lastidx[j][i], 0])
yc.append(tsne_kmeans[lastidx[j][i], 1])
source = ColumnDataSource(data=dict(x=xc, y=yc, s=mstwrd))
def callback(source=source):
dat = source.get('data')
x, y, s = dat['x'], dat['y'], dat['s']
val = cb_obj.get('value')
if val == 'word1':
for i in range(0,75):
x[i] = x[i]
y[i] = y[i]
elif val == 'word2':
for i in range(76,173):
x[i-76] = x[i]
y[i-76] = y[i]
source.trigger('change')
slct = Select(title="Word:", value="word1", options=mstwrd , callback=CustomJS.from_py_func(callback))
# create the circle around the data where the word exist
r = plot_kmeans.circle('x','y', source=source)
glyph = r.glyph
glyph.size = 15
glyph.fill_alpha = 0.0
glyph.line_color = "black"
glyph.line_dash = [4, 2]
glyph.line_width = 1
x and y are loaded with all the data here and I just pick the data for the word I select. It seems to work and then it does not.
Is it possible to do that as a stand alone chart?
Thank you

I figured it out: code here is just to see if this was working. This will be improved of course. And may be this is what was written here at the end:
https://github.com/bokeh/bokeh/issues/2618
for i in range(0,len(lastidx[0])):
xc.append(tsne_kmeans[lastidx[0][i], 0])
yc.append(tsne_kmeans[lastidx[0][i], 1])
addto = len(lastidx[1])-len(lastidx[0])
# here i max out the data which has the least
# so when you go from one option to the other it
# removes all the previous data circle
for i in range(0,addto):
xc.append(-16) # just send them somewhere
yc.append(16)
for i in range(0, len(lastidx[1])):
xf.append(tsne_kmeans[lastidx[1][i], 0])
yf.append(tsne_kmeans[lastidx[1][i], 1])
x = xc
y = yc
source = ColumnDataSource(data=dict(x=x, y=y,xc=xc,yc=yc,xf=xf,yf=yf))
val = "word1"
def callback(source=source):
dat = source.get('data')
x, y,xc,yc,xf,yf = dat['x'], dat['y'], dat['xc'], dat['yc'], dat['xf'], dat['yf']
# if slct.options['value'] == 'growth':
val = cb_obj.get('value')
if val == 'word1':
for i in range(0,len(xc)):
x[i] = xc[i]
y[i] = yc[i]
elif val == 'word2':
for i in range(0,len(xf)):
x[i] = xf[i]
y[i] = yf[i]
source.trigger('change')
slct = Select(title="Most Used Word:", value=val, options=mstwrd , callback=CustomJS.from_py_func(callback))
# create the circle around the data where the word exist
r = plot_kmeans.circle('x','y', source=source)
I will check if i can pass a matrix. Don't forget to have the same size of data if not you will have multiple options circled in the same time.
Thank you

Related

Discrete Laplacian on a non-regular mesh (python)

I have coded the laplacien function for a non-regular mesh (created with the scipy.spatial.Delaunay function).
I have not errors but the results are not correct : the eigenvectors are correct but the eigenvalues ​​are too high (in absolute value).
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import scipy.spatial
def rect_drum(L,H,U):
vals = []
val = 0
k = 1
l = 1
while val >= -U:
while val >= -U:
val = -np.pi**2*((k/L)**2+(l/H)**2)
if val >= -U:
vals.append(val)
l += 1
l = 1
k += 1
val = -np.pi**2*((k/L)**2+(l/H)**2)
return np.array(vals)
def count_vp(tab,U):
#count the n eigenvalues ​​greater than equal to -U in the array tab
return tab[tab>=-U]
def in_curve(f,fargs,shape,a):
points = [] # the points inside the curve
for j in range(shape[0]):
for i in range(shape[1]):
if f(i*a,j*a,*fargs) < 0:
points.append([i*a,j*a])
return np.array(points)
def triang(points,a,f,fargs,bord):
tri_points = points.copy()
tri_points[:,1] *= np.sqrt(3)
tri_points2 = np.vstack((points,bord))
tri_points2[:,1] *= np.sqrt(3)
tri_points2[:,0] += a/2
tri_points2[:,1] += np.sqrt(3)/2*a
fin = np.vstack((tri_points,tri_points2))
i = 0
eps = 0.01
while i < len(fin):
if f(fin[i,0]+eps,fin[i,1]+eps,*fargs) > 0:
fin = np.delete(fin,i,0)
i -= 1
i += 1
return np.vstack((fin,bord)),len(fin),len(bord)
def tri_ang(points,ind,p0):
# sort the points in trigonometric order
vec=np.arctan2((points-p0)[:,1],(points-p0)[:,0])
values = []
dtype = [('val',float),('n',int)]
for i in range(len(vec)):
values.append((vec[i],i))
values = np.sort(np.array(values,dtype),order='val')
new_points = []
new_ind = []
for tup in values:
new_points.append(points[tup[1]])
new_ind.append(ind[tup[1]])
return np.array(new_points),np.array(new_ind)
def M(points,tri,Nint):
indptr,ind = tri.vertex_neighbor_vertices
W = np.zeros((Nint,Nint)) # cotangents matrix
A = np.zeros((Nint,1)) # surfaces vertex array for each point i (A[i])
for i in range(Nint):
tot = 0
nhb_ind = ind[indptr[i]:indptr[i+1]] # indices of the points close to the point of index k
nhb = points[nhb_ind] # their coordinates
nhb,nhb_ind = tri_ang(nhb,nhb_ind,points[i]) #the coordinates (nhb) and (nhb_ind) of each neighbor of i
for j in range(len(nhb_ind)):
vec = nhb[j]-points[i] # a vector connecting the point to his neighbor of index 0
vec_av = nhb[j-1]-points[i] # another vector but with the Vosin from before
if j+1 >= len(nhb_ind):
k = 0
else:
k = j+1
vec_ap = nhb[k]-points[i] # another vector but with the next neighbor
# another vector but with the next neighbor
A[i] += 0.5/3*np.linalg.norm(np.cross(vec,vec_av))
if nhb_ind[j] < Nint:
# we use the vector and scalar product to calculate the cotangents: A.B/||AxB||
cotan_alpha = np.dot(vec_av,vec_av-vec)/np.linalg.norm(np.cross(vec_av,vec_av-vec))
cotan_beta = np.dot(vec_ap,vec_ap-vec)/np.linalg.norm(np.cross(vec_ap,vec_ap-vec))
# Wij value :
W[i,nhb_ind[j]] = -0.5*(cotan_alpha+cotan_beta)
tot += cotan_alpha+cotan_beta
W[i,i] = -0.5*tot # diagonal values
return (1/A)*W
def rect(x,y,L,H,x0=0,y0=0):
if 0<x-x0<L and 0<y-y0<H:
return -1
else:
return 1
def rect_rim(L,H,a,x0=0,y0=0):
tab1 = np.arange(x0,L+x0,a)[:,np.newaxis]
h = np.hstack((tab1,H*np.ones((len(tab1),1))+y0))
b = np.hstack((tab1,np.zeros((len(tab1),1))+y0))
tab2 = np.arange(y0+a,H+y0,a)[:,np.newaxis]
g = np.hstack((np.zeros((len(tab2),1))+x0,tab2))
d = np.hstack((L*np.ones((len(tab2),1))+x0,tab2))
hp = np.array([[L+x0,H+y0]])
bp = np.array([[L+x0,0]])
return np.vstack((h,b,g,d,hp,bp))
# sample with a square 1*1
L = 1
H = 1
dl = 0.05
sol = in_curve(rect,[L,H],(100,100),dl)
sol_tri,Nint,Nbord = triang(sol,dl,rect,[L,H],rect_rim(L,H,dl))
# plt.plot(sol_tri[:,0],sol_tri[:,1],linestyle="",marker="+",label="tri")
# plt.plot(sol[:,0],sol[:,1],linestyle="",marker="x")
# plt.legend()
# plt.show()
# triangulation
tri = scipy.spatial.Delaunay(sol_tri)
# plt.triplot(sol_tri[:,0],sol_tri[:,1],tri.simplices)
# plt.show()
M = M(sol_tri,tri,Nint)
valp,vecp = np.linalg.eig(M) # eigenvalues and eigenvectors
vecp = np.real(vecp)
# comparison with the exact solution:
T = 1000
U = np.arange(0,T,1)
NUsim = np.array([len(count_vp(valp,u)) for u in U])
NU = np.array([len(rect_drum(L,H,u)) for u in U])
plt.plot(U,NUsim,label='simulation')
plt.plot(U,NU,label='exacts')
plt.legend()
plt.show()
# 3D plot of an eigenvector
vecp_tot = np.vstack((vecp,np.zeros((Nbord,Nint))))
fig = plt.figure()
ax = fig.gca(projection='3d')
ax.plot_trisurf(sol_tri[:,0],sol_tri[:,1],vecp_tot[:,0],triangles=tri.simplices)
plt.show()
The laplacian is the function named "M".
The "in_curve function" return the points inside a curve defined by f(x,y,*fargs) < 0 (a square in the sample).
The "triang" function return points with added points (triangle meshs). The fonction uses an another function for the rim of the curve (for most precision), in the sample it is the "rect_rim" function.
I used the formula given at https://en.wikipedia.org/wiki/Discrete_Laplace_operator ("mesh laplacians").
I have solve my problem : it's a sign and a rim problems.
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import scipy.spatial
def rect_drum(L,H,U):
vals = []
val = 0
k = 1
l = 1
while val >= -U:
while val >= -U:
val = -np.pi**2*((k/L)**2+(l/H)**2)
if val >= -U:
vals.append(val)
l += 1
l = 1
k += 1
val = -np.pi**2*((k/L)**2+(l/H)**2)
return np.array(vals)
def count_vp(tab,U):
#count the n eigenvalues ​​greater than equal to -U in the array tab
return tab[tab>=-U]
def in_curve(f,fargs,shape,a):
points = [] # the points inside the curve
for j in range(shape[0]):
for i in range(shape[1]):
if f(i*a,j*a,*fargs) < 0:
points.append([i*a,j*a])
return np.array(points)
def triang(points,a,f,fargs,bord):
tri_points = points.copy()
tri_points[:,1] *= np.sqrt(3)
tri_points2 = np.vstack((points,bord))
tri_points2[:,1] *= np.sqrt(3)
tri_points2[:,0] += a/2
tri_points2[:,1] += np.sqrt(3)/2*a
fin = np.vstack((tri_points,tri_points2))
i = 0
eps = 0.01
while i < len(fin):
if f(fin[i,0]+eps,fin[i,1]+eps,*fargs) > 0:
fin = np.delete(fin,i,0)
i -= 1
i += 1
return np.vstack((fin,bord)),len(fin),len(bord)
def tri_ang(points,ind,p0):
# sort the points in trigonometric order
vec=np.arctan2((points-p0)[:,1],(points-p0)[:,0])
values = []
dtype = [('val',float),('n',int)]
for i in range(len(vec)):
values.append((vec[i],i))
values = np.sort(np.array(values,dtype),order='val')
new_points = []
new_ind = []
for tup in values:
new_points.append(points[tup[1]])
new_ind.append(ind[tup[1]])
return np.array(new_points),np.array(new_ind)
def Laplacian(points,tri,Nint):
indptr,ind = tri.vertex_neighbor_vertices
W = np.zeros((Nint,Nint)) # cotangents matrix
A = np.zeros((Nint,1)) # surfacesvertex aray of point i (A[i])
for i in range(Nint):
tot = 0
nhb_ind = ind[indptr[i]:indptr[i+1]] # indices of the points close to the point of index k
nhb = points[nhb_ind] # their coordinates
nhb,nhb_ind = tri_ang(nhb,nhb_ind,points[i]) #the coordinates (nhb) and (nhb_ind) of each neighbor of i
for j in range(len(nhb_ind)):
vec = nhb[j]-points[i] # a vector connecting the point to his neighbor of index 0
vec_av = nhb[j-1]-points[i] # another vector but with the Vosin from before
if j+1 >= len(nhb_ind):
k = 0
else:
k = j+1
vec_ap = nhb[k]-points[i] # another vector but with the next neighbor
# we use the cross product to calculate the areas of the triangles: ||AxB||/2:
A[i] += 0.5/3*np.linalg.norm(np.cross(vec,vec_av))
# we use the cross product and scalar product to calculate the cotangents: A.B/||AxB||
cotan_alpha = np.dot(vec_av,vec_av-vec)/np.linalg.norm(np.cross(vec_av,vec_av-vec))
cotan_beta = np.dot(vec_ap,vec_ap-vec)/np.linalg.norm(np.cross(vec_ap,vec_ap-vec))
tot += cotan_alpha+cotan_beta
if nhb_ind[j] < Nint:
W[i,nhb_ind[j]] = 0.5*(cotan_alpha+cotan_beta)
W[i,i] = -0.5*tot # diagonal values
return (1/A)*W
def rect(x,y,L,H,x0=0,y0=0):
if 0<x-x0<L and 0<y-y0<H:
return -1
else:
return 1
def rect_rim(L,H,a,x0=0,y0=0):
tab1 = np.arange(x0,L+x0,a)[:,np.newaxis]
h = np.hstack((tab1,H*np.ones((len(tab1),1))+y0))
b = np.hstack((tab1,np.zeros((len(tab1),1))+y0))
tab2 = np.arange(y0+a,H+y0,a)[:,np.newaxis]
g = np.hstack((np.zeros((len(tab2),1))+x0,tab2))
d = np.hstack((L*np.ones((len(tab2),1))+x0,tab2))
hp = np.array([[L+x0,H+y0]])
bp = np.array([[L+x0,0]])
return np.vstack((h,b,g,d,hp,bp))
# sample with a square 1*1
L = 1
H = 1
dl = 0.04
sol = in_curve(rect,[L,H],(100,100),dl)
sol_tri,Nint,Nbord = triang(sol,dl,rect,[L,H],rect_rim(L,H,dl))
# triangulation
tri = scipy.spatial.Delaunay(sol_tri)
M = Laplacian(sol_tri,tri,Nint)
valp,vecp = np.linalg.eig(M) # eigenvalues and eigenvectors
vecp = np.real(vecp)
# comparison with the exact solution:
T = 1000
U = np.arange(0,T,1)
NUsim = np.array([len(count_vp(valp,u)) for u in U])
NU = np.array([len(rect_drum(L,H,u)) for u in U])
plt.plot(U,NUsim,label='simulation')
plt.plot(U,NU,label='exacts')
plt.legend()
plt.show()
# 3D plot of an eigenvector
mode = 0 # change this for an another mode
vecp_tot = np.vstack((vecp,np.zeros((Nbord,Nint))))
fig = plt.figure()
ax = fig.gca(projection='3d')
ax.plot_trisurf(sol_tri[:,0],sol_tri[:,1],vecp_tot[:,mode],triangles=tri.simplices)
plt.show()
Notes :
1- The hight eigenvalues are false : it's an effect of discretisation.
2- If dl is too small, we have false eigenvectors and eigenvalues (at the top of valp and firsts vectors of vecp), it's probably due to the quality of the meshing.

Speed Up a for Loop - Python

I have a code that works perfectly well but I wish to speed up the time it takes to converge. A snippet of the code is shown below:
def myfunction(x, i):
y = x + (min(0, target[i] - data[i, :]x))*data[i]/(norm(data[i])**2))
return y
rows, columns = data.shape
start = time.time()
iterate = 0
iterate_count = []
norm_count = []
res = 5
x_not = np.ones(columns)
norm_count.append(norm(x_not))
iterate_count.append(0)
while res > 1e-8:
for row in range(rows):
y = myfunction(x_not, row)
x_not = y
iterate += 1
iterate_count.append(iterate)
norm_count.append(norm(x_not))
res = abs(norm_count[-1] - norm_count[-2])
print('Converge at {} iterations'.format(iterate))
print('Duration: {:.4f} seconds'.format(time.time() - start))
I am relatively new in Python. I will appreciate any hint/assistance.
Ax=b is the problem we wish to solve. Here, 'A' is the 'data' and 'b' is the 'target'
Ugh! After spending a while on this I don't think it can be done the way you've set up your problem. In each iteration over the row, you modify x_not and then pass the updated result to get the solution for the next row. This kind of setup can't be vectorized easily. You can learn the thought process of vectorization from the failed attempt, so I'm including it in the answer. I'm also including a different iterative method to solve linear systems of equations. I've included a vectorized version -- where the solution is updated using matrix multiplication and vector addition, and a loopy version -- where the solution is updated using a for loop to demonstrate what you can expect to gain.
1. The failed attempt
Let's take a look at what you're doing here.
def myfunction(x, i):
y = x + (min(0, target[i] - data[i, :] # x)) * (data[i] / (norm(data[i])**2))
return y
You subtract
the dot product of (the ith row of data and x_not)
from the ith row of target,
limited at zero.
You multiply this result with the ith row of data divided my the norm of that row squared. Let's call this part2
Then you add this to the ith element of x_not
Now let's look at the shapes of the matrices.
data is (M, N).
target is (M, ).
x_not is (N, )
Instead of doing these operations rowwise, you can operate on the entire matrix!
1.1. Simplifying the dot product.
Instead of doing data[i, :] # x, you can do data # x_not and this gives an array with the ith element giving the dot product of the ith row with x_not. So now we have data # x_not with shape (M, )
Then, you can subtract this from the entire target array, so target - (data # x_not) has shape (M, ).
So far, we have
part1 = target - (data # x_not)
Next, if anything is greater than zero, set it to zero.
part1[part1 > 0] = 0
1.2. Finding rowwise norms.
Finally, you want to multiply this by the row of data, and divide by the square of the L2-norm of that row. To get the norm of each row of a matrix, you do
rownorms = np.linalg.norm(data, axis=1)
This is a (M, ) array, so we need to convert it to a (M, 1) array so we can divide each row. rownorms[:, None] does this. Then divide data by this.
part2 = data / (rownorms[:, None]**2)
1.3. Add to x_not
Finally, we're adding each row of part1 * part2 to the original x_not and returning the result
result = x_not + (part1 * part2).sum(axis=0)
Here's where we get stuck. In your approach, each call to myfunction() gives a value of part1 that depends on target[i], which was changed in the last call to myfunction().
2. Why vectorize?
Using numpy's inbuilt methods instead of looping allows it to offload the calculation to its C backend, so it runs faster. If your numpy is linked to a BLAS backend, you can extract even more speed by using your processor's SIMD registers
The conjugate gradient method is a simple iterative method to solve certain systems of equations. There are other more complex algorithms that can solve general systems well, but this should do for the purposes of our demo. Again, the purpose is not to have an iterative algorithm that will perfectly solve any linear system of equations, but to show what kind of speedup you can expect if you vectorize your code.
Given your system
data # x_not = target
Let's define some variables:
A = data.T # data
b = data.T # target
And we'll solve the system A # x = b
x = np.zeros((columns,)) # Initial guess. Can be anything
resid = b - A # x
p = resid
while (np.abs(resid) > tolerance).any():
Ap = A # p
alpha = (resid.T # resid) / (p.T # Ap)
x = x + alpha * p
resid_new = resid - alpha * Ap
beta = (resid_new.T # resid_new) / (resid.T # resid)
p = resid_new + beta * p
resid = resid_new + 0
To contrast the fully vectorized approach with one that uses iterations to update the rows of x and resid_new, let's define another implementation of the CG solver that does this.
def solve_loopy(data, target, itermax = 100, tolerance = 1e-8):
A = data.T # data
b = data.T # target
rows, columns = data.shape
x = np.zeros((columns,)) # Initial guess. Can be anything
resid = b - A # x
resid_new = b - A # x
p = resid
niter = 0
while (np.abs(resid) > tolerance).any() and niter < itermax:
Ap = A # p
alpha = (resid.T # resid) / (p.T # Ap)
for i in range(len(x)):
x[i] = x[i] + alpha * p[i]
resid_new[i] = resid[i] - alpha * Ap[i]
# resid_new = resid - alpha * A # p
beta = (resid_new.T # resid_new) / (resid.T # resid)
p = resid_new + beta * p
resid = resid_new + 0
niter += 1
return x
And our original vector method:
def solve_vect(data, target, itermax = 100, tolerance = 1e-8):
A = data.T # data
b = data.T # target
rows, columns = data.shape
x = np.zeros((columns,)) # Initial guess. Can be anything
resid = b - A # x
resid_new = b - A # x
p = resid
niter = 0
while (np.abs(resid) > tolerance).any() and niter < itermax:
Ap = A # p
alpha = (resid.T # resid) / (p.T # Ap)
x = x + alpha * p
resid_new = resid - alpha * Ap
beta = (resid_new.T # resid_new) / (resid.T # resid)
p = resid_new + beta * p
resid = resid_new + 0
niter += 1
return x
Let's solve a simple system to see if this works first:
2x1 + x2 = -5
−x1 + x2 = -2
should give a solution of [-1, -3]
data = np.array([[ 2, 1],
[-1, 1]])
target = np.array([-5, -2])
print(solve_loopy(data, target))
print(solve_vect(data, target))
Both give the correct solution [-1, -3], yay! Now on to bigger things:
data = np.random.random((100, 100))
target = np.random.random((100, ))
Let's ensure the solution is still correct:
sol1 = solve_loopy(data, target)
np.allclose(data # sol1, target)
# Output: False
sol2 = solve_vect(data, target)
np.allclose(data # sol2, target)
# Output: False
Hmm, looks like the CG method doesn't work for badly conditioned random matrices we created. Well, at least both give the same result.
np.allclose(sol1, sol2)
# Output: True
But let's not get discouraged! We don't really care if it works perfectly, the point of this is to demonstrate how amazing vectorization is. So let's time this:
import timeit
timeit.timeit('solve_loopy(data, target)', number=10, setup='from __main__ import solve_loopy, data, target')
# Output: 0.25586539999994784
timeit.timeit('solve_vect(data, target)', number=10, setup='from __main__ import solve_vect, data, target')
# Output: 0.12008900000000722
Nice! A ~2x speedup simply by avoiding a loop while updating our solution!
For larger systems, this will be even better.
for N in [10, 50, 100, 500, 1000]:
data = np.random.random((N, N))
target = np.random.random((N, ))
t_loopy = timeit.timeit('solve_loopy(data, target)', number=10, setup='from __main__ import solve_loopy, data, target')
t_vect = timeit.timeit('solve_vect(data, target)', number=10, setup='from __main__ import solve_vect, data, target')
print(N, t_loopy, t_vect, t_loopy/t_vect)
This gives us:
N t_loopy t_vect speedup
00010 0.002823 0.002099 1.345390
00050 0.051209 0.014486 3.535048
00100 0.260348 0.114601 2.271773
00500 0.980453 0.240151 4.082644
01000 1.769959 0.508197 3.482822

How could I set the staring and ending points randomly in a grid that generates random obstacles?

I built a grid that generates random obstacles for pathfinding algorithm, but with fixed starting and ending points as shown in my snippet below:
import random
import numpy as np
#grid format
# 0 = navigable space
# 1 = occupied space
x = [[random.uniform(0,1) for i in range(50)]for j in range(50)]
grid = np.array([[0 for i in range(len(x[0]))]for j in range(len(x))])
for i in range(len(x)):
for j in range(len(x[0])):
if x[i][j] <= 0.7:
grid[i][j] = 0
else:
grid[i][j] = 1
init = [5,5] #Start location
goal = [45,45] #Our goal
# clear starting and end point of potential obstacles
def clear_grid(grid, x, y):
if x != 0 and y != 0:
grid[x-1:x+2,y-1:y+2]=0
elif x == 0 and y != 0:
grid[x:x+2,y-1:y+2]=0
elif x != 0 and y == 0:
grid[x-1:x+2,y:y+2]=0
elif x ==0 and y == 0:
grid[x:x+2,y:y+2]=0
clear_grid(grid, init[0], init[1])
clear_grid(grid, goal[0], goal[1])
I need to generate also the starting and ending points randomly every time I run the code instead of making them fixed. How could I make it? Any assistance, please?.
Replace,
init = [5,5] #Start location
goal = [45,45] #Our goal
with,
init = np.random.randint(0, high = 49, size = 2)
goal = np.random.randint(0, high = 49, size = 2)
Assuming your grid goes from 0-49 on each axis. Personally I would add grid size variables, i_length & j_length
EDIT #1
i_length = 50
j_length = 50
x = [[random.uniform(0,1) for i in range(i_length)]for j in range(j_length)]
grid = np.array([[0 for i in range(i_length)]for j in range(j_length)])

How to remove an image and it's label from a dataset

When I import a dataset of images and the csv file with the corresponding labels, I made a function to extract every image label. But know I have to remove some images that do not fit specific criteria. Is there a way to remove the corresponding label as well?
This is the function that is used to load the images and the labels
def imp_img():
dirname = '/s/desk/img/'
x = np.zeros((1000, 100, 100), dtype=np.float32)
for i in range(x.shape[0]):
img = Image.open(dirname + 'img_%02d.png' % (i))
img = np.array(img)
x[i] = img
path = '/s/desk/labels_classificatio.csv'
labels = pd.read_csv(path, usecols=["category"],
sep=";" )
y = np.array(labels)
return x, y
This is how they are imported
x, y = imp_img()
x = x/255.0
y = y.reshape(y.shape[0], 1)
x.shape, y.shape
and now I made for loop to remove the images that are too dark
c =[]
for i in x:
if np.sum(i) >= 100:
c.append(i)
c = np.asarray(c)
The problem now is that I have fewer images than I have labels. Is there a way to remove the corresponding label as well?
You're looking for enumerate. It lets you loop over an iterable while maintaining a count. Instead of for i in x we'll do for i, img in enumerate(x) which let us maintain a loop counter i. This way you can sbset the labels corresponding to the images that meet your criteria.
code:
c = []
c_labels = []
for i, img in enumerate(x):
if np.sum(img) >= 100:
c.append(img)
c_labels.append(y[i])
c = np.asarray(c)
c_labels = np.asarray(c_labels)

create set of randomized column names in pandas dataframe

I am trying to create a set of columns (within panda dataframe) where the column names are randomized. This is because I want to generate filter data from a larger data-set in a randomized fashion.
How can I generate an N (= 4) * 3 set of column names as per below?
car_speed state_8 state_17 state_19 state_16 wd_8 wd_17 wd_19 wd_16 wu_8 wu_17 wu_19 wu_16
My potential code below, but doesn't really work. I need the blocks'state_' first, then 'wd_', and then 'wd_'. My code below generates 'state_', 'wd_', 'wu_' individually in consecutive order. I have problems further on, when it is in that order, of filling in the data from the larger data-set
def iteration1(data, classes = 50, sigNum = 4):
dataNN = pd.DataFrame(index = [0])
dataNN['car_speed'] = np.zeros(1)
while len(dataNN.columns) < sigNum + 1:
state = np.int(np.random.uniform(0, 50))
dataNN['state_'+str(state)] = np.zeros(1) # this is the state value set-up
dataNN['wd_' + str(state)] = np.zeros(1) # this is the weight direction
dataNN['wu_' + str(state)] = np.zeros(1) # this is the weight magnitude
count = 0 # initialize count row as zero
while count < classes :
dataNN.loc[count] = np.zeros(len(dataNN.columns))
for state in dataNN.columns[1:10]:
dataNN[state].loc[count] = data[state].loc[count]
count = count + 1
if count > classes : break
return dataNN
Assuming the problem you have is lack of grouping of "state_*", "wd_*", and "wu_*" I suggest that you first select sigNum / 3 random ints and then use them to label the columns. Like the following:
states = [np.int(np.random.uniform(0, 50)) for _ in range (sigNum/3)]
i = 0
while len(dataNN.columns) <= sigNum:
state = states[i]
i += 1
dataNN['state_'+str(state)] = np.zeros(1) # this is the state value set-up
dataNN['wd_' + str(state)] = np.zeros(1) # this is the weight direction
dataNN['wu_' + str(state)] = np.zeros(1) # this is the weight magnitude
import random
import pandas as pd
def iteration1(data, classes = 5, subNum = 15):
dataNN = pd.DataFrame(index = [0])
dataNN['car_speed'] = np.zeros(1)
states = random.sample(range(50), sub_sig)
for i in range(0, sub_sig, 1):
dataNN['state_'+str(states[i])] = np.zeros(1) # this is the state value set-up
for i in range(0, subNum, 1):
dataNN['wd_' + str(states[i])] = np.zeros(1) # this is the weight direction
for i in range(0, subNum, 1):
dataNN['wu_' + str(states[i])] = np.zeros(1) # this is the weight magnitude
return dataNN

Resources