Scipy optimization with matrix multiplication - python-3.x

I've tried to use spicy.optimize.minimize to solve a matrix multiplication optimization problem, however, the result gives me a dimension error, can someone help me with it?
import numpy as np
from scipy.optimize import minimize
# define known variables, mu, sigma, rf
mu = np.matrix([[0.12],
[0.08],
[0.05]])
sigma = np.matrix([[0.5, 0.05, 0.03],
[0.05, 0.4, 0.01],
[0.03, 0.01, 0.2]])
rf = 0.02
def objective_fun(x):
'''
This is the objective function
'''
s = np.sqrt(x.T * sigma * x)/(mu.T * x - rf)
return s
def constraint(x):
con = 1
for i in np.arange(0,3):
con = con - x[i]
return con
# set up the boundaries for x
bound_i = (0, np.Inf)
bnds = (bound_i, bound_i, bound_i)
#set up the constraints for x
con = {'type':'eq', 'fun':constraint}
# initial guess for variable x
x = np.matrix([[0.5],
[0.3],
[0.2]])
sol = minimize(objective_fun, x, method = 'SLSQP', bounds = bnds, constraints = con)
The error gives me:
ValueError Traceback (most recent call last)
<ipython-input-31-b8901077b164> in <module>
----> 1 sol = minimize(objective_fun, x, method = 'SLSQP', bounds = bnds, constraints = con)
e:\Anaconda3\lib\site-packages\scipy\optimize\_minimize.py in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
606 elif meth == 'slsqp':
607 return _minimize_slsqp(fun, x0, args, jac, bounds,
--> 608 constraints, callback=callback, **options)
609 elif meth == 'trust-constr':
610 return _minimize_trustregion_constr(fun, x0, args, jac, hess, hessp,
e:\Anaconda3\lib\site-packages\scipy\optimize\slsqp.py in _minimize_slsqp(func, x0, args, jac, bounds, constraints, maxiter, ftol, iprint, disp, eps, callback, **unknown_options)
397
398 # Compute objective function
--> 399 fx = func(x)
400 try:
401 fx = float(np.asarray(fx))
e:\Anaconda3\lib\site-packages\scipy\optimize\optimize.py in function_wrapper(*wrapper_args)
324 def function_wrapper(*wrapper_args):
325 ncalls[0] += 1
--> 326 return function(*(wrapper_args + args))
327
328 return ncalls, function_wrapper
<ipython-input-28-b1fb2386a380> in objective_fun(x)
3 This is the objective function
4 '''
----> 5 s = np.sqrt(x.T * sigma * x)/(mu.T * x - rf)
6 return s
e:\Anaconda3\lib\site-packages\numpy\matrixlib\defmatrix.py in __mul__(self, other)
218 if isinstance(other, (N.ndarray, list, tuple)) :
219 # This promotes 1-D vectors to row vectors
--> 220 return N.dot(self, asmatrix(other))
221 if isscalar(other) or not hasattr(other, '__rmul__') :
222 return N.dot(self, other)
ValueError: shapes (1,3) and (1,3) not aligned: 3 (dim 1) != 1 (dim 0)
However, I tried individually every function I wrote, they all have no errors in the end, like, if after defining the x matrix as shown in the code, I simply run objective_fun(x) in the console, and I immediately get an answer:
optimize_fun(x)
matrix([[5.90897598]])
That means that my function can do the matrix multiplication correctly, so what is wrong with the code here?

The docs for minimize() says that x0 should be an (n,) shaped array, but you are trying to treat it like a (3,1) array. I'm not sure on the inner workings of minimize() but I suspect when it steps over different values of the fit parameters it converts to the format that it thinks it wants. Anyways, the following minor corrections make it so the code works.
import numpy as np
from scipy.optimize import minimize
# define known variables, mu, sigma, rf
mu = np.matrix([[0.12],
[0.08],
[0.05]])
sigma = np.matrix([[0.5, 0.05, 0.03],
[0.05, 0.4, 0.01],
[0.03, 0.01, 0.2]])
rf = 0.02
def objective_fun(x):
'''
This is the objective function
'''
x = np.expand_dims(x, 1) # convert the (3,) shape to (3,1). Then we can do our normal matrix math on it
s = np.sqrt(x.T * sigma * x)/(mu.T * x - rf) # Transposes so the shapes are correct
return s
def constraint(x):
con = 1
for i in np.arange(0,3):
con = con - x[i]
return con
# set up the boundaries for x
bound_i = (0, np.Inf)
bnds = (bound_i, bound_i, bound_i)
#set up the constraints for x
con = {'type':'eq', 'fun':constraint}
# initial guess for variable x
x = np.array([0.5, 0.3, 0.2]) # Defining the initial guess as an (3,) array)
sol = minimize(objective_fun, x, method = 'SLSQP', bounds = bnds, constraints = con)
print(sol) # and the solution looks reasonable
Output
fun: 5.86953830952583
jac: array([-1.70555401, -1.70578796, -1.70573896])
message: 'Optimization terminated successfully.'
nfev: 32
nit: 6
njev: 6
status: 0
success: True
x: array([0.42809911, 0.29522438, 0.27667651])
Take a look at the comments I put in for an explanation on what you need to do.

Related

How can I use curve_fit for functions that involve case-splitting?

I want to use curve_fit for functions that involve case-splitting.
However python throws Error.
Does curve_fit not support such a function ? Or is there is any problem at function definition ?
Example)
from scipy.optimize import curve_fit
import numpy as np
def slope_devided_by_cases(x,a,b):
if x < 4:
return a*x + b
else:
return 4*a + b
data_x = [1,2,3,4,5,6,7,8,9] # x
data_y = [45,46,42,36,27,23,21,13,11] # y
coef, cov = curve_fit(slope_devided_by_cases, data_x, data_y)
Error)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
C:\Users\Lisa~1\AppData\Local\Temp/ipykernel_1516/1012358816.py in <module>
10 data_x = [1,2,3,4,5,6,7,8,9] # x
11 data_y = [45,46,42,36,27,23,21,13,11] # y
---> 12 coef, cov = curve_fit(slope_devided_by_cases, data_x, data_y)
~\anaconda3\lib\site-packages\scipy\optimize\minpack.py in curve_fit(f, xdata, ydata, p0, sigma, absolute_sigma, check_finite, bounds, method, jac, **kwargs)
787 # Remove full_output from kwargs, otherwise we're passing it in twice.
788 return_full = kwargs.pop('full_output', False)
--> 789 res = leastsq(func, p0, Dfun=jac, full_output=1, **kwargs)
790 popt, pcov, infodict, errmsg, ier = res
791 ysize = len(infodict['fvec'])
~\anaconda3\lib\site-packages\scipy\optimize\minpack.py in leastsq(func, x0, args, Dfun, full_output, col_deriv, ftol, xtol, gtol, maxfev, epsfcn, factor, diag)
408 if not isinstance(args, tuple):
409 args = (args,)
--> 410 shape, dtype = _check_func('leastsq', 'func', func, x0, args, n)
411 m = shape[0]
412
~\anaconda3\lib\site-packages\scipy\optimize\minpack.py in _check_func(checker, argname, thefunc, x0, args, numinputs, output_shape)
22 def _check_func(checker, argname, thefunc, x0, args, numinputs,
23 output_shape=None):
---> 24 res = atleast_1d(thefunc(*((x0[:numinputs],) + args)))
25 if (output_shape is not None) and (shape(res) != output_shape):
26 if (output_shape[0] != 1):
~\anaconda3\lib\site-packages\scipy\optimize\minpack.py in func_wrapped(params)
483 if transform is None:
484 def func_wrapped(params):
--> 485 return func(xdata, *params) - ydata
486 elif transform.ndim == 1:
487 def func_wrapped(params):
C:\Users\Lisa~1\AppData\Local\Temp/ipykernel_1516/1012358816.py in slope_devided_by_cases(x, a, b)
3
4 def slope_devided_by_cases(x,a,b):
----> 5 if x < 4:
6 return a*x + b
7 else:
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
I want to use curve_fit for functions that involve case-splitting such as above example.
The problem is that x < 4 is not a boolean scalar value because curve_fit will evaluate your function with an np.ndarray x (your given x data points), not a scalar value. Consequently, x < 4 will give you an array of boolean values.
That said, you could rewrite your function by using NumPy's vectorized operations:
def slope_devided_by_cases(x,a,b):
return (x < 4) * (a*x + b) + (x >= 4) * (4*a+b)
Alternatively, you could use np.where as a vectorized alternative to your if-else approach:
def slope_devided_by_cases(x,a,b):
return np.where(x < 4, a*x + b, 4+a+b)
Another interesting approach could be using the piecewise function from numpy.
from matplotlib import pyplot as plt
from scipy.optimize import curve_fit
import numpy as np
def f(x, a, b):
return np.piecewise(
x, [x < 4, x >= 4], [lambda x_: a * x_ + b, lambda x_: 4 * a + b]
)
data_x = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
data_y = np.array([45, 46, 42, 36, 27, 23, 21, 13, 11])
coeff, cov = curve_fit(f, data_x, data_y)
y_fit = f(data_x, *coeff)
plt.plot(data_x, data_y, "o")
plt.plot(data_x, y_fit, "-")
plt.show()
Here is the result of the optimization (maybe a better model could be chosen, but I don't know all the details of the problem at hand and I didn't even specify any initial value since this question is more about making the code work).

numpy condition function for 2-D data

I have a synthetic dataset consisting of features (X) and labels (y) which is used for KMeans clustering using Python 3.8 and sklearn 0.22.2 and numpy 1.19.
X.shape, y.shape
# ((100, 2), (100,))
kmeans = KMeans(n_clusters = 3, init = 'random', n_init = 10, max_iter = 300)
# Train model on scaled features-
kmeans.fit(X)
After training KMeans on 'X', I want to replace the unique (continuous) values of 'X' with the cluster centers (discreet) obtained using KMeans.
for i in range(3):
print("cluster number {0} has center = {1}".format(i + 1, kmeans.cluster_centers_[i, :]))
'''
cluster number 1 has center = [-0.7869159 1.14173859]
cluster number 2 has center = [ 1.28010442 -1.04663318]
cluster number 3 has center = [-0.54654735 0.0054752 ]
'''
set(kmeans.labels_)
# {0, 1, 2}
One way I have of doing it is:
X[np.where(clustered_labels == 0)] = val[0,:]
X[np.where(clustered_labels == 1)] = val[1,:]
X[np.where(clustered_labels == 2)] = val[2,:]
Can I do it using np.select()?
cond = [clustered_labels == i for i in range(3)]
val = kmeans.cluster_centers_[:,:]
But on executing the code:
np.select(cond, val)
I get the following error:
--------------------------------------------------------------------------- ValueError Traceback (most recent call
last) in
----> 1 np.select(cond, val)
<array_function internals> in select(*args, **kwargs)
~/.local/lib/python3.8/site-packages/numpy/lib/function_base.py in
select(condlist, choicelist, default)
693 result_shape = condlist[0].shape
694 else:
--> 695 result_shape = np.broadcast_arrays(condlist[0], choicelist[0])[0].shape
696
697 result = np.full(result_shape, choicelist[-1], dtype)
<array_function internals> in broadcast_arrays(*args, **kwargs)
~/.local/lib/python3.8/site-packages/numpy/lib/stride_tricks.py in
broadcast_arrays(subok, *args)
256 args = [np.array(_m, copy=False, subok=subok) for _m in args]
257
--> 258 shape = _broadcast_shape(*args)
259
260 if all(array.shape == shape for array in args):
~/.local/lib/python3.8/site-packages/numpy/lib/stride_tricks.py in
_broadcast_shape(*args)
187 # use the old-iterator because np.nditer does not handle size 0 arrays
188 # consistently
--> 189 b = np.broadcast(*args[:32])
190 # unfortunately, it cannot handle 32 or more arguments directly
191 for pos in range(32, len(args), 31):
ValueError: shape mismatch: objects cannot be broadcast to a single
shape
Suggestions?
Thanks!
Somewhat cleaner way to do it (but very similar to your way) will be the following. Here's a simple example:
from sklearn.cluster import KMeans
import numpy as np
x1 = np.random.normal(0, 2, 100)
y1 = np.random.normal(0, 1, 100)
label1 = np.ones(100)
d1 = np.column_stack([x1, y1, label1])
x2 = np.random.normal(3, 1, 100)
y2 = np.random.normal(1, 2, 100)
label2 = np.ones(100) * 2
d2 = np.column_stack([x2, y2, label2])
x3 = np.random.normal(-3, 0.5, 100)
y3 = np.random.normal(0.5, 0.25, 100)
label3 = np.ones(100) * 3
d3 = np.column_stack([x3, y3, label3])
D = np.row_stack([d1, d2, d3])
np.random.shuffle(D)
X = D[:, :2]
y = D[:, 2]
print(f'X.shape = {X.shape}, y.shape = {y.shape}')
# X.shape = (300, 2), y.shape = (300,)
kmeans = KMeans(n_clusters = 3, init = 'random', n_init = 10, max_iter = 300)
# Train model on scaled features-
kmeans.fit(X)
preds = kmeans.predict(X)
X[preds==0] = kmeans.cluster_centers_[0]
X[preds==1] = kmeans.cluster_centers_[1]
X[preds==2] = kmeans.cluster_centers_[2]
Yet another way to accomplish the task is to use the np.put method instead of the assignment as follows:
np.put(X, preds==0, kmeans.cluster_centers_[0])
np.put(X, preds==1, kmeans.cluster_centers_[1])
np.put(X, preds==2, kmeans.cluster_centers_[2])
Frankly, I don't see a way to accomplish the task by the means of the np.select function, and I guess the way you do it is the best way, based on this answer.
Cheers.

How can I fix this TypeError when fitting ODE with scipy?

I would like to adjust parameters of a simple ODE using the scipy package. I have the feeling that it is bearable. I am aware about this post but I think my question is different.
First we import required packages:
import numpy as np
from scipy import integrate, optimize
We define the ODE with a signature compliant to the new scipy.interpolate.solve_ivp method:
def GGM_ODE(t, C, r, p):
return r*np.power(C, p)
We define the integrated ODE solution with a signature compliant to the classic scipy.optimize.curve_fit:
def GGM_sol(t, C, r, p):
return integrate.solve_ivp(GGM_ODE, (t[0], t[-1]), [C], t_eval=t, args=(r, p))
We create a synthetic dataset by solving the IV problem for a given set of parameters:
t = np.arange(0, 21)
sol = GGM_sol(t, 1, 0.5, 0.7)
This works perfectly.
Finally, we try to adjust parameters by fitting the integrated solution:
popt, pcov = optimize.curve_fit(GGM_sol, t, sol.y)
Unfortunately, this last step fails with a cryptic error (at least cryptic to me because I haven't enough insight on how scipy is built):
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-45-22b0c3097986> in <module>
----> 1 popt, pcov = optimize.curve_fit(GGM_sol, t, sol.y)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\scipy\optimize\minpack.py in curve_fit(f, xdata, ydata, p0, sigma, absolute_sigma, check_finite, bounds, method, jac, **kwargs)
761 # Remove full_output from kwargs, otherwise we're passing it in twice.
762 return_full = kwargs.pop('full_output', False)
--> 763 res = leastsq(func, p0, Dfun=jac, full_output=1, **kwargs)
764 popt, pcov, infodict, errmsg, ier = res
765 ysize = len(infodict['fvec'])
~\AppData\Local\Continuum\anaconda3\lib\site-packages\scipy\optimize\minpack.py in leastsq(func, x0, args, Dfun, full_output, col_deriv, ftol, xtol, gtol, maxfev, epsfcn, factor, diag)
386 if not isinstance(args, tuple):
387 args = (args,)
--> 388 shape, dtype = _check_func('leastsq', 'func', func, x0, args, n)
389 m = shape[0]
390
~\AppData\Local\Continuum\anaconda3\lib\site-packages\scipy\optimize\minpack.py in _check_func(checker, argname, thefunc, x0, args, numinputs, output_shape)
24 def _check_func(checker, argname, thefunc, x0, args, numinputs,
25 output_shape=None):
---> 26 res = atleast_1d(thefunc(*((x0[:numinputs],) + args)))
27 if (output_shape is not None) and (shape(res) != output_shape):
28 if (output_shape[0] != 1):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\scipy\optimize\minpack.py in func_wrapped(params)
461 if transform is None:
462 def func_wrapped(params):
--> 463 return func(xdata, *params) - ydata
464 elif transform.ndim == 1:
465 def func_wrapped(params):
TypeError: unsupported operand type(s) for -: 'OdeResult' and 'float'
I can see this error is classic TypeError about incompatible operands for the difference operator. It claims it cannot subtract a float to an OdeResult object. It also only concerns optimize package not the integrate.
What I do not understand it is why I am getting this error.
What must I change in my function signature or function call to make curve_fit work? Or is there something else I have missed?
It is exactly as the error message says, solve_ivp returns a solution object which contains the solution data. Try
def GGM_sol(t, C, r, p):
res = integrate.solve_ivp(GGM_ODE, (t[0], t[-1]), [C], t_eval=t, args=(r, p))
return res.y[0]
to get only the solution values.

Difference in use of ** and pow function

while attempting to write a cost function for linear regression the error is arising while replacing ** with pow function in cost_function :
Original cost function
def cost_function(x,y,theta):
m = np.size(y)
j = (1/(2*m))*np.sum(np.power(np.matmul(x,theta)-y),2)
return j
Cost function giving the error:
def cost_function(x,y,theta):
m = np.size(y)
j = (1/(2*m))*np.sum((np.matmul(x,theta)-y)**2)
return j
Gradient Descent
def gradient_descent(x,y,theta,learn_rate,iters):
x = np.mat(x);y = np.mat(y); theta= np.mat(theta);
m = np.size(y)
j_hist = np.zeros(iters)
for i in range(0,iters):
temp = theta - (learn_rate/m)*(x.T*(x*theta-y))
theta = temp
j_hist[i] = cost_function(x,y,theta)
return (theta),j_hist
Variable values
theta = np.zeros((2,1))
learn_rate = 0.01
iters = 1000
x is (97,2) matrix
y is (97,1) matrix
cost function is calculated fine with value of 32.0727
The error arises while using the same function in gradient descent.
The error am getting is LinAlgError: Last 2 dimensions of the array must be square
First let's distinguish between pow, ** and np.power. pow is the Python function, that according to docs is equivalent to ** when used with 2 arguments.
Second, you apply np.mat to the arrays, making np.matrix objects. According to its docs:
It has certain special operators, such as *
(matrix multiplication) and ** (matrix power).
matrix power:
In [475]: np.mat([[1,2],[3,4]])**2
Out[475]:
matrix([[ 7, 10],
[15, 22]])
Elementwise square:
In [476]: np.array([[1,2],[3,4]])**2
Out[476]:
array([[ 1, 4],
[ 9, 16]])
In [477]: np.power(np.mat([[1,2],[3,4]]),2)
Out[477]:
matrix([[ 1, 4],
[ 9, 16]])
Matrix power:
In [478]: arr = np.array([[1,2],[3,4]])
In [479]: arr#arr # np.matmul
Out[479]:
array([[ 7, 10],
[15, 22]])
With a non-square matrix:
In [480]: np.power(np.mat([[1,2]]),2)
Out[480]: matrix([[1, 4]]) # elementwise
Attempting to do matrix_power on a non-square matrix:
In [481]: np.mat([[1,2]])**2
---------------------------------------------------------------------------
LinAlgError Traceback (most recent call last)
<ipython-input-481-18e19d5a9d6c> in <module>()
----> 1 np.mat([[1,2]])**2
/usr/local/lib/python3.6/dist-packages/numpy/matrixlib/defmatrix.py in __pow__(self, other)
226
227 def __pow__(self, other):
--> 228 return matrix_power(self, other)
229
230 def __ipow__(self, other):
/usr/local/lib/python3.6/dist-packages/numpy/linalg/linalg.py in matrix_power(a, n)
600 a = asanyarray(a)
601 _assertRankAtLeast2(a)
--> 602 _assertNdSquareness(a)
603
604 try:
/usr/local/lib/python3.6/dist-packages/numpy/linalg/linalg.py in _assertNdSquareness(*arrays)
213 m, n = a.shape[-2:]
214 if m != n:
--> 215 raise LinAlgError('Last 2 dimensions of the array must be square')
216
217 def _assertFinite(*arrays):
LinAlgError: Last 2 dimensions of the array must be square
Note that the whole traceback lists matrix_power. That's why we often ask to see the whole traceback.
Why are you setting x,y and theta to np.mat? The cost_function uses matmul. With that function, and its # operator, there are few(er) good reasons for using np.matrix.
Despite the subject line, you did not try to use pow. That confused me and at least one other commentator. I tried to find a np.pow or a scipy version.

Trying to to use Caffe classifier causes "sequence argument must have length equal to input rank "error

I am trying to use Caffe.Classifier class and its predict() method on my Imagenet trained caffemodel.
Images were resized to 256x256 and crops of 227x227 were used to train the net.
Everything is simple and straight forward, yet I keep getting weird errors such as the following :
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-7-3b440ebf1f6e> in <module>()
17 image_dims=(256, 256))
18
---> 19 out = net.predict([image_caffe], oversample=True)
20 print(labels[out[0].argmax()].strip(),' (', out[0][out[0].argmax()] , ')')
21 plabel = int(labels[out[0].argmax()].strip())
<ipython-input-5-e6ae1810b820> in predict(self, inputs, oversample)
65 for ix, in_ in enumerate(inputs):
66 print('image dims = ',self.image_dims[0],',',self.image_dims[1] ,'_in = ',in_.shape)
---> 67 input_[ix] = caffe.io.resize_image(in_, self.image_dims)
68
69 if oversample:
C:\Users\Master\Anaconda3\envs\anaconda35\lib\site-packages\caffe\io.py in resize_image(im, new_dims, interp_order)
335 # ndimage interpolates anything but more slowly.
336 scale = tuple(np.array(new_dims, dtype=float) / np.array(im.shape[:2]))
--> 337 resized_im = zoom(im, scale + (1,), order=interp_order)
338 return resized_im.astype(np.float32)
339
C:\Users\Master\Anaconda3\envs\anaconda35\lib\site-packages\scipy\ndimage\interpolation.py in zoom(input, zoom, output, order, mode, cval, prefilter)
588 else:
589 filtered = input
--> 590 zoom = _ni_support._normalize_sequence(zoom, input.ndim)
591 output_shape = tuple(
592 [int(round(ii * jj)) for ii, jj in zip(input.shape, zoom)])
C:\Users\Master\Anaconda3\envs\anaconda35\lib\site-packages\scipy\ndimage\_ni_support.py in _normalize_sequence(input, rank, array_type)
63 if len(normalized) != rank:
64 err = "sequence argument must have length equal to input rank"
---> 65 raise RuntimeError(err)
66 else:
67 normalized = [input] * rank
RuntimeError: sequence argument must have length equal to input rank
And here is the snippets of code I'm using :
import sys
import caffe
import numpy as np
import lmdb
import matplotlib.pyplot as plt
import itertools
def flat_shape(x):
"Returns x without singleton dimension, eg: (1,28,28) -> (28,28)"
return x.reshape(x.shape)
def db_reader(fpath, type='lmdb'):
if type == 'lmdb':
return lmdb_reader(fpath)
else:
return leveldb_reader(fpath)
def lmdb_reader(fpath):
import lmdb
lmdb_env = lmdb.open(fpath)
lmdb_txn = lmdb_env.begin()
lmdb_cursor = lmdb_txn.cursor()
for key, value in lmdb_cursor:
datum = caffe.proto.caffe_pb2.Datum()
datum.ParseFromString(value)
label = int(datum.label)
image = caffe.io.datum_to_array(datum).astype(np.uint8)
yield (key, flat_shape(image), label)
def leveldb_reader(fpath):
import leveldb
db = leveldb.LevelDB(fpath)
for key, value in db.RangeIter():
datum = caffe.proto.caffe_pb2.Datum()
datum.ParseFromString(value)
label = int(datum.label)
image = caffe.io.datum_to_array(datum).astype(np.uint8)
yield (key, flat_shape(image), label)
Classifier class (copied form Caffe's python directory):
import numpy as np
import caffe
class Classifier(caffe.Net):
"""
Classifier extends Net for image class prediction
by scaling, center cropping, or oversampling.
Parameters
----------
image_dims : dimensions to scale input for cropping/sampling.
Default is to scale to net input size for whole-image crop.
mean, input_scale, raw_scale, channel_swap: params for
preprocessing options.
"""
def __init__(self, model_file, pretrained_file, image_dims=None,
mean=None, input_scale=None, raw_scale=None,
channel_swap=None):
caffe.Net.__init__(self, model_file, pretrained_file, caffe.TEST)
# configure pre-processing
in_ = self.inputs[0]
print('inputs[0]',self.inputs[0])
self.transformer = caffe.io.Transformer(
{in_: self.blobs[in_].data.shape})
self.transformer.set_transpose(in_, (2, 0, 1))
if mean is not None:
self.transformer.set_mean(in_, mean)
if input_scale is not None:
self.transformer.set_input_scale(in_, input_scale)
if raw_scale is not None:
self.transformer.set_raw_scale(in_, raw_scale)
if channel_swap is not None:
self.transformer.set_channel_swap(in_, channel_swap)
print('crops: ',self.blobs[in_].data.shape[2:])
self.crop_dims = np.array(self.blobs[in_].data.shape[2:])
if not image_dims:
image_dims = self.crop_dims
self.image_dims = image_dims
def predict(self, inputs, oversample=True):
"""
Predict classification probabilities of inputs.
Parameters
----------
inputs : iterable of (H x W x K) input ndarrays.
oversample : boolean
average predictions across center, corners, and mirrors
when True (default). Center-only prediction when False.
Returns
-------
predictions: (N x C) ndarray of class probabilities for N images and C
classes.
"""
# Scale to standardize input dimensions.
input_ = np.zeros((len(inputs),
self.image_dims[0],
self.image_dims[1],
inputs[0].shape[2]),
dtype=np.float32)
for ix, in_ in enumerate(inputs):
print('image dims = ',self.image_dims[0],',',self.image_dims[1] ,'_in = ',in_.shape)
input_[ix] = caffe.io.resize_image(in_, self.image_dims)
if oversample:
# Generate center, corner, and mirrored crops.
input_ = caffe.io.oversample(input_, self.crop_dims)
else:
# Take center crop.
center = np.array(self.image_dims) / 2.0
crop = np.tile(center, (1, 2))[0] + np.concatenate([
-self.crop_dims / 2.0,
self.crop_dims / 2.0
])
input_ = input_[:, crop[0]:crop[2], crop[1]:crop[3], :]
# Classify
caffe_in = np.zeros(np.array(input_.shape)[[0, 3, 1, 2]],
dtype=np.float32)
for ix, in_ in enumerate(input_):
caffe_in[ix] = self.transformer.preprocess(self.inputs[0], in_)
out = self.forward_all(**{self.inputs[0]: caffe_in})
predictions = out[self.outputs[0]]
# For oversampling, average predictions across crops.
if oversample:
predictions = predictions.reshape((len(predictions) / 10, 10, -1))
predictions = predictions.mean(1)
return predictions
Main section :
proto ='deploy.prototxt'
model='snap1.caffemodel'
mean='imagenet_mean.binaryproto'
db_path='G:/imagenet/ilsvrc12_val_lmdb'
# Extract mean from the mean image file
#mean_blobproto_new = caffe.proto.caffe_pb2.BlobProto()
#f = open(mean, 'rb')
#mean_blobproto_new.ParseFromString(f.read())
#mean_image = caffe.io.blobproto_to_array(mean_blobproto_new)
#f.close()
mu = np.load('mean.npy').mean(1).mean(1)
caffe.set_mode_gpu()
reader = lmdb_reader(db_path)
i = 0
for i, image, label in reader:
image_caffe = image.reshape(1, *image.shape)
print(image_caffe.shape, mu.shape)
net = Classifier(proto, model,
mean= mu,
channel_swap=(2,1,0),
raw_scale=255,
image_dims=(256, 256))
out = net.predict([image_caffe], oversample=True)
print(i, labels[out[0].argmax()].strip(),' (', out[0][out[0].argmax()] , ')')
i+=1
What is wrong here?
I found the cause, I had to feed the image in the form of 3D tensor not a 4D one!
so our 4d tensor:
image_caffe = image.reshape(1, *image.shape)
needed to be changed to a 3D one:
image_caffe = image.transpose(2,1,0)
As a side note, try using python2 for running any caffe related. python3 might work at first but will definitely cause a lot of headaches. for instance, predict method with oversample set to True, will crash under python3 but works just fine under python2!

Resources