Problem implementing cost function for logistic regression - python-3.x

I am trying to run a cost function for logistic regression, but I keep getting told that the function is incorrect, and I do not know why.
def compute_cost(X, y, w, b, lambda_=1):
    """
    Computes the cost over all examples
    Args:
      X : (ndarray Shape (m,n)) data, m examples by n features
      y : (array_like Shape (m,)) target value
      w : (array_like Shape (n,)) values of parameters of the model
      b : (scalar)                value of bias parameter of the model
      lambda_ : unused placeholder
    Returns:
      total_cost : (scalar) cost
    """
    m, n = X.shape
    ### START CODE HERE ###
    loss_sum = 0
    for i in range(m):
        z_wb = 0
        for j in range(n):
            z_wb_ij = w[j] * X[i][j]
            z_wb += z_wb_ij
        z_wb += b
        f_wb = sigmoid(z_wb)
        loss = -y[i] * np.log(f_wb) - (1 - y[i]) * np.log(1 - f_wb)
        loss_sum += loss
    total_cost = (1 / m) * loss_sum
    ### END CODE HERE ###
    return total_cost

def compute_cost(X, y, w, b, lambda_=1):
    """
    Computes the cost over all examples
    Args:
      X : (ndarray Shape (m,n)) data, m examples by n features
      y : (array_like Shape (m,)) target value
      w : (array_like Shape (n,)) values of parameters of the model
      b : (scalar)                value of bias parameter of the model
      lambda_ : unused placeholder
    Returns:
      total_cost : (scalar) cost
    """
    m, n = X.shape
    ### START CODE HERE ###
    cost = 0.0
    for i in range(m):
        z_i = np.dot(X[i], w) + b                                        # (n,)·(n,) = scalar, see np.dot
        f_wb_i = sigmoid(z_i)                                            # scalar
        cost += -y[i]*np.log(f_wb_i) - (1 - y[i])*np.log(1 - f_wb_i)     # scalar
    cost = cost / m                                                      # scalar
    reg_cost = 0
    for j in range(n):
        reg_cost += (w[j]**2)                                            # scalar
    reg_cost = (lambda_ / (2 * m)) * reg_cost                            # scalar
    total_cost = cost + reg_cost
    ### END CODE HERE ###
    return total_cost
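
As a side note, the same quantity can be computed without the explicit Python loops. This is only a minimal vectorized sketch (it assumes sigmoid and numpy as np are available exactly as above), not the graded solution:

def compute_cost_vectorized(X, y, w, b, lambda_=1):
    # z = X·w + b for all m examples at once, shape (m,)
    m = X.shape[0]
    f_wb = sigmoid(np.dot(X, w) + b)
    # mean cross-entropy loss over the m examples
    cost = -np.mean(y * np.log(f_wb) + (1 - y) * np.log(1 - f_wb))
    # L2 regularization term (the bias b is not regularized)
    reg_cost = (lambda_ / (2 * m)) * np.sum(np.asarray(w) ** 2)
    return cost + reg_cost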

Related

Placing the constant when defining a matrix in python

Can someone explain why these two seemingly identical pieces of code yield different results when using my function on test data from the MNIST dataset (http://yann.lecun.com/exdb/mnist/)?
Model 1:

def fit_generative_model1(x, y):
    k = 10              # labels 0,1,...,k-1
    d = (x.shape)[1]    # number of features
    mu = np.zeros((k, d))
    sigma = np.zeros((k, d, d))
    pi = np.zeros(k)
    for label in range(0, k):
        c = 4000
        indices = (y == label)
        mu[label] = np.mean(x[indices, :], axis=0)
        sigma[label] = np.cov(x[indices, :], rowvar=0, bias=1) + c*np.identity(784, dtype=float)  # regularizing the matrix
    return mu, sigma, pi

mu, sigma, pi = fit_generative_model1(train_data, train_labels)
Model 2:

def fit_generative_model2(x, y):
    k = 10              # labels 0,1,...,k-1
    d = (x.shape)[1]    # number of features
    mu = np.zeros((k, d))
    sigma = np.zeros((k, d, d))
    pi = np.zeros(k)
    for label in range(0, k):
        indices = (y == label)
        mu[label] = np.mean(x[indices, :], axis=0)
        sigma[label] = np.cov(x[indices, :], rowvar=0, bias=1) + 4000*np.eye(784)  # regularized
        pi[label] = float(sum(indices)) / float(len(y))
    return mu, sigma, pi

mu, sigma, pi = fit_generative_model2(train_data, train_labels)
Next, making predictions on the test data.
# Compute log Pr(label|image) for each [test image, label] pair.
k = 10
score = np.zeros((len(test_labels), k))
for label in range(0, k):
    rv = multivariate_normal(mean=mu[label], cov=sigma[label])
    for i in range(0, len(test_labels)):
        score[i, label] = np.log(pi[label]) + rv.logpdf(test_data[i, :])
predictions = np.argmax(score, axis=1)
# Finally, tally up the score
errors = np.sum(predictions != test_labels)
print("Your model makes " + str(errors) + " errors out of 10000")
print("This is " + str(errors/100) + "% error rate")

With model 1 (why??):

Your model makes 9020 errors out of 10000
This is 90.2% error rate

With model 2, all good:

Your model makes 431 errors out of 10000
This is 4.31% error rate
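
One observation from the code above (an observation, not a verified fix): fit_generative_model1 never fills in pi, so it stays all zeros and np.log(pi[label]) evaluates to -inf for every label, which wipes out the class prior in the scoring loop. A quick standalone check of what that does:

import numpy as np

pi = np.zeros(10)
print(np.log(pi[3]))   # -inf (with a divide-by-zero RuntimeWarning)
# every column of `score` then has -inf added; with all scores equal,
# np.argmax returns label 0 for every test image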
I am also struggling to make the loop for finding the best c work.
for c in [20, 2000, 4000]:
    k = 10
    score = np.zeros((len(test_labels), k))
    for label in range(0, k):
        rv = multivariate_normal(mean=mu[label], cov=sigma[label])
        for i in range(0, len(test_labels)):
            score[i, label] = np.log(pi[label]) + rv.logpdf(test_data[i, :])
    predictions = np.argmax(score, axis=1)
    errors = np.sum(predictions != test_labels)
    print("Model with " + str(c) + " has a " + str(errors/100) + " error rate")

Model with 20 has a 4.31 error rate
Model with 2000 has a 4.31 error rate
Model with 4000 has a 4.31 error rate
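
Note that this loop never actually uses c: mu, sigma and pi were fitted once outside it, so every iteration scores the same model. A minimal sketch of refitting per c (assuming the same imports as above and a model-fitting function changed to take c as a parameter; this restructuring is my own, not the original code):

def fit_generative_model(x, y, c):
    k, d = 10, x.shape[1]
    mu = np.zeros((k, d))
    sigma = np.zeros((k, d, d))
    pi = np.zeros(k)
    for label in range(k):
        indices = (y == label)
        mu[label] = np.mean(x[indices, :], axis=0)
        sigma[label] = np.cov(x[indices, :], rowvar=0, bias=1) + c * np.eye(d)   # c now comes from the caller
        pi[label] = float(sum(indices)) / float(len(y))
    return mu, sigma, pi

for c in [20, 2000, 4000]:
    mu, sigma, pi = fit_generative_model(train_data, train_labels, c)   # refit with this c
    score = np.zeros((len(test_labels), 10))
    for label in range(10):
        rv = multivariate_normal(mean=mu[label], cov=sigma[label])
        score[:, label] = np.log(pi[label]) + rv.logpdf(test_data)      # logpdf over all test points at once
    errors = np.sum(np.argmax(score, axis=1) != test_labels)
    print("Model with " + str(c) + " has a " + str(errors / 100) + " error rate")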

How to create my own loss function in Pytorch?

I'd like to create a model that predicts parameters of a circle (coordinates of center, radius).
Input is an array of points (an arc with noise):

def generate_circle(x0, y0, r, start_angle, phi, N, sigma):
    theta = np.linspace(start_angle*np.pi/180, (start_angle + phi)*np.pi/180, num=N)
    x = np.array([np.random.normal(r*np.cos(t) + x0, sigma, 1)[0] for t in theta])
    y = np.array([np.random.normal(r*np.sin(t) + y0, sigma, 1)[0] for t in theta])
    return x, y

n_x = 1000
start_angle = 0
phi = 90
N = 100
sigma = 0.005

x_full = []
for i in range(n_x):
    x0 = np.random.normal(0, 10, 1)[0]
    y0 = np.random.normal(0, 10, 1)[0]
    r = np.random.normal(0, 10, 1)[0]
    x, y = generate_circle(x0, y0, r, start_angle, phi, N, sigma)
    x_full.append(np.array([[x[i], y[i]] for i in range(len(x))]))

X = torch.from_numpy(np.array(x_full))
print(X.size())  # torch.Size([1000, 100, 2])
Output: [x_c, y_c, r]
As a loss function I need to use the squared residual of the circle equation, summed over all points: sum_i ((x_i - x_c)^2 + (y_i - y_c)^2 - r^2)^2.
I tried to implement something like the following:
class Net(torch.nn.Module):
    def __init__(self, n_feature, n_hidden, n_output):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(n_feature, n_hidden)
        self.predict = torch.nn.Linear(n_hidden, n_output)

    def forward(self, x):
        x = F.relu(self.hidden(x))
        x = self.predict(x)
        return x

# It doesn't work, it's just an idea
def my_loss(point, params):
    arr = ((point[:, 0] - params[:, 0])**2 + (point[:, 1] - params[:, 1])**2 - params[:, 2]**2)**2
    loss = torch.sum(arr)
    return loss

# For N pairs (x, y) the model predicts the parameters of the circle
net = Net(n_feature=N*2, n_hidden=10, n_output=3)
optimizer = torch.optim.SGD(net.parameters(), lr=1e-4)

for t in range(1000):
    prediction = net(X.view(n_x, N*2).float())
    loss = my_loss(X, prediction)
    print(f"loss: {loss}")
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
So, the question is how to correctly implement my own loss function in terms of Pytorch in this case?
Or how to change the model's structure to get expected results?
You're trying to create a loss between the predicted outputs and the inputs instead of between the predicted outputs and the true outputs. To do this you need to save the true values of x0, y0, and r when you generate them.
n_x = 1000
start_angle = 0
phi = 90
N = 100
sigma = 0.005

x_full = []
targets = []  # <-- Here
for i in range(n_x):
    x0 = np.random.normal(0, 10, 1)[0]
    y0 = np.random.normal(0, 10, 1)[0]
    r = np.random.normal(0, 10, 1)[0]
    targets.append(np.array([x0, y0, r]))  # <-- Here
    x, y = generate_circle(x0, y0, r, start_angle, phi, N, sigma)
    x_full.append(np.array([[x[i], y[i]] for i in range(len(x))]))

X = torch.from_numpy(np.array(x_full))
Y = torch.from_numpy(np.array(targets))  # <-- Here
print(X.size())  # torch.Size([1000, 100, 2])
print(Y.size())  # torch.Size([1000, 3])
Now, when you call my_loss you should use:
loss = my_loss(Y, prediction)
You are also passing in all of your data points on every iteration of the for loop. I would split the data into smaller batches so that your model doesn't just learn to output the same values every time; e.g. you have generated 1000 examples, so pass in a random selection of 100 in each iteration, using something like random.sample(...). A sketch is shown below.
Your input numbers are pretty large, which means your loss will be huge, so generate inputs between 0 and 1; if you then need a value between 0 and 10 you can simply multiply by 10.
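
Putting those pieces together, a minimal sketch of what the corrected training loop could look like (the batching scheme and the plain MSE-style loss here are my own choices, not the only option):

import random

batch_size = 100
for t in range(1000):
    idx = random.sample(range(n_x), batch_size)        # random mini-batch of examples
    x_batch = X[idx].view(batch_size, N * 2).float()   # flatten the 100 (x, y) pairs per example
    y_batch = Y[idx].float()                           # true [x_c, y_c, r] for the batch

    prediction = net(x_batch)
    loss = torch.mean((prediction - y_batch) ** 2)     # compare predictions to the targets, not the inputs
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if t % 100 == 0:
        print(f"step {t}: loss = {loss.item():.4f}")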

CNN forward and backward with numpy einsum give different results than the for-loop implementation

I am trying to implement a convolutional neural network from scratch with Python and numpy. I implemented the forward and backward phases with numpy einsum (functions conv_forward and conv_backward).
When I compared the results of the einsum conv_forward and conv_backward to reference implementations taken from Coursera's Convolutional Neural Networks course (conv_forward_ref, conv_backward_ref), the einsum versions give slightly different results than the reference implementation.
The difference is negligible for a small number of loops, but it becomes significant with a larger number of loops.
I checked my implementation carefully and found no errors. I am not sure why this happens, or which implementation gives the correct results.
And are there other ways to implement these functions more efficiently (without using numpy einsum)?
Here is the code:
import numpy as np

# pad data
def pad_data(img_array, pad_size, pad_val=0):
    padded_array = np.pad(img_array, ((0, 0), (pad_size, pad_size), (pad_size, pad_size), (0, 0)), 'constant', constant_values=(pad_val, pad_val))
    return padded_array

"""
Reference implementation: Coursera's Convolutional Neural Networks course
"""
# Implement a single matrix multiplication of a slice of input and weights, bias
def conv_single_step(a_slice_prev, W, b):
    s = a_slice_prev * W
    Z = np.sum(s)
    Z = Z + b
    return Z
# conv forward: source code from Coursera's Convolutional Neural Networks course
def conv_forward_ref(A_prev, W, b, hparameters):
    # get dimensions of the previous layer's output
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    # get dimensions of this layer's filter
    (f, f, n_C_prev, n_C) = W.shape
    # get values of this layer's hyperparameters
    stride = hparameters["stride"]
    pad = hparameters["pad"]
    # compute the dimensions of the CONV output volume
    n_H = int((n_H_prev - f + 2*pad) / stride) + 1
    n_W = int((n_W_prev - f + 2*pad) / stride) + 1
    # initialize the output volume Z with zeros
    Z = np.zeros((m, n_H, n_W, n_C))
    # pad the output of the previous layer
    A_prev_pad = pad_data(A_prev, pad)
    # compute Z
    for i in range(m):
        a_prev_pad = A_prev_pad[i]
        for h in range(n_H):
            for w in range(n_W):
                for c in range(n_C):
                    # find the corners of the current slice
                    vert_start = h * stride
                    vert_end = vert_start + f
                    horiz_start = w * stride
                    horiz_end = horiz_start + f
                    # get the pixel values of the current slice of the previous layer's output
                    a_slice_prev = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]
                    # convolve
                    Z[i, h, w, c] = conv_single_step(a_slice_prev, W[:, :, :, c], b[:, :, :, c])
    # make sure the output shape is correct
    assert(Z.shape == (m, n_H, n_W, n_C))
    return Z
# conv backward: source code from Coursera's Convolutional Neural Networks course
def conv_backward_ref(dZ, A_prev, W, b, hparameters):
    ### START CODE HERE ###
    # Retrieve dimensions from A_prev's shape
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    # Retrieve dimensions from W's shape
    (f, f, n_C_prev, n_C) = W.shape
    # Retrieve information from "hparameters"
    stride = hparameters["stride"]
    pad = hparameters["pad"]
    # Retrieve dimensions from dZ's shape
    (m, n_H, n_W, n_C) = dZ.shape
    # Initialize dA_prev, dW, db with the correct shapes
    dA_prev = np.zeros((m, n_H_prev, n_W_prev, n_C_prev))
    dW = np.zeros((f, f, n_C_prev, n_C))
    db = np.zeros((1, 1, 1, n_C))
    # Pad A_prev and dA_prev
    A_prev_pad = pad_data(A_prev, pad_size=pad)
    dA_prev_pad = pad_data(dA_prev, pad_size=pad)
    for i in range(m):                        # loop over the training examples
        # select the ith training example from A_prev_pad and dA_prev_pad
        a_prev_pad = A_prev_pad[i]
        da_prev_pad = dA_prev_pad[i]
        for h in range(n_H):                  # loop over the vertical axis of the output volume
            for w in range(n_W):              # loop over the horizontal axis of the output volume
                for c in range(n_C):          # loop over the channels of the output volume
                    # Find the corners of the current "slice"
                    vert_start = h * stride
                    vert_end = vert_start + f
                    horiz_start = w * stride
                    horiz_end = horiz_start + f
                    # Use the corners to define the slice from a_prev_pad
                    a_slice = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]
                    # Update gradients for the window and the filter's parameters
                    da_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :] += W[:, :, :, c] * dZ[i, h, w, c]
                    dW[:, :, :, c] += a_slice * dZ[i, h, w, c]
                    db[:, :, :, c] += dZ[i, h, w, c]
        # Set the ith training example's dA_prev to the unpadded da_prev_pad (Hint: use X[pad:-pad, pad:-pad, :])
        # print(da_prev_pad[pad:-pad, pad:-pad, :].shape)
        dA_prev[i, :, :, :] = da_prev_pad[pad:-pad, pad:-pad, :]
    ### END CODE HERE ###
    # Make sure the output shape is correct
    assert(dA_prev.shape == (m, n_H_prev, n_W_prev, n_C_prev))
    return dA_prev, dW, db
"""
Numpy einsum implementation
"""
# conv forward: implemented with numpy einsum
def conv_forward(A_prev, W, b, hparameters):
# get dimension of output of previous layer
#print(A_prev.shape)
(m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
# get dimension of this layer's filter
(f, f, n_C_prev_W, n_C) = W.shape
# make sure number of channels of A_prev equal to number of channels of W
assert(n_C_prev == n_C_prev_W)
# get values of this layer's hyperparameters and determine shape of output
stride = hparameters["stride"]
pad = hparameters["pad"]
n_H = int((n_H_prev - f + 2*pad) / stride) + 1
n_W = int((n_W_prev - f + 2*pad) / stride) + 1
# pad the output of previous layer
A_prev_pad = pad_data(A_prev, pad)
# compute Z for multiple input images and multiple filters
shape = (f, f, n_C_prev, m, n_H, n_W, 1)
strides = (A_prev_pad.strides * 2)[1:]
M = np.lib.stride_tricks.as_strided(A_prev_pad, shape=shape, strides=strides, writeable=False)
Z = np.einsum('pqrs,pqrtbmn->tbms', W, M)
Z = Z + b
assert(Z.shape == (m, n_H, n_W, n_C))
return Z
# conv backward: implemented with numpy einsum
def conv_backward(dZ, A_prev, W, b, hparameters):
    # retrieve dimensions from A_prev's shape
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    # retrieve dimensions from W's shape
    (f, f, n_C_prev, n_C) = W.shape
    # retrieve information from "hparameters"
    stride = hparameters["stride"]
    pad = hparameters["pad"]
    # retrieve dimensions from dZ's shape
    (m, n_H, n_W, n_C) = dZ.shape
    # compute pad values to be applied to dZ, to guarantee A_prev's dimensions
    pad_H = int(((n_H_prev - 1) * stride - n_H + f) / 2)
    pad_W = int(((n_W_prev - 1) * stride - n_W + f) / 2)
    # make sure pad_H equals pad_W because this implementation supports equal padding only
    assert(pad_H == pad_W)
    pad_dZ = pad_H
    ## compute dA_prev: inverse of the forward process
    # step 1: rotate W 180 degrees
    # step 2: pad dZ, then as_strided dZ to f x f x n_C
    # step 3: dot the strided dZ and the 180-rotated W
    # rotate W 180 degrees (= rotate 90 degrees twice) around the first two dims, anti-clockwise direction
    W = np.rot90(W, 2)
    # pad dZ
    dZ_pad = pad_data(dZ, pad_dZ)
    # compute dA_prev with the strided trick and numpy einsum
    shape = (f, f, n_C, m, n_H_prev, n_W_prev)
    strides = (dZ_pad.strides)[1:] + (dZ_pad.strides)[0:3]
    M = np.lib.stride_tricks.as_strided(dZ_pad, shape=shape, strides=strides, writeable=False)
    dA_prev = np.einsum('pqrs,pqsbmn->bmnr', W, M)
    assert(dA_prev.shape == A_prev.shape)
    # free memory
    del dZ_pad
    ## compute dW and db
    # compute dW
    A_prev_pad = pad_data(A_prev, pad)
    shape_Z = (f, f, n_C_prev, m, n_H, n_W)
    strides_Z = (A_prev_pad.strides)[1:] + (A_prev_pad.strides)[0:3]
    M = np.lib.stride_tricks.as_strided(A_prev_pad, shape=shape_Z, strides=strides_Z, writeable=False)
    dW = np.einsum('abcd,pqsabc->pqsd', dZ, M)
    assert(dW.shape == W.shape)
    # compute db
    db = np.einsum('abcd->d', dZ).reshape(1, 1, 1, n_C)
    return dA_prev, dW, db
"""
Test
"""
A_prev = np.random.rand(10, 100, 100, 3) * 1000
W = np.random.rand(5, 5, 3, 10)
b = np.zeros((1, 1, 1, 10))
hparameters = {"stride": 1, "pad": 2}
Z_ref = conv_forward_ref(A_prev, W, b, hparameters)
Z = conv_forward(A_prev, W, b, hparameters)
print("sum of difference for Z: ", np.sum(Z_ref - Z))
print("is Z matched with Z_slow: ", np.allclose(Z_ref, Z))
dZ = np.random.rand(10, 100, 100, 10) * 1000
dA_prev_ref, dW_ref, db_ref = conv_backward_ref(dZ, A_prev, W, b, hparameters)
dA_prev, dW, db = conv_backward(dZ, A_prev, W, b, hparameters)
print("sum of difference for dA: ", np.sum(dA_prev_ref - dA_prev))
print("sum of difference for dW: ", np.sum(dW_ref - dW))
print("sum of difference for db: ", np.sum(db_ref - db))
print(np.allclose(dA_prev_ref, dA_prev))
print(np.allclose(dW_ref, dW))
print(np.allclose(db_ref, db))
Results:
sum of difference for Z: -4.743924364447594e-08
is Z matched with Z_ref: True
sum of difference for dA: 3.2011885195970535e-06
sum of difference for dW: 0.0
sum of difference for db: 0.0
is dA_prev matched with dA_prev_ref: True
is dW matched with dW_ref: True
is db matched with db_ref: True
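
A possible explanation for differences like -4.7e-08 even when both implementations are mathematically equivalent: floating-point addition is not associative, so summing the same products in a different order (a four-deep Python loop versus einsum's internal contraction order) generally produces results that differ at roundoff level, and the absolute difference grows with larger inputs and more accumulated terms. A tiny standalone demonstration of the effect, independent of the convolution code:

import numpy as np

rng = np.random.default_rng(0)
a = rng.random(100000) * 1000               # large values make the rounding visible

s_forward = np.sum(a)                       # one summation order
s_shuffled = np.sum(rng.permutation(a))     # same numbers, different order
print(s_forward - s_shuffled)               # tiny but typically nonzero
print(np.allclose(s_forward, s_shuffled))   # True: the difference is at roundoff level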

Why does tensorflow 1.1 get slower and slower during training? Is it a memory leak or queue starvation?

I trained an ESPCN in tensorflow 1.1, and the time spent per patch increases nearly linearly as training proceeds. The first 100 epochs take only 4-5 seconds, but the 70th epoch takes about half a minute. See the training result below:
I've searched for the same question on Google and Stack Overflow and tried the solutions below, but none seemed to work:
1. add tf.reset_default_graph() after every sess.run();
2. add time.sleep(5) to prevent queue starvation.
I know the general idea, which is to reduce the operations in Session(). But how? Does anyone have a solution?
Here's part of my code:
L3, var_w_list, var_b_list = model_train(IN, FLAGS)
cost = tf.reduce_mean(tf.reduce_sum(tf.square(OUT - L3), reduction_indices=0))
global_step = tf.Variable(0, trainable=False)
learning_rate = tf.train.exponential_decay(FLAGS.base_lr, global_step * FLAGS.batch_size, FLAGS.decay_step, 0.96, staircase=True)
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost, global_step=global_step, var_list=var_w_list + var_b_list)
# optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(cost, var_list=var_w_list + var_b_list)
cnt = 0

with tf.Session() as sess:
    init_op = tf.initialize_all_variables()
    sess.run(init_op)
    saver = tf.train.Saver()
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    print('\n\n\n =========== All initialization finished, now training begins ===========\n\n\n')
    t_start = time.time()
    t1 = t_start
    for i in range(1, FLAGS.max_Epoch + 1):
        LR_batch, HR_batch = batch.__next__()
        global_step += 1
        [_, cost1] = sess.run([optimizer, cost], feed_dict={IN: LR_batch, OUT: HR_batch})
        # tf.reset_default_graph()
        if i % 100 == 0 or i == 1:
            print_step = i
            print_loss = cost1 / FLAGS.batch_size
            test_LR_batch, test_HR_batch = test_batch.__next__()
            test_SR_batch = test_HR_batch.copy()
            test_SR_batch[:, :, :, 0:3] = sess.run(L3, feed_dict={IN: test_LR_batch[:, :, :, 0:3]})
            # tf.reset_default_graph()
            psnr_tmp = 0.0
            ssim_tmp = 0.0
            for k in range(test_SR_batch.shape[0]):
                com1 = test_SR_batch[k, :, :, 0]
                com2 = test_HR_batch[k, :, :, 0]
                psnr_tmp += get_psnr(com1, com2, FLAGS.HR_size, FLAGS.HR_size)
                ssim_tmp += get_ssim(com1, com2, FLAGS.HR_size, FLAGS.HR_size)
            psnr[cnt] = psnr_tmp / test_SR_batch.shape[0]
            ssim[cnt] = ssim_tmp / test_SR_batch.shape[0]
            ep[cnt] = print_step
            t2 = time.time()
            print_time = t2 - t1
            t1 = t2
            print(("[Epoch] : {0:d} [Current cost] : {1:5.8f} \t [Validation PSNR] : {2:5.8f} \t [Duration time] : {3:10.8f} s \n").format(print_step, print_loss, psnr[cnt], print_time))
            # tf.reset_default_graph()
            cnt += 1
        if i % 1000 == 0:
            L3_test = model_test(IN_TEST, var_w_list, var_b_list, FLAGS)
            output_img = single_HR.copy()
            output_img[:, :, :, 0:3] = sess.run(L3_test, feed_dict={IN_TEST: single_LR[:, :, :, 0:3]})
            tf.reset_default_graph()
            subname = FLAGS.img_save_dir + '/' + str(i) + ".jpg"
            img_gen(output_img[0, :, :, :], subname)
            print(('================= Saving model to {}/model.ckpt ================= \n').format(FLAGS.checkpoint_dir))
            time.sleep(5)
            # saver.save(sess, FLAGS.checkpoint_dir + '/model.ckpt', print_step)
    t_tmp = time.time() - t_start
My configuration is: windows10 + tf1.1 + python3.5 + cuda8.0 + cudnn5.1
================================================================
Besides, I used a pixel-shuffle (PS) layer instead of deconvolution in the last layer. I copied the PS code from others, which is shown below:
def _phase_shift(I, r):
    bsize, a, b, c = I.get_shape().as_list()
    bsize = tf.shape(I)[0]  # Handling Dimension(None) type for undefined batch dim
    X = tf.reshape(I, (bsize, a, b, r, r))
    X = tf.transpose(X, (0, 1, 2, 4, 3))  # bsize, a, b, 1, 1
    X = tf.split(X, a, 1)  # a, [bsize, b, r, r]
    X = tf.concat([tf.squeeze(x, axis=1) for x in X], 2)  # bsize, b, a*r, r
    X = tf.split(X, b, 1)  # b, [bsize, a*r, r]
    X = tf.concat([tf.squeeze(x, axis=1) for x in X], 2)  # bsize, a*r, b*r
    return tf.reshape(X, (bsize, a*r, b*r, 1))

def PS(X, r, color=False):
    if color:
        Xc = tf.split(X, 3, 3)
        X = tf.concat([_phase_shift(x, r) for x in Xc], 3)
    else:
        X = _phase_shift(X, r)
    return X
Here X is the 4-dimensional image tensor, r is the up-scaling factor, and color determines whether the images have 3 channels (YCbCr format) or 1 channel (grayscale format).
Using the layer is very simple, just like tf.nn.relu():
L3_ps = PS(L3, scale, True)
Now I'm wondering whether this layer caused the slow-down, because the program runs fine when using a deconvolution layer. Using a deconvolution layer may be a solution, but I have to use the PS layer for some reason.
I suspect this line is causing a memory leak (although without seeing the code, I can't say for certain):
L3_test = model_test(IN_TEST, var_w_list, var_b_list, FLAGS)
L3_test seems to be a tf.Tensor (because you later pass it to sess.run()), so it seems likely that model_test() is adding new nodes to the graph each time it is called (every 1000 steps), which causes more work to be done over time.
The solution is quite simple though: since model_test() does not depend on anything calculated in the training loop, you can move the call outside the training loop, so it is only called once.
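
A minimal sketch of that restructuring (assuming model_test only needs the variable lists, which already exist before the session starts; the exact placement in your script may differ, and sess.graph.finalize() is just an optional guard that makes any later graph modification raise an error instead of silently growing the graph):

# build everything once, before the session and the training loop
L3, var_w_list, var_b_list = model_train(IN, FLAGS)
L3_test = model_test(IN_TEST, var_w_list, var_b_list, FLAGS)   # <-- moved out of the loop

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    sess.graph.finalize()   # optional: adding nodes after this point raises an error

    for i in range(1, FLAGS.max_Epoch + 1):
        LR_batch, HR_batch = batch.__next__()
        _, cost1 = sess.run([optimizer, cost], feed_dict={IN: LR_batch, OUT: HR_batch})
        if i % 1000 == 0:
            # only run the already-built test op; nothing new is added to the graph
            output = sess.run(L3_test, feed_dict={IN_TEST: single_LR[:, :, :, 0:3]})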

Finding the optimum lambda and features for polynomial regression

I am new to data mining/ML. I've been trying to solve a polynomial regression problem of predicting the price from given input parameters (already normalized within the range [0, 1]).
I'm quite close, as my output is proportional to the correct one, but it seems a bit suppressed. My algorithm is correct; I just don't know how to reach an appropriate lambda (the regularization parameter), or how to decide to what extent I should populate the features, as the problem says: "The prices per square foot are (approximately) a polynomial function of the features. This polynomial always has an order less than 4."
Is there a way to visualize the data to find the optimum values for these parameters, the way we find the optimal alpha (step size) and number of iterations by visualizing the cost function in linear regression with gradient descent?
Here is my code : http://ideone.com/6ctDFh
from numpy import *

def mapFeature(X1, X2):
    degree = 2
    out = ones((shape(X1)[0], 1))
    for i in range(1, degree+1):
        for j in range(0, i+1):
            term1 = X1 ** (i-j)
            term2 = X2 ** (j)
            term = (term1 * term2).reshape(shape(term1)[0], 1)
            """note that here 'out[i]' represents the mapped features of X1[i], X2[i], ...
               out is made to store the features of one example in out[i] horizontally"""
            out = hstack((out, term))
    return out

def solve():
    n, m = input().split()
    m = int(m)
    n = int(n)
    data = zeros((m, n+1))
    for i in range(0, m):
        ausi = input().split()
        for k in range(0, n+1):
            data[i, k] = float(ausi[k])
    X = data[:, 0:n]
    y = data[:, n]
    theta = zeros((6, 1))
    X = mapFeature(X[:, 0], X[:, 1])
    ausi = computeCostVect(X, y, theta)
    # print(X)
    print("Results using BFGS : ")
    lamda = 2
    theta, cost = findMinTheta(theta, X, y, lamda)
    test = [0.05, 0.54, 0.91, 0.91, 0.31, 0.76, 0.51, 0.31]
    print("prediction for 0.31 , 0.76 (using BFGS) : ")
    for i in range(0, 7, 2):
        print(mapFeature(array([test[i]]), array([test[i+1]])).dot(theta))
    # pyplot.plot(X[:, 1], y, 'rx', markersize=5)
    # fig = pyplot.figure()
    # ax = fig.add_subplot(1, 1, 1)
    # ax.scatter(X[:, 1], X[:, 2], s=y)  # Added third variable income as size of the bubble
    # pyplot.show()
The current output is:
183.43478288
349.10716957
236.94627602
208.61071682
The correct output should be:
180.38
1312.07
440.13
343.72
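
One common way to pick both the polynomial degree and lambda is a small hold-out search: fit on a training split for every (degree, lambda) pair and keep the combination with the lowest validation error. Below is a minimal numpy sketch of the idea, not the original code; X_raw (two normalized feature columns) and y are assumed to be already loaded, the helper names are hypothetical, and the lambda grid is arbitrary:

import numpy as np

def map_feature(X1, X2, degree):
    # polynomial expansion of two features up to the given degree, plus a bias column
    out = [np.ones(X1.shape[0])]
    for i in range(1, degree + 1):
        for j in range(i + 1):
            out.append((X1 ** (i - j)) * (X2 ** j))
    return np.column_stack(out)

def fit_ridge(X, y, lam):
    # regularized normal equation; the bias column (first) is not penalized
    reg = lam * np.eye(X.shape[1])
    reg[0, 0] = 0
    return np.linalg.solve(X.T @ X + reg, X.T @ y)

# split the data: 70% train, 30% validation
idx = np.random.permutation(len(y))
split = int(0.7 * len(y))
train, val = idx[:split], idx[split:]

best = None
for degree in [1, 2, 3]:                      # "order less than 4"
    for lam in [0, 0.01, 0.1, 1, 10]:
        Xt = map_feature(X_raw[train, 0], X_raw[train, 1], degree)
        Xv = map_feature(X_raw[val, 0], X_raw[val, 1], degree)
        theta = fit_ridge(Xt, y[train], lam)
        err = np.mean((Xv @ theta - y[val]) ** 2)   # validation MSE for this pair
        if best is None or err < best[0]:
            best = (err, degree, lam)
print("best validation MSE %.4f at degree=%d, lambda=%g" % best)

Plotting the validation error against lambda for each degree gives the same kind of picture used to tune alpha and the number of iterations in gradient descent.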
