How to implement gradient descent algorithms using partial derivatives? - python-3.x

For this question, I want to use gradient descent to minimize the function value to zero for the function x^2 + y^2 with the point (1,1) as the initial starting point. However, I get this error when I try to run my code. I'm not sure why I get that error. Here is what I have so far. Can anyone please help me out? Thanks
Error: xnew = x0-alpha*grad(x0, y0)
TypeError: 'int' object is not callable
def f(x, y):
    """Objective function: the paraboloid x^2 + y^2."""
    x_squared = x**2
    y_squared = y**2
    return x_squared + y_squared
def dfdx(x, y):
    """Partial derivative of x^2 + y^2 with respect to x (y is not used)."""
    slope_x = 2 * x
    return slope_x
def dfdy(x, y):
    """Partial derivative of x^2 + y^2 with respect to y (x is not used)."""
    slope_y = 2 * y
    return slope_y
def grad(x, y):
    """Return the gradient at (x, y) as the two-element list [df/dx, df/dy]."""
    partial_x = dfdx(x, y)
    partial_y = dfdy(x, y)
    return [partial_x, partial_y]
# Initial starting point (1, 1) for the descent.
x0 = 1
y0 = 1
def grad2(f, grad, x0, y0, alpha, max_iterations=1000):
    """Minimise a two-variable function with fixed-step gradient descent.

    Args:
        f: Objective ``f(x, y)``. Kept for interface compatibility; the
            update rule itself only needs the gradient.
        grad: Callable ``grad(x, y)`` returning the pair ``(df/dx, df/dy)``.
            It must be a function; passing a number here is what produces
            "'int' object is not callable".
        x0: Starting x coordinate.
        y0: Starting y coordinate.
        alpha: Step size (learning rate).
        max_iterations: Number of descent steps to take (1000 by default).

    Returns:
        Tuple ``(x, y)`` holding the point reached after the last step.
    """
    x, y = x0, y0
    for _ in range(max_iterations):
        # Each coordinate moves along its own partial derivative. Subtracting
        # alpha times the whole gradient list from a scalar is not valid.
        grad_x, grad_y = grad(x, y)
        x, y = x - alpha * grad_x, y - alpha * grad_y
    return x, y

Related

2101. Detonate the Maximum Bombs: why is one solution more optimized than the other?

I am trying to solve https://leetcode.com/problems/detonate-the-maximum-bombs/
the accepted solution is https://www.youtube.com/watch?v=mEB2y5_Aoos
'''
class Solution:
    def maximumDetonation(self, bombs) -> int:
        """Return the largest number of bombs one initial detonation can set off.

        Each bomb is an ``(x, y, radius)`` triple. Bomb ``a`` triggers bomb
        ``b`` when ``b``'s centre lies within ``a``'s radius. The reachability
        lists are built once, then a depth-first walk is run from every bomb.
        """
        count = len(bombs)

        def in_range(source, target):
            # Compare squared distances to avoid a square root.
            sx, sy, sr = source
            tx, ty, _ = target
            return (sx - tx) ** 2 + (sy - ty) ** 2 <= sr ** 2

        reachable = [
            [j for j in range(count) if j != i and in_range(bombs[i], bombs[j])]
            for i in range(count)
        ]

        best = 0
        for start in range(count):
            exploded = {start}
            pending = [start]
            while pending:
                current = pending.pop()
                for neighbour in reachable[current]:
                    if neighbour not in exploded:
                        exploded.add(neighbour)
                        pending.append(neighbour)
            best = max(best, len(exploded))
        return best
'''
and solution I came up with is as follows:
'''
class Solution:
    def isReachable(self, i1, i2, bombs) -> bool:
        """Return True when bomb ``i1``'s blast radius covers bomb ``i2``'s centre."""
        src_x, src_y, src_r = bombs[i1]
        dst_x, dst_y, _ = bombs[i2]
        dist_sq = (src_x - dst_x) ** 2 + (src_y - dst_y) ** 2
        return dist_sq <= src_r ** 2

    def maximumDetonation(self, bombs) -> int:
        """Return the largest number of bombs one initial detonation can set off.

        A depth-first search is started from every bomb. Reachability is
        re-evaluated with ``isReachable`` for every candidate on every visit.
        """
        best = 0
        total = len(bombs)

        def explore(current, exploded):
            nonlocal best
            # Already detonated in this run: nothing more to do.
            if current in exploded:
                return
            exploded.add(current)
            for candidate in range(total):
                if candidate in exploded:
                    continue
                if self.isReachable(current, candidate, bombs):
                    explore(candidate, exploded)
            best = max(best, len(exploded))

        for start in range(total):
            # Fresh visited set per starting bomb.
            explore(start, set())
        return best
'''
Both solutions give correct answers for all test cases, but I am wondering why their solution is more optimised than mine, since I am doing essentially the same thing. My solution keeps failing with a "Time Limit Exceeded" error.

Why can't I get this Runge-Kutta solver to converge as the time step decreases?

For reasons, I need to implement the Runge-Kutta4 method in PyTorch (so no, I'm not going to use scipy.odeint). I tried and I get weird results on the simplest test case, solving x'=x with x(0)=1 (analytical solution: x=exp(t)). Basically, as I reduce the time step, I cannot get the numerical error to go down. I'm able to do it with a simpler Euler method, but not with the Runge-Kutta 4 method, which makes me suspect some floating point issue here (maybe I'm missing some hidden conversion from double precision to single)?
import torch
import numpy as np
import matplotlib.pyplot as plt
def Euler(f, IC, time_grid):
    """Integrate ``y' = f(t, y)`` with the explicit (forward) Euler method.

    Args:
        f: Right-hand side, called as ``f(t, y)`` with tensor arguments.
        IC: Initial value at ``time_grid[0]``.
        time_grid: Tensor of time points; each step spans two consecutive
            entries.

    Returns:
        Tensor whose ``k``-th entry approximates the solution at
        ``time_grid[k]``. Its dtype is whatever ``torch.tensor([IC])`` picks.
    """
    y0 = torch.tensor([IC])
    # Give the grid the same dtype/device as the state.
    time_grid = time_grid.to(y0[0])
    # Collect the states and stack once at the end. Concatenating inside the
    # loop re-copied the whole history on every step.
    states = [y0[0]]
    for t_i, t_next in zip(time_grid[:-1], time_grid[1:]):
        y_i = states[-1]
        dt = t_next - t_i
        states.append(y_i + f(t_i, y_i) * dt)
    return torch.stack(states)
def RungeKutta4(f, IC, time_grid):
    """Integrate ``y' = f(t, y)`` with the classical fourth-order Runge-Kutta scheme.

    Args:
        f: Right-hand side, called as ``f(t, y)`` with tensor arguments.
        IC: Initial value at ``time_grid[0]``.
        time_grid: Tensor of time points; each step spans two consecutive
            entries.

    Returns:
        Tensor whose ``k``-th entry approximates the solution at
        ``time_grid[k]``. Its dtype is whatever ``torch.tensor([IC])`` picks.
    """
    y0 = torch.tensor([IC])
    # Give the grid the same dtype/device as the state.
    time_grid = time_grid.to(y0[0])
    # Collect the states and stack once at the end. Concatenating inside the
    # loop re-copied the whole history on every step.
    states = [y0[0]]
    for t_i, t_next in zip(time_grid[:-1], time_grid[1:]):
        y_i = states[-1]
        dt = t_next - t_i
        half_dt = 0.5 * dt
        # Four slope evaluations: start, two midpoints, end of the step.
        k1 = f(t_i, y_i)
        k2 = f(t_i + half_dt, y_i + half_dt * k1)
        k3 = f(t_i + half_dt, y_i + half_dt * k2)
        k4 = f(t_next, y_i + dt * k3)
        states.append(y_i + 1 / 6 * dt * (k1 + 2 * (k2 + k3) + k4))
    return torch.stack(states)
# differential equation
def f(T, X):
    """Right-hand side of the test ODE x' = x; the time argument is ignored."""
    derivative = X
    return derivative
# initial condition x(0) = 1 of the test problem x' = x
IC = 1.
# integration interval
def integration_interval(steps, ND=1):
    """Return ``steps`` evenly spaced time points covering [0, ND]."""
    grid = torch.linspace(start=0, end=ND, steps=steps)
    return grid
# analytical solution
def analytical_solution(t_range):
    """Evaluate the exact solution x(t) = exp(t) at every point of ``t_range``."""
    exact = np.exp(t_range)
    return exact
# test a numerical method
def test_method(method, t_range, analytical_solution):
    """Return the max-norm (L-infinity) distance between a solver and a reference.

    ``method`` is invoked as ``method(f, IC, t_range)`` with the module-level
    right-hand side ``f`` and initial condition ``IC``. ``analytical_solution``
    is the reference values to compare against.
    """
    approximation = method(f, IC, t_range)
    return torch.dist(approximation, analytical_solution, p=float('inf'))
if __name__ == '__main__':
    import os

    # One error slot per grid size: 10, 100 and 1000 points.
    Euler_error = np.array([0.,0.,0.])
    RungeKutta4_error = np.array([0.,0.,0.])
    indices = np.arange(1, Euler_error.shape[0]+1)
    n_steps = np.power(10, indices)
    for i, n in np.ndenumerate(n_steps):
        # n is a NumPy integer; hand torch a plain Python int.
        t_range = integration_interval(steps=int(n))
        solution = analytical_solution(t_range)
        Euler_error[i] = test_method(Euler, t_range, solution).numpy()
        RungeKutta4_error[i] = test_method(RungeKutta4, t_range, solution).numpy()
    plots_path = "./plots"
    # savefig does not create missing directories, so make sure it exists.
    os.makedirs(plots_path, exist_ok=True)
    plt.figure()
    plt.xscale('log')
    plt.yscale('log')
    plt.plot(n_steps, Euler_error, label="Euler error", linestyle='-')
    plt.plot(n_steps, RungeKutta4_error, label="RungeKutta 4 error", linestyle='-.')
    plt.legend()
    plt.savefig(os.path.join(plots_path, "errors.png"))
The result:
As you can see, the Euler method converges (slowly, as expected of a first order method). However, the Runge-Kutta4 method does not converge as the time step gets smaller and smaller. The error goes down initially, and then up again. What's the issue here?
The reason is indeed a floating point precision issue. torch defaults to single precision, so once the truncation error becomes small enough, the total error is basically determined by the roundoff error, and reducing the truncation error further by increasing the number of steps <=> decreasing the time step doesn't lead to any decrease in the total error.
To fix this, we need to enforce double precision 64bit floats for all floating point torch tensors and numpy arrays. Note that the right way to do this is to use respectively torch.float64 and np.float64 rather than, e.g., torch.double and np.double, because the former are fixed-sized float values, (always 64bit) while the latter depend on the machine and/or compiler. Here's the fixed code:
import torch
import numpy as np
import matplotlib.pyplot as plt
def Euler(f, IC, time_grid):
    """Integrate ``y' = f(t, y)`` with the explicit (forward) Euler method in float64.

    Args:
        f: Right-hand side, called as ``f(t, y)`` with tensor arguments.
        IC: Initial value at ``time_grid[0]``.
        time_grid: Tensor of time points; each step spans two consecutive
            entries. It is converted to float64 before use.

    Returns:
        float64 tensor whose ``k``-th entry approximates the solution at
        ``time_grid[k]``.
    """
    y0 = torch.tensor([IC], dtype=torch.float64)
    # Give the grid the same dtype/device as the double-precision state.
    time_grid = time_grid.to(y0[0])
    # Collect the states and stack once at the end. Concatenating inside the
    # loop re-copied the whole history on every step.
    states = [y0[0]]
    for t_i, t_next in zip(time_grid[:-1], time_grid[1:]):
        y_i = states[-1]
        dt = t_next - t_i
        states.append(y_i + f(t_i, y_i) * dt)
    return torch.stack(states)
def RungeKutta4(f, IC, time_grid):
    """Integrate ``y' = f(t, y)`` with classical fourth-order Runge-Kutta in float64.

    Args:
        f: Right-hand side, called as ``f(t, y)`` with tensor arguments.
        IC: Initial value at ``time_grid[0]``.
        time_grid: Tensor of time points; each step spans two consecutive
            entries. It is converted to float64 before use.

    Returns:
        float64 tensor whose ``k``-th entry approximates the solution at
        ``time_grid[k]``.
    """
    y0 = torch.tensor([IC], dtype=torch.float64)
    # Give the grid the same dtype/device as the double-precision state.
    time_grid = time_grid.to(y0[0])
    # Collect the states and stack once at the end. Concatenating inside the
    # loop re-copied the whole history on every step.
    states = [y0[0]]
    for t_i, t_next in zip(time_grid[:-1], time_grid[1:]):
        y_i = states[-1]
        dt = t_next - t_i
        half_dt = 0.5 * dt
        # Four slope evaluations: start, two midpoints, end of the step.
        k1 = f(t_i, y_i)
        k2 = f(t_i + half_dt, y_i + half_dt * k1)
        k3 = f(t_i + half_dt, y_i + half_dt * k2)
        k4 = f(t_next, y_i + dt * k3)
        states.append(y_i + 1 / 6 * dt * (k1 + 2 * (k2 + k3) + k4))
    return torch.stack(states)
# differential equation
def f(T, X):
    """Right-hand side of the test ODE x' = x; the time argument is ignored."""
    rate_of_change = X
    return rate_of_change
# initial condition x(0) = 1 of the test problem x' = x
IC = 1.
# integration interval
def integration_interval(steps, ND=1):
    """Return ``steps`` evenly spaced float64 time points covering [0, ND]."""
    grid = torch.linspace(start=0, end=ND, steps=steps, dtype=torch.float64)
    return grid
# analytical solution
def analytical_solution(t_range):
    """Evaluate the exact solution x(t) = exp(t) on ``t_range`` in float64."""
    exact = np.exp(t_range, dtype=np.float64)
    return exact
# test a numerical method
def test_method(method, t_range, analytical_solution):
    """Return the max-norm (L-infinity) distance between a solver and a reference.

    ``method`` is invoked as ``method(f, IC, t_range)`` with the module-level
    right-hand side ``f`` and initial condition ``IC``. ``analytical_solution``
    is the reference values to compare against.
    """
    computed = method(f, IC, t_range)
    return torch.dist(computed, analytical_solution, p=float('inf'))
if __name__ == '__main__':
    import os

    # One error slot per grid size: 10, 100 and 1000 points.
    Euler_error = np.array([0.,0.,0.], dtype=np.float64)
    RungeKutta4_error = np.array([0.,0.,0.], dtype=np.float64)
    indices = np.arange(1, Euler_error.shape[0]+1)
    n_steps = np.power(10, indices)
    for i, n in np.ndenumerate(n_steps):
        # n is a NumPy integer; hand torch a plain Python int.
        t_range = integration_interval(steps=int(n))
        solution = analytical_solution(t_range)
        Euler_error[i] = test_method(Euler, t_range, solution).numpy()
        RungeKutta4_error[i] = test_method(RungeKutta4, t_range, solution).numpy()
    plots_path = "./plots"
    # savefig does not create missing directories, so make sure it exists.
    os.makedirs(plots_path, exist_ok=True)
    plt.figure()
    plt.xscale('log')
    plt.yscale('log')
    plt.plot(n_steps, Euler_error, label="Euler error", linestyle='-')
    plt.plot(n_steps, RungeKutta4_error, label="RungeKutta 4 error", linestyle='-.')
    plt.legend()
    plt.savefig(os.path.join(plots_path, "errors.png"))
Result:
Now, as we decrease the time step, the error of the RungeKutta4 approximation decreases with the correct rate.

How to create my own loss function in Pytorch?

I'd like to create a model that predicts parameters of a circle (coordinates of center, radius).
Input is an array of points (of arc with noise):
def generate_circle(x0, y0, r, start_angle, phi, N, sigma):
    """Sample ``N`` noisy points along a circular arc.

    Args:
        x0: x coordinate of the circle centre.
        y0: y coordinate of the circle centre.
        r: Circle radius.
        start_angle: Angle, in degrees, at which the arc starts.
        phi: Angular extent of the arc, in degrees.
        N: Number of points, evenly spaced in angle (both ends included).
        sigma: Standard deviation of the Gaussian noise added independently
            to every coordinate.

    Returns:
        Tuple ``(x, y)`` of two length-``N`` arrays.
    """
    theta = np.linspace(start_angle*np.pi/180, (start_angle + phi)*np.pi/180, num=N)
    # np.random.normal broadcasts over an array of means, so one call per
    # axis replaces the per-point Python loop.
    x = np.random.normal(r * np.cos(theta) + x0, sigma)
    y = np.random.normal(r * np.sin(theta) + y0, sigma)
    return x, y
# Dataset settings: n_x arcs of N points each, spanning phi degrees from
# start_angle, with Gaussian noise of standard deviation sigma.
n_x = 1000
start_angle = 0
phi = 90
N = 100
sigma = 0.005
x_full = []
for _ in range(n_x):
    # Centre coordinates and radius are each drawn from a normal
    # distribution with mean 0 and standard deviation 10.
    x0 = np.random.normal(0 , 10, 1)[0]
    y0 = np.random.normal(0 , 10, 1)[0]
    r = np.random.normal(0 , 10, 1)[0]
    x, y = generate_circle(x0, y0, r, start_angle, phi, N, sigma)
    # Pair the coordinates up as rows [x, y] without an index loop.
    x_full.append(np.column_stack((x, y)))
X = torch.from_numpy(np.array(x_full))
print(X.size()) # torch.Size([1000, 100, 2])
Output: [x_c, y_c, r]
As a loss function I need to use this one:
I tried to implement something like the following:
class Net(torch.nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear."""

    def __init__(self, n_feature, n_hidden, n_output):
        """Create the hidden layer (n_feature -> n_hidden) and the output layer (n_hidden -> n_output)."""
        super().__init__()
        self.hidden = torch.nn.Linear(n_feature, n_hidden)
        self.predict = torch.nn.Linear(n_hidden, n_output)

    def forward(self, x):
        """Apply the hidden layer with ReLU, then the output layer, and return the result."""
        # Spelled out in full so the class does not depend on an `F` alias
        # having been imported elsewhere.
        x = torch.nn.functional.relu(self.hidden(x))
        return self.predict(x)
# Algebraic circle-fit loss: zero when every point lies exactly on its circle.
def my_loss(point, params):
    """Sum of squared residuals ((x - xc)^2 + (y - yc)^2 - r^2)^2.

    Args:
        point: Coordinates with x in ``[..., 0]`` and y in ``[..., 1]``:
            either ``(batch, n_points, 2)`` or, as before, ``(batch, k)``
            with one point per row.
        params: ``(batch, 3)`` tensor holding ``[xc, yc, r]`` per row.

    Returns:
        Scalar tensor with the summed loss.
    """
    center_x, center_y, radius = params[:, 0], params[:, 1], params[:, 2]
    if point.dim() == 3:
        # One parameter set per sample: add an axis so it broadcasts over
        # that sample's points. `point[:, 0]` would pick the first point of
        # each sample rather than the x coordinates.
        center_x = center_x.unsqueeze(1)
        center_y = center_y.unsqueeze(1)
        radius = radius.unsqueeze(1)
    residual = (point[..., 0] - center_x)**2 + (point[..., 1] - center_y)**2 - radius**2
    return torch.sum(residual**2)
# Each sample is N (x, y) pairs flattened into 2*N input features; the model
# outputs the three circle parameters.
net = Net(n_feature=N*2, n_hidden=10, n_output=3)
optimizer = torch.optim.SGD(net.parameters(), lr=1e-4)
for epoch in range(1000):
    # Clear the gradients left over from the previous step first.
    optimizer.zero_grad()
    flat_input = X.view(n_x, N*2).float()
    prediction = net(flat_input)
    loss = my_loss(X, prediction)
    print(f"loss: {loss}")
    loss.backward()
    optimizer.step()
So, the question is how to correctly implement my own loss function in terms of Pytorch in this case?
Or how to change the model's structure to get expected results?
You're trying to create a loss between the predicted outputs and the inputs instead of between the predicted outputs and the true outputs. To do this you need to save the true values of x0, y0, and r when you generate them.
# Dataset settings: n_x arcs of N points each, spanning phi degrees from
# start_angle, with Gaussian noise of standard deviation sigma.
n_x = 1000
start_angle = 0
phi = 90
N = 100
sigma = 0.005
x_full = []
targets = [] # <-- Here
for _ in range(n_x):
    x0 = np.random.normal(0 , 10, 1)[0]
    y0 = np.random.normal(0 , 10, 1)[0]
    r = np.random.normal(0 , 10, 1)[0]
    # Keep the true circle parameters so they can serve as training targets.
    targets.append(np.array([x0, y0, r])) # <-- Here
    x, y = generate_circle(x0, y0, r, start_angle, phi, N, sigma)
    # Pair the coordinates up as rows [x, y] without an index loop.
    x_full.append(np.column_stack((x, y)))
X = torch.from_numpy(np.array(x_full))
Y = torch.from_numpy(np.array(targets)) # <-- Here
print(X.size()) # torch.Size([1000, 100, 2])
print(Y.size()) # torch.Size([1000, 3])
Now, when you call my_loss you should use:
loss = my_loss(Y, prediction)
You are passing in all your data points every iteration of your for loop, I would split your data into smaller sections so that your model doesn't just learn to output the same values every time. e.g. you have generated 1000 points so pass in a random selection of 100 in each iteration using something like random.sample(...)
Your input numbers are pretty large which means your loss will be huge, so generate inputs between 0 and 1 and then if you need the value to be between 0 and 10 you can just multiply by 10.

How to resolve a ValueError in SciPy's fmin_tnc function?

I am trying to implement coursera assignments in python, while doing Scipy optimise for logistic regression. However, I am getting the error below.
Can any one help!
Note: cost, gradient functions are working fine.
#Sigmoid function
def sigmoid(z):
    """Element-wise logistic function 1 / (1 + exp(-z)).

    Accepts plain numbers as well as arrays; an array result has the shape
    of ``z``.
    """
    # np.divide allocates its own result. The former np.zeros pre-allocation
    # was immediately overwritten, and its `z.shape` lookup made scalar input
    # fail.
    return np.divide(1, 1 + np.exp(-z))
def cost(x,y,theta):
    """Mean cross-entropy cost of logistic regression.

    Args:
        x: Design matrix, one row per sample.
        y: 0/1 labels, one per sample (presumably an (m, 1) column or a flat
            (m,) vector; confirm against the caller).
        theta: Parameter vector with one entry per column of ``x``.

    Returns:
        The cost J as a scalar.
    """
    m = y.shape[0]
    h_of_x = sigmoid(np.matmul(x, theta))
    # `@` is matrix multiplication: y.T @ log(h) adds up y_i * log(h_i) over
    # all samples. np.sum collapses the 1x1 result to a scalar.
    term1 = np.sum(-1 * y.T @ np.log(h_of_x) - (1 - y.T) @ np.log(1 - h_of_x))
    return 1 / m * term1
def grad(x,y,theta):
    """Gradient of the mean logistic-regression cost with respect to ``theta``.

    Args:
        x: Design matrix, one row per sample.
        y: 0/1 labels; presumably the same shape as ``sigmoid(x @ theta)``,
            otherwise the subtraction broadcasts (confirm against the caller).
        theta: Parameter vector with one entry per column of ``x``.

    Returns:
        Array with one partial derivative per entry of ``theta``.
    """
    m = y.shape[0]
    h_of_x = sigmoid(x @ theta)
    # x.T @ (h - y) adds up each sample's error weighted by its features.
    return (x.T @ (h_of_x - y)) * (1/m)
#add intercept term for X
# A column of ones shaped like y is placed in front of the first two columns
# of X (assumes y is an (m, 1) column - TODO confirm).
x = np.hstack([np.ones_like(y),X[:,0:2]])
#initialise theta
# m = number of rows of x, n = number of columns (intercept included).
[m,n] = np.shape(x)
initial_theta = np.zeros([n,1])
#optimising theta from given theta and gradient
# NOTE(review): fmin_tnc invokes func as func(theta, *args), i.e.
# cost(theta, x, y) here, while cost is declared as cost(x, y, theta).
# No fprime is passed either - confirm both against the SciPy docs.
result = opt.fmin_tnc(func=cost, x0=initial_theta, args=(x, y))
ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 99 is different from 3)
I got it!
The problem is that fmin_tnc calls the objective as func(theta, *args), so the parameter 'theta' must come first in the function signature, before the extra arguments x and y.
Since my 'cost' function took x and y first, the values were bound to the wrong parameters, which raised the ValueError.
Below is the corrected code:
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator
def cost(theta,x,y):
    """Mean cross-entropy cost of logistic regression.

    ``theta`` comes first because scipy's ``fmin_tnc`` calls the objective
    as ``func(theta, *args)``.

    Args:
        theta: Parameter vector with one entry per column of ``x``.
        x: Design matrix, one row per sample.
        y: 0/1 labels, one per sample, shaped like ``sigmoid(x @ theta)``.

    Returns:
        The cost J as a scalar.
    """
    # Take the sample count from the data instead of a module-level `m`.
    m = x.shape[0]
    # `@` is matrix multiplication; evaluate the hypothesis only once.
    h_of_x = sigmoid(x @ theta)
    J = (-1/m) * np.sum(np.multiply(y, np.log(h_of_x))
                        + np.multiply((1-y), np.log(1 - h_of_x)))
    return J
def gradient(theta,x,y):
    """Gradient of the mean logistic-regression cost with respect to ``theta``.

    The argument order matches ``cost`` so both can be handed to scipy's
    ``fmin_tnc`` (as ``func`` and ``fprime``) with the same ``args``.

    Args:
        theta: Parameter vector with one entry per column of ``x``.
        x: Design matrix, one row per sample.
        y: 0/1 labels, one per sample, shaped like ``sigmoid(x @ theta)``.

    Returns:
        Array with one partial derivative per entry of ``theta``.
    """
    # Take the sample count from the data instead of a module-level `m`.
    m = x.shape[0]
    h_of_x = sigmoid(x @ theta)
    return 1 / m * (x.T @ (h_of_x - y))
#initialise theta as an (n+1, 1) column of zeros
init_theta = np.zeros([n+1,1])
#optimise theta
from scipy import optimize as op
# The optimiser is handed flat 1-D arrays for the starting point and labels.
start_point = init_theta.flatten()
flat_labels = y.flatten()
result = op.fmin_tnc(func=cost,
                     x0=start_point,
                     fprime=gradient,
                     args=(x, flat_labels))

fit() missing 1 required positional argument: 'theta'

I tried to implement a logistic regression model.
I get the below error message
TypeError: fit() missing 1 required positional argument: 'theta'
This is my code
if __name__ == "__main__":
    # X = feature values, all the columns except the last column
    X = data.iloc[:, :-1]
    # y = target values, last column of the data frame
    y = data.iloc[:, -1]
    # filter out the applicants that got admitted
    admitted = data.loc[y == 1]
    # filter out the applicants that didn't get admission
    not_admitted = data.loc[y == 0]
    # Prepend a column of ones so the first weight acts as the intercept.
    X = np.c_[np.ones((X.shape[0], 1)), X]
    # Convert to a NumPy array before adding the column axis: pandas no
    # longer supports multi-dimensional indexing such as y[:, np.newaxis]
    # on a Series.
    y = y.to_numpy()[:, np.newaxis]
    theta = np.zeros((X.shape[1], 1))
def sigmoid(x):
    """Activation function mapping any real value into the interval (0, 1)."""
    exp_neg = np.exp(-x)
    return 1 / (1 + exp_neg)
def net_input(theta, x):
    """Return the weighted sum of the inputs, i.e. the product of ``x`` and ``theta``."""
    weighted_sum = np.dot(x, theta)
    return weighted_sum
def probability(theta, x):
    """Return the sigmoid of the weighted input, i.e. the predicted probability."""
    z = net_input(theta, x)
    return sigmoid(z)
def cost_function(theta, x, y):
    """Compute the mean cross-entropy cost over all training samples.

    This is a plain module-level function, so it takes no ``self``.
    ``fmin_tnc`` calls it as ``cost_function(theta, x, y)``.

    Args:
        theta: Weight vector, one entry per column of ``x``.
        x: Design matrix, one row per sample.
        y: 0/1 labels, one per sample.

    Returns:
        The total cost as a scalar.
    """
    m = x.shape[0]
    # Evaluate the predicted probabilities once and reuse them.
    p = probability(theta, x)
    total_cost = -(1 / m) * np.sum(y * np.log(p) + (1 - y) * np.log(1 - p))
    return total_cost


def gradient(theta, x, y):
    """Compute the gradient of the cost function at the point ``theta``.

    Takes the same arguments, in the same order, as ``cost_function`` so
    both can be given to ``fmin_tnc`` with one ``args`` tuple.
    """
    m = x.shape[0]
    return (1 / m) * np.dot(x.T, sigmoid(net_input(theta, x)) - y)


def fit(x, y, theta):
    """Find the weights that minimise ``cost_function`` using ``fmin_tnc``.

    The stray ``self`` parameter is gone: with it, ``fit(X, y, theta)``
    bound ``X`` to ``self`` and failed with "missing 1 required positional
    argument: 'theta'".

    Args:
        x: Design matrix, one row per sample.
        y: 0/1 labels as a NumPy array; flattened before optimisation.
        theta: Initial weights; flattened to 1-D for the optimiser.

    Returns:
        The optimised weights (first element of the ``fmin_tnc`` result).
    """
    opt_weights = fmin_tnc(func=cost_function, x0=np.ravel(theta),
                           fprime=gradient, args=(x, y.flatten()))
    return opt_weights[0]
parameters = fit(X, y, theta)
TypeError: fit() missing 1 required positional argument: 'theta'

Resources