to calculate CDF of a continuous regression - python-3.x

I need to calculate a CDF for a regression.I have N observations, I need to reestimate coefficients(beta) in a joint distribution.
Yobs is my observations and Y is calculated by X(matrix of predctors)* array of coefficients(betas)
def CDF(beta):
Y = X.dot(beta)
sigma = 0
for n in range(0,N):
sigma = sigma + (np.square(Yobs[n] - Y[n])) # summation of squarred of residuals
SSR = sigma / N # mu (mean or expectation)
dof = N - P - 1 # degree of freedom
var = sigma / dof # the mean square of residuals
PDF = np.zeros(N)
CDF = np.zeros(N) # I want to calculate the F(X < Yobs)
for n in range (0,N):
PDF[n] = (1/np.sqrt(2*np.pi*var))*np.exp(-SSR/(2*var)) # probability density function
CDF[n] = integrate.quad(PDF, -np.inf , (Yobs+a)) # CDF
return CDF
Where am I wrong? I think CDF is wrong since I haven't determined the arg, but how can I define? can I simply use?
from scipy.stats import norm
def CDF(beta):
Y = X.dot(beta)
sigma = 0
for n in range(0,N):
sigma = sigma + (np.square(Yobs[n] - Y[n])) # summation of squarred of residuals
SSR = sigma / N # mu (mean or expectation)
dof = N - P - 1 # degree of freedom
var = sigma / dof # the mean square of residuals
CDF = np.zeros(N)
for n in range(0,N): # I want to calculate the F(X < Yobs)
CDF[n] = norm.cdf(Yobs[n],SSR,var)
return CDF

Related

Truncation error vs dt and truncation error vs dx graph of Crank Nicolson scheme for the Nagumo's equation in python

For a problem, I implemented the Nagumo equation via Crank-Nicolson's scheme. Now the problem is that I plotted the truncation error vs dt but the graph should give me a line with a slope of about 62° while the other graph should give a line with a negative slope (angle of 117°).
With the code below I get the following graphs:
# -*- coding: utf-8 -*-
"""
Created on Sun Feb 5 13:21:29 2023
#author: theha
"""
import numpy as np
#import itertools
import numpy.linalg as l
import matplotlib.pyplot as plt
def generateMatrix(N, sigma):
""" Computes the matrix for the Nagumo's equation with Crank-Nicolson
Dirichlet condition at i=0 and at i=1
Parameters:
----------
N: int
Number of discretization points
sigma: float
dt/(2*dx^2)
Returns:
-------
A: 2D numpy array of float
Matrix for Nagumo's equation
"""
# Setup the diagonal
d = np.diag(np.ones(N+1)*(2*r+1))
# Setup upper diagonal
ud = np.diag(np.ones(N)*-r, 1)
# Setup lower diagonal
ld = np.diag(np.ones(N)*-r, -1)
A = d + ud + ld
return A
def generateRHS(u1, sigma,alpha):
""" Computes right-hand side of linear system for Nagumo's equation
with Crank-Nicolson scheme
Parameters:
----------
T: array of float
Nagumo's solution at current time step
sigma: float
dt/(2*dx^2)
Returns:
-------
b: array of float
Right-hand side of Nagumo's equation
with Crank-Nicolson scheme
"""
N=len(u1)
F=((1-2*sigma)*u1)+sigma*((np.append(0,u1[0:N-1]))+np.append(u1[1:N+1],1))+ dt* ((u1)*(1-u1)*(u1-alpha))
F[0]=0
F[-1]=1
return F
def CrankNicolson(T, A, nt, sigma,alpha):
""" Nagumo's equation in time with Crank-Nicolson
Parameters:
----------
T: array of float
initial Nagumo profile
A: 2D array of float
Matrix with discretized Nagumo equation
nt: int
number of time steps
sigma: float
dt/(2*(dx^2))
Returns:
-------
T: array of floats
Nagumo profile after nt time steps
"""
for t in range(nt):
Tn = T.copy()
b = generateRHS(Tn, sigma,alpha)
# Use numpy.linalg.solve
T_interior = np.linalg.solve(A,b)
T = T_interior
return T
#Domain x in [-L,L]
L = 100
nx = 400 # Partition in x
alpha = 0.25
dx = 2*(L/nx) # Step size
dt= .00001 #Time step
r = dt/(2*(dx**2))
sigma = r
nt = 5 # Partition in time
x = np.linspace(-L,L,nx+1)
u0=np.zeros(nx)
t=0
gamma1 = np.sqrt(2)/2*x + (0.5-alpha)*t
gamma2 = (np.sqrt(2)/2*alpha*x) + alpha*(alpha-2)*t/2
u0=(np.exp(gamma1)+alpha*np.exp(gamma2))/(np.exp(gamma1)+np.exp(gamma2)+1) #Initial condition u(x,0)
#initial time step
Ti = u0;
A = generateMatrix(nx, sigma) # A matrix
T = CrankNicolson(Ti.copy(), A, nt, sigma, alpha) #Solution of system Ax=b
def T_analytical(x, t,n_max, alpha):
"""Computes the exact solution for Nagumo's equation
Paramters:
---------
x : array of float
Spatial position
t : float
Evaluation time
n_max: int
Number of terms to evaluate expression
alpha: float
r coefficient of A matrix associated
L : float
Size of rod
Returns:
-------
T : array of float
u(x,t) at each location x
"""
#T = 100
for n in range(1,n_max+1):
gamma1=(np.sqrt(2)*(x))/(2) + (0.5- alpha)*(t)
gamma2=(np.sqrt(2)*(alpha*x))/2 + (alpha*(alpha-2))*((t)/2)
ue=(np.exp(gamma1)+(alpha*np.exp(gamma2)))/(np.exp(gamma1)+np.exp(gamma2)+1)
T=ue
return T
"Graph of approximate solution and exact solution"
T_exact = T_analytical(x, dt*nt, 100, alpha)
fig=plt.figure()
plt.plot(x,T,'x-', label='Aproximada',color='#003366')
plt.plot(x,T_exact,'c|-' ,label='Solucion exacta',color='red')
plt.xlabel('x (espacio)', fontsize = 12)
plt.ylabel('u', fontsize = 12)
plt.xticks(fontsize = 12)
plt.yticks(fontsize = 12)
plt.axis([-L,L,0,1])
plt.grid(True)
plt.legend()
plt.show()
def L2_error(T, T_exact):
"""Computes L2 norm of error
Parameters:
----------
T : array of float
array with numerical solution
T_exact: array of float
array with exact solution
Returns:
-------
e: L2 norm of error
"""
e = l.norm(T_exact-T)
return e
"Calculation of the error in time"
nx = 5
t_final = 1
t_initial = 0
dt_values = np.asanyarray([2.0000e-01,4.0000e-01,6.0000e-01,8.0000e-01,1.0000]) #Values of dt
error = np.zeros(len(dt_values)) #error's array
x = np.linspace(-L,L,nx+1) #Discretization in space
Ti = T_analytical(x, t_initial, 100, alpha) #Simulation of Initial condition, u(x,0) = u0(x)
T_exact = T_analytical(x, t_final, 100, alpha) #Simulation of analytical solution
"Loop for the error calculation in time"
for i,dt in enumerate(dt_values):
#print(i, dt)
sigma = dt/(2*(dx**2))
nt = int((t_final-t_initial)/dt)
A = generateMatrix(nx, sigma)
T = CrankNicolson(Ti.copy(), A, nt, sigma,alpha)
error[i] = L2_error(T,T_exact)
"Plot of error vs dt on logarithmic scale"
plt.figure(figsize=(8,8))
plt.xlabel(r'$\Delta t$', fontsize=18)
plt.ylabel(r'norma $L_2$ del error', fontsize=18)
plt.axis('equal')
plt.loglog(dt_values, error, color='k', ls='--', lw=2, marker='o')
plt.xticks(dt_values,dt_values)
plt.grid(True)
plt.legend(['Crank-Nicolson']);
#loop=nx
#rate =np.log(error[1:loop-1]/error[2:loop])/np.log(dt_values[1:loop-1]/dt_values[2:loop])
"Loop for the error calculation in space"
dx_values = np.asanyarray([0.5,0.25,0.2,0.125,0.0625]) #Values of dx
#dx_values = np.asanyarray([0.0625,0.125,0.2,0.25,0.5]) #Values of dx
error_x = np.zeros(len(dx_values)) #error's array
nt = 5
dt = 0.01
for i,dx in enumerate(dx_values):
sigma = dt/(2*(dx_values[i]**2))
nx = int((2*L)/dx_values[i])
#int((t_final-t_initial)/dt)
x =np.linspace(-L,L,nx+1)
Ti = T_analytical(x, t_initial, nx+1, alpha) #Simulation of Initial condition, u(x,0) = u0(x)
T_exact = T_analytical(x, t_final, nx+1, alpha) #Simulation of analytical solution
A = generateMatrix(nx, sigma)
T = CrankNicolson(Ti.copy(), A, nt, sigma,alpha)
error_x[i] = round(l.norm(T_exact - T), 2)
error_x
"Plot of error vs dx on logarithmic scale"
plt.figure(figsize=(8,8))
plt.xlabel(r'$\Delta x$', fontsize=18)
plt.ylabel(r'norma $L_2$ del error', fontsize=18)
plt.axis('equal')
plt.loglog(dx_values,error_x , color='k', ls='--', lw=2, marker='o')
plt.xticks(np.round(dx_values,2),np.round(dx_values,2))
plt.grid(True)
plt.legend(['Crank-Nicolson']);
The graphs that I got
I would like someone to tell me what the error is in the propagation or if indeed the results I obtained are correct.

Hyperbolic sin and cos of an array

I'm trying to define some function for an eady stream function model as shown in the next line:
# Geometry of the wave / domain / mean state:
Lx = 3800 # Zonal Wavelength in km
H = 10000 # tropopause height in meters
Shear = 30/H # shear in sec^-1
k = 2*np.pi/(Lx*1000) # wavenumber (zonal)
l = np.pi/3.e6 # meridional wavenumber in 1/m
# Constants:
cor = 2*(7.292e-5)*np.sin(np.pi/4) # Coriolis parameter
bv2 = 1.e-4 # buoyancy frequency squared
sigma = 2.e-6 # static stability parameter
R = 287 # gas constant
# Grid points on which fields are computed:
xx = np.linspace(0,1.5*Lx,151) # gridpoints in x
yy = np.linspace( -1500,1500,101) # gridpoints in y
zz = np.linspace(0,H,51) # gridpoints in z
# Set array for grid system in x, y, and z
x,y,z = np.meshgrid(xx*1000, yy*1000, zz)
# Define coefficients for the model
mu2 = ((bv2*(H**2))/cor**2)*(k**2 + l**2)
mu = np.sqrt(mu2)
c = (Shear*H/2) + ((Shear*H)/mu)*np.sqrt((mu/2 - coth(mu/2))*(mu/2 - tanh(mu/2)))
# Note: try switching this to (Shear*H/2) - (Shear*H/mu)*...
ci = np.imag(c)
cr = np.real(c)
t = 0*np.pi/(10*cr*k)
A = 2.e7 # streamfunction amplitude (arbitrary)
B = -A*Shear*H/(mu*c)
Psi_z = A*cosh(mu*z/H) + B*sinh(mu*z/H)
I noticed that I'm getting an error when it comes to taking the hyperbolic sin and cos of the array with the following message:
TypeError: cannot create mpf from array (mu*z/H) for both sin and cos.
I've never encountered this error message before, so I'm not familiar enough to try and figure out an approach to this error.

How do i fix this error when converting a Matlab code to Python

I converted a Matlab code into python by manually typing it out. However i keep getting an error message which i still have not been able to fix. what am i doing wrong and how do i get the plot as that in Matlab? Just is little information about the code; this is a Explicit finite difference method for solving pressure distribution in an oil reservoir with production from the middle block only. Its similar to the heat equation, Ut=Uxx. I was told to add more text because my question is mostly code so had to add all these details. I think that notification has vanished now.
[P_new[N] = 4000 #last blocks at all time levels equals 4000
IndexError: index 9 is out of bounds for axis 0 with size 9]
The Matlab code which runs ok is below: The python code follows.
clear
clc
% Solution of P_t = P_{xx}
L = 1000 ; %ft length of reservoir
W = 100 ; %ft reservoir width
h = 50 ;%ft pay thickness
poro = 0.25; % rock porosity
k_o = 5; %md effective perm to oil
P_i = 4000; %psia initial pressure
B_o = 1.25; %oil formation vol fact
mu = 5; %cp oil visc
c_t = 0.0000125; %1/atm total compressibility
Q_o = 10;%stb/day production rate from central well
alpha = c_t*mu*poro/k_o;
T = 1;
N_time = 50;
dt = T/N_time;
% % Number of grid cells
N =9; %number of grid cells
%N =11;%number of grid cells
dx = (L/(N-1)); %distance between grid blocks
x = 0+dx*0.5:dx:L+dx; %points in space
for i=1:N
P_old(i)=P_i;
FPT(i)=0;
end
FPT((N+1)/2)=-Q_o*B_o*mu/1.127/W/dx/h/k_o; %source term at the center block of grid cell
P_new = P_old;
for j = 1:N_time
for k = 1: N
if k<2
P_new(k)=4000;%P_old(k)+dt/alpha*((P_old(k+1)-2*P_old(k)+P_old(k))/dx^2+FPT(k));
elseif k > N-1
P_new(k) = 4000;%P_old(k)+dt/alpha*((P_old(k)-2*P_old(k)+P_old(k-1))/dx^2+FPT(k));
else
P_new(k) = P_old(k)+dt/alpha*((P_old(k+1)-2*P_old(k)+P_old(k-1))/dx^2+FPT(k));
end
end
plot(x,P_new, '-x')
xlabel('X')
ylabel('P(X)')
hold on
grid on
%%update "u_old" before you move forward to the next time level
P_old = P_new;
end
hold off
Python Code:
import numpy as np
import matplotlib.pyplot as plt
# Solution of P_t = P_{xx}
L = 1000 #ft length of reservoir
W = 100 #ft reservoir width
h = 50 #ft pay thickness
poro = 0.25 # rock porosity
k_o = 5 #md effective perm to oil
P_i = 4000 #psia initial pressure
B_o = 1.25 #oil formation vol fact
mu = 5 #cp oil visc
c_t = 0.0000125 #1/atm total compressibility
Q_o = 10 #stb/day production rate from central well
alpha = c_t * mu * poro / k_o
T = 1
N_time = 20
dt = T / N_time
# % Number of grid cells
N = 9 #number of grid cells
dx = (L / (N - 1)) #distance between grid blocks
x= np.arange(0.0,L+dx,dx)
P_old = np.zeros_like(x) #pressure at previous time level
P_new = np.zeros_like(x) #pressure at previous time level
FPT = np.zeros_like(x)
for i in range(0,N):
P_old[i]= P_i
FPT[int((N + 1) / 2)]= -Q_o * B_o * mu / (1.127 * W * dx * h * k_o) # source term at the center block of grid cell
P_new = P_old
d=np.arange(0,N)
for j in range(0,N_time):
for k in range(0,N):
P_new[0] = 4000 #pressure at first block for all time levels equals 4000
P_new[N] = 4000 #pressure at last block for all time levels equals 4000
P_new[k]= P_old[k] + dt / alpha * ((P_old[k+1] - 2 * P_old[k] + P_old[k - 1]) / dx ** 2 + FPT[k])
plt.plot(x, P_new)
plt.xlabel('X')
plt.ylabel('P(X)')
P_old = P_new
Matlab uses 1 based indexing , Python arrays use "0" based indexing. If you define an array of length N in python, the indices are from 0 to N-1.
So just replace the index N to index N-1 in your code as below and it works.
import numpy as np
import matplotlib.pyplot as plt
# Solution of P_t = P_{xx}
L = 1000 #ft length of reservoir
W = 100 #ft reservoir width
h = 50 #ft pay thickness
poro = 0.25 # rock porosity
k_o = 5 #md effective perm to oil
P_i = 4000 #psia initial pressure
B_o = 1.25 #oil formation vol fact
mu = 5 #cp oil visc
c_t = 0.0000125 #1/atm total compressibility
Q_o = 10 #stb/day production rate from central well
alpha = c_t * mu * poro / k_o
T = 1
N_time = 20
dt = T / N_time
# % Number of grid cells
N = 9 #number of grid cells
dx = (L / (N - 1)) #distance between grid blocks
x= np.arange(0.0,L+dx,dx)
P_old = np.zeros_like(x) #pressure at previous time level
P_new = np.zeros_like(x) #pressure at previous time level
FPT = np.zeros_like(x)
for i in range(0,N):
P_old[i]= P_i
FPT[int((N + 1) / 2)]= -Q_o * B_o * mu / (1.127 * W * dx * h * k_o) # source term at the center block of grid cell
P_new = P_old
d=np.arange(0,N)
for j in range(0,N_time):
for k in range(0,N-1):
P_new[0] = 4000 #pressure at first block for all time levels equals 4000
P_new[N-1] = 4000 #pressure at last block for all time levels equals 4000
P_new[k]= P_old[k] + dt / alpha * ((P_old[k+1] - 2 * P_old[k] + P_old[k - 1]) / dx ** 2 + FPT[k])
plt.plot(x, P_new)
plt.xlabel('X')
plt.ylabel('P(X)')
P_old = P_new
output:

In linear regression, i am getting NAN values for cost and weights

import numpy as np
def cost_function(X,Y,B):
J = np.sum((X.T.dot(B)-Y) ** 2) / (2 * len(Y))
return J
def gradient_descent(X,Y,B,alpha,iterations):
cost_history = [0] * iterations
for iteration in range(iterations):
h = X.T.dot(B)
loss = h - Y
gradient = X.dot(loss) / len(Y)
B = B + (alpha * gradient)
cost = cost_function(X,Y,B)
cost_history[iteration] = cost
return B,cost_history
B--weights (2,1)
X--input(2,700)
Y--output(700,1)
alpha--learning rate (0.001)
iterations -- 3000
i am using cost function to calculate the error

Gradient Descent diverges, learning rate too high

There is a piece of code below, which does GD step by step but theta is diverging. What could be wrong?
X = arange(100)
Y = 50 + 4*X + uniform(-20, 20, X.shape)
theta = array([0,0])
alpha = 0.001
# one step of GD
theta0 = theta[0] - alpha * sum( theta[0]+theta[1]*x-y for x,y in zip(X,Y))/len(X)
theta1 = theta[1] - alpha * sum((theta[0]+theta[1]*x-y)*x for x,y in zip(X,Y))/len(X)
theta = [theta0, theta1]
Learning rate was too high.
alpha = 0.0001

Resources