import numpy as np
from astropy import modeling
import matplotlib.pyplot as plt
from scipy import optimize
def gaussian(x, amplitude, mean, stddev):
    return amplitude * np.exp(-((x - mean)/4/stddev)**2)
# the data
m = modeling.models.Gaussian1D(amplitude=10, mean=100, stddev=10)
x = np.linspace(0, 400, 400)
data = m(x)
# fitting
popt, _ = optimize.curve_fit(gaussian, x, data)
plt.figure(0)
plt.plot(x, data)
plt.plot(x, gaussian(x, *popt))
plt.show()
I run this to fit a normal distribution, but it gives me a flat line and I can't figure out why.
However, if I decrease the mean below 45, it gives a good fit. Is this a design defect of the scipy package?
Initial estimates for the model parameters (in this case amplitude, mean, and standard deviation) make a big difference when you use scipy.optimize.curve_fit, and you did not provide any initial guess.
If you provide a guess (with the actual values), the fit is perfect (since the input Gaussian is a perfect Gaussian with no noise added):
import numpy as np
from astropy import modeling
import matplotlib.pyplot as plt
from scipy import optimize
def gaussian(x, amplitude, mean, stddev):
    return amplitude * np.exp(-((x - mean)/4/stddev)**2)
# the data
m = modeling.models.Gaussian1D(amplitude=10, mean=100, stddev=10)
x = np.linspace(0, 400, 400)
data = m(x)
# fitting
popt, _ = optimize.curve_fit(gaussian, x, data, p0 = [10, 100, 10])
plt.figure(0)
plt.plot(x, data)
plt.plot(x, gaussian(x, *popt))
plt.show()
and you get: [plot: the fitted Gaussian overlapping the data]
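As a side note (my addition, not part of the original answer), the initial guess does not have to be hard-coded; a minimal sketch that estimates it from the data itself, reusing x, data and gaussian from the snippet above:
amp0 = data.max()                     # peak height
mean0 = x[np.argmax(data)]            # location of the peak
std0 = np.sqrt(np.sum(data * (x - mean0)**2) / np.sum(data))  # rough width from the second moment
popt, _ = optimize.curve_fit(gaussian, x, data, p0=[amp0, mean0, std0])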
You can make sure the fit works by adding some noise to your data, e.g.:
from astropy import modeling
import matplotlib.pyplot as plt
from scipy import optimize
import numpy as np
def gaussian(x, amplitude, mean, stddev):
    return amplitude * np.exp(-((x - mean)/4/stddev)**2)
# the data
m = modeling.models.Gaussian1D(amplitude=10, mean=100, stddev=10)
x = np.linspace(0, 400, 400)
data = m(x)
noise = np.random.normal(size=x.shape)  # one noise sample per data point
data = data + noise
# fitting
popt, _ = optimize.curve_fit(gaussian, x, data, p0 = [10, 100, 10])
plt.figure(0)
plt.plot(x, data, 'o', label = 'data')
plt.plot(x, gaussian(x, *popt), label = 'fit')
plt.legend()
plt.show()
and you get: [plot: noisy data points with the fitted Gaussian curve]
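Since astropy.modeling is already imported, the same curve could also be fitted with astropy's own least-squares fitter; a minimal sketch (my addition, reusing x and data from the snippet above):
from astropy.modeling import fitting
g_init = modeling.models.Gaussian1D(amplitude=data.max(), mean=x[np.argmax(data)], stddev=5.)
fit_g = fitting.LevMarLSQFitter()
g_fit = fit_g(g_init, x, data)
print(g_fit.amplitude.value, g_fit.mean.value, g_fit.stddev.value)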
Related
I am trying to fit a Gaussian function to my dataset using scipy's curve_fit() function and have failed to get the function to fit. I tried the same using some other tools like Matlab and the function readily fits. Could someone please help me out here? I am not sure what I am doing wrong. Thanks a lot for any help :)
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
x_data = [12, 34, 56]
y_data = [1e-10, 1e-3, 1e-10]
def func(xdata, a, b, c):
    return a*np.exp(-(xdata - b)**2/(2*c**2))
popt,_ = curve_fit(func, x_data, y_data)
x_fit = np.linspace(0,100, 100)
y_fit = func(x_fit, *popt)
plt.scatter(x_data, y_data)
plt.plot(x_fit,y_fit)
plt.show()
The above is the code I have tried, and I get a bell curve that refuses to move away from a mean of 0 (the bell is centred over x = 0).
It fits fine so long as you give it sane initial conditions:
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
x_data = [12, 34, 56]
y_data = [1e-10, 1e-3, 1e-10]
def func(xdata: np.ndarray, a: float, b: float, c: float) -> np.ndarray:
    return a*np.exp(-(xdata - b)**2/(2*c**2))
popt, _ = curve_fit(f=func, xdata=x_data, ydata=y_data, p0=[1e-3, 34, 10])
print(popt)
x_fit = np.linspace(0, 100, 100)
y_fit = func(x_fit, *popt)
plt.scatter(x_data, y_data)
plt.plot(x_fit,y_fit)
plt.show()
[1.00000000e-03 3.40000000e+01 3.87481363e+00]
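If you prefer not to type the guess by hand, here is a sketch (my own suggestion, not part of the answer) of deriving p0 from the data:
x_arr = np.asarray(x_data)
y_arr = np.asarray(y_data)
p0 = [y_arr.max(), x_arr[np.argmax(y_arr)], x_arr.std()]  # amplitude, centre, rough width
popt, _ = curve_fit(func, x_arr, y_arr, p0=p0)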
I want to fit a 2-component mixture model with sklearn and then calculate the posterior probabilities. But with the code I have so far, the fit for one of the two distributions is perfect (overfitting?) and the other one is very poor. I made a dummy example by sampling from 2 Gaussians:
import numpy as np
from sklearn.mixture import GaussianMixture
import matplotlib.pyplot as plt
def calc_pdf():
    """
    calculate gauss mixture modelling for 2 comp
    return pdfs
    """
    d = np.random.normal(-0.1, 0.07, 5000)
    t = np.random.normal(0.2, 0.13, 10000)
    pool = np.concatenate([d, t]).reshape(-1, 1)
    label = ['d'] * d.shape[0] + ['t'] * t.shape[0]
    X = pool[pool > 0].reshape(-1, 1)
    X = np.log(X)
    clf = GaussianMixture(
        n_components=2,
        covariance_type='full',
        tol=1e-24,
        max_iter=1000
    )
    logprob = clf.fit(X).score_samples(X)
    responsibilities = clf.predict_proba(X)
    pdf = np.exp(logprob)
    pdf_individual = responsibilities * pdf[:, np.newaxis]
    plot_gauss(np.log(d), np.log(t), pdf_individual, X)
    return pdf_individual[0], pdf_individual[1]

def plot_gauss(d, t, pdf_individual, x):
    fig, ax = plt.subplots(figsize=(12, 9), facecolor='white')
    ax.hist(d, 30, density=True, histtype='stepfilled', alpha=0.4)
    ax.hist(t, 30, density=True, histtype='stepfilled', alpha=0.4)
    ax.plot(x, pdf_individual, '.')
    ax.set_xlabel('$x$')
    ax.set_ylabel('$p(x)$')
    plt.show()
calc_pdf()
which produces this plot: [histograms of d and t with the per-component pdfs overplotted]
Is there something obvious that I am missing?
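Not an answer, but for debugging it may help to read the fitted component parameters back from the GaussianMixture object right after clf.fit(X) inside calc_pdf; weights_, means_ and covariances_ are standard attributes, so a component that has collapsed onto a few points shows up immediately:
print('weights:', clf.weights_)
print('means:', clf.means_.ravel())
print('stddevs:', np.sqrt(clf.covariances_).ravel())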
I have a histogram and I'm trying to fit the best normal (Gaussian) function to it, as you can see below. The problem is that the Gaussian fit isn't as good a fit as I expected.
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.mlab as mlab
from astropy.modeling import models, fitting
bins = np.arange(-1, 8, 0.3)
# Reading data
a18 = np.loadtxt('AndXII18I.srt')
arr18 = np.array(a18[:, 11])
axs[0, 0].hist(arr18, bins, histtype='step')
axs[0, 0].set_xlim([np.min(arr18), np.max(arr18)])
x = np.linspace(-1, bins[len(bins) - 2], len(bins) - 1)
x1 = np.linspace(-1, 8, 1000)
# guesses for the parameters:
g_init = models.Gaussian1D(1, 0, 1.)
fit_g = fitting.LevMarLSQFitter()
# fit first, then evaluate and plot (axs, y18, hist18 and edges18 are defined
# elsewhere in the full script)
g18 = fit_g(g_init, x, y18[0])
a18 = g18.mean  # note: this reuses the name a18
t18 = g18.amplitude * np.exp(-(x1 - a18)**2 / (2 * g18.stddev**2))
axs[0, 0].plot(x1, t18)
axs[0, 0].plot(edges18[8], hist18[8], 'o')
plt.show()
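The snippet above references variables (axs, y18, hist18, edges18) that are defined elsewhere, so here is a self-contained sketch of the same approach with made-up data, fitting an astropy Gaussian1D to histogram counts at the bin centres:
import numpy as np
import matplotlib.pyplot as plt
from astropy.modeling import models, fitting
rng = np.random.default_rng(0)
sample = rng.normal(3.0, 1.2, 500)            # stand-in for the real arr18 column
bins = np.arange(-1, 8, 0.3)
hist, edges = np.histogram(sample, bins=bins)
centers = 0.5 * (edges[:-1] + edges[1:])      # fit at bin centres, not bin edges
g_init = models.Gaussian1D(amplitude=hist.max(), mean=centers[np.argmax(hist)], stddev=1.)
fit_g = fitting.LevMarLSQFitter()
g18 = fit_g(g_init, centers, hist)
x1 = np.linspace(-1, 8, 1000)
plt.step(centers, hist, where='mid', label='histogram')
plt.plot(x1, g18(x1), label='Gaussian1D fit')
plt.legend()
plt.show()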
I am still trying to understand how solve_ivp works compared to odeint, but just as I was getting the hang of it something happened.
I am trying to solve for the motion of a nonlinear pendulum. With odeint everything works like a charm, but with solve_ivp something weird happens:
import numpy as np
from matplotlib import pyplot as plt
from scipy.integrate import solve_ivp, odeint
g = 9.81
l = 0.1
def f(t, r):
    omega = r[0]
    theta = r[1]
    return np.array([-g / l * np.sin(theta), omega])
time = np.linspace(0, 10, 1000)
init_r = [0, np.radians(179)]
results = solve_ivp(f, (0, 10), init_r, method="RK45", t_eval=time) #??????
cenas = odeint(f, init_r, time, tfirst=True)
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax1.plot(results.t, results.y[1])
ax1.plot(time, cenas[:, 1])
plt.show()
What am I missing?
It is a numerical problem. The default relative and absolute tolerances of solve_ivp are 1e-3 and 1e-6, respectively. For many problems, these values are too big, and tighter error tolerances should be given. The default relative tolerance for odeint is 1.49e-8.
If you add the argument rtol=1e-8 to the solve_ivp call, the plots agree:
import numpy as np
from matplotlib import pyplot as plt
from scipy.integrate import solve_ivp, odeint
g = 9.81
l = 0.1
def f(t, r):
    omega = r[0]
    theta = r[1]
    return np.array([-g / l * np.sin(theta), omega])
time = np.linspace(0, 10, 1000)
init_r = [0, np.radians(179)]
results = solve_ivp(f, (0, 10), init_r, method='RK45', t_eval=time, rtol=1e-8)
cenas = odeint(f, init_r, time, tfirst=True)
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax1.plot(results.t, results.y[1])
ax1.plot(time, cenas[:, 1])
plt.show()
Plot: [the solve_ivp and odeint curves now lie on top of each other]
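As a quick check (my addition), the agreement can be quantified by comparing the two solutions directly, since t_eval=time makes solve_ivp return values on the same grid that odeint uses:
print(np.max(np.abs(results.y[1] - cenas[:, 1])))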
I am trying to do a simple linear regression. I have only two data points with errors. How do I estimate the y-intercept with errors?
Currently I have:
#! /usr/bin/env python
import sys,os
from scipy.optimize import curve_fit
import numpy as np
import matplotlib.pyplot as plt
xdata = [25, 33]
ydata = [-279.430059,-279.450271]
yerr = [0.0021, 0.0019]
def linear(x, a, b):
    y = a*x + b
    return y
## Pol fit
initial=[0.1, min(ydata)]
popt, pcov = curve_fit(linear, xdata, ydata, sigma=yerr,p0=initial)
print("Params", popt, np.sqrt(np.diag(pcov)))
x=np.linspace(xdata[0],xdata[-1],200)
fig, ax = plt.subplots()
plt.plot(xdata, ydata, 'o')
plt.plot(x, linear(x, *popt), '--', lw=1, label="Linear Func")
plt.show()
Running it raises a warning and prints:
OptimizeWarning: Covariance of the parameters could not be estimated
('Params', array([-2.52650000e-03, -2.79366897e+02]), array([inf, inf]))
Preferably, I am looking for something that will generalize to more data points.
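Not from the thread, but as a sketch of what changes with more points: once there are more data points than fit parameters, curve_fit can estimate the covariance, and np.sqrt(np.diag(pcov)) then gives finite 1-sigma errors on the slope and intercept (the third data point below is made up purely for illustration):
xdata3 = [25, 33, 41]
ydata3 = [-279.430059, -279.450271, -279.470412]   # hypothetical third point
yerr3 = [0.0021, 0.0019, 0.0020]
popt3, pcov3 = curve_fit(linear, xdata3, ydata3, sigma=yerr3, absolute_sigma=True, p0=[0.1, min(ydata3)])
print("Params", popt3, np.sqrt(np.diag(pcov3)))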