Can't seem to get Scipy curve_fit Sigmoid to work in Python - python-3.x

There are a few posts about this and normally the answer is to have a good initial guess and bounds. I've played around with it for a while and cannot find a configuration that produces any sort of curve.
import numpy as np
array1 = np.array(column1).astype(float)
array2 = np.array(column2).astype(float)
print(array1)
print(array2)
Output:
[18.7327 9.3784 6.6293 20.8361 11.2603 19.3706 5.4302 10.1293 13.7516
8.0567 16.8688 4.969 3.94 19.4793 11.7527 13.2811 13.338 0.5944
7.4406 11.2338 6.2283 3.4818 10.1056 16.2689 22.442 18.7345 5.2605
5.6405 12.7186 18.2497 5.4315 14.2651 16.7544 12.9192 13.5955 10.9256
5.7798 8.4485 8.5229 11.879 6.5271 10.3376 7.781 31.4558 8.0236
2.3527 10.8926 16.1995 11.1924 25.8071 13.9692 20.7791 10.3045 12.2833
7.4066 15.9807 11.4462 15.1504 5.9021 19.1184]
[83.85 52.45 41.2 92.59 62.65 86.77 30.63 53.78 73.34 48.55 82.53 28.3
23.87 90.99 62.95 68.82 71.06 20.74 45.25 60.65 39.07 21.93 53.35 79.61
93.27 85.88 28.95 32.73 65.89 83.51 30.74 75.22 79.8 67.43 71.12 58.41
35.83 49.61 50.72 63.49 40.67 55.75 46.49 96.22 47.62 21.8 56.23 76.97
59.07 94.67 74.9 92.52 55.61 63.51 41.34 76.8 62.81 75.99 36.34 85.96]
import pylab
from scipy.optimize import curve_fit
def sigmoid(x, a, b):
    """Evaluate the logistic curve 1 / (1 + exp(-b * (x - a))).

    x : scalar or NumPy array of points to evaluate.
    a : midpoint -- the curve equals 0.5 at x == a.
    b : steepness of the transition.

    The result is bounded between 0 and 1 because the model has no
    amplitude parameter.
    """
    y = 1 / (1 + np.exp(-b * (x - a)))
    return y
popt, pcov = curve_fit(sigmoid, array1, array2, p0 = [5,20], method='dogbox', bounds=([0, 20],[40, 100]))
print(popt)
x = np.linspace(0, 35, 50)
y = sigmoid(x, *popt)
pylab.plot(array1, array2, 'o', label='data')
pylab.plot(x,y, label='fit')
pylab.ylim(0, 100)
pylab.legend(loc='best')
pylab.show()
Output:
Graph
As you can see, it's just not doing anything at all. I would really appreciate any help getting a rough sigmoid curve; it doesn't need to be super accurate.
Many Thanks.

In your case, the problem wasn't a good initial guess, but an inappropriate model. Note how your sigmoid cannot be larger than 1, yet your data is in the range of ~10 - 100.
# Sweep a 10 x 10 grid of (a, b) values and draw every resulting curve:
# whatever the parameters, the two-parameter sigmoid stays between 0 and 1.
xs = np.linspace(0, 15)
as_ = np.linspace(0, 5, num=10)
bs_ = np.linspace(0, 5, num=10)
for a in as_:
    for b in bs_:
        plt.plot(xs, sigmoid(xs, a, b))
Therefore, you either have to modify your model to accept a scaling parameter, or scale down your data to a range your model can fit. Here are the two solutions:
Preamble
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import pandas as pd
array1 = np.array([18.7327,9.3784,6.6293,20.8361,11.2603,19.3706,5.4302,10.1293,13.7516,8.0567,16.8688,4.969,3.94,19.4793,11.7527,13.2811,13.338,0.5944,7.4406,11.2338,6.2283,3.4818,10.1056,16.2689,22.442,18.7345,5.2605,5.6405,12.7186,18.2497,5.4315,14.2651,16.7544,12.9192,13.5955,10.9256,5.7798,8.4485,8.5229,11.879,6.5271,10.3376,7.781,31.4558,8.0236,2.3527,10.8926,16.1995,11.1924,25.8071,13.9692,20.7791,10.3045,12.2833,7.4066,15.9807,11.4462,15.1504,5.9021,19.1184])
array2 = np.array([83.85,52.45,41.2,92.59,62.65,86.77,30.63,53.78,73.34,48.55,82.53,28.3,23.87,90.99,62.95,68.82,71.06,20.74,45.25,60.65,39.07,21.93,53.35,79.61,93.27,85.88,28.95,32.73,65.89,83.51,30.74,75.22,79.8,67.43,71.12,58.41,35.83,49.61,50.72,63.49,40.67,55.75,46.49,96.22,47.62,21.8,56.23,76.97,59.07,94.67,74.9,92.52,55.61,63.51,41.34,76.8,62.81,75.99,36.34,85.96])
df = pd.DataFrame({'x':array1, 'y':array2})
df = df.sort_values('x')
Scaling data to match parameter
def sigmoid(x, a, b):
    """Evaluate the logistic curve 1 / (1 + exp(-b * (x - a))).

    x : scalar or NumPy array of points to evaluate.
    a : midpoint -- the curve equals 0.5 at x == a.
    b : steepness of the transition.

    The result is bounded between 0 and 1, so the target data must be
    scaled into that range before fitting.
    """
    y = 1 / (1 + np.exp(-b * (x - a)))
    return y
popt, pcov = curve_fit(sigmoid, df['x'], df['y'] / df['y'].max(), p0 = [5,20], method='dogbox', bounds=([0, 0],[40, 100]))
plt.plot(df['x'], df['y'] / df['y'].max(), label='data')
plt.plot(df['x'], sigmoid(df['x'], *popt))
popt is [8.56754823 0.20609918]
Adding new parameter to function
def sigmoid2(x, a, b, scale):
    """Evaluate a logistic curve with an adjustable upper plateau.

    x     : scalar or NumPy array of points to evaluate.
    a     : midpoint -- the curve equals scale / 2 at x == a.
    b     : steepness of the transition.
    scale : height of the upper plateau; lets the model reach data that
            is not confined to the 0..1 range.
    """
    y = scale / (1 + np.exp(-b * (x - a)))
    return y
popt, pcov = curve_fit(sigmoid2, df['x'], df['y'], p0 = [5,20, 100], method='dogbox', bounds=([0, 0, 0],[40, 100, 1E5]))
plt.plot(df['x'], df['y'], label='data')
plt.plot(df['x'], sigmoid2(df['x'], *popt))
popt is array([ 8.81708442, 0.19749557, 98.357044 ])

Related

Fitting logistic function giving straight line in python?

I am trying to fit a logistic function to the data below. I am not able to understand why I get a straight line with fitted parameters? Can someone please help me with this?
x = [473,523,573,623,673]
y = [104,103,95,79,83]
x = np.array(x)
y = np.array(y)
def f(x,a,b,c,d):
    """Four-parameter logistic: a / (1 + exp(-c * (x - d))) + b.

    a : height of the step, b : vertical offset,
    c : steepness, d : midpoint on the x axis.
    """
    return a/(1+np.exp(-c*(x-d))) + b
popt, pcov = opt.curve_fit(f,x,y,method="trf")
y_fit = f(x,*popt)
plt.plot(x,y,'o')
plt.plot(x,y_fit,'-')
plt.show()
A good choice of initial values can make the difference between a successful optimization and one that fails. For this particular problem I would suggest the initial values given in the code below (the p0 list). These values allow the covariance to be estimated properly.
Moreover, you don't need to use the trf algorithm since you are not providing any bound (see docs). The Levenberg–Marquardt algorithm is the right tool for this case.
import numpy as np
from matplotlib import pyplot as plt
from scipy.optimize import curve_fit
x = [473, 523, 573, 623, 673]
y = [104, 103, 95, 79, 83]
x = np.array(x)
y = np.array(y)
def f(x, a, b, c, d):
    """Four-parameter logistic: a / (1 + exp(-c * (x - d))) + b.

    a : height of the step, b : vertical offset,
    c : steepness, d : midpoint on the x axis.
    """
    return a / (1 + np.exp(-c * (x - d))) + b
p0 = [20.0, 0.0, 0.0, 0.0]
popt, pcov = curve_fit(f, x, y, p0=p0)
y_fit = f(x, *popt)
plt.plot(x, y, "o")
plt.plot(x, y_fit, "-")
plt.show()
If possible, I would also suggest increasing the number of observations.

plotting of 3-d softmax function using matplotlib

I would like to illustrate a detailed function similar to the softmax function.
The formula is as shown in the image.
I wrote it in python based on the following blog. Sorry, written in Japanese.
https://www.anarchive-beta.com/entry/2020/06/07/180000
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib
from mpl_toolkits.mplot3d.axes3d import Axes3D
%matplotlib notebook
def softmax(x, a, b, d):
    """Scaled, shifted softmax along the last axis.

    Computes b * exp(a * (x - d)) / sum(exp(a * (x - d))) over the last
    axis, so every row of the result sums to b.

    x : array-like whose last axis holds the components.
    a : scalar (or broadcastable) sharpness factor.
    b : scalar output scale.
    d : scalar or per-component offset, broadcast against x.

    Note: when d is the same for every component it cancels between the
    numerator and the denominator, so the output does not depend on it.
    """
    z = a * (np.asarray(x, dtype=float) - d)
    # Shift the *actual exponent* so its largest entry is 0; this leaves the
    # ratio unchanged but keeps exp() from overflowing for any sign of a or d.
    z = z - np.max(z, axis=-1, keepdims=True)
    e = np.exp(z)
    return e * b / np.sum(e, axis=-1, keepdims=True)
# input
x_vals = np.arange(0, 10, 0.01)
x2_vals = np.arange(0, 10, 0.01)
X0_vals, X1_vals = np.meshgrid(x_vals, x2_vals)
X_vals = np.array([
X0_vals.flatten(),
X1_vals.flatten()
]).T
print(X_vals)
print(X_vals[:5])
print(X_vals.shape)
input_shape = X0_vals.shape
print(input_shape)
Y_vals = softmax(X_vals, 12, 0.8, [10,10])
print(np.round(Y_vals[:5], 3))
print(np.sum(Y_vals[:5], axis=1))
Y0_vals = np.array(Y_vals[:, 0]).reshape(input_shape)
fig = plt.figure(figsize=(5, 5))
ax = Axes3D(fig)
ax.plot_wireframe(X0_vals, X1_vals, Y0_vals, label='$y_0$')
ax.set_xlabel('$x_0$')
ax.set_ylabel('$x_1$')
ax.set_zlabel('$y_0$')
ax.set_title('Softmax Function', fontsize=20)
ax.legend()
ax.set_zlim(0, 1)
ax.view_init(elev=20, azim=240)
plt.show()
Y_vals = softmax(X_vals, 12, 0.8, 0)
print(np.round(Y_vals[:5], 3))
print(np.sum(Y_vals[:5], axis=1))
Y0_vals = np.array(Y_vals[:, 0]).reshape(input_shape)
fig = plt.figure(figsize=(5, 5))
ax = Axes3D(fig)
ax.plot_wireframe(X0_vals, X1_vals, Y0_vals, label='$y_0$')
ax.set_xlabel('$x_0$')
ax.set_ylabel('$x_1$')
ax.set_zlabel('$y_0$')
ax.set_title('Softmax Function', fontsize=20)
ax.legend()
ax.set_zlim(0, 1)
ax.view_init(elev=20, azim=240)
plt.show()
(1)
I tried to plot the function with a_1 = a_2 = 12, b = 0.8, c_1 = c_2 = 12. However, I could not see any difference from the plot with a_1 = a_2 = 12, b = 0.8, c_1 = c_2 = 0.
How should I write the code?
(2)
I have no idea how to plot the function when a_1 is not equal to a_2, or c_1 is not equal to c_2.
(3)
I would like to overlay a scatter plot of arbitrary points on the function's surface, but the points do not line up with it properly.
There might be similar questions, but I'm not very familiar with 3-D plots, so I would be glad if you could show me the details.

Find the intersection of two equations

I would like to find the intersection between (eq1, eq2) and (eq1, eq3) and mark each point with a dotted line to each axis. This code does not give me the exact point, only an approximation. I do not understand where I am making a mistake.
import matplotlib.pyplot as plt
import numpy as np
# Sample the three straight lines at the integer abscissae 0..24.
f = []
h = []
j = []
point = []
for x in range(25):
    eq1 = x * 185 * 3
    eq2 = 11930 - (12502 / 6) + (x * 185) / 6
    eq3 = 11930 - (12502 / 3) + (x * 185) / 6
    point.append(x)
    f.append(eq1)
    h.append(eq2)
    j.append(eq3)
plt.plot(point, f)
plt.plot(point, h)
plt.plot(point, j)
plt.legend(loc='lower right', fontsize=10)
idx1 = np.argwhere(np.diff(np.sign(np.array(f) - np.array(h)))).flatten()
idx2 = idx = np.argwhere(np.diff(np.sign(np.array(f) - np.array(j)))).flatten()
plt.plot(np.array(point)[idx1+1], np.array(h)[idx1+1], 'ro')
plt.plot(np.array(point)[idx2+1], np.array(j)[idx2+1], 'ro')
plt.show()
Several issues here:
Firstly, your code is unnecessarily long. Make use of NumPy arrays to simplify things. Since NumPy is a dependency of matplotlib, importing NumPy is not overkill.
You need to make a very dense mesh of points between 0 and 25 to get more accurate intersection points. Use linspace with 1000 points, for example.
As you can see, with arrays you don't need a for loop, nor do you need to initialize empty lists and then append values one by one.
import matplotlib.pyplot as plt
import numpy as np
x = np.linspace(0, 25, 1000)
f = x * 185 * 3
h = 11930 - (12502 / 6) + (x * 185) / 6
j = 11930 - (12502 / 3) + (x * 185) / 6
plt.plot(x, f, label='f')
plt.plot(x, h, label='h')
plt.plot(x, j, label='j')
plt.legend(loc='lower right', fontsize=12)
idx1 = np.argwhere(np.diff(np.sign(np.array(f) - np.array(h)))).flatten()
idx2 = idx = np.argwhere(np.diff(np.sign(np.array(f) - np.array(j)))).flatten()
plt.plot(x[idx1+1], h[idx1+1], 'ro')
plt.plot(x[idx2+1], j[idx2+1], 'ro')
plt.vlines(x[idx1+1], 0, h[idx1+1], linestyle='--')
plt.vlines(x[idx2+1], 0, j[idx2+1], linestyle='--')
plt.hlines(h[idx1+1], 0, x[idx1+1], linestyle='--')
plt.hlines(j[idx2+1], 0, x[idx2+1], linestyle='--')
plt.xlim(0, None)
plt.ylim(0, None)
plt.show()

Poor GMM fit in sklearn from 2 gaussian

I want to fit a 2-component mixture model with sklearn and then calculate the posterior probabilities from it. But with the code I have so far, the fit for one of the two distributions is perfect (overfitting?) and the other one is very poor. I made a dummy example by sampling from 2 Gaussians:
import numpy as np
from sklearn.mixture import GaussianMixture
import matplotlib.pyplot as plt
def calc_pdf():
    """
    Fit a 2-component Gaussian mixture to log-transformed samples and plot it.

    Draws 5000 samples from N(-0.1, 0.07) and 10000 from N(0.2, 0.13), keeps
    the strictly positive values of the pooled sample, takes their logarithm
    and fits a GaussianMixture with two components to the result.

    Returns a tuple with one array per mixture component; each array holds
    that component's weighted density evaluated at every fitted sample.
    """
    d = np.random.normal(-0.1, 0.07, 5000)
    t = np.random.normal(0.2, 0.13, 10000)
    pool = np.concatenate([d, t])
    # Only strictly positive samples have a real logarithm.
    X = np.log(pool[pool > 0]).reshape(-1, 1)
    clf = GaussianMixture(
        n_components=2,
        covariance_type='full',
        tol=1e-24,
        max_iter=1000,
    )
    logprob = clf.fit(X).score_samples(X)
    responsibilities = clf.predict_proba(X)
    pdf = np.exp(logprob)
    # Mixture density times each component's posterior share, per sample.
    pdf_individual = responsibilities * pdf[:, np.newaxis]
    # Apply the same positivity filter to the reference histograms; the log
    # of a negative sample is NaN and cannot be binned.
    plot_gauss(np.log(d[d > 0]), np.log(t[t > 0]), pdf_individual, X)
    # Components are the columns; rows are individual samples.
    return pdf_individual[:, 0], pdf_individual[:, 1]
def plot_gauss(d, t, pdf_individual, x):
    """
    Overlay fitted component densities on histograms of the two samples.

    d, t           : 1-D samples, each drawn as a 30-bin normalised histogram.
    pdf_individual : per-component density values, plotted as dots against x.
    x              : abscissae at which pdf_individual was evaluated.
    """
    fig, ax = plt.subplots(figsize=(12, 9), facecolor='white')
    ax.hist(d, 30, density=True, histtype='stepfilled', alpha=0.4)
    ax.hist(t, 30, density=True, histtype='stepfilled', alpha=0.4)
    ax.plot(x, pdf_individual, '.')
    ax.set_xlabel('$x$')
    ax.set_ylabel('$p(x)$')
    plt.show()
calc_pdf()
which produces this plot here
Is there something obvious that I am missing?

Draw curves with triple colors and width by using matplotlib and LineCollection [duplicate]

The figure above is a great piece of artwork showing wind speed, wind direction and temperature simultaneously. In detail:
The X axis represents the date
The Y axis shows the wind direction (southern, western, etc.)
The varying width of the line stands for the wind speed through the time series
The varying color of the line stands for the atmospheric temperature
This simple figure visualizes 3 different attributes without redundancy.
So, I really want to reproduce a similar plot in matplotlib.
My attempt now
## Reference 1 http://stackoverflow.com/questions/19390895/matplotlib-plot-with-variable-line-width
## Reference 2 http://stackoverflow.com/questions/17240694/python-how-to-plot-one-line-in-different-colors
def plot_colourline(x,y,c):
    """Draw y(x) on the current axes as segments of varying colour and width.

    x, y : NumPy arrays of equal length holding the curve's coordinates.
    c    : array of values mapped through the jet colormap after min-max
           normalisation; segment i takes the colour of c[i].

    The width of segment i is 1 + x[i].
    """
    c = np.asarray(c, dtype=float)
    span = np.max(c) - np.min(c)
    # A constant c would divide by zero; map it to the start of the colormap.
    normed = (c - np.min(c)) / span if span else np.zeros_like(c)
    colours = plt.cm.jet(normed)
    lwidths = 1 + x[:-1]
    ax = plt.gca()
    for i in range(len(x) - 1):
        ax.plot([x[i], x[i+1]], [y[i], y[i+1]], c=colours[i], linewidth=lwidths[i])
x=np.linspace(0,4*math.pi,100)
y=np.cos(x)
lwidths=1+x[:-1]
fig = plt.figure(1, figsize=(5,5))
ax = fig.add_subplot(111)
plot_colourline(x,y,prop)
ax.set_xlim(0,4*math.pi)
ax.set_ylim(-1.1,1.1)
Does someone have a more interesting way to achieve this? Any advice would be appreciated!
Using as inspiration another question.
One option would be to use fill_between. But perhaps not in the way it was intended. Instead of using it to create your line, use it to mask everything that is not the line. Under it you can have a pcolormesh or contourf (for example) to map color any way you want.
Look, for instance, at this example:
import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import interp1d
def windline(x,y,deviation,color):
    """Draw a ribbon of varying thickness and colour by masking a colour field.

    A filled contour of the colour values is drawn over the whole plotting
    area, then everything above and below the ribbon is painted white.

    x         : NumPy array of abscissae.
    y         : centre line of the ribbon, one value per x.
    deviation : ribbon thickness, one value per x.
    color     : values for the colour mapping; must broadcast against the
                100-column mesh used for the contour (one value per column).
    """
    y1 = y - deviation / 2
    y2 = y + deviation / 2
    # 5% vertical padding around the ribbon's extent.
    tol = (y2.max() - y1.min()) * 0.05
    X, Y = np.meshgrid(np.linspace(x.min(), x.max(), 100), np.linspace(y1.min()-tol, y2.max()+tol, 100))
    # Every mesh row carries the same colour profile. Use the `color`
    # argument rather than a same-named global from the calling script.
    Z = np.empty_like(X)
    Z[:] = color
    #plt.pcolormesh(X, Y, Z)
    plt.contourf(X, Y, Z, cmap='seismic')
    # White masks above and below the ribbon.
    plt.fill_between(x, y2, y2=np.ones(x.shape)*(y2.max()+tol), color='w')
    plt.fill_between(x, np.ones(x.shape) * (y1.min() - tol), y2=y1, color='w')
    plt.xlim(x.min(), x.max())
    plt.ylim(y1.min()-tol, y2.max()+tol)
    plt.show()
x = np.arange(100)
yo = np.random.randint(20, 60, 21)
y = interp1d(np.arange(0, 101, 5), yo, kind='cubic')(x)
dv = np.random.randint(2, 10, 21)
d = interp1d(np.arange(0, 101, 5), dv, kind='cubic')(x)
co = np.random.randint(20, 60, 21)
c = interp1d(np.arange(0, 101, 5), co, kind='cubic')(x)
windline(x, y, d, c)
, which results in this:
The function windline accepts as arguments numpy arrays with x, y , a deviation (like a thickness value per x value), and color array for color mapping. I think it can be greatly improved by messing around with other details but the principle, although not perfect, should be solid.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
x = np.linspace(0,4*np.pi,10000) # x data
y = np.cos(x) # y data
r = np.piecewise(x, [x < 2*np.pi, x >= 2*np.pi], [lambda x: 1-x/(2*np.pi), 0]) # red
g = np.piecewise(x, [x < 2*np.pi, x >= 2*np.pi], [lambda x: x/(2*np.pi), lambda x: -x/(2*np.pi)+2]) # green
b = np.piecewise(x, [x < 2*np.pi, x >= 2*np.pi], [0, lambda x: x/(2*np.pi)-1]) # blue
a = np.ones(10000) # alpha
w = x # width
fig, ax = plt.subplots(2)
ax[0].plot(x, r, color='r')
ax[0].plot(x, g, color='g')
ax[0].plot(x, b, color='b')
# mysterious parts
points = np.array([x, y]).T.reshape(-1, 1, 2)
segments = np.concatenate([points[:-1], points[1:]], axis=1)
# mysterious parts
rgba = list(zip(r,g,b,a))
lc = LineCollection(segments, linewidths=w, colors=rgba)
ax[1].add_collection(lc)
ax[1].set_xlim(0,4*np.pi)
ax[1].set_ylim(-1.1,1.1)
fig.show()
I noticed that this is exactly the problem I had been struggling with.

Resources