I'm currently trying to curve fit some experimental data to a simple power-law equation.
Nu = C*Re**m*Pr**(1/3)
I am trying to use the scipy.optimize.curve_fit function to do this, but am getting the error code: "Result from function call is not a proper array of floats." I don't know why I am getting this error code but I wonder if it is because I have too many arrays that I need to use for my equation.
My code is as follows
import matplotlib.pyplot as plt
import scipy.optimize as so
def function(C, m):
result = []
for i,j in zip(Re, Pr):
y = C * i ** m * j ** (1/3)
result.append(y)
return result
parameters, covariance = so.curve_fit(function, Re, Nu)
y2 = function(Re, Pr, *parameters)
print(parameters)
plt.plot(Re, Nu)
plt.plot(Re, y2)
plt.show()
Here is a graphing 3D surface fitter using curve_fit that has a 3D scatterplot, 3D surface plot, and a contour plot. Note that the initial parameter estimates are all 1.0, and this example does not use scipy's genetic algorithm to estimate initial parameter values.
import numpy, scipy, scipy.optimize
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt
graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels
# 3D contour plot lines
numberOfContourLines = 16
def SurfacePlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)
axes.scatter(x_data, y_data, z_data) # show data along with plotted surface
axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
axes.set_zlabel('Z Data') # Z axis data label
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def ContourPlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot(x_data, y_data, 'o')
axes.set_title('Contour Plot') # add a title for contour plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
CS = matplotlib.pyplot.contour(X, Y, Z, numberOfContourLines, colors='k')
matplotlib.pyplot.clabel(CS, inline=1, fontsize=10) # labels for contours
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def ScatterPlot(data):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
axes.scatter(x_data, y_data, z_data)
axes.set_title('Scatter Plot (click-drag with mouse)')
axes.set_xlabel('X Data')
axes.set_ylabel('Y Data')
axes.set_zlabel('Z Data')
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def func(data, a, alpha, beta):
t = data[0]
p_p = data[1]
return a * (t**alpha) * (p_p**beta)
if __name__ == "__main__":
xData = numpy.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
yData = numpy.array([11.0, 12.1, 13.0, 14.1, 15.0, 16.1, 17.0, 18.1, 90.0])
zData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.0, 9.9])
data = [xData, yData, zData]
initialParameters = [1.0, 1.0, 1.0] # these are the same as scipy default values in this example
# here a non-linear surface fit is made with scipy's curve_fit()
fittedParameters, pcov = scipy.optimize.curve_fit(func, [xData, yData], zData, p0 = initialParameters)
ScatterPlot(data)
SurfacePlot(func, data, fittedParameters)
ContourPlot(func, data, fittedParameters)
print('fitted prameters', fittedParameters)
modelPredictions = func(data, *fittedParameters)
absError = modelPredictions - zData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(zData))
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
Related
Am unable to get the proper plot for this, even though i set index 15,15 as 100 , still i get plot with all zero. Can someone help?
import matplotlib.pyplot as plt
import numpy as np
# Make data.
X = np.arange(0, 512, 1)
Y = np.arange(0, 512, 1)
X, Y = np.meshgrid(X, Y)
Z = np.zeros((512, 512), dtype=float)
Z[15,15]=10
# Plot the surface.
fig = plt.figure(figsize=(6,6))
ax = fig.add_subplot(111, projection='3d')
# Plot a 3D surface
ax.plot_surface(X, Y, Z)
plt.show()
This is a scatter plot which illustrates the relationship between two variables:
It is obvious that this is a non-linear relationship. Both variables are time-series - the points are different observations.
How can I fit a curve (in python) that would approximate it?
EDIT:
Note that this is not a 2D relationship (as JamesPhilips pointed out below).
As I mentioned, these are two time series. I guess the correct thing to do would be to go for a 3D fit (including the time as a third dimension). So the function would take two inputs (x and time). How to do that?
EDIT2:
I'm attaching a sample of that dataset here
EDIT3:
I am fortunate to have received two high quality answers by norok2 and JamesPhilips (many thanks to both of them!) and I will be exploring these. However, my impression is that none of the ideas proposed so far is making significant use of the fact that these are time series. My intuition is that there is some signal there (I know, not having the time stamps is making things complicated). So I will keep the question open for a while in case someone wants to chip in some other ideas.
Also, it seems that the dataset I put a link to was sorted not by using the original index (my bad!) - I am putting a link to the correctly sorted dataset here.
According to the question, the two columns x and y are two timeseries, i.e. x(t) and y(t). The t parameter is represented by the index
First, let's load the data:
import io
import requests
import numpy as np
import scipy as sp
import matplotlib as mpl
import scipy.interpolate
import scipy.ndimage
import matplotlib.pyplot as plt
file_id = '1q4zY7B-BwG8bmbQJT3QvRt6B2MD4k0a0'
url = requests.get('https://drive.google.com/uc?export=download&id=' + file_id)
csv_file = io.StringIO(url.text)
data = np.loadtxt(csv_file, delimiter=',')
x = data[:, 0]
y = data[:, 1]
t = np.arange(len(x))
Now, y(x) may not, in general, be well defined. A more useful representation of the data is obtained by plotting x(t) and y(t) (perhaps along y(x)):
fig, ax = plt.subplots(1, 3, figsize=(15, 4), squeeze=False)
ax[0, 0].scatter(t, x, color='k', s=8.0)
ax[0, 1].scatter(t, y, color='k', s=8.0)
ax[0, 2].scatter(x, y, color='k', s=8.0)
ax[0, 0].plot(t, x, color='b')
ax[0, 1].plot(t, y, color='b')
ax[0, 2].plot(x, y, color='b')
Note that while the y(x) visualization gets two clustering, i.e. a stretched spiral and a straight line, without further information, this observation should not be over-interpreted.
Now, without a model to fit, what we could do is to have an interpolant numerical function for x(t) and y(t).
If x(t) and y(t) are assumed to be noiseless, then a simple 1D interpolator, as provided by scipy.interpolate.interp1d():
func_x_t = sp.interpolate.interp1d(t, x, kind='cubic', assume_sorted=True)
func_y_t = sp.interpolate.interp1d(t, y, kind='cubic', assume_sorted=True)
x_interp = func_x_t(t)
y_interp = func_y_t(t)
fig, ax = plt.subplots(1, 3, figsize=(15, 4), squeeze=False)
ax[0, 0].scatter(t, x, color='k', s=8.0)
ax[0, 1].scatter(t, y, color='k', s=8.0)
ax[0, 2].scatter(x, y, color='k', s=8.0)
ax[0, 0].plot(t, x_interp, color='r')
ax[0, 1].plot(t, y_interp, color='r')
ax[0, 2].plot(x_interp, y_interp, color='r')
Note that the red line is now generated by the interpolator. SciPy offers a variety of different interpolator which may be worth exploring.
If x(t) and y(t) are noisy measurements, a more useful interpolator may be obtained as above, but using a de-noised x(t) and y(t). Here, I assume that the high-frequency oscillations observed are driven by noise (both in x(t) and in y(t)), and a simple but effective de-noising approach would be Gaussian filtering (as provided by scipy.ndimage.gaussian_filter1d():
smooth_x = sp.ndimage.gaussian_filter1d(x, 12.0, mode='nearest')
smooth_y = sp.ndimage.gaussian_filter1d(y, 12.0, mode='nearest')
func_x_t = sp.interpolate.interp1d(t, smooth_x, kind='cubic', assume_sorted=True)
func_y_t = sp.interpolate.interp1d(t, smooth_y, kind='cubic', assume_sorted=True)
x_smooth_interp = func_x_t(t)
y_smooth_interp = func_y_t(t)
fig, ax = plt.subplots(1, 3, figsize=(15, 4), squeeze=False)
ax[0, 0].scatter(t, x, color='k', s=8.0)
ax[0, 1].scatter(t, y, color='k', s=8.0)
ax[0, 2].scatter(x, y, color='k', s=8.0)
ax[0, 0].plot(t, smooth_x, color='g')
ax[0, 1].plot(t, smooth_y, color='g')
ax[0, 2].plot(smooth_x, smooth_y, color='g')
ax[0, 0].plot(t, x_smooth_interp, color='r')
ax[0, 1].plot(t, y_smooth_interp, color='r')
ax[0, 2].plot(x_smooth_interp, y_smooth_interp, color='r')
Note that the *_smooth and *_smooth_interp gets plot on top of each other.
Another approach would be to use artificial neural network, e.g. from scikit-learn:
import sklearn as skl
import sklearn.neural_network as skl_nn
import sklearn.preprocessing
x_train = t.reshape(-1, 1)
y_train = data
reg = skl_nn.MLPRegressor(
solver='adam', hidden_layer_sizes=(24, 8), activation='tanh',
learning_rate='adaptive', max_iter=1024)
reg.fit(x_train, y_train)
y_predict = reg.predict(x_train)
x_ann = y_predict[:, 0]
y_ann = y_predict[:, 1]
fig, ax = plt.subplots(1, 3, figsize=(15, 4), squeeze=False)
ax[0, 0].scatter(t, x, color='k', s=8.0)
ax[0, 1].scatter(t, y, color='k', s=8.0)
ax[0, 2].scatter(x, y, color='k', s=8.0)
ax[0, 0].plot(t, x, color='b')
ax[0, 1].plot(t, y, color='b')
ax[0, 2].plot(x, y, color='b')
ax[0, 0].plot(t, x_ann, color='r')
ax[0, 1].plot(t, y_ann, color='r')
ax[0, 2].plot(x_ann, y_ann, color='r')
This gets you to an interpolator without the need to explicitly de-noise your target signal, which may be more or less desireable, depending on the application.
Re-parametrized x(t') and y(t') with t'(t) (reordering)
If we relax the requirement that x(t) and y(t) are from a timeseries, we could investigate x(t') and y(t') for a given t'(t) transformation.
A possible transformation that results a somewhat interesting is obtained by sorting the CSV data by y (the timeseries are sorted by x):
data = data[data[:, 1].argsort()]
x = data[:, 0]
y = data[:, 1]
with this transformation, we obtain the following interpolator for the ANN approach:
and this for the smoothed x(t') and y(t'):
Possibly, there are more effective reordering, but it may not be simple to formulate them.
A relatively simple formulation may involve clustering, but I believe this answer is already quite long.
(full code available here)
Per the comments, here is a graphical Python surface fitter reading data from the csv file. You should be able to mouse-click-drag and rotate the 3D ploys in 3-space for inspection.
In this example, I have guessed at a simple flat plane equation "csv_column_two = (a * index) + (b * csv_column_one) + c", because the 3D scatterplot and 3D surface plane shows what might be outliers on the left-hand side as plotted. With this example in hand you can easily try variations on data and equation. The fitter also prints the RMSE and R-squared value to aid in model evaluation and comparison.
import numpy, scipy, scipy.optimize
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt
graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels
# 3D contour plot lines
numberOfContourLines = 16
def SurfacePlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)
axes.scatter(x_data, y_data, z_data) # show data along with plotted surface
axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
axes.set_xlabel('Index') # X axis data label
axes.set_ylabel('CSV file column 1') # Y axis data label
axes.set_zlabel('CSV file column 2') # Z axis data label
plt.show()
plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ContourPlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot(x_data, y_data, 'o')
axes.set_title('Contour Plot') # add a title for contour plot
axes.set_xlabel('Index') # X axis data label
axes.set_ylabel('CSV file column 1') # Y axis data label
CS = matplotlib.pyplot.contour(X, Y, Z, numberOfContourLines, colors='k')
matplotlib.pyplot.clabel(CS, inline=1, fontsize=10) # labels for contours
plt.show()
plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ScatterPlot(data):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
axes.scatter(x_data, y_data, z_data)
axes.set_title('Scatter Plot (click-drag with mouse)')
axes.set_xlabel('Index')
axes.set_ylabel('CSV file column 1')
axes.set_zlabel('CSV file column 2')
plt.show()
plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def func(data, a, b, c):
x = data[0]
y = data[1]
return (a * x) + (b * y) + c
if __name__ == "__main__":
filename = 'test_bfa_corr.csv'
filetext = open(filename, 'rt').read()
lines = filetext.split('\n')
xData = []
yData = []
zData = []
for i in range(len(lines)):
line = lines[i]
spl = line.split(',')
xData.append(i+1)
yData.append(spl[0])
zData.append(spl[1])
xData = numpy.array(xData, dtype=float)
yData = numpy.array(yData, dtype=float)
zData = numpy.array(zData, dtype=float)
data = [xData, yData, zData]
initialParameters = [1.0, 1.0, 1.0] # these are the same as scipy default values in this example
# here a non-linear surface fit is made with scipy's curve_fit()
fittedParameters, pcov = scipy.optimize.curve_fit(func, [xData, yData], zData, p0 = initialParameters)
ScatterPlot(data)
SurfacePlot(func, data, fittedParameters)
ContourPlot(func, data, fittedParameters)
print('fitted parameters', fittedParameters)
modelPredictions = func(data, *fittedParameters)
absError = modelPredictions - zData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(zData))
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
I have some experimental data which needs to be fitted so we can elucidate x value for certain y value.
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.interpolate import interp1d
#from xlrd import open_workbook
points = np.array([(0, -0.0142294), (20, 0.0308458785714286), (50,
0.1091054), (100
,0.2379176875), (200, 0.404354166666667)])
x = points[:,0]
y = points[:,1]
def func(x, p1,p2):
return p1*(1-np.e**(-p2*x))
popt, pcov = curve_fit(func, x, y)
p1 = popt[0]
p2 = popt[1]
curvex=np.linspace(0,200,1000)
fit = func(curvex, p1, p2)
plt.plot(x, y, 'yo', label='data')
f = interp1d(fit, curvex, kind = 'nearest')
print (f(100))
plt.plot(curvex,fit,'r', linewidth=1)
plt.plot(x,y,'x',label = 'Xsaved')
plt.show()
Data is not fitted correctly. Help would be much appreciated.
Here is an example graphical fitter using your data and equation, with scipy's differential_evolution genetic algorithm used to supply initial parameter estimates. The scipy implementation of Differential Evolution ises the Latin Hypercube algorithm to ensure a thorough search of parameter space, and this requires bounds within which to search. In this example I have used the data maximum and minimum values as search bounds, this seems to work in this case. Note that it is much easier to find ranges within which to search than specific values.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import differential_evolution
import warnings
points = numpy.array([(0, -0.0142294), (20, 0.0308458785714286), (50, 0.1091054), (100 ,0.2379176875), (200, 0.404354166666667)])
x = points[:,0]
y = points[:,1]
# rename to match previous example code below
xData = x
yData = y
def func(x, p1,p2):
return p1*(1-numpy.exp(-p2*x))
# function for genetic algorithm to minimize (sum of squared error)
def sumOfSquaredError(parameterTuple):
warnings.filterwarnings("ignore") # do not print warnings by genetic algorithm
val = func(xData, *parameterTuple)
return numpy.sum((yData - val) ** 2.0)
def generate_Initial_Parameters():
# min and max used for bounds
maxX = max(xData)
minX = min(xData)
maxY = max(yData)
minY = min(yData)
minAllData = min(minX, minY)
maxAllData = min(maxX, maxY)
parameterBounds = []
parameterBounds.append([minAllData, maxAllData]) # search bounds for p1
parameterBounds.append([minAllData, maxAllData]) # search bounds for p2
# "seed" the numpy random number generator for repeatable results
result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3)
return result.x
# by default, differential_evolution completes by calling curve_fit() using parameter bounds
geneticParameters = generate_Initial_Parameters()
# now call curve_fit without passing bounds from the genetic algorithm,
# just in case the best fit parameters are aoutside those bounds
fittedParameters, pcov = curve_fit(func, xData, yData, geneticParameters)
print('Fitted parameters:', fittedParameters)
print()
modelPredictions = func(xData, *fittedParameters)
absError = modelPredictions - yData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
print()
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
# first the raw data as a scatter plot
axes.plot(xData, yData, 'D')
# create data for the fitted equation plot
xModel = numpy.linspace(min(xData), max(xData))
yModel = func(xModel, *fittedParameters)
# now the model as a line plot
axes.plot(xModel, yModel)
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
plt.show()
plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
I would like to fit a function to a 3d data.
I read the data with pandas:
df = pd.read_csv('data.csv')
Ca = df.Ca
q = df.q
L = df.L0
Then, I define my 3d function (z=f(x,y)) as:
def func(q, Ca, l0, v0, beta):
return l0 + q*v0*(1+beta/(q*Ca))
then I use curve_fit to find the best fit parameters:
from scipy.optimize import curve_fit
guess = (1,1,1)
popt, pcov = curve_fit(func, q,Ca,L, guess)
And it gives me the following errors:
ValueError: `sigma` has incorrect shape.
Do you know what is the mistake and how to solve it?
Thanks a lot for your help
Here is a graphical 3D fitter with 3D scatter plot, 3D surface plot, and 3D contour plot.
import numpy, scipy, scipy.optimize
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt
graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels
# 3D contour plot lines
numberOfContourLines = 16
def SurfacePlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)
axes.scatter(x_data, y_data, z_data) # show data along with plotted surface
axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
axes.set_zlabel('Z Data') # Z axis data label
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def ContourPlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot(x_data, y_data, 'o')
axes.set_title('Contour Plot') # add a title for contour plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
CS = matplotlib.pyplot.contour(X, Y, Z, numberOfContourLines, colors='k')
matplotlib.pyplot.clabel(CS, inline=1, fontsize=10) # labels for contours
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def ScatterPlot(data):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
axes.scatter(x_data, y_data, z_data)
axes.set_title('Scatter Plot (click-drag with mouse)')
axes.set_xlabel('X Data')
axes.set_ylabel('Y Data')
axes.set_zlabel('Z Data')
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def func(data, a, alpha, beta):
x = data[0]
y = data[1]
return a * (x**alpha) * (y**beta)
if __name__ == "__main__":
xData = numpy.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
yData = numpy.array([11.0, 12.1, 13.0, 14.1, 15.0, 16.1, 17.0, 18.1, 90.0])
zData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.0, 9.9])
data = [xData, yData, zData]
initialParameters = [1.0, 1.0, 1.0] # these are the same as scipy default values in this example
# here a non-linear surface fit is made with scipy's curve_fit()
fittedParameters, pcov = scipy.optimize.curve_fit(func, [xData, yData], zData, p0 = initialParameters)
ScatterPlot(data)
SurfacePlot(func, data, fittedParameters)
ContourPlot(func, data, fittedParameters)
print('fitted prameters', fittedParameters)
modelPredictions = func(data, *fittedParameters)
absError = modelPredictions - zData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(zData))
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
I was given a set of raw datum and have to model it by means of some machine learning techniques. After some research, I decided to do with the method of linear approximation.
Description of the equation.
z - depth (meters)
T(z) - temperature at the depth z
T(zᵢ) - temperature at the depth zᵢ
T₀ - temperature at the surface (It is constant and known)
K - coefficient of geothermal gradient (How the temperature changes with respect to the depth)
Mᵢ - flow rate of the liquid at the depth zᵢ
As it is shown from the equation we can find the temperature of the liquid in the any depth of the well bore.
I have list of depths, temperature and the flow rate of the liquid. I have to model an equation according to these datum by means of python3. Currently I use matplotlib library for such type of calculations.
Here is an example of non-linear multiple regression in Python 3, this should easily be adapted to your multiple regression problem.
import numpy, scipy, scipy.optimize
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt
graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels
# 3D contour plot lines
numberOfContourLines = 16
def SurfacePlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)
axes.scatter(x_data, y_data, z_data) # show data along with plotted surface
axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
axes.set_zlabel('Z Data') # Z axis data label
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def ContourPlot(func, data, fittedParameters):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
x_data = data[0]
y_data = data[1]
z_data = data[2]
xModel = numpy.linspace(min(x_data), max(x_data), 20)
yModel = numpy.linspace(min(y_data), max(y_data), 20)
X, Y = numpy.meshgrid(xModel, yModel)
Z = func(numpy.array([X, Y]), *fittedParameters)
axes.plot(x_data, y_data, 'o')
axes.set_title('Contour Plot') # add a title for contour plot
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
CS = matplotlib.pyplot.contour(X, Y, Z, numberOfContourLines, colors='k')
matplotlib.pyplot.clabel(CS, inline=1, fontsize=10) # labels for contours
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def ScatterPlot(data):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
matplotlib.pyplot.grid(True)
axes = Axes3D(f)
x_data = data[0]
y_data = data[1]
z_data = data[2]
axes.scatter(x_data, y_data, z_data)
axes.set_title('Scatter Plot (click-drag with mouse)')
axes.set_xlabel('X Data')
axes.set_ylabel('Y Data')
axes.set_zlabel('Z Data')
plt.show()
plt.close('all') # clean up after using pyplot or else thaere can be memory and process problems
def func(data, a, alpha, beta):
t = data[0]
p_p = data[1]
return a * (t**alpha) * (p_p**beta)
if __name__ == "__main__":
xData = numpy.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
yData = numpy.array([11.0, 12.1, 13.0, 14.1, 15.0, 16.1, 17.0, 18.1, 90.0])
zData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.0, 9.9])
data = [xData, yData, zData]
# this example uses curve_fit()'s default initial paramter values
fittedParameters, pcov = scipy.optimize.curve_fit(func, [xData, yData], zData)
ScatterPlot(data)
SurfacePlot(func, data, fittedParameters)
ContourPlot(func, data, fittedParameters)
print('fitted prameters', fittedParameters)