Data fitting with curve_fit not correct - python-3.x

I have some experimental data that needs to be fitted so that we can determine the x value for a given y value.
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.interpolate import interp1d
#from xlrd import open_workbook
points = np.array([(0, -0.0142294), (20, 0.0308458785714286), (50, 0.1091054),
                   (100, 0.2379176875), (200, 0.404354166666667)])
x = points[:,0]
y = points[:,1]
def func(x, p1, p2):
    return p1*(1-np.e**(-p2*x))
popt, pcov = curve_fit(func, x, y)
p1 = popt[0]
p2 = popt[1]
curvex=np.linspace(0,200,1000)
fit = func(curvex, p1, p2)
plt.plot(x, y, 'yo', label='data')
f = interp1d(fit, curvex, kind = 'nearest')
print (f(100))
plt.plot(curvex,fit,'r', linewidth=1)
plt.plot(x,y,'x',label = 'Xsaved')
plt.show()
The data is not fitted correctly. Any help would be much appreciated.

Here is an example graphical fitter using your data and equation, with scipy's differential_evolution genetic algorithm used to supply initial parameter estimates. The scipy implementation of differential evolution uses the Latin Hypercube algorithm to ensure a thorough search of parameter space, and this requires bounds within which to search. In this example I have used the data minimum and maximum values as search bounds, which seems to work in this case. Note that it is much easier to find ranges within which to search than specific values.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import differential_evolution
import warnings
points = numpy.array([(0, -0.0142294), (20, 0.0308458785714286), (50, 0.1091054), (100 ,0.2379176875), (200, 0.404354166666667)])
x = points[:,0]
y = points[:,1]
# rename to match previous example code below
xData = x
yData = y
def func(x, p1, p2):
    return p1*(1-numpy.exp(-p2*x))
# function for genetic algorithm to minimize (sum of squared error)
def sumOfSquaredError(parameterTuple):
    warnings.filterwarnings("ignore") # do not print warnings by genetic algorithm
    val = func(xData, *parameterTuple)
    return numpy.sum((yData - val) ** 2.0)
def generate_Initial_Parameters():
    # min and max used for bounds
    maxX = max(xData)
    minX = min(xData)
    maxY = max(yData)
    minY = min(yData)
    minAllData = min(minX, minY)
    maxAllData = max(maxX, maxY)
    parameterBounds = []
    parameterBounds.append([minAllData, maxAllData]) # search bounds for p1
    parameterBounds.append([minAllData, maxAllData]) # search bounds for p2
    # "seed" the numpy random number generator for repeatable results
    result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3)
    return result.x
# by default, differential_evolution polishes its best result with a local minimizer within the parameter bounds
geneticParameters = generate_Initial_Parameters()
# now call curve_fit without passing bounds from the genetic algorithm,
# just in case the best fit parameters are outside those bounds
fittedParameters, pcov = curve_fit(func, xData, yData, geneticParameters)
print('Fitted parameters:', fittedParameters)
print()
modelPredictions = func(xData, *fittedParameters)
absError = modelPredictions - yData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
print()
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)
    # first the raw data as a scatter plot
    axes.plot(xData, yData, 'D')
    # create data for the fitted equation plot
    xModel = numpy.linspace(min(xData), max(xData))
    yModel = func(xModel, *fittedParameters)
    # now the model as a line plot
    axes.plot(xModel, yModel)
    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label
    plt.show()
    plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
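Since the original goal is to read off an x value for a given y value, here is a minimal sketch of one way to do that with the fitted parameters from above (the target y value below is made up): the model y = p1*(1 - exp(-p2*x)) can be inverted analytically, which avoids interpolating the fitted curve.
p1, p2 = fittedParameters
yTarget = 0.1  # hypothetical target y value; must be smaller than p1
xAtTarget = -numpy.log(1.0 - yTarget / p1) / p2
print('x at y =', yTarget, ':', xAtTarget)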

Related

A Traceback while drawing the trajectory of gradient descent in 2D using TensorFlow

I want to draw the trajectory of gradient descent in 2D. Unfortunately, I had the following traceback:
InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder_1' with dtype float and shape [?,1]
I am using TensorFlow v 1.13.1, along with Python v 3.6.7 from Google COLAB.
From the below code, I found that the variable target is of class <tf.Tensor 'Placeholder_1:0' shape=(?, 1) dtype=float32>.
I tried to feed it as mentioned, with feed_dict={features: x, target: y}, but I still get the same traceback.
Here is the code I used for this task:
## BLOCK 1
import tensorflow as tf
import numpy as np
from matplotlib import animation, rc
import matplotlib_utils
from IPython.display import HTML, display_html
import matplotlib.pyplot as plt
%matplotlib inline
## BLOCK 2
tf.reset_default_graph()
# generate model data
N = 1000
D = 3
x = np.random.random((N, D))
w = np.random.random((D, 1))
y = x @ w + np.random.randn(N, 1) * 0.20
## Deep Learning steps:
# 1. Get input (features) and true output (target)
features = tf.placeholder(tf.float32, shape=(None, D))
target = tf.placeholder(tf.float32, shape=(None, 1))
weights = tf.get_variable("weights", shape=(D, 1), dtype=tf.float32)
# 2. Compute the "guess" (predictions) based on the features and weights
predictions = features @ weights
# 3. Compute the loss based on the difference between the predictions and the target
loss = tf.reduce_mean((target - predictions) ** 2)
# 4. Update the weights (parameters) based on the gradient descent of the loss
optimizer = tf.train.GradientDescentOptimizer(0.1)
step = optimizer.minimize(loss)
s = tf.Session()
s.run(tf.global_variables_initializer())
_, curr_loss, curr_weights = s.run([step, loss, weights],
feed_dict={features: x, target: y})
I expect the following code to run properly (the traceback is raised while running this code):
## BLOCK 3
# nice figure settings
fig, ax = plt.subplots()
y_true_value = s.run(target)
level_x = np.arange(0, 2, 0.02)
level_y = np.arange(0, 3, 0.02)
X, Y = np.meshgrid(level_x, level_y)
Z = (X - y_true_value[0])**2 + (Y - y_true_value[1])**2
ax.set_xlim(-0.02, 2)
ax.set_ylim(-0.02, 3)
s.run(tf.global_variables_initializer())
ax.scatter(*s.run(target), c='red')
contour = ax.contour(X, Y, Z, 10)
ax.clabel(contour, inline=1, fontsize=10)
line, = ax.plot([], [], lw=2)
# start animation with empty trajectory
def init():
    line.set_data([], [])
    return (line,)
trajectory = [s.run(predictions)]
# one animation step (make one GD step)
def animate(i):
    s.run(step)
    trajectory.append(s.run(predictions))
    line.set_data(*zip(*trajectory))
    return (line,)
anim = animation.FuncAnimation(fig, animate, init_func=init,
frames=100, interval=20, blit=True)
Note: the library matplotlib_utils can be found here!
Example
This is an example in which the code runs perfectly.
If I run the following code instead of the second block, it displays a beautiful gradient descent in 2D.
y_guess = tf.Variable(np.zeros(2, dtype='float32'))
y_true = tf.range(1, 3, dtype='float32')
loss = tf.reduce_mean((y_guess - y_true + 0.5*tf.random_normal([2]))**2)
optimizer = tf.train.RMSPropOptimizer(0.03, 0.5)
step = optimizer.minimize(loss, var_list=y_guess)
This trajectory is something like this:
Adding this block of code will display a nicely auto-generated animation of the trajectory:
## BLOCK 4
try:
    display_html(HTML(anim.to_html5_video()))
except (RuntimeError, KeyError):
    # In case the built-in renderers are unavailable, fall back to
    # a custom one that doesn't require external libraries
    anim.save(None, writer=matplotlib_utils.SimpleMovieWriter(0.001))
Now I want to use my own code (second block of code) to draw such trajectory of gradient descent in 2D.
Pass the feed_dict argument to tf.Session.run. Example:
s.run([step, loss, weights], feed_dict={features: x, target: y})
Explanation:
When an operation in the computation graph depends on a placeholder, a value for that placeholder must be fed. Operations like s.run(tf.global_variables_initializer()) do not depend on the placeholders, so not feeding them does not raise an error.
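As a minimal, self-contained sketch of that rule (TF 1.x API; the placeholder and variable names below are made up for illustration): a fetch whose value flows through a placeholder needs that placeholder fed, while fetches that only touch variables do not.
import numpy as np
import tensorflow as tf
x_ph = tf.placeholder(tf.float32, shape=(None, 1))             # placeholder: must be fed when used
w = tf.get_variable("w_demo", shape=(1, 1), dtype=tf.float32)  # variable: stored in the graph
y_op = tf.matmul(x_ph, w)                                       # depends on the placeholder
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())                 # no placeholder involved, no feed_dict needed
    print(sess.run(w))                                           # also fine without feed_dict
    print(sess.run(y_op, feed_dict={x_ph: np.ones((4, 1), dtype=np.float32)}))  # needs feed_dict
    # sess.run(y_op)  # would raise InvalidArgumentError: must feed a value for the placeholder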

Curve Fitting multiple x variables

I'm currently trying to curve fit some experimental data to a simple power-law equation.
Nu = C*Re**m*Pr**(1/3)
I am trying to use the scipy.optimize.curve_fit function to do this, but I am getting the error "Result from function call is not a proper array of floats." I don't know why I am getting this error, but I wonder if it is because I have too many arrays to use in my equation.
My code is as follows:
import matplotlib.pyplot as plt
import scipy.optimize as so
def function(C, m):
    result = []
    for i, j in zip(Re, Pr):
        y = C * i ** m * j ** (1/3)
        result.append(y)
    return result
parameters, covariance = so.curve_fit(function, Re, Nu)
y2 = function(Re, Pr, *parameters)
print(parameters)
plt.plot(Re, Nu)
plt.plot(Re, y2)
plt.show()
Here is a graphing 3D surface fitter using curve_fit that has a 3D scatterplot, 3D surface plot, and a contour plot. Note that the initial parameter estimates are all 1.0, and this example does not use scipy's genetic algorithm to estimate initial parameter values.
import numpy, scipy, scipy.optimize
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt
graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels
# 3D contour plot lines
numberOfContourLines = 16
def SurfacePlot(func, data, fittedParameters):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    matplotlib.pyplot.grid(True)
    axes = Axes3D(f)
    x_data = data[0]
    y_data = data[1]
    z_data = data[2]
    xModel = numpy.linspace(min(x_data), max(x_data), 20)
    yModel = numpy.linspace(min(y_data), max(y_data), 20)
    X, Y = numpy.meshgrid(xModel, yModel)
    Z = func(numpy.array([X, Y]), *fittedParameters)
    axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)
    axes.scatter(x_data, y_data, z_data) # show data along with plotted surface
    axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label
    axes.set_zlabel('Z Data') # Z axis data label
    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ContourPlot(func, data, fittedParameters):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)
    x_data = data[0]
    y_data = data[1]
    z_data = data[2]
    xModel = numpy.linspace(min(x_data), max(x_data), 20)
    yModel = numpy.linspace(min(y_data), max(y_data), 20)
    X, Y = numpy.meshgrid(xModel, yModel)
    Z = func(numpy.array([X, Y]), *fittedParameters)
    axes.plot(x_data, y_data, 'o')
    axes.set_title('Contour Plot') # add a title for contour plot
    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label
    CS = matplotlib.pyplot.contour(X, Y, Z, numberOfContourLines, colors='k')
    matplotlib.pyplot.clabel(CS, inline=1, fontsize=10) # labels for contours
    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def ScatterPlot(data):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    matplotlib.pyplot.grid(True)
    axes = Axes3D(f)
    x_data = data[0]
    y_data = data[1]
    z_data = data[2]
    axes.scatter(x_data, y_data, z_data)
    axes.set_title('Scatter Plot (click-drag with mouse)')
    axes.set_xlabel('X Data')
    axes.set_ylabel('Y Data')
    axes.set_zlabel('Z Data')
    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems
def func(data, a, alpha, beta):
    t = data[0]
    p_p = data[1]
    return a * (t**alpha) * (p_p**beta)
if __name__ == "__main__":
xData = numpy.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
yData = numpy.array([11.0, 12.1, 13.0, 14.1, 15.0, 16.1, 17.0, 18.1, 90.0])
zData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.0, 9.9])
data = [xData, yData, zData]
initialParameters = [1.0, 1.0, 1.0] # these are the same as scipy default values in this example
# here a non-linear surface fit is made with scipy's curve_fit()
fittedParameters, pcov = scipy.optimize.curve_fit(func, [xData, yData], zData, p0 = initialParameters)
ScatterPlot(data)
SurfacePlot(func, data, fittedParameters)
ContourPlot(func, data, fittedParameters)
print('fitted prameters', fittedParameters)
modelPredictions = func(data, *fittedParameters)
absError = modelPredictions - zData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(zData))
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
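Applied to the original Nu = C*Re**m*Pr**(1/3) model, here is a minimal sketch of the same idea (the Re, Pr and Nu values below are made up; substitute the real experimental arrays): curve_fit expects the model function's first argument to be the independent data, so both independent variables can be stacked into one array and unpacked inside the function.
import numpy as np
from scipy.optimize import curve_fit
# hypothetical example data; replace with the real Re, Pr and Nu arrays
Re = np.array([1.0e4, 2.0e4, 3.0e4, 4.0e4])
Pr = np.array([0.70, 0.70, 0.71, 0.71])
Nu = np.array([30.0, 52.0, 71.0, 88.0])
def nusselt(data, C, m):
    Re_, Pr_ = data  # data is a (2, N) array holding both independent variables
    return C * Re_**m * Pr_**(1.0/3.0)
parameters, covariance = curve_fit(nusselt, np.vstack((Re, Pr)), Nu, p0=[0.1, 0.8])  # p0 is a rough initial guess
print('C, m =', parameters)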

Multi-variable linear regression using Tensorflow

I am trying to implement multi-variable linear regression using tensorflow. I have a csv file with 200 rows and 3 feature columns, with the last column as the output. Something like this:
I have written the following code:
from __future__ import print_function
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import csv
import pandas
rng = np.random
# Parameters
learning_rate = 0.01
training_epochs = 1000
display_step = 50
I get the data from the file using pandas and store it:
# Training Data
dataframe = pandas.read_csv("Advertising.csv", delim_whitespace=True, header=None)
dataset = dataframe.values
X1,X2,X3,y1 = [],[],[],[]
for i in range(1, len(dataset)):
    X = dataset[i][0]
    X1.append(np.float32(X.split(",")[1]))
    X2.append(np.float32(X.split(",")[2]))
    X3.append(np.float32(X.split(",")[3]))
    y1.append(np.float32(X.split(",")[4]))
X = np.column_stack((X1,X2))
X = np.column_stack((X,X3))
I assign the placeholders and variables and the linear regression model:
n_samples = len(X1)
#print(n_samples) = 17
# tf Graph Input
X_1 = tf.placeholder(tf.float32, [3, None])
Y = tf.placeholder(tf.float32, [None])
# Set model weights
W1 = tf.Variable(rng.randn(), [n_samples,3])
b = tf.Variable(rng.randn(), [n_samples])
# Construct a linear model
pred = tf.add(tf.matmul(W1, X_1), b)
# Mean squared error
cost = tf.reduce_sum(tf.pow(pred-Y, 2))/(2*n_samples)
# Gradient descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# Initializing the variables
init = tf.global_variables_initializer()
# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    # Fit all training data
    for epoch in range(training_epochs):
        for (x1, y) in zip(X, y1):
            sess.run(optimizer, feed_dict={X_1: x1, Y: y})
        # Display logs per epoch step
        if (epoch+1) % display_step == 0:
            c = sess.run(cost, feed_dict={X_1: x1, Y: y})
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(c),
                  "Weights=", sess.run(W1), "b=", sess.run(b))
I get the following error which I am not able to debug:
ValueError: Shape must be rank 2 but is rank 0 for 'MatMul' (op: 'MatMul') with input shapes: [], [3,?].
Can you help me with how to solve this?
Thanks in advance.
tf.Variable doesn't take inputs the way you are thinking; the second parameter is not the shape. You set the shape of the variable through the initializer (the first parameter). See https://www.tensorflow.org/api_docs/python/tf/Variable
Your code
# Set model weights
W1 = tf.Variable(rng.randn(), [n_samples,3])
b = tf.Variable(rng.randn(), [n_samples])
My suggested change
initial1 = tf.constant(rng.randn(), dtype=tf.float32, shape=[n_samples,3])
initial2 = tf.constant(rng.randn(), dtype=tf.float32, shape=[n_samples,3])
W1 = tf.Variable(initial_value=initial1)
b = tf.Variable(initial_value=initial2)
In answer to the additional issues which arise after fixing the initial question, the following code runs - but there might still be some logical errors you need to think about, like your # Display logs per epoch step section.
from __future__ import print_function
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import csv
import pandas
rng = np.random
# Parameters
learning_rate = 0.01
training_epochs = 1000
display_step = 50
# Training Data
#Created some fake data
dataframe = [[230.1,37.8,69.2,22.1],[2230.1,32.8,61.2,21.1]] #pandas.read_csv("Advertising.csv", delim_whitespace=True, header=None)
dataset = dataframe
X1,X2,X3,y1 = [],[],[],[]
for i in range(0, len(dataset)):
    X = dataset[i][0]
    X1.append(np.float32(dataset[i][0]))
    X2.append(np.float32(dataset[i][1]))
    X3.append(np.float32(dataset[i][2]))
    y1.append(np.float32(dataset[i][3]))
#X=np.array([X1,X2,X3])
X = np.column_stack((X1,X2,X3)) ##MYEDIT: This combines all three values. If you find you need to stack in a different way then you will need to ensure the shapes below match this shape.
#X = np.column_stack((X,X3))
n_samples = len(X1)
#print(n_samples) = 17
# tf Graph Input
X_1 = tf.placeholder(tf.float32, [ None,3])##MYEDIT: Changed order
Y = tf.placeholder(tf.float32, [None])
# Set model weights
initial1 = tf.constant(rng.randn(), dtype=tf.float32, shape=[3,1]) ###MYEDIT: change order and you are only giving 1 sample at a time with your method of calling
initial2 = tf.constant(rng.randn(), dtype=tf.float32, shape=[3,1])
W1 = tf.Variable(initial_value=initial1)
b = tf.Variable(initial_value=initial2)
mul=tf.matmul(W1, X_1) ##MYEDIT: remove matmul from pred for clarity and shape checking
# Construct a linear model
pred = tf.add(mul, b)
# Mean squared error
cost = tf.reduce_sum(tf.pow(pred-Y, 2))/(2*n_samples)
# Gradient descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# Initializing the variables
init = tf.global_variables_initializer()
# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    # Fit all training data
    for epoch in range(training_epochs):
        for (x1, y) in zip(X, y1):
            Xformatted = np.array([x1])  # has shape (1,3) #MYEDIT: separated this to demonstrate shapes
            yformatted = np.array([y])   # shape (1,) #MYEDIT: separated this to demonstrate shapes
            # NB. X_1 shape is (?,3) and Y shape is (?,)
            sess.run(optimizer, feed_dict={X_1: Xformatted, Y: yformatted})
        # Display logs per epoch step
        if (epoch+1) % display_step == 0:
            c = sess.run(cost, feed_dict={X_1: Xformatted, Y: yformatted}) # NB. x1 and y are out of scope here - you will only get the last values. Double check if this is what you meant.
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(c),
                  "Weights=", sess.run(W1), "b=", sess.run(b))
You need to feed a matrix into tf.matmul(W1, X_1). Check the shapes of W1 and X_1 in your code.
See the question here for more details
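As a minimal sketch of that check (TF 1.x API; the names mirror the question, the values are made up): printing the static shapes shows why a variable created from a scalar initial value cannot be used in tf.matmul.
import numpy as np
import tensorflow as tf
rng = np.random
X_1 = tf.placeholder(tf.float32, [3, None])
W_bad = tf.Variable(rng.randn(), dtype=tf.float32)                            # scalar initial value -> rank 0
W_ok = tf.Variable(tf.constant(rng.randn(), dtype=tf.float32, shape=[1, 3]))  # rank-2 initial value
print(W_bad.get_shape())  # () -> tf.matmul(W_bad, X_1) raises "Shape must be rank 2 but is rank 0"
print(W_ok.get_shape())   # (1, 3)
print(X_1.get_shape())    # (3, ?)
pred = tf.matmul(W_ok, X_1)  # valid: (1, 3) x (3, ?) -> (1, ?)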

How to change the ticks in a confusion matrix?

I am working with a confusion matrix (Figure A)
How can I make my ticks start from 1 to 3 instead of 0 to 2?
I tried adding +1 to tick_marks, but it does not work (Figure B).
Check my code:
import itertools
cm = confusion_matrix(y_test, y_pred)
np.set_printoptions(precision=2)
print('Confusion matrix, without normalization')
print(cm)
plt.figure()
plot_confusion_matrix(cm)
def plot_confusion_matrix(cm, title='Confusion matrix', cmap=plt.cm.Oranges):
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(iris.target_names)) + 1
    plt.xticks(tick_marks, rotation=45)
    plt.yticks(tick_marks)
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
Figure A:
Figure B
You should get the axes of the plot and change the xticklabels (if that's what you intend to do):
import itertools
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
# import some data to play with
iris = datasets.load_iris()
X = iris.data
y = iris.target
class_names = iris.target_names
# Split the data into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
# Run classifier, using a model that is too regularized (C too low) to see
# the impact on the results
classifier = svm.SVC(kernel='linear', C=0.01)
y_pred = classifier.fit(X_train, y_train).predict(X_test)
def plot_confusion_matrix(cm, title='Confusion matrix', cmap=plt.cm.Oranges):
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(iris.target_names))
    plt.xticks(tick_marks, rotation=45)
    ax = plt.gca()
    ax.set_xticklabels((ax.get_xticks() + 1).astype(str))
    plt.yticks(tick_marks)
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
cm = confusion_matrix(y_test, y_pred)
np.set_printoptions(precision=2)
print('Confusion matrix, without normalization')
print(cm)
fig, ax = plt.subplots()
plot_confusion_matrix(cm)
plt.show()
result:
I faced a similar problem: when I wanted to use custom labels for my classes, either the squared boxes went out of bounds or the labels were offset, as you show here.
If you have multiple labels (>7), then first you need to explicitly set the tick frequency to one using plticker.MultipleLocator. Then you simply set the x and y ticklabels without touching the ticks. (Not setting the xticks and yticks is important; if you do, the imshow/matshow part gets chopped off at the top.) Add the following lines inside the plot_confusion_matrix function.
import matplotlib.ticker as plticker
fig = plt.figure()
ax = fig.add_subplot(111)
cax = ax.matshow(cm,cmap=cmap)
fig.colorbar(cax)
loc = plticker.MultipleLocator(base=1.0)
ax.xaxis.set_major_locator(loc)
ax.yaxis.set_major_locator(loc)
ax.set_yticklabels([''] + list(iris.target_names))
ax.set_xticklabels([''] + list(iris.target_names))

Why doesn't Nearest Neighbour work on my data?

I am trying to learn a little about nearest neighbour matching. Below you see two scatter plots. The first shows the real data. I am trying to use scikit-learn's NN classifier to identify the white observations. The second scatter plot shows my attempt - which is entirely useless, as you can see.
I don't get why that is the case. It seems that the white observations are closely related and different from the other observations. What is happening here?
Here is what I do:
# import necessary packages
import pandas as pd
import numpy as np
import sklearn as skl
from sklearn.cross_validation import train_test_split as tts
import matplotlib.pyplot as plt
from sklearn import neighbors
from matplotlib.colors import ListedColormap
# import data and give a little overview
sample = pd.read_stata('real_data_1.dta')
s = sample
print(s.dtypes)
print(s.shape)
# Nearest Neighboor
print(__doc__)
n_neighbors = 1
X = np.array((s.t_ums_ma, s.t_matauf)).reshape(918, 2)
y = np.array(s.matauf_measure)
plt.scatter(s.t_ums_ma,s.t_matauf, c=s.matauf_measure, label='Nordan Scatter', color='b', s=25, marker="o")
plt.xlabel('crisis')
plt.ylabel('current debt')
plt.title('Interesting Graph\nCheck it out')
plt.legend()
plt.gray()
plt.show()
X_train, X_test, y_train, y_test = tts(X, y, test_size = 1)
h = 0.02
# Create color maps
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])
for weights in ['uniform', 'distance']:
    # we create an instance of Neighbours Classifier and fit the data.
    clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weights)
    clf.fit(X, y)
    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max]x[y_min, y_max].
    x_min, x_max = X_train[:, 0].min() - 0.01, X[:, 0].max() + 0.01
    y_min, y_max = X_train[:, 1].min() - 0.01, X[:, 1].max() + 0.01
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.figure()
    plt.pcolormesh(xx, yy, Z, cmap=cmap_light)
    # Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.title("3-Class classification (k = %i, weights = '%s')"
              % (n_neighbors, weights))
    plt.show()
Any help is greatly appreciated! Best /R
