How to determine the minimum grid length | Numerical Plasma physics - python-3.x

I am learning particle-in-cell (PIC) python code. PIC currently represents one of the most important plasma simulation tools. It is particularly suited to the study of kinetic or non-Maxwellian effects.
Given the following dispersion relation
I found the range of wave numbers $k$ for which the oscillation frequency is imaginary to be $-\left|\frac{w}{v_0}\right| < k < \left|\frac{w}{v_0}\right|$.
What I am trying to understand is how to find the minimum grid length $L_{min}$ as a function of $\frac{v_0}{w}$. $L_{min}$ is the minimum grid length needed to support such unstable modes.
I think we should be able to study the plasma behaviour for both $L < L_{min}$ and $L > L_{min}$. I was told I should adjust the number of simulation particles relative to the number of grid points to improve the statistics. Besides, the number of particles per cell (i.e. npart/ngrid) should be fixed and should be much greater than 1, in order to reduce numerical noise. The runtime needed (here in units of $\omega_p^{-1}$) to observe the instability can be estimated from the maximum growth rate.
Here's the full python 3 code I am working with. Please note I have little experience with coding so I might ask lots of follow up questions. Thank you.
#! /usr/bin/python
#
# Python script for computing and plotting single charged particle
# trajectories in prescribed electric and magnetic fields.
# Roughly equivalent to boris.m matlab program
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.widgets import Slider, Button, RadioButtons
from mpl_toolkits.mplot3d import Axes3D
import os
import os.path
import sys
from sys import exit
from time import sleep
# ===================================
#
# Function to integrate particle trajectory
# in given E, B fields
#
# ===================================
def integrate(E0, B0, vz0):
    """Integrate one particle orbit in uniform E (along y) and B (along z) fields.

    Positions are advanced with a leapfrog step and velocities with the Boris
    rotation. The track is written into the module-level arrays ``xp``,
    ``yp``, ``zp`` and ``tp``; the module-level ``dt``, ``nsteps`` and
    ``larmor`` are updated to match the requested field strength.

    Parameters:
        E0: y-component of the electric field.
        B0: z-component of the magnetic field. Must be non-zero because the
            time step and run time are derived from the cyclotron frequency.
        vz0: initial velocity along z (stored into ``v0[2]``).

    Raises:
        ZeroDivisionError: if ``B0`` (and hence the cyclotron frequency) is zero.
    """
    global dt, v0, x0, xp, yp, zp, qom, larmor, nsteps
    wc = qom * B0  # cyclotron frequency
    if wc == 0:
        raise ZeroDivisionError(
            "B0 must be non-zero: the time step is derived from the cyclotron frequency"
        )
    larmor = vperp / wc
    print("Cyclotron frequency =", wc)
    print("Perpendicular velocity v_p=", vperp)
    print("Larmor radius=", larmor)
    norm = 1.  # choose whether to normalise plot axes dimensions to Larmor radius
    trun = 5 * 2 * np.pi / wc  # total runtime
    dt = .1 / wc  # timestep - adjust to current B-field
    # never step past the end of the preallocated track arrays
    nsteps = min(int(trun / dt), len(xp))
    E = np.array([0., E0, 0.])  # E-field
    B = np.array([0., 0., B0])  # B-field
    xp[0] = x0[0]
    yp[0] = x0[1]
    zp[0] = x0[2]
    v0[2] = vz0  # z-component
    # Advance the initial velocity by half a step: the loop below moves the
    # position first, so it needs the velocity at t = dt/2.
    v = v0 + .5 * dt * qom * (E + np.cross(v0, B))
    x = x0
    # Boris rotation vectors; the fields are uniform and constant, so these
    # do not change from step to step.
    qomdt2 = dt * qom / 2
    h = qomdt2 * B
    s = 2 * h / (1 + np.dot(h, h))
    for itime in range(1, nsteps):
        x = x + dt * v
        xp[itime] = x[0] / norm
        yp[itime] = x[1] / norm
        zp[itime] = x[2] / norm
        tp[itime] = itime * dt
        #
        # Boris mover: solves dv/dt = q/m*(E + vxB) to 2nd order accuracy in dt
        #
        u = v + qomdt2 * E  # first half of the electric kick
        up = u + np.cross(u + np.cross(u, h), s)  # magnetic rotation
        v = up + qomdt2 * E  # second half of the electric kick
# ===================================
# Make 2D plots of particle orbit
#
# ===================================
def plot_track2D():
    """Plot the stored orbit in the x-y plane and save it to ./particle_orbit.png.

    Reads the module-level track arrays ``xp`` and ``yp`` and the step count
    ``nsteps``. Returns nothing.
    """
    fig = plt.figure(figsize=(8, 8))  # initialize plot
    # 1st subplot in 2x2 arrangement; draw on the axes object explicitly
    # instead of relying on pyplot's "current axes"
    ax = fig.add_subplot(221)
    ax.grid(True, which='both')
    ax.set_xlim((np.min(xp), np.max(xp)))
    ax.set_ylim((np.min(yp), np.max(yp)))
    ax.set_xlabel('$x$')
    ax.set_ylabel('$y$')
    ax.plot(xp[0:nsteps], yp[0:nsteps], c='b')
    fig.add_subplot(222)  # 2nd subplot (left empty)
    plt.draw()
    fig.savefig('./particle_orbit.png')  # Save plot to file
# ===================================
#
# Make 3D plot of particle orbit
#
# ===================================
def plot_track3D():
    """Redraw the stored orbit as a 3D scatter plot on the module-level axes ``ax1``.

    Reads the module-level track arrays ``xp``, ``yp``, ``zp`` and ``tp``;
    the points are coloured by ``tp``. Returns nothing.
    """
    ax1.cla()
    plt.ion()
    # Switch the grid on for ax1 itself: plt.grid() acts on pyplot's current
    # axes, which is not ax1 once further axes have been added to the figure.
    ax1.grid(True)
    ax1.set_xlim((np.min(xp), np.max(xp)))
    ax1.set_ylim((np.min(yp), np.max(yp)))
    ax1.set_zlim((np.min(zp), np.max(zp)))
    ax1.set_xlabel('$x $ [m]')
    ax1.set_ylabel('$y $ [m]')
    ax1.set_zlabel('$z $ [m]')
    ax1.scatter(xp, yp, zp, c=tp, marker='o')  # tracks coloured by elapsed time since start
    plt.draw()
# =============================================
#
# Main program
#
# =============================================
print("Charged particle orbit solver")
plotboxsize = 8.
animated = True
x0 = np.array([0., 0., 0.])  # initial coords
vz0 = 0.
v0 = np.array([-1e2, 0., vz0])  # initial velocity
# integrate() applies B along z, so the perpendicular speed is built from the
# x and y velocity components (the z component is parallel to B).
vperp = np.sqrt(v0[0]**2 + v0[1]**2)
E0 = 0.
B0 = .1
e = 1.602176e-19  # electron charge
m = 9.109e-31  # electron mass
qom = e / m  # charge/mass ratio
wc = qom * B0  # cyclotron frequency
larmor = vperp / wc
print(wc, vperp, larmor)
trun = 5 * 2 * np.pi / wc  # total runtime
dt = .1 / wc  # timestep - adjust to current B-field
nsteps = int(trun / dt)  # timesteps
B1 = np.array([0., 0., 0.1])  # gradient B perturbation
tp = np.zeros(nsteps)  # variables to store particle tracks
xp = np.zeros(nsteps)
yp = np.zeros(nsteps)
zp = np.zeros(nsteps)
vxp = np.zeros(nsteps)
vyp = np.zeros(nsteps)
vzp = np.zeros(nsteps)
# Compute orbit
integrate(E0, B0, vz0)
# 2D orbit plotter
plot_track2D()
exit(0)  # Quit script before 3D plot - comment out to continue!
# Start 3D interactive mode with sliders for B, E and v0
plt.ion()  # Turn on interactive plot display
fig = plt.figure(figsize=(8, 8))
# Get instance of Axis3D
ax1 = fig.add_subplot(111, projection='3d')
# Get current rotation angle
print(ax1.azim)
# Set initial view to x-y plane
ax1.view_init(elev=90, azim=0)
# Same axis labels as plot_track3D(), which redraws these axes right away
ax1.set_xlabel('$x $ [m]')
ax1.set_ylabel('$y $ [m]')
ax1.set_zlabel('$z $ [m]')
plot_track3D()
axcolor = 'lightgoldenrodyellow'
# Slider boxes: [left, bottom, width, height] in figure coordinates
axe0 = fig.add_axes([0.1, 0.95, 0.3, 0.03])
axb0 = fig.add_axes([0.5, 0.95, 0.3, 0.03])
axv0 = fig.add_axes([0.1, 0.9, 0.3, 0.03])
sefield = Slider(axe0, 'Ey [V/m]', -5.0, 5.0, valinit=E0)
sbfield = Slider(axb0, 'Bz [T]', -1.0, 1.0, valinit=B0)
svz = Slider(axv0, 'vz [m/s]', 0.0, 1.0, valinit=0.)


def update(val):
    """Slider callback: re-integrate the orbit with the slider values and redraw."""
    bz = sbfield.val
    if bz == 0:
        # integrate() divides by the cyclotron frequency, which vanishes for
        # Bz = 0; keep the previous orbit instead of raising in the callback.
        return
    integrate(sefield.val, bz, svz.val)
    plot_track3D()
    plt.draw()


sefield.on_changed(update)
sbfield.on_changed(update)
svz.on_changed(update)
resetax = fig.add_axes([0.8, 0.025, 0.1, 0.04])
button = Button(resetax, 'Reset', color=axcolor, hovercolor='0.975')


def reset(event):
    """Button callback: reset all sliders and clear the 3D axes."""
    sefield.reset()
    sbfield.reset()
    svz.reset()
    ax1.cla()
    ax1.set_xlabel('$x $ [m]')
    ax1.set_ylabel('$y $ [m]')
    ax1.set_xlim((0., 10.))
    ax1.grid(True, which='both')
    plt.draw()


button.on_clicked(reset)
plt.show(block=False)
$$x=3$$

From algorithmic stability concerns there are only upper bounds on the two variables grid length and time step, dx and dt taken together. Put very informally, in one time step a particle must not travel a longer distance than from a grid cell to one of its direct neighbours. There is no lower limit on grid spacing until you come into conflict with the resolution of your numerical data type or run out of computing memory, or the time to execute one simulation run takes far too long.

Related

where does pyplot define the number of levels

I'm trying to understand how levels, values, boundaries are created in the contourf-toolchain.
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np  # the script uses np.array, np.flipud, np.min and np.max

t = np.array([[-5, 0, 5, 0, 0, 0], [0, 0, 5, 0, 0, 0], [0, 0, 5, 0, 0, 0], [5, 5.2, 10, 5, 5, 0],
              [0, 0, 5, 0, 0, 0]], dtype=np.double)
print("print as is")
print(t)
print("appearing in a contourf as:")
print(np.flipud((t)))
# assuming indices: array[x,y]
# value 10 supposed to be at 0,0
# contourf's pos. x-axis is pointing up!!
xmin, xmax = -2, 3
ymin, ymax = -3, 1
fig, axs = plt.subplots(nrows=1, ncols=2)
ax0 = axs[0]
ax1 = axs[1]
# Left: plot first, then build the colorbar from the contour set
set0 = ax0.contourf(t, extent=(xmin, xmax, ymin, ymax), cmap="brg")
# colorbar with coarse steps
cba = plt.colorbar(set0, ax=ax0)
# # This makes the colorbar "appear continuous"
# cba.boundaries=None
# cba.values = None
set0.changed()
# Right: build the colorbar from a standalone ScalarMappable, then plot
cNorm = mpl.colors.Normalize(vmin=np.min(t), vmax=np.max(t))
sm = mpl.cm.ScalarMappable(cNorm, cmap="brg")
cbb = plt.colorbar(sm, ax=ax1)
set1 = ax1.contourf(t, extent=(ymin, ymax, xmin, xmax), cmap="brg")
plt.show()
# NOTE: _boundaries is a private Colorbar attribute, read here for inspection only
print("left .boundaries: {:s}".format(str(cba.boundaries)))
print("left ._boundaries: {:s}".format(str(cba._boundaries)))
print("right .boundaries: {:s}".format(str(cbb.boundaries)))
print("right len(._boundaries): {:s}".format(str(len(cbb._boundaries))))
For the left plot the colorbar has 9 boundaries and 8 discrete color values; for the right one there are 257 boundaries.
I did a couple of vars(...) and dir(...) calls on the various objects. Where is the decision on the number of boundaries and values made?
How does the creation of the right plot's colorbar deviate from the default behaviour?

plot analog wave from 9 points of freq and power in python

Good day everyone, I'm new here and hope someone can guide me and help me with my query.
Is there a way to plot a signal as a wave using Python? I have 9 points of frequency and power, and I want to plot them using Python 3.6.
I found some resources like here and here and here and here, and I have tried the code below, but I want the graph to look like a wave rather than the way it is drawn now. Any suggestions?
code is :
# importing the required module
import matplotlib.pyplot as plt
# x axis values
x = [54,58,61,62,64,65,66,69,72] # frequency
# corresponding y axis values
y = [2,2.5,4,3,2.5,3.5,4.5,3,2] # Power
# plotting the points
plt.plot(x, y)
# naming the x axis
plt.xlabel('x - axis')
# naming the y axis
plt.ylabel('y - axis')
# giving a title to my graph
plt.title('My first graph!')
# function to show the plot
plt.show()
Here is code for a sine wave. How do I modify the code below to use my frequency and power values: freq = [54,58,61,62,64,65,66,69,72] # frequency and Power = [2,2.5,4,3,2.5,3.5,4.5,3,2] # Power
import numpy as np
import matplotlib
matplotlib.use('TKAgg') #use matplotlib backend TkAgg (optional)
import matplotlib.pyplot as plt
sample_rate = 200 # sampling frequency in Hz (atleast 2 times f)
t = np.linspace(0,5,sample_rate) #time axis
f = 100 #Signal frequency in Hz
sig = np.sin(2*np.pi*f*(t/sample_rate))
plt.plot(t,sig)
plt.xlabel("Time")
plt.ylabel("Amplitude")
plt.tight_layout()
plt.show()

How to make the fluctuation range of three polylines all obvious in same figure by matplotlib? [duplicate]

I'm trying to create a plot using pyplot that has a discontinuous x-axis. The usual way this is drawn is that the axis will have something like this:
(values)----//----(later values)
where the // indicates that you're skipping everything between (values) and (later values).
I haven't been able to find any examples of this, so I'm wondering if it's even possible. I know you can join data over a discontinuity for, eg, financial data, but I'd like to make the jump in the axis more explicit. At the moment I'm just using subplots but I'd really like to have everything end up on the same graph in the end.
Paul's answer is a perfectly fine method of doing this.
However, if you don't want to make a custom transform, you can just use two subplots to create the same effect.
Rather than put together an example from scratch, there's an excellent example of this written by Paul Ivanov in the matplotlib examples (It's only in the current git tip, as it was only committed a few months ago. It's not on the webpage yet.).
This is just a simple modification of this example to have a discontinuous x-axis instead of the y-axis. (Which is why I'm making this post a CW)
Basically, you just do something like this:
import matplotlib.pyplot as plt
import numpy as np

# If you're not familiar with np.r_, don't worry too much about this. It's just
# a series with points from 0 to 1 spaced at 0.1, and 9 to 10 with the same spacing.
x = np.r_[0:1:0.1, 9:10:0.1]
y = np.sin(x)
fig, (ax, ax2) = plt.subplots(1, 2, sharey=True)
# plot the same data on both axes
ax.plot(x, y, 'bo')
ax2.plot(x, y, 'bo')
# zoom-in / limit the view to different portions of the data
ax.set_xlim(0, 1)  # left part of the data
ax2.set_xlim(9, 10)  # right part of the data
# hide the spines between ax and ax2
ax.spines['right'].set_visible(False)
ax2.spines['left'].set_visible(False)
ax.yaxis.tick_left()
# tick_params expects booleans; the string 'off' is truthy and would switch
# the top labels ON in current matplotlib
ax.tick_params(labeltop=False)  # don't put tick labels at the top
ax2.yaxis.tick_right()
# Make the spacing between the two axes a bit smaller
plt.subplots_adjust(wspace=0.15)
plt.show()
To add the broken axis lines // effect, we can do this (again, modified from Paul Ivanov's example):
import matplotlib.pyplot as plt
import numpy as np

# If you're not familiar with np.r_, don't worry too much about this. It's just
# a series with points from 0 to 1 spaced at 0.1, and 9 to 10 with the same spacing.
x = np.r_[0:1:0.1, 9:10:0.1]
y = np.sin(x)
fig, (ax, ax2) = plt.subplots(1, 2, sharey=True)
# plot the same data on both axes
ax.plot(x, y, 'bo')
ax2.plot(x, y, 'bo')
# zoom-in / limit the view to different portions of the data
ax.set_xlim(0, 1)  # left part of the data
ax2.set_xlim(9, 10)  # right part of the data
# hide the spines between ax and ax2
ax.spines['right'].set_visible(False)
ax2.spines['left'].set_visible(False)
ax.yaxis.tick_left()
# tick_params expects booleans; the string 'off' is truthy and would switch
# the top labels ON in current matplotlib
ax.tick_params(labeltop=False)  # don't put tick labels at the top
ax2.yaxis.tick_right()
# Make the spacing between the two axes a bit smaller
plt.subplots_adjust(wspace=0.15)
# This looks pretty good, and was fairly painless, but you can get that
# cut-out diagonal lines look with just a bit more work. The important
# thing to know here is that in axes coordinates, which are always
# between 0-1, spine endpoints are at these locations (0,0), (0,1),
# (1,0), and (1,1). Thus, we just need to put the diagonals in the
# appropriate corners of each of our axes, and so long as we use the
# right transform and disable clipping.
d = .015  # how big to make the diagonal lines in axes coordinates
# arguments to pass plot, just so we don't keep repeating them
kwargs = dict(transform=ax.transAxes, color='k', clip_on=False)
ax.plot((1 - d, 1 + d), (-d, +d), **kwargs)  # bottom-right corner of the left axes
ax.plot((1 - d, 1 + d), (1 - d, 1 + d), **kwargs)  # top-right corner of the left axes
kwargs.update(transform=ax2.transAxes)  # switch to the right-hand axes
ax2.plot((-d, d), (-d, +d), **kwargs)  # bottom-left corner of the right axes
ax2.plot((-d, d), (1 - d, 1 + d), **kwargs)  # top-left corner of the right axes
# What's cool about this is that now if we vary the distance between
# ax and ax2 via fig.subplots_adjust(wspace=...) or plt.subplot_tool(),
# the diagonal lines will move accordingly, and stay right at the tips
# of the spines they are 'breaking'
plt.show()
I see many suggestions for this feature but no indication that it's been implemented. Here is a workable solution for the time-being. It applies a step-function transform to the x-axis. It's a lot of code, but it's fairly simple since most of it is boilerplate custom scale stuff. I have not added any graphics to indicate the location of the break, since that is a matter of style. Good luck finishing the job.
from matplotlib import pyplot as plt
from matplotlib import scale as mscale
from matplotlib import transforms as mtransforms
import numpy as np
def CustomScaleFactory(l, u):
    """Return a matplotlib scale class that removes the interval (l, u) from an axis.

    Values up to ``l`` are left unchanged, values inside ``(l, u)`` are mapped
    onto ``l``, and larger values are shifted down by ``u - l``.

    Parameters:
        l: lower edge of the interval to cut out.
        u: upper edge of the interval to cut out.

    Returns:
        A ``ScaleBase`` subclass registered under the name ``'custom'``.
    """

    class CustomScale(mscale.ScaleBase):
        name = 'custom'

        def __init__(self, axis, **kwargs):
            # current matplotlib requires the axis argument here
            super().__init__(axis)
            self.thresh = None  # thresh

        def get_transform(self):
            return self.CustomTransform(self.thresh)

        def set_default_locators_and_formatters(self, axis):
            # keep whatever locators/formatters the axis already has
            pass

        class CustomTransform(mtransforms.Transform):
            input_dims = 1
            output_dims = 1
            is_separable = True
            has_inverse = True
            lower = l
            upper = u

            def __init__(self, thresh):
                mtransforms.Transform.__init__(self)
                self.thresh = thresh

            def transform_non_affine(self, a):
                # data -> axis: close the gap between lower and upper
                a = np.asarray(a, dtype=float)
                aa = a.copy()
                aa[a > self.lower] = a[a > self.lower] - (self.upper - self.lower)
                aa[(a > self.lower) & (a < self.upper)] = self.lower
                return aa

            def inverted(self):
                return CustomScale.InvertedCustomTransform(self.thresh)

        class InvertedCustomTransform(mtransforms.Transform):
            input_dims = 1
            output_dims = 1
            is_separable = True
            has_inverse = True
            lower = l
            upper = u

            def __init__(self, thresh):
                mtransforms.Transform.__init__(self)
                self.thresh = thresh

            def transform_non_affine(self, a):
                # axis -> data: re-open the gap above lower
                a = np.asarray(a, dtype=float)
                aa = a.copy()
                aa[a > self.lower] = a[a > self.lower] + (self.upper - self.lower)
                return aa

            def inverted(self):
                return CustomScale.CustomTransform(self.thresh)

    return CustomScale


mscale.register_scale(CustomScaleFactory(1.12, 8.88))
x = np.concatenate((np.linspace(0, 1, 10), np.linspace(9, 10, 10)))
xticks = np.concatenate((np.linspace(0, 1, 6), np.linspace(9, 10, 6)))
y = np.sin(x)
plt.plot(x, y, '.')
ax = plt.gca()
ax.set_xscale('custom')
ax.set_xticks(xticks)
plt.show()
Check the brokenaxes package:
import matplotlib.pyplot as plt
from brokenaxes import brokenaxes
import numpy as np
fig = plt.figure(figsize=(5,2))
bax = brokenaxes(
xlims=((0, .1), (.4, .7)),
ylims=((-1, .7), (.79, 1)),
hspace=.05
)
x = np.linspace(0, 1, 100)
bax.plot(x, np.sin(10 * x), label='sin')
bax.plot(x, np.cos(10 * x), label='cos')
bax.legend(loc=3)
bax.set_xlabel('time')
bax.set_ylabel('value')
A very simple hack is to
scatter plot rectangles over the axes' spines and
draw the "//" as text at that position.
Worked like a charm for me:
# FAKE BROKEN AXES
# Fragment meant to run after a plot has been drawn on the current axes.
# NOTE(review): `ymin` and `label_size` are not defined in this snippet -
# presumably the lower y-limit and the tick-label font size; define both
# before running.
# plot a white rectangle on the x-axis-spine to "break" it
xpos = 10 # x position of the "break"
ypos = plt.gca().get_ylim()[0] # y position of the "break"
plt.scatter(xpos, ypos, color='white', marker='s', s=80, clip_on=False, zorder=100)
# draw "//" on the same place as text
plt.text(xpos, ymin-0.125, r'//', fontsize=label_size, zorder=101, horizontalalignment='center', verticalalignment='center')
Example Plot:
For those interested, I've expanded upon @Paul's answer and added it to the matplotlib wrapper proplot. It can do axis "jumps", "speedups", and "slowdowns".
There is no way currently to add "crosses" that indicate the discrete jump like in Joe's answer, but I plan to add this in the future. I also plan to add a default "tick locator" that sets sensible default tick locations depending on the CutoffScale arguments.
Addressing Frederick Nord's question of how to keep the diagonal "breaking" lines parallel when using a gridspec with width ratios other than 1:1, the following changes based on the proposals of Paul Ivanov and Joe Kington may be helpful. The width ratio can be varied using the variables n and m.
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.gridspec as gridspec

x = np.r_[0:1:0.1, 9:10:0.1]
y = np.sin(x)
# width ratio of the left (n) and right (m) axes
n = 5
m = 1
gs = gridspec.GridSpec(1, 2, width_ratios=[n, m])
plt.figure(figsize=(10, 8))
ax = plt.subplot(gs[0, 0])
ax2 = plt.subplot(gs[0, 1], sharey=ax)
plt.setp(ax2.get_yticklabels(), visible=False)
plt.subplots_adjust(wspace=0.1)
ax.plot(x, y, 'bo')
ax2.plot(x, y, 'bo')
ax.set_xlim(0, 1)
# ascending limits: (10, 8) would draw the right-hand axis mirrored
ax2.set_xlim(8, 10)
# hide the spines between ax and ax2
ax.spines['right'].set_visible(False)
ax2.spines['left'].set_visible(False)
ax.yaxis.tick_left()
# tick_params expects booleans; the string 'off' is truthy
ax.tick_params(labeltop=False)  # don't put tick labels at the top
ax2.yaxis.tick_right()
d = .015  # how big to make the diagonal lines in axes coordinates
# arguments to pass plot, just so we don't keep repeating them
kwargs = dict(transform=ax.transAxes, color='k', clip_on=False)
# Stretch the horizontal extent by the inverse of each axes' width share so
# the marks have the same slope on both axes.
on = (n + m) / n
om = (n + m) / m
ax.plot((1 - d * on, 1 + d * on), (-d, d), **kwargs)  # bottom-right corner of the left axes
ax.plot((1 - d * on, 1 + d * on), (1 - d, 1 + d), **kwargs)  # top-right corner of the left axes
kwargs.update(transform=ax2.transAxes)  # switch to the right-hand axes
ax2.plot((-d * om, d * om), (-d, d), **kwargs)  # bottom-left corner of the right axes
ax2.plot((-d * om, d * om), (1 - d, 1 + d), **kwargs)  # top-left corner of the right axes
plt.show()
This is a hacky but pretty solution for x-axis breaks.
The solution is based on https://matplotlib.org/stable/gallery/subplots_axes_and_figures/broken_axis.html, which gets rid of the problem with positioning the break above the spine, solved by How can I plot points so they appear over top of the spines with matplotlib?
from matplotlib.patches import Rectangle
import matplotlib.pyplot as plt
def axis_break(axis, xpos=(0.1, 0.125), slant=1.5):
    """Draw a break mark on the x-axis spine of ``axis``.

    A white rectangle is placed below the spine between the two positions and
    a slanted marker is drawn at each of them.

    Parameters:
        axis: matplotlib axes to draw on.
        xpos: two x positions, in axes coordinates, bounding the break.
        slant: proportion of vertical to horizontal extent of the slanted
            marker lines.
    """
    d = slant
    anchor = (xpos[0], -1)
    w = xpos[1] - xpos[0]
    h = 1
    # custom two-point marker: a short line from (-1, -d) to (1, d)
    kwargs = dict(marker=[(-1, -d), (1, d)], markersize=12, zorder=3,
                  linestyle="none", color='k', mec='k', mew=1, clip_on=False)
    axis.add_patch(Rectangle(
        anchor, w, h, fill=True, color="white",
        transform=axis.transAxes, clip_on=False, zorder=3)
    )
    axis.plot(xpos, [0, 0], transform=axis.transAxes, **kwargs)
fig, ax = plt.subplots(1,1)
plt.plot(np.arange(10))
axis_break(ax, xpos=[0.1, 0.12], slant=1.5)
axis_break(ax, xpos=[0.3, 0.31], slant=-10)
If you want to replace an axis label, this would do the trick:
from matplotlib import ticker
def replace_pos_with_label(fig, pos, label, axis):
    """Replace the x tick label located at ``pos`` with ``label``.

    All current tick positions are frozen with a ``FixedLocator`` and the
    labels are set again; only the tick whose x position equals ``pos`` gets
    the new text.

    Parameters:
        fig: figure owning ``axis``; it is drawn once so the ticks exist.
        pos: x position of the tick to relabel (compared with ``==``).
        label: replacement label text.
        axis: matplotlib axes whose x tick labels are edited.
    """
    fig.canvas.draw()  # this is needed to set up the x-ticks
    labels = []
    locs = []
    for text in axis.get_xticklabels():
        # public accessors instead of the private Text._x / Text._text
        x = text.get_position()[0]
        lab = label if x == pos else text.get_text()
        labels.append(lab)
        locs.append(x)
    axis.xaxis.set_major_locator(ticker.FixedLocator(locs))
    axis.set_xticklabels(labels)
fig, ax = plt.subplots(1,1)
plt.plot(np.arange(10))
replace_pos_with_label(fig, 0, "-10", axis=ax)
replace_pos_with_label(fig, 6, "$10^{4}$", axis=ax)
axis_break(ax, xpos=[0.1, 0.12], slant=2)

How to speed up the following code using numba?

I am doing a molecular dynamics simulation. It consists of numerical integration, many for loops, manipulating large NumPy arrays. I have tried to use NumPy function and arrays wherever possible. But the code is still too slow. I thought of using numba jit as a speedup. But it always throws an error message.
Here is the code.
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 28 12:10:42 2020
#author: Sandipan
"""
import numpy as np
import matplotlib.pyplot as plt
from numba import jit
import os
import sys
# Setting up the simulation
NSteps =100 # Number of steps
deltat = 0.005 # Time step in reduced time units
temp = 0.851# #Reduced temperature
DumpFreq = 100 # Save the position to file every DumpFreq steps
epsilon = 1.0 # LJ parameter for the energy between particles
DIM =3
N =500
density =0.776
Rcutoff =3
#----------------------Function Definitions---------------------
#------------------Initialise Configuration--------
#jit(nopython=True)
def initialise_config(N,DIM,density):
    """Create initial velocities and cubic-lattice positions for N particles.

    Velocities are drawn from a normal distribution, shifted so that the
    centre-of-mass velocity vanishes along every axis, and rescaled with the
    module-level target temperature ``temp`` so that the summed squared speed
    equals ``DIM*temp``. Positions fill the first N sites of the smallest
    cubic lattice with at least N sites.

    Parameters:
        N: number of particles.
        DIM: number of spatial dimensions (the lattice code assumes 3).
        density: number density, used to derive the box length.

    Returns:
        (coords, velocity, L): positions of shape (N, DIM), velocities of
        shape (N, DIM) and the box length L.
    """
    velocity = (np.random.randn(N,DIM)-0.5)
    # Set initial momentum to zero: remove the drift along each axis
    # separately (a single scalar mean would leave per-axis drift behind)
    COM_V = np.sum(velocity, axis=0)/N  # Center of mass velocity
    velocity = velocity - COM_V
    # Calculate initial kinetic energy (sum of squared speeds)
    k_energy = np.sum(velocity*velocity)
    vscale = np.sqrt(DIM*temp/k_energy)
    velocity *= vscale
    # Initialize with zeroes
    coords = np.zeros((N, DIM))
    # Get the corresponding box size
    L = (N/density)**(1.0/DIM)
    # Find the lowest perfect cube greater than or equal to the number of particles
    nCube = 2
    while nCube**3 < N:
        nCube = nCube + 1
    # Assign particle positions to lattice sites, starting at index 0
    # NOTE(review): positions are produced in real units (0..L) while wrap()
    # and LJ_Forces() appear to expect box-scaled units - confirm.
    spacing = L/nCube
    ip = 0
    for i in range(nCube):
        for j in range(nCube):
            for k in range(nCube):
                if ip < N:
                    coords[ip] = np.array([(i+0.5)*spacing,
                                           (j+0.5)*spacing,
                                           (k+0.5)*spacing])
                    ip = ip + 1
    return coords,velocity,L
#jit(nopython=True)
def wrap(pos,L):
    """Apply periodic boundary conditions in place and return ``pos``.

    Every coordinate above 0.5 is shifted down by 1 and every coordinate
    below -0.5 is shifted up by 1 (a single shift per call).

    Parameters:
        pos: 2D array of positions, one row per particle; modified in place.
        L: box length (unused here).

    Returns:
        The same array object that was passed in.
    """
    n_particles, n_dim = pos.shape  # no dependence on the global DIM
    for i in range(n_particles):
        for k in range(n_dim):
            if pos[i, k] > 0.5:
                pos[i, k] = pos[i, k] - 1
            elif pos[i, k] < -0.5:
                pos[i, k] = pos[i, k] + 1
    return pos
#jit(nopython=True)
def LJ_Forces(pos,acc,epsilon,L,DIM,N):
    """Compute Lennard-Jones accelerations, potential energy and virial.

    Loops over all particle pairs (O(N^2)), applies a one-box periodic shift
    to each separation component, and accumulates contributions for pairs
    closer than the module-level cutoff ``Rcutoff``.

    Parameters:
        pos: positions, one row per particle, in box-scaled units.
        acc: array whose shape and dtype define the returned accelerations;
            its values are ignored.
        epsilon: LJ energy parameter (unused here).
        L: box length used to convert separations to real units.
        DIM: number of spatial dimensions.
        N: number of particles.

    Returns:
        (48*acc, mean potential energy per particle, -virial/DIM)
    """
    # Set all accumulators to zero; zeros_like instead of acc*0 so that
    # NaN/inf in the incoming array cannot leak into the result
    ene_pot = np.zeros(N)
    acc = np.zeros_like(acc)
    virial = 0.0
    rcut_sq = Rcutoff**2  # loop invariant
    # Loop over all pairs of particles
    for i in range(N-1):
        for j in range(i+1,N):  # i+1 to N ensures we do not double count
            Sij = pos[i]-pos[j]  # Separation in box scaled units
            for l in range(DIM):  # Periodic interactions
                if np.abs(Sij[l]) > 0.5:
                    # Further than half a box apart: shift by one box length
                    # (1.0 in scaled units) towards the nearer periodic image
                    Sij[l] = Sij[l] - np.copysign(1.0,Sij[l])
            Rij = L*Sij  # Scale the box to the real units, here reduced LJ units
            Rsqij = np.dot(Rij,Rij)  # Square of the distance
            if Rsqij < rcut_sq:
                # LJ potential inside the cutoff, built from shared powers of 1/r^2
                rm2 = 1.0/Rsqij  # 1/r^2
                rm6 = rm2**3  # 1/r^6
                forcefact = (rm2**4)*(rm6-0.5)  # (1/r^8)*(1/r^6 - 0.5)
                phi = 4*(rm6**2-rm6)
                ene_pot[i] = ene_pot[i]+0.5*phi  # Accumulate energy
                ene_pot[j] = ene_pot[j]+0.5*phi  # Accumulate energy
                virial = virial-forcefact*Rsqij  # Virial is needed to calculate the pressure
                acc[i] = acc[i]+forcefact*Sij  # Accumulate forces
                acc[j] = acc[j]-forcefact*Sij  # (Fji=-Fij)
    return 48*acc, np.sum(ene_pot)/N, -virial/DIM
#jit(nopython=True)
def Calculate_Temperature(vel,L,DIM,N):
    """Return the mean kinetic energy per particle and the temperature.

    Parameters:
        vel: velocities, one row per particle; multiplied by ``L`` before use.
        L: box length used to scale the velocities.
        DIM: number of spatial dimensions.
        N: number of particles (the first N rows of ``vel`` are used).

    Returns:
        (ene_kin_aver, temperature) with temperature = 2*ene_kin_aver/DIM.
    """
    real_vel = L*vel[:N]
    ene_kin = 0.5*np.sum(real_vel*real_vel)
    ene_kin_aver = 1.0*ene_kin/N
    temperature = 2.0*ene_kin_aver/DIM
    return ene_kin_aver,temperature
# Main MD loop
#jit(nopython=True)
def main():
    """Run the MD loop and return the recorded observables.

    Uses the module-level simulation settings (NSteps, deltat, temp, DumpFreq,
    epsilon, DIM, N, density). Positions are written to 'traj.xyz' every
    DumpFreq steps and a progress line is printed every 5 steps.

    Returns:
        (ene_kin_aver, ene_pot_aver, temperature, pressure, pos): the first
        four are arrays with one entry per step; pos holds the final positions.
    """
    # Vectors to store parameter values at each step
    ene_kin_aver = np.ones(NSteps)
    ene_pot_aver = np.ones(NSteps)
    temperature = np.ones(NSteps)
    virial = np.ones(NSteps)
    pressure = np.ones(NSteps)
    pos,vel,L = initialise_config(N,DIM,density)
    # The first position update needs the accelerations that belong to the
    # initial configuration, not random numbers
    acc, _, _ = LJ_Forces(pos,np.zeros((N,DIM)),epsilon,L,DIM,N)
    volume = L**3
    if os.path.exists('energy2'):
        os.remove('energy2')
    # The context manager closes the trajectory file even if a step raises
    with open('traj.xyz', 'w') as f:
        for k in range(NSteps):
            # Refold positions according to periodic boundary conditions
            pos = wrap(pos,L)
            # r(t+dt) modify positions according to velocity and acceleration
            pos = pos + deltat*vel + 0.5*(deltat**2.0)*acc  # Step 1
            # Calculate temperature
            ene_kin_aver[k],temperature[k] = Calculate_Temperature(vel,L,DIM,N)
            # Rescale velocities and take half step
            chi = np.sqrt(temp/temperature[k])
            vel = chi*vel + 0.5*deltat*acc  # v(t+dt/2) Step 2
            # Compute forces a(t+dt),ene_pot,virial
            acc, ene_pot_aver[k], virial[k] = LJ_Forces(pos,acc,epsilon,L,DIM,N)  # Step 3
            # Complete the velocity step
            vel = vel + 0.5*deltat*acc  # v(t+dt) Step 4
            # Calculate temperature
            ene_kin_aver[k],temperature[k] = Calculate_Temperature(vel,L,DIM,N)
            # Calculate pressure
            pressure[k] = density*temperature[k] + virial[k]/volume
            # Dump the configuration every DumpFreq steps
            if k % DumpFreq == 0:
                f.write("%s\n" %(N))  # Write the number of particles to file
                # Write all of the quantities at this step to the file
                f.write("Energy %s, Temperature %.5f\n" %(ene_kin_aver[k]+ene_pot_aver[k],temperature[k]))
                for n in range(N):  # Write the positions to file
                    f.write("X"+" ")
                    for l in range(DIM):
                        f.write(str(pos[n][l]*L)+" ")
                    f.write("\n")
            if k % 5 == 0:
                sys.stdout.write("\rStep: {0} KE: {1} PE: {2} Energy: {3}".format(k, ene_kin_aver[k], ene_pot_aver[k],ene_kin_aver[k]+ene_pot_aver[k]))
                sys.stdout.flush()
    return ene_kin_aver, ene_pot_aver, temperature, pressure, pos
#------------------------------------------------------
ene_kin_aver, ene_pot_aver, temperature, pressure, pos = main()
# Plot all of the quantities
def plot():
    """Plot kinetic energy, potential energy, temperature and pressure per step.

    Reads the module-level arrays ``ene_kin_aver``, ``ene_pot_aver``,
    ``temperature`` and ``pressure`` and shows them in four stacked panels.
    """
    plt.figure(figsize=[7,12])
    plt.rc('xtick', labelsize=15)
    plt.rc('ytick', labelsize=15)
    plt.subplot(4, 1, 1)
    plt.plot(ene_kin_aver,'k-')
    # closing "$" added so the label is rendered as math like the others
    plt.ylabel(r"$E_{K}$", fontsize=20)
    plt.subplot(4, 1, 2)
    plt.plot(ene_pot_aver,'k-')
    plt.ylabel(r"$E_{P}$", fontsize=20)
    plt.subplot(4, 1, 3)
    plt.plot(temperature,'k-')
    plt.ylabel(r"$T$", fontsize=20)
    plt.subplot(4, 1, 4)
    plt.plot(pressure,'k-')
    plt.ylabel(r"$P$", fontsize=20)
    plt.show()
plot()
The error I am getting is:
runfile('E:/Project/LJMD4.py', wdir='E:/Project')
Traceback (most recent call last):
File "<ipython-input-8-aeebce887079>", line 1, in <module>
runfile('E:/Project/LJMD4.py', wdir='E:/Project')
File "C:\Users\Sandipan\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
execfile(filename, namespace)
File "C:\Users\Sandipan\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "E:/Project/LJMD4.py", line 226, in <module>
ene_kin_aver, ene_pot_aver, temperature, pressure, pos = main()
File "C:\Users\Sandipan\Anaconda3\lib\site-packages\numba\dispatcher.py", line 351, in _compile_for_args
error_rewrite(e, 'typing')
File "C:\Users\Sandipan\Anaconda3\lib\site-packages\numba\dispatcher.py", line 318, in error_rewrite
reraise(type(e), e, None)
File "C:\Users\Sandipan\Anaconda3\lib\site-packages\numba\six.py", line 658, in reraise
raise value.with_traceback(tb)
TypingError: cannot determine Numba type of <class 'builtin_function_or_method'>
When I searched on the internet, I found numba may not support some function I am using. But I am not using any Pandas or other data frame. I am just using pure python loop or NumPy which as far numba documentation suggests are well supported. I have tried removing numba from some functions and making nopython=0 but still, it sends different error messages. I can't figure out what is wrong with it. Without numba the code will not be feasible for actual use. Any further tips on speedup will be of great help.
Thank you in advance.
A few common errors
Use of unsupported functions
File operations and many string operations are not supported. These can be placed in an objmode block.
In this example I commented these things out.
Wrong way of initializing arrays
Only tuples are supported as the shape argument, not lists (NumPy accepts both, but its documentation mentions only tuples).
Checking for division by zero and throwing an exception
This is the standard behavior of Python, but not Numpy. If you want to avoid this overhead (if/else on every division) turn on the Numpy default behaviour (error_model="numpy").
Use of globals
Globals are hard coded into the compiled code (as you would directly write them into the code). They cannot be changed without recompilation.
Wrong indexing of Numpy arrays
pos[i][k] instead of pos[i,k]. Numba may optimize this away, but this has a quite noticeable negative impact in Pure Python code.
Working version
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 28 12:10:42 2020
#author: Sandipan
"""
import numpy as np
import matplotlib.pyplot as plt
from numba import jit
import os
import sys
# All globals are compile time constants
# recompilation needed if you change this values
# Better way: hand a tuple of all needed vars to the functions
# params=(NSteps,deltat,temp,DumpFreq,epsilon,DIM,N,density,Rcutoff)
# Setting up the simulation
NSteps =100 # Number of steps
deltat = 0.005 # Time step in reduced time units
temp = 0.851# #Reduced temperature
DumpFreq = 100 # Save the position to file every DumpFreq steps
epsilon = 1.0 # LJ parameter for the energy between particles
DIM =3
N =500
density =0.776
Rcutoff =3
params=(NSteps,deltat,temp,DumpFreq,epsilon,DIM,N,density,Rcutoff)
#----------------------Function Definitions---------------------
#------------------Initialise Configuration--------
#error_model=True
#Do you really want to search for division by zeros (additional cost)?
#jit(nopython=True,error_model="numpy")
def initialise_config(N,DIM,density):
    """Create initial velocities and cubic-lattice positions for N particles.

    Velocities are drawn from a normal distribution, shifted so that the
    centre-of-mass velocity vanishes along every axis, and rescaled with the
    module-level target temperature ``temp`` so that the summed squared speed
    equals ``DIM*temp``. Positions fill the first N sites of the smallest
    cubic lattice with at least N sites.

    Parameters:
        N: number of particles.
        DIM: number of spatial dimensions (the lattice code assumes 3).
        density: number density, used to derive the box length.

    Returns:
        (coords, velocity, L): positions of shape (N, DIM), velocities of
        shape (N, DIM) and the box length L.
    """
    velocity = (np.random.randn(N,DIM)-0.5)
    # Set initial momentum to zero: remove the drift along each axis
    # separately (a single scalar mean would leave per-axis drift behind)
    COM_V = np.sum(velocity, axis=0)/N  # Center of mass velocity
    velocity = velocity - COM_V
    # Calculate initial kinetic energy (sum of squared speeds)
    k_energy = np.sum(velocity*velocity)
    vscale = np.sqrt(DIM*temp/k_energy)
    velocity *= vscale
    # Initialize with zeroes; the shape is passed as a tuple
    coords = np.zeros((N,DIM))
    # Get the corresponding box size
    L = (N/density)**(1.0/DIM)
    # Find the lowest perfect cube greater than or equal to the number of particles
    nCube = 2
    while nCube**3 < N:
        nCube = nCube + 1
    # Assign particle positions to lattice sites, starting at index 0
    # NOTE(review): positions are produced in real units (0..L) while wrap()
    # and LJ_Forces() appear to expect box-scaled units - confirm.
    spacing = L/nCube
    ip = 0
    for i in range(nCube):
        for j in range(nCube):
            for k in range(nCube):
                if ip < N:
                    coords[ip] = np.array([(i+0.5)*spacing,
                                           (j+0.5)*spacing,
                                           (k+0.5)*spacing])
                    ip = ip + 1
    return coords,velocity,L
#jit(nopython=True)
def wrap(pos, L):
    """Apply periodic boundary conditions to box-scaled coordinates, in place.

    Every entry greater than 0.5 is shifted down by one box length (1 in
    scaled units) and every entry less than -0.5 is shifted up by one.  Only
    a single shift is applied, so entries more than one box length outside
    the range are not fully folded back.

    Parameters
    ----------
    pos : ndarray, shape (n_particles, n_dims)
        Box-scaled coordinates; modified in place.
    L : float
        Box edge length.  Unused here; kept so existing calls keep working.

    Returns
    -------
    ndarray
        The same array object that was passed in.
    """
    # Take the dimensionality from the array itself rather than from a
    # module-level constant, so the function works for any array width.
    n_rows, n_dims = pos.shape
    for i in range(n_rows):
        for k in range(n_dims):
            if pos[i, k] > 0.5:
                pos[i, k] -= 1
            if pos[i, k] < -0.5:
                pos[i, k] += 1
    return pos
#jit(nopython=True,error_model="numpy")
def LJ_Forces(pos, acc, epsilon, L, DIM, N, rcut=None):
    """Compute Lennard-Jones accelerations, potential energy and virial.

    Uses a double loop over all particle pairs (O(N^2)), which is unsuitable
    for large systems.  Pair separations are taken with the minimum-image
    convention in box-scaled units and converted to real (reduced LJ) units
    by multiplying with ``L``.

    Parameters
    ----------
    pos : ndarray, shape (N, DIM)
        Box-scaled particle positions.
    acc : ndarray
        Ignored on input (the accumulator always starts from zero); kept in
        the signature for compatibility with existing callers.
    epsilon : float
        Not used by the computation: the potential is evaluated as
        ``4*(r**-12 - r**-6)``, i.e. with unit energy scale.
    L : float
        Box edge length.
    DIM : int
        Number of spatial dimensions.
    N : int
        Number of particles.
    rcut : float, optional
        Interaction cutoff radius in real units.  When omitted, the
        module-level ``Rcutoff`` is used, as before.

    Returns
    -------
    acc : ndarray, shape (N, DIM)
        ``48 * sum_j forcefact_ij * Sij`` per particle (box-scaled units).
    ene_pot : float
        Potential energy per particle.
    virial : float
        ``sum_pairs forcefact * r**2 / DIM``.
        NOTE(review): unlike the accelerations this carries no factor 48 --
        kept as in the original; confirm before using the pressure.
    """
    cutoff = Rcutoff if rcut is None else rcut
    cutoff_sq = cutoff**2  # loop invariant

    # Start every accumulator from a clean zero.  (Multiplying the incoming
    # array by 0 would propagate any NaN/inf it contains.)
    ene_pot = np.zeros(N)
    acc = np.zeros((N, DIM))
    virial = 0.0

    # Loop over all pairs of particles
    for i in range(N - 1):
        for j in range(i + 1, N):  # j > i ensures no pair is counted twice
            Sij = pos[i] - pos[j]  # separation in box-scaled units
            for l in range(DIM):  # minimum-image convention
                if np.abs(Sij[l]) > 0.5:
                    # More than half a box away: shift by one whole box
                    # length (1.0 in scaled units) towards the nearer image.
                    Sij[l] = Sij[l] - np.copysign(1.0, Sij[l])

            Rij = L * Sij              # separation in real units
            Rsqij = np.dot(Rij, Rij)   # squared distance

            if Rsqij < cutoff_sq:
                # Build the LJ terms from shared powers of 1/r^2.
                rm2 = 1.0 / Rsqij                      # 1/r^2
                rm6 = rm2**3                           # 1/r^6
                forcefact = (rm2**4) * (rm6 - 0.5)     # 1/r^14 - 0.5/r^8
                phi = 4 * (rm6**2 - rm6)               # pair potential

                ene_pot[i] = ene_pot[i] + 0.5 * phi    # half to each partner
                ene_pot[j] = ene_pot[j] + 0.5 * phi
                virial = virial - forcefact * Rsqij    # needed for the pressure
                acc[i] = acc[i] + forcefact * Sij
                acc[j] = acc[j] - forcefact * Sij      # Fji = -Fij

    # For best performance the energy could be summed directly in the loop
    # instead of via np.sum at the end.
    return 48 * acc, np.sum(ene_pot) / N, -virial / DIM
#jit(nopython=True,error_model="numpy")
def Calculate_Temperature(vel, L, DIM, N):
    """Return the mean kinetic energy per particle and the temperature.

    Each of the first ``N`` rows of ``vel`` is multiplied by ``L`` before its
    kinetic energy ``0.5 * v . v`` is accumulated.  The temperature is
    ``2 * <E_kin> / DIM``.

    Returns
    -------
    (ene_kin_aver, temperature) : tuple of float
    """
    kinetic_total = 0.0
    for idx in range(N):
        scaled = L * vel[idx]
        kinetic_total += 0.5 * np.dot(scaled, scaled)

    per_particle = 1.0 * kinetic_total / N
    return per_particle, 2.0 * per_particle / DIM
# Main MD loop
#jit(nopython=True,error_model="numpy")
def main(params):
    """Run the Lennard-Jones molecular-dynamics loop.

    Integrates the equations of motion with a velocity-Verlet scheme in
    box-scaled coordinates and rescales the velocities to the target
    temperature at every step.

    Parameters
    ----------
    params : tuple
        ``(NSteps, deltat, temp, DumpFreq, epsilon, DIM, N, density, Rcutoff)``.
        ``DumpFreq`` and ``Rcutoff`` are unpacked but not used in this
        function: file output is disabled, and ``LJ_Forces`` is called with
        its six positional arguments, so it applies its own cutoff.

    Returns
    -------
    ene_kin_aver, ene_pot_aver, temperature, pressure : ndarray, shape (NSteps,)
        Per-step kinetic energy per particle, potential energy per particle,
        temperature and pressure.
    pos : ndarray, shape (N, DIM)
        Final box-scaled positions.
    """
    NSteps, deltat, temp, DumpFreq, epsilon, DIM, N, density, Rcutoff = params

    # Per-step observables
    ene_kin_aver = np.ones(NSteps)
    ene_pot_aver = np.ones(NSteps)
    temperature = np.ones(NSteps)
    virial = np.ones(NSteps)
    pressure = np.ones(NSteps)

    pos, vel, L = initialise_config(N, DIM, density)

    # initialise_config places the particles at real-space coordinates in
    # (0, L), whereas wrap, LJ_Forces and the integrator below all work in
    # box-scaled units.  Convert once, up front.
    pos = pos / L

    # Velocity Verlet needs a(0) to be the acceleration at the initial
    # positions, not random numbers.
    acc, _, _ = LJ_Forces(pos, np.zeros((N, DIM)), epsilon, L, DIM, N)

    volume = L**DIM

    # Trajectory/energy dumping every DumpFreq steps is disabled here: file
    # I/O is an unsupported operation inside a compiled function, so outputs
    # should be written after the loop from the returned arrays instead.

    for k in range(NSteps):
        # Refold positions according to periodic boundary conditions
        pos = wrap(pos, L)

        # Step 1: r(t+dt) from the current velocity and acceleration
        pos = pos + deltat * vel + 0.5 * (deltat**2.0) * acc

        # Temperature before the thermostat is applied
        ene_kin_aver[k], temperature[k] = Calculate_Temperature(vel, L, DIM, N)

        # Step 2: rescale velocities to the target temperature and take the
        # first half step, v(t+dt/2).  A zero temperature cannot be rescaled.
        chi = np.sqrt(temp / temperature[k]) if temperature[k] > 0.0 else 1.0
        vel = chi * vel + 0.5 * deltat * acc

        # Step 3: a(t+dt), potential energy and virial at the new positions
        acc, ene_pot_aver[k], virial[k] = LJ_Forces(pos, acc, epsilon, L, DIM, N)

        # Step 4: second half step completes v(t+dt)
        vel = vel + 0.5 * deltat * acc

        # Observables at the end of the step (overwrites the pre-thermostat values)
        ene_kin_aver[k], temperature[k] = Calculate_Temperature(vel, L, DIM, N)
        pressure[k] = density * temperature[k] + virial[k] / volume

    return ene_kin_aver, ene_pot_aver, temperature, pressure, pos
#------------------------------------------------------
# Run the simulation once with the module-level parameter tuple.  The returned
# per-step series are kept as module-level names, which plot() reads directly.
ene_kin_aver, ene_pot_aver, temperature, pressure, pos = main(params)
# Plot all of the quantities
def plot():
    """Show kinetic energy, potential energy, temperature and pressure per step.

    Draws four vertically stacked panels from the module-level arrays
    ``ene_kin_aver``, ``ene_pot_aver``, ``temperature`` and ``pressure`` and
    displays the figure.
    """
    plt.figure(figsize=[7, 12])
    plt.rc('xtick', labelsize=15)
    plt.rc('ytick', labelsize=15)

    # (series, y-label) per panel, top to bottom.  Each label is a complete
    # mathtext expression; a missing closing '$' would be drawn literally.
    panels = (
        (ene_kin_aver, r"$E_{K}$"),
        (ene_pot_aver, r"$E_{P}$"),
        (temperature, r"$T$"),
        (pressure, r"$P$"),
    )
    for row, (series, label) in enumerate(panels, start=1):
        plt.subplot(4, 1, row)
        plt.plot(series, 'k-')
        plt.ylabel(label, fontsize=20)

    plt.show()
# Draw the four diagnostic panels for the simulation results computed above.
plot()

Recreating decision-boundary plot in python with scikit-learn and matplotlib

I found this wonderful graph in the post Variation on "How to plot decision boundary of a k-nearest neighbor classifier from Elements of Statistical Learning?". In this example K-NN is used to classify data into three classes. I especially like that it shows the probability of class membership as an indication of the "confidence".
R and ggplot seem to do a great job. I wonder whether this can be re-created in Python. My initial thought tends towards scikit-learn and matplotlib. Here is the iris example from scikit-learn:
print(__doc__)

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import neighbors, datasets

n_neighbors = 15

# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features. We could
                      # avoid this ugly slicing by using a two-dim dataset
y = iris.target

h = .02  # step size in the mesh

# Create color maps
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])

# The mesh [x_min, x_max]x[y_min, y_max] depends only on the data, not on the
# weighting scheme, so it is built once instead of on every loop iteration.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))
grid_points = np.c_[xx.ravel(), yy.ravel()]

for weights in ['uniform', 'distance']:
    # we create an instance of Neighbours Classifier and fit the data.
    clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weights)
    clf.fit(X, y)

    # Plot the decision boundary: assign a color to each point in the mesh.
    Z = clf.predict(grid_points)

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.figure()
    plt.pcolormesh(xx, yy, Z, cmap=cmap_light)

    # Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.title("3-Class classification (k = %i, weights = '%s')"
              % (n_neighbors, weights))

plt.show()
This produces a graph in a sense very similar:
I have three questions:
How can I introduce the confidence to the plot?
How can I plot the decision-boundaries with a connected line?
Let's say I have a new observation, how can I introduce it to the plot and plot if it is classified correctly?
I stumbled upon your question about a year ago, and loved the plot -- I just never got around to answering it, until now. Hopefully the code comments below are self-explanatory enough (I also blogged about it, if you want more details). Maybe four years too late, haha.
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from matplotlib.lines import Line2D
from matplotlib.ticker import MaxNLocator
from sklearn import neighbors

iris = datasets.load_iris()
x = iris.data[:,0:2]
y = iris.target

# create the x0, x1 feature
x0 = x[:,0]
x1 = x[:,1]

# set main parameters for KNN plot
N_NEIGHBORS = 15 # KNN number of neighbors
H = 0.1 # mesh stepsize
PROB_DOT_SCALE = 40 # modifier to scale the probability dots
PROB_DOT_SCALE_POWER = 3 # exponent used to increase/decrease size of prob dots
TRUE_DOT_SIZE = 50 # size of the true labels
PAD = 1.0 # how much to "pad" around the true labels

clf = neighbors.KNeighborsClassifier(N_NEIGHBORS, weights='uniform')
clf.fit(x, y)

# find the min/max points for both x0 and x1 features
# these min/max values will be used to set the bounds
# for the plot
x0_min, x0_max = np.round(x0.min())-PAD, np.round(x0.max()+PAD)
x1_min, x1_max = np.round(x1.min())-PAD, np.round(x1.max()+PAD)

# create 1D arrays representing the range of probability data points
# on both the x0 and x1 axes.
x0_axis_range = np.arange(x0_min,x0_max, H)
x1_axis_range = np.arange(x1_min,x1_max, H)

# create meshgrid between the two axis ranges
xx0, xx1 = np.meshgrid(x0_axis_range, x1_axis_range)

# put the xx in the same dimensional format as the original x
# because it's easier to work with that way (at least for me)
# * shape will be: [no_dots, no_dimensions]
#   where no_dimensions = 2 (x0 and x1 axis)
# * stacking the two raveled grids along axis=1 already gives that shape
xx = np.stack((xx0.ravel(),xx1.ravel()),axis=1)

yy_hat = clf.predict(xx) # prediction of all the little dots
yy_prob = clf.predict_proba(xx) # probability of each dot being
                                # the predicted color
yy_size = np.max(yy_prob, axis=1)

# make figure
# set style because it looks nice
# * the style 'seaborn-whitegrid' was renamed to 'seaborn-v0_8-whitegrid' in
#   newer matplotlib releases, so use whichever name this installation
#   provides (and fall back to the default style if neither exists)
for style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8,6), dpi=150)

# establish colors and colormap
# * color blind colors, from
#   https://towardsdatascience.com/two-simple-steps-to-create-colorblind-friendly-data-visualizations-2ed781a167ec
redish = '#d73027'
orangeish = '#fc8d59'
yellowish = '#fee090'
blueish = '#4575b4'
colormap = np.array([redish,blueish,orangeish])

# plot all the little dots, position defined by the xx values, color
# defined by the knn predictions (yy_hat), and size defined by the
# probability of that color (yy_prob)
# * because the yy_hat values are either 0, 1, 2, we can use
#   these as values to index into the colormap array
# * size of dots (the probability) increases with a power law (^3), so that
#   there is a nice difference between different probabilities. I'm sure
#   there is a more elegant way to do this though...
# * linewidths=0 so that there are no "edges" around the dots
ax.scatter(xx[:,0], xx[:,1], c=colormap[yy_hat], alpha=0.4,
           s=PROB_DOT_SCALE*yy_size**PROB_DOT_SCALE_POWER, linewidths=0,)

# plot the contours
# * we have to reshape the yy_hat to get it into a
#   2D format, representing both the x0 and x1 axis
# * the number of levels and color scheme was manually tuned
#   to make sense for this data. Would probably change, for
#   instance, if there were 4, or 5 (etc.) classes
ax.contour(x0_axis_range, x1_axis_range,
           np.reshape(yy_hat,(xx0.shape[0],-1)),
           levels=3, linewidths=1,
           colors=[redish,blueish, blueish,orangeish,])

# plot the original x values.
# * zorder is 3 so that the dots appear above all the other dots
ax.scatter(x[:,0], x[:,1], c=colormap[y], s=TRUE_DOT_SIZE, zorder=3,
           linewidths=0.7, edgecolor='k')

# set x-y labels
ax.set_ylabel(r"$x_1$")
ax.set_xlabel(r"$x_0$")

# create class legend
# Line2D properties: https://matplotlib.org/stable/api/_as_gen/matplotlib.lines.Line2D.html
# about size of scatter plot points: https://stackoverflow.com/a/47403507/9214620
legend_class = []
for flower_class, color in zip(['c', 's', 'v'], [blueish, redish, orangeish]):
    legend_class.append(Line2D([0], [0], marker='o', label=flower_class,ls='None',
                               markerfacecolor=color, markersize=np.sqrt(TRUE_DOT_SIZE),
                               markeredgecolor='k', markeredgewidth=0.7))

# iterate over each of the probabilities to create prob legend
prob_values = [0.4, 0.6, 0.8, 1.0]
legend_prob = []
for prob in prob_values:
    legend_prob.append(Line2D([0], [0], marker='o', label=prob, ls='None', alpha=0.8,
                              markerfacecolor='grey',
                              markersize=np.sqrt(PROB_DOT_SCALE*prob**PROB_DOT_SCALE_POWER),
                              markeredgecolor='k', markeredgewidth=0))

legend1 = ax.legend(handles=legend_class, loc='center',
                    bbox_to_anchor=(1.05, 0.35),
                    frameon=False, title='class')
legend2 = ax.legend(handles=legend_prob, loc='center',
                    bbox_to_anchor=(1.05, 0.65),
                    frameon=False, title='prob', )
ax.add_artist(legend1) # add legend back after it disappears

ax.set_yticks(np.arange(x1_min,x1_max, 1)) # I don't like the decimals
ax.grid(False) # remove gridlines (inherited from the whitegrid style)

# only use integers for axis tick labels
# from: https://stackoverflow.com/a/34880501/9214620
ax.xaxis.set_major_locator(MaxNLocator(integer=True))
ax.yaxis.set_major_locator(MaxNLocator(integer=True))

# set the aspect ratio to 1, for looks
ax.set_aspect(1)

# remove first ticks from axis labels, for looks
# from: https://stackoverflow.com/a/19503828/9214620
ax.set_xticks(ax.get_xticks()[1:-1])
ax.set_yticks(np.arange(x1_min,x1_max, 1)[1:])

plt.show()

Resources