How to insert Gaussian/Normal distribution in multiple subplots? - python-3.x

So I have multiple plots, using subplot and I would like to add the Gaussian distribution on it. I have done it, in a for loop for each plot separately, but I am not sure how to do it using subplots. At the moment it does not show anything on the subplots.
def index_of(arrval, value):
if value < min(arrval):
return 0
return max(np.where(arrval <= value)[0])
# load file using loadtxt
for file in filename:
data = np.loadtxt(file,delimiter='\t', skiprows=2)
for x,y in data:
x = data[:,0]
y = data[:,1]
xs.append(x)
ys.append(y)
# Make the subplots
for i, (x, y) in enumerate(zip(xs, ys)):
ij = np.unravel_index(i, axs.shape)
axs[ij].plot(x, y,label = lsnames[i])
axs[ij].set_title(lsnames[i])
axs[ij].legend()
# Using one of the lmfit functions to get the Gaussian plot.
# But it does not show anything
gauss1 = GaussianModel(prefix='g1_')
gauss2 = GaussianModel(prefix='g2_')
pars = gauss1.guess(y, x=x)
pars.update(gauss2.make_params())
ix1 = index_of(x, 20)
ix2 = index_of(x, 40)
ix3 = index_of(x, 75)
gauss1.guess(y[ix1:ix2], x=x[ix1:ix2])
gauss2.guess(y[ix2:ix3], x=x[ix2:ix3])
mod = gauss1 + gauss2
mod = GaussianModel()
pars = mod.guess(y, x=x)
out = mod.fit(y, pars, x=x)
print(out.fit_report(min_correl=0.25))
plt.show()

Maybe I'm not fully understanding, but this seems like it could just be a looping question or even an indentation problem.
I think what you're asking to do is something like:
# loop over datasets, putting each in a subplot
for i, (x, y) in enumerate(zip(xs, ys)):
ij = np.unravel_index(i, axs.shape)
axs[ij].plot(x, y,label = lsnames[i])
axs[ij].set_title(lsnames[i])
axs[ij].legend()
# fit this dataset with 1 gaussian
mod = GaussianModel()
pars = mod.guess(y, x=x)
out = mod.fit(y, pars, x=x)
# plot best-fit
axs[ij].plot(x, out.best_fit, label='fit')
print("Data Set %d" % i)
print(out.fit_report(min_correl=0.25))
plt.show()
Your code was sort of confusingly making a model with two Gaussians and then not using it. It would be fine to use a more complicated model in the loop.
Hope that helps.

Related

How to combine multiple spectrogram subplots to produce single plot?

I am visualizing four classes of the spectrogram. For a single class, My spectrogram code looks like this
Considering this as one image.
And the code to produce this, is
def spec(filename):
time_period = 0.5 # FFT time period (in seconds). Can comfortably process time frames from 0.05 seconds - 10 seconds
# ==============================================
fs_rate, signal_original = wavfile.read(filename)
total_time = int(np.floor(len(signal_original)/fs_rate))
sample_range = np.arange(0,total_time,time_period)
total_samples = len(sample_range)
print ("Frequency sampling", fs_rate)
print ("total time: ", total_time)
print ("sample time period: ", time_period)
print ("total samples: ", total_samples)
output_array = []
for i in sample_range:
# print ("Processing: %d / %d (%d%%)" % (i/time_period + 1, total_samples, (i/time_period + 1)*100/total_samples))
sample_start = int(i*fs_rate)
sample_end = int((i+time_period)*fs_rate)
signal = signal_original[sample_start:sample_end]
l_audio = len(signal.shape)
#print ("Channels", l_audio)
if l_audio == 2:
signal = signal.sum(axis=1) / 2
N = signal.shape[0]
#print ("Complete Samplings N", N)
secs = N / float(fs_rate)
# print ("secs", secs)
Ts = 1.0/fs_rate # sampling interval in time
#print ("Timestep between samples Ts", Ts)
t = scipy.arange(0, secs, Ts) # time vector as scipy arange field / numpy.ndarray
FFT = abs(scipy.fft(signal))
FFT_side = FFT[range(int(N/2))] # one side FFT range
freqs = scipy.fftpack.fftfreq(signal.size, t[1]-t[0])
fft_freqs = np.array(freqs)
freqs_side = freqs[range(int(N/2))] # one side frequency range
fft_freqs_side = np.array(freqs_side)
# Reduce to 0-5000 Hz
bucket_size = 5
buckets = 16
FFT_side = FFT_side[0:bucket_size*buckets]
fft_freqs_side = fft_freqs_side[0:bucket_size*buckets]
# Combine frequencies into buckets
FFT_side = np.array([int(sum(FFT_side[current: current+bucket_size])) for current in range(0, len(FFT_side), bucket_size)])
fft_freqs_side = np.array([int(sum(fft_freqs_side[current: current+bucket_size])) for current in range(0, len(fft_freqs_side), bucket_size)])
# FFT_side: Normalize (0-1)
max_value = max(FFT_side)
if (max_value != 0):
FFT_side_norm = FFT_side / max_value
# Append to output array
output_array.append(FFT_side_norm)
# ============================================
# Plotting
plt.figure(figsize=(4,7))
plt.subplot(411)
plt.subplots_adjust(hspace = 0.5)
plt.plot(t, signal, "g") # plotting the signal
plt.xlabel('Time')
plt.ylabel('Amplitude')
plt.subplot(412)
diff = np.diff(fft_freqs_side)
widths = np.hstack([diff, diff[-1]])
plt.bar(fft_freqs_side, abs(FFT_side_norm), width=widths) # plotting the positive fft spectrum
plt.xticks(fft_freqs_side, fft_freqs_side, rotation='vertical')
plt.xlabel('Frequency (Hz)')
plt.ylabel('Count single-sided')
FFT_side_norm_line = FFT_side_norm.copy()
FFT_side_norm_line.resize( (1,buckets) )
plt.subplot(413)
plt.imshow(FFT_side_norm_line)
plt.xlabel('Image Representation 1D')
plt.show()
print("\n\n\n\n\n\n")
How can I combine four images plot like this, and make a single output image. Something like this
I'd suggest using fig.subfigures and plt.subplot_mosaic.
The plot above was obtained using this simple script:
import matplotlib.pyplot as plt
fig = plt.figure(figsize = (8, 10), layout='constrained')
# next two lines make the trick
sfigs = fig.subfigures(2,2)
mosaics = [f.subplot_mosaic('t;t;t;f;f;f;i;.') for f in sfigs.flat]
# next, "how to" reference the subplots in subfigures
mosaics[0]['t'].plot(range(5), color='b')
mosaics[1]['t'].plot(range(5), color='k')
mosaics[2]['t'].plot(range(5), color='r')
mosaics[3]['t'].plot(range(5), color='g')
mosaics[0]['f'].plot(range(3), color='b')
mosaics[1]['f'].plot(range(3), color='k')
mosaics[2]['f'].plot(range(3), color='r')
mosaics[3]['f'].plot(range(3), color='g')
mosaics[0]['i'].imshow([range(10)]*2)
plt.show()
You can do it this way:
fig, axs = plt.subplots(2, 2)
axs[0, 0].plot(x, y)
axs[0, 0].set_title('Axis [0, 0]')
axs[0, 1].plot(x, y, 'tab:orange')
axs[0, 1].set_title('Axis [0, 1]')
axs[1, 0].plot(x, -y, 'tab:green')
axs[1, 0].set_title('Axis [1, 0]')
axs[1, 1].plot(x, -y, 'tab:red')
axs[1, 1].set_title('Axis [1, 1]')
for ax in axs.flat:
ax.set(xlabel='x-label', ylabel='y-label')
# Hide x labels and tick labels for top plots and y ticks for right plots.
for ax in axs.flat:
ax.label_outer()
The result will be like this:
Taken from https://matplotlib.org/stable/gallery/subplots_axes_and_figures/subplots_demo.html

mplcursors: show and highlight coordinates of nearby local extreme

I have code that shows the label for each point in a matplotlib scatterplot using mplcursors, similar to this example. I want to know how to, form a list of values, make a certain point stand out, as in if I have a graph of points y=-x^2. When I go near the peak, it shouldn't show 0.001, but 0 instead, without the trouble needing to find the exact mouse placement of the top. I can't solve for each point in the graph, as I don't have a specific function.
Supposing the points in the scatter plot are ordered, we can investigate whether an extreme in a nearby window is also an extreme in a somewhat larger window. If, so we can report that extreme with its x and y coordinates.
The code below only shows the annotation when we're close to a local maximum or minimum. It also temporarily shows a horizontal and vertical line to indicate the exact spot. The code can be a starting point for many variations.
import matplotlib.pyplot as plt
import mplcursors
import numpy as np
near_window = 10 # the width of the nearby window
far_window = 20 # the width of the far window
def show_annotation(sel):
ind = sel.target.index
near_start_index = max(0, ind - near_window)
y_near = y[near_start_index: min(N, ind + near_window)]
y_far = y[max(0, ind - far_window): min(N, ind + far_window)]
near_max = y_near.max()
far_max = y_far.max()
annotation_str = ''
if near_max == far_max:
near_argmax = y_near.argmax()
annotation_str = f'local max:\nx:{x[near_start_index + near_argmax]:.3f}\ny:{near_max:.3f}'
maxline = plt.axhline(near_max, color='crimson', ls=':')
maxline_x = plt.axvline(x[near_start_index+near_argmax], color='grey', ls=':')
sel.extras.append(maxline)
sel.extras.append(maxline_x)
else:
near_min = y_near.min()
far_min = y_far.min()
if near_min == far_min:
near_argmin = y_near.argmin()
annotation_str = f'local min:\nx:{x[near_start_index+near_argmin]:.3f}\ny:{near_min:.3f}'
minline = plt.axhline(near_min, color='limegreen', ls=':')
minline_x = plt.axvline(x[near_start_index + near_argmin], color='grey', ls=':')
sel.extras.append(minline)
sel.extras.append(minline_x)
if len(annotation_str) > 0:
sel.annotation.set_text(annotation_str)
else:
sel.annotation.set_visible(False) # hide the annotation
# sel.annotation.set_text(f'x:{sel.target[0]:.3f}\n y:{sel.target[1]:.3f}')
N = 500
x = np.linspace(0, 100, 500)
y = np.cumsum(np.random.normal(0, 0.1, N))
box = np.ones(20) / 20
y = np.convolve(y, box, mode='same')
scat = plt.scatter(x, y, s=1)
cursor = mplcursors.cursor(scat, hover=True)
cursor.connect('add', show_annotation)
plt.show()

Weighted moving average in python with different width in different regions

I was trying to take a oscillation avarage of a highly oscillating data. The oscillations are not uniform, it has less oscillations in the initial regions.
x = np.linspace(0, 1000, 1000001)
y = some oscillating data say, sin(x^2)
(The original data file is huge, so I can't upload it)
I want to take a weighted moving avarage of the function and plot it. Initially the period of the function is larger, so I want to take avarage over a large time interval. While I can do with smaller time interval latter.
I have found a possible elegant solution in following post:
Weighted moving average in python
However, I want to have different width in different regions of x. Say when x is between (0,100) I want the width=0.6, while when x is between (101, 300) width=0.2 and so on.
This is what I have tried to implement( with my limited knowledge in programing!)
def weighted_moving_average(x,y,step_size=0.05):#change the width to control average
bin_centers = np.arange(np.min(x),np.max(x)-0.5*step_size,step_size)+0.5*step_size
bin_avg = np.zeros(len(bin_centers))
#We're going to weight with a Gaussian function
def gaussian(x,amp=1,mean=0,sigma=1):
return amp*np.exp(-(x-mean)**2/(2*sigma**2))
if x.any() < 100:
for index in range(0,len(bin_centers)):
bin_center = bin_centers[index]
weights = gaussian(x,mean=bin_center,sigma=0.6)
bin_avg[index] = np.average(y,weights=weights)
else:
for index in range(0,len(bin_centers)):
bin_center = bin_centers[index]
weights = gaussian(x,mean=bin_center,sigma=0.1)
bin_avg[index] = np.average(y,weights=weights)
return (bin_centers,bin_avg)
It is needless to say that this is not working! I am getting the plot with the first value of sigma. Please help...
The following snippet should do more or less what you tried to do. You have mainly a logical problem in your code, x.any() < 100 will always be True, so you'll never execute the second part.
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(0, 10, 1000)
y = np.sin(x**2)
def gaussian(x,amp=1,mean=0,sigma=1):
return amp*np.exp(-(x-mean)**2/(2*sigma**2))
def weighted_average(x,y,step_size=0.3):
weights = np.zeros_like(x)
bin_centers = np.arange(np.min(x),np.max(x)-.5*step_size,step_size)+.5*step_size
bin_avg = np.zeros_like(bin_centers)
for i, center in enumerate(bin_centers):
# Select the indices that should count to that bin
idx = ((x >= center-.5*step_size) & (x <= center+.5*step_size))
weights = gaussian(x[idx], mean=center, sigma=step_size)
bin_avg[i] = np.average(y[idx], weights=weights)
return (bin_centers,bin_avg)
idx = x <= 4
plt.plot(*weighted_average(x[idx],y[idx], step_size=0.6))
idx = x >= 3
plt.plot(*weighted_average(x[idx],y[idx], step_size=0.1))
plt.plot(x,y)
plt.legend(['0.6', '0.1', 'y'])
plt.show()
However, depending on the usage, you could also implement moving average directly:
x = np.linspace(0, 60, 1000)
y = np.sin(x**2)
z = np.zeros_like(x)
z[0] = x[0]
for i, t in enumerate(x[1:]):
a=.2
z[i+1] = a*y[i+1] + (1-a)*z[i]
plt.plot(x,y)
plt.plot(x,z)
plt.legend(['data', 'moving average'])
plt.show()
Of course you could then change a adaptively, e.g. depending of the local variance. Also note that this has apriori a small bias depending on a and the step size in x.

Updating pyplot.vlines in interactive plot

I need your help. Please consider the code below, which plots a sinusoid using pylab in IPython. A slider below the axis enables the user to adjust the frequency of the sinusoid interactively.
%pylab
# setup figure
fig, ax = subplots(1)
fig.subplots_adjust(left=0.25, bottom=0.25)
# add a slider
axcolor = 'lightgoldenrodyellow'
ax_freq = axes([0.3, 0.13, 0.5, 0.03], axisbg=axcolor)
s_freq = Slider(ax_freq, 'Frequency [Hz]', 0, 100, valinit=a0)
# plot
g = linspace(0, 1, 100)
f0 = 1
sig = sin(2*pi*f0*t)
myline, = ax.plot(sig)
# update plot
def update(value):
f = s_freq.val
new_data = sin(2*pi*f*t)
myline.set_ydata(new_data) # crucial line
fig.canvas.draw_idle()
s_freq.on_changed(update)
Instead of the above, I need to plot the signal as vertical lines, ranging from the amplitude of each point in t to the x-axis. Thus, my first idea was to use vlines instead of plot in line 15:
myline = ax.vlines(range(len(sig)), 0, sig)
This solution works in the non-interactive case. The problem is, plot returns an matplotlib.lines.Line2D object, which provides the set_ydata method to update data interactively. The object returned by vlines is of type matplotlib.collections.LineCollection and does not provide such a method.
My question: how do I update a LineCollection interactively?
Using #Aaron Voelker's comment of using set_segments and wrapping it up in a function:
def update_vlines(*, h, x, ymin=None, ymax=None):
seg_old = h.get_segments()
if ymin is None:
ymin = seg_old[0][0, 1]
if ymax is None:
ymax = seg_old[0][1, 1]
seg_new = [np.array([[xx, ymin],
[xx, ymax]]) for xx in x]
h.set_segments(seg_new)
Analog for hlines:
def update_hlines(*, h, y, xmin=None, xmax=None):
seg_old = h.get_segments()
if xmin is None:
xmin = seg_old[0][0, 0]
if xmax is None:
xmax = seg_old[0][1, 0]
seg_new = [np.array([[xmin, yy],
[xmax, yy]]) for yy in y]
h.set_segments(seg_new)
I will give examples for vlines here.
If you have multiple lines, #scleronomic solution works perfect. You also might prefer one-liner:
myline.set_segments([np.array([[x, x_min], [x, x_max]]) for x in xx])
If you need to update only maximums, then you can do this:
def update_maxs(vline):
vline[:,1] = x_min, x_max
return vline
myline.set_segments(list(map(update_maxs, x.get_segments())))
Also this example could be useful: LINK

GNUPLOT : Plot sum of distributions given by parameters in text file

Suppose I have a text file with an arbitrary number of rows where each row gives some set of parameters that define a function (say the (x,y) location and sigmas (possibility unequal) of a 2D Gaussian). For example, in that case, the text file might contain:
100 112 3 4
97 38 8 9
88 79 3 9
...
...
102 152 9 5
I would like to plot (using pm3d) the SUM of all the distributions defined by the text file. How can that be done?
I would like to plot (using pm3d) the SUM of all the distributions defined by the text file. How can that be done?
I think that cannot be done in native gnuplot, at least not in any sane way that I know. That kind of number crunching is not really what gnuplot is designed to do.
Python, however, makes it pretty doable...
#!/usr/bin/env python2
import math
import numpy
import os
class Gaussian(object):
'''A 2D gaussian function (normalized), defined by
mx: x mean position
my: y mean position
sx: sigma in x
sy: sigma in y'''
def __init__(self, mx, my, sx, sy):
self.mx = float(mx)
self.my = float(my)
self.sx = float(sx)
self.sy = float(sy)
def value(self,x,y):
'''Evaluates the value of a Gaussian at a certain point (x,y)'''
prefactor = (1.0/(self.sx*self.sy*2*math.pi))
ypart = math.exp(-(x - self.mx)**2/(2*self.sx**2))
xpart = math.exp(-(y - self.my)**2/(2*self.sy**2))
return prefactor*ypart*xpart
def getLimits(self, devs):
'''Finds the upper and lower x and y limits for the distribution,
defined as points a certain number of deviations from the mean.'''
xmin = self.mx - devs*self.sx
xmax = self.mx + devs*self.sx
ymin = self.my - devs*self.sy
ymax = self.my + devs*self.sy
return (xmin, xmax, ymin, ymax)
def readGaussians(filename):
'''reads in gaussian parameters from an input file in the format
mx my sx sy
This makes some assumptions about how perfect the input file will be'''
gaussians = []
with open(filename, 'r') as f:
for line in f.readlines():
(mx, my, sx, sy) = line.split()
gaussians.append(Gaussian(mx, my, sx, sy))
return gaussians
def getPlotLimits(gaussians, devs):
'''finds the x and y limits of the field of gaussian distributions.
Sets the boundary a set number of deviations from the mean'''
# get the limits from the first gaussian and work from there
(xminlim, xmaxlim, yminlim, ymaxlim) = gaussians[0].getLimits(devs)
for i in range(1, len(gaussians)):
(xmin, xmax, ymin, ymax) = gaussians[i].getLimits(devs)
if (xmin < xminlim):
xminlim = xmin
if (xmax > xmaxlim):
xmaxlim = xmax
if (ymin < yminlim):
yminlim = ymin
if (ymax > ymaxlim):
ymaxlim = ymax
return (xminlim, xmaxlim, yminlim, ymaxlim)
def makeDataFile(gaussians, limits, res, outputFile):
'''makes a data file of x,y coordinates to be plotted'''
xres = res[0]
yres = res[1]
# make arrays of x and y values
x = numpy.linspace(limits[0], limits[1], xres)
y = numpy.linspace(limits[2], limits[3], yres)
# initialize grid of z values
z = numpy.zeros((xres, yres))
# compute z value at each x, y point
for i in range(len(x)):
for j in range(len(y)):
for gaussian in gaussians:
z[i][j] += gaussian.value(x[i], y[j])
# write out the matrix
with open(outputFile, 'w') as f:
for i in range(len(x)):
for j in range(len(y)):
f.write('%f %f %f\n' % (x[i], y[j], z[i][j]))
f.write('\n')
def makePlotFile(limits, outputFile):
'''makes a plot file for gnuplot'''
with open('plot.plt', 'w') as f:
f.write("#!/usr/bin/env gnuplot\n")
f.write("set terminal png font 'Courier'\n")
f.write("set output 'gaussians.png'\n")
f.write("set xr [%f:%f]\n" % (limits[0], limits[1]))
f.write("set yr [%f:%f]\n" % (limits[2], limits[3]))
f.write("set pm3d map\n")
f.write("plot '%s' with image\n" % outputFile)
# make plot file executable
os.system('chmod 755 plot.plt')
# plot
os.system('./plot.plt')
# name of input file
inputFile = 'data.dat'
# name of output (processed data)
outputFile = 'gaussians.dat'
# number of x and y points in plot
res = (100, 100)
# deviations from the mean by which to define Gaussian limits
devs = 3
# read in the gaussians from the data file
print 'reading in data...'
gaussians = readGaussians(inputFile)
# find the plot limits
limits = getPlotLimits(gaussians, devs)
# make the gaussian data file
print 'computing data for plotting...'
makeDataFile(gaussians, limits, res, outputFile)
# make the plot file
print 'plotting...'
makePlotFile(limits, outputFile)
This script produces the following output when run on your example data.

Resources