Scaling a PDF to show 100% at peak - python-3.x

I'm displaying a histogram of my data, with an overlaid PDF. My plots all look something like this:
and I'm trying to scale the red curve to show 100% at the peak.
My following toy code is identical to what I'm actually using, apart from the lines in between the two %:
%
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
import numpy as np
my_randoms = np.random.normal(0.5, 1, 50000)
dictOne = {"delta z":my_randoms}
df = pd.DataFrame(dictOne)
df = df[df['delta z'] > -999]
%
fig, ax = plt.subplots()
h, edges, _ = ax.hist(df['delta z'], alpha = 1, density = False, bins = 100)
param = stats.norm.fit(df['delta z'].dropna()) # Fit a normal distribution to the data
pdf_fitted = stats.norm.pdf(df['delta z'], *param)
x = np.linspace(*df['delta z'].agg([min, max]), 100) # x-values
binwidth = np.diff(edges).mean()
ax.plot(x, stats.norm.pdf(x, *param)*h.sum()*binwidth, color = 'r')
# Decorations
graph_title = 'U-B'
plt.grid(which = 'both')
plt.title(r'$\Delta z$ distribution for %s'%graph_title, fontsize = 25)
plt.xlabel(r'$\Delta z = z_{spec} - z_{photo}$', fontsize = 25)
plt.ylabel('Number', fontsize = 25)
plt.xticks(fontsize = 25)
plt.yticks(fontsize = 25)
xmin, xmax = min(df['delta z']), max(df['delta z'])
plt.xlim(xmin, xmax)
plt.annotate(
r'''$\mu_{\Delta z}$ = %.3f
$\sigma_{\Delta z}$ = %.3f'''%(param[0], param[1]),
fontsize = 25, color = 'r', xy=(0.85, 0.85), xycoords='axes fraction')
How would I define another axes object from 0 to 100 on the right-hand side and map the PDF to that?
Or is there a better way to do it?
This is kind of a follow-up to my previous question.

You can use density=True in plotting the histogram.

You use .twinx():
fig = plt.figure(figsize=(10, 8), dpi=72.0)
n_rows = 2
n_cols = 2
ax1 = fig.add_subplot(n_rows, n_cols, 1)
ax2 = fig.add_subplot(n_rows, n_cols, 2)
ax3 = ax1.twinx()
https://matplotlib.org/gallery/api/two_scales.html

Related

How to plot a contour within a bounded area?

I would like help to plot a counterf only within the region bounded by the blue area in the image, not in the whole graph. I am trying to use matplotlib mpath and mpatches but I am getting several errors.
How could I plot the difference in levels (contorf) only within the area delimited by the blue markings?
Here is my code and the picture
#import base modules
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.interpolate import LinearNDInterpolator
from matplotlib.collections import PatchCollection
import matplotlib.path as mpath
import matplotlib.patches as mpatches
from mpl_toolkits.axes_grid1 import make_axes_locatable
#Loading the images
img1953 = plt.imread('O1953_9960_A3_1m.tif')
img1999 = plt.imread('O1999_9523_A3_1m.tif')
img2004 = plt.imread('O2004_2385c_A3_1m.tif')
#extract data from surfer files with pandas and name the columns as x and y, first line omited
area = pd.read_csv('diff_analyse_grd.bln', names=['x1', 'y1'], skiprows=1)
slope = pd.read_csv("Abbruchkante.bln", names=['x2', 'y2'], skiprows=1)
crack = pd.read_csv("Anrisskante.bln", names=['x3', 'y3'], skiprows=1)
xmin, xmax = -1100, -200 #set the maximum and miniumum values for the axIs:
ymin, ymax = 1500, 2100
#Read csv from grid/countour file, 1999
grid = np.loadtxt('kr_99_A3_o25.dat', delimiter = ' ', skiprows = 1, usecols = [1,2,3])
x99 = grid[:,0]
y99 = grid[:,1]
z99 = grid[:,2]
#linear space with 600x900 values between min and max graphic values
xi = np.linspace(xmin, xmax, 900)
yi = np.linspace(ymin, ymax, 600)
X, Y = np.meshgrid(xi, yi)
#Using LinearNDInterpolation, year 1999
interpolation99 = LinearNDInterpolator(list(zip(x99, y99)), z99)#https://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.LinearNDInterpolator.html
zi99 = interpolation99(X, Y)
#Read csv from grid/countour file, year 1953
grid = np.loadtxt('kr_53_A3_o25.dat', delimiter = ' ', skiprows = 1, usecols = [1,2,3])
x53 = grid[:,0]
y53 = grid[:,1]
z53 = grid[:,2]
#Using LinearNDInterpolation, year 1953
interpolation53 = LinearNDInterpolator(list(zip(x53,y53)), z53)
zi53 = interpolation53(X,Y)
#Calculation of the hight variation:
dh5399 = zi53 - zi99
#Plot graph 1953 - 1999
plt.title('Hangrutschung im Blaubachgraben \n \n Differenz der Geländeoberflachen \n 1953 - 1999')
plt.imshow(img1953,cmap='gray', extent=(xmin, xmax, ymin, ymax))
plt.plot(area.x1,area.y1,'b-', label ='Studienbereich', linewidth = 2)
plt.plot(slope.x2,slope.y2,'r-', label = 'Abbruchkante')
plt.plot(crack.x3,crack.y3,'r--', label = 'Anrisskante')
cs = plt.contour(X,Y,zi99,levels=10, colors='#333')
plt.clabel(cs,inline=True, colors = 'k')
plt.legend(loc='upper right')
plt.xlabel('North [m]')
plt.ylabel('East [m]')
plt.xlim(xmin,xmax)
plt.ylim(ymin,ymax)
cs = plt.contourf(X, Y, dh5399, cmap='rainbow', vmin=-5, vmax=17)
#Setting colorbar right the axis
ax = plt.gca()
divider = make_axes_locatable(ax)
cax = divider.append_axes('right',size='3%',pad=0.1)
cbar = plt.colorbar(cs, cax=cax)
cbar.set_label('\u0394H [m]', rotation = 360, loc = 'top')
plt.tight_layout()
plt.savefig('Dif1.png',dpi=400)
plt.show()

Annotate Percentage of Group within a Seaborn CountPlot

The below code gets the percentage of all collisions. However, I want to get the percentage within a group. E.G. Mid-Block (not related to intersection) has 2 labels, a 1(red) or a 2(green/blue). Currently, the percentages next to those bars are percentages of the whole (bar count / all collisions), but I need to display the percentage within just one y-axis label. E.G. for Mid-block (not related to intersection), bar count / all collisions within mid-block (not related to intersection). I do not know how to do this, so if someone could point me in the right direction or give me some code that I could study to understand, I'd be very grateful.
Thank you so much for your time.
plt.style.use('ggplot')
plt.figure(figsize = (20, 15))
ax = sb.countplot(y = "JUNCTIONTYPE", hue = "SEVERITYCODE", data = dfm)
plt.title('Number of Persons vs. Number of Collisions by Severity', fontsize = 30)
plt.xlabel('Number of Collisions', fontsize = 24)
plt.ylabel('Number of Persons', fontsize = 24)
plt.tick_params(labelsize=18);
plt.legend(fontsize = 18, title = "Severity", loc = 'lower right')
plt.text(5, 6, "Figure 8: Number of persons plotted against the number of collisions grouped by severity", fontsize = 16)
# labels = [item.get_text() for item in ax.get_yticklabels()]
# labels[0] = 'No'
# labels[1] = 'Yes'
# ax.set_yticklabels(labels)
for p in ax.patches:
width = p.get_width()
height = p.get_height()
x, y = p.get_xy()
ax.annotate(int(width),
((x + width), y),
xytext = (30, -25),
fontsize = 18,
color = '#000000',
textcoords = 'offset points',
ha = 'right',
va = 'center')
for p in ax.patches:
width = p.get_width()
height = p.get_height()
x, y = p.get_xy()
totals = []
for i in ax.patches:
totals.append(i.get_width())
total = sum(totals)
ax.text(width + 0.3, y + 0.38,
str(
round((width/total) * 100, 2))
+ '%',
fontsize=18)
You could pre-calculate the per-group percentage points and use the order in which seaborn / matplotlib draws the bars to reference them.
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
titanic = sns.load_dataset('titanic')
# prepare the dataset
df = (titanic
.groupby(["embark_town", "survived"])
.size()
.reset_index()
.replace({"survived": {0:"no", 1:"yes"}})
.rename(columns={0:"count"}))
# calculate survival % per town of embarkation
df["percent"] = (df
.groupby("embark_town")
.apply(lambda x: x["count"] / x["count"].sum()).values)
# sort the dataframe to match the drawing order
df.sort_values(by=["survived", "embark_town"], inplace=True)
# visualisation
plt.style.use('ggplot')
fig = sns.catplot(
x="count", y="embark_town", hue="survived",
kind="bar", data=df, height=4, aspect=2)
for i, bar in enumerate(fig.ax.patches):
height = bar.get_height()
fig.ax.annotate(
# reference the pre-calculated row in the dataframe
f"{df.iloc[i, 3] :.0%}",
xycoords="data",
xytext=(20, -15),
textcoords="offset points",
xy=(bar.get_width(), bar.get_y()),
ha='center', va='center')
# make space for annonations
plt.margins(x=0.2)
plt.show()

How to adjust color in KDE scatter plot?

I wrote a program to plot oscilloscope data and make a KDE scatter plot with a colorbar. Unfortunately it requires a third party lib (readTrc) as well as the oscilloscope binary file which size is 200MB. The lib can be found on github.
import pandas as pd
import readTrc
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import collections
from scipy.stats import gaussian_kde
trcpath = 'filename.trc' #Binary Oscilloscope File (200 MB)
datX, datY, m = readTrc.readTrc(trcpath)
srx, sry = pd.Series(datX * 1000), pd.Series(datY * 1000)
df = pd.concat([srx, sry], axis = 1)
df.set_index(0, inplace = True)
df = df.abs() #Build Dataframe from above file
fig = plt.figure()
#Eliminate Noise
df[df < 3] = None
df = df.dropna()
#x and y axes data to plot
q1 = np.array(df[1].tolist()[:-2])
q2 = np.array(df[1].tolist()[1:-1])
q3 = np.array(df[1].tolist()[2:])
dq1 = q2 - q1
dq2 = q3 - q2
#Create first Dataset
qqstack = []
xy = np.vstack([dq1,dq2])
#Determine max value for colorbar (highest repeating x/y combination)
df_d = pd.DataFrame([dq1,dq2]).T
for idx, row in df_d.iterrows():
if row[0] == row[1]:
qqstack.append((row[0], row[1]))
cbar_max = collections.Counter(qqstack).most_common(1)[0][-1]
#sort to show most present values last
z = gaussian_kde(xy)(xy)
idx = z.argsort()
x, y, z = dq1[idx], dq2[idx], z[idx]
#plot graph
plt.scatter(x, y,
c=z,
s=20,
cmap = plt.cm.get_cmap('jet'))
#create colormap variable
sm = plt.cm.ScalarMappable(cmap = plt.cm.get_cmap('jet'),
norm = matplotlib.colors.PowerNorm(vmin = -0.1, vmax = cbar_max, gamma = 1))
sm._A = []
fig.colorbar(sm, ticks = range(0, cbar_max, 250))
plt.grid(zorder = 0, alpha = 0.3)
plt.xlabel('dq1 / mV')
plt.ylabel('dq2 / mV')
plt.show()
How can I adjust the color allocation in the plot? I want there to be less blue space so the transition is visible more, like on this graph:

Matplotlib Line Rotation or Animation

I have created a polar plot and would like to mimic a doppler. This includes a 360 degree sweep around the circle (polar plot). Once the sweep gets to 360 degrees, it needs to go back to zero and continue the sweep.
How do I animate or rotate this line to constantly sweep around this circle? I only want one line to constantly sweep around this plot.
I have looked at several different examples, however, none that create this rotation.
import numpy as np
import math
import matplotlib.pyplot as plt
import pylab
import time
r = 90 * (math.pi/180)
t = 50000
az = 90
el = 5
fig = pylab.figure(figsize = [5.0, 5.0])
ax = fig.gca(projection = 'polar')
fig.canvas.set_window_title('Doppler')
ax.plot(r, t, color ='b', marker = 'o', markersize = '3')
ax.set_theta_zero_location('N')
ax.set_theta_direction(-1)
currTime = time.time()
prevTime = currTime - 1
deltaTime = currTime - prevTime
outer_border_width = 1
screen_width = 500
screen_height = 500
midpoint = [int(screen_width/2), int(screen_height/2)]
radius = (midpoint[0])
sweep_length = radius - outer_border_width
angle = 50
sweep_interval = 10
sweep_speed = sweep_interval
x = sweep_length * math.sin(angle) + int(screen_width/2)
y = sweep_length * math.cos(angle) + int(screen_height/2)
az = az + ((360.0 / sweep_interval ) * deltaTime)
line1 = (midpoint, [50000, 50000])
#line2 = (midpoint, [20000, 20000])
ax.plot(line1, color = 'b', linewidth = 1)
#Increase the angle by 0.05 radians
angle = angle - sweep_speed
#Reset the angle to 0
if angle > 2 * math.pi:
angle = angle - 2 * math.pi
#ax.plot(line2, color = 'r', linewidth = 1)
#ax.lines.pop(0)
plt.show()
Below is a picture of what it currently looks like for reference:
Many thanks!
I do not understand much of your code, but in order to produce an animation you can use matplotlib.animation.FuncAnimation. Here, you'd give an array of angles to an updating function, which sets the data of the line for each frame.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation
r = 90 * (np.pi/180)
t = 50000
fig = plt.figure()
ax = fig.gca(projection = 'polar')
fig.canvas.set_window_title('Doppler')
ax.plot(r, t, color ='b', marker = 'o', markersize = '3')
ax.set_theta_zero_location('N')
ax.set_theta_direction(-1)
ax.set_ylim(0,1.02*t)
line1, = ax.plot([0, 0],[0,t], color = 'b', linewidth = 1)
def update(angle):
line1.set_data([angle, angle],[0,t])
return line1,
frames = np.linspace(0,2*np.pi,120)
fig.canvas.draw()
ani = matplotlib.animation.FuncAnimation(fig, update, frames=frames, blit=True, interval=10)
plt.show()

Recreating price distribution chart

I'm trying to recreate following chart:
Currently I have no idea what should I do now, wondering if there is a possibility to merge these charts or plot everything in one chart code,
import numpy as np
import math
import matplotlib.pyplot as plt
from scipy.stats import norm
fig = plt.figure()
ax1 = plt.subplot2grid((6, 2), (0, 0), rowspan=6, colspan=1)
ax2 = plt.subplot2grid((6, 2), (0, 1), rowspan=6, colspan=1)
ax2.axes.get_xaxis().set_visible(False)
ax2.axes.get_yaxis().set_visible(False)
S = 5
T = 100
mu = 0
vol = 0.3
for i in range(10):
daily_returns = np.random.normal(mu / T, vol / math.sqrt(T), T) + 1
price_list = [S]
for x in daily_returns:
price_list.append(price_list[-1] * x)
ax1.plot(price_list)
ax1.set_ylim(0,10)
srange = np.arange(-4, 4, 0.01)
mean = 0
standard_deviation = 1
var = norm.pdf(srange, mean, standard_deviation)
ax2.plot(var,srange, color="grey")
plt.show()

Resources