How to adjust color in KDE scatter plot? - python-3.x

I wrote a program to plot oscilloscope data and make a KDE scatter plot with a colorbar. Unfortunately it requires a third-party library (readTrc) as well as the oscilloscope binary file, which is 200 MB. The library can be found on GitHub.
import pandas as pd
import readTrc
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import collections
from scipy.stats import gaussian_kde
trcpath = 'filename.trc' #Binary Oscilloscope File (200 MB)
datX, datY, m = readTrc.readTrc(trcpath)
srx, sry = pd.Series(datX * 1000), pd.Series(datY * 1000)
df = pd.concat([srx, sry], axis = 1)
df.set_index(0, inplace = True)
df = df.abs() #Build Dataframe from above file
fig = plt.figure()
#Eliminate Noise
df[df < 3] = None
df = df.dropna()
#x and y axes data to plot
q1 = np.array(df[1].tolist()[:-2])
q2 = np.array(df[1].tolist()[1:-1])
q3 = np.array(df[1].tolist()[2:])
dq1 = q2 - q1
dq2 = q3 - q2
#Create first Dataset
qqstack = []
xy = np.vstack([dq1,dq2])
#Determine max value for colorbar (highest repeating x/y combination)
df_d = pd.DataFrame([dq1,dq2]).T
for idx, row in df_d.iterrows():
    if row[0] == row[1]:
        qqstack.append((row[0], row[1]))
cbar_max = collections.Counter(qqstack).most_common(1)[0][-1]
#sort to show most present values last
z = gaussian_kde(xy)(xy)
idx = z.argsort()
x, y, z = dq1[idx], dq2[idx], z[idx]
#plot graph
plt.scatter(x, y,
            c=z,
            s=20,
            cmap = plt.cm.get_cmap('jet'))
#create colormap variable
sm = plt.cm.ScalarMappable(cmap = plt.cm.get_cmap('jet'),
                           norm = matplotlib.colors.PowerNorm(vmin = -0.1, vmax = cbar_max, gamma = 1))
sm._A = []
fig.colorbar(sm, ticks = range(0, cbar_max, 250))
plt.grid(zorder = 0, alpha = 0.3)
plt.xlabel('dq1 / mV')
plt.ylabel('dq2 / mV')
plt.show()
How can I adjust the color allocation in the plot? I want there to be less blue space so that the transition is more visible, like in this graph:

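One possible approach (a sketch, not part of the original post): pass a nonlinear normalization such as matplotlib.colors.PowerNorm with gamma < 1 (or LogNorm) to scatter, and reuse the mappable returned by scatter for the colorbar, so low densities take up less of the blue end. Assuming x, y, z and fig from the code above:
import matplotlib.colors as mcolors
#Sketch, not the poster's code: gamma < 1 stretches the low-density range,
#so fewer points sit in the flat blue region. Tune gamma to taste.
norm = mcolors.PowerNorm(gamma=0.3, vmin=z.min(), vmax=z.max())
sc = plt.scatter(x, y, c=z, s=20, cmap='jet', norm=norm)
fig.colorbar(sc)  #reuse the scatter mappable instead of a manual ScalarMappable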
Related

Python plotting put and call options

I am trying to plot put and call options using Python, but I am getting errors when computing the values, and the plot looks wrong. I think there is something wrong with my loops over the matrices. My put and call prices should be 0.37 and 1.03, but I just get a printed-out matrix. Some help would be appreciated.
import matplotlib.pyplot as plt
import numpy as np
S = 8.5
K = 8
r = 0.02
sigma = 0.2
T = 1
h = 0.0005
N = int(T/h)
stock_price = np.zeros((N+1,N+1))
option_price_call = np.zeros((N+1,N+1))
option_price_put = np.zeros((N+1,N+1))
stock_price[0,0] = S
for j in range(1, N+1):
    stock_price[0,j]= stock_price[0,j-1] *np.exp(sigma*np.sqrt(h)*np.random.normal())
for j in range(0, N+1):
    option_price_call[N,j] = max(stock_price[N,j]-K,0)
    option_price_put[N,j] = max(K-stock_price[N,j],0)
for i in range(N-1, -1, -1):
    for j in range(0, i+1):
        stock_price[i,j] = stock_price[i+1,j]*np.exp(-r*h)
        option_price_call[i,j] = (option_price_call[i+1,j+1]+option_price_call[i+1,j])/2
        option_price_put[i,j] = (option_price_put[i+1,j+1]+option_price_put[i+1,j])/2
print(option_price_call)
print(option_price_put)
plt.figure(1)
plt.plot(stock_price[0,:],option_price_call[0,:], 'r', label = "Call option")
plt.plot(stock_price[0,:],option_price_put[0,:], 'b', label = "Put option")
plt.xlabel("Stock")
plt.ylabel("Price")
plt.legend()
plt.show()
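One guess at the "printed-out matrix" part (a hedged note, not from the original answer): the single option values from the backward induction sit at the root node of the lattice, so printing only that entry would show the prices rather than the full matrices:
#Sketch: read the prices from the root node [0, 0] instead of printing the whole arrays
print('Call price: %.4f' % option_price_call[0, 0])
print('Put price: %.4f' % option_price_put[0, 0])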
Here is an example.
import numpy as np
import matplotlib.pyplot as plt
import seaborn
# Fortis stock price
spot_price = 138.90
# Long put
strike_price_long_put = 135
premium_long_put = 4
# Long call
strike_price_long_call = 145
premium_long_call = 3.50
# Stock price range at expiration of the put
sT = np.arange(0.7*spot_price,1.3*spot_price,1)
def call_payoff(sT, strike_price, premium):
    return np.where(sT > strike_price, sT - strike_price, 0) - premium
payoff_long_call = call_payoff(sT, strike_price_long_call, premium_long_call)
# Plot
fig, ax = plt.subplots()
ax.spines['bottom'].set_position('zero')
ax.plot(sT,payoff_long_call,label='Long Call',color='r')
plt.xlabel('Stock Price')
plt.ylabel('Profit and loss')
plt.legend()
plt.show()

How to plot a contour within a bounded area?

I would like help plotting a contourf only within the region bounded by the blue area in the image, not over the whole graph. I am trying to use matplotlib's mpath and mpatches, but I am getting several errors.
How could I plot the difference in levels (contourf) only within the area delimited by the blue markings?
Here is my code and the picture:
#import base modules
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.interpolate import LinearNDInterpolator
from matplotlib.collections import PatchCollection
import matplotlib.path as mpath
import matplotlib.patches as mpatches
from mpl_toolkits.axes_grid1 import make_axes_locatable
#Loading the images
img1953 = plt.imread('O1953_9960_A3_1m.tif')
img1999 = plt.imread('O1999_9523_A3_1m.tif')
img2004 = plt.imread('O2004_2385c_A3_1m.tif')
#extract data from Surfer files with pandas and name the columns as x and y, first line omitted
area = pd.read_csv('diff_analyse_grd.bln', names=['x1', 'y1'], skiprows=1)
slope = pd.read_csv("Abbruchkante.bln", names=['x2', 'y2'], skiprows=1)
crack = pd.read_csv("Anrisskante.bln", names=['x3', 'y3'], skiprows=1)
xmin, xmax = -1100, -200 #set the minimum and maximum values for the axis
ymin, ymax = 1500, 2100
#Read csv from grid/contour file, year 1999
grid = np.loadtxt('kr_99_A3_o25.dat', delimiter = ' ', skiprows = 1, usecols = [1,2,3])
x99 = grid[:,0]
y99 = grid[:,1]
z99 = grid[:,2]
#linear space with 600x900 values between min and max graphic values
xi = np.linspace(xmin, xmax, 900)
yi = np.linspace(ymin, ymax, 600)
X, Y = np.meshgrid(xi, yi)
#Using LinearNDInterpolation, year 1999
interpolation99 = LinearNDInterpolator(list(zip(x99, y99)), z99)#https://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.LinearNDInterpolator.html
zi99 = interpolation99(X, Y)
#Read csv from grid/contour file, year 1953
grid = np.loadtxt('kr_53_A3_o25.dat', delimiter = ' ', skiprows = 1, usecols = [1,2,3])
x53 = grid[:,0]
y53 = grid[:,1]
z53 = grid[:,2]
#Using LinearNDInterpolation, year 1953
interpolation53 = LinearNDInterpolator(list(zip(x53,y53)), z53)
zi53 = interpolation53(X,Y)
#Calculation of the height variation:
dh5399 = zi53 - zi99
#Plot graph 1953 - 1999
plt.title('Hangrutschung im Blaubachgraben \n \n Differenz der Geländeoberflachen \n 1953 - 1999')
plt.imshow(img1953,cmap='gray', extent=(xmin, xmax, ymin, ymax))
plt.plot(area.x1,area.y1,'b-', label ='Studienbereich', linewidth = 2)
plt.plot(slope.x2,slope.y2,'r-', label = 'Abbruchkante')
plt.plot(crack.x3,crack.y3,'r--', label = 'Anrisskante')
cs = plt.contour(X,Y,zi99,levels=10, colors='#333')
plt.clabel(cs,inline=True, colors = 'k')
plt.legend(loc='upper right')
plt.xlabel('North [m]')
plt.ylabel('East [m]')
plt.xlim(xmin,xmax)
plt.ylim(ymin,ymax)
cs = plt.contourf(X, Y, dh5399, cmap='rainbow', vmin=-5, vmax=17)
#Place the colorbar to the right of the axis
ax = plt.gca()
divider = make_axes_locatable(ax)
cax = divider.append_axes('right',size='3%',pad=0.1)
cbar = plt.colorbar(cs, cax=cax)
cbar.set_label('\u0394H [m]', rotation = 360, loc = 'top')
plt.tight_layout()
plt.savefig('Dif1.png',dpi=400)
plt.show()
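One possible approach for the question above (a sketch under assumptions, not code from the post): build a Path from the blue study-area polygon (the 'area' DataFrame already loaded in the code) and clip the filled contour set to it.
#Sketch: clip contourf to the blue boundary; assumes area.x1/area.y1 trace a closed polygon
ax = plt.gca()
clip_path = mpath.Path(np.column_stack([area.x1, area.y1]))
clip_patch = mpatches.PathPatch(clip_path, facecolor='none', edgecolor='none', transform=ax.transData)
ax.add_patch(clip_patch)
cs = ax.contourf(X, Y, dh5399, cmap='rainbow', vmin=-5, vmax=17)
for artist in getattr(cs, 'collections', [cs]):  #older matplotlib exposes per-level collections, newer clips the ContourSet directly
    artist.set_clip_path(clip_patch)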

Scatter plot colorbar based on datapoint cluster

I am trying to achieve a plot similar to this one:
The color shows the clustering of the datapoints.
My code so far:
import pandas as pd
import readTrc
import matplotlib.pyplot as plt
import numpy as np
import os
import gc
trcpath = 'filename.trc'
datX, datY, m = readTrc.readTrc(trcpath)
srx, sry = pd.Series(datX * 1000), pd.Series(datY * 1000)
df_plot = pd.concat([srx, sry], axis = 1)
df_plot.set_index(0, inplace = True)
fig, ax = plt.subplots()
#Eliminate Noise
df_plot[df_plot < 3] = 0
df = df_plot[df_plot > 3]
df[df < 3] = None
df = df.dropna()
#Plot Parameters
p = np.array(df[1].tolist()[:-1])
p_nach = np.array(df[1].tolist()[1:])
d_t = np.array(pd.Series(df.index).diff().tolist()[1:])
#Graph Limit
graphlim = 101
#Plot
plt.scatter(p, p_nach,
            edgecolors = 'none',
            c = p,
            s = 20,
            cmap=plt.cm.get_cmap('jet'))
plt.xlim(0,graphlim)
plt.ylim(0,graphlim)
plt.xticks(range(0,graphlim,int(graphlim/10)))
plt.yticks(range(0,graphlim,int(graphlim/10)))
plt.colorbar()
plt.grid(zorder = 0, alpha = 0.3)
ax.set_xlabel('p / mV')
ax.set_ylabel('p_nach / mV')
##plt.savefig(dpi = 300)
plt.show()
##plt.close()
##fig.clear()
##gc.collect()
print('Progress... done!')
As you can see, the colorbar does not represent the clustering; it represents the position on the x-axis instead. How do I configure my colorbar to represent the number of datapoints in an area?
Folder with files: Link
import pandas as pd
import readTrc
import matplotlib.pyplot as plt
import numpy as np
import os
import gc
trcpath = 'filename.trc'
datX, datY, m = readTrc.readTrc(trcpath)
df = pd.DataFrame({'time': datX * 1000, 'volts': datY * 1000})
reduce_noise_df = df[df.volts >= 3.0]
d_t = reduce_noise_df.time.diff()[1:]
p = reduce_noise_df.volts[:-1]
p_nach = reduce_noise_df.volts[1:]
#Graph Limit
graphlim = 41
#Plot
fig, ax = plt.subplots(figsize=(6,6))
plt.scatter(p, p_nach,
            edgecolors = 'none',
            c = d_t,
            s = 20,
            cmap=plt.cm.get_cmap('jet'))
plt.xlim(0, graphlim)
plt.ylim(0, graphlim)
plt.xticks(range(0, graphlim, int(graphlim/10)))
plt.yticks(range(0, graphlim, int(graphlim/10)))
plt.colorbar()
plt.grid(zorder = 0, alpha = 0.3)
ax.set_xlabel('p / mV')
ax.set_ylabel('p_nach / mV')
plt.show()
I began by removing unnecessary code.
The main issue was c = p instead of c = d_t.
Plot of the waveform from your LeCroy WR640Zi, colored by data density:
import pandas as pd
import readTrc
import matplotlib.pyplot as plt
import numpy as np
import os
import gc
from scipy.stats import gaussian_kde
trcpath = 'filename.trc'
datX, datY, m = readTrc.readTrc(trcpath)
df = pd.DataFrame({'time': datX * 1000, 'volts': datY * 1000})
reduce_noise_df = df[df.volts >= 3.0]
y = np.array(reduce_noise_df.volts.tolist())
x = np.array(reduce_noise_df.time.tolist())
# Calculate point density
xy = np.vstack([x, y])
z = gaussian_kde(xy)(xy)
# Sort points by density
idx = z.argsort()
x, y, z = x[idx], y[idx], z[idx]
#Plot
fig, ax = plt.subplots(figsize=(6,6))
plt.scatter(x, y,
            edgecolors = 'none',
            c = z,
            s = 20,
            cmap=plt.cm.get_cmap('jet'))
plt.colorbar()
plt.grid(zorder = 0, alpha = 0.3)
ax.set_xlabel('Time (ms)')
ax.set_ylabel('Voltage (mV)')
plt.show()

Scaling a PDF to show 100% at peak

I'm displaying a histogram of my data, with an overlaid PDF. My plots all look something like this:
and I'm trying to scale the red curve to show 100% at the peak.
The following toy code is identical to what I'm actually using, apart from the lines between the two % markers:
%
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
import numpy as np
my_randoms = np.random.normal(0.5, 1, 50000)
dictOne = {"delta z":my_randoms}
df = pd.DataFrame(dictOne)
df = df[df['delta z'] > -999]
%
fig, ax = plt.subplots()
h, edges, _ = ax.hist(df['delta z'], alpha = 1, density = False, bins = 100)
param = stats.norm.fit(df['delta z'].dropna()) # Fit a normal distribution to the data
pdf_fitted = stats.norm.pdf(df['delta z'], *param)
x = np.linspace(*df['delta z'].agg([min, max]), 100) # x-values
binwidth = np.diff(edges).mean()
ax.plot(x, stats.norm.pdf(x, *param)*h.sum()*binwidth, color = 'r')
# Decorations
graph_title = 'U-B'
plt.grid(which = 'both')
plt.title(r'$\Delta z$ distribution for %s'%graph_title, fontsize = 25)
plt.xlabel(r'$\Delta z = z_{spec} - z_{photo}$', fontsize = 25)
plt.ylabel('Number', fontsize = 25)
plt.xticks(fontsize = 25)
plt.yticks(fontsize = 25)
xmin, xmax = min(df['delta z']), max(df['delta z'])
plt.xlim(xmin, xmax)
plt.annotate(
r'''$\mu_{\Delta z}$ = %.3f
$\sigma_{\Delta z}$ = %.3f'''%(param[0], param[1]),
fontsize = 25, color = 'r', xy=(0.85, 0.85), xycoords='axes fraction')
How would I define another axes object from 0 to 100 on the right-hand side and map the PDF to that?
Or is there a better way to do it?
This is kind of a follow-up to my previous question.
You can use density=True when plotting the histogram.
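A minimal illustration (using the variables from the question, not the poster's full code): with density=True the bars integrate to 1, so the fitted PDF can be drawn on the same scale without rescaling.
h, edges, _ = ax.hist(df['delta z'], bins = 100, density = True)  #normalized histogram
ax.plot(x, stats.norm.pdf(x, *param), color = 'r')                #PDF on the same scale, no h.sum()*binwidth factor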
Or you can use .twinx():
fig = plt.figure(figsize=(10, 8), dpi=72.0)
n_rows = 2
n_cols = 2
ax1 = fig.add_subplot(n_rows, n_cols, 1)
ax2 = fig.add_subplot(n_rows, n_cols, 2)
ax3 = ax1.twinx()
https://matplotlib.org/gallery/api/two_scales.html
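Applied to the question's variables (a sketch under assumptions, using ax, x and param from the posted code), the PDF can go on a twin right-hand axis scaled so that its peak reads 100:
ax2 = ax.twinx()                               #right-hand axis sharing the same x-axis
pdf = stats.norm.pdf(x, *param)
ax2.plot(x, pdf/pdf.max()*100, color = 'r')    #rescale so the peak sits at 100
ax2.set_ylim(0, 105)
ax2.set_ylabel('% of peak')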

Subset data points outside confidence interval

Using the same example as in this previous question (code pasted below), we can get the 95% CI with the summary_table function from statsmodels' outliers_influence. But now, how could I subset only the data points (x and y) that lie outside the confidence interval?
import numpy as np
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import summary_table
#measurements genre
n = 100
x = np.linspace(0, 10, n)
e = np.random.normal(size=n)
y = 1 + 0.5*x + 2*e
X = sm.add_constant(x)
re = sm.OLS(y, X).fit()
st, data, ss2 = summary_table(re, alpha=0.05)
predict_ci_low, predict_ci_upp = data[:, 6:8].T
It might be a bit late for this, but you could put it in a pandas.DataFrame and filter it using boolean masks. Assuming I understood your question correctly:
import numpy as np
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import summary_table
import matplotlib.pyplot as plot
## Import pandas
import pandas as pd
#measurements genre
n = 100
x = np.linspace(0, 10, n)
e = np.random.normal(size=n)
y = 1 + 0.5*x + 2*e
X = sm.add_constant(x)
re = sm.OLS(y, X).fit()
st, data, ss2 = summary_table(re, alpha=0.05)
# Make prediction
prediction = re.predict(X)
predict_ci_low, predict_ci_upp = data[:, 6:8].T
# Put y and x in a pd.DataFrame
df = pd.DataFrame(y).set_index(x)
# Get the y values that are out of the ci intervals. This could be done directly in the df indexer
out_up = y > predict_ci_upp
out_down = y < predict_ci_low
# Plot everything
plot.plot(x, y, label = 'train')
plot.plot(df[out_up], marker = 'o', linewidth = 0)
plot.plot(df[out_down], marker = 'o', linewidth = 0)
plot.plot(x, prediction, label = 'prediction')
plot.plot(x, predict_ci_upp, label = 'ci_up')
plot.plot(x, predict_ci_low, label = 'ci_low')
plot.legend(loc='best')
Here is the resulting plot:
