Scatter plot colorbar based on datapoint cluster - python-3.x

I am trying to achieve a plot similar to this one:
The color shows the clustering of the datapoints.
My code so far:
import pandas as pd
import readTrc
import matplotlib.pyplot as plt
import numpy as np
import os
import gc
trcpath = 'filename.trc'
datX, datY, m = readTrc.readTrc(trcpath)
srx, sry = pd.Series(datX * 1000), pd.Series(datY * 1000)
df_plot = pd.concat([srx, sry], axis = 1)
df_plot.set_index(0, inplace = True)
fig, ax = plt.subplots()
#Eliminate Noise
df_plot[df_plot < 3] = 0
df = df_plot[df_plot > 3]
df[df < 3] = None
df = df.dropna()
#Plot Parameters
p = np.array(df[1].tolist()[:-1])
p_nach = np.array(df[1].tolist()[1:])
d_t = np.array(pd.Series(df.index).diff().tolist()[1:])
#Graph Limit
graphlim = 101
#Plot
plt.scatter(p, p_nach,
edgecolors = 'none',
c = p,
s = 20,
cmap=plt.cm.get_cmap('jet'))
plt.xlim(0,graphlim)
plt.ylim(0,graphlim)
plt.xticks(range(0,graphlim,int(graphlim/10)))
plt.yticks(range(0,graphlim,int(graphlim/10)))
plt.colorbar()
plt.grid(zorder = 0, alpha = 0.3)
ax.set_xlabel('p / mV')
ax.set_ylabel('p_nach / mV')
##plt.savefig(dpi = 300)
plt.show()
##plt.close()
##fig.clear()
##gc.collect()
print('Progress... done!')
As you can see, the colorbar does not represent the clustering and instead the place on the x-axis. How do I configure my colorbar to represent the amount of datapoints in an area?
Folder with files: Link

import pandas as pd
import readTrc
import matplotlib.pyplot as plt
import numpy as np
import os
import gc
trcpath = 'filename.trc'
datX, datY, m = readTrc.readTrc(trcpath)
df = pd.DataFrame({'time': datX * 1000, 'volts': datY * 1000})
reduce_noise_df = df[df.volts >= 3.0]
d_t = reduce_noise_df.time.diff()[1:]
p = reduce_noise_df.volts[:-1]
p_nach = reduce_noise_df.volts[1:]
#Graph Limit
graphlim = 41
#Plot
fig, ax = plt.subplots(figsize=(6,6))
plt.scatter(p, p_nach,
edgecolors = 'none',
c = d_t,
s = 20,
cmap=plt.cm.get_cmap('jet'))
plt.xlim(0, graphlim)
plt.ylim(0, graphlim)
plt.xticks(range(0, graphlim, int(graphlim/10)))
plt.yticks(range(0, graphlim, int(graphlim/10)))
plt.colorbar()
plt.grid(zorder = 0, alpha = 0.3)
ax.set_xlabel('p / mV')
ax.set_ylabel('p_nach / mV')
plt.show()
I began be removing unnecessary code
The main issue was c = p instead of c = d_t.
Plot of waveform from your Le Croy WR640Zi colored by data density
import pandas as pd
import readTrc
import matplotlib.pyplot as plt
import numpy as np
import os
import gc
from scipy.stats import gaussian_kde
trcpath = 'filename.trc'
datX, datY, m = readTrc.readTrc(trcpath)
df = pd.DataFrame({'time': datX * 1000, 'volts': datY * 1000})
reduce_noise_df = df[df.volts >= 3.0]
y = np.array(reduce_noise_df.volts.tolist())
x = np.array(reduce_noise_df.time.tolist())
# Calculate point density
xy = np.vstack([x, y])
z = gaussian_kde(xy)(xy)
# Sort points by density
idx = z.argsort()
x, y, z = x[idx], y[idx], z[idx]
#Plot
fig, ax = plt.subplots(figsize=(6,6))
plt.scatter(x, y,
edgecolors = 'none',
c = z,
s = 20,
cmap=plt.cm.get_cmap('jet'))
plt.colorbar()
plt.grid(zorder = 0, alpha = 0.3)
ax.set_xlabel('Time (ms)')
ax.set_ylabel('Voltage (mV)')
plt.show()

Related

Simulate stock price based on a given equation in Python

How can I generate a price time series using the following equation:
p(t) = p0(1+A * sin(ωt +0.5η(t)))
where t ranges from 0 to 1 in 1000 time steps, p0 = 100, A = 0.1, and ω = 100. η(t) is a sequence of i.i.d Gaussian random variables with zero mean and unit variance.
I have use the code as follows to generate price, but it seems not as required. So I need helps from the community. Thanks in advance.
from scipy.stats import norm
import numpy as np
import matplotlib.pyplot as plt
mu = 0
sigma = 1
np.random.seed(2020)
dist = norm(loc = mu,scale=sigma)
sample = dist.rvs(size = 1000)
stock_price = np.exp(sample.cumsum())
print(stock_price)
plt.plot(stock_price)
plt.xlabel("Day")
plt.ylabel("Price")
plt.title("Simulated Stock price")
plt.show()
Assuming I haven't missed anything, this should do the trick
import numpy as np
import matplotlib.pyplot as plt
n_t = np.random.normal(0, 1, 1000)
t = np.arange(0, 1, 1/1000)
p_0, A, w = 100, 0.1, 100
ts = p_0 * (1 + A * np.sin(w * t + 0.5 * n_t))
plt.plot(t, ts)
plt.xlabel("Day")
plt.ylabel("Price")
plt.show()
which gives the plot
My trial, not sure if it's correct, welcome to give me some comments.
import numpy as np
import math
np.random.seed(2020)
mu = 0
sigma = 1
dt = 0.01
p0 = 100
A = 0.1
w = 100
N = 1000
for t in np.linspace(0, 1, 1000):
X = np.random.normal(mu * dt, sigma* np.sqrt(dt), N)
X = np.cumsum(X)
pt = p0 * (1+ A*np.sin(w*t + 0.5*X))
# print(pt)
plt.plot(pt)
plt.xlabel("Day")
plt.ylabel("Price")
plt.title("Simulated Stock price")
plt.show()
Out:

How to adjust color in KDE scatter plot?

I wrote a program to plot oscilloscope data and make a KDE scatter plot with a colorbar. Unfortunately it requires a third party lib (readTrc) as well as the oscilloscope binary file which size is 200MB. The lib can be found on github.
import pandas as pd
import readTrc
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import collections
from scipy.stats import gaussian_kde
trcpath = 'filename.trc' #Binary Oscilloscope File (200 MB)
datX, datY, m = readTrc.readTrc(trcpath)
srx, sry = pd.Series(datX * 1000), pd.Series(datY * 1000)
df = pd.concat([srx, sry], axis = 1)
df.set_index(0, inplace = True)
df = df.abs() #Build Dataframe from above file
fig = plt.figure()
#Eliminate Noise
df[df < 3] = None
df = df.dropna()
#x and y axes data to plot
q1 = np.array(df[1].tolist()[:-2])
q2 = np.array(df[1].tolist()[1:-1])
q3 = np.array(df[1].tolist()[2:])
dq1 = q2 - q1
dq2 = q3 - q2
#Create first Dataset
qqstack = []
xy = np.vstack([dq1,dq2])
#Determine max value for colorbar (highest repeating x/y combination)
df_d = pd.DataFrame([dq1,dq2]).T
for idx, row in df_d.iterrows():
if row[0] == row[1]:
qqstack.append((row[0], row[1]))
cbar_max = collections.Counter(qqstack).most_common(1)[0][-1]
#sort to show most present values last
z = gaussian_kde(xy)(xy)
idx = z.argsort()
x, y, z = dq1[idx], dq2[idx], z[idx]
#plot graph
plt.scatter(x, y,
c=z,
s=20,
cmap = plt.cm.get_cmap('jet'))
#create colormap variable
sm = plt.cm.ScalarMappable(cmap = plt.cm.get_cmap('jet'),
norm = matplotlib.colors.PowerNorm(vmin = -0.1, vmax = cbar_max, gamma = 1))
sm._A = []
fig.colorbar(sm, ticks = range(0, cbar_max, 250))
plt.grid(zorder = 0, alpha = 0.3)
plt.xlabel('dq1 / mV')
plt.ylabel('dq2 / mV')
plt.show()
How can I adjust the color allocation in the plot? I want there to be less blue space so the transition is visible more, like on this graph:

Subset data points outside confidence interval

Using the same example as from this previous question (code pasted below), we can get the 95% CI with the summary_table function from statsmodels outliers_influence. But now, how would it be possible to only subset the data points (x and y) that are outside the confidence interval?
import numpy as np
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import summary_table
#measurements genre
n = 100
x = np.linspace(0, 10, n)
e = np.random.normal(size=n)
y = 1 + 0.5*x + 2*e
X = sm.add_constant(x)
re = sm.OLS(y, X).fit()
st, data, ss2 = summary_table(re, alpha=0.05)
predict_ci_low, predict_ci_upp = data[:, 6:8].T
It might be a bit late for this, but you could put it in a pandas.DataFrame and filter depending on a list of booleans. Assuming I got your question:
import numpy as np
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import summary_table
import matplotlib.pyplot as plot
## Import pandas
import pandas as pd
#measurements genre
n = 100
x = np.linspace(0, 10, n)
e = np.random.normal(size=n)
y = 1 + 0.5*x + 2*e
X = sm.add_constant(x)
re = sm.OLS(y, X).fit()
st, data, ss2 = summary_table(re, alpha=0.05)
# Make prediction
prediction = re.predict(X)
predict_ci_low, predict_ci_upp = data[:, 6:8].T
# Put y and x in a pd.DataFrame
df = pd.DataFrame(y).set_index(x)
# Get the y values that are out of the ci intervals. This could be done directly in the df indexer
out_up = y > predict_ci_upp
out_down = y < predict_ci_low
# Plot everything
plot.plot(x, y, label = 'train')
plot.plot(df[out_up], marker = 'o', linewidth = 0)
plot.plot(df[out_down], marker = 'o', linewidth = 0)
plot.plot(x, predictionTrain, label = 'prediction')
plot.plot(x, predict_ci_upp, label = 'ci_up')
plot.plot(x, predict_ci_low, label = 'ci_low')
plot.legend(loc='best')
Here is the resulting plot:

Recreating price distribution chart

I'm trying to recreate following chart:
Currently I have no idea what should I do now, wondering if there is a possibility to merge these charts or plot everything in one chart code,
import numpy as np
import math
import matplotlib.pyplot as plt
from scipy.stats import norm
fig = plt.figure()
ax1 = plt.subplot2grid((6, 2), (0, 0), rowspan=6, colspan=1)
ax2 = plt.subplot2grid((6, 2), (0, 1), rowspan=6, colspan=1)
ax2.axes.get_xaxis().set_visible(False)
ax2.axes.get_yaxis().set_visible(False)
S = 5
T = 100
mu = 0
vol = 0.3
for i in range(10):
daily_returns = np.random.normal(mu / T, vol / math.sqrt(T), T) + 1
price_list = [S]
for x in daily_returns:
price_list.append(price_list[-1] * x)
ax1.plot(price_list)
ax1.set_ylim(0,10)
srange = np.arange(-4, 4, 0.01)
mean = 0
standard_deviation = 1
var = norm.pdf(srange, mean, standard_deviation)
ax2.plot(var,srange, color="grey")
plt.show()

facecolor for 3d plot doesn't work

I am new to Python and trying to do a 3d plot and color it with a 4th variable. I use facecolors for this, and for one example below, it doesn't work properly. I have positive value but facecolor only displays negatives. Much appreciate if anybody looks into this.
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
from scipy import ndimage
import scipy.ndimage.filters
def RP_source(theta,phi,MT):
x1 = np.sin(theta)*np.cos(phi)
x2 = np.sin(theta)*np.sin(phi)
x3 = np.cos(theta)
#
M11 = MT[0,0]
M22 = MT[1,1]
M33 = MT[2,2]
M12 = MT[0,1]
M23 = MT[1,2]
M13 = MT[0,2]
core = M11*x1*x1 + M22*x2*x2 + M33*x3*x3 + 2*M12*x1*x2 + 2*M13*x1*x3 + 2*M23*x2*x3
## S-wave
# S-wave displacement RP 3-components
us1 = (x1*M11 + x2*M12 + x3*M13) - x1*core
us2 = (x1*M12 + x2*M22 + x3*M23) - x2*core
us3 = (x1*M13 + x2*M23 + x3*M33) - x3*core
# transform S-wave displacement vector to the spherical coordinate (r,theta, phi)
USV = np.cos(theta)*np.cos(phi)*us1 + np.cos(theta)*np.sin(phi)*us2 - np.sin(theta)*us3;
return USV, us1, us2, us3
####################################################################
phi = np.linspace(0., 360., 90) # (degrees) azimuth angle with the x1-axis
theta = np.linspace(0., 180. ,45) #(degrees) angle with x3-axis (assumes positive x3 upward)
# convert to radian
theta = np.radians(theta)
phi = np.radians(phi)
theta, phi = np.meshgrid(theta, phi)
st = np.sin(theta)
ct = np.cos(theta)
sp = np.sin(phi)
cp = np.cos(phi)
# generate the propagation ray vectror
x1 = st*cp
x2 = st*sp
x3 = ct
# define moment-tensor matrix
MT = np.array([[0, 1., 0.],[1., 0., 0.],[0., 0., 0.]])
USV, us1,us2,us3 = RP_source(theta,phi,MT)
#########################
# first plot
scale = np.abs(USV)
x1_sv = scale*x1
x2_sv = scale*x2
x3_sv = scale*x3
fig =plt.figure()
ax1 = fig.gca(projection='3d')
surf1 = ax1.plot_surface(x1_sv,x2_sv,x3_sv,rstride=1, cstride=1, facecolors=cm.jet(USV), alpha=0.6)
plt.ylabel('y-axis')
plt.xlabel('x-axis')
m1 = cm.ScalarMappable(cmap=cm.jet)
m1.set_array(USV)
plt.colorbar(m1)
cm.jet wants a number in the interval [0, 1].
Replace your surf1 =... line with the following line:
surf1 = ax1.plot_surface(x1_sv,x2_sv,x3_sv,rstride=1, cstride=1, facecolors=cm.jet(USV-np.min(USV.ravel())), alpha=0.6)

Resources