seaborn joyplot does not fill all the way to the top - python-3.x

I'm using seaborn in Python 3.5. Taking the example joy plot from the gallery, modified slightly to save the figure:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})
# Create the data
rs = np.random.RandomState(1979)
x = rs.randn(500)
g = np.tile(list("ABCDEFGHIJ"), 50)
df = pd.DataFrame(dict(x=x, g=g))
m = df.g.map(ord)
df["x"] += m
# Initialize the FacetGrid object
pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=15, size=.5, palette=pal)
# Draw the densities in a few steps
g.map(sns.kdeplot, "x", clip_on=False, shade=True, alpha=1, lw=1.5, bw=.2)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw=.2)
g.map(plt.axhline, y=0, lw=2, clip_on=False)
# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
ax = plt.gca()
ax.text(0, .2, label, fontweight="bold", color=color,
ha="left", va="center", transform=ax.transAxes)
g.map(label, "x")
# Set the subplots to overlap
g.fig.subplots_adjust(hspace=-.25)
# Remove axes details that don't play will with overlap
g.set_titles("")
g.set(yticks=[])
g.despine(bottom=True, left=True)
plt.savefig('tmp.png')
There is a slight visual defect, namely the KDEs do not quite fill all the way to the top. This is most visible in rows B, G, H and J:
Any idea what's causing this?

Related

How to plot vertical stacked graph from different text files?

I have 5 txt files which contain data give me the effect of increasing heat on my samples and I want plot them in a vertical stacked graph, Where the final figure is 5 vertical stacked chart sharing the same X-axis and each line in a separate one to reveal the difference between them.
I wrote this code:
import glob
import pandas as pd
import matplotlib.axes._axes as axes
import matplotlib.pyplot as plt
input_files = glob.glob('01-input/RR_*.txt')
for file in input_files:
data = pd.read_csv(file, header=None, delimiter="\t").values
x = data[:,0]
y = data[:,1]
plt.subplot(2, 1, 1)
plt.plot(x, y, linewidth=2, linestyle=':')
plt.tight_layout()
plt.xlabel('x-axis')
plt.ylabel('y-axis')
But the result is only one graph containing all the lines:
I want to get the following chart:
import matplotlib.pyplot as plt
import numpy as np
# just a dummy data
x = np.linspace(0, 2700, 50)
all_data = [np.sin(x), np.cos(x), x**0.3, x**0.4, x**0.5]
n = len(all_data)
n_rows = n
n_cols = 1
fig, ax = plt.subplots(n_rows, n_cols) # each element in "ax" is a axes
for i, y in enumerate(all_data):
ax[i].plot(x, y, linewidth=2, linestyle=':')
ax[i].set_ylabel('y-axis')
# You can to use a list of y-labels. Example:
# my_labels = ['y1', 'y2', 'y3', 'y4', 'y5']
# ax[i].set_ylabel(my_labels[i])
# The "my_labels" lenght must be "n" too
plt.xlabel('x-axis') # add xlabel at last axes
plt.tight_layout()

Is there a library that will help me fit data easily? I found fitter and i will provide the code but it shows some errors

So, here is my code:
import pandas as pd
import scipy.stats as st
import matplotlib.pyplot as plt
from matplotlib.ticker import AutoMinorLocator
from fitter import Fitter, get_common_distributions
df = pd.read_csv("project3.csv")
bins = [282.33, 594.33, 906.33, 1281.33, 15030.33, 1842.33, 2154.33, 2466.33, 2778.33, 3090.33, 3402.33]
#declaring
facecolor = '#EAEAEA'
color_bars = '#3475D0'
txt_color1 = '#252525'
txt_color2 = '#004C74'
fig, ax = plt.subplots(1, figsize=(16, 6), facecolor=facecolor)
ax.set_facecolor(facecolor)
n, bins, patches = plt.hist(df.City1, color=color_bars, bins=10)
#grid
minor_locator = AutoMinorLocator(2)
plt.gca().xaxis.set_minor_locator(minor_locator)
plt.grid(which='minor', color=facecolor, lw = 0.5)
xticks = [(bins[idx+1] + value)/2 for idx, value in enumerate(bins[:-1])]
xticks_labels = [ "{:.0f}-{:.0f}".format(value, bins[idx+1]) for idx, value in enumerate(bins[:-1])]
plt.xticks(xticks, labels=xticks_labels, c=txt_color1, fontsize=13)
#beautify
ax.tick_params(axis='x', which='both',length=0)
plt.yticks([])
ax.spines['bottom'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
for idx, value in enumerate(n):
if value > 0:
plt.text(xticks[idx], value+5, int(value), ha='center', fontsize=16, c=txt_color1)
plt.title('Histogram of rainfall in City1\n', loc = 'right', fontsize = 20, c=txt_color1)
plt.xlabel('\nCentimeters of rainfall', c=txt_color2, fontsize=14)
plt.ylabel('Frequency of occurrence', c=txt_color2, fontsize=14)
plt.tight_layout()
#plt.savefig('City1_Raw.png', facecolor=facecolor)
plt.show()
city1 = df['City1'].values
f = Fitter(city1, distributions=get_common_distributions())
f.fit()
fig = f.plot_pdf(names=None, Nbest=4, lw=1, method='sumsquare_error')
plt.show()
print(f.get_best(method = 'sumsquare_error'))
The issue is with the plots it shows. The first histogram it generates is
Next I get another graph with best fitted distributions which is
Then an output statement
{'chi2': {'df': 10.692966790090342, 'loc': 16.690849400411103, 'scale': 118.71595997157786}}
Process finished with exit code 0
I have a couple of questions. Why is chi2, the best fitted distribution not plotted on the graph?
How do I plot these distributions on top of the histograms and not separately? The hist() function in fitter library can do that but there I don't get to control the bins and so I end up getting like 100 bins with some flat looking data.
How do I solve this issue? I need to plot the best fit curve on the histogram that looks like image1. Can I use any other module/package to get the work done in similar way? This uses least squares fit but I am OK with least likelihood or log likelihood too.
Simple way of plotting things on top of each other (using some properties of the Fitter class)
import scipy.stats as st
import matplotlib.pyplot as plt
from fitter import Fitter, get_common_distributions
from scipy import stats
numberofpoints=50000
df = stats.norm.rvs( loc=1090, scale=500, size=numberofpoints)
fig, ax = plt.subplots(1, figsize=(16, 6))
n, bins, patches = ax.hist( df, bins=30, density=True)
f = Fitter(df, distributions=get_common_distributions())
f.fit()
errorlist = sorted(
[
[f._fitted_errors[dist], dist]
for dist in get_common_distributions()
]
)[:4]
for err, dist in errorlist:
ax.plot( f.x, f.fitted_pdf[dist] )
plt.show()
Using the histogram normalization, one would need to play with scaling to generalize again.

Change colorbar limits without changing the values of the data it represents in scatter

I'm trying to change a colorbar attached to a scatter plot so that the minimum and maximum of the colorbar are the minimum and maximum of the data, but I want the data to be centred at zero as I'm using a colormap with white at zero. Here is my example
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(0, 1, 61)
y = np.linspace(0, 1, 61)
C = np.linspace(-10, 50, 61)
M = np.abs(C).max() # used for vmin and vmax
fig, ax = plt.subplots(1, 1, figsize=(5,3), dpi=150)
sc=ax.scatter(x, y, c=C, marker='o', edgecolor='k', vmin=-M, vmax=M, cmap=plt.cm.RdBu_r)
cbar=fig.colorbar(sc, ax=ax, label='$R - R_0$ (mm)')
ax.set_xlabel('x')
ax.set_ylabel('y')
As you can see from the attached figure, the colorbar goes down to -M, where as I want the bar to just go down to -10, but if I let vmin=-10 then the colorbar won't be zerod at white. Normally, setting vmin to +/- M when using contourf the colorbar automatically sorts to how I want. This sort of behaviour is what I expect when contourf uses levels=np.linspace(-M,M,61) rather than setting it with vmin and vmax with levels=62. An example showing the default contourf colorbar behaviour I want in my scatter example is shown below
plt.figure(figsize=(6,5), dpi=150)
plt.contourf(x, x, np.reshape(np.linspace(-10, 50, 61*61), (61,61)),
levels=62, vmin=-M, vmax=M, cmap=plt.cm.RdBu_r)
plt.colorbar(label='$R - R_0$ (mm)')
Does anyone have any thoughts? I found this link which I thought might solve the problem, but when executing the cbar.outline.set_ydata line I get this error AttributeError: 'Polygon' object has no attribute 'set_ydata' .
EDIT a little annoyed that someone has closed this question without allowing me to clarify any questions they might have, as none of the proposed solutions are what I'm asking for.
As for Normalize.TwoSlopeNorm, I do not want to rescale the smaller negative side to use the entire colormap range, I just want the colorbar attached to the side of my graph to stop at -10.
This link also does not solve my issue, as it's the TwoSlopeNorm solution again.
After changing the ylim of the colorbar, the rectangle formed by the surrounding spines is too large. You can make this outline invisible. And then add a new rectangular border:
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(0, 1, 61)
y = np.linspace(0, 1, 61)
C = np.linspace(-10, 50, 61)
M = np.abs(C).max() # used for vmin and vmax
fig, ax = plt.subplots(1, 1, figsize=(5, 3), dpi=150)
sc = ax.scatter(x, y, c=C, marker='o', edgecolor='k', vmin=-M, vmax=M, cmap=plt.cm.RdBu_r)
cbar = fig.colorbar(sc, ax=ax, label='$R - R_0$ (mm)')
cb_ymin = C.min()
cb_ymax = C.max()
cb_xmin, cb_xmax = cbar.ax.get_xlim()
cbar.ax.set_ylim(cb_ymin, cb_ymax)
cbar.outline.set_visible(False) # hide the surrounding spines, which are too large after set_ylim
cbar.ax.add_patch(plt.Rectangle((cb_xmin, cb_ymin), cb_xmax - cb_xmin, cb_ymax - cb_ymin,
fc='none', ec='black', clip_on=False))
plt.show()
Another approach until v3.5 is released is to make a custom colormap that does what you want (see also https://matplotlib.org/stable/tutorials/colors/colormap-manipulation.html#sphx-glr-tutorials-colors-colormap-manipulation-py)
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.cm as cm
from matplotlib.colors import ListedColormap
fig, axs = plt.subplots(2, 1)
X = np.random.randn(32, 32) + 2
pc = axs[0].pcolormesh(X, vmin=-6, vmax=6, cmap='RdBu_r')
fig.colorbar(pc, ax=axs[0])
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.cm as cm
from matplotlib.colors import ListedColormap
fig, axs = plt.subplots(2, 1)
X = np.random.randn(32, 32) + 2
pc = axs[0].pcolormesh(X, vmin=-6, vmax=6, cmap='RdBu_r')
fig.colorbar(pc, ax=axs[0])
def keep_center_colormap(vmin, vmax, center=0):
vmin = vmin - center
vmax = vmax - center
dv = max(-vmin, vmax) * 2
N = int(256 * dv / (vmax-vmin))
RdBu_r = cm.get_cmap('RdBu_r', N)
newcolors = RdBu_r(np.linspace(0, 1, N))
beg = int((dv / 2 + vmin)*N / dv)
end = N - int((dv / 2 - vmax)*N / dv)
newmap = ListedColormap(newcolors[beg:end])
return newmap
newmap = keep_center_colormap(-2, 6, center=0)
pc = axs[1].pcolormesh(X, vmin=-2, vmax=6, cmap=newmap)
fig.colorbar(pc, ax=axs[1])
plt.show()

Specifying the color Increments of heat-map in python

Is there a way to specify in Seaborn or Matplotlib the color increments of heat-map color scale. For instance, for data-frame that contains normalized values between 0-1, to specify 100,discrete, color increments so each value is distinguished from other values?
Thank you in advance
There are two principle approaches to discetize a heatmap into n colors:
Supply the data rounded to the n values.
Use a discrete colormap.
The following code shows those two options.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
x, y = np.meshgrid(range(15),range(6))
v = np.random.rand(len(x.flatten()))
df = pd.DataFrame({"x":x.flatten(), "y":y.flatten(),"value":v})
df = df.pivot(index="y", columns="x", values="value")
n = 4.
fig, (ax0, ax, ax2) = plt.subplots(nrows=3)
### original
im0 = ax0.imshow(df.values, cmap="viridis", vmin=0, vmax=1)
ax0.set_title("original")
### Discretize array
arr = np.floor(df.values * n)/n
im = ax.imshow(arr, cmap="viridis", vmin=0, vmax=1)
ax.set_title("discretize values")
### Discretize colormap
cmap = plt.cm.get_cmap("viridis", n)
im2 = ax2.imshow(df.values, cmap=cmap, vmin=0, vmax=1 )
ax2.set_title("discretize colormap")
#colorbars
fig.colorbar(im0, ax=ax0)
fig.colorbar(im, ax=ax)
fig.colorbar(im2, ax=ax2, ticks=np.arange(0,1,1./n), )
plt.tight_layout()
plt.show()

How can I add a normal distribution curve to multiple histograms?

With the following code I create four histograms:
import numpy as np
import pandas as pd
data = pd.DataFrame(np.random.normal((1, 2, 3 , 4), size=(100, 4)))
data.hist(bins=10)
I want the histograms to look like this:
I know how to make it one graph at the time, see here
But how can I do it for multiple histograms without specifying each single one? Ideally I could use 'pd.scatter_matrix'.
Plot each histogram seperately and do the fit to each histogram as in the example you linked or take a look at the hist api example here. Essentially what should be done is
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
fig = plt.figure()
ax1 = fig.add_subplot(221)
ax2 = fig.add_subplot(222)
ax3 = fig.add_subplot(223)
ax4 = fig.add_subplot(224)
for ax in [ax1, ax2, ax3, ax4]:
n, bins, patches = ax.hist(**your_data_here**, 50, normed=1, facecolor='green', alpha=0.75)
bincenters = 0.5*(bins[1:]+bins[:-1])
y = mlab.normpdf( bincenters, mu, sigma)
l = ax.plot(bincenters, y, 'r--', linewidth=1)
plt.show()

Resources