Specifying the color Increments of heat-map in python - python-3.x

Is there a way to specify in Seaborn or Matplotlib the color increments of heat-map color scale. For instance, for data-frame that contains normalized values between 0-1, to specify 100,discrete, color increments so each value is distinguished from other values?
Thank you in advance

There are two principle approaches to discetize a heatmap into n colors:
Supply the data rounded to the n values.
Use a discrete colormap.
The following code shows those two options.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
x, y = np.meshgrid(range(15),range(6))
v = np.random.rand(len(x.flatten()))
df = pd.DataFrame({"x":x.flatten(), "y":y.flatten(),"value":v})
df = df.pivot(index="y", columns="x", values="value")
n = 4.
fig, (ax0, ax, ax2) = plt.subplots(nrows=3)
### original
im0 = ax0.imshow(df.values, cmap="viridis", vmin=0, vmax=1)
ax0.set_title("original")
### Discretize array
arr = np.floor(df.values * n)/n
im = ax.imshow(arr, cmap="viridis", vmin=0, vmax=1)
ax.set_title("discretize values")
### Discretize colormap
cmap = plt.cm.get_cmap("viridis", n)
im2 = ax2.imshow(df.values, cmap=cmap, vmin=0, vmax=1 )
ax2.set_title("discretize colormap")
#colorbars
fig.colorbar(im0, ax=ax0)
fig.colorbar(im, ax=ax)
fig.colorbar(im2, ax=ax2, ticks=np.arange(0,1,1./n), )
plt.tight_layout()
plt.show()

Related

How to plot vertical stacked graph from different text files?

I have 5 txt files which contain data give me the effect of increasing heat on my samples and I want plot them in a vertical stacked graph, Where the final figure is 5 vertical stacked chart sharing the same X-axis and each line in a separate one to reveal the difference between them.
I wrote this code:
import glob
import pandas as pd
import matplotlib.axes._axes as axes
import matplotlib.pyplot as plt
input_files = glob.glob('01-input/RR_*.txt')
for file in input_files:
data = pd.read_csv(file, header=None, delimiter="\t").values
x = data[:,0]
y = data[:,1]
plt.subplot(2, 1, 1)
plt.plot(x, y, linewidth=2, linestyle=':')
plt.tight_layout()
plt.xlabel('x-axis')
plt.ylabel('y-axis')
But the result is only one graph containing all the lines:
I want to get the following chart:
import matplotlib.pyplot as plt
import numpy as np
# just a dummy data
x = np.linspace(0, 2700, 50)
all_data = [np.sin(x), np.cos(x), x**0.3, x**0.4, x**0.5]
n = len(all_data)
n_rows = n
n_cols = 1
fig, ax = plt.subplots(n_rows, n_cols) # each element in "ax" is a axes
for i, y in enumerate(all_data):
ax[i].plot(x, y, linewidth=2, linestyle=':')
ax[i].set_ylabel('y-axis')
# You can to use a list of y-labels. Example:
# my_labels = ['y1', 'y2', 'y3', 'y4', 'y5']
# ax[i].set_ylabel(my_labels[i])
# The "my_labels" lenght must be "n" too
plt.xlabel('x-axis') # add xlabel at last axes
plt.tight_layout()

Is there a library that will help me fit data easily? I found fitter and i will provide the code but it shows some errors

So, here is my code:
import pandas as pd
import scipy.stats as st
import matplotlib.pyplot as plt
from matplotlib.ticker import AutoMinorLocator
from fitter import Fitter, get_common_distributions
df = pd.read_csv("project3.csv")
bins = [282.33, 594.33, 906.33, 1281.33, 15030.33, 1842.33, 2154.33, 2466.33, 2778.33, 3090.33, 3402.33]
#declaring
facecolor = '#EAEAEA'
color_bars = '#3475D0'
txt_color1 = '#252525'
txt_color2 = '#004C74'
fig, ax = plt.subplots(1, figsize=(16, 6), facecolor=facecolor)
ax.set_facecolor(facecolor)
n, bins, patches = plt.hist(df.City1, color=color_bars, bins=10)
#grid
minor_locator = AutoMinorLocator(2)
plt.gca().xaxis.set_minor_locator(minor_locator)
plt.grid(which='minor', color=facecolor, lw = 0.5)
xticks = [(bins[idx+1] + value)/2 for idx, value in enumerate(bins[:-1])]
xticks_labels = [ "{:.0f}-{:.0f}".format(value, bins[idx+1]) for idx, value in enumerate(bins[:-1])]
plt.xticks(xticks, labels=xticks_labels, c=txt_color1, fontsize=13)
#beautify
ax.tick_params(axis='x', which='both',length=0)
plt.yticks([])
ax.spines['bottom'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
for idx, value in enumerate(n):
if value > 0:
plt.text(xticks[idx], value+5, int(value), ha='center', fontsize=16, c=txt_color1)
plt.title('Histogram of rainfall in City1\n', loc = 'right', fontsize = 20, c=txt_color1)
plt.xlabel('\nCentimeters of rainfall', c=txt_color2, fontsize=14)
plt.ylabel('Frequency of occurrence', c=txt_color2, fontsize=14)
plt.tight_layout()
#plt.savefig('City1_Raw.png', facecolor=facecolor)
plt.show()
city1 = df['City1'].values
f = Fitter(city1, distributions=get_common_distributions())
f.fit()
fig = f.plot_pdf(names=None, Nbest=4, lw=1, method='sumsquare_error')
plt.show()
print(f.get_best(method = 'sumsquare_error'))
The issue is with the plots it shows. The first histogram it generates is
Next I get another graph with best fitted distributions which is
Then an output statement
{'chi2': {'df': 10.692966790090342, 'loc': 16.690849400411103, 'scale': 118.71595997157786}}
Process finished with exit code 0
I have a couple of questions. Why is chi2, the best fitted distribution not plotted on the graph?
How do I plot these distributions on top of the histograms and not separately? The hist() function in fitter library can do that but there I don't get to control the bins and so I end up getting like 100 bins with some flat looking data.
How do I solve this issue? I need to plot the best fit curve on the histogram that looks like image1. Can I use any other module/package to get the work done in similar way? This uses least squares fit but I am OK with least likelihood or log likelihood too.
Simple way of plotting things on top of each other (using some properties of the Fitter class)
import scipy.stats as st
import matplotlib.pyplot as plt
from fitter import Fitter, get_common_distributions
from scipy import stats
numberofpoints=50000
df = stats.norm.rvs( loc=1090, scale=500, size=numberofpoints)
fig, ax = plt.subplots(1, figsize=(16, 6))
n, bins, patches = ax.hist( df, bins=30, density=True)
f = Fitter(df, distributions=get_common_distributions())
f.fit()
errorlist = sorted(
[
[f._fitted_errors[dist], dist]
for dist in get_common_distributions()
]
)[:4]
for err, dist in errorlist:
ax.plot( f.x, f.fitted_pdf[dist] )
plt.show()
Using the histogram normalization, one would need to play with scaling to generalize again.

NaN values as special color in pyplot scatter plot

I have an (x,y)-scatter plot, where each point is associated with a color. Some points, however, do not have a valid color, and are assigned NaN. I would like to include these points, but show them in a color not contained by the colormap.
Here's the example code:
import numpy as np
import matplotlib.colors as mcol
import matplotlib.pyplot as plt
numPoints = 20
nanFrequency = 3
xVec = np.arange(numPoints, dtype=float)
yVec = xVec
colorVec = np.linspace(0,1,numPoints)
colorVec[range(0, numPoints, nanFrequency)] = np.nan
colormap = mcol.LinearSegmentedColormap.from_list("Blue-Red-Colormap", ["b", "r"])
plt.scatter(xVec, yVec, c=colorVec, cmap=colormap)
and the corresponding output:
Every third point is not shown due to its invalid color value. Based on my code, I would have expected these points to be shown in yellow. Why doesn't this work?
Note that there's a related post concerning imshow(), from which the above code is inspired. The solution presented there does not seem to work for me.
Many thanks in advance.
Of course you need to set the desired yellow to your colormap, colormap.set_bad("yellow").
Then, this is a long standing bug in matplotlib (#4354), which fortunately has now been fixed (#12422).
So from matplotlib 3.1 onwards, you can use the plotnonfinite=True argument to include masked or invalid points in scatter plots.
import numpy as np
import matplotlib.colors as mcol
import matplotlib.pyplot as plt
numPoints = 20
nanFrequency = 3
xVec = np.arange(numPoints, dtype=float)
yVec = xVec
colorVec = np.linspace(0,1,numPoints)
colorVec[range(0, numPoints, nanFrequency)] = np.nan
colormap = mcol.LinearSegmentedColormap.from_list("Blue-Red-Colormap", ["b", "r"])
colormap.set_bad("yellow")
plt.scatter(xVec, yVec, c=colorVec, cmap=colormap, plotnonfinite=True)
plt.show()
The reason that your NaN values are not plotted is that matplotlib's scatter currently filters them out before giving them to the colormap.
To show the NaN entries you can manually assign them a dummy value with a special meaning. For example, because your list is in the range [0, 1] you could define that any value > 1 get a special color. For this you will have to fix the range of the color-axis, and specify a color for entries outside this range (in this case higher than the maximum).
Basically you will use:
cax = ax.scatter(...)
cax.cmap.set_over('y') # assigns yellow to any entry >1
cax.set_clim(0, 1) # fixes the range of 'normal' colors to (0, 1)
For your example:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
numPoints = 20
nanFrequency = 3
xVec = np.arange(numPoints, dtype=float)
yVec = xVec
colorVec = np.linspace(0,1,numPoints)
colorVec[range(0, numPoints, nanFrequency)] = np.NaN
cmap = mpl.colors.LinearSegmentedColormap.from_list("Blue-Red-Colormap", ["b", "r"], numPoints)
# ---
fig, axes = plt.subplots(nrows=2, figsize=(8, 2*6))
# ---
ax = axes[0]
ax.scatter(xVec, yVec, c=colorVec, cmap=cmap)
ax.set_xlim([0, 20])
ax.set_ylim([0, 20])
# ---
ax = axes[1]
colorVec[np.isnan(colorVec)] = 2.0
cax = ax.scatter(xVec, yVec, c=colorVec, cmap=cmap)
cax.cmap.set_over('y')
cax.set_clim(0, 1)
ax.set_xlim([0, 20])
ax.set_ylim([0, 20])
# ---
plt.show()
Which produces two subplots: the top corresponds to what you had, the bottom uses the dummy value and assigns yellow to it:

seaborn joyplot does not fill all the way to the top

I'm using seaborn in Python 3.5. Taking the example joy plot from the gallery, modified slightly to save the figure:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})
# Create the data
rs = np.random.RandomState(1979)
x = rs.randn(500)
g = np.tile(list("ABCDEFGHIJ"), 50)
df = pd.DataFrame(dict(x=x, g=g))
m = df.g.map(ord)
df["x"] += m
# Initialize the FacetGrid object
pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=15, size=.5, palette=pal)
# Draw the densities in a few steps
g.map(sns.kdeplot, "x", clip_on=False, shade=True, alpha=1, lw=1.5, bw=.2)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw=.2)
g.map(plt.axhline, y=0, lw=2, clip_on=False)
# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
ax = plt.gca()
ax.text(0, .2, label, fontweight="bold", color=color,
ha="left", va="center", transform=ax.transAxes)
g.map(label, "x")
# Set the subplots to overlap
g.fig.subplots_adjust(hspace=-.25)
# Remove axes details that don't play will with overlap
g.set_titles("")
g.set(yticks=[])
g.despine(bottom=True, left=True)
plt.savefig('tmp.png')
There is a slight visual defect, namely the KDEs do not quite fill all the way to the top. This is most visible in rows B, G, H and J:
Any idea what's causing this?

Can't add matplotlib colorbar ticks

I am trying to add ticks and labels to a color bar, but it just doesn't seem to show up in the output. I have tried two approaches(as shown in the code below). Second appraoch was to do as shown in another question on Stack Overflow here: How to add Matplotlib Colorbar Ticks.
I must be overlooking something very simple here as I am a beginner in Matplotlib and Python.
I have managed to obtain the color bar, but the ticks I want just don't show up. Any help here will be greatly appreciated as I have been stuck at it for hours after trying and searching.
Here is the code I used to generate a heatmap using hexbin over a basemap.
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
from matplotlib.colors import LinearSegmentedColormap
from matplotlib import cm
#Loading data from CSV file
DATA_FILE = '....../Population_data.csv'
roc_data = pd.read_csv(DATA_FILE)
roc_data.head()
#Creating figure window
fig = plt.figure(figsize=(14,10))
ax = fig.add_subplot(111)
#Drawing the basemap
m = Basemap(projection='merc', lat_0=43.12, lon_0=-77.626,
resolution = 'i',llcrnrlon=-78.236,
llcrnrlat=42.935,
urcrnrlon=-77.072,
urcrnrlat=43.349)
m.drawcoastlines()
m.drawcounties(zorder=20, color='red')
m.drawcountries()
m.drawmapboundary()
#plotting the heatmap using hexbin
x, y = m(roc_data['Longitude'].values, roc_data['Latitude'].values)
values = roc_data['Total(20-64)']
m.hexbin(x, y, gridsize = 125, bins = 'log', C = values, cmap = cm.Reds)
#Defining minimum, mean and maximum population values
max_p = roc_data['Total(20-64)'].max()
min_p = roc_data['Total(20-64)'].min()
mean_p = roc_data['Total(20-64)'].mean()
#Adding Colorbar
cb = m.colorbar(location = 'bottom', format = '%d', label = 'Population by Census Blocks')
#setting ticks
#cb.set_ticks([48, 107, 1302]) #First approach, didn't work
#cb.set_ticklabels(['Min', 'Mean', 'Max'])
cb.set_ticks([min_p, mean_p, max_p]) #Second appraoch, assumed ticks and tick labels should be same
cb.set_ticklabels([min_p, mean_p, max_p]) #from the above mentioned stackoverflow question, but did't work
plt.show()
The output I get by using the first or second approach for colorbar ticks is the same. It is as here:
Heatmap and colorbar with no ticks and labels
I want the minimum, median and maximum population values (48, 107 and 1302) to be shown on the colorbar with the labels Min, Mean and Max. Thank you for your time
When plotting the hexbin plot with mode bins = 'log', the colors will be plotted with a logarithmic scaling. This means that if the data minimum, mean and maximum are min, mean and max, their values on the logarithmically scaled colorbar are log10(min), log10(mean), log10(max).
The ticks on the colorbar therefore needs to be set with the log values. The ticklabels can be set to any value. However I would think that simply putting something like "mean" on a logarithmic scale may not be too informative.
A particularity is that the minimum of the colorbar is actually log10(min+1). The +1 is due to the log which is negative below 1.
Here is a complete example.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
np.random.seed(42)
from mpl_toolkits.basemap import Basemap
from matplotlib import cm
lon = -78.236+np.random.rand(1000)*(-77.072+78.236)
lat = 42.935 + np.random.rand(1000)*(43.349-42.935)
t = 99+np.random.normal(10,20,1000)
t[:50] = np.linspace(48,1302)
roc_data = pd.DataFrame({'Longitude':lon, 'Latitude':lat, "T":t })
#Creating figure window
fig = plt.figure(figsize=(8,6))
ax = fig.add_subplot(111)
#Drawing the basemap
m = Basemap(projection='merc', lat_0=43.12, lon_0=-77.626,
resolution = 'i',llcrnrlon=-78.236,
llcrnrlat=42.935,
urcrnrlon=-77.072,
urcrnrlat=43.349)
m.drawcoastlines()
m.drawcounties(zorder=20, color='red')
m.drawcountries()
m.drawmapboundary()
#plotting the heatmap using hexbin
x, y = m(roc_data['Longitude'].values, roc_data['Latitude'].values)
values = roc_data['T']
m.hexbin(x, y, gridsize = 125, bins = 'log', C = values, cmap = cm.Reds) #bins = 'log',
#Defining minimum, mean and maximum population values
max_p = roc_data['T'].max()
min_p = roc_data['T'].min()
mean_p = roc_data['T'].mean()
print [min_p, mean_p, max_p]
print [np.log10(min_p), np.log10(mean_p), np.log10(max_p)]
#Adding Colorbar
cb = m.colorbar(location = 'bottom', format = '%d', label = 'Population by Census Blocks') #format = '%d',
#setting ticks
cb.set_ticks([np.log10(min_p+1), np.log10(mean_p), np.log10(max_p)])
cb.set_ticklabels(['Min\n({:.1f})'.format(min_p), 'Mean\n({:.1f})'.format(mean_p), 'Max\n({:.1f})'.format(max_p)])
plt.tight_layout()
plt.show()

Resources