I am making some density plots like so:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import r2_score
import matplotlib
from scipy import stats
import matplotlib.gridspec as gridspec
from mpl_toolkits.axes_grid1.inset_locator import InsetPosition
from matplotlib.ticker import FormatStrFormatter
import matplotlib.cm as cm
from scipy.ndimage.filters import gaussian_filter
import random
matplotlib.rcParams.update({'font.size': 16})
matplotlib.rcParams['xtick.direction'] = 'in'
matplotlib.rcParams['ytick.direction'] = 'in'
x = random.sample(range(1, 10001), 1000)
y = random.sample(range(1, 10001), 1000)
def myplot(x, y, s, bins=1000):
heatmap, xedges, yedges = np.histogram2d(x, y, bins=bins)
heatmap = gaussian_filter(heatmap, sigma=s)
extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
return heatmap.T, extent
cmap = cm.YlOrRd
fig, (ax, ax1, cax) = plt.subplots(ncols = 3, figsize = (15, 5),
gridspec_kw={"width_ratios":[1,1, 0.5]})
img, extent = myplot(x, y, 20)
im = ax.imshow(img, extent = extent, origin = 'lower', cmap = cmap)
ax.text(0.05, 0.92, '$R^2$ = {}'.format(np.round(r2_score(x, y), 2)), fontsize=14, color = 'k', transform = ax.transAxes)
ax.plot(ax.get_xlim(), ax.get_ylim(), ls="--", c=".3")
ax.set_xlabel("Black Sky")
ax.set_ylabel("Blue Sky")
img2, extent2 = myplot(x, y, 20)
ax1.imshow(img2, extent = extent2, origin = 'lower', cmap = cmap)
ax1.text(0.05, 0.92, '$R^2$ = {}'.format(np.round(r2_score(x, y), 2)), fontsize=14, color = 'k', transform = ax1.transAxes)
ax1.axes.get_yaxis().set_visible(False)
ax1.yaxis.set_ticks([])
ax1.plot(ax1.get_xlim(), ax1.get_ylim(), ls="--", c=".3")
ax1.set_xlabel("White Sky")
ip = InsetPosition(ax1, [1.05,0,0.05,1])
cax.set_axes_locator(ip)
fig.colorbar(im, cax=cax, ax=[ax,ax1], use_gridspec = True)
plt.subplots_adjust(wspace=0.1, hspace=0)
which gives me a plot like this:
No matter what I change wspace to the plot stays the same. I think this is because when I turn of the y-axis in ax1 I am just making the text blank instead of removing the y-axis all together. Is there a way to do this so that I can make the width spacing between the figures closer together?
As commented, wspace sets the minimal distance between plots. This distance may be larger in case of equal aspect axes. Then it will depend on the figure size, figure aspect and image aspect.
A. Use automatic aspect
You may set aspect = "auto" in your imshow plots,
ax.imshow(..., aspect = "auto")
B. Adjust the subplot parameters
You may set the left or right subplot parameter to something smaller. E.g.
plt.subplots_adjust(wspace=0.0, hspace=0, right=0.7)
C. Adjust the figure size
Using a smaller figure width, which is closer to the actual image aspect will also reduce whitespace around the figure.
E.g, making the figure only 11 inches wide and using 5% padding on the right,
plt.subplots(..., figsize = (11, 5))
plt.subplots_adjust(wspace=0.0, hspace=0, right=.95)
Related
So, here is my code:
import pandas as pd
import scipy.stats as st
import matplotlib.pyplot as plt
from matplotlib.ticker import AutoMinorLocator
from fitter import Fitter, get_common_distributions
df = pd.read_csv("project3.csv")
bins = [282.33, 594.33, 906.33, 1281.33, 15030.33, 1842.33, 2154.33, 2466.33, 2778.33, 3090.33, 3402.33]
#declaring
facecolor = '#EAEAEA'
color_bars = '#3475D0'
txt_color1 = '#252525'
txt_color2 = '#004C74'
fig, ax = plt.subplots(1, figsize=(16, 6), facecolor=facecolor)
ax.set_facecolor(facecolor)
n, bins, patches = plt.hist(df.City1, color=color_bars, bins=10)
#grid
minor_locator = AutoMinorLocator(2)
plt.gca().xaxis.set_minor_locator(minor_locator)
plt.grid(which='minor', color=facecolor, lw = 0.5)
xticks = [(bins[idx+1] + value)/2 for idx, value in enumerate(bins[:-1])]
xticks_labels = [ "{:.0f}-{:.0f}".format(value, bins[idx+1]) for idx, value in enumerate(bins[:-1])]
plt.xticks(xticks, labels=xticks_labels, c=txt_color1, fontsize=13)
#beautify
ax.tick_params(axis='x', which='both',length=0)
plt.yticks([])
ax.spines['bottom'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
for idx, value in enumerate(n):
if value > 0:
plt.text(xticks[idx], value+5, int(value), ha='center', fontsize=16, c=txt_color1)
plt.title('Histogram of rainfall in City1\n', loc = 'right', fontsize = 20, c=txt_color1)
plt.xlabel('\nCentimeters of rainfall', c=txt_color2, fontsize=14)
plt.ylabel('Frequency of occurrence', c=txt_color2, fontsize=14)
plt.tight_layout()
#plt.savefig('City1_Raw.png', facecolor=facecolor)
plt.show()
city1 = df['City1'].values
f = Fitter(city1, distributions=get_common_distributions())
f.fit()
fig = f.plot_pdf(names=None, Nbest=4, lw=1, method='sumsquare_error')
plt.show()
print(f.get_best(method = 'sumsquare_error'))
The issue is with the plots it shows. The first histogram it generates is
Next I get another graph with best fitted distributions which is
Then an output statement
{'chi2': {'df': 10.692966790090342, 'loc': 16.690849400411103, 'scale': 118.71595997157786}}
Process finished with exit code 0
I have a couple of questions. Why is chi2, the best fitted distribution not plotted on the graph?
How do I plot these distributions on top of the histograms and not separately? The hist() function in fitter library can do that but there I don't get to control the bins and so I end up getting like 100 bins with some flat looking data.
How do I solve this issue? I need to plot the best fit curve on the histogram that looks like image1. Can I use any other module/package to get the work done in similar way? This uses least squares fit but I am OK with least likelihood or log likelihood too.
Simple way of plotting things on top of each other (using some properties of the Fitter class)
import scipy.stats as st
import matplotlib.pyplot as plt
from fitter import Fitter, get_common_distributions
from scipy import stats
numberofpoints=50000
df = stats.norm.rvs( loc=1090, scale=500, size=numberofpoints)
fig, ax = plt.subplots(1, figsize=(16, 6))
n, bins, patches = ax.hist( df, bins=30, density=True)
f = Fitter(df, distributions=get_common_distributions())
f.fit()
errorlist = sorted(
[
[f._fitted_errors[dist], dist]
for dist in get_common_distributions()
]
)[:4]
for err, dist in errorlist:
ax.plot( f.x, f.fitted_pdf[dist] )
plt.show()
Using the histogram normalization, one would need to play with scaling to generalize again.
I'm trying to change a colorbar attached to a scatter plot so that the minimum and maximum of the colorbar are the minimum and maximum of the data, but I want the data to be centred at zero as I'm using a colormap with white at zero. Here is my example
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(0, 1, 61)
y = np.linspace(0, 1, 61)
C = np.linspace(-10, 50, 61)
M = np.abs(C).max() # used for vmin and vmax
fig, ax = plt.subplots(1, 1, figsize=(5,3), dpi=150)
sc=ax.scatter(x, y, c=C, marker='o', edgecolor='k', vmin=-M, vmax=M, cmap=plt.cm.RdBu_r)
cbar=fig.colorbar(sc, ax=ax, label='$R - R_0$ (mm)')
ax.set_xlabel('x')
ax.set_ylabel('y')
As you can see from the attached figure, the colorbar goes down to -M, where as I want the bar to just go down to -10, but if I let vmin=-10 then the colorbar won't be zerod at white. Normally, setting vmin to +/- M when using contourf the colorbar automatically sorts to how I want. This sort of behaviour is what I expect when contourf uses levels=np.linspace(-M,M,61) rather than setting it with vmin and vmax with levels=62. An example showing the default contourf colorbar behaviour I want in my scatter example is shown below
plt.figure(figsize=(6,5), dpi=150)
plt.contourf(x, x, np.reshape(np.linspace(-10, 50, 61*61), (61,61)),
levels=62, vmin=-M, vmax=M, cmap=plt.cm.RdBu_r)
plt.colorbar(label='$R - R_0$ (mm)')
Does anyone have any thoughts? I found this link which I thought might solve the problem, but when executing the cbar.outline.set_ydata line I get this error AttributeError: 'Polygon' object has no attribute 'set_ydata' .
EDIT a little annoyed that someone has closed this question without allowing me to clarify any questions they might have, as none of the proposed solutions are what I'm asking for.
As for Normalize.TwoSlopeNorm, I do not want to rescale the smaller negative side to use the entire colormap range, I just want the colorbar attached to the side of my graph to stop at -10.
This link also does not solve my issue, as it's the TwoSlopeNorm solution again.
After changing the ylim of the colorbar, the rectangle formed by the surrounding spines is too large. You can make this outline invisible. And then add a new rectangular border:
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(0, 1, 61)
y = np.linspace(0, 1, 61)
C = np.linspace(-10, 50, 61)
M = np.abs(C).max() # used for vmin and vmax
fig, ax = plt.subplots(1, 1, figsize=(5, 3), dpi=150)
sc = ax.scatter(x, y, c=C, marker='o', edgecolor='k', vmin=-M, vmax=M, cmap=plt.cm.RdBu_r)
cbar = fig.colorbar(sc, ax=ax, label='$R - R_0$ (mm)')
cb_ymin = C.min()
cb_ymax = C.max()
cb_xmin, cb_xmax = cbar.ax.get_xlim()
cbar.ax.set_ylim(cb_ymin, cb_ymax)
cbar.outline.set_visible(False) # hide the surrounding spines, which are too large after set_ylim
cbar.ax.add_patch(plt.Rectangle((cb_xmin, cb_ymin), cb_xmax - cb_xmin, cb_ymax - cb_ymin,
fc='none', ec='black', clip_on=False))
plt.show()
Another approach until v3.5 is released is to make a custom colormap that does what you want (see also https://matplotlib.org/stable/tutorials/colors/colormap-manipulation.html#sphx-glr-tutorials-colors-colormap-manipulation-py)
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.cm as cm
from matplotlib.colors import ListedColormap
fig, axs = plt.subplots(2, 1)
X = np.random.randn(32, 32) + 2
pc = axs[0].pcolormesh(X, vmin=-6, vmax=6, cmap='RdBu_r')
fig.colorbar(pc, ax=axs[0])
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.cm as cm
from matplotlib.colors import ListedColormap
fig, axs = plt.subplots(2, 1)
X = np.random.randn(32, 32) + 2
pc = axs[0].pcolormesh(X, vmin=-6, vmax=6, cmap='RdBu_r')
fig.colorbar(pc, ax=axs[0])
def keep_center_colormap(vmin, vmax, center=0):
vmin = vmin - center
vmax = vmax - center
dv = max(-vmin, vmax) * 2
N = int(256 * dv / (vmax-vmin))
RdBu_r = cm.get_cmap('RdBu_r', N)
newcolors = RdBu_r(np.linspace(0, 1, N))
beg = int((dv / 2 + vmin)*N / dv)
end = N - int((dv / 2 - vmax)*N / dv)
newmap = ListedColormap(newcolors[beg:end])
return newmap
newmap = keep_center_colormap(-2, 6, center=0)
pc = axs[1].pcolormesh(X, vmin=-2, vmax=6, cmap=newmap)
fig.colorbar(pc, ax=axs[1])
plt.show()
I'm trying to extend the plot by plt.xlim(-1.5, 1.5) and plt.ylim(-1.5, 1.5). Could you please explain why the the range of the plot is not as expected?
import pandas as pd
from sklearn import preprocessing
from sklearn import decomposition
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
import numpy as np
# Change the image format to svg for better quality
%config InlineBackend.figure_format = 'svg'
decathlon = pd.read_csv("https://raw.githubusercontent.com/leanhdung1994/Deep-Learning/main/decathlon.txt", sep='\t')
decathlon_scaled = decathlon.copy()
decathlon_scaled.iloc[:, 0:10] = preprocessing.scale(decathlon.iloc[:, 0:10])
pca_scaled = decomposition.PCA(n_components = 10).fit(decathlon_scaled.iloc[:, 0:10])
decathlon_scaled_pca = pca_scaled.transform(decathlon_scaled.iloc[:, 0:10])
decathlon_scaled_pca_nor = decathlon_scaled_pca / np.sqrt((decathlon_scaled_pca ** 2).sum(axis = 0))
decathlon_scaled_nor = decathlon_scaled.iloc[:, 0:10] / np.sqrt((decathlon_scaled.iloc[:, 0:10] ** 2).sum(axis = 0))
decathlon_corr_circle = decathlon_scaled_pca_nor.T.dot(decathlon_scaled_nor)
decathlon_corr_circle
tmp = np.transpose(decathlon_corr_circle)[:, 0:2]
tmp = pd.DataFrame(tmp)
tmp.index = decathlon.columns[0:10]
fig = plt.figure(figsize = 1 * np.array(plt.rcParams['figure.figsize'])) # This is to have bigger plot
ax = sns.scatterplot(data = tmp,
x = tmp[0], y = tmp[1])
for i in range(10):
plt.arrow(0, 0, tmp[0][i], tmp[1][i],
color = 'orange', head_width = 0.025, length_includes_head = True)
circle = plt.Circle((0, 0), 1, color='g', fill=False)
ax.add_artist(circle)
plt.xlim(-1.5, 1.5)
plt.ylim(-1.5, 1.5)
plt.axis('equal')
The problem is that using plt.axis('equal') is equivalent to using ax.set_aspect('equal', adjustable='datalim'). That adjustable='datalim' is modifying the axis limits, even if you don't want it to.
Using the object-oriented approach for all of the last 3 lines of code is one way to solve this problem, since the default value of adjustable is box, not datalim. box means the shape of the axes will be changed to enforce the equal aspect ratio, compared to datalim which will keep the axes the same size, but change the axis limits. Using the state-machine version, plt.axis('equal'), doesn't allow you to set it to box, so the object-oriented approach is the best option to give you more control.
Change
plt.xlim(-1.5, 1.5)
plt.ylim(-1.5, 1.5)
plt.axis('equal')
to
ax.set_xlim(-1.5, 1.5)
ax.set_ylim(-1.5, 1.5)
ax.set_aspect('equal')
When dealing with overlapping high density scatter or line plots of different colors it can be convenient to implement additive blending schemes, where the RGB colors of each marker add together to produce the final color in the canvas. This is a common operation in 2D and 3D render engines.
However, in Matplotlib I've only found support for alpha/opacity blending. Is there any roundabout way of doing it or am I stuck with rendering to bitmap and then blending them in some paint program?
Edit: Here's some example code and a manual solution.
This will produce two partially overlapping random distributions:
x1 = randn(1000)
y1 = randn(1000)
x2 = randn(1000) * 5
y2 = randn(1000)
scatter(x1,y1,c='b',edgecolors='none')
scatter(x2,y2,c='r',edgecolors='none')
This will produce in matplotlib the following:
As you can see, there are some overlapping blue points that are occluded by red points and we would like to see them. By using alpha/opacity blending in matplotlib, you can do:
scatter(x1,y1,c='b',edgecolors='none',alpha=0.5)
scatter(x2,y2,c='r',edgecolors='none',alpha=0.5)
Which will produce the following:
But what I really want is the following:
I can do it manually by rendering each plot independently to a bitmap:
xlim = plt.xlim()
ylim = plt.ylim()
scatter(x1,y1,c='b',edgecolors='none')
plt.xlim(xlim)
plt.ylim(ylim)
scatter(x2,y2,c='r',edgecolors='none')
plt.xlim(xlim)
plt.ylim(ylim)
plt.savefig(r'scatter_blue.png',transparent=True)
plt.savefig(r'scatter_red.png',transparent=True)
Which gives me the following images:
What you can do then is load them as independent layers in Paint.NET/PhotoShop/gimp and just additive blend them.
Now ideal would be to be able to do this programmatically in Matplotlib, since I'll be processing hundreds of these!
If you only need an image as the result, you can get the canvas buffer as a numpy array, and then do the blending, here is an example:
from matplotlib import pyplot as plt
import numpy as np
fig, ax = plt.subplots()
ax.scatter(x1,y1,c='b',edgecolors='none')
ax.set_xlim(-4, 4)
ax.set_ylim(-4, 4)
ax.patch.set_facecolor("none")
ax.patch.set_edgecolor("none")
fig.canvas.draw()
w, h = fig.canvas.get_width_height()
img = np.frombuffer(fig.canvas.buffer_rgba(), np.uint8).reshape(h, w, -1).copy()
ax.clear()
ax.scatter(x2,y2,c='r',edgecolors='none')
ax.set_xlim(-4, 4)
ax.set_ylim(-4, 4)
ax.patch.set_facecolor("none")
ax.patch.set_edgecolor("none")
fig.canvas.draw()
img2 = np.frombuffer(fig.canvas.buffer_rgba(), np.uint8).reshape(h, w, -1).copy()
img[img[:, :, -1] == 0] = 0
img2[img2[:, :, -1] == 0] = 0
fig.clf()
plt.imshow(np.maximum(img, img2))
plt.subplots_adjust(0, 0, 1, 1)
plt.axis("off")
plt.show()
the result:
This feature is now supported by my matplotlib backend https://github.com/anntzer/mplcairo (master only):
import matplotlib; matplotlib.use("module://mplcairo.qt")
from matplotlib import pyplot as plt
from mplcairo import operator_t
import numpy as np
x1 = np.random.randn(1000)
y1 = np.random.randn(1000)
x2 = np.random.randn(1000) * 5
y2 = np.random.randn(1000)
fig, ax = plt.subplots()
# The figure and axes background must be made transparent.
fig.patch.set(alpha=0)
ax.patch.set(alpha=0)
pc1 = ax.scatter(x1, y1, c='b', edgecolors='none')
pc2 = ax.scatter(x2, y2, c='r', edgecolors='none')
operator_t.ADD.patch_artist(pc2) # Use additive blending.
plt.show()
I am trying to add ticks and labels to a color bar, but it just doesn't seem to show up in the output. I have tried two approaches(as shown in the code below). Second appraoch was to do as shown in another question on Stack Overflow here: How to add Matplotlib Colorbar Ticks.
I must be overlooking something very simple here as I am a beginner in Matplotlib and Python.
I have managed to obtain the color bar, but the ticks I want just don't show up. Any help here will be greatly appreciated as I have been stuck at it for hours after trying and searching.
Here is the code I used to generate a heatmap using hexbin over a basemap.
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
from matplotlib.colors import LinearSegmentedColormap
from matplotlib import cm
#Loading data from CSV file
DATA_FILE = '....../Population_data.csv'
roc_data = pd.read_csv(DATA_FILE)
roc_data.head()
#Creating figure window
fig = plt.figure(figsize=(14,10))
ax = fig.add_subplot(111)
#Drawing the basemap
m = Basemap(projection='merc', lat_0=43.12, lon_0=-77.626,
resolution = 'i',llcrnrlon=-78.236,
llcrnrlat=42.935,
urcrnrlon=-77.072,
urcrnrlat=43.349)
m.drawcoastlines()
m.drawcounties(zorder=20, color='red')
m.drawcountries()
m.drawmapboundary()
#plotting the heatmap using hexbin
x, y = m(roc_data['Longitude'].values, roc_data['Latitude'].values)
values = roc_data['Total(20-64)']
m.hexbin(x, y, gridsize = 125, bins = 'log', C = values, cmap = cm.Reds)
#Defining minimum, mean and maximum population values
max_p = roc_data['Total(20-64)'].max()
min_p = roc_data['Total(20-64)'].min()
mean_p = roc_data['Total(20-64)'].mean()
#Adding Colorbar
cb = m.colorbar(location = 'bottom', format = '%d', label = 'Population by Census Blocks')
#setting ticks
#cb.set_ticks([48, 107, 1302]) #First approach, didn't work
#cb.set_ticklabels(['Min', 'Mean', 'Max'])
cb.set_ticks([min_p, mean_p, max_p]) #Second appraoch, assumed ticks and tick labels should be same
cb.set_ticklabels([min_p, mean_p, max_p]) #from the above mentioned stackoverflow question, but did't work
plt.show()
The output I get by using the first or second approach for colorbar ticks is the same. It is as here:
Heatmap and colorbar with no ticks and labels
I want the minimum, median and maximum population values (48, 107 and 1302) to be shown on the colorbar with the labels Min, Mean and Max. Thank you for your time
When plotting the hexbin plot with mode bins = 'log', the colors will be plotted with a logarithmic scaling. This means that if the data minimum, mean and maximum are min, mean and max, their values on the logarithmically scaled colorbar are log10(min), log10(mean), log10(max).
The ticks on the colorbar therefore needs to be set with the log values. The ticklabels can be set to any value. However I would think that simply putting something like "mean" on a logarithmic scale may not be too informative.
A particularity is that the minimum of the colorbar is actually log10(min+1). The +1 is due to the log which is negative below 1.
Here is a complete example.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
np.random.seed(42)
from mpl_toolkits.basemap import Basemap
from matplotlib import cm
lon = -78.236+np.random.rand(1000)*(-77.072+78.236)
lat = 42.935 + np.random.rand(1000)*(43.349-42.935)
t = 99+np.random.normal(10,20,1000)
t[:50] = np.linspace(48,1302)
roc_data = pd.DataFrame({'Longitude':lon, 'Latitude':lat, "T":t })
#Creating figure window
fig = plt.figure(figsize=(8,6))
ax = fig.add_subplot(111)
#Drawing the basemap
m = Basemap(projection='merc', lat_0=43.12, lon_0=-77.626,
resolution = 'i',llcrnrlon=-78.236,
llcrnrlat=42.935,
urcrnrlon=-77.072,
urcrnrlat=43.349)
m.drawcoastlines()
m.drawcounties(zorder=20, color='red')
m.drawcountries()
m.drawmapboundary()
#plotting the heatmap using hexbin
x, y = m(roc_data['Longitude'].values, roc_data['Latitude'].values)
values = roc_data['T']
m.hexbin(x, y, gridsize = 125, bins = 'log', C = values, cmap = cm.Reds) #bins = 'log',
#Defining minimum, mean and maximum population values
max_p = roc_data['T'].max()
min_p = roc_data['T'].min()
mean_p = roc_data['T'].mean()
print [min_p, mean_p, max_p]
print [np.log10(min_p), np.log10(mean_p), np.log10(max_p)]
#Adding Colorbar
cb = m.colorbar(location = 'bottom', format = '%d', label = 'Population by Census Blocks') #format = '%d',
#setting ticks
cb.set_ticks([np.log10(min_p+1), np.log10(mean_p), np.log10(max_p)])
cb.set_ticklabels(['Min\n({:.1f})'.format(min_p), 'Mean\n({:.1f})'.format(mean_p), 'Max\n({:.1f})'.format(max_p)])
plt.tight_layout()
plt.show()