Python - plt.subplot() with plot created in a for loop - python-3.x

I create the following plots within a for loop:
cat_var={}
categories=['symboling','fuel-type','make']
for x in categories:
cat_var[x]=df[x].unique()
for key in cat_var.keys():
plt.figure(figsize=(10,10))
for value in cat_var[key]:
subset= df[df[key] == value]
sns.distplot(subset['price'], hist = False, kde = True,kde_kws = {'linewidth': 3},label = value)
plt.legend(prop={'size': 16}, title = key)
plt.title('Price distribution per %s level'% key)
plt.xlabel('Price')
plt.ylabel('Density')
plt.show()
It is working, but I would like to plot them so they appear all on the same row, whereas now I have them all on the same column. I was thinking about using something like:
fig, axes = plt.subplots(1, 3,figsize=(20,20))
but this does not produce the desired output wherever I put this last line of code.
Any thoughts?

If you want to have three plots on the same figure, then you need to use subplots and specify where each plot goes
fig, ax = plt.subplots(1, 3,figsize=(20,20))
for idx, key in enumerate(cat_var):
for value in cat_var[key]:
subset= df[df[key] == value]
sns.distplot(subset['price'], hist = False, ax= ax[idx], kde = True,kde_kws = {'linewidth': 3},label = value)
plt.show()
With the following output:

Related

Make one y-axis label bold in matplotlib

Goodmorning,
Question, I've got this script that creates a horizontal bar chart (see image)
I would like to have one label in the y-axis bold "Nederland".
I've searched an tried a lot, but I really have no idea how I can do this.
I found this solution:
Matplotlib - Changing the color of a single x-axis tick label
But I could not get it to work.
Any hint to a solution would be great.
def AVG_BarChart(self, data:dict=None, graph_file:str = None, datum:str=None, countries:dict=None, watermarktext:str="energieprijzenbot.nl", prijsper:str="kWh")->bool:
plt.figure(figsize=(9, 6))
plt.xlabel(f"Prijs per {prijsper}")
plt.title(f"Gemiddelde {prijsper} inkoopprijs per land {datum}")
colors = ["#FE8000", "#EFBD76", "#FFA52B", "#FF9D3C", "#FFF858", "#FCFFCB", "#07EEB2", "#FF4179","#E05B4B", "#E09336", "#DAB552", "#DBD9A6", "#87B49C", "#4B8A7E", "#A5DD96", "#E1F3C9", "#0095AD", "#00D5E5", "#82E9F0", "#C0ED42", "#FFE301", "#FFF352", "#FF85DA", "#FF69B3","#A15AC4", "#3F7539", "#B8CBAD", "#E1E2C2", "#F84040", "#9D1E29"]
random.shuffle(colors)
values = 2 ** np.random.randint(2, 10, len(data))
max_value = values.max()
labels = list(data.keys())
values = list(data.values())
height = 0.9
plt.barh(y=labels, width=values, height=height, color=colors, align='center', alpha=0.8)
ax = plt.gca()
ax.xaxis.set_major_formatter('€ {x:n}')
plt.bar_label(ax.containers[0], labels=[f'€ {x:n}' for x in ax.containers[0].datavalues], label_type="edge", padding=-50)
ax.text(0.5, 0.5, watermarktext, transform=ax.transAxes,
fontsize=40, color='gray', alpha=0.3,
ha='center', va='center', rotation='30')
for i, (label, value) in enumerate(zip(labels, values)):
country_iso = self.get_key(val=label, my_dict=countries).lower()
self.offset_image(x=value, y=i, flag=country_iso, bar_is_too_short=value < max_value / 10, ax=ax)
plt.subplots_adjust(left=0.15)
plt.savefig(graph_file, bbox_inches='tight', width = 0.4)
return True
I tried looping thru the labels like this
i = 0
for w in ax.get_yticklabels():
country = ax.get_yticklabels()[i].get_text()
if country == "Nederland":
ax.get_yticklabels()[i].set_color('red')
ax.get_yticklabels()[i].set_fontweight('bold')
i += 1
When debugging I actually get a country name back, but when running the script normal, all country labels are empty...
So, I was close to the answer. But somehow I got back empty .get_text() string.
# ... some code
labels = list(data.keys())
# ... more code
ax.set_yticklabels(labels)
for lab in ax.get_yticklabels():
if lab.get_text() == "Nederland":
lab.set_fontweight('bold')
I just hope by setting the labels again, It does not mix up anything :-)

How could I edit my code to plot 4D contour something similar to this example in python?

Similar to many other researchers on stackoverflow who are trying to plot a contour graph out of 4D data (i.e., X,Y,Z and their corresponding value C), I am attempting to plot a 4D contour map out of my data. I have tried many of the suggested solutions in stackover flow. From all of the plots suggested this, and this were the closest to what I want but sill not quite what I need in terms of data interpretation. Here is the ideal plot example: (source)
Here is a subset of the data. I put it on the dropbox. Once this data is downloaded to the directory of the python file, the following code will work. I have modified this script from this post.
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import matplotlib.tri as mtri
#####Importing the data
df = pd.read_csv('Data_4D_plot.csv')
do_random_pt_example = False;
index_x = 0; index_y = 1; index_z = 2; index_c = 3;
list_name_variables = ['x', 'y', 'z', 'c'];
name_color_map = 'seismic';
if do_random_pt_example:
number_of_points = 200;
x = np.random.rand(number_of_points);
y = np.random.rand(number_of_points);
z = np.random.rand(number_of_points);
c = np.random.rand(number_of_points);
else:
x = df['X'].to_numpy();
y = df['Y'].to_numpy();
z = df['Z'].to_numpy();
c = df['C'].to_numpy();
#end
#-----
# We create triangles that join 3 pt at a time and where their colors will be
# determined by the values of their 4th dimension. Each triangle contains 3
# indexes corresponding to the line number of the points to be grouped.
# Therefore, different methods can be used to define the value that
# will represent the 3 grouped points and I put some examples.
triangles = mtri.Triangulation(x, y).triangles;
choice_calcuation_colors = 2;
if choice_calcuation_colors == 1: # Mean of the "c" values of the 3 pt of the triangle
colors = np.mean( [c[triangles[:,0]], c[triangles[:,1]], c[triangles[:,2]]], axis = 0);
elif choice_calcuation_colors == 2: # Mediane of the "c" values of the 3 pt of the triangle
colors = np.median( [c[triangles[:,0]], c[triangles[:,1]], c[triangles[:,2]]], axis = 0);
elif choice_calcuation_colors == 3: # Max of the "c" values of the 3 pt of the triangle
colors = np.max( [c[triangles[:,0]], c[triangles[:,1]], c[triangles[:,2]]], axis = 0);
#end
#----------
###=====adjust this part for the labeling of the graph
list_name_variables[index_x] = 'X (m)'
list_name_variables[index_y] = 'Y (m)'
list_name_variables[index_z] = 'Z (m)'
list_name_variables[index_c] = 'C values'
# Displays the 4D graphic.
fig = plt.figure(figsize = (15,15));
ax = fig.gca(projection='3d');
triang = mtri.Triangulation(x, y, triangles);
surf = ax.plot_trisurf(triang, z, cmap = name_color_map, shade=False, linewidth=0.2);
surf.set_array(colors); surf.autoscale();
#Add a color bar with a title to explain which variable is represented by the color.
cbar = fig.colorbar(surf, shrink=0.5, aspect=5);
cbar.ax.get_yaxis().labelpad = 15; cbar.ax.set_ylabel(list_name_variables[index_c], rotation = 270);
# Add titles to the axes and a title in the figure.
ax.set_xlabel(list_name_variables[index_x]); ax.set_ylabel(list_name_variables[index_y]);
ax.set_zlabel(list_name_variables[index_z]);
ax.view_init(elev=15., azim=45)
plt.show()
Here would be the output:
Although it looks brilliant, it is not quite what I am looking for (the above contour map example). I have modified the following script from this post in the hope to reach the required graph, however, the chart looks nothing similar to what I was expecting (something similar to the previous output graph). Warning: the following code may take some time to run.
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
df = pd.read_csv('Data_4D_plot.csv')
x = df['X'].to_numpy();
y = df['Y'].to_numpy();
z = df['Z'].to_numpy();
cc = df['C'].to_numpy();
# convert to 2d matrices
Z = np.outer(z.T, z)
X, Y = np.meshgrid(x, y)
C = np.outer(cc.T,cc)
# fourth dimention - colormap
# create colormap according to cc-value
color_dimension = C # change to desired fourth dimension
minn, maxx = color_dimension.min(), color_dimension.max()
norm = matplotlib.colors.Normalize(minn, maxx)
m = plt.cm.ScalarMappable(norm=norm, cmap='jet')
m.set_array([])
fcolors = m.to_rgba(color_dimension)
# plot
fig = plt.figure()
ax = fig.gca(projection='3d')
ax.plot_surface(X,Y,Z, rstride=1, cstride=1, facecolors=fcolors, vmin=minn, vmax=maxx, shade=False)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')
plt.show()
Now I was wondering from our kind community and experts if you can help me to plot a contour figure similar to the example graph (image one in this post), where the contours are based on the values within the range of C?

Recover elements from each cluster generated by scipy dendrogram

I'm building a dendrogram and truncating it to show only the largest 6 clusters. Also, the labeling is done via a simple leaf label function:
def llf(id):
return str(id)
tree = sch.dendrogram(Z, truncate_mode='lastp',
leaf_label_func=llf, p=6, show_contracted=False,
show_leaf_counts=False, leaf_rotation=90,
no_labels = False, orientation='right')
My output looks like this:
My goal is to replace the non descriptive labels for the leaves with the minimum value of the members from within that cluster. For example, if the top leaf is the cluster that contains the range from 10 to 1000, then I would like to replace '2468' with 10. The actual logic to replace the ticks in the plot is easy to implement:
fig, ax = plt.subplots()
mislabels = ["foo" for i in range(7)]
ax.set_xticklabels(mislabels, fontsize=10, rotation=45)
Any ideas regarding how to extract the values from within the leaders?
So far I'm able to map each singleton leaf to its cluster using fcluster. However, that only maps my initial 1230 points to a cluster. I need to map the point labeled as '2468' to its cluster and I'm not sure how to do that.
Thanks!
I found the way to do it
fig, ax = plt.subplots(2,2,figsize=(10,5))
ax = ax.ravel()
# [idx_plot[k]:, idx_plot[k]:]
for k, val in enumerate(linkages['ward']):
cluster_local = cluster_labels[val]['ward'][6]
leaders = sch.leaders(linkages['ward'][val], cluster_local)
dates_labels = dict()
for v, i in enumerate(leaders[1]):
date_idx = np.where(cluster_local == i)
dates_labels[leaders[0][v]] = (fechas[val][idx_plot[val]:][date_idx[0][0]].strftime('%y/%m'), fechas[val][idx_plot[val]:][date_idx[0][-1]].strftime('%y/%m'))
mislabels = [dates_labels[leaders[0][i]][0] + ', ' + dates_labels[leaders[0][i]][1] for i in range(6)]
yuca = sch.dendrogram(linkages['ward'][val], truncate_mode='lastp', ax=ax[k], leaf_label_func=llf, p=6, show_contracted=False, show_leaf_counts=False,
leaf_rotation=0, no_labels=False, orientation = 'right' )
# ax[k].set_xticklabels(mislabels, fontsize=10, rotation=90)
ax[k].set_yticklabels(mislabels, fontsize=10, rotation=0)
ax[k].set_title(val)
plt.tight_layout()
plt.show()

matplotlib: get the subplot layout?

I have a function that creates a grid of similar 2D histograms. So that I can select whether to put this new plot on a pre-existing figure, I do the following:
def make_hist2d(x, y, current_fig=False, layout=(1,1,1),*args):
if current_fig:
fig = _plt.gcf()
ax = fig.add_subplot(*layout) # layout=(nrows, ncols, nplot)
else:
fig, ax = _plt.subplots()
H, x, y = np.histogram2d(...)
# manipulate the histogram, e.g. column normalize.
XX, YY = _np.meshgrid(xedges, yedges)
Image = ax.pcolormesh(XX, YY, Hplot.T, norm=norm, **pcmesh_kwargs)
ax.autoscale(tight=True)
grid_kargs = {'orientation': 'vertical'}
cax, kw = _mpl.colorbar.make_axes_gridspec(ax, **grid_kargs)
cbar = fig.colorbar(Image, cax=cax)
cbar.set_label(cbar_title)
return fig, ax, cbar
def hist2d_grid(data_dict, key_pairs, layout, *args): # ``*args`` are things like xlog, ylog, xlabel, etc.
# that are common to all subplots in the figure.
fig, ax = _plt.subplots()
nplots = range(len(key_pairs) + 1) # key_pairs = ((k1a, k1b), (k2a, k2b), ..., (kna, knb))
ax_list = []
for pair, i in zip(key_pairs, nplots):
fig, ax, cbar = make_hist2d(data[k1a], data[k1b]
ax_list.append(ax)
return fig, ax_list
Then I call something like:
hgrid = hist2d_grid(...)
However, if I want to add a new figure to the grid, I don't know of a good way to get the subplot layout. For example, is there something like:
layout = fig.get_layout()
That would give me something like (nrows, ncols, n_subplots)?
I could do this with something like:
n_plot = len(ax_list) / 2 # Each subplot generates a plot and a color bar.
n_rows = np.floor(np.sqrt(n_ax))
n_cols = np.ceil(np.sqrt(n_ax))
But I have to deal with special cases like a (2,4) subplot array for which I would get n_rows = 2 and n_cols = 3, which means that I would be passing (2,3,8) to ax.add_subplot(), which clearly doesn't work because 8 > 3*2.
As ax returned by fig, ax = plt.subplots(4,2) is a numpy array of axes, then ax.shape will give you the layout information you want, e.g.
nrows, ncols = ax.shape
n_subplots = nrows*ncols
You can also get the locations of the various axes by looping over the children of the figure object,
[[f.colNum, f.rowNum] for f in fig.get_children()[1:]]
and probably get the size from the final element fig.get_children()[-1]
You could also use gridspec to be more explicit about the location of subplots if needed. With gridspec you setup the gridspec object and pass to subplot,
import matplotlib.gridspec as gridspec
gs = gridspec.GridSpec(2, 2)
ax = plt.subplot(gs[0, 0])
To get the layout you can then use,
gs.get_geometry()

how to update a matplotlib heatmap plot without creating a new window

I have matrix class that inherits from list. This class can display itself as a matplotlib heatmap representation of the matrix.
I'm trying to have the class written such that when I change values in the matrix, I can call the matrix's method plot() and it'll update the plot to reflect the matrix changes in the heatmap.
However, every time I run the method plot(), it creates a new heatmap in a new window instead of updating the existing plot. How could I get it simply to update the existing plot?
In the code below, there are three main parts: the main function shows how an instance of the matrix class is created, plotted and updated; the matrix class is basically a list object, with some minor functionality (including plotting) bolted on; the function plotList() is the function the matrix class calls in order to generate the plot object initially.
import time
import random
import matplotlib.pyplot as plt
plt.ion()
import numpy as np
def main():
print("plot 2 x 2 matrix and display it changing in a loop")
matrix = Matrix(
numberOfColumns = 2,
numberOfRows = 2,
randomise = True
)
# Plot the matrix.
matrix.plot()
# Change the matrix, redrawing it after each change.
for row in range(len(matrix)):
for column in range(len(matrix[row])):
input("Press Enter to continue.")
matrix[row][column] = 10
matrix.plot()
input("Press Enter to terminate.")
matrix.closePlot()
class Matrix(list):
def __init__(
self,
*args,
numberOfColumns = 3,
numberOfRows = 3,
element = 0.0,
randomise = False,
randomiseLimitLower = -0.2,
randomiseLimitUpper = 0.2
):
# list initialisation
super().__init__(self, *args)
self.numberOfColumns = numberOfColumns
self.numberOfRows = numberOfRows
self.element = element
self.randomise = randomise
self.randomiseLimitLower = randomiseLimitLower
self.randomiseLimitUpper = randomiseLimitUpper
# fill with default element
for column in range(self.numberOfColumns):
self.append([element] * self.numberOfRows)
# fill with pseudorandom elements
if self.randomise:
random.seed()
for row in range(self.numberOfRows):
for column in range(self.numberOfColumns):
self[row][column] = random.uniform(
self.randomiseLimitUpper,
self.randomiseLimitLower
)
# plot
self._plot = plotList(
list = self,
mode = "return"
)
# for display or redraw plot behaviour
self._plotShown = False
def plot(self):
# display or redraw plot
self._plot.draw()
if self._plotShown:
#self._plot = plotList(
# list = self,
# mode = "return"
# )
array = np.array(self)
fig, ax = plt.subplots()
heatmap = ax.pcolor(array, cmap = plt.cm.Blues)
self._plot.draw()
else:
self._plot.show()
self._plotShown = True
def closePlot(self):
self._plot.close()
def plotList(
list = list,
mode = "plot" # plot/return
):
# convert list to NumPy array
array = np.array(list)
# create axis labels
labelsColumn = []
labelsRow = []
for rowNumber in range(0, len(list)):
labelsRow.append(rowNumber + 1)
for columnNumber in range(0, len(list[rowNumber])):
labelsColumn.append(columnNumber)
fig, ax = plt.subplots()
heatmap = ax.pcolor(array, cmap = plt.cm.Blues)
# display plot or return plot object
if mode == "plot":
plt.show()
elif mode == "return":
return(plt)
else:
Exception
if __name__ == '__main__':
main()
I'm using Python 3 in Ubuntu.
The method plot(self) creates a new figure in the line fig, ax = plt.subplots(). To use an existing figure you can give your figure a number or name when it's first created in plotList():
fig = plt.figure('matrix figure')
ax = fig.add_subplot(111)
then use
plt.figure('matrix figure')
ax = gca() # gets current axes
to make that the active figure and axes. Alternately, you might want to the figure and axis created in plotList and pass them to plot.

Resources