How to select specific number of colors to show in color bar from a big list ? - Matplotlib - python-3.x

I plotted some data which has 70 classes, so when I built the color bar it's very difficult to distinguish between each legend as shown below:
The code that I'm using is:
formation_colors = # 70 colors
formation_labels = # 70 labels
data = # the section of the entire dataset which only has 13 labels
data = data.sort_values(by='DEPTH_MD')
ztop=data.DEPTH_MD.min(); zbot=data.DEPTH_MD.max()
cmap_formations = colors.ListedColormap(formation_colors[0:len(formation_colors)], 'indexed')
cluster_f = np.repeat(np.expand_dims(data['Formations'].values,1), 100, 1)
fig = plt.figure(figsize=(2,10))
ax = fig.add_subplot()
im_f = ax.imshow(cluster_f, interpolation='none', aspect='auto', cmap = cmap_formations, vmin=0, vmax=69)
ax.set_xlabel('FORMATION')
ax.set_xticklabels(['']);
divider_f = make_axes_locatable(ax)
cax_f = divider_f.append_axes("right", size="20%", pad=0.05)
cbar_f = plt.colorbar(im_f, cax = cax_f,)
cbar_f.set_ticks(range(0,len(formation_labels))); cbar_f.set_ticklabels(formation_labels)
So far, if I just change:
1. cmap_formations = colors.ListedColormap(formation_colors[0:len(formation_colors)], 'indexed')
2. cbar_f.set_ticks(range(0,len(formation_labels))); cbar_f.set_ticklabels(formation_labels)
to:
cmap_formations = colors.ListedColormap(formation_colors[0:len(data['FORMATION'].unique())], 'indexed')
cbar_f.set_ticks(range(0,len(data['FORMATION'].unique()))); cbar_f.set_ticklabels(data['FORMATION'].unique())
I get, the corresponding colors in the cbar, however the plot is no longer correct and also the legends are out of square
Thank you so much if you have any idea how to do this.

Although not explicitly mentioned in the question, I suppose data['FORMATION'] contains indices from 0 till 69 into the lists of formation_colors and formation_labels
The main problem is that data['FORMATION'] needs to be renumbered to be new indices (with numbers 0 till 12) into the new list of unique colors. np.unique(..., return_inverse=True) returns both the list of unique numbers, and the renumbering for the values.
To be able to reindex the list of colors and of labels, it helps to convert them to numpy arrays.
To make the code easier to debug, the following test uses a simple relation between the list of colors and the list of labels.
from matplotlib import pyplot as plt
from matplotlib import colors
from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable
import numpy as np
import pandas as pd
formation_colors = np.random.choice(list(colors.CSS4_COLORS), 70, replace=False) # 70 random color names
formation_labels = ['lbl_' + c for c in formation_colors] # 70 labels
formation_colors = np.asarray(formation_colors)
formation_labels = np.asarray(formation_labels)
f = np.random.randint(0, 70, 13)
d = np.sort(np.random.randint(0, 5300, 13))
data = pd.DataFrame({'FORMATION': np.repeat(f, np.diff(np.append(0, d))),
'DEPTH_MD': np.arange(d[-1])})
data = data.sort_values(by='DEPTH_MD')
ztop = data['DEPTH_MD'].min()
zbot = data['DEPTH_MD'].max()
unique_values, formation_new_values = np.unique(data['FORMATION'], return_inverse=True)
cmap_formations = colors.ListedColormap(formation_colors[unique_values], 'indexed')
cluster_f = formation_new_values.reshape(-1, 1)
fig = plt.figure(figsize=(3, 10))
ax = fig.add_subplot()
im_f = ax.imshow(cluster_f, extent=[0, 1, zbot, ztop],
interpolation='none', aspect='auto', cmap=cmap_formations, vmin=0, vmax=len(unique_values)-1)
ax.set_xlabel('FORMATION')
ax.set_xticks([])
divider_f = make_axes_locatable(ax)
cax_f = divider_f.append_axes("right", size="20%", pad=0.05)
cbar_f = plt.colorbar(im_f, cax=cax_f)
cbar_f.set_ticks(np.linspace(0, len(unique_values)-1, 2*len(unique_values)+1)[1::2])
cbar_f.set_ticklabels(formation_labels[unique_values])
plt.subplots_adjust(left=0.2, right=0.5)
plt.show()
Here is a comparison plot:

Related

Plotly : How to enable text label in line graph for the last value?

I am trying to build a graph where the line graph should show the value of only the last element in some beautiful formating.
line graph with no text at end
Now the current method of the text shows for all elements and is a straight text that creates a lot of collisions with different lines in the same graph and looks clumsy.
Will be very nice to achieve something as mentioned in the below image.
desired line graph with text
This is now handled through:
legendgroup = d.name
Plot 1: All
Plot 2: Deselect GOOG in the legend and see that the marker disappears as well:
Complet code:
# imports
import pandas as pd
import plotly.express as px
# data
df = px.data.stocks()
df = df.drop('AMZN', axis = 1)
colors = px.colors.qualitative.T10
# plotly
fig = px.line(df,
x = 'date',
y = [c for c in df.columns if c != 'date'],
template = 'plotly_dark',
color_discrete_sequence = colors,
title = 'Stocks',
)
# move legend
fig.layout.legend.x = -0.3
# add traces for annotations and text for end of lines
for i, d in enumerate(fig.data):
fig.add_scatter(x=[d.x[-1]], y = [d.y[-1]],
mode = 'markers+text',
text = d.y[-1],
textfont = dict(color=d.line.color),
textposition='middle right',
marker = dict(color = d.line.color, size = 12),
legendgroup = d.name,
showlegend=False)
fig.show()

How could I edit my code to plot 4D contour something similar to this example in python?

Similar to many other researchers on stackoverflow who are trying to plot a contour graph out of 4D data (i.e., X,Y,Z and their corresponding value C), I am attempting to plot a 4D contour map out of my data. I have tried many of the suggested solutions in stackover flow. From all of the plots suggested this, and this were the closest to what I want but sill not quite what I need in terms of data interpretation. Here is the ideal plot example: (source)
Here is a subset of the data. I put it on the dropbox. Once this data is downloaded to the directory of the python file, the following code will work. I have modified this script from this post.
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import matplotlib.tri as mtri
#####Importing the data
df = pd.read_csv('Data_4D_plot.csv')
do_random_pt_example = False;
index_x = 0; index_y = 1; index_z = 2; index_c = 3;
list_name_variables = ['x', 'y', 'z', 'c'];
name_color_map = 'seismic';
if do_random_pt_example:
number_of_points = 200;
x = np.random.rand(number_of_points);
y = np.random.rand(number_of_points);
z = np.random.rand(number_of_points);
c = np.random.rand(number_of_points);
else:
x = df['X'].to_numpy();
y = df['Y'].to_numpy();
z = df['Z'].to_numpy();
c = df['C'].to_numpy();
#end
#-----
# We create triangles that join 3 pt at a time and where their colors will be
# determined by the values of their 4th dimension. Each triangle contains 3
# indexes corresponding to the line number of the points to be grouped.
# Therefore, different methods can be used to define the value that
# will represent the 3 grouped points and I put some examples.
triangles = mtri.Triangulation(x, y).triangles;
choice_calcuation_colors = 2;
if choice_calcuation_colors == 1: # Mean of the "c" values of the 3 pt of the triangle
colors = np.mean( [c[triangles[:,0]], c[triangles[:,1]], c[triangles[:,2]]], axis = 0);
elif choice_calcuation_colors == 2: # Mediane of the "c" values of the 3 pt of the triangle
colors = np.median( [c[triangles[:,0]], c[triangles[:,1]], c[triangles[:,2]]], axis = 0);
elif choice_calcuation_colors == 3: # Max of the "c" values of the 3 pt of the triangle
colors = np.max( [c[triangles[:,0]], c[triangles[:,1]], c[triangles[:,2]]], axis = 0);
#end
#----------
###=====adjust this part for the labeling of the graph
list_name_variables[index_x] = 'X (m)'
list_name_variables[index_y] = 'Y (m)'
list_name_variables[index_z] = 'Z (m)'
list_name_variables[index_c] = 'C values'
# Displays the 4D graphic.
fig = plt.figure(figsize = (15,15));
ax = fig.gca(projection='3d');
triang = mtri.Triangulation(x, y, triangles);
surf = ax.plot_trisurf(triang, z, cmap = name_color_map, shade=False, linewidth=0.2);
surf.set_array(colors); surf.autoscale();
#Add a color bar with a title to explain which variable is represented by the color.
cbar = fig.colorbar(surf, shrink=0.5, aspect=5);
cbar.ax.get_yaxis().labelpad = 15; cbar.ax.set_ylabel(list_name_variables[index_c], rotation = 270);
# Add titles to the axes and a title in the figure.
ax.set_xlabel(list_name_variables[index_x]); ax.set_ylabel(list_name_variables[index_y]);
ax.set_zlabel(list_name_variables[index_z]);
ax.view_init(elev=15., azim=45)
plt.show()
Here would be the output:
Although it looks brilliant, it is not quite what I am looking for (the above contour map example). I have modified the following script from this post in the hope to reach the required graph, however, the chart looks nothing similar to what I was expecting (something similar to the previous output graph). Warning: the following code may take some time to run.
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
df = pd.read_csv('Data_4D_plot.csv')
x = df['X'].to_numpy();
y = df['Y'].to_numpy();
z = df['Z'].to_numpy();
cc = df['C'].to_numpy();
# convert to 2d matrices
Z = np.outer(z.T, z)
X, Y = np.meshgrid(x, y)
C = np.outer(cc.T,cc)
# fourth dimention - colormap
# create colormap according to cc-value
color_dimension = C # change to desired fourth dimension
minn, maxx = color_dimension.min(), color_dimension.max()
norm = matplotlib.colors.Normalize(minn, maxx)
m = plt.cm.ScalarMappable(norm=norm, cmap='jet')
m.set_array([])
fcolors = m.to_rgba(color_dimension)
# plot
fig = plt.figure()
ax = fig.gca(projection='3d')
ax.plot_surface(X,Y,Z, rstride=1, cstride=1, facecolors=fcolors, vmin=minn, vmax=maxx, shade=False)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')
plt.show()
Now I was wondering from our kind community and experts if you can help me to plot a contour figure similar to the example graph (image one in this post), where the contours are based on the values within the range of C?

Matplotlib - Horizontal Bar Chart Timeline With Dates - Xticks not showing date

Trying to make a graph that looks like the first image here.
However when I try and implement it, I can't work out how to get the dates to print on the X axis, the scale seems about right, just the xticks seem not to be dates, but some basically random number. The typical output is visible in figure 2.
How can I get the dates to show on the xticks. I would like it to show every month, or quarter between 2019-12-01 and 2021-03-01 (march 1st).
Bonus points for any formatting that makes it look more like the first picture.
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
event = np.array(['Groupone','Grouptwo','Group3','Group4','Group5','Group6'])
begin = np.array([datetime(year=2019,month=12,day=1),datetime(year=2020,month=2,day=1),datetime(year=2020,month=5,day=1),datetime(year=2020,month=11,day=1),datetime(year=2019,month=12,day=1),datetime(year=2020,month=5,day=1)])
end = np.array([datetime(year=2019,month=12,day=30),datetime(year=2021,month=2,day=1),datetime(year=2021,month=2,day=1),datetime(year=2021,month=2,day=1),datetime(year=2021,month=2,day=1),datetime(year=2020,month=7,day=3)])
beg_sort = np.sort(begin)
end_sort = end[np.argsort(begin)]
evt_sort = event[np.argsort(begin)]
plt.barh(range(len(beg_sort)), end_sort-beg_sort, left=beg_sort, align='center')
plt.yticks(range(len(beg_sort)), evt_sort)
plt.yticks(range(len(beg_sort)), evt_sort)
plt.show()(begin)
end_sort = end[np.argsort(begin)]
evt_sort = event[np.argsort(begin)]
plt.barh(range(len(beg_sort)), end_sort-beg_sort, left=beg_sort, align='center')
plt.yticks(range(len(beg_sort)), evt_sort)
plt.show()
You can plot each bar as line, choosing the width of the line (lw) you prefer:
# Set the color of the grid lines
mpl.rcParams['grid.color'] = "w"
fig, ax = plt.subplots(1, 1)
# Plot eac item as a line
for i, (b, e, l) in enumerate(zip(beg_sort, end_sort, evt_sort)):
ax.plot_date([b, e], [i + 1] * 2, ls='-', marker=None, lw=10) # 10 for the line width
# Set ticks and labels on y axis
ax.set_yticks(range(1, len(evt_sort) + 1))
ax.set_yticklabels(evt_sort)
# Set color and transparency of the grid
ax.patch.set_facecolor('gray')
ax.patch.set_alpha(0.3)
# activate grid
ax.grid(True)
Moreover, you can play with the background grid, customizing it according to your needs.
Hacked something together that works, posting for curiosity, however go with PieCot's answer above:
import matplotlib.pyplot as plt
import numpy as np
from datetime import date
from datetime import datetime
import matplotlib.dates as mdates
#https://stackoverflow.com/questions/58387731/plotting-month-year-as-x-ticks-in-matplotlib
fig, ax = plt.subplots(ncols=2, nrows=1, figsize=(15, 4.18))
#fig, ax = plt.figure( figsize=(15, 4.18))
event = np.array(['Groupone','Grouptwo','Group3','Group4','Group5','Group6'])
begin = np.array([datetime(year=2019,month=12,day=1),datetime(year=2020,month=2,day=1),datetime(year=2020,month=5,day=1),datetime(year=2020,month=11,day=1),datetime(year=2019,month=12,day=1),datetime(year=2020,month=5,day=1)])
end = np.array([datetime(year=2019,month=12,day=30),datetime(year=2021,month=2,day=1),datetime(year=2021,month=2,day=1),datetime(year=2021,month=2,day=1),datetime(year=2021,month=2,day=1),datetime(year=2020,month=7,day=3)])
beg_sort = np.sort(begin)
end_sort = end[np.argsort(begin)]
evt_sort = event[np.argsort(begin)]
#start_m = click.prompt('Start month', type=int)
#start_y = click.prompt('Start year', type=int)
#end_m = click.prompt('End month', type=int)
#end_y = click.prompt('End year', type=int)
start_m = 12
start_y = 2019
end_m = 3
end_y = 2021
months = mdates.MonthLocator() # Add tick every month
#days = mdates.DayLocator(range(1,32,5)) # Add tick every 5th day in a month
#monthFmt = mdates.DateFormatter('%b') # Use abbreviated month name
ax[1].xaxis.set_major_locator(months)
#ax[1].xaxis.set_major_formatter(mdates.DateFormatter('%m-%Y'))
ax[1].xaxis.set_major_formatter(mdates.DateFormatter('%b-%Y'))
ax[1].xaxis.set_tick_params(rotation=90)
#ax.xaxis.set_tick_params(rotation=30)
#ax[1].xaxis.set_major_formatter(monthFmt)
#ax[0].xaxis.set_minor_locator(days)
start = date(year=start_y,month=start_m,day=1)
print(start)
end = date(year=end_y,month=end_m,day=1)
print(end)
Nticks = 6
delta = (end-start)/Nticks
tick_dates = [start + i*delta for i in range(Nticks)]
x_ticks = ['{}/{}'.format(d.month,d.year) for d in tick_dates]
print(x_ticks)
plt.barh(range(len(beg_sort)), end_sort-beg_sort, left=beg_sort, align='center')
plt.yticks(range(len(beg_sort)), evt_sort)
plt.yticks(range(len(beg_sort)), evt_sort)
#plt.xticks = x_ticks
#plt.set_xticks(x_ticks)
#plt.show()
fig.delaxes(ax[0])
plt.savefig('gwern.pdf',bbox_inches='tight')

matplotlib: get the subplot layout?

I have a function that creates a grid of similar 2D histograms. So that I can select whether to put this new plot on a pre-existing figure, I do the following:
def make_hist2d(x, y, current_fig=False, layout=(1,1,1),*args):
if current_fig:
fig = _plt.gcf()
ax = fig.add_subplot(*layout) # layout=(nrows, ncols, nplot)
else:
fig, ax = _plt.subplots()
H, x, y = np.histogram2d(...)
# manipulate the histogram, e.g. column normalize.
XX, YY = _np.meshgrid(xedges, yedges)
Image = ax.pcolormesh(XX, YY, Hplot.T, norm=norm, **pcmesh_kwargs)
ax.autoscale(tight=True)
grid_kargs = {'orientation': 'vertical'}
cax, kw = _mpl.colorbar.make_axes_gridspec(ax, **grid_kargs)
cbar = fig.colorbar(Image, cax=cax)
cbar.set_label(cbar_title)
return fig, ax, cbar
def hist2d_grid(data_dict, key_pairs, layout, *args): # ``*args`` are things like xlog, ylog, xlabel, etc.
# that are common to all subplots in the figure.
fig, ax = _plt.subplots()
nplots = range(len(key_pairs) + 1) # key_pairs = ((k1a, k1b), (k2a, k2b), ..., (kna, knb))
ax_list = []
for pair, i in zip(key_pairs, nplots):
fig, ax, cbar = make_hist2d(data[k1a], data[k1b]
ax_list.append(ax)
return fig, ax_list
Then I call something like:
hgrid = hist2d_grid(...)
However, if I want to add a new figure to the grid, I don't know of a good way to get the subplot layout. For example, is there something like:
layout = fig.get_layout()
That would give me something like (nrows, ncols, n_subplots)?
I could do this with something like:
n_plot = len(ax_list) / 2 # Each subplot generates a plot and a color bar.
n_rows = np.floor(np.sqrt(n_ax))
n_cols = np.ceil(np.sqrt(n_ax))
But I have to deal with special cases like a (2,4) subplot array for which I would get n_rows = 2 and n_cols = 3, which means that I would be passing (2,3,8) to ax.add_subplot(), which clearly doesn't work because 8 > 3*2.
As ax returned by fig, ax = plt.subplots(4,2) is a numpy array of axes, then ax.shape will give you the layout information you want, e.g.
nrows, ncols = ax.shape
n_subplots = nrows*ncols
You can also get the locations of the various axes by looping over the children of the figure object,
[[f.colNum, f.rowNum] for f in fig.get_children()[1:]]
and probably get the size from the final element fig.get_children()[-1]
You could also use gridspec to be more explicit about the location of subplots if needed. With gridspec you setup the gridspec object and pass to subplot,
import matplotlib.gridspec as gridspec
gs = gridspec.GridSpec(2, 2)
ax = plt.subplot(gs[0, 0])
To get the layout you can then use,
gs.get_geometry()

how to update a matplotlib heatmap plot without creating a new window

I have matrix class that inherits from list. This class can display itself as a matplotlib heatmap representation of the matrix.
I'm trying to have the class written such that when I change values in the matrix, I can call the matrix's method plot() and it'll update the plot to reflect the matrix changes in the heatmap.
However, every time I run the method plot(), it creates a new heatmap in a new window instead of updating the existing plot. How could I get it simply to update the existing plot?
In the code below, there are three main parts: the main function shows how an instance of the matrix class is created, plotted and updated; the matrix class is basically a list object, with some minor functionality (including plotting) bolted on; the function plotList() is the function the matrix class calls in order to generate the plot object initially.
import time
import random
import matplotlib.pyplot as plt
plt.ion()
import numpy as np
def main():
print("plot 2 x 2 matrix and display it changing in a loop")
matrix = Matrix(
numberOfColumns = 2,
numberOfRows = 2,
randomise = True
)
# Plot the matrix.
matrix.plot()
# Change the matrix, redrawing it after each change.
for row in range(len(matrix)):
for column in range(len(matrix[row])):
input("Press Enter to continue.")
matrix[row][column] = 10
matrix.plot()
input("Press Enter to terminate.")
matrix.closePlot()
class Matrix(list):
def __init__(
self,
*args,
numberOfColumns = 3,
numberOfRows = 3,
element = 0.0,
randomise = False,
randomiseLimitLower = -0.2,
randomiseLimitUpper = 0.2
):
# list initialisation
super().__init__(self, *args)
self.numberOfColumns = numberOfColumns
self.numberOfRows = numberOfRows
self.element = element
self.randomise = randomise
self.randomiseLimitLower = randomiseLimitLower
self.randomiseLimitUpper = randomiseLimitUpper
# fill with default element
for column in range(self.numberOfColumns):
self.append([element] * self.numberOfRows)
# fill with pseudorandom elements
if self.randomise:
random.seed()
for row in range(self.numberOfRows):
for column in range(self.numberOfColumns):
self[row][column] = random.uniform(
self.randomiseLimitUpper,
self.randomiseLimitLower
)
# plot
self._plot = plotList(
list = self,
mode = "return"
)
# for display or redraw plot behaviour
self._plotShown = False
def plot(self):
# display or redraw plot
self._plot.draw()
if self._plotShown:
#self._plot = plotList(
# list = self,
# mode = "return"
# )
array = np.array(self)
fig, ax = plt.subplots()
heatmap = ax.pcolor(array, cmap = plt.cm.Blues)
self._plot.draw()
else:
self._plot.show()
self._plotShown = True
def closePlot(self):
self._plot.close()
def plotList(
list = list,
mode = "plot" # plot/return
):
# convert list to NumPy array
array = np.array(list)
# create axis labels
labelsColumn = []
labelsRow = []
for rowNumber in range(0, len(list)):
labelsRow.append(rowNumber + 1)
for columnNumber in range(0, len(list[rowNumber])):
labelsColumn.append(columnNumber)
fig, ax = plt.subplots()
heatmap = ax.pcolor(array, cmap = plt.cm.Blues)
# display plot or return plot object
if mode == "plot":
plt.show()
elif mode == "return":
return(plt)
else:
Exception
if __name__ == '__main__':
main()
I'm using Python 3 in Ubuntu.
The method plot(self) creates a new figure in the line fig, ax = plt.subplots(). To use an existing figure you can give your figure a number or name when it's first created in plotList():
fig = plt.figure('matrix figure')
ax = fig.add_subplot(111)
then use
plt.figure('matrix figure')
ax = gca() # gets current axes
to make that the active figure and axes. Alternately, you might want to the figure and axis created in plotList and pass them to plot.

Resources