How to translate hexagon matplotlib plot to an interactive bokeh plot? - python-3.x

I have been working with the excellent minisom package and want to plot interactively the hexagonal map that reflects the results of the self-organising maps training process. There's already a code example that does this statically using matplotlib but to do so interactively, I would like to use bokeh. This is where I am struggling.
This is the code to generate a simplified matplotlib example of what's already on the package page:
from minisom import MiniSom
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import RegularPolygon
from matplotlib import cm
from bokeh.plotting import figure
from bokeh.io import save, show, output_file, output_notebook
# Direct Bokeh output to the notebook (used by the Bokeh snippets further down).
output_notebook()
# Load the UCI seeds dataset; the regex separator '\t+' treats runs of tabs as one delimiter.
data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/00236/seeds_dataset.txt',
names=['area', 'perimeter', 'compactness', 'length_kernel', 'width_kernel',
'asymmetry_coefficient', 'length_kernel_groove', 'target'], sep='\t+')
# Keep the class column aside (t) and drop it from the feature table.
t = data['target'].values
data = data[data.columns[:-1]]
# data normalisation: subtract the column mean and divide by the column standard deviation
data = (data - np.mean(data, axis=0)) / np.std(data, axis=0)
# From here on `data` is a plain NumPy array, one row per sample.
data = data.values
# initialisation and training
# 15 x 15 map on a hexagonal topology; input length is the number of feature columns.
som = MiniSom(15, 15, data.shape[1], sigma=1.5, learning_rate=.7, activation_distance='euclidean',
topology='hexagonal', neighborhood_function='gaussian', random_seed=10)
# 1000 is the iteration count passed to MiniSom.train.
som.train(data, 1000, verbose=True)
# plot hexagonal topology
f = plt.figure(figsize=(10, 10))
ax = f.add_subplot(111)
ax.set_aspect('equal')
# Per-neuron Euclidean centres, the inter-neuron distance map and the weight
# grid; xx, yy, umatrix and weights are reused by the Bokeh snippets below.
xx, yy = som.get_euclidean_coordinates()
umatrix = som.distance_map()
weights = som.get_weights()
# Draw one hexagon per neuron, shaded by its distance-map value.
for i in range(weights.shape[0]):
    for j in range(weights.shape[1]):
        # Rescale the row coordinate before using it as the hexagon centre.
        wy = yy[(i, j)] * 2 / np.sqrt(3) * 3 / 4
        # Named `hexagon` rather than `hex` so the builtin is not shadowed.
        hexagon = RegularPolygon((xx[(i, j)], wy), numVertices=6, radius=.95 / np.sqrt(3),
                                 facecolor=cm.Blues(umatrix[i, j]), alpha=.4, edgecolor='gray')
        ax.add_patch(hexagon)
for x in data:
    w = som.winner(x)
    # place a marker on the winning position for the sample x
    wx, wy = som.convert_map_to_euclidean(w)
    wy = wy * 2 / np.sqrt(3) * 3 / 4
    # An explicit 'o' marker is required: without one, plotting a single point
    # draws nothing and the marker* keyword arguments have no visible effect.
    plt.plot(wx, wy, 'o', markerfacecolor='None',
             markeredgecolor='black', markersize=12, markeredgewidth=2)
plt.show()
matplotlib hexagonal topology plot
I've tried to translate the code into bokeh but the resulting hex plot (to me, primitively) looks like it needs to be flipped vertically onto the points and for the skew to be straightened out.
# Flatten the neuron grid (row-major) into parallel lists: one centre
# coordinate pair and one colour per neuron.
grid_cells = [(i, j) for i in range(weights.shape[0]) for j in range(weights.shape[1])]
tile_centres_column = [xx[cell] for cell in grid_cells]
tile_centres_row = [yy[cell] * 2 / np.sqrt(3) * 3 / 4 for cell in grid_cells]
colours = [cm.Blues(umatrix[cell]) for cell in grid_cells]

# Euclidean position of the winning neuron for every sample, with the same
# row rescaling that is applied to the tile centres.
winner_centres = [som.convert_map_to_euclidean(xy=som.winner(sample)) for sample in data]
weight_x = [centre_x for centre_x, _ in winner_centres]
weight_y = [centre_y * 2 / np.sqrt(3) * 3 / 4 for _, centre_y in winner_centres]
# plot hexagonal topology
# This is the asker's non-working attempt, kept as posted.
# NOTE(review): plot_width/plot_height are the older Bokeh spelling of the figure size — confirm against the installed version.
plot = figure(plot_width=800, plot_height=800,
match_aspect=True)
# NOTE(review): hex_tile appears to read q/r as axial hex-grid coordinates, not
# Cartesian centres, which would explain the skew described in the question — confirm in the Bokeh docs.
plot.hex_tile(q=tile_centres_column, r=tile_centres_row,
size=.95 / np.sqrt(3),
color=colours,
fill_alpha=.4,
line_color='black')
# One dot per sample at the (rescaled) position of its winning neuron.
plot.dot(x=weight_x, y=weight_y,
fill_color='black',
size=12)
show(plot)
bokeh hexagonal topology plot
How can I translate this into a bokeh plot?

Found out how to do it after reaching out to the minisom package author for help. Complete code available here.
from bokeh.colors import RGB
from bokeh.io import curdoc, show, output_notebook
from bokeh.transform import factor_mark, factor_cmap
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.plotting import figure, output_file
# define labels
SPECIES = ['Kama', 'Rosa', 'Canadian']

# One centre and one matplotlib RGBA colour per neuron, in row-major grid order.
grid_cells = [(i, j) for i in range(weights.shape[0]) for j in range(weights.shape[1])]
hex_centre_col = [xx[cell] for cell in grid_cells]
hex_centre_row = [yy[cell] * 2 / np.sqrt(3) * 3 / 4 for cell in grid_cells]
hex_colour = [cm.Blues(umatrix[cell]) for cell in grid_cells]

# Winning-neuron position and species name for every sample; the targets in
# `t` are 1-based indices into SPECIES.
weight_x, weight_y, label = [], [], []
for sample, target in zip(data, t):
    wx, wy = som.convert_map_to_euclidean(xy=som.winner(sample))
    weight_x.append(wx)
    weight_y.append(wy * 2 / np.sqrt(3) * 3 / 4)
    label.append(SPECIES[target - 1])
# convert matplotlib colour palette to bokeh colour palette
# (matplotlib yields 0..1 float channels; scale them to 0..255 integers first)
hex_plt = [(255 * np.array(rgba)).astype(int) for rgba in hex_colour]
hex_bokeh = [RGB(*channels).to_hex() for channels in hex_plt]

output_file("resulting_images/som_seed_hex.html")

# initialise figure/plot
fig = figure(
    title="SOM: Hexagonal Topology",
    plot_height=800,
    plot_width=800,
    match_aspect=True,
    tools="wheel_zoom,save,reset",
)

# create data stream for plotting
# Hexagon centres and their fill colours.
source_hex = ColumnDataSource(
    data={'x': hex_centre_col, 'y': hex_centre_row, 'c': hex_bokeh}
)
# Per-sample winner positions and species labels.
source_pages = ColumnDataSource(
    data={'wx': weight_x, 'wy': weight_y, 'species': label}
)
# define markers
MARKERS = ['diamond', 'cross', 'x']
# add shapes to plot
# x and y are swapped on purpose for both glyphs (hexagons and sample markers),
# so the two layers stay aligned with each other.
fig.hex(x='y', y='x', source=source_hex,
        size=100 * (.95 / np.sqrt(3)),
        alpha=.4,
        line_color='gray',
        fill_color='c')
# One marker per sample; marker shape and colour are both keyed on the species column.
fig.scatter(x='wy', y='wx', source=source_pages,
            legend_field='species',
            size=20,
            marker=factor_mark(field_name='species', markers=MARKERS, factors=SPECIES),
            color=factor_cmap(field_name='species', palette='Category10_3', factors=SPECIES))
# add hover-over tooltip
fig.add_tools(HoverTool(
    tooltips=[
        # '@name' looks up a data-source column; the previous '#species' was
        # shown as literal text instead of the sample's species.
        ("label", '@species'),
        ("(x,y)", '($x, $y)')],
    mode="mouse",
    point_policy="follow_mouse"
))
show(fig)

Related

How could I edit my code to plot 4D contour something similar to this example in python?

Similar to many other researchers on Stack Overflow who are trying to plot a contour graph out of 4D data (i.e., X, Y, Z and their corresponding value C), I am attempting to plot a 4D contour map out of my data. I have tried many of the solutions suggested on Stack Overflow. Of all the plots suggested, this and this were the closest to what I want, but still not quite what I need in terms of data interpretation. Here is the ideal plot example: (source)
Here is a subset of the data. I put it on the dropbox. Once this data is downloaded to the directory of the python file, the following code will work. I have modified this script from this post.
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import matplotlib.tri as mtri
# pandas is needed for read_csv but was missing from this snippet's imports.
import pandas as pd

#####Importing the data
df = pd.read_csv('Data_4D_plot.csv')
do_random_pt_example = False
index_x, index_y, index_z, index_c = 0, 1, 2, 3
list_name_variables = ['x', 'y', 'z', 'c']
name_color_map = 'seismic'
if do_random_pt_example:
    # Synthetic alternative: 200 uniformly random points with random colour values.
    number_of_points = 200
    x = np.random.rand(number_of_points)
    y = np.random.rand(number_of_points)
    z = np.random.rand(number_of_points)
    c = np.random.rand(number_of_points)
else:
    x = df['X'].to_numpy()
    y = df['Y'].to_numpy()
    z = df['Z'].to_numpy()
    c = df['C'].to_numpy()
#-----
# We create triangles that join 3 pt at a time and where their colors will be
# determined by the values of their 4th dimension. Each triangle contains 3
# indexes corresponding to the line number of the points to be grouped.
# Therefore, different methods can be used to define the value that
# will represent the 3 grouped points and I put some examples.
triangles = mtri.Triangulation(x, y).triangles
# "c" values at the three corners of every triangle (one array per corner).
corner_values = [c[triangles[:, 0]], c[triangles[:, 1]], c[triangles[:, 2]]]
choice_calcuation_colors = 2
if choice_calcuation_colors == 1:  # Mean of the "c" values of the 3 pt of the triangle
    colors = np.mean(corner_values, axis=0)
elif choice_calcuation_colors == 2:  # Median of the "c" values of the 3 pt of the triangle
    colors = np.median(corner_values, axis=0)
elif choice_calcuation_colors == 3:  # Max of the "c" values of the 3 pt of the triangle
    colors = np.max(corner_values, axis=0)
#----------
#----------
###=====adjust this part for the labeling of the graph
list_name_variables[index_x] = 'X (m)'
list_name_variables[index_y] = 'Y (m)'
list_name_variables[index_z] = 'Z (m)'
list_name_variables[index_c] = 'C values'
# Displays the 4D graphic.
fig = plt.figure(figsize = (15,15));
ax = fig.gca(projection='3d');
triang = mtri.Triangulation(x, y, triangles);
surf = ax.plot_trisurf(triang, z, cmap = name_color_map, shade=False, linewidth=0.2);
surf.set_array(colors); surf.autoscale();
#Add a color bar with a title to explain which variable is represented by the color.
cbar = fig.colorbar(surf, shrink=0.5, aspect=5);
cbar.ax.get_yaxis().labelpad = 15; cbar.ax.set_ylabel(list_name_variables[index_c], rotation = 270);
# Add titles to the axes and a title in the figure.
ax.set_xlabel(list_name_variables[index_x]); ax.set_ylabel(list_name_variables[index_y]);
ax.set_zlabel(list_name_variables[index_z]);
ax.view_init(elev=15., azim=45)
plt.show()
Here would be the output:
Although it looks brilliant, it is not quite what I am looking for (the above contour map example). I have modified the following script from this post in the hope to reach the required graph, however, the chart looks nothing similar to what I was expecting (something similar to the previous output graph). Warning: the following code may take some time to run.
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
# pandas is needed for read_csv but was missing from this snippet's imports.
import pandas as pd

df = pd.read_csv('Data_4D_plot.csv')
x = df['X'].to_numpy()
y = df['Y'].to_numpy()
z = df['Z'].to_numpy()
cc = df['C'].to_numpy()
# convert to 2d matrices
# (this is the asker's attempt: outer products of the raw columns, N x N each)
Z = np.outer(z.T, z)
X, Y = np.meshgrid(x, y)
C = np.outer(cc.T, cc)
# fourth dimension - colormap
# create colormap according to cc-value
color_dimension = C  # change to desired fourth dimension
minn, maxx = color_dimension.min(), color_dimension.max()
norm = matplotlib.colors.Normalize(minn, maxx)
m = plt.cm.ScalarMappable(norm=norm, cmap='jet')
m.set_array([])
fcolors = m.to_rgba(color_dimension)
# plot
fig = plt.figure()
# Figure.gca() no longer accepts a projection keyword in current Matplotlib;
# add_subplot creates the same single 3D axes on every version.
ax = fig.add_subplot(projection='3d')
ax.plot_surface(X, Y, Z, rstride=1, cstride=1, facecolors=fcolors, vmin=minn, vmax=maxx, shade=False)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')
plt.show()
Could anyone help me plot a contour figure similar to the example graph (the first image in this post), where the contours are based on the values within the range of C?

How to select specific number of colors to show in color bar from a big list ? - Matplotlib

I plotted some data which has 70 classes, so when I built the color bar it's very difficult to distinguish between each legend as shown below:
The code that I'm using is:
# Pseudo-code from the question: the first three assignments are placeholders, so this is not runnable as posted.
formation_colors = # 70 colors
formation_labels = # 70 labels
data = # the section of the entire dataset which only has 13 labels
data = data.sort_values(by='DEPTH_MD')
ztop=data.DEPTH_MD.min(); zbot=data.DEPTH_MD.max()
# Colormap built from the full colour list (the slice covers every entry).
cmap_formations = colors.ListedColormap(formation_colors[0:len(formation_colors)], 'indexed')
# Repeat the formation column 100 times along axis 1 to get a 2-D strip for imshow.
# NOTE(review): the column is 'Formations' here but 'FORMATION' in the modified lines later in the question — confirm which is right.
cluster_f = np.repeat(np.expand_dims(data['Formations'].values,1), 100, 1)
fig = plt.figure(figsize=(2,10))
ax = fig.add_subplot()
# vmin/vmax are fixed to 0..69, i.e. the index range of the 70-entry colour list.
im_f = ax.imshow(cluster_f, interpolation='none', aspect='auto', cmap = cmap_formations, vmin=0, vmax=69)
ax.set_xlabel('FORMATION')
ax.set_xticklabels(['']);
# Colour bar in its own axes appended to the right of the strip.
divider_f = make_axes_locatable(ax)
cax_f = divider_f.append_axes("right", size="20%", pad=0.05)
cbar_f = plt.colorbar(im_f, cax = cax_f,)
# One tick and one label per entry of the full label list.
cbar_f.set_ticks(range(0,len(formation_labels))); cbar_f.set_ticklabels(formation_labels)
So far, if I just change:
1. cmap_formations = colors.ListedColormap(formation_colors[0:len(formation_colors)], 'indexed')
2. cbar_f.set_ticks(range(0,len(formation_labels))); cbar_f.set_ticklabels(formation_labels)
to:
cmap_formations = colors.ListedColormap(formation_colors[0:len(data['FORMATION'].unique())], 'indexed')
cbar_f.set_ticks(range(0,len(data['FORMATION'].unique()))); cbar_f.set_ticklabels(data['FORMATION'].unique())
I get the corresponding colors in the cbar; however, the plot is no longer correct, and the legends are also out of square.
Thank you so much if you have any idea how to do this.
Although not explicitly mentioned in the question, I suppose data['FORMATION'] contains indices from 0 till 69 into the lists of formation_colors and formation_labels
The main problem is that data['FORMATION'] needs to be renumbered to be new indices (with numbers 0 till 12) into the new list of unique colors. np.unique(..., return_inverse=True) returns both the list of unique numbers, and the renumbering for the values.
To be able to reindex the list of colors and of labels, it helps to convert them to numpy arrays.
To make the code easier to debug, the following test uses a simple relation between the list of colors and the list of labels.
from matplotlib import pyplot as plt
from matplotlib import colors
from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable
import numpy as np
import pandas as pd
# 70 random colour names, plus a label derived from each name so that a
# colour/label mismatch is easy to spot while debugging.
formation_colors = np.random.choice(list(colors.CSS4_COLORS), 70, replace=False)
formation_labels = ['lbl_' + c for c in formation_colors]
# NumPy arrays so both can be indexed with an array of indices below.
formation_colors = np.asarray(formation_colors)
formation_labels = np.asarray(formation_labels)

# Test data: 13 formation indices laid out over consecutive depth intervals.
f = np.random.randint(0, 70, 13)
d = np.sort(np.random.randint(0, 5300, 13))
interval_lengths = np.diff(np.append(0, d))
data = pd.DataFrame({
    'FORMATION': np.repeat(f, interval_lengths),
    'DEPTH_MD': np.arange(d[-1]),
})
data = data.sort_values(by='DEPTH_MD')
ztop = data['DEPTH_MD'].min()
zbot = data['DEPTH_MD'].max()

# unique_values: the formation indices actually present;
# formation_new_values: every row renumbered as a position in unique_values.
unique_values, formation_new_values = np.unique(data['FORMATION'], return_inverse=True)
n_unique = len(unique_values)
cmap_formations = colors.ListedColormap(formation_colors[unique_values], 'indexed')
cluster_f = formation_new_values.reshape(-1, 1)

fig = plt.figure(figsize=(3, 10))
ax = fig.add_subplot()
im_f = ax.imshow(
    cluster_f,
    extent=[0, 1, zbot, ztop],
    interpolation='none',
    aspect='auto',
    cmap=cmap_formations,
    vmin=0,
    vmax=n_unique - 1,
)
ax.set_xlabel('FORMATION')
ax.set_xticks([])

divider_f = make_axes_locatable(ax)
cax_f = divider_f.append_axes("right", size="20%", pad=0.05)
cbar_f = plt.colorbar(im_f, cax=cax_f)
# Split [0, n_unique - 1] into 2 * n_unique half-steps and keep the odd ones:
# that puts one tick at the centre of each colour band.
band_centres = np.linspace(0, n_unique - 1, 2 * n_unique + 1)[1::2]
cbar_f.set_ticks(band_centres)
cbar_f.set_ticklabels(formation_labels[unique_values])
plt.subplots_adjust(left=0.2, right=0.5)
plt.show()
Here is a comparison plot:

Stack area chart with quantitative x-axis with altair

Area charts in altair are automatically stacked when the x-axis is time. But when x belongs to a quantitative data type, areas are not stacked.
import pandas as pd
import numpy as np
import string
import altair as alt
# Reproducible toy data: n_series named series, each with one Poisson-distributed
# Y value for every integer X in 0..20 (long format, series interleaved).
np.random.seed(394378)
n_series = 3
series_names = list(string.ascii_lowercase[:n_series])
x_range = range(0, 21)
n_points = len(x_range)
df = pd.DataFrame(
    {
        "Series": np.tile(series_names, n_points),
        "X": np.repeat(x_range, n_series),
        "Y": np.random.poisson(lam=10, size=n_points * n_series),
    }
)
# Area chart with a quantitative x-axis; as posted in the question, the
# areas are not stacked.
(
    alt.Chart(df)
    .mark_area()
    .encode(x="X:Q", y="Y:Q", color="Series:N")
)
How can I stack areas?
You can do this by passing stack=True to the y encoding. For example:
# Same chart, with stacking requested explicitly on the y encoding.
(
    alt.Chart(df)
    .mark_area()
    .encode(x="X:Q", y=alt.Y("Y:Q", stack=True), color="Series:N")
)

python bokeh interactively plot n curves between min and max

I am trying to generate the plot of a function of two parameters, where one is used as x_axis and for the other I plot n curves, varying the parameter between a min and max value.
The following code works:
import numpy as np
import bokeh
from bokeh.plotting import figure
from bokeh.io import push_notebook, show, output_notebook
output_notebook()
# Shared x grid for every curve.
x = np.linspace(0, 10, 100)
f = figure()
# nfreq frequencies evenly spaced between fmin and fmax (both inclusive).
fmin, fmax, nfreq = 1, 3, 4
freq = np.linspace(fmin, fmax, nfreq)
# One sine curve per frequency.
for frequency in freq:
    f.line(x, np.sin(frequency * x))
show(f)
Now I would like to have 3 sliders to interactively vary fmin, fmax and nfreq. I could not figure out how to do it...
Thanks for any help
This example works for Bokeh v1.0.4. Run as: bokeh serve --show app.py
The content of app.py:
import numpy as np
from bokeh.models import Slider, Row, Column
from bokeh.plotting import figure, show, curdoc
from bokeh.models.sources import ColumnDataSource
# Single figure that all curves are drawn on.
plot = figure()
# Column layout holding the plot; the sliders are appended to it further down.
layout = Column(plot)
# Per-frequency registries: frequency -> ColumnDataSource and frequency -> line renderer.
sources, lines = {}, {}
def get_x(n):
    """Return a list of ``n`` x arrays, each 100 evenly spaced points on [0, 10]."""
    grids = []
    for _ in range(n):
        grids.append(np.linspace(0, 10, 100))
    return grids
def get_y(n):
    """Return one sine curve per frequency in ``n``, sampled at 100 points on [0, 10].

    ``n`` is an iterable of frequencies (not a count); each curve is
    ``sin(frequency * x)``.
    """
    grid = np.linspace(0, 10, 100)
    return [np.sin(frequency * grid) for frequency in n]
def update(attr, old, new):
    """Slider ``on_change`` callback: redraw from the current slider values.

    The callback arguments are ignored; the values are read from the last
    three children of ``layout`` (fmin, fmax, nfreq, in that order).
    """
    slider_values = [layout.children[position].value for position in (-3, -2, -1)]
    update_sources(*slider_values)
def update_sources(fmin, fmax, nfreq):
    """Create or refresh one line per frequency and hide lines no longer selected.

    Args:
        fmin: lowest frequency.
        fmax: highest frequency.
        nfreq: number of frequencies to draw between ``fmin`` and ``fmax``.
    """
    # A slider can deliver its value as a float, but np.linspace needs an
    # integer sample count.
    freq = np.linspace(fmin, fmax, int(nfreq))
    for f, x, y in zip(freq, get_x(len(freq)), get_y(freq)):
        data = {'x': x, 'y': y}
        if f not in sources:
            # First time this frequency is requested: create its data source
            # and its line, and remember both for later updates.
            sources[f] = ColumnDataSource(data)
            lines[f] = plot.line('x', 'y', source=sources[f])
        else:
            sources[f].data = data
    # Show only the lines whose frequency is part of the current selection.
    for key, renderer in lines.items():
        renderer.visible = key in freq
# One slider per parameter, all starting at 1, with per-parameter upper bounds.
# (`upper` rather than `max`, so the builtin is not shadowed.)
for txt, upper in zip(['fmin', 'fmax', 'nfreq'], [3, 4, 5]):
    slider = Slider(start=1, end=upper, value=1, title=txt)
    slider.on_change('value', update)
    layout.children.append(slider)
# Initial draw from the sliders' starting values. update_sources already adds
# a line for each new source, so no further plot.line calls are made here:
# extra lines would not be tracked and could never be hidden again.
update_sources(layout.children[-3].value, layout.children[-2].value, layout.children[-1].value)
curdoc().add_root(layout)

Plotting building surfaces from CityGML data

I've been struggling with 3D plotting some coordinates for a long time and I'm now really frustrated, so your help would be really appreciated.
I'd like to plot the facade of a building from a CityGML file (which is originally simply an XML file). I have no problem parsing the CityGML file using xml.etree and extracting the coordinates. But after extracting the coordinates, I can't find a way to plot them in 3D.
from xml.etree import ElementTree as ET
tree = ET.parse('3860_5819__.gml')
root = tree.getroot()
# Prefix -> URI map used in the XPath queries below.
namespaces = {
    'ns0': "http://www.opengis.net/citygml/1.0",
    'ns1': "http://www.opengis.net/gml",
    'ns2': "http://www.opengis.net/citygml/building/1.0"
}
# One whitespace-split token list per posList found under any WallSurface.
wallString = [
    posList.text.split()
    for wallSurface in root.findall('.//ns2:WallSurface', namespaces)
    for posList in wallSurface.findall('.//ns1:posList', namespaces)
]
# Number of posList elements collected.
c = len(wallString)
# Turn each flat "x y z x y z ..." token list into a one-polygon vertex list.
verts = []
for tokens in wallString:
    X, Y, Z = [], [], []
    axes = (X, Y, Z)
    for c, value in enumerate(tokens, start=1):
        # Tokens cycle through x, y, z.
        axes[(c - 1) % 3].append(float(value))
        # Stop right after the first value of the final triple: X ends up one
        # element longer than Y and Z, so zip() below leaves that last point out
        # (presumably the closing vertex that repeats the first one — confirm).
        if c > len(tokens) - 3:
            break
    vert = [list(zip(X, Y, Z))]
    verts.append(vert)
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
import matplotlib.pyplot as plt
fig = plt.figure()
# Create the 3D axes through the figure: in current Matplotlib Axes3D(fig)
# no longer attaches itself to the figure, which leaves the window empty.
ax = fig.add_subplot(projection='3d')
# Each entry of verts is a one-polygon list, drawn as its own collection.
for vert in verts:
    ax.add_collection3d(Poly3DCollection(vert))
# NOTE(review): the axis limits are never set explicitly here; the asker's
# follow-up identifies that as the reason the walls did not show up.
ax.autoscale_view(tight=True, scalex=True, scaley=True, scalez=True)
plt.show()
plt.close()
Could the problem be that I can't make my plot "tight"? And if not, is there something I'm doing fundamentally wrong?
If relevant, the CityGML file in this case is related to TU Berlin center of entrepreneurship which can be taken from here.
I just realized that nothing was wrong with the main part of the code. The only issue was that the axis limits were not set. I changed the plotting part like this:
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d as mpl3
fig = plt.figure()
# Create the 3D axes through the figure: in current Matplotlib Axes3D(fig)
# no longer attaches itself to the figure, which leaves the window empty.
ax = fig.add_subplot(projection='3d')
# Each entry of verts is a one-polygon list, drawn as its own collection.
for vert in verts:
    poly = mpl3.art3d.Poly3DCollection(vert)
    ax.add_collection3d(poly)
# Set the axis limits by hand: a window around the building's coordinates
# (+/-50 in x and y, -10/+20 in z around the hard-coded centre values).
ax.set_xlim3d(left=386284-50, right=386284+50)
ax.set_ylim3d(bottom=5819224-50, top=5819224+50)
ax.set_zlim3d(bottom=32-10, top=32+20)
plt.show()
plt.close()
Now it works perfectly fine.

Resources