Python: Data limit in Plotly frames? - python-3.x

I have recently started to use Plotly to make 3D plots in python and I wanted to create an animation of what is going on in terms of column vectos of a 3 by 3 matrix when applying Gaussain elimination.
I wrote a function to get the row echelon form and the history of the matrix obtained at each step.
Then I wanted to plot the comuns vectors at each step of the algorithm.
At first I was able to get an animation of the the evolution of the three vectors by adpating this code : https://plotly.com/python/visualizing-mri-volume-slices/
But then I wanted to show on each frame the three row vectors of a given step and the three row vectors from the matrix of the previous step with opacity 0.2.
And when I added that part of the code I got a strange behavior from Plotly. It only showed me the three first vectors which are given to the frame and not all of them.
Here the code I have so far :
import numpy as np
import numpy.linalg as la
import plotly.graph_objects as go
v1 = np.array([5,2,1])
v2 = np.array([2,3,2])
v3 = np.array([3,-1,1])
A = np.transpose(np.vstack([v1,v2,v3]))
# G, H = pivot_Gauss(A)
H = [np.array([[ 5, 2, 3],[ 2, 3, -1],[ 1, 2, 1]]), np.array([[ 1, 0, 0],[ 2, 3, -1],[ 1, 2, 1]]),
np.array([[ 1, 0, 0],[ 0, 3, -1],[ 1, 2, 1]]), np.array([[ 1, 0, 0],[ 0, 3, -1],[ 0, 2, 1]]),
np.array([[1, 0, 0],[0, 1, 0],[0, 2, 1]]), np.array([[1, 0, 0],[0, 1, 0],[0, 0, 1]]),
np.array([[1, 0, 0],[0, 1, 0],[0, 0, 1]]) ]
G = np.array([[1,0,0],[0,1,0],[0,0,1]]) # results obtained using the function pivot_Gauss(A)
nb_frames = len(H)
frames = []
v_norm = 5
colors = ["blue","red","green"]
for k in range(nb_frames): # go.Frame(data,name=str(k))
dat = []
for j in range(np.shape(A)[1]):
v = H[k][:,j]
if la.norm(v) != 0 :
d1 = go.Scatter3d( x=[0,v[0]],y=[0,v[1]],z=[0,v[2]],name="v"+str(k+j+1),hoverinfo='name',
marker=dict(size=0), line=dict(color=colors[j], width=10 ))
dat.append(d1)
d2 = go.Cone(x=[v[0]],y=[v[1]],z=[v[2]],
u=[v[0]/v_norm],v=[v[1]/v_norm],w=[v[2]/v_norm],sizeref=1,
sizemode="scaled",anchor="cm",name="v"+str(k+j+1),hoverinfo='x+y+z+name',
colorscale=[[0, colors[j]], [1,colors[j]]],showscale=False)
dat.append(d2)
if k>0 : # add column vectors of previous Gaussain elimination step (causes some troubles,
#if this if section is commented I get an animation of the three clumn vectors of current step)
vk = H[k-1][:,j]
if la.norm(v) != 0 :
d3 = go.Scatter3d( x=[0,vk[0]],y=[0,vk[1]],z=[0,vk[2]],name="v"+str(k+j+1),hoverinfo='name',
marker=dict(size=0), line=dict(color=colors[j], width=10), opacity = 0.2 )
dat.append(d3)
d4 = go.Cone(x=[vk[0]],y=[vk[1]],z=[vk[2]],
u=[vk[0]/v_norm],v=[vk[1]/v_norm],w=[vk[2]/v_norm],sizeref=1,
sizemode="scaled",anchor="cm",name="v"+str(k+j+1),hoverinfo='x+y+z+name',
colorscale=[[0, colors[j]], [1,colors[j]]],showscale=False,opacity=0.2)
dat.append(d4)
frames.append(go.Frame(data=dat,name=str(k)))
fig = go.Figure(frames=frames)
# Add data to be displayed before animation starts
for j in range(A.shape[1]):
v = A[:,j]
if la.norm(v) != 0 :
fig.add_trace( go.Scatter3d( x=[0,v[0]],y=[0,v[1]],z=[0,v[2]],name="v"+str(k+1),hoverinfo='name',
marker=dict(size=0), line=dict(color=colors[j], width=10 )) )
fig.add_trace( go.Cone(x=[v[0]],y=[v[1]],z=[v[2]],
u=[v[0]/v_norm],v=[v[1]/v_norm],w=[v[2]/v_norm],sizeref=1,
sizemode="scaled",anchor="cm",name="v"+str(k+1),hoverinfo='x+y+z+name',
colorscale=[[0, colors[j]], [1,colors[j]]],showscale=False) )
### This remained almost exactly as the Plotly example
def frame_args(duration):
return {
"frame": {"duration": duration},
"mode": "immediate",
"fromcurrent": True,
"transition": {"duration": duration, "easing": "linear"},
}
sliders = [
{
"pad": {"b": 10, "t": 60},
"len": 0.9,
"x": 0.1,
"y": 0,
"steps": [
{
"args": [[f.name], frame_args(0)],
"label": str(k),
"method": "animate",
}
for k, f in enumerate(fig.frames)
],
}
]
matrix_but = [
{"buttons: [{},{},{},{},{},{}]"}
]
# Layout
fig.update_layout(
title='Pivot de Gauss',
width=600,
height=400,
scene=dict(xaxis=dict(autorange=True),
yaxis=dict(autorange=True),
zaxis=dict(autorange=True),
aspectratio=dict(x=1, y=1, z=1),
),
updatemenus = [
{
"buttons": [
{
"args": [None, frame_args(200)],
"label": "▶", # play symbol
"method": "animate",
},
{
"args": [[None], frame_args(0)],
"label": "◼", # pause symbol
"method": "animate",
},
],
"direction": "left",
"pad": {"r": 10, "t": 70},
"type": "buttons",
"x": 0.1,
"y": 0,
}
],
sliders=sliders
)
fig.show()
You will notice that for each vector I first draw a 3D line and then use cone to get the it arrow_shaped. It might not be the best way to do it, but I do not want to use cone alone as the apsect does not fit what I would like.
I stumbled across a (I think) similar question here : https://community.plotly.com/t/only-one-trace-showing-per-frame-in-animated-plot/25803
But I did not undestand the answer nor the example.
It seems from what I get that only the first six elemetns of the data contained in each frame is taken into account, but I do not understand why and I would like to show everything.
If someone has some insight (and a solution) on the subject, it would be warmly welcomed.
I can clarify things if needed.
Image of the two first column vectors of matrix from current step and first column vector of matrix from previous step
Image of the three column vectors of current matrix when part below if k>0 is commented

It seems from what I get that only the first six elemetns of the data contained in each frame is taken into account, but I do not understand why and I would like to show everything.
There's this paragraph under the heading 'Current Animation Limitations and Caveats':
Animations are designed to work well when each row of input is present across all animation frames, and when categorical values mapped to symbol, color and facet are constant across frames. Animations may be misleading or inconsistent if these constraints are not met.
Though in your first frame you have only three vectors (three lines plus three coneheads) to plot, it violates the above constraint when following frames contain six vectors. To overcome this restriction, we could insert the three vectors in the first frame (and also in the data to be displayed before animation starts) twice, i. e. to the
if k>0 : # add column vectors of previous Gaussain elimination step (causes some troubles,
block add an
else:
dat.append(d1)
dat.append(d2)
block, and in the
if la.norm(v) != 0 :
block duplicate the two fig.add_trace calls.

Related

Convert a list of labels into number given a defined dictionary

I have the following dictionary defined in my code:
label_dict = {'positive': 1, 'negative': 0}
I also have a label_list that contains two possible values: "positive" and "negative".
I want to essentially map each label in label_list to the respective numeric value defined by label_dict.
I have the following for loop defined as well: for label in range(len(label_list)): for iterating through label_list.
How can I accomplish this? Any help is much appreciated.
One solution is to convert your label_list to Series and use mapping and then return it back to list again like that:
import pandas as pd
label_dict = {'positive': 1, 'negative': 0}
label_list = ["positive","negative","negative","positive",
"negative","positive","negative"]
new_lst = pd.Series(label_list).map(label_dict).tolist()
#output
print(new_lst) # [1, 0, 0, 1, 0, 1, 0]

Compare a list of lists and display result using OpenCV

I have two list of lists that I want to compare element by element first.
a=[[0,0,0,0,3],[1,2]] # This is the key (list of list)
b=[[0,0,0,2,3],[1,0]] # This is the list of list I want to compare against the key list
I also have an image that has 5 rows (first rectangle) (because a[0] has 5 elements) and 2 rows (second rectangle) (because a[1] has 2 elements)
For the following image, I know the corner points and the height of each individual rows. These are stored in another list. xyh=[[115,60,70],[350,55,70]]
What I would like to do is compare my list b against a and if they match eg a[0][0]=b[0][0] then the text "match" will be displayed in the first first column's first row, if they don't match then the element from the list a will be displayed.
This is the expected result at the end.
My MWE:
import cv2
import numpy as np
total_elements=7
a=[[0,0,0,0,3],[1,2]] # This is the key (list of list)
b=[[0,0,0,2,3],[1,0]] # This is the list of list I want to compare against the key list
xyh=[[115,60,70],[350,55,70]] # List for the top-right corner points of the columns and the height of each rows.
img=img=cv2.imread('1.jpg')
imgFinal=img.copy()
# How do I loop over all three list of lists and then display the result in imgFinal?
cv2.imshow('Expected Result',imgFinal)
For the iterating, I'd use Python's built-in zip function, so you can iterate the elements of all lists at the same time. The rest is simple value checking and putting the correct text to the image via cv2.putText. I first thought about using NumPy for the value checking, but since you have to use loops anyway for putting the text, all logic can be placed inside the loops.
Here's my full code:
import cv2
# Lists
a = [[0, 0, 0, 0, 3], [1, 2]]
b = [[0, 0, 0, 2, 3], [1, 0]]
xyh = [[115, 60, 70], [350, 55, 70]]
# Original image and copy
img = cv2.imread('1.jpg')
imgFinal = img.copy()
# Iterate elements of all lists at the same time
for first_image, second_image, coords in zip(a, b, xyh):
# Get initial x, y coordinates for the current image
x = coords[0] - 60
y = coords[1] + int(coords[2] / 2)
# Iterate elements in each image
for image_a, image_b in zip(first_image, second_image):
# Put correct text to the image
if image_a == image_b:
imgFinal = cv2.putText(imgFinal, 'Match', (x, y), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.5, (0, 0, 0), 1)
else:
imgFinal = cv2.putText(imgFinal, str(image_a), (x, y), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.5, (0, 0, 0), 1)
# Increment y coordinate to next row
y = y + coords[2]
# Output
cv2.imshow('Expected Result', imgFinal)
cv2.waitKey(0)
cv2.destroyAllWindows()
And that'd be output:
OpenCV has only limited support for text rendering, so the text doesn't look that nice.
----------------------------------------
System information
----------------------------------------
Platform: Windows-10-10.0.16299-SP0
Python: 3.8.5
OpenCV: 4.4.0
----------------------------------------

Matplotlib Control Spacing Between Bars

I am trying to insert spacing between two specific bars but cannot find any easy way to do this. I can manually add a dummy row with with 0 height to create and empty space but doesn't give me control of how wide the space should be. Is there a more programmatic method I can use to control the spacing between bars at any position?
Example Code:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
mydict = {
'Event': ['Running', 'Swimming', 'Biking', '', 'Hiking', 'Jogging'],
'Completed': [2, 4, 3, 0, 7, 9],
'Participants': [10, 20, 35, 0, 10, 20]}
df = pd.DataFrame(mydict).set_index('Event')
df = df.assign(Completion=(df.Completed / df.Participants) * 100)
plt.subplots(figsize=(5, 4))
print(df.index)
ax = sns.barplot(x=df.Completion, y=df.index, color="orange", orient='h')
plt.xticks(rotation=60)
plt.tight_layout()
plt.show()
Example DataFrame Output:
Completed Participants Completion
Event
Running 2 10 20.000000
Swimming 4 20 20.000000
Biking 3 35 8.571429
0 0 NaN
Hiking 7 10 70.000000
Jogging 9 20 45.000000
Example output (blue arrows added outside of code to show where empty row was added.):
I think you can access the position of the boxes and the name of the labels. Then modify them. You may find an more general way depending on your use case, but this works for the given example.
#define a function to add space starting a specific label
def add_space_after(ax, label_shift='', extra_space=0):
bool_space = False
# get postion of current ticks
ticks_position = np.array(ax.get_yticks()).astype(float)
# iterate over the boxes/label
for i, (patch, label) in enumerate(zip(ax.patches, ax.get_yticklabels())):
# if the label to start the shift found
if label.get_text()==label_shift: bool_space = True
# reposition the boxes and the labels afterward
if bool_space:
patch.set_y(patch.get_y() + extra_space)
ticks_position[i] += extra_space
# in the case where the spacing is needed
if bool_space:
ax.set_yticks(ticks_position)
ax.set_ylim([ax.get_ylim()[0]+extra_space, ax.get_ylim()[1]])
#note: no more blank row
mydict = {
'Event': ['Running', 'Swimming', 'Biking', 'Hiking', 'Jogging'],
'Completed': [2, 4, 3, 7, 9],
'Participants': [10, 20, 35, 10, 20]}
df = pd.DataFrame(mydict).set_index('Event')
df = df.assign(Completion=(df.Completed / df.Participants) * 100)
ax = sns.barplot(x=df.Completion, y=df.index, color="orange", orient='h')
plt.xticks(rotation=60)
plt.tight_layout()
#use the function
add_space_after(ax, 'Hiking', 0.6)
plt.show()

Roll of different amount along a single axis in a 3D matrix [duplicate]

I have a matrix (2d numpy ndarray, to be precise):
A = np.array([[4, 0, 0],
[1, 2, 3],
[0, 0, 5]])
And I want to roll each row of A independently, according to roll values in another array:
r = np.array([2, 0, -1])
That is, I want to do this:
print np.array([np.roll(row, x) for row,x in zip(A, r)])
[[0 0 4]
[1 2 3]
[0 5 0]]
Is there a way to do this efficiently? Perhaps using fancy indexing tricks?
Sure you can do it using advanced indexing, whether it is the fastest way probably depends on your array size (if your rows are large it may not be):
rows, column_indices = np.ogrid[:A.shape[0], :A.shape[1]]
# Use always a negative shift, so that column_indices are valid.
# (could also use module operation)
r[r < 0] += A.shape[1]
column_indices = column_indices - r[:, np.newaxis]
result = A[rows, column_indices]
numpy.lib.stride_tricks.as_strided stricks (abbrev pun intended) again!
Speaking of fancy indexing tricks, there's the infamous - np.lib.stride_tricks.as_strided. The idea/trick would be to get a sliced portion starting from the first column until the second last one and concatenate at the end. This ensures that we can stride in the forward direction as needed to leverage np.lib.stride_tricks.as_strided and thus avoid the need of actually rolling back. That's the whole idea!
Now, in terms of actual implementation we would use scikit-image's view_as_windows to elegantly use np.lib.stride_tricks.as_strided under the hoods. Thus, the final implementation would be -
from skimage.util.shape import view_as_windows as viewW
def strided_indexing_roll(a, r):
# Concatenate with sliced to cover all rolls
a_ext = np.concatenate((a,a[:,:-1]),axis=1)
# Get sliding windows; use advanced-indexing to select appropriate ones
n = a.shape[1]
return viewW(a_ext,(1,n))[np.arange(len(r)), (n-r)%n,0]
Here's a sample run -
In [327]: A = np.array([[4, 0, 0],
...: [1, 2, 3],
...: [0, 0, 5]])
In [328]: r = np.array([2, 0, -1])
In [329]: strided_indexing_roll(A, r)
Out[329]:
array([[0, 0, 4],
[1, 2, 3],
[0, 5, 0]])
Benchmarking
# #seberg's solution
def advindexing_roll(A, r):
rows, column_indices = np.ogrid[:A.shape[0], :A.shape[1]]
r[r < 0] += A.shape[1]
column_indices = column_indices - r[:,np.newaxis]
return A[rows, column_indices]
Let's do some benchmarking on an array with large number of rows and columns -
In [324]: np.random.seed(0)
...: a = np.random.rand(10000,1000)
...: r = np.random.randint(-1000,1000,(10000))
# #seberg's solution
In [325]: %timeit advindexing_roll(a, r)
10 loops, best of 3: 71.3 ms per loop
# Solution from this post
In [326]: %timeit strided_indexing_roll(a, r)
10 loops, best of 3: 44 ms per loop
In case you want more general solution (dealing with any shape and with any axis), I modified #seberg's solution:
def indep_roll(arr, shifts, axis=1):
"""Apply an independent roll for each dimensions of a single axis.
Parameters
----------
arr : np.ndarray
Array of any shape.
shifts : np.ndarray
How many shifting to use for each dimension. Shape: `(arr.shape[axis],)`.
axis : int
Axis along which elements are shifted.
"""
arr = np.swapaxes(arr,axis,-1)
all_idcs = np.ogrid[[slice(0,n) for n in arr.shape]]
# Convert to a positive shift
shifts[shifts < 0] += arr.shape[-1]
all_idcs[-1] = all_idcs[-1] - shifts[:, np.newaxis]
result = arr[tuple(all_idcs)]
arr = np.swapaxes(result,-1,axis)
return arr
I implement a pure numpy.lib.stride_tricks.as_strided solution as follows
from numpy.lib.stride_tricks import as_strided
def custom_roll(arr, r_tup):
m = np.asarray(r_tup)
arr_roll = arr[:, [*range(arr.shape[1]),*range(arr.shape[1]-1)]].copy() #need `copy`
strd_0, strd_1 = arr_roll.strides
n = arr.shape[1]
result = as_strided(arr_roll, (*arr.shape, n), (strd_0 ,strd_1, strd_1))
return result[np.arange(arr.shape[0]), (n-m)%n]
A = np.array([[4, 0, 0],
[1, 2, 3],
[0, 0, 5]])
r = np.array([2, 0, -1])
out = custom_roll(A, r)
Out[789]:
array([[0, 0, 4],
[1, 2, 3],
[0, 5, 0]])
By using a fast fourrier transform we can apply a transformation in the frequency domain and then use the inverse fast fourrier transform to obtain the row shift.
So this is a pure numpy solution that take only one line:
import numpy as np
from numpy.fft import fft, ifft
# The row shift function using the fast fourrier transform
# rshift(A,r) where A is a 2D array, r the row shift vector
def rshift(A,r):
return np.real(ifft(fft(A,axis=1)*np.exp(2*1j*np.pi/A.shape[1]*r[:,None]*np.r_[0:A.shape[1]][None,:]),axis=1).round())
This will apply a left shift, but we can simply negate the exponential exponant to turn the function into a right shift function:
ifft(fft(...)*np.exp(-2*1j...)
It can be used like that:
# Example:
A = np.array([[1,2,3,4],
[1,2,3,4],
[1,2,3,4]])
r = np.array([1,-1,3])
print(rshift(A,r))
Building on divakar's excellent answer, you can apply this logic to 3D array easily (which was the problematic that brought me here in the first place). Here's an example - basically flatten your data, roll it & reshape it after::
def applyroll_30(cube, threshold=25, offset=500):
flattened_cube = cube.copy().reshape(cube.shape[0]*cube.shape[1], cube.shape[2])
roll_matrix = calc_roll_matrix_flattened(flattened_cube, threshold, offset)
rolled_cube = strided_indexing_roll(flattened_cube, roll_matrix, cube_shape=cube.shape)
rolled_cube = triggered_cube.reshape(cube.shape[0], cube.shape[1], cube.shape[2])
return rolled_cube
def calc_roll_matrix_flattened(cube_flattened, threshold, offset):
""" Calculates the number of position along time axis we need to shift
elements in order to trig the data.
We return a 1D numpy array of shape (X*Y, time) elements
"""
# armax(...) finds the position in the cube (3d) where we are above threshold
roll_matrix = np.argmax(cube_flattened > threshold, axis=1) + offset
# ensure we don't have index out of bound
roll_matrix[roll_matrix>cube_flattened.shape[1]] = cube_flattened.shape[1]
return roll_matrix
def strided_indexing_roll(cube_flattened, roll_matrix_flattened, cube_shape):
# Concatenate with sliced to cover all rolls
# otherwise we shift in the wrong direction for my application
roll_matrix_flattened = -1 * roll_matrix_flattened
a_ext = np.concatenate((cube_flattened, cube_flattened[:, :-1]), axis=1)
# Get sliding windows; use advanced-indexing to select appropriate ones
n = cube_flattened.shape[1]
result = viewW(a_ext,(1,n))[np.arange(len(roll_matrix_flattened)), (n - roll_matrix_flattened) % n, 0]
result = result.reshape(cube_shape)
return result
Divakar's answer doesn't do justice to how much more efficient this is on large cube of data. I've timed it on a 400x400x2000 data formatted as int8. An equivalent for-loop does ~5.5seconds, Seberg's answer ~3.0seconds and strided_indexing.... ~0.5second.

Interactive Plot of Pandas Data-frame Color coding based on a group from a Column

I have an example pandas dataframe as follows:
day id cnt
2 catx 4
2 kagm 3
2 dyrt 5
3 catx 3
3 kagm 3
3 dyrt 4
5 catx 2
5 kagm 2
5 dyrt 2
I want to plot the scatter data cnt (y) vs day(x), where the points will be labeled (colored/legend) based on the id column.
Now this is pretty simple in seaborn/matplotlib which I know can be plotted and the plot can be saved to a file.
However, I am looking to have an interactive plot using plotly/bokeh/d3/mp3ld etc and finally, put that plot into an url (of my choice or maybe an account based as in plotly). My goal is also to have hover function, which will show me the value of the points when I take the cursor over a specific cursor point.
I have tried bokeh/plotly with cufflinks using ColumnDataSource and everything to try out to get the plots. However, have failed to get anything which I am looking for. Can I get some help in this direction from the experts? Thanks in anticipation.
This code plots the data the way you requested. I created a new dataframe for every category in your dataframe so the interactive legend also works. An array with hex color strings is generated with the length of the number of unique categories and added to the dataframe to give every category it's own color.
#!/usr/bin/python3
import pandas as pd
from bokeh.models import ColumnDataSource
from bokeh.palettes import all_palettes
from bokeh.plotting import figure, output_file, show
data = {'day': [2, 2, 2, 3, 3, 3, 5, 5, 5], 'id': ['catx', 'kagm', 'dyrt', 'catx', 'kagm', 'dyrt', 'catx', 'kagm', 'dyrt'], 'cnt': [4, 3, 5, 3, 3, 4, 2, 2, 2]}
df = pd.DataFrame.from_dict(data)
output_file('plot.html')
tooltips = [
("day", "#day"),
("id", "#$name"),
("count", "#cnt")]
p = figure(tooltips=tooltips, plot_width=800, plot_height=800)
sources = []
colors = all_palettes['Viridis'][len(set(df['id'].tolist()))]
pd.options.mode.chained_assignment = None #Supress false positive warning
for ID, color in zip(set(df['id'].tolist()), colors):
dfSubset = df.loc[df['id'] == ID]
dfSubset['color'] = color
sources.append(ColumnDataSource(dfSubset))
p.circle(x = 'day', y = 'cnt', legend = 'id', color = 'color', name = 'id', alpha = 0.5, size = 15, source = sources[-1])
p.legend.click_policy="hide"
show(p)

Resources