more efficient multi variate visualization - python-3.x

Hi here is a sample code with 6 dimensional dataset.
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import pandas as pd
import seaborn as sns
def f(a,b,c,d,e):
return a*b*c*d*e/1e14
# characteristics
a=np.arange(1000,11000,1000)
b=np.arange(45,100,10)
c=np.arange(10000,60000,5000)
d=np.arange(1,6,.5)*1000
e=np.array([1,2])
gr=np.array(np.meshgrid(a,b,c,d,e)).T.reshape(-1,5)
cost=np.array([f(*j) for j in gr])
df=pd.DataFrame(np.column_stack([gr,cost]),columns=['a','b','c','d','e','cost'])
I would like to look into possible trends in the data. For example i would like to know the effect of "a" on "cost" either while keeping the rest of the columns constant or not etc. Is there a better way of gaining insight from the data than this;
fig = plt.figure(figsize=[10,8])
ax = Axes3D(fig)
temp=df[(df.a==4000) & (df.d==1000)]
ax.plot_trisurf(temp.b, temp.c, temp.cost, cmap=cm.jet, linewidth=0.2)
ax.set_xlabel('b', fontsize=16)
ax.set_ylabel('c', fontsize=16)
ax.set_zlabel('cost', fontsize=16)
I have also tried these but not clear what they mean. For example, in this case shouldn't all correlation values be equal in the heat map for cost-variable pairs?
# Various visualizaiton methods
#a)
f, ax = plt.subplots(figsize=(10, 6))
corr = df.corr()
hm = sns.heatmap(round(corr,2), annot=True, ax=ax, cmap="coolwarm",fmt='.2f',
linewidths=.05)
f.subplots_adjust(top=0.93)
#b)
pd.plotting.scatter_matrix(df, alpha=0.2, figsize=(16, 16))

Related

How to show all dates in matplotlib from excel import?

I have a code in python 3.11 for a contour plot generating from an excel table using matplotlib. The result shows only first days of months on the x axis (for example 1.6.2022, 1.7.2022 ...). I want all days from the excel source table. Her's the code:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import Normalize
import pandas as pd
import matplotlib.dates as mdates
# import data from excel file
df = pd.read_excel('temperature_data.xlsx', index_col=0)
# Assign columns to variables
time = df.columns
depth = df.index
temperature = df.to_numpy()
# Creating the graph
fig, ax = plt.subplots()
min_temp = temperature.min()
max_temp = temperature.max()
cs = plt.contourf(time, depth, temperature, levels=np.arange(round(min_temp), round(max_temp)+2, 2), cmap='coolwarm', vmin=min_temp, vmax=max_temp)
cs2 = plt.contour(time, depth, temperature, levels=np.arange(round(min_temp), round(max_temp)+2, 2), colors='black')
plt.gca().invert_yaxis()
plt.clabel(cs2, inline=1, fontsize=10, fmt='%d')
plt.title('Teplota vody [°C]')
plt.xticks(rotation=90, ha='right')
ax.xaxis.set_major_formatter(mdates.DateFormatter('%d.%m.%Y'))
#ax.set_xlim(df.index.min(), df.index.max())
#ax.set_xlabel('Time')
ax.set_ylabel('hloubka [m]')
norm = Normalize(vmin=min_temp, vmax=max_temp)
plt.colorbar(cs, cmap='coolwarm', norm=norm)
plt.show()
Thank you for your help.

Can I generate a contourplot from three columns of data in python without using meshgrid?

I have three columns of data. They are too large to generate meshgrids from. So e.g. in order to generate a surface plot from the data, I use a method like so
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
x, y, z = np.loadtxt('data_file', unpack=True)
df = pd.DataFrame({'x':x, 'y':y, 'z':z})
fig = plt.figure()
ax = Axes3D(fig)
surf = ax.plot_trisurf(df.x, df.y, df.z, cmap=cm.jet, linewidth=0.05)
fig.colorbar(surf, shrink=0.5, aspect=5)
plt.show()
Is there a similar alternative to plot_trisurf for contours?

How to add color and legend by points' label one by one in python?

I want to divide and color points,val_lab(611,3) by their labels,pred_LAB(611,)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure()
ax = plt.axes(projection = '3d')
ax.set_xlabel('L')
ax.set_ylabel('A')
ax.set_zlabel('B')
for i in range(0, len(val_lab)):
ax.scatter3D(
val_lab[i,0],
val_lab[i,1],
val_lab[i,2],
s = 8,
marker='o',
c = pred_LAB
#cmap = 'rainbow'
)
#ax.legend(*points.legend_elements(), title = 'clusters')
plt.show()
The problem is it shows error,
c' argument has 611 elements, which is not acceptable for use with 'x'
with size 1, 'y' with size 1.
However, if the dataset only have ten points,it can show the figure correctly, I don't know how to solve this problem, besides, how to add legend of this figure?
In your solution you would want to replace c = pred_LAB with c = pred_LAB[i]. But you do not have to use a for loop to plot the data. You can just use the following:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# generate random input data
val_lab = np.random.randint(0,10,(611,3))
pred_LAB = np.random.uniform(0,1, (611,))
# plot data
fig = plt.figure()
ax = plt.axes(projection = '3d')
ax.set_xlabel('L')
ax.set_ylabel('A')
ax.set_zlabel('B')
# create one 3D scatter plot - no for loop
ax.scatter3D(
val_lab[:,0],
val_lab[:,1],
val_lab[:,2],
s = 8,
marker='o',
c = pred_LAB,
cmap = 'rainbow',
label='my points'
)
# add legend
plt.legend()
plt.show()

How to set x ticks for seaborn (python) line plot

I made a line plot chart for years 1960-2014 using seaborn but the xticks aren't correct. I want only intervals to appear (like 1960, 1970, 1980, etc).How do i adjust the xticks? I tried rotating it but it didn't seem to work. Here is my code:
#plot figure using sns
g=sns.relplot(x="Year", y="Indicator_Value",
data=Emissions_C_df,
kind="line",
style="Indicator_Name",
hue="Indicator_Name",
)
plt.show()
You can use a MaxNLocator from matplotlib.ticker for the major ticks (decades) and manually set specific minor ticks with a FixedLocator.
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FixedLocator, MaxNLocator
a = np.arange(50)
d = {'Year': 1953+a, 'Indicator_Value': a}
df = pd.DataFrame(data=d)
g = sns.relplot(x="Year", y="Indicator_Value",
data=df,
kind="line")
ax = plt.gca()
ax.xaxis.set_major_locator(MaxNLocator(steps=[10]))
ax.xaxis.set_minor_locator(FixedLocator(range(1960,1970)))
plt.show()

Implementing ipywidget slider for time

I am trying to create a slider for time in Jupyter Notebook using ipywidgets. I would like to take the tabulated experimental data (see figure below) and control the value bounds with the help of a slider. The graph should be a force-displacement graph, evolving in time:
This is my python code:
from ipywidgets import IntSlider, interact, FloatSlider
u = fdat1['C_1_Weg_R4[mm]'].values
f = fdat1['C_1_Kraft_R4[kN]'].values
t = fdat1['S/No'].values
#interact(t = IntSlider(min = 0, max = max(fdat0['S/No'].values)))
def aa_(t):
plt.plot(f[t],u[t])
plt.grid()
plt.xlabel("force [kN]")
plt.ylabel("displacement [mm]")
plt.title("Load-displacement curve for \nexperiment")
fdat1 is the name of the tabulated data. I have also considered using "C_1_Zeit[s]" column as my slider values, but these are not integer values.
The problem is that nothing gets plotted, but the slider works and the graph changes scale.
I have been searching online for some time now and would really appreciate some help.
Thank you in advance!
Edit:
from ipywidgets import IntSlider, interact, FloatSlider
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
df = pd.DataFrame.from_records(
[np.linspace(0,30, num=30), np.linspace(0,20, num=30), ]).T
df.columns=['A', 'B']
#interact(t = IntSlider(min = 0, max = 21))
def aa_(t):
plt.scatter(df['A'], df['B'])
plt.grid()
plt.xlabel("force [kN]")
plt.ylabel("displacement [mm]")
plt.title("Load-displacement curve for \nexperiment")
plt.xlim(0, 30)
plt.ylim(0, 30)
Inside your plotting function, create a slice of your results dataframe that slices based on the slider value.
from ipywidgets import IntSlider, interact, FloatSlider
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
results = pd.DataFrame.from_records(
[np.linspace(0,30, num=30), np.linspace(0,20, num=30), ]).T
results.columns=['A', 'B']
#interact(t = IntSlider(min = 0, max = 21))
def aa_(t):
df = results.iloc[:t] # make the slice here
plt.scatter(df['A'], df['B'])
plt.grid()
plt.xlabel("force [kN]")
plt.ylabel("displacement [mm]")
plt.title("Load-displacement curve for \nexperiment")
plt.xlim(0, 30)
plt.ylim(0, 30)
So, basically, this should have been the correct code:
from ipywidgets import IntSlider, interact, FloatSlider
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
u = fdat1['C_1_Weg_R4[mm]'].values #loads displacement values from fdat1
f = fdat1['C_1_Kraft_R4[kN]'].values #loads force values from fdat1
df = pd.DataFrame.from_dict([u,f]).T #creates a dataframe
df.columns=['A', 'B']
#interact(t = IntSlider(min = 0, max = df.shape[0])) #interactive scatterplot with a slider for time
def scatterplot_(t):
plt.scatter(df.loc[0:t,'A'], df.loc[0:t,'B'])
plt.grid()
plt.xlabel("force [kN]")
plt.ylabel("displacement [mm]")
plt.title("Load-displacement curve for \nexperiment")
plt.xlim(-5, 5)
plt.ylim(-25, 25)

Resources