AttributeError: module 'pandas.plotting' has no attribute '_matplotlib' - python-3.x

I am generating a graph using pandas. When I run my script locally, I get this error:
AttributeError: module 'pandas.plotting' has no attribute '_matplotlib'
I installed pandas for both my user and system. The operating system is WSL2 Ubuntu 20.04.
This is the relevant part of my code:
import pandas as pd
import matplotlib.pyplot as plt
def plot_multi(data, cols=None, spacing=.1, **kwargs):
from pandas import plotting
# Get default color style from pandas - can be changed to any other color list
if cols is None: cols = data.columns
if len(cols) == 0: return
colors = getattr(getattr(plotting, '_matplotlib').style, '_get_standard_colors')(num_colors=len(cols))
# First axis
print(data.loc[:, cols[0]])
ax = data.loc[:, cols[0]].plot(label=cols[0], color=colors[0], **kwargs)
ax.set_ylabel(ylabel=cols[0])
lines, labels = ax.get_legend_handles_labels()
for n in range(1, len(cols)):
# Multiple y-axes
ax_new = ax.twinx()
ax_new.spines['right'].set_position(('axes', 1 + spacing * (n - 1)))
data.loc[:, cols[n]].plot(ax=ax_new, label=cols[n], color=colors[n % len(colors)], **kwargs)
ax_new.set_ylabel(ylabel=cols[n])
# Proper legend position
line, label = ax_new.get_legend_handles_labels()
lines += line
labels += label
ax.legend(lines, labels, loc=0)
return ax
This worked on a University lab machine. Not sure why it's not working locally.

Related

Bar plot with different minimal value for each bar

I'm trying to reproduce this type of graph :
basically, the Y axis represent the date of beginning and end of a phenomenon for each year.
but here is what I have when I try to plot my data :
It seems that no matter what, the bar for each year is plotted from the y axis minimal value.
Here is the data I use
Here is my code :
select=pd.read_excel("./writer.xlsx")
select=pd.DataFrame(select)
select["dte"]=pd.to_datetime(select.dte)
select["month_day"]=pd.DatetimeIndex(select.dte).strftime('%B %d')
select["month"]=pd.DatetimeIndex(select.dte).month
select["day"]=pd.DatetimeIndex(select.dte).day
gs=gridspec.GridSpec(2,2)
fig=plt.figure()
ax1=plt.subplot(gs[0,0])
ax2=plt.subplot(gs[0,1])
ax3=plt.subplot(gs[1,:])
###2 others graphs that works just fine
data=pd.DataFrame()
del select["res"],select["Seuil"],select["Seuil%"] #these don't matter for that graph
for year_ in list(set(select.dteYear)):
temp=select.loc[select["dteYear"]==year_]
temp2=temp.iloc[[0,-1]] #the beginning and ending of the phenomenon
data=pd.concat([data,temp2]).reset_index(drop=True)
data=data.sort_values(["month","day"])
ax3.bar(data["dteYear"],data["month_day"],tick_label=data["dteYear"])
plt.show()
If you have some clue to help me, I'd really appreciate, because I havn't found any model to make this type of graph.
thanks !
EDIT :
I tried something else :
height,bottom,x_position=[], [], []
for year_ in list(set(select.dteYear)):
temp=select.loc[select["dteYear"]==year_]
bottom.append(temp["month_day"].iloc[0])
height.append(temp["month_day"].iloc[-1])
x_position.append(year_)
temp2=temp.iloc[[0,-1]]
data=pd.concat([data,temp2]).reset_index(drop=True)
ax3.bar(x=x_position,height=height,bottom=bottom,tick_label=x_position)
I got this error :
Traceback (most recent call last):
File "C:\Users\E31\Documents\cours\stage_dossier\projet_python\tool_etiage\test.py", line 103, in <module>
ax3.bar(x=x_position,height=height,bottom=bottom,tick_label=x_position)
File "C:\Users\E31\AppData\Local\Programs\Python\Python39\lib\site-packages\matplotlib\__init__.py", line 1352, in inner
return func(ax, *map(sanitize_sequence, args), **kwargs)
File "C:\Users\E31\AppData\Local\Programs\Python\Python39\lib\site-packages\matplotlib\axes\_axes.py", line 2357, in bar
r = mpatches.Rectangle(
File "C:\Users\E31\AppData\Local\Programs\Python\Python39\lib\site-packages\matplotlib\patches.py", line 752, in __init__
super().__init__(**kwargs)
File "C:\Users\E31\AppData\Local\Programs\Python\Python39\lib\site-packages\matplotlib\patches.py", line 101, in __init__
self.set_linewidth(linewidth)
File "C:\Users\E31\AppData\Local\Programs\Python\Python39\lib\site-packages\matplotlib\patches.py", line 406, in set_linewidth
self._linewidth = float(w)
TypeError: only size-1 arrays can be converted to Python scalars
To make a bar graph that shows a difference between dates you should start by getting your data into a nice format in the dataframe where it is easy to access the bottom and top values of the bar for each year you are plotting. After this you can simply plot the bars and indicate the 'bottom' parameter. The hardest part in your case may be specifying the datetime differences correctly. I added a x tick locator and y tick formatter for the datetimes.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.dates as mdates
# make function that returns a random datetime
# between a start and stop date
def random_date(start, stop):
days = (stop - start).days
rand = np.random.randint(days)
return start + pd.Timedelta(rand, unit='days')
# simulate poster's data
T1 = pd.to_datetime('July 1 2021')
T2 = pd.to_datetime('August 1 2021')
T3 = pd.to_datetime('November 1 2021')
df = pd.DataFrame({
'year' : np.random.choice(np.arange(1969, 2020), size=15, replace=False),
'bottom' : [random_date(T1, T2) for x in range(15)],
'top' : [random_date(T2, T3) for x in range(15)],
}).sort_values(by='year').set_index('year')
# define fig/ax and figsize
fig, ax = plt.subplots(figsize=(16,8))
# plot data
ax.bar(
x = df.index,
height = (df.top - df.bottom),
bottom = df.bottom,
color = '#9e7711'
)
# add x_locator (every 2 years), y tick datetime formatter, grid
# hide top/right spines, and rotate the x ticks for readability
x_locator = ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(2))
y_formatter = ax.yaxis.set_major_formatter(mdates.DateFormatter('%d %b'))
tick_params = ax.tick_params(axis='x', rotation=45)
grid = ax.grid(axis='y', dashes=(8,3), alpha=0.3, color='gray')
hide_spines = [ax.spines[s].set_visible(False) for s in ['top','right']]

How to draw a line on a plot?

I am trying to draw a line on a plit, but the plot is not even showing.
I have checked the values of xPoints and yPoints and they exist.
What is the cause?
import matplotlib.pyplot as plt
import numpy as np
def calculateFuncFor(x):
ePower = np.e**np.exp(x)
result = 1 - ePower
return "{:.4f}".format(result) #format the result
xPoints = np.linspace(0,1) #outputs 50 points between 0 and 1
yPoints = np.zeros(len(xPoints)) #fill a list with 50 zeros
for i in range(len(xPoints)):
yPoints[i] = calculateFuncFor(xPoints[i])
plt.plot(xPoints, yPoints,'ro')
plt.show()
Try putting in the first cell of your Jupyter Notebook the following:
%matplotlib inline
% denotes the IPython magic built-in commands

How to color the line graph according to conditions in a plot?

I try to find the solution for plot of the data
I have a graph of trajectory according to time(x) and kilometers(y) and i need to mark with different colours where the availability parameter from dataframe is 0 or 100
I try this but i have completly different result that i expected
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
# Read file, using ; as delimiter
filename = "H:\\run_linux\\river_km_calculations\\route2_8_07_23_07\\true_route2_8_07_23_07_test.csv"
df = pd.read_csv(filename, delimiter=';', parse_dates=['datetime']) #dtype={'lon_deg':'float', 'lat_deg':'float'})
df = df[189940:]
df.set_index('datetime', inplace=False)
plt.plot( df['datetime'], df['river_km'])
plt.show()
connection = 100
noconection = 0
def conditions(s):
if (s['age_gps_data'] <= 1.5) or (s['age_gps_data'] >=0.5 ):
return 100
else:
return 0
df['availability'] = df.apply(conditions, axis=1)
internet = np.ma.masked_where(df.availability == connection, df.availability)
nointernet = np.ma.masked_where((df.availability == noconection) , df.availability)
fig, ax = plt.subplots()
ax.plot(df.river_km, internet, df.river_km, nointernet)
plt.show()
How I can mark on a plot with different colours where availability is 0 and where is 100 and where is no value of these parameter?
What I want to achieve should looks like this:

Log scales with Seaborn kdeplot

I am trying to make a nice free energy surface (heat map) using Seaborn's kdeplot.
I am very close but can not figure out a way to change the color bar scale. The color bar scale is important since it is supposed to represent the difference in energy at different coordinates on the map. I need to know how to scale the values of the color bar by -(0.5961573)*log(x), where x is the values of the color bar. I may also then need to normalize the color bar from there so that the max value is 0.
Here is what I currently have:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
import seaborn as sns
rs=[]
dihes=[]
with open(sys.argv[1], 'r') as f:
for line in f:
time,r,dihe = line.split()
rs.append(float(r))
dihes.append(float(dihe))
sns.set_style("white")
sns.kdeplot(rs, dihes, n_levels=25, cbar=True, cmap="Purples_d")
plt.show()
This gets me:
The arrays rs and dihes are simple one dimensional arrays.
Any suggestions on how to scale the color bar (z-axis) would be very helpful!
One way to do it is to create the graph manually and then modify the labels directly. This involves a couple more lines of code. You may have to tweak the formatting a bit but something like this should get you on the right track.
The following is adapted from this answer and this answer.
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
rs=[]
dihes=[]
with open(sys.argv[1], 'r') as f:
for line in f:
time,r,dihe = line.split()
rs.append(float(r))
dihes.append(float(dihe))
x = rs
y = dihes
kde = stats.gaussian_kde([x, y])
xx, yy = np.mgrid[min(x):max(x):(max(x)-min(x))/100, min(y):max(y):(max(y)-min(y))/100]
density = kde(np.c_[xx.flat, yy.flat].T).reshape(xx.shape)
sns.set_style("white")
fig, ax = plt.subplots()
cset = ax.contour(xx, yy, density, 25, cmap="Purples_r")
cb = fig.colorbar(cset)
cb.ax.set_yticklabels(map(lambda x: -0.5961573*np.log(float(x.get_text())),
cb.ax.get_yticklabels()))
A bit late to the party, but I ended up putting together this context manager which switches plotted density values to a logarithmic scale:
import contextlib
import seaborn as sns
#contextlib.contextmanager
def plot_kde_as_log(base=np.exp(1), support_threshold=1e-4):
"""Context manager to render density estimates on a logarithmic scale.
Usage:
with plot_kde_as_log():
sns.jointplot(x='x', y='y', data=df, kind='kde')
"""
old_stats = sns.distributions._has_statsmodels
old_univar = sns.distributions._scipy_univariate_kde
old_bivar = sns.distributions._scipy_bivariate_kde
sns.distributions._has_statsmodels = False
def log_clip_fn(v):
v = np.log(np.clip(v, support_threshold, np.inf))
v -= np.log(support_threshold)
v /= np.log(base)
return v
def new_univar(*args, **kwargs):
x, y = old_univar(*args, **kwargs)
y = log_clip_fn(y)
return x, y
def new_bivar(*args, **kwargs):
x, y, z = old_bivar(*args, **kwargs)
z = log_clip_fn(z)
return x, y, z
sns.distributions._scipy_univariate_kde = new_univar
sns.distributions._scipy_bivariate_kde = new_bivar
try:
yield
finally:
sns.distributions._has_statsmodels = old_stats
sns.distributions._scipy_univariate_kde = old_univar
sns.distributions._scipy_bivariate_kde = old_bivar
The benefit of this approach is that it keeps all of the styling and other options of sns.jointplot() without any additional effort.
I updated Walt W's context manager to work with newer versions of seaborn
#contextlib.contextmanager
def plot_kde_as_log(base=np.exp(1), support_threshold=1e-4):
"""Context manager to render density estimates on a logarithmic scale.
Usage:
with plot_kde_as_log():
sns.jointplot(x='x', y='y', data=df, kind='kde')
"""
old_call = sns._statistics.KDE.__call__
def log_clip_fn(v):
v = np.log(np.clip(v, support_threshold, np.inf))
v -= np.log(support_threshold)
v /= np.log(base)
return v
def new_call(*args, **kwargs):
density, support = old_call(*args, **kwargs)
density = log_clip_fn(density)
return density, support
sns._statistics.KDE.__call__ = new_call
try:
yield
finally:
sns._statistics.KDE.__call__ = old_call

matplotlib animation with multiple plots and for loop

hey I'm trying to get matplotlib.animation to plot n plots in one graph like the first code block below, but when I run the script everything seems to run except none of the plots show up.
import matplotlib.pyplot as plt
# Data to be ploted
x = []
y = []
x2 = []
y2 = []
for i in range(-9,9):
x.append(i)
y.append(i**2)
x2.append(i)
y2.append(i**3)
# plot the data
plt.plot(x,y, label = 'first line')
# plot other data points
plt.plot(x2,y2, label = 'second line')
# add this before plt.show() to add labels to graph
plt.xlabel('X value')
plt.ylabel('Y value')
# add a title to graph
plt.title('interesting graph\nsubtitle')
plt.legend()
plt.show()
here is the code using animate:
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib import style
# better face
style.use('fivethirtyeight')
fig = plt.figure()
ax1 = fig.add_subplot(1,1,1)
def anima(i):
graph_data = open('a.txt').read()
lines = graph_data.split('\n')
dataPoints = []
for i in lines:
# ignor empty lines
if len(i) > 1:
line = i.split('|') # delimiter is |
for a in range(len(line)):
try:
dataPoints[a].append(int(line[a]))
# if there is no dataPoint[a] it gets created
except:
dataPoints.append(int(line[a]))
# modify axis
ax1.clear()
# plot
for i in range(len(dataPoints)-1):
ax1.plot(dataPoints[1],dataPoints[i+1])
#where to animate, what to animate, how often to update
ani = animation.FuncAnimation(fig, anima, interval = 1000)
plt.show()
in a.txt I have this:
1|56|80|62
2|123|135|55
12|41|12|23
60|12|45|23
12|43|56|54
25|123|23|31
2|213|31|84
61|1|68|54
62|2|87|31
63|4|31|53
64|8|13|13
65|16|51|65
66|32|43|84
80|62|42|15
update:
I gave up on reading a file and am having a threaded function generate values for me and instead for having everything in one plot I am having everything in subplots(the number is going to be edited soon). when I run the code with a normal plot it works fine, but when I try to use animate... it shows the graphs but no plot once again. my problem is showing the animated plot
# check if os is linux
import platform
if str(platform.system()).lower() == str('linux').lower():
# must be set befor importing any other matplotlib
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib import style
from threading import Thread
# Change style
style.use('fivethirtyeight')
fig = plt.figure()
#list with all datapoints eg: [timeList],[graph1List]....
data_points = []
# 'name' of each graph in the list
graphs_ = [0]
def create_plots():
xs = []
ys = []
for i in range(-10,11):
x = i
y = i**3
xs.append(x)
ys.append(y)
data_points.append(xs)
data_points.append(ys)
t = Thread(target=create_plots)
t.start()
def anima(i):
for i in range(len(graphs_)):
graphs_[i]=fig.add_subplot(211+i)
graphs_[i].clear()
graphs_[i].plot(0,i+1)
while len(data_points) == 0:
print('.')
ani = animation.FuncAnimation(fig, anima, interval=1000)
plt.show()
1) Are you sure your anima(i) function gets called?
2) Why are you overwriting the variable i in anima(i) and again in line?
for i in lines:
# ignor empty lines

Resources