Plotting different dataframe data in subplots - python-3.x

I'm trying to plot data in two dataframes in two subplots. I'm referring to this link
import pandas as pd
import numpy as np
from pprint import pprint
from matplotlib import pyplot as plt
# Two 10x10 frames of random samples to plot side by side.
df1 = pd.DataFrame(np.random.randn(10, 10))
df2 = pd.DataFrame(np.random.randn(10, 10))
# NOTE(review): plt.subplots() below creates its own figure, so this extra
# figure presumably stays empty -- confirm whether it is needed.
plt.figure()
fig, axes = plt.subplots(nrows=1, ncols=2)
# NOTE: with nrows=1 the returned `axes` is a one-dimensional array (see the
# IndexError reported below), so the two-index form axes[0, 0] used here fails.
df1.plot(ax=axes[0, 0], style='o-')
axes[0, 0].set_xlabel('x')
axes[0, 0].set_ylabel('y')
axes[0, 0].set_title('ttl')
df2.plot(ax=axes[0, 1], style='o-')
axes[0, 1].set_xlabel('x')
axes[0, 1].set_ylabel('y')
axes[0, 1].set_title('ttl')
However, I get the following error
df1.plot(ax=axes[0, 0], style='o-')
IndexError: too many indices for array
Any suggestions on how to resolve this will be really helpful.
EDIT: The answer provided below works for 1 row with 2 cols
I'm facing an error for 2 rows and 2 cols
import pandas as pd
import numpy as np
from pprint import pprint
from matplotlib import pyplot as plt
# Four 10x10 frames of random samples, one per subplot.
df1 = pd.DataFrame(np.random.randn(10, 10))
df2 = pd.DataFrame(np.random.randn(10, 10))
df3 = pd.DataFrame(np.random.randn(10, 10))
df4 = pd.DataFrame(np.random.randn(10, 10))
pprint(df1)
# NOTE(review): plt.subplots() below creates its own figure, so this extra
# figure presumably stays empty -- confirm whether it is needed.
plt.figure()
fig, axes = plt.subplots(nrows=2, ncols=2)
# NOTE: `axes` is requested as a 2x2 grid here, so axes[0] is a numpy.ndarray
# (a whole row) rather than a single Axes -- this matches the AttributeError
# reported below.
df1.plot(ax=axes[0], style='o-')
axes[0].set_xlabel('x')
axes[0].set_ylabel('y')
axes[0].set_title('ttl')
df2.plot(ax=axes[1], style='o-')
axes[1].set_xlabel('x')
axes[1].set_ylabel('y')
axes[1].set_title('ttl')
# NOTE(review): indices 2 and 3 presumably fall outside a two-row array;
# a flattened view of `axes` would be needed for single-integer indexing -- confirm.
df3.plot(ax=axes[2], style='o-')
axes[2].set_xlabel('x')
axes[2].set_ylabel('y')
axes[2].set_title('ttl')
df4.plot(ax=axes[3], style='o-')
axes[3].set_xlabel('x')
axes[3].set_ylabel('y')
axes[3].set_title('ttl')
plt.show()
Error:
AttributeError: 'numpy.ndarray' object has no attribute 'get_figure'
Any suggestions?

With one row and two columns, axes is a one-dimensional array, so you have to index it with a single integer, like this:
# `axes` is one-dimensional here, so each subplot is selected with a single index.
df1.plot(ax=axes[0], style='o-')
df2.plot(ax=axes[1], style='o-')
I suggest reading the plt.subplots documentation: look at the squeeze parameter and you will understand why this is happening.

Related

How to show all dates in matplotlib from excel import?

I have code in Python 3.11 that generates a contour plot from an Excel table using matplotlib. The result shows only the first day of each month on the x axis (for example 1.6.2022, 1.7.2022 ...). I want all days from the Excel source table. Here's the code:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import Normalize
import pandas as pd
import matplotlib.dates as mdates
# Import data from the Excel file; the first column becomes the index.
df = pd.read_excel('temperature_data.xlsx', index_col=0)
# Assign columns to variables.
# NOTE(review): the column labels are presumably the dates and the index the
# depths, judging by the axis labels below -- verify against the spreadsheet.
time = df.columns
depth = df.index
temperature = df.to_numpy()
# Creating the graph
fig, ax = plt.subplots()
min_temp = temperature.min()
max_temp = temperature.max()
# Filled contours plus black contour lines, both using levels spaced 2 apart
# starting at the rounded minimum temperature.
cs = plt.contourf(time, depth, temperature, levels=np.arange(round(min_temp), round(max_temp)+2, 2), cmap='coolwarm', vmin=min_temp, vmax=max_temp)
cs2 = plt.contour(time, depth, temperature, levels=np.arange(round(min_temp), round(max_temp)+2, 2), colors='black')
# Reverse the direction of the y axis.
plt.gca().invert_yaxis()
# Label the contour lines with integer values.
plt.clabel(cs2, inline=1, fontsize=10, fmt='%d')
plt.title('Teplota vody [°C]')
plt.xticks(rotation=90, ha='right')
# Only the tick label format is set here; no tick locator is configured, so
# the tick positions are whatever matplotlib chooses by default.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%d.%m.%Y'))
#ax.set_xlim(df.index.min(), df.index.max())
#ax.set_xlabel('Time')
ax.set_ylabel('hloubka [m]')
# Colour bar for the filled contours, normalised to the data range.
norm = Normalize(vmin=min_temp, vmax=max_temp)
plt.colorbar(cs, cmap='coolwarm', norm=norm)
plt.show()
Thank you for your help.

connect two points of a plot with another line

I want to connect two points in a data frame plot with another line and add it to the plot:
import numpy as np
from numpy.random import randn
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
%matplotlib inline
# Four consecutive daily timestamps used as the frame's index.
days = [datetime(2016, 1, 1), datetime(2016, 1, 2),datetime(2016, 1, 3),datetime(2016, 1, 4)]
dt_ind = pd.DatetimeIndex(days)
# 4 rows x 2 columns of random samples, labelled 'A' and 'B'.
data = np.random.randn(4,2)
cols = ['A','B']
df = pd.DataFrame(data,dt_ind,cols)
# Plot column 'A' only; the plot is not bound to a named Axes here.
df['A'].plot(figsize=(12,4), sort_columns=True)
here is the data frame:
enter image description here
and the plot:
enter image description here
how is that possible? for example add a line from point 2 to point 4 (or any two points)
You want to use matplotlib's plt.subplots() function to return a fig and ax object, so you can then add separate lines to your ax.
import numpy as np
from numpy.random import randn
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
%matplotlib inline
# Four consecutive daily timestamps used as the frame's index.
days = [
    datetime(2016, 1, 1),
    datetime(2016, 1, 2),
    datetime(2016, 1, 3),
    datetime(2016, 1, 4),
]
dt_ind = pd.DatetimeIndex(days)
data = np.random.randn(4, 2)
cols = ['A', 'B']
df = pd.DataFrame(data, dt_ind, cols)

# Draw on an explicit Axes so that extra lines can be added to the same plot.
fig, ax = plt.subplots()
ax.plot(df['A'], color='red')

# Connect the 2nd and 4th points (positions 1 and 3). Positional access goes
# through .iloc: plain integer keys on a Series with a DatetimeIndex are
# deprecated as positional lookups in recent pandas.
endpoints = [1, 3]
ax.plot(df.index[endpoints], df['A'].iloc[endpoints], color='blue')

plot dataframe: overlay line and bar plot doesn't work for time series index?

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# create dataframes df0 and df1:
# df0 is sampled every second and df1 every 15 seconds over the same 15 minutes.
index0 = pd.date_range(start='2014-06-01 00:00:00', end='2014-06-01 00:15:00', freq='1S')
data0 = np.random.rand(len(index0))
df0 = pd.DataFrame(data=data0, index=index0, columns=['DF0'])
index1 = pd.date_range(start='2014-06-01 00:00:00', end='2014-06-01 00:15:00', freq='15S')
data1 = np.random.rand(len(index1))
df1 = pd.DataFrame(data=data1, index=index1, columns=['DF1'])
# plot df0 and df1:
fig,ax1 = plt.subplots(figsize=(40,10))
# Second y axis sharing the same x axis as ax1.
ax2 = ax1.twinx()
# Line plot on the primary axis, bar plot on the twin axis.
df0.plot.line( color="r", ax = ax1)
df1.plot.bar( color ='b', linewidth = 5, ax = ax2, alpha = 0.7)
plt.show()
I can overlay the dataframes as two line plots or as two barplots. But however hard I try, I can't manage to overlay a line plot with a bar plot or the other way round? With the code above I only get the barplot of df1 but don't see the lineplot of df0. What do I have to do differently?
A bar plot takes only categorical (string) values as its x values. Hence a simple hack is to convert the timestamps to strings.
When you feed it float values, it converts them to str, so they no longer match the x-values of the line plot's index.
df0.index = df0.index.map(str)
Secondary axis would also be not required for this.
Try this!
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# create dataframes df0 and df1:
index0 = pd.date_range(start='2014-06-01 00:00:00',
                       end='2014-06-01 00:15:00', freq='1S')
data0 = np.random.rand(len(index0))
df0 = pd.DataFrame(data=data0, index=index0, columns=['DF0'])
# Convert the timestamps to strings so the line plot uses categorical x values,
# as the bar plot does.
df0.index = df0.index.map(str)
index1 = pd.date_range(start='2014-06-01 00:00:00',
                       end='2014-06-01 00:15:00', freq='15S')
data1 = np.random.rand(len(index1))
df1 = pd.DataFrame(data=data1, index=index1, columns=['DF1'])
# plot df0 and df1:
# Both plots are drawn on the Axes created here; without ax=ax1 the line plot
# would open a second, default-sized figure and leave this one empty.
fig, ax1 = plt.subplots(figsize=(40, 10))
df0.plot.line(color="r", ax=ax1)
df1.plot.bar(color='b', linewidth=5, ax=ax1, alpha=0.7)
plt.show()

How to plot a different candlestick graph on the same figure?

I want to plot candlestick graphs and save each graph I plot. For speed, I want to do this on the same figure.
So far, I can save the first image but the rest are blank.
I've tried using:
plt.clf()
plt.cla()
fig.clear()
My code:
import pandas as pd
import matplotlib.pyplot as plt
from mpl_finance import candlestick2_ohlc
from itertools import count
# Column names applied to the CSV; 'Date' and 'Time' are merged into one
# 'Datetime' column by parse_dates below.
colNames = ['Date', 'Time', 'Open', 'High', 'Low', 'Close']
allData = pd.read_csv('file.csv', header=0, names=colNames, usecols=colNames,
parse_dates={'Datetime': ['Date', 'Time']},
infer_datetime_format=True)
# A single figure/Axes pair is created once and reused for every chart.
fig, ax = plt.subplots()
# Walk through the data in windows of 10 rows (n = 0, 10, 20, ...).
for n in count(0, 10):
subset = allData[n:n+10]
plt.axis('off')
# The ValueError handler is what ends the otherwise infinite count() loop.
try:
candlestick2_ohlc(ax, subset['Open'], subset['High'], subset['Low'],
subset['Close'], width=0.6, colorup='g',
colordown='r', alpha=1)
except ValueError:
print("Graphing done.")
break
# Save the current window to "<n>.png".
fig.savefig("{}.png".format(str(n)))
plt.show()
# NOTE(review): clearing the whole figure presumably detaches `ax` from it,
# which would explain why every image after the first is blank -- confirm.
fig.clf()
CSV File used (file.csv):
Date,Time,OpenBid,HighBid,LowBid,CloseBid
01/02/2009,09:31:00,1212.23,1212.29,1211.77,1211.77
01/02/2009,09:32:00,1211.53,1212.18,1211.29,1211.29
01/02/2009,09:33:00,1209.11,1209.91,1209.11,1209.91
01/02/2009,09:34:00,1210.3,1211.29,1210.3,1211.28
01/02/2009,09:35:00,1213.51,1214.51,1213.11,1213.11
01/02/2009,09:36:00,1212.79,1212.79,1212.56,1212.71
01/02/2009,09:37:00,1213.65,1214.33,1213.65,1213.75
01/02/2009,09:38:00,1213.39,1213.76,1213.39,1213.76
01/02/2009,09:39:00,1211.32,1213.17,1211.32,1213.17
01/02/2009,09:40:00,1213.92,1215.87,1213.92,1215.87
01/02/2009,09:41:00,1215.61,1216.1,1215.4,1216.1
01/02/2009,09:42:00,1215.74,1215.75,1214.07,1214.07
01/02/2009,09:43:00,1214.36,1214.88,1213.87,1214.88
01/02/2009,09:44:00,1215,1215,1213.35,1214.03
01/02/2009,09:45:00,1214.04,1214.44,1214.04,1214.12
01/02/2009,09:46:00,1214.16,1214.56,1214.16,1214.56
01/02/2009,09:47:00,1214.68,1214.93,1214.62,1214.68
01/02/2009,09:48:00,1215.59,1216.11,1215.59,1216.11
01/02/2009,09:49:00,1216.37,1216.37,1215.54,1215.61
01/02/2009,09:50:00,1215.29,1215.29,1214.28,1214.28
01/02/2009,09:51:00,1213.82,1213.82,1212.87,1212.99
01/02/2009,09:52:00,1212.25,1212.44,1212.21,1212.25
01/02/2009,09:53:00,1212.05,1212.05,1210.24,1210.24
01/02/2009,09:54:00,1210.05,1210.21,1209.27,1210.21
01/02/2009,09:55:00,1209.83,1210.68,1209.83,1209.99
01/02/2009,09:56:00,1209.97,1210.91,1209.97,1210.91
01/02/2009,09:57:00,1211.45,1212.62,1211.38,1212.17
01/02/2009,09:58:00,1212.42,1212.42,1212.26,1212.41
01/02/2009,09:59:00,1212.39,1212.39,1212.33,1212.33
01/02/2009,10:00:00,1212.28,1212.28,1211.26,1212.14
01/02/2009,10:01:00,1212.99,1214.66,1212.99,1214.19
01/02/2009,10:02:00,1213.91,1213.91,1213.16,1213.55
01/02/2009,10:03:00,1213.53,1213.53,1213.41,1213.46
01/02/2009,10:04:00,1212.67,1213.21,1212.54,1213.21
01/02/2009,10:05:00,1213.52,1213.81,1213.52,1213.52
01/02/2009,10:06:00,1213.34,1213.4,1213.29,1213.29
01/02/2009,10:07:00,1213.46,1213.6,1213.14,1213.14
01/02/2009,10:08:00,1213.37,1213.81,1213.37,1213.8
01/02/2009,10:09:00,1213.57,1214.5,1213.57,1214.45
01/02/2009,10:10:00,1214.93,1215.03,1214.62,1214.62
I think the issue is to do with candlestick2_ohlc and that it plots to 'ax', but I don't know how to resolve my issue.
Here is what I would do:
from mpl_finance import candlestick2_ohlc
from itertools import count
colNames = ['Date', 'Time', 'Open', 'High', 'Low', 'Close']
# NOTE(review): `d` is presumably a string holding the CSV text from the
# question; it (and StringIO) must be defined/imported before this snippet.
allData = pd.read_csv(StringIO(d), header=0, names=colNames, usecols=colNames,
                      parse_dates={'Datetime': ['Date', 'Time']},
                      infer_datetime_format=True)

# One figure/Axes pair is reused for every chart.
fig, ax = plt.subplots()
for n in count(0, 10):
    # Next window of (up to) 10 rows; an empty window means the data is used up.
    subset = allData[n:n + 10]
    if subset.empty:
        break
    ax.axis('off')
    candlestick2_ohlc(ax, subset['Open'], subset['High'], subset['Low'],
                      subset['Close'], width=0.6, colorup='g',
                      colordown='r', alpha=1)
    fig.savefig("{}.png".format(str(n)))
    # Clear only the Axes contents so `ax` stays attached to the figure and
    # can be drawn on again in the next iteration.
    ax.clear()

Pandas to MatPlotLib with Dollar Signs

Given the following data frame:
import pandas as pd
df=pd.DataFrame({'A':['$0-$20','$20+']})
df
A
0 $0-$20
1 $20+
I'd like to create a bar chart in MatPlotLib but I can't seem to get the dollar signs to show up correctly.
Here's what I have:
import matplotlib.pyplot as plt
import numpy as np
# NOTE(review): the frame defined above only has column 'A'; column 'B' is
# presumably present in the asker's real data -- confirm.
y=df.B
x=df.A
# One bar position per label.
ind=np.arange(len(x))
fig, ax = plt.subplots(1, 1, figsize = (2,2))
plt.bar(ind, y, align='center', width=.5, edgecolor='none', color='grey')
# Make the Axes background transparent.
ax.patch.set_facecolor('none')
ax.patch.set_alpha(0)
ax.set_ylim([0,5])
# The whole Series `x` is passed as the single x-axis label here, not as
# per-bar tick labels.
ax.set_xlabel(x,fontsize=12,rotation=0,color='grey')
# Blank out the tick labels on both axes.
ax.set_xticklabels('')
ax.set_yticklabels('')
I can get the labels to display "better" if I use df.A.values.tolist(), but that just corrects the format.
I'd like each label to display under each bar with the intended original format (with dollar signs).
Thanks in advance!
To specify the xticklabels, pass tick_label=x to plt.bar.
Matplotlib parses labels using a subset of the TeX markup
language. Dollar
signs indicate the beginning (and end) of math mode. So pairs of bare dollar signs are
getting unintentionally swallowed. Currently, there is no way to disable mathtext parsing. So to prevent the dollar signs from being interpreted as math markup, replace the
bare $ with \$:
# Raw string avoids the invalid '\$' escape sequence; regex=False makes '$' a literal character.
df['A'] = df['A'].str.replace('$', r'\$', regex=False)
For example,
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
df = pd.DataFrame({'A': ['$0-$20', '$20+'], 'B': [10, 20]})
# Escape every dollar sign so matplotlib does not treat it as a math-mode
# delimiter. The raw string avoids the invalid '\$' escape sequence, and
# regex=False makes '$' a literal character instead of a regex end-of-string anchor.
df['A'] = df['A'].str.replace('$', r'\$', regex=False)
y = df['B']
x = df['A']
# One bar position per label.
ind = np.arange(len(x))
fig, ax = plt.subplots(1, 1, figsize=(2, 2))
# tick_label puts each (escaped) label under its own bar.
plt.bar(ind, y,
        tick_label=x,
        align='center', width=.5, edgecolor='none',
        color='grey')
plt.show()
Alternatively, you could use df.plot(kind='bar'):
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
df = pd.DataFrame({'A': ['$0-$20', '$20+'], 'B': [10, 20]})
# Escape every dollar sign so matplotlib does not treat it as a math-mode
# delimiter. The raw string avoids the invalid '\$' escape sequence, and
# regex=False makes '$' a literal character instead of a regex end-of-string anchor.
df['A'] = df['A'].str.replace('$', r'\$', regex=False)
fig, ax = plt.subplots(1, 1, figsize=(2, 2))
# Column 'A' supplies the bar labels and column 'B' the bar heights.
df.plot(kind='bar', x='A', y='B',
        align='center', width=.5, edgecolor='none',
        color='grey', ax=ax)
plt.xticks(rotation=25)
plt.show()

Resources