How to plot a different candlestick graph on the same figure? - python-3.x

I want to plot candlestick graphs and save each graph I plot. For speed, I want to do this on the same figure.
So far, I can save the first image but the rest are blank.
I've tried using:
plt.clf()
plt.cla()
fig.clear()
My code:
# Question script: save one candlestick image per 10-row window of file.csv,
# reusing a single figure. (Indentation was lost when this post was extracted.)
import pandas as pd
import matplotlib.pyplot as plt
from mpl_finance import candlestick2_ohlc
from itertools import count
# names= replaces the CSV's own header row (header=0) with these labels.
colNames = ['Date', 'Time', 'Open', 'High', 'Low', 'Close']
allData = pd.read_csv('file.csv', header=0, names=colNames, usecols=colNames,
parse_dates={'Datetime': ['Date', 'Time']},
infer_datetime_format=True)
# One figure/axes pair is created up front and reused for every window.
fig, ax = plt.subplots()
# count(0, 10) never stops by itself; the loop relies on the break below.
for n in count(0, 10):
subset = allData[n:n+10]
plt.axis('off')
try:
candlestick2_ohlc(ax, subset['Open'], subset['High'], subset['Low'],
subset['Close'], width=0.6, colorup='g',
colordown='r', alpha=1)
except ValueError:
# presumably raised once the slice is empty (past the last row) -- TODO confirm
print("Graphing done.")
break
# The file name is the starting row of the window: 0.png, 10.png, ...
fig.savefig("{}.png".format(str(n)))
plt.show()
# NOTE(review): fig.clf() clears the whole figure, so `ax` is likely no longer
# attached to `fig` on the next pass -- a probable cause of the blank images
# described in the question; the answer clears the axes with ax.clear() instead.
fig.clf()
CSV File used (file.csv):
Date,Time,OpenBid,HighBid,LowBid,CloseBid
01/02/2009,09:31:00,1212.23,1212.29,1211.77,1211.77
01/02/2009,09:32:00,1211.53,1212.18,1211.29,1211.29
01/02/2009,09:33:00,1209.11,1209.91,1209.11,1209.91
01/02/2009,09:34:00,1210.3,1211.29,1210.3,1211.28
01/02/2009,09:35:00,1213.51,1214.51,1213.11,1213.11
01/02/2009,09:36:00,1212.79,1212.79,1212.56,1212.71
01/02/2009,09:37:00,1213.65,1214.33,1213.65,1213.75
01/02/2009,09:38:00,1213.39,1213.76,1213.39,1213.76
01/02/2009,09:39:00,1211.32,1213.17,1211.32,1213.17
01/02/2009,09:40:00,1213.92,1215.87,1213.92,1215.87
01/02/2009,09:41:00,1215.61,1216.1,1215.4,1216.1
01/02/2009,09:42:00,1215.74,1215.75,1214.07,1214.07
01/02/2009,09:43:00,1214.36,1214.88,1213.87,1214.88
01/02/2009,09:44:00,1215,1215,1213.35,1214.03
01/02/2009,09:45:00,1214.04,1214.44,1214.04,1214.12
01/02/2009,09:46:00,1214.16,1214.56,1214.16,1214.56
01/02/2009,09:47:00,1214.68,1214.93,1214.62,1214.68
01/02/2009,09:48:00,1215.59,1216.11,1215.59,1216.11
01/02/2009,09:49:00,1216.37,1216.37,1215.54,1215.61
01/02/2009,09:50:00,1215.29,1215.29,1214.28,1214.28
01/02/2009,09:51:00,1213.82,1213.82,1212.87,1212.99
01/02/2009,09:52:00,1212.25,1212.44,1212.21,1212.25
01/02/2009,09:53:00,1212.05,1212.05,1210.24,1210.24
01/02/2009,09:54:00,1210.05,1210.21,1209.27,1210.21
01/02/2009,09:55:00,1209.83,1210.68,1209.83,1209.99
01/02/2009,09:56:00,1209.97,1210.91,1209.97,1210.91
01/02/2009,09:57:00,1211.45,1212.62,1211.38,1212.17
01/02/2009,09:58:00,1212.42,1212.42,1212.26,1212.41
01/02/2009,09:59:00,1212.39,1212.39,1212.33,1212.33
01/02/2009,10:00:00,1212.28,1212.28,1211.26,1212.14
01/02/2009,10:01:00,1212.99,1214.66,1212.99,1214.19
01/02/2009,10:02:00,1213.91,1213.91,1213.16,1213.55
01/02/2009,10:03:00,1213.53,1213.53,1213.41,1213.46
01/02/2009,10:04:00,1212.67,1213.21,1212.54,1213.21
01/02/2009,10:05:00,1213.52,1213.81,1213.52,1213.52
01/02/2009,10:06:00,1213.34,1213.4,1213.29,1213.29
01/02/2009,10:07:00,1213.46,1213.6,1213.14,1213.14
01/02/2009,10:08:00,1213.37,1213.81,1213.37,1213.8
01/02/2009,10:09:00,1213.57,1214.5,1213.57,1214.45
01/02/2009,10:10:00,1214.93,1215.03,1214.62,1214.62
I think the issue is to do with candlestick2_ohlc and that it plots to 'ax', but I don't know how to resolve my issue.

Here is what I would do:
# Save one candlestick image per 10-row window, reusing a single figure.
from itertools import count

import matplotlib.pyplot as plt
import pandas as pd
from mpl_finance import candlestick2_ohlc

colNames = ['Date', 'Time', 'Open', 'High', 'Low', 'Close']
# Read the same file as the question; names= replaces the CSV's header row.
allData = pd.read_csv('file.csv', header=0, names=colNames, usecols=colNames,
                      parse_dates={'Datetime': ['Date', 'Time']},
                      infer_datetime_format=True)
fig, ax = plt.subplots()
for n in count(0, 10):
    subset = allData[n:n+10]
    if subset.empty:
        # Slicing past the last row yields an empty frame: nothing left to draw.
        break
    # ax.clear() below resets the axes, so the axis must be hidden on every pass.
    ax.axis('off')
    candlestick2_ohlc(ax, subset['Open'], subset['High'], subset['Low'],
                      subset['Close'], width=0.6, colorup='g',
                      colordown='r', alpha=1)
    # The file name is the starting row of the window: 0.png, 10.png, ...
    fig.savefig("{}.png".format(n))
    # Clear only the axes' contents so `ax` stays attached to `fig` and can be
    # drawn on again in the next iteration.
    ax.clear()
# Release the figure once every window has been written.
plt.close(fig)

Related

How to show all dates in matplotlib from excel import?

I have code in Python 3.11 that generates a contour plot from an Excel table using matplotlib. The result shows only the first day of each month on the x axis (for example 1.6.2022, 1.7.2022 ...). I want all days from the Excel source table to be shown. Here's the code:
# Question script: filled contour plot of a temperature table read from Excel.
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import Normalize
import pandas as pd
import matplotlib.dates as mdates
# import data from excel file
df = pd.read_excel('temperature_data.xlsx', index_col=0)
# Assign columns to variables
# Column labels are used as the x values, the index as the y values, and the
# cell values as the contoured quantity (assumes the columns are dates -- TODO confirm).
time = df.columns
depth = df.index
temperature = df.to_numpy()
# Creating the graph
fig, ax = plt.subplots()
min_temp = temperature.min()
max_temp = temperature.max()
# Contour levels run from the rounded minimum up past the rounded maximum in steps of 2.
cs = plt.contourf(time, depth, temperature, levels=np.arange(round(min_temp), round(max_temp)+2, 2), cmap='coolwarm', vmin=min_temp, vmax=max_temp)
# Same levels again, drawn as black lines so they can be labelled below.
cs2 = plt.contour(time, depth, temperature, levels=np.arange(round(min_temp), round(max_temp)+2, 2), colors='black')
# Flip the y axis so the first depth value is at the top.
plt.gca().invert_yaxis()
plt.clabel(cs2, inline=1, fontsize=10, fmt='%d')
plt.title('Teplota vody [°C]')
plt.xticks(rotation=90, ha='right')
# Only the tick *format* is set here; no tick locator is configured in this script,
# so which dates receive a tick is left to matplotlib's default.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%d.%m.%Y'))
#ax.set_xlim(df.index.min(), df.index.max())
#ax.set_xlabel('Time')
ax.set_ylabel('hloubka [m]')
norm = Normalize(vmin=min_temp, vmax=max_temp)
# NOTE(review): cmap/norm are passed alongside the contour set `cs` -- confirm
# they are needed, since `cs` already carries its own colormap.
plt.colorbar(cs, cmap='coolwarm', norm=norm)
plt.show()
Thank you for your help.

Pandas and Matplotlib: Adding tooltip to make interactive

I am trying to add a tooltip to the graph, so that hovering over the graph shows information about the data point. How do I add one and make the plot interactive?
# Question script: plot actual and predicted values with a shaded prediction band.
import matplotlib.pyplot as plt
import pandas as pd
# NOTE(review): pandas is imported twice; the second import is redundant.
import pandas as pd
from numpy import nan
from matplotlib import dates as mpl_dates
# `dataset` is not defined in this snippet; the question says the data comes
# from Power BI, which presumably supplies it -- TODO confirm.
df = dataset
# Dates are turned into 'MM/DD/YYYY' strings here, so the x values plotted
# below are strings rather than datetimes.
df["Date"] = pd.to_datetime(df["Date"]).dt.strftime('%m/%d/%Y')
#df["Date"] = pd.to_datetime(df["Date"]).apply(lambda x: x.strftime('%B-%Y'))
# Blank out zero actuals, and blank out the prediction bounds wherever an
# actual value exists, so each series is only drawn where it applies.
df.loc[df['Actuals'] == 0, ['Actuals']] = nan
df.loc[df['Actuals'] > 0, ['Predicted_Lower']] = nan
df.loc[df['Actuals'] > 0, ['Predicted_Upper']] = nan
# gca stands for 'get current axis'
ax = plt.gca()
y1 = df['Predicted_Lower']
y2 = df['Predicted_Upper']
x = df['Date']
# Shade the band between the lower and upper prediction bounds.
ax.fill_between(x,y1, y2, facecolor="blue", alpha=0.7)
df.plot(kind='line',x='Date',y='Predicted', color='black', ax=ax)
df.plot(kind='line',x='Date',y='Actuals', color='green', ax=ax)
# The bounds themselves are drawn in white on top of the band.
df.plot(kind='line',x='Date',y='Predicted_Lower',color='white',ax=ax)
df.plot(kind='line',x='Date',y='Predicted_Upper',color='white', ax=ax)
date_format = mpl_dates.DateFormatter('%Y-%m-%d')
plt.gca().xaxis.set_major_formatter(date_format)
# Keep every third tick to reduce label crowding.
locs, labels = plt.xticks()
plt.xticks(locs[::3], labels[::3], rotation=45)
# NOTE(review): the figure is shown here, before the legend and x label below
# are set -- confirm whether those later calls are meant to affect this plot.
plt.show()
plt.xticks(rotation=45)
plt.legend(['Predicted','Actuals'])
plt.xlabel('Date')
df.head(30)
plt.show()
using pandas, matplotlib, I am getting the data from sql server that is connected to Power BI and writing pyscripts to display graphs.
It's possible using matplotlib as discussed here.
However, you might want to look into other plotting packages such as plotly where it is builtin, default behavior.
import numpy as np
import pandas as pd
import plotly.express as px

# Small demo frame: y = x**2 for x in 0..19.
df = pd.DataFrame(np.arange(20), columns=['x'])
df['y'] = df['x']**2
fig = px.line(df, x='x', y='y')
# A bare px.line(...) expression only renders in a notebook; show() also
# displays the figure when this runs as a script.
fig.show()
In your example, you could try something like
px.line(df, x='Date', y='Predicted')  # column names are strings; add further keyword arguments as needed

Plotting different dataframe data in subplots

I'm trying to plot data in two dataframes in two subplots. I'm referring to this link
# Question script: plot two random data frames side by side (1 row x 2 columns).
import pandas as pd
import numpy as np
from pprint import pprint
from matplotlib import pyplot as plt
df1 = pd.DataFrame(np.random.randn(10, 10))
df2 = pd.DataFrame(np.random.randn(10, 10))
# NOTE(review): plt.subplots() below returns its own figure, so this extra
# figure looks unused -- confirm it is needed.
plt.figure()
fig, axes = plt.subplots(nrows=1, ncols=2)
# `axes` is indexed with two subscripts here; this is the line that raises the
# "too many indices for array" IndexError quoted in the question.
df1.plot(ax=axes[0, 0], style='o-')
axes[0, 0].set_xlabel('x')
axes[0, 0].set_ylabel('y')
axes[0, 0].set_title('ttl')
df2.plot(ax=axes[0, 1], style='o-')
axes[0, 1].set_xlabel('x')
axes[0, 1].set_ylabel('y')
axes[0, 1].set_title('ttl')
However, I get the following error
df1.plot(ax=axes[0, 0], style='o-')
IndexError: too many indices for array
Any suggestions on how to resolve this will be really helpful.
EDIT: The answer provided below works for 1 row with 2 cols
I'm facing an error for 2 rows and 2 cols
# Follow-up script: plot four random data frames on a 2 x 2 grid of subplots.
import pandas as pd
import numpy as np
from pprint import pprint
from matplotlib import pyplot as plt
df1 = pd.DataFrame(np.random.randn(10, 10))
df2 = pd.DataFrame(np.random.randn(10, 10))
df3 = pd.DataFrame(np.random.randn(10, 10))
df4 = pd.DataFrame(np.random.randn(10, 10))
pprint(df1)
# NOTE(review): plt.subplots() below returns its own figure, so this extra
# figure looks unused -- confirm it is needed.
plt.figure()
fig, axes = plt.subplots(nrows=2, ncols=2)
# `axes` is indexed with a single subscript here; the error quoted in the
# question reports that the value passed as ax= is a numpy.ndarray, i.e.
# axes[0] is a row of the grid rather than one Axes.
df1.plot(ax=axes[0], style='o-')
axes[0].set_xlabel('x')
axes[0].set_ylabel('y')
axes[0].set_title('ttl')
df2.plot(ax=axes[1], style='o-')
axes[1].set_xlabel('x')
axes[1].set_ylabel('y')
axes[1].set_title('ttl')
# NOTE(review): with nrows=2 the first dimension presumably has only two
# entries, so axes[2] and axes[3] would be out of range -- confirm.
df3.plot(ax=axes[2], style='o-')
axes[2].set_xlabel('x')
axes[2].set_ylabel('y')
axes[2].set_title('ttl')
df4.plot(ax=axes[3], style='o-')
axes[3].set_xlabel('x')
axes[3].set_ylabel('y')
axes[3].set_title('ttl')
plt.show()
Error:
AttributeError: 'numpy.ndarray' object has no attribute 'get_figure'
Any suggestions?
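With nrows=1 and ncols=2 the returned axes array is one-dimensional, so index it with a single integer, like this (for a 2 x 2 grid the array is two-dimensional and needs axes[row, col]):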
# The array returned by plt.subplots() is 1-D for a single row or column and
# 2-D for a full grid; iterating axes.flat visits the individual Axes in
# row-major order either way, so the same loop serves both layouts.
# For the 2 x 2 grid, extend the tuple with df3 and df4.
for ax, df in zip(axes.flat, (df1, df2)):
    df.plot(ax=ax, style='o-')
I suggest reading the documentation for plt.subplots and looking at the squeeze parameter; it explains why this is happening.

plot dataframe: overlay line and bar plot doesn't work for time series index?

# Question script: overlay a per-second line plot and a 15-second bar plot
# on twin y axes.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# create dataframes df0 and df1:
# df0 covers the 15-minute span at 1-second steps, df1 the same span at 15-second steps.
index0 = pd.date_range(start='2014-06-01 00:00:00', end='2014-06-01 00:15:00', freq='1S')
data0 = np.random.rand(len(index0))
df0 = pd.DataFrame(data=data0, index=index0, columns=['DF0'])
index1 = pd.date_range(start='2014-06-01 00:00:00', end='2014-06-01 00:15:00', freq='15S')
data1 = np.random.rand(len(index1))
df1 = pd.DataFrame(data=data1, index=index1, columns=['DF1'])
# plot df0 and df1:
fig,ax1 = plt.subplots(figsize=(40,10))
# ax2 is a twin of ax1, so both plots are drawn over the same x axis.
ax2 = ax1.twinx()
df0.plot.line( color="r", ax = ax1)
# NOTE(review): per the answer, the bar plot treats its x values as categorical
# labels, so its x positions presumably do not match the datetime x values of
# the line plot -- which would explain why the line is not visible. Confirm.
df1.plot.bar( color ='b', linewidth = 5, ax = ax2, alpha = 0.7)
plt.show()
I can overlay the dataframes as two line plots or as two barplots. But however hard I try, I can't manage to overlay a line plot with a bar plot or the other way round? With the code above I only get the barplot of df1 but don't see the lineplot of df0. What do I have to do differently?
A bar plot treats its x values as categorical (string) labels only, so a simple hack is to convert the timestamps to strings.
When you pass it other values, it converts them to strings, so they no longer match the x values of the line plot.
# Replace df0's datetime index with the string form of each timestamp.
df0.index = df0.index.map(str)
Secondary axis would also be not required for this.
Try this!
# Overlay a line plot and a bar plot on one axes by giving the line plot
# string (categorical) x labels.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# create dataframes df0 and df1:
index0 = pd.date_range(start='2014-06-01 00:00:00',
                       end='2014-06-01 00:15:00', freq='1S')
data0 = np.random.rand(len(index0))
df0 = pd.DataFrame(data=data0, index=index0, columns=['DF0'])
# Convert the timestamps to strings so the line plot uses categorical x
# values, as the bar plot does.
df0.index = df0.index.map(str)
index1 = pd.date_range(start='2014-06-01 00:00:00',
                       end='2014-06-01 00:15:00', freq='15S')
data1 = np.random.rand(len(index1))
df1 = pd.DataFrame(data=data1, index=index1, columns=['DF1'])

# plot df0 and df1 on the single axes created here:
fig, ax1 = plt.subplots(figsize=(40, 10))
# Pass ax=ax1 explicitly: without it pandas draws on a figure of its own and
# the 40x10 figure created above is left empty.
df0.plot.line(color="r", ax=ax1)
# NOTE(review): both plots are presumably positioned by row number, so the
# 1-second line and the 15-second bars may not share a time scale -- confirm
# the alignment is acceptable for your data.
df1.plot.bar(color='b', linewidth=5, ax=ax1, alpha=0.7)
plt.show()

Pandas to MatPlotLib with Dollar Signs

Given the following data frame:
# Example frame: one column 'A' holding price-range labels that contain dollar signs.
import pandas as pd
df=pd.DataFrame({'A':['$0-$20','$20+']})
# Bare expression: echoes the frame when run in an interactive session.
df
        A
0  $0-$20
1    $20+
I'd like to create a bar chart in MatPlotLib but I can't seem to get the dollar signs to show up correctly.
Here's what I have:
# Question script: small bar chart whose labels should show the dollar-sign strings.
import matplotlib.pyplot as plt
import numpy as np
# NOTE(review): the frame shown in the question only defines column 'A';
# column 'B' (the bar heights) must come from elsewhere -- confirm.
y=df.B
x=df.A
# One integer position per label.
ind=np.arange(len(x))
fig, ax = plt.subplots(1, 1, figsize = (2,2))
plt.bar(ind, y, align='center', width=.5, edgecolor='none', color='grey')
# Make the axes background transparent.
ax.patch.set_facecolor('none')
ax.patch.set_alpha(0)
ax.set_ylim([0,5])
# The whole Series `x` is passed as the single x-axis label here, rather than
# as one label per bar.
ax.set_xlabel(x,fontsize=12,rotation=0,color='grey')
# Blank out the tick labels on both axes.
ax.set_xticklabels('')
ax.set_yticklabels('')
I can get the labels to display "better" if I use df.A.values.tolist(), but that just corrects the format.
I'd like each label to display under each bar with the intended original format (with dollar signs).
Thanks in advance!
To specify the xticklabels, pass tick_label=x to plt.bar.
Matplotlib parses labels using a subset of the TeX markup
language. Dollar
signs indicate the beginning (and end) of math mode. So pairs of bare dollar signs are
getting unintentionally swallowed. Currently, there is no way to disable mathtext parsing. So to prevent the dollar signs from being interpreted as math markup, replace the
bare $ with \$:
# Escape each literal dollar sign. The raw string avoids the invalid '\$' escape
# sequence, and regex=False makes the replacement literal on every pandas version.
df['A'] = df['A'].str.replace('$', r'\$', regex=False)
For example,
# Bar chart with dollar-sign tick labels, drawn with matplotlib directly.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

df = pd.DataFrame({'A': ['$0-$20', '$20+'], 'B': [10, 20]})
# Escape each literal dollar sign so the labels are not parsed as math markup.
# The raw string avoids the invalid '\$' escape sequence, and regex=False makes
# the replacement literal on every pandas version.
df['A'] = df['A'].str.replace('$', r'\$', regex=False)
y = df['B']
x = df['A']
# One integer position per bar.
ind = np.arange(len(x))
fig, ax = plt.subplots(1, 1, figsize=(2, 2))
# Draw on the axes created above; tick_label puts one label under each bar.
ax.bar(ind, y,
       tick_label=x,
       align='center', width=.5, edgecolor='none',
       color='grey')
plt.show()
Alternatively, you could use df.plot(kind='bar'):
# Bar chart with dollar-sign tick labels, drawn through pandas' plotting API.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

df = pd.DataFrame({'A': ['$0-$20', '$20+'], 'B': [10, 20]})
# Escape each literal dollar sign so the labels are not parsed as math markup.
# The raw string avoids the invalid '\$' escape sequence, and regex=False makes
# the replacement literal on every pandas version.
df['A'] = df['A'].str.replace('$', r'\$', regex=False)
fig, ax = plt.subplots(1, 1, figsize=(2, 2))
# Column 'A' supplies the tick labels and column 'B' the bar heights.
df.plot(kind='bar', x='A', y='B',
        align='center', width=.5, edgecolor='none',
        color='grey', ax=ax)
plt.xticks(rotation=25)
plt.show()

Resources