Plot 350 users on bar chart using matplotlib - python-3.x

I'm trying to plot around 300 users and how many purchases they have made. My data is in a pandas dataframe, where the column 'ID' refers to a user and 'Number' to the number of purchases.
So far I have tried the following code, which I found, but I never manage to get all the IDs onto one plot.
This is the code:
import random
# Prepare Data
# Pick one random named colour per distinct purchase count (plus one);
# the fixed seed makes the same colours come out on every run.
n = len(subs_['Number'].unique()) + 1
all_colors = list(plt.cm.colors.cnames.keys())
random.seed(100)
c = random.choices(all_colors, k=n)
# Plot Bars
plt.figure(figsize=(16,10), dpi= 60)
plt.bar(subs_['ID'], subs_['Number'], color=c, width=.5)
# Write every purchase count at x = row position, y = bar height.
for i, val in enumerate(subs_['Number'].values):
    plt.text(i, val, float(val), horizontalalignment='center', verticalalignment='bottom', fontdict={'fontweight':500, 'size':10})
# Decoration
plt.gca().set_xticklabels(subs_['ID'], rotation=60, horizontalalignment= 'right')
plt.title("Number of purchases by user", fontsize=22)
plt.ylabel('# Purchases')
plt.ylim(0, 45)
plt.show()
bar chart of user purchases:

I think that your problem is coming from your IDE:
import random
import matplotlib.pyplot as plt
import pandas as pd
# Prepare Data
# Synthetic stand-in for the real frame: IDs 1..299, each with Number == ID.
d = {'ID': range(1, 300), 'Number': range(1, 300)}
subs_ = pd.DataFrame(data=d)
ids = subs_['ID'].tolist()
counts = subs_['Number'].tolist()
# One random named colour per distinct purchase count (plus one); the fixed
# seed makes the same colours come out on every run.
n = len(subs_['Number'].unique()) + 1
all_colors = list(plt.cm.colors.cnames.keys())
random.seed(100)
c = random.choices(all_colors, k=n)
# Plot Bars
plt.figure(figsize=(16,10), dpi= 60)
plt.bar(ids, counts, color=c, width=.5)
# Anchor each label at its own bar's x position (the ID). Using the row
# index instead would shift every label one bar to the left here, because
# the IDs start at 1 while the index starts at 0.
for user_id, val in zip(ids, counts):
    plt.text(user_id, val, float(val), horizontalalignment='center', verticalalignment='bottom', fontdict={'fontweight':500, 'size':10})
# Decoration
# Fix the tick positions together with the labels so that every label is
# attached to the bar it names.
plt.xticks(ticks=ids, labels=ids, rotation=60, horizontalalignment='right')
plt.title("Number of purchases by user", fontsize=22)
plt.ylabel('# Purchases')
# Scale the y axis to the data (with a little headroom for the labels)
# instead of a fixed limit that would cut off the taller bars.
plt.ylim(0, max(counts) * 1.05)
plt.show()
This works fine for me:

Related

Dynamic area coloring in plotly subplots

I have 6 graphs done with plotly-subplots where I show the stock price and its simple moving average (20 days for instance). What I would like to do is for each subplot (except one), color the area in red where the current price is below the simple moving average and green if the price is above its simple moving average (and if possible as extra, no color when the stock price is crossing over or under the average: so the color area would start and end at the marker).
I understand I could use add_vrect to define my color area but this seems to be ok for static data: here each subplot would need its "own conditional coloring"
For clarity, I used only 3 stocks/graphs in my example (and the coloring would be on the second and third graphs):
import yfinance as yf
import pandas as pd
import numpy as np
from plotly.subplots import make_subplots
import plotly.graph_objects as go
# Stocks to chart next to the benchmark: one subplot column per ticker.
benchmark = 'SPY'
tickers = ['AAPL', 'MSFT']
sma_window = 20  # length of the simple moving average, in rows

# One year of daily data: closing prices for the tickers, full OHLC for the
# benchmark (needed by the candlestick trace).
df = yf.download(tickers, period='1y', interval='1d', progress=False)['Close']
df_bench = yf.download(benchmark, period='1y', interval='1d', progress=False)
for ticker in tickers:
    df[f'{ticker}_SMA'] = df[ticker].rolling(sma_window).mean()

# Column 1 holds the benchmark, columns 2.. hold the tickers; give every
# column its own title so that the titles line up with the panels.
fig = make_subplots(
    rows=1, cols=1 + len(tickers),
    subplot_titles=(benchmark, *tickers)
)
fig.add_trace(
    go.Candlestick(x=df_bench.index,
                   open=df_bench['Open'],
                   high=df_bench['High'],
                   low=df_bench['Low'],
                   close=df_bench['Close'],
                   name='Benchmark'),
    row=1, col=1
)
# Price in blue and its moving average in red, one panel per ticker.
for col, ticker in enumerate(tickers, start=2):
    fig.add_trace(
        go.Scatter(x=df.index, y=df[ticker], name=ticker, marker_color='blue'),
        row=1, col=col
    )
    fig.add_trace(
        go.Scatter(x=df.index, y=df[f'{ticker}_SMA'], name=f'{ticker}_SMA', marker_color='red'),
        row=1, col=col
    )
fig.update_layout(height=1200, width=2400, title_text='Dynamic coloring test',
                  showlegend=False, template='plotly_white',
                  hovermode='x unified', xaxis_rangeslider_visible=False)
fig.show()
I have added two signal columns to my dataframe df that flag whether the area should be red (-1) or green (1):
# Per-row colour flag for each stock: -1 where the price is below its
# moving average, 1 otherwise.
# NOTE(review): comparisons against NaN are False, so rows that have no
# moving-average value yet also receive 1 - confirm this is intended.
for ticker in ('AAPL', 'MSFT'):
    below_sma = df[ticker] < df[f'{ticker}_SMA']
    df[f'{ticker}_Sig'] = np.where(below_sma, -1, 1)
Now I am stuck and would need some pointers as to how to use add_vrect (or maybe there are better functions -?-) dynamically and on some of the subplots.

Matplotlib - Horizontal Bar Chart Timeline With Dates - Xticks not showing date

Trying to make a graph that looks like the first image here.
However when I try and implement it, I can't work out how to get the dates to print on the X axis, the scale seems about right, just the xticks seem not to be dates, but some basically random number. The typical output is visible in figure 2.
How can I get the dates to show on the xticks. I would like it to show every month, or quarter between 2019-12-01 and 2021-03-01 (march 1st).
Bonus points for any formatting that makes it look more like the first picture.
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
# Group labels with their start and end dates (same position = same group).
event = np.array(['Groupone', 'Grouptwo', 'Group3', 'Group4', 'Group5', 'Group6'])
begin = np.array([
    datetime(year=2019, month=12, day=1),
    datetime(year=2020, month=2, day=1),
    datetime(year=2020, month=5, day=1),
    datetime(year=2020, month=11, day=1),
    datetime(year=2019, month=12, day=1),
    datetime(year=2020, month=5, day=1),
])
end = np.array([
    datetime(year=2019, month=12, day=30),
    datetime(year=2021, month=2, day=1),
    datetime(year=2021, month=2, day=1),
    datetime(year=2021, month=2, day=1),
    datetime(year=2021, month=2, day=1),
    datetime(year=2020, month=7, day=3),
])
# Sort all three arrays by start date with one shared permutation, so that
# groups with equal start dates cannot end up paired with the wrong end
# date or label.
order = np.argsort(begin, kind='stable')
beg_sort = begin[order]
end_sort = end[order]
evt_sort = event[order]
# One horizontal bar per group, starting at its begin date and spanning the
# group's duration.
plt.barh(range(len(beg_sort)), end_sort-beg_sort, left=beg_sort, align='center')
plt.yticks(range(len(beg_sort)), evt_sort)
plt.show()
You can plot each bar as a line, choosing whatever line width (lw) you prefer:
import matplotlib as mpl

# Set the color of the grid lines
mpl.rcParams['grid.color'] = "w"
fig, ax = plt.subplots(1, 1)
# Plot each item as a thick horizontal line from its begin to its end date.
# Rows are numbered from 1 so that they match the y ticks set below.
for row, (b, e) in enumerate(zip(beg_sort, end_sort), start=1):
    ax.plot([b, e], [row, row], ls='-', marker=None, lw=10)  # 10 for the line width
# Set ticks and labels on y axis
ax.set_yticks(range(1, len(evt_sort) + 1))
ax.set_yticklabels(evt_sort)
# Set color and transparency of the background
ax.patch.set_facecolor('gray')
ax.patch.set_alpha(0.3)
# activate grid
ax.grid(True)
Moreover, you can play with the background grid, customizing it according to your needs.
Hacked something together that works, posting for curiosity, however go with PieCot's answer above:
import matplotlib.pyplot as plt
import numpy as np
from datetime import date
from datetime import datetime
import matplotlib.dates as mdates
#https://stackoverflow.com/questions/58387731/plotting-month-year-as-x-ticks-in-matplotlib
# Two side-by-side axes are created; only the right-hand one is kept (the
# left one is removed again just before saving).
fig, ax = plt.subplots(ncols=2, nrows=1, figsize=(15, 4.18))

# Group labels with their start and end dates (same position = same group).
event = np.array(['Groupone','Grouptwo','Group3','Group4','Group5','Group6'])
begin = np.array([datetime(year=2019,month=12,day=1),datetime(year=2020,month=2,day=1),datetime(year=2020,month=5,day=1),datetime(year=2020,month=11,day=1),datetime(year=2019,month=12,day=1),datetime(year=2020,month=5,day=1)])
end = np.array([datetime(year=2019,month=12,day=30),datetime(year=2021,month=2,day=1),datetime(year=2021,month=2,day=1),datetime(year=2021,month=2,day=1),datetime(year=2021,month=2,day=1),datetime(year=2020,month=7,day=3)])

# Order everything by start date.
order = np.argsort(begin)
beg_sort = np.sort(begin)
end_sort = end[order]
evt_sort = event[order]

# Month/year bounds used for the printed tick summary below.
start_m, start_y = 12, 2019
end_m, end_y = 3, 2021

# One major tick per month on the right-hand axes, labelled e.g. "Dec-2019"
# and rotated so that the labels do not overlap.
date_axis = ax[1].xaxis
date_axis.set_major_locator(mdates.MonthLocator())
date_axis.set_major_formatter(mdates.DateFormatter('%b-%Y'))
date_axis.set_tick_params(rotation=90)

start = date(year=start_y, month=start_m, day=1)
print(start)
end = date(year=end_y, month=end_m, day=1)
print(end)

# Evenly spaced "month/year" strings across the range; they are only
# printed and never applied to the plot.
Nticks = 6
delta = (end - start) / Nticks
x_ticks = []
for step in range(Nticks):
    tick_day = start + step * delta
    x_ticks.append('{}/{}'.format(tick_day.month, tick_day.year))
print(x_ticks)

# pyplot draws on the current axes, i.e. the last one created (ax[1]).
rows = range(len(beg_sort))
plt.barh(rows, end_sort - beg_sort, left=beg_sort, align='center')
plt.yticks(rows, evt_sort)

fig.delaxes(ax[0])
plt.savefig('gwern.pdf', bbox_inches='tight')

How to add percentage label on top of bar chart from a data frame with different sum total data groups

I am new to coding in Python and am trying to build a bar chart with a percentage label on top of each bar. I have a sample data frame, Quiz2. The code I wrote shows only 1600% on the first bar. Could anyone help me do this correctly?
#Approach 2
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
sns.set()
%matplotlib inline
# Counts arrive as strings: rows become Male/Female, values become floats.
Quiz2 = pd.DataFrame({'Kaha': ['16', '5'], 'Shiny': ['16', '10']})
data=Quiz2 .rename(index={0: "Male", 1: "Female"})
data=data.astype(float)
Q1p = data[['Kaha','Shiny']].plot(kind='bar', figsize=(5, 5), legend=True, fontsize=12)
Q1p.set_xlabel("Gender", fontsize=12)
Q1p.set_ylabel("Number of people", fontsize=12)
#Q1p.set_xticklabels(x_labels)
for p in Q1p.patches:
    width = p.get_width()
    height = p.get_height()
    x, y = p.get_xy()
    # ':.0%' multiplies by 100, so a raw count of 16 is rendered as "1600%".
    Q1p.annotate(f'{height:.0%}', (x + width/2, y + height*1.02), ha='center')
    # Kept inside the loop as posted in the question: the figure is shown
    # after the first bar has been annotated, which is the behaviour being
    # asked about.
    plt.show()
I want the percentages for Kaha (sum total 21) to appear as 76.2% for Male and 23.8% for Female, and those for Shiny (sum total 26) as 61.5% for Male and 38.5% for Female. Any help would be appreciated.
In approach 2, only one value is displayed because plt.show() is inside the for loop; it should be outdented so that it runs after the loop has finished. You are getting 1600% because the line beginning with Q1p.annotate(f'{height:.0%}' formats the raw bar height (16) as a percentage. Instead of height, pass the height divided by the appropriate total (height/total) so that the value being formatted is a fraction.
Here is a solution, but not sure if I am computing the percentages correctly:
# Counts arrive as strings: rows become Male/Female, values become floats.
Quiz2 = pd.DataFrame({'Kaha': ['16', '5'], 'Shiny': ['16', '10']})
data = Quiz2.rename(index={0: "Male", 1: "Female"}).astype(float)
columns = ['Kaha', 'Shiny']
Q1p = data[columns].plot(kind='bar', figsize=(5, 5), legend=True, fontsize=12)
Q1p.set_xlabel("Gender", fontsize=12)
Q1p.set_ylabel("Number of people", fontsize=12)
# The bar plot creates one bar container per plotted column, in column
# order. Label every bar with its share of that column's own total, e.g.
# Kaha: 16/21 -> 76.2% and 5/21 -> 23.8%.
for column, bars in zip(columns, Q1p.containers):
    total = data[column].sum()
    for p in bars:
        width = p.get_width()
        height = p.get_height()
        x, y = p.get_xy()
        # Guard against an all-zero column so the label stays finite.
        share = height / total if total else 0.0
        Q1p.annotate(f'{share:.1%}', (x + width/2, y + height*1.02), ha='center')
# Show the figure once, after every bar has been annotated.
plt.show()

Why do I get this error: not enough values to unpack (expected 5, got 4)?

Basically I want a graph with 3 subplots. The 2nd plot will be a candlestick chart and is the one I am currently working on now. However, I keep getting a "not enough values to unpack" error and I don't really know why???
# Date range for the price history.
start = dt.datetime(2016,1,1)
end = dt.datetime(2016,12,31)
fig = plt.figure()
# Three stacked panels on a 6-row grid: 1 row, 4 rows (candlesticks), 1 row.
ax1 = plt.subplot2grid((6,1), (0,0), rowspan = 1, colspan = 1)
ax2 = plt.subplot2grid((6,1), (1,0), rowspan = 4, colspan = 1)
ax3 = plt.subplot2grid((6,1), (5,0), rowspan = 1, colspan = 1)
# Use the start/end variables defined above.
df2 = pdr.data.DataReader('TSLA', 'yahoo', start, end)
df2.drop('Adj Close', axis = 1, inplace = True)
# 20- and 50-row rolling means of the closing price.
MA20 = df2['Close'].rolling(20).mean()
MA50 = df2['Close'].rolling(50).mean()
cols = ['Open', 'High', 'Low', 'Close', 'Volume']
df2 = df2[cols] #reordering columns to OHLC order
# This is the call that raises the ValueError reported in the question: the
# DataFrame is passed as-is, without a leading date column.
candlestick_ohlc(ax2, df2, width=0.4, colorup='#77d879', colordown='#db3f3f')
plt.show()
I get this error:
>---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-329-f2bd49065390> in <module>
18 df2 = df2[cols]
19
---> 20 candlestick_ohlc(ax2, df2, width=0.4, colorup='#77d879', colordown='#db3f3f')
21
22 ax1.plot(MA20, label = '20-day')
>c:\users\cecilia\appdata\local\programs\python\python35\lib\site-packages\mpl_finance.py in candlestick_ohlc(ax, quotes, width, colorup, colordown, alpha)
234 return _candlestick(ax, quotes, width=width, colorup=colorup,
235 colordown=colordown,
--> 236 alpha=alpha, ochl=False)
237
238
>c:\users\cecilia\appdata\local\programs\python\python35\lib\site-packages\mpl_finance.py in _candlestick(ax, quotes, width, colorup, colordown, alpha, ochl)
281 t, open, close, high, low = q[:5]
282 else:
--> 283 t, open, high, low, close = q[:5]
284
285 if close >= open:
>ValueError: not enough values to unpack (expected 5, got 4)
The chart I get also does not seem right. It's fully red, but for candlesticks that closed higher than it opened, I have set the color to be green but no green candles appear. I have arranged the columns in the 'OHLC' order to correspond with the function, so I don't know why it comes out like this too.
candlestick_ohlc needs the date as the first value of each row, and the dates need to be converted to numbers. Hence the following would work.
import datetime as dt
import matplotlib.pyplot as plt
import pandas_datareader as pdr
from mpl_finance import candlestick_ohlc
import matplotlib.dates as mdates
# Date range for the price history.
start = dt.datetime(2016, 1, 1)
end = dt.datetime(2016, 12, 31)
fig, ax = plt.subplots()

# Download the quotes, drop the adjusted close and turn the date index into
# an ordinary "Date" column.
df2 = pdr.data.DataReader('TSLA', 'yahoo', start, end)
df2 = df2.drop('Adj Close', axis=1).reset_index()

# Convert the dates to Matplotlib's numeric date representation.
df2["Date"] = mdates.date2num(df2["Date"].values)
print(df2.head())

# Date first, then open/high/low/close, which is the order in which
# candlestick_ohlc unpacks each row.
cols = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']
df2 = df2[cols]
candlestick_ohlc(
    ax,
    df2.values,
    width=0.4,
    colorup='#77d879',
    colordown='#db3f3f',
)
# Treat the numeric x values as dates when formatting the axis.
ax.xaxis_date()
plt.show()

python bokeh interactively plot n curves between min and max

I am trying to generate the plot of a function of two parameters, where one is used as x_axis and for the other I plot n curves, varying the parameter between a min and max value.
The following code works:
import numpy as np
import bokeh
from bokeh.plotting import figure
from bokeh.io import push_notebook, show, output_notebook
output_notebook()
x = np.linspace(0,10,100)
f = figure()
# nfreq evenly spaced frequencies between fmin and fmax (inclusive).
fmin=1
fmax=3
nfreq=4
freq=np.linspace(fmin,fmax,nfreq)
# Draw one sine curve per frequency, all on the same figure.
for i in freq:
    y = np.sin(i*x)
    f.line(x,y)
# Show the figure once, after all curves have been added.
show(f)
Now I would like to have 3 sliders to interactively vary fmin, fmax and nfreq. I could not figure out how to do it...
Thanks for any help
This example works for Bokeh v1.0.4. Run as: bokeh serve --show app.py
The content of app.py:
import numpy as np
from bokeh.models import Slider, Row, Column
from bokeh.plotting import figure, show, curdoc
from bokeh.models.sources import ColumnDataSource
plot = figure()
layout = Column(plot)
# Data source and line glyph for every frequency drawn so far, keyed by the
# frequency value.
sources, lines = {}, {}


def get_x(n):
    """Return n copies of the x grid (100 points from 0 to 10)."""
    return [np.linspace(0, 10, 100) for i in range(n)]


def get_y(n):
    """Return sin(f * x) on the x grid for every frequency f in n."""
    return [np.sin(i * np.linspace(0, 10, 100)) for i in n]


def update(attr, old, new):
    """Slider callback: redraw using the current values of the three sliders.

    The sliders are the last three children of the layout, in the order
    fmin, fmax, nfreq. The callback arguments themselves are not used.
    """
    update_sources(layout.children[-3].value, layout.children[-2].value, layout.children[-1].value)


def update_sources(fmin, fmax, nfreq):
    """Show exactly the curves for nfreq frequencies between fmin and fmax.

    A source and a line are created the first time a frequency is seen and
    reused afterwards; lines whose frequency is not in the current set are
    hidden rather than removed.
    """
    # np.linspace needs an integer number of samples.
    freq = np.linspace(fmin, fmax, int(nfreq))
    for f, x, y in zip(freq, get_x(len(freq)), get_y(freq)):
        data = {'x': x, 'y': y}
        if f not in sources:
            sources[f] = ColumnDataSource(data)
            lines[f] = plot.line('x', 'y', source = sources[f])
        else:
            sources[f].data = data
    for known_freq, line in lines.items():
        line.visible = known_freq in freq


# One slider per parameter, each starting at 1 with its own upper bound.
for txt, upper in zip(['fmin', 'fmax', 'nfreq'], [3, 4, 5]):
    slider = Slider(start = 1, end = upper, value = 1, title = txt)
    slider.on_change('value', update)
    layout.children.append(slider)

# Draw the initial curve(s). update_sources already adds the line for every
# new source, so no further plot.line call is needed here; a second glyph on
# the same source would stay visible after its frequency is deselected.
update(None, None, None)
curdoc().add_root(layout)

Resources