Difficulties with groupby() and Bokeh figure() - pandas-groupby

I don't know exactly where the problem is, but I believe that I haven't assigned the data to the figure. Any help, please, to drag these data from limbo back into the figure?
# Question snippet: attempts a bar chart of city MPG per vehicle class.
from bokeh.plotting import figure, output_notebook, show
import numpy as np
import pandas as pd
from bokeh.models import ColumnDataSource
from bokeh.colors import RGB
from bokeh.sampledata.autompg2 import autompg2 as data
# NOTE(review): this rebinds `data`, replacing the sample DataFrame imported
# on the previous line with the contents of the CSV file.
data = pd.read_csv('auto-mpg2.csv', thousands=',', index_col='class')
output_notebook()
print(data.dtypes)
dataHeads = data.head()
# Two groupings of the same 50-row slice: by the 'class' index and by 'cty'.
group = data[100:150].groupby('class')
group1 = data[100:150].groupby('cty')
# NOTE(review): both groupby objects are passed positionally to a single
# ColumnDataSource here.
source = ColumnDataSource(group, group1)
#dataHeads
#yA = dataU.loc['a4']
#dataU.loc['malibu'].cty
# NOTE(review): the title string literal below is split across two physical
# lines, which is a syntax error as written.
p = figure(plot_height=500, plot_width=1200, x_range=group, title='City
MPG vs Class MPG',
x_axis_label='Class', y_axis_label='MPG')
# NOTE(review): `index` is a bare name that is not defined anywhere in this
# snippet; x and top are both given that same name.
p.vbar(x=index, top=index, width=0.9, color ='#35B778', source=source)
show(p)

Do you mean something like this?
from bokeh.plotting import figure, output_notebook, show
import numpy as np
import pandas as pd
from bokeh.models import ColumnDataSource
from bokeh.colors import RGB
from bokeh.sampledata.autompg2 import autompg2 as data

# Group the 50-row slice by vehicle class.  The same groupby object feeds both
# the data source and the categorical x-range, so the bars and the axis always
# agree on which classes exist.
group = data[100:150].groupby('class')

# A ColumnDataSource built from a groupby holds the per-group summary
# statistics, with columns named "<column>_<stat>" (e.g. 'cty_mean') plus the
# group key ('class').
source = ColumnDataSource(group)

p = figure(plot_height=500, plot_width=1200, x_range=group,
           title='City MPG vs Class MPG',
           x_axis_label='Class', y_axis_label='MPG')
# One bar per class, as tall as that class's mean city MPG.  Plotting the raw
# rows instead would stack one bar per car on top of each other.
p.vbar(x='class', top='cty_mean', width=0.9, color='#35B778', source=source)
show(p)

Related

Formatting datetime in Bokeh hover tool

I am trying to get a properly formatted date in Bokeh when using the hover tool and plotting data from a pandas DataFrame. I think I have tried every solution I have seen proposed, but none of them works. With the example code below, I obtain this:
import pandas as pd
import datetime
from datetime import datetime
from bokeh.plotting import figure, show, gridplot
from bokeh.io import output_notebook, push_notebook
from bokeh.models.formatters import DatetimeTickFormatter
from bokeh.models import ColumnDataSource
from bokeh.models.tools import HoverTool
from bokeh.models import LinearAxis, Range1d
output_notebook()

# Three days of sample rows: [date, sum, size, present].
data = [["2022-08-05", 25,545,1],["2022-08-06",33,543,0],["2022-08-07",17,542,1]]
df_test = pd.DataFrame(data, columns=['Date','sum','size','present'])
df4 = df_test.set_index('Date')
# Parse the string index into real datetimes so the datetime axis can use it.
df4.index = pd.to_datetime(df4.index)

Source = ColumnDataSource(data={'date': df4.index,
                                'percent': 100*df4['sum']/df4['size']})

# Hover fields refer to data source columns with an "@" prefix; the formatter
# key must match the field name used in the tooltip.
hover = HoverTool(tooltips=[('Date', '@date'), ('Percent', '@percent')],
                  formatters={'@date': 'datetime'}, mode='vline')

f2 = figure(plot_height=400, plot_width=800, x_axis_label="Date",
            y_axis_label="Pourcentage de grises", x_axis_type="datetime")
l1 = f2.circle(x='date', y="percent", source=Source, line_width=4,
               color="sandybrown", legend_label='% gris')
f2.legend.location = "top_right"
f2.legend.click_policy = "hide"
f2.add_tools(hover)
show(f2)
Percentage (%) is a number format, so we don't need to multiply by 100 ourselves.
`@date{%F}` is equivalent to `@date{%Y-%m-%d}`.
from bokeh.models.formatters import NumeralTickFormatter

# Store the ratio as a plain fraction; the "%" numeral formats below scale it
# by 100 for display, so it must not be pre-multiplied.
Source = ColumnDataSource(data={'date': df4.index,
                                'percent': df4['sum']/df4['size']})

# "@date{%F}" applies a strftime-style format through the 'datetime' formatter
# registered for that field; "@percent{0.00%}" uses the default numeral
# formatter to show the fraction as a percentage.
hover = HoverTool(tooltips=[('Date', '@date{%F}'), ('Percent', '@percent{0.00%}')],
                  formatters={'@date': 'datetime'}, mode='vline')

# Format the y axis the same way so the ticks and tooltips agree.
f2.yaxis.formatter = NumeralTickFormatter(format='0.00 %')

Bokeh: Unable to plot legend

I am not sure how to add legend to my plots based on updates to the Bokeh Library. Here is my code -
# Question snippet: scatter of lemon and orange sales against temperature.
import numpy as np
import pandas as pd
url = 'https://raw.githubusercontent.com/Deepakgthomas/Lemonade_Sales/main/Lemonade_Lab8.csv'
lemon = pd.read_csv(url)
from bokeh.models import ColumnDataSource
source_Q4 = ColumnDataSource(lemon)
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
output_notebook()
p = figure(title = "Lemon and Orange Sales by Temperature")
# NOTE(review): the `legend=` keyword on the next two glyphs is what the
# BokehDeprecationWarning quoted after this snippet refers to.
p.circle("Temperature", "Lemon", source = source_Q4, color = "green", size = 8, legend = dict(value = "Lemon"))
# NOTE(review): this glyph reads the "Lemon" column although it is labelled
# "Orange".
p.triangle("Temperature", "Lemon", source = source_Q4, color = "orange", size = 8, legend = dict(value = "Orange"))
p.legend.location = "top_left"
show(p)
However, this gives me the warning -
"BokehDeprecationWarning: 'legend' keyword is deprecated, use explicit 'legend_label', 'legend_field', or 'legend_group' keywords instead
BokehDeprecationWarning: 'legend' keyword is deprecated, use explicit 'legend_label', 'legend_field', or 'legend_group' keywords instead"
As the Warning states, use legend_label instead of legend. For more information, check the user guide.
import numpy as np
import pandas as pd
from bokeh.models import ColumnDataSource
from bokeh.io import output_notebook, show
from bokeh.plotting import figure

# Load the lemonade sales table and wrap it for Bokeh.
url = 'https://raw.githubusercontent.com/Deepakgthomas/Lemonade_Sales/main/Lemonade_Lab8.csv'
lemon = pd.read_csv(url)
source_Q4 = ColumnDataSource(lemon)

output_notebook()

# One glyph per drink, each carrying its own explicit legend label.
p = figure(title="Lemon and Orange Sales by Temperature")
p.circle(
    "Temperature", "Lemon",
    source=source_Q4, color="green", size=8, legend_label="Lemon",
)
p.triangle(
    "Temperature", "Orange",
    source=source_Q4, color="orange", size=8, legend_label="Orange",
)
p.legend.location = "top_left"
show(p)

How can the `_property_values` of an element of a bokeh `figure.renderers` be changed directly?

How can the _property_values of an element of a bokeh figure.renderers be changed directly? I learned that the elements of renderers have an id, so I expected to be able to do something like renderers['12345']. But as it is a list (a PropertyValueList, to be more precise), this doesn't work. Instead, the only solution I found is to iterate over the list, store the matching element under a new name (a pointer?), and modify it through that name, thus modifying the original element.
Here is my toy example where a vertical line in a histogram is updated based on some widget's value:
import hvplot.pandas
import ipywidgets as widgets
import numpy as np
from bokeh.io import push_notebook, show, output_notebook
from bokeh.models import Span
from bokeh.plotting import figure
%matplotlib inline
hist, edges = np.histogram([1, 2, 2])
p = figure()
r = p.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:])
vline = Span(location=0, dimension='height')
p.renderers.extend([vline])
def update_hist(x):
myspan = [x for x in p.renderers if x.id==vline.id][0]
myspan._property_values['location'] = x
show(p, notebook_handle=True)
widgets.interact(update_hist, x = widgets.FloatSlider(min=1, max=2))
Bigreddot pointed me in the right direction: I don't have to update p directly, but rather the elements used to build p (here the Span). That led me to this question, whose code contains the solution: update vline.location.
Full code:
import hvplot.pandas
import ipywidgets as widgets
import numpy as np
from bokeh.io import push_notebook, show, output_notebook
from bokeh.models import Span
from bokeh.plotting import figure
%matplotlib inline
hist, edges = np.histogram([1, 2, 2])
p = figure()
r = p.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:])
vline = Span(location=0, dimension='height')
p.renderers.extend([vline])
show(p, notebook_handle=True)
def update_hist(x):
vline.location = x
push_notebook()
widgets.interact(update_hist, x = widgets.FloatSlider(min=1, max=2, step = 0.01))
As a Python beginner, I still often overlook that Python does not have variables in the "box holding a value" sense: names are references to objects. So we can change the object that x refers to by modifying it through y.
# Two names bound to one list: mutating it through either name is visible
# through both.
x = ['alice']
y = x  # binds a second name to the same list object; no copy is made
y[0] = 'bob'
x # is now ['bob'] too

Trouble loading csv data into bokeh

I am having trouble loading data from a CSV file into Bokeh. It works with the Bokeh sample data, but when I try the same thing with a CSV file, the data does not load. I have been reading up on it, but no luck so far.
Thanks in advance
# Question snippet: load the unemployment CSV (indexed by 'Year') and try to
# plot the 'Annual' column.
df = pd.read_csv('unemployment1948.csv', delimiter = ',', index_col =
'Year')
# NOTE(review): df is already the result of pd.read_csv; this re-wraps it.
df = pd.DataFrame(df)
df.head()
output_notebook()
# Group every row from position 35 onward by the 'Year' index.
group = df[35:].groupby('Year')
source = ColumnDataSource(df)
group.describe()
df.columns
#source = ColumnDataSource(df(x=df.loc[15:40].index,
# y=df.loc[15:40].Annual))
p = figure(plot_height=300, plot_width=900, x_range=group,
title='Umployment over the years',
x_axis_label='Year', y_axis_label='Annual')
# NOTE(review): `index` is a bare name that is not defined anywhere in this
# snippet.
p.circle(x=index, y='Annual', width=0.9, color ='#35B778' , source=source)
show(p)
Your df is already a pandas DataFrame. Try this:
import os
import pandas as pd
# output_notebook is called below, so it has to be imported alongside show.
from bokeh.io import output_notebook, show
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure

# Read the CSV that sits next to this script; 'Year' stays an ordinary column
# so the glyph can reference it by name.
# NOTE(review): `__file__` is not available in an interactive notebook
# session - confirm how this snippet is meant to be run.
df = pd.read_csv(os.path.join(os.path.dirname(__file__), "unemployment1948.csv"))
output_notebook()

source = ColumnDataSource(df)
p = figure(plot_height=300, plot_width=900,
           title='Umployment over the years',
           x_axis_label='Year', y_axis_label='Annual')
p.circle(x='Year', y='Annual', line_width=0.9, color='#35B778', source=source)
show(p)
I learned how to deal with DataFrame and ColumnDataSource, so the data can be manipulated easily and there is no need to use the os module.
Thanks for the help.
import csv
import pandas as pd
import numpy as np

# Load the unemployment table; 'Year' and 'Annual' remain regular columns.
dfU = pd.read_csv('unemployment1948.csv')
dfU = pd.DataFrame(dfU)
dfU.index

# Rows from position 35 onward, kept for reference (the plot below reads the
# whole table through the data source).
y = dfU.iloc[35:]['Year']
x = dfU.iloc[35:]['Annual']

source = ColumnDataSource(dfU)
output_notebook()

# One bar per year, as tall as that year's annual figure.
p = figure(
    plot_height=300,
    plot_width=800,
    title='Unemployment over the years',
)
p.vbar(x='Year', top='Annual', width=0.9, color='#35B778', source=source)
show(p)

Why won't bokeh figure update with new data?

I'm creating a Bokeh application that pulls stock prices from Quandl and changes the plot based on the stock symbol the user inputs. I used an example from this Bokeh tutorial as a model.
Everything is working except the plot won't update when I input a new symbol.
I've tried passing the new data as a dictionary (before, I was just passing a DataFrame to ColumnDataSource()), but no luck.
import pandas as pd
import numpy as np
from bokeh.models.widgets import TextInput, Select
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure
from bokeh.layouts import column, row
from bokeh.io import show, output_notebook
import quandl
This is the function to get the data:
def get_data(symbol):
    """Fetch annual WIKI prices for *symbol* from Quandl.

    Returns a dict with two equal-length lists, 'date' and 'high', taken from
    the Date and High columns of the downloaded data.
    """
    data = quandl.get('WIKI/' + symbol, collapse = 'annual', returns='numpy')
    # Wrap the returned array in a DataFrame so columns can be picked by name.
    df = pd.DataFrame(data)
    return {
        'date': list(df.Date.values),
        'high': list(df.High.values),
    }
And this is a function for the plot:
def modify_doc(doc):
    """Build the stock-price plot and ticker input, and add them to *doc*."""
    symbol = 'AAWW'
    source = ColumnDataSource(data = get_data(symbol))

    p = figure(x_axis_type='datetime', title='Stock Price', plot_height=350, plot_width=800)
    p.xgrid.grid_line_color=None
    p.ygrid.grid_line_alpha=0.5
    p.xaxis.axis_label = 'year'
    p.yaxis.axis_label = 'close'

    # NOTE(review): the line is given the current column values directly and
    # is not passed `source`; this is the behavior the question is about.
    r = p.line(source.data['date'],
               source.data['high'],
               line_color = 'navy')

    # NOTE(review): COLORS is not defined in this snippet, and `select` is not
    # added to the layout below.
    select = Select(title="Color", value="navy", options=COLORS)
    input = TextInput(title="Ticker Symbol", value=symbol)

    def update_symbol(attrname, old, new):
        # Replace the source's data with the data for the newly typed symbol.
        source.data = get_data(input.value)

    input.on_change('value', update_symbol)
    layout = column(row(input, width=400), row(p))
    doc.add_root(layout)


show(modify_doc)
I would think that the plot would update when the new symbol is entered, but it just stays the same.
Any thoughts?
Your code looks like a Bokeh server application, but you use show(), which doesn't look right to me. You are also trying to update the figure by assigning new data to the source, but you did not pass your source to the glyph, so it won't have any effect. Could you check whether this code works for you? (It should work for Bokeh v1.0.4.)
import random
import pandas as pd
import quandl  # used by get_data below
from tornado.ioloop import IOLoop
from bokeh.server.server import Server
from bokeh.application import Application
from bokeh.application.handlers.function import FunctionHandler
from bokeh.plotting import figure, ColumnDataSource
from bokeh.models.widgets import TextInput
from bokeh.layouts import column, row


def make_document(doc):
    """Build the stock-price plot and ticker input, and add them to *doc*."""
    symbol = 'AAWW'

    def get_data(symbol):
        """Return {'date': [...], 'high': [...]} of annual prices for *symbol*."""
        dictionary = {}
        data = quandl.get('WIKI/' + symbol, collapse = 'annual', returns = 'numpy')
        df = pd.DataFrame(data)
        dictionary['date'] = list(df.Date.values)
        dictionary['high'] = list(df.High.values)
        return dictionary

    source = ColumnDataSource(data = get_data(symbol))

    p = figure(x_axis_type = 'datetime', title = 'Stock Price', plot_height = 350, plot_width = 800)
    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_alpha = 0.5
    p.xaxis.axis_label = 'year'
    p.yaxis.axis_label = 'close'

    # Name the columns and hand the source to the glyph, so that assigning
    # new data to source.data redraws the line.
    r = p.line(x = 'date',
               y = 'high',
               source = source,
               line_color = 'navy')

    # Named ticker_input rather than `input` to avoid shadowing the builtin.
    ticker_input = TextInput(title = "Ticker Symbol", value = symbol)

    def update_symbol(attrname, old, new):
        # Swap in the data for the newly typed symbol.
        source.data = get_data(ticker_input.value)

    ticker_input.on_change('value', update_symbol)
    layout = column(row(ticker_input, width = 400), row(p))
    doc.add_root(layout)


# Serve the document as a Bokeh server application and open it in a browser.
io_loop = IOLoop.current()
server = Server({'/myapp': Application(FunctionHandler(make_document))}, port = 5001, io_loop = io_loop)
server.start()
server.show('/myapp')
io_loop.start()
Basically the main change is here:
# Columns are referenced by name and the source is passed to the glyph.
r = p.line(x='date', y='high', source=source, line_color='navy')
Based on the answer I got from Tony, I just had to change one line of code:
# The one changed call: column names plus an explicit source.
r = p.line(
    x='date',
    y='high',
    source=source,
    line_color='navy',
)

Resources