[![chart showing numbers without correct formatting][1]][1]
I need to format the labels on these bars so that they are rounded to the nearest whole number. I have the following code:
def chart_tender_response_times(dataframe=None):
    """Build a yearly bar chart of mean ``median_duration`` with a text label per bar.

    Args:
        dataframe: Data handed straight to ``alt.Chart``; the encodings read
            its ``date`` and ``median_duration`` fields.

    Returns:
        The bar chart combined (via ``+``) with a text chart derived from it.
    """
    x_year = alt.X("year(date):O")
    # This is our units section, only describe the units of measurement here.
    y_mean = alt.Y("mean(median_duration):Q", axis=alt.Axis(title="Unit: days."))
    hover = alt.Tooltip(
        ["mean(median_duration):Q"],
        format=",.2r",
        title="Days to respond to a tender",
    )

    bars = alt.Chart(dataframe, title="Median time to respond to a tender").mark_bar()
    bars = bars.encode(x_year, y_mean, hover)

    # The labels reuse every encoding of the bars and only swap the mark and add `text`.
    labels = bars.mark_text(align="center", baseline="bottom")
    labels = labels.encode(text='mean(median_duration):Q')
    return bars + labels
I've tried variations of the following...
text = (
chart.mark_text(align="center", baseline="bottom")
.encode(text='mean(median_duration):Q', format='.,2r')
)
but this returns the following schema validation error:
SchemaValidationError: Invalid specification
altair.vegalite.v3.api.Chart, validating 'required'
'data' is a required property
My hunch is that I have to somehow call and format the value, before adding it to the chart, but I can't see how to do this from either the documentation or the examples.
You need to wrap the format in alt.Text, as in encode(text=alt.Text('mean(median_duration):Q', format=',.2r'))
Also, I think format=',.0f' is more robust to round to the nearest integer (e.g. if you have 256.4, it would be rounded to 256, whereas with format=',.2r' you'd get 260)
Below is an example with a function a bit modified to fit another dataset (as you did not provide one):
import altair as alt
from vega_datasets import data
cars = data("cars")
def chart_tender_response_times(dataframe=None):
    """Return a bar chart of mean ``Displacement`` per year with integer-formatted labels.

    Args:
        dataframe: Data passed to ``alt.Chart``; the encodings read its
            ``Year`` and ``Displacement`` fields.

    Returns:
        The bars layered (via ``+``) with text labels formatted as ``",.0f"``.
    """
    measure = "mean(Displacement):Q"
    whole_number = ",.0f"

    channels = [
        alt.X("year(Year):O"),
        # This is our units section, only describe the units of measurement here.
        alt.Y(measure, axis=alt.Axis(title="Unit: days.")),
        alt.Tooltip([measure], format=whole_number, title="Days to respond to a tender"),
    ]
    bars = alt.Chart(dataframe, title="Median time to respond to a tender")
    bars = bars.mark_bar().encode(*channels)

    # Wrapping the field in alt.Text is what allows a format to be attached to the label.
    label_channel = alt.Text(measure, format=whole_number)
    labels = bars.mark_text(align="center", baseline="bottom").encode(text=label_channel)
    return bars + labels
chart_tender_response_times(cars)
Related
I'm trying to make the final segment of a line plot dashed to indicate incomplete data. From what I can tell I should be able to do this using a condition on strokeDash. However I can't figure out how to get the condition predicate to work using a datetime field.
alt.Chart(rates)
.mark_line(point=True)
.encode(
x=alt.X("start_date:T", scale=alt.Scale(nice="week")),
y="install_rate",
strokeDash=alt.condition(
f"datum.start_date > toDate({start_dates[-2].isoformat()})",
alt.value([5, 5]), # dashed line: 5 pixels dash + 5 pixels space
alt.value([0]), # solid line
)
)
This gives me an error:
Error: Illegal callee type: MemberExpression
You can fix the error you are encountering by making sure that pandas reads in the dates as a temporal data type:
import pandas as pd
import altair as alt
# Parse the dates up front so the column is stored as datetimes rather than strings.
parsed_dates = pd.to_datetime(['2022-05-06', '2022-05-13', '2022-05-19', '2022-05-25'])
rates = pd.DataFrame({
    'start_date': parsed_dates,
    'install_rate': [0.05, 0.06, 0.08, 0.09],
})

# Expression evaluated per datum: true selects the first value, false the second.
after_cutoff = "datum.start_date > toDate('2022-05-19')"
alt.Chart(rates).mark_line(point=True).encode(
    x=alt.X("start_date:T"),
    y="install_rate",
    color=alt.condition(after_cutoff, alt.value('blue'), alt.value('red')),
)
However, as you can see, the line is not amenable to modification via a condition. I think this is because it is considered a single continuous mark, whereas the points are split up and can be changed individually.
You could group the line by creating a new separate field and grouping by it, which creates two separate lines.
# Boolean flag per row; used below as the colour field.
rates['above_threshold'] = rates['start_date'] > '2022-05-13'

line_with_points = alt.Chart(rates).mark_line(point=True)
line_with_points.encode(
    color='above_threshold',
    x=alt.X("start_date:T"),
    y="install_rate",
)
However, that causes issues with the gap as you can see above. I think for your case the easiest might be to layer two charts with filter transforms:
# Encodings shared by the solid and the dashed segment.
base = alt.Chart(rates).encode(
    x=alt.X("start_date:T"),
    y="install_rate",
)

# One boundary for both filters. It is written as a date-time string without a
# UTC offset: JavaScript parses a date-only string such as '2022-05-19' as UTC
# midnight, whereas the timestamps in the data carry no offset and are read as
# local time, so a date-only boundary shifts relative to the data depending on
# the viewer's timezone. The comparisons are inclusive so the boundary point is
# part of both layers and the dashed segment starts where the solid one ends.
boundary = "toDate('2022-05-19T00:00:00')"

dashed_tail = base.mark_line(strokeDash=[5, 5]).transform_filter(
    f"datum.start_date >= {boundary}"
)
solid_head = base.mark_line().transform_filter(
    f"datum.start_date <= {boundary}"
)
dashed_tail + solid_head
I have the following graph in Altair:
The code used to generate it is as follows:
data = pd.read_csv(data_csv)
display(data)
display(set(data['algo_score_raw']))
# First generate base graph
base = alt.Chart(data).mark_circle(opacity=1, stroke='#4c78a8').encode(
x=alt.X('Paragraph:N', axis=None),
y=alt.Y('Section:N', sort=list(OrderedDict.fromkeys(data['Section']))),
size=alt.Size('algo_score_raw:Q', title="Number of Matches"),
).properties(
width=900,
height=500
)
# Next generate the overlying graph with the lines
lines = alt.Chart(data).mark_rule(stroke='#4c78a8').encode(
x=alt.X('Paragraph:N', axis=alt.Axis(labelAngle=0)),
y=alt.Y('Section:N', sort=list(OrderedDict.fromkeys(data['Section'])))
).properties(
width=900,
height=500
)
if max(data['algo_score_raw']) == 0:
return lines # no circles if no matches
else:
return base + lines
However, I don't want the decimal values in my legend; I only want 1.0, 2.0, and 3.0, because those are the only values that are actually present in my data. However, Altair seems to default to what you see above.
The legend is generated based on how you specify your encoding. It sounds like your data are better represented as ordered categories than as a continuous quantitative scale. You can specify this by changing the encoding type to ordinal:
size=alt.Size('algo_score_raw:O')
You can read more about encoding types at https://altair-viz.github.io/user_guide/encoding.html
You can use alt.Legend(tickCount=2) (labelExpr could also be helpful; see the docs for more):
import altair as alt
from vega_datasets import data
# Load the cars dataset and scale Acceleration down by a factor of ten.
source = data.cars()
source['Acceleration'] = source['Acceleration'].div(10)

# Scatter plot whose circle size encodes Acceleration, with the default legend.
chart = (
    alt.Chart(source)
    .mark_circle(size=60)
    .encode(x='Horsepower', y='Miles_per_Gallon', size='Acceleration')
)
chart

# Same chart, but the size legend is given tickCount=2.
size_channel = alt.Size('Acceleration', legend=alt.Legend(tickCount=2))
chart.encode(size=size_channel)
Using Altair, I'm trying to plot some data from a Dataframe:
# Box plots of N50, faceted into one column per Assembler and one row per Amplicon.
# `data`, `height` and `width` are not defined in this snippet.
plot_N50 = alt.Chart(data).mark_boxplot(opacity=0.5).encode(
# y axis is fixed to 0..35000; clamp=True is set on the scale.
y=alt.Y('N50', scale=alt.Scale(domain=[0, 35000], clamp=True), axis=alt.Axis(tickCount=9)),
# Colour by Assembler using the 'turbo' scheme; the legend is switched off.
color=alt.Color('Assembler', scale=alt.Scale(scheme='turbo'), legend=None),
# One facet column per Assembler, header labels rotated and placed at the bottom.
column=alt.Column('Assembler:N',
title="",
header=alt.Header(labelAngle=-45, labelOrient='bottom', labelPadding=-5)
),
# One facet row per Amplicon, sorted descending.
row=alt.Row('Amplicon:N',
title="",
sort='descending',
),
).configure_axis(
grid=False,
labelFontSize=12,
titleFontSize=12
).configure_view(
stroke=None
).properties(
height=height, width=width
)
works fine so far producing the following plot:
N50 plot
Would it also be possible to add a second column (actually the one in the row definition: Amplicon) to get the two plots side by side? In the DataFrame, the column Amplicon only has two states. I'm happy to provide any further information.
Thanks in advance
I am trying to create a plot composed of 2 charts stacked vertically: a time series chart showing the data and, below it, a time series chart showing texts that represent events on the time axis. I want the data chart to have a grid, but the mark_text chart below it should show neither an outer line nor a grid. I use the chart.configure_axis(grid=False) command to hide the grid but get the following error: Objects with "config" attribute cannot be used within LayerChart. Consider defining the config attribute in the LayerChart object instead.
I can't figure out where to apply the configure_axis(grid=False) option so that it only applies to the bottom plot. Any help on this would be greatly appreciated, as would any suggestion on how to implement the label plot in a different way.
here is my code:
import altair as alt
import pandas as pd
import locale
from altair_saver import save
from datetime import datetime
# Reproduction script: a bar+line time series (chart1) stacked above an event-label strip (chart2).
# NOTE(review): '\l' is not a recognised escape sequence; a raw string (as used for the
# output path at the end of the script) would avoid relying on that.
file = '.\lagebericht.csv'
df = pd.read_csv(file, sep=';')
source = df
locale.setlocale(locale.LC_ALL, "de_CH")
min_date = '2020-02-29'
# Shared x domain for both charts; presumably nanosecond timestamps divided down to
# milliseconds - confirm against the pandas version in use.
domain_pd = pd.to_datetime([min_date, '2020-12-1']).astype(int) / 10 ** 6
# Upper chart: common x encoding on test_datum, reused by the bar and line layers.
base = alt.Chart(source, title='Neumeldungen BS').encode(
alt.X('test_datum:T', axis=alt.Axis(title="",format="%b %y"), scale = alt.Scale(domain=list(domain_pd) ))
)
bar = base.mark_bar(width = 1).encode(
alt.Y('faelle_bs:Q', axis=alt.Axis(title="Anzahl Fälle"), scale = alt.Scale(domain=(0, 120)))
)
line = base.mark_line(color='blue').encode(
y='faelle_Total:Q')
chart1 = (bar + line).properties(width=600)
# Lower chart: two hand-written events with a constant y value and a label each.
events= pd.DataFrame({
'datum': [datetime(2020,7,1), datetime(2020,5,15)],
'const': [1,1],
'label': ['allgememeiner Lockdown', 'Gruppen > 50 verboten'],
})
# `base` is rebound here; chart1 was already built from the previous value.
base = alt.Chart(events).encode(
alt.X('datum:T', axis=alt.Axis(title="", format="%b %y"), scale = alt.Scale(domain=list(domain_pd) ))
)
# Rule marks at each event date; the y axis has title, ticks, domain line and labels turned off.
points = base.mark_rule(color='blue').encode(
y=alt.Y('const:Q', axis=alt.Axis(title="",ticks=False, domain=False, labels=False), scale = alt.Scale(domain=(0, 10)))
)
text = base.mark_text(
align='right',
baseline='bottom',
angle = 20,
dx=0, # horizontal offset of the text (currently none)
dy=20,
# configure_axis() is applied to this single layer before it is combined with
# `points` below; this is the call the surrounding question is about.
).encode(text='label:O').configure_axis(grid=False)
chart2 = (points + text).properties(width=600, height = 50)
# Stack chart1 above chart2 and write the result to an HTML file.
save(chart1 & chart2, r"images\figs.html")
This is what it looks like without the grid=False option:
enter image description here
The configure() method should be thought of as a way to specify a global chart theme; you cannot have different configurations within a single Chart (See https://altair-viz.github.io/user_guide/customization.html#global-config-vs-local-config-vs-encoding for a discussion of this).
The way to do what you want is not via global configuration, but via axis settings. For example, you can pass grid=False to alt.Axis:
# Both layers are drawn from the same events data over the same time domain.
events_chart = alt.Chart(events)
event_time_scale = alt.Scale(domain=list(domain_pd))

# Rule layer: y axis stripped of title, ticks, domain line and labels.
bare_y_axis = alt.Axis(title="", ticks=False, domain=False, labels=False)
points = events_chart.mark_rule(color='blue').encode(
    x=alt.X('datum:T', axis=alt.Axis(title="", format="%b %y"), scale=event_time_scale),
    y=alt.Y('const:Q', axis=bare_y_axis, scale=alt.Scale(domain=(0, 10))),
)

# Text layer: the grid is disabled on the axis itself instead of via configure_axis().
gridless_x_axis = alt.Axis(title="", grid=False, format="%b %y")
text = events_chart.mark_text().encode(
    x=alt.X('datum:T', axis=gridless_x_axis, scale=event_time_scale),
    text='label:O',
)
Here are three issues I have with tooltips and labels that I want to display on my Altair graph. All the issues are more or less linked.
First, I would like to modify the name of the information I display with the tooltip:
Year instead of properties.annee
Region instead of properties.region
Bioenergy instead of properties.bioenerie...
Second, I would like to round the values displayed in the tooltip.
"11.2" instead of "11.1687087653"
The code I wrote does what I want for the labels I put in the regions but it is not working for the tooltip.
Third, I would like to display the unit in the labels and in the tooltip but I don't find the correct syntax in the documentation.
Below is my code.
Thanks in advance for your answers.
Bertrand
Current result of my code
def gen_map(data: gpd.geodataframe.GeoDataFrame, title: str, abs_values: bool):
    """Draw a choropleth of one column of ``data`` with a text label per feature.

    Args:
        data: GeoDataFrame that is serialised with ``to_json()``; its features
            become the chart data.
        title: Title of the geoshape layer.
        abs_values: If true, plot ``data.columns[0]`` with unit ``'MW'`` and
            format ``'.0f'``; otherwise plot ``data.columns[1]`` with unit
            ``'%'`` and format ``'.1f'``.

    Returns:
        The geoshape layer combined (via ``+``) with the text label layer.
    """
    features = json.loads(data.to_json())['features']
    choro_data = alt.Data(values=features)

    # Absolute values or relative values
    column, units, form = (
        (data.columns[0], 'MW', '.0f') if abs_values else (data.columns[1], '%', '.1f')
    )
    value_field = f'properties.{column}:Q'
    frame = {'width': 600, 'height': 500}

    # Base layer
    shapes = alt.Chart(choro_data, title=title).mark_geoshape(
        stroke='white',
        strokeWidth=1,
    )
    fill = alt.Color(
        value_field,
        type='quantitative',
        title=f'Installed Capacity in {units}',
    )
    hover_fields = [
        f'properties.annee:Q',
        f'properties.region:O',
        value_field,
        alt.Text(value_field, format=form),
    ]
    layer = (
        shapes.encode(fill, tooltip=hover_fields)
        .transform_lookup(
            lookup='region',
            from_=alt.LookupData(choro_data, 'region'),
        )
        .properties(**frame)
    )

    # Label layer
    labels = (
        alt.Chart(choro_data)
        .mark_text(baseline='top')
        .properties(**frame)
        .encode(
            longitude='properties.centroid_lon:Q',
            latitude='properties.centroid_lat:Q',
            text=alt.Text(value_field, format=form),
            size=alt.value(14),
            opacity=alt.value(1),
        )
    )

    return layer + labels
gen_map(bioenergies_2019, 'Bioenergy in France in 2019', False)
Instead of a list of strings, use a list of alt.Tooltip objects:
tooltip=[alt.Tooltip('properties.annee:Q', title='Annee'),
alt.Tooltip('properties.region:O', title='Region'),
alt.Tooltip(f'properties.{column}:Q', title=f'{column}')]
You can additionally pass the format argument to specify the format of the value; for number formats, use d3-format codes; for date/time formats, use d3-time-format codes.