How to overlay a line for an lm object on a ggplot2 scatterplot - graphics

I have some data,
# Example data frame (dput output): 8 rows with the columns
# Nominal, Run (factor with levels "1", "2", "3"), PAR, PredLin and PredQuad.
calvarbyruno.1<-structure(list(Nominal = c(1, 3, 6, 10, 30, 50, 150, 250), Run = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1", "2", "3"), class = "factor"),
PAR = c(1.25000000000000e-05, 0.000960333333333333, 0.00205833333333334,
0.00423333333333333, 0.0322333333333334, 0.614433333333334,
1.24333333333333, 1.86333333333333), PredLin = c(-0.0119152187070942,
0.00375925114245899, 0.0272709559167888, 0.0586198956158952,
0.215364594111427, 0.372109292606959, 1.15583278508462, 1.93955627756228
), PredQuad = c(-0.0615895732702735, -0.0501563307416599,
-0.0330831368244257, -0.0104619953693943, 0.100190275883806,
0.20675348710041, 0.6782336426345, 1.04748729725370)), .Names = c("Nominal",
"Run", "PAR", "PredLin", "PredQuad"), row.names = c(NA, 8L), class = "data.frame")
# Exponent applied to Nominal to build the model weights (Nominal^calweight).
calweight <- -2
for which I've created both a linear and a quadratic lm model
# Weighted least-squares fits of PAR on Nominal: a straight line and a
# quadratic, both weighted by Nominal^calweight. The argument is spelled
# `weights` in full instead of relying on partial matching of `weight`.
callin.1 <- lm(
  PAR ~ Nominal,
  data = calvarbyruno.1,
  weights = Nominal^calweight
)
calquad.1 <- lm(
  PAR ~ Nominal + I(Nominal^2),
  data = calvarbyruno.1,
  weights = Nominal^calweight
)
I can then plot my data values using ggplot2
# Scatterplot with PAR on the x axis and Nominal on the y axis.
# qplot() is deprecated in current ggplot2, so the plot is built with
# ggplot() + geom_point(), which also makes it easy to add further layers.
ggplot(calvarbyruno.1, aes(x = PAR, y = Nominal)) +
  geom_point()
But can't work out how to overlay a line representing the two lm objects... Any ideas ?

The easiest option is to use geom_smooth() and let ggplot2 fit the model for you.
# Let ggplot2 fit both models itself, using the weight aesthetic:
# a straight line (default colour) and a second-degree polynomial (red).
# Nominal is mapped to x for the fits, then the axes are flipped.
ggplot(
  data = calvarbyruno.1,
  mapping = aes(x = Nominal, y = PAR, weight = Nominal^calweight)
) +
  geom_smooth(method = "lm") +
  geom_smooth(method = "lm", formula = y ~ poly(x, 2), colour = "red") +
  geom_point() +
  coord_flip()
Or you can create a new dataset with the predicted values.
# Grid of Nominal values spanning the observed range, with the predictions
# of both fitted models stored alongside.
newdata <- data.frame(Nominal = pretty(calvarbyruno.1$Nominal, 100))
newdata$Linear <- predict(callin.1, newdata = newdata)
newdata$Quadratic <- predict(calquad.1, newdata = newdata)

# library() stops with an error if reshape2 is not installed; require()
# would only return FALSE and leave melt() to fail afterwards.
library(reshape2)

# Long format: one row per Nominal/Model pair, prediction in `value`.
newdata <- melt(newdata, id.vars = "Nominal", variable.name = "Model")

ggplot(
  calvarbyruno.1,
  aes(x = PAR, y = Nominal, weight = Nominal^calweight)
) +
  geom_line(data = newdata, aes(x = value, colour = Model)) +
  geom_point()

Earlier I asked a related question and Hadley had this good answer. Using the predict function from that post you can add two columns to your data. One for each model:
# Attach each model's predictions for the original rows as a new column.
calvarbyruno.1[["calQuad"]] <- predict(calquad.1)
calvarbyruno.1[["callin"]] <- predict(callin.1)
Then it's a matter of plotting the point and adding each model in as a line:
# Observed points (green) with the quadratic (red) and linear (blue)
# predictions drawn as lines; every layer carries its own data and mapping.
# opts() no longer exists in ggplot2; theme() is its replacement.
ggplot() +
  geom_point(data = calvarbyruno.1, aes(PAR, Nominal), colour = "green") +
  geom_line(data = calvarbyruno.1, aes(calQuad, Nominal), colour = "red") +
  geom_line(data = calvarbyruno.1, aes(callin, Nominal), colour = "blue") +
  theme(aspect.ratio = 1)
And that results in this nice picture (yeah the colors could use some work):
(source: cerebralmastication.com)

Related

Statistics with one value per sample

I would like to run a statistical test across samples, but each sample has only one measurement.
My data frame is the following:
structure(list(Value = c(1.04, 1.48, 0.3, 0.5, 0.66, 0.99, 0.65,
0.62), Samples = c("S1", "S2", "S3", "S4", "S5", "S6", "S7",
"S8"), Concentration = c(20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L
)), class = "data.frame", row.names = c(NA, -8L))
Here is the code I ran:
library(ggplot2)
library(dplyr)
library(combinat)
# ggbarplot() and stat_compare_means() come from ggpubr.
library(ggpubr)

Data <- read.csv("Stackoverflow_2023-01-29.csv", header = TRUE)

# Bar plot with one bar per sample, coloured and filled by sample.
p <- ggbarplot(
  Data,
  x = "Samples", y = "Value",
  color = "Samples", fill = "Samples"
)
p

# Every unordered pair of sample names, as a list of length-2 vectors.
# simplify = FALSE returns that list directly, so no index loop is needed.
x <- unique(Data$Samples)
x
new_list <- utils::combn(x, 2, simplify = FALSE)
new_list
my_comparison <- new_list
my_comparison

p1 <- p + stat_compare_means(comparisons = my_comparison)
p1
The plot that I obtained is attached as a JPG:
enter image description here
I would appreciate any help with the statistics. Many thanks.

Something is wrong; all the ROC metric values are missing (bartMachine R)

When fitting a BART model via the bartMachine package, I get the error in the title.
Here is the code and dput:
# The response must be a factor; its two levels are labelled C0 and C1
train$occ <- as.factor(train$occ)
levels(train$occ) <- c("C0", "C1")

# Tuning grid: three values of num_trees crossed with two values of k;
# alpha, beta and nu each take a single value
tune_grid <- expand.grid(
  num_trees = c(50, 100, 200),
  k = c(2, 3),
  alpha = 0.95,
  beta = 2,
  nu = 3
)

# 10-fold cross-validation with class probabilities and twoClassSummary
train_control <- trainControl(
  method = "cv",
  number = 10,
  classProbs = TRUE,
  summaryFunction = twoClassSummary,
  allowParallel = TRUE
)

# Start an 8-worker cluster and register it as the parallel backend
cluster <- makeCluster(8)
registerDoParallel(cluster)

# Train the bartMachine model through caret, selecting on the ROC metric
bart_train <- caret::train(
  x = train[, 5:9],
  y = train$occ,
  method = "bartMachine",
  metric = "ROC",
  trControl = train_control,
  tuneGrid = tune_grid
)

# Stop the cluster and switch back to sequential execution
stopCluster(cluster)
registerDoSEQ()
Now the dput from train data.frame
structure(list(x = c(-49.2180036048647, -49.588646107472, -49.4994660957961,
-49.409070720487, -49.5901102247847, -49.408915914575), y = c(-28.8051270000448,
-28.7079195821462, -28.7107590910968, -28.7091665158844, -28.6199868803577,
-28.6218794939721), ua = c("ua_35", "ua_39", "ua_40", "ua_41",
"ua_47", "ua_49"), occ = structure(c(1L, 1L, 1L, 1L, 1L, 1L), levels = c("C0",
"C1"), class = "factor"), PC1 = c(5.45050585867435, 0.971417276490495,
3.04696429464962, 3.49333347532713, -2.1314970593002, 1.60231066244416
), PC2 = c(1.62129971834298, -2.67253316161164, 0.803381999088846,
1.24449786054891, -6.62041787415885, -1.15464748692714), PC3 = c(0.677239125000311,
-0.800550473360275, -0.666475036424968, -0.386198549623231, -0.197769597835757,
-0.445143448591713), PC4 = c(-4.01008804477917, 1.84301040539535,
0.967638087266157, 0.0781875925367184, 0.994999464193385, 1.58963654917174
), PC5 = c(0.785837462504693, 0.0665561890147296, -0.72701888727977,
-0.739597425424334, 0.712329685720884, -0.351229249996707), block = c(1L,
1L, 1L, 1L, 1L, 1L)), row.names = c(10L, 12L, 13L, 14L, 15L,
16L), class = "data.frame")
There are no NAs in my data; I double-checked, and I have also run previous models with the same data. The issue occurs only with method = "bartMachine".

How do I add vertical moving hover line to my plotly chart

I am trying to achieve what is done here with JavaScript: https://www.quantalys.com/Fonds/120955, but in Python with plotly. I want to add the vertical hover line and the red annotation on the x-axis. I have done some searching on Google, but I couldn't find the answer I'm looking for. My current chart looks like this:
# Three line traces, one per data frame; hovering shows only the trace name.
trace1 = go.Scatter(
    x=df1.x,
    y=df1.y,
    name="M&G OPTIMAL INCOME FD EUR AH ACC",
    hoverinfo='name',
    opacity=0.7,
    mode='lines',
    line=dict(color='rgb(2, 12, 245)', width=1),
)
trace2 = go.Scatter(
    x=df2.x,
    y=df2.y,
    opacity=0.7,
    name="Alloc Flexible Prudent Monde",
    hoverinfo='name',
    mode='lines',
    line=dict(color='rgb(67, 45, 24)', width=1),
)
trace3 = go.Scatter(
    x=df3.x,
    y=df3.y,
    name="25% MSCI World + 75% ML Global",
    hoverinfo='name',
    mode='lines',
    opacity=0.7,
    line=dict(color='rgb(205, 12, 24)', width=1),
)

# Both axes use the same settings: visible, mirrored axis line plus grid
# and tick labels.
layout = go.Layout(
    xaxis=dict(
        showline=True,
        showgrid=True,
        showticklabels=True,
        linecolor='rgb(204, 204, 204)',
        linewidth=2,
        mirror=True,
    ),
    yaxis=dict(
        showline=True,
        showgrid=True,
        showticklabels=True,
        linecolor='rgb(204, 204, 204)',
        linewidth=2,
        mirror=True,
    ),
    showlegend=True,
)

data = [trace1, trace2, trace3]
fig = dict(data=data, layout=layout)
iplot(fig, filename='line-mode')
Add this to your layout definition.
showlegend = True,
hovermode = 'x'
Add this to your xaxis definition.
showspikes = True,
spikemode = 'across',
spikesnap = 'cursor',
showline=True,
showgrid=True,
...
And add this to your layout definition:
spikedistance = -1,
xaxis=dict(...
Please refer to this post and the documentation by plotly. :)
EDIT
You asked for the label on the x-axis. Please use
spikemode = 'across+toaxis'
Additionally I would suggest to use
spikedash = 'solid'
because it is better fitting your example.

How can I use the plotly dropdown menu feature to update the z value in my choropleth map?

I just want to create a menu on the plot where I'm able to change the z-value in data only. I tried looking at other examples on here: https://plot.ly/python/dropdowns/#restyle-dropdown but it was hard since the examples were not exactly similar to my plot.
import plotly
# NOTE(review): recent plotly releases reportedly moved this module to the
# separate chart_studio package - confirm against the installed version.
import plotly.plotly as py
import plotly.graph_objs as go
import pandas as pd

df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')

# Single choropleth trace: countries located by the CODE column, coloured
# by the GDP column, with the country name as hover text.
data = [go.Choropleth(
    locations=df['CODE'],
    z=df['GDP (BILLIONS)'],
    text=df['COUNTRY'],
    colorscale=[
        [0, "rgb(5, 10, 172)"],
        [0.35, "rgb(40, 60, 190)"],
        [0.5, "rgb(70, 100, 245)"],
        [0.6, "rgb(90, 120, 245)"],
        [0.7, "rgb(106, 137, 247)"],
        [1, "rgb(220, 220, 220)"],
    ],
    autocolorscale=False,
    reversescale=True,
    marker=go.choropleth.Marker(
        line=go.choropleth.marker.Line(
            color='rgb(180,180,180)',
            width=0.5,
        ),
    ),
    colorbar=go.choropleth.ColorBar(
        tickprefix='$',
        title='GDP<br>Billions US$',
    ),
)]

# The annotation text uses a backslash continuation inside the string
# literal, so its second line must stay at column 0 to keep the text intact.
layout = go.Layout(
    title=go.layout.Title(text='2014 Global GDP'),
    geo=go.layout.Geo(
        showframe=False,
        showcoastlines=False,
        projection=go.layout.geo.Projection(type='equirectangular'),
    ),
    annotations=[go.layout.Annotation(
        x=0.55,
        y=0.1,
        xref='paper',
        yref='paper',
        text='Source: <a href="https://www.cia.gov/library/publications/the-world-factbook/fields/2195.html">\
CIA World Factbook</a>',
        showarrow=False,
    )],
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='d3-world-map')
It's been a while since this was asked, but I figured it was still worth answering. I can't speak to how this might have changed since it was asked in 2019, but this works today.
First, I'll provide the code I used to create the new z values and the dropdown menu, then I'll provide all of the code I used to create these graphs in one chunk (easier to cut and paste...and all that).
This is the data I used for the alternate data in the z field.
# Builds three alternative z-value sets from the GDP column. Each set is a
# transformed copy that is shuffled in place right after seeding `random`,
# so the seed/shuffle order determines the result.
# Expects `df` (with a 'GDP (BILLIONS)' column) to exist already.
import plotly.graph_objects as go
import pandas as pd
import random
# z2: scaled and shifted GDP, stored back into the data frame
z2 = df['GDP (BILLIONS)'] * .667 + 12
random.seed(21)
random.shuffle(z2)
df['z2'] = z2 # example as another column in df
print(df.head()) # validate as expected
# z3: a different scale/shift, kept as a standalone Series
z3 = df['GDP (BILLIONS)'] * .2 + 1000
random.seed(231)
random.shuffle(z3) # example as a series outside of df
# z4: cube root times square root of GDP, converted to a plain list
z4 = df['GDP (BILLIONS)']**(1/3) * df['GDP (BILLIONS)']**(1/2)
random.seed(23)
random.shuffle(z4)
z4 = z4.tolist() # example as a basic Python list
To add buttons to change z, you'll add updatemenus to your layout. Each dict() is a separate dropdown option. At a minimum, each button requires a method, a label, and args. These represent what is changing (method for data, layout, or both), what it's called in the dropdown (label), and the new information (the new z in this example).
args for changes to data (where the method is either restyle or update) can also include the trace the change applies to. So if you had a bar chart and a line graph together, you may have a button that only changes the bar graph.
Using the same structure you have:
# One dropdown, anchored to the right above the plot. Every button uses the
# 'restyle' method (trace updates) to swap in a different z; each new z is
# wrapped in an extra list.
updatemenus = [go.layout.Updatemenu(
    x=1,
    xanchor='right',
    y=1.15,
    type="dropdown",
    # padding around the whole button group, not around individual buttons
    pad={'t': 5, 'r': 20, 'b': 5, 'l': 30},
    buttons=[
        dict(
            label='Return to the Original z',
            method='restyle',
            args=[{'z': [df['GDP (BILLIONS)']]}],  # the original data
        ),
        dict(
            label='A different z',
            method='restyle',
            args=[{'z': [df['z2']]}],
        ),
        dict(
            label='How about this z?',
            method='restyle',
            args=[{'z': [z3]}],
        ),
        dict(
            label='Last option for z',
            method='restyle',
            args=[{'z': [z4]}],
        ),
    ],
)]
All code used to create this graph in one chunk (includes code shown above).
# Complete script: loads the GDP data, derives three alternative z-value
# sets, and builds a choropleth with a dropdown that restyles the trace's z.
import plotly.graph_objs as go
import pandas as pd
import ssl
import random
# to collect data without an error
# NOTE(review): this replaces the default HTTPS context factory with the
# unverified one, i.e. certificate verification is turned off for HTTPS
# requests that rely on the default context. Avoid this outside of a
# throwaway example.
ssl._create_default_https_context = ssl._create_unverified_context
# data used in plot
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')
# z values used in buttons
# Each set is shuffled in place right after seeding `random`, so the
# seed/shuffle order determines the result.
z2 = df['GDP (BILLIONS)'] * .667 + 12
random.seed(21)
random.shuffle(z2)
df['z2'] = z2 # example as another column in the data frame
print(df.head()) # validate as expected
z3 = df['GDP (BILLIONS)'] * .2 + 1000
random.seed(231)
random.shuffle(z3) # example as a series outside of the data frame
z4 = df['GDP (BILLIONS)']**(1/3) * df['GDP (BILLIONS)']**(1/2)
random.seed(23)
random.shuffle(z4)
z4 = z4.tolist() # example as a basic Python list
# Single choropleth trace; z starts out as the original GDP column.
data = [go.Choropleth(
locations = df['CODE'], z = df['GDP (BILLIONS)'], text = df['COUNTRY'],
colorscale = [
[0, "rgb(5, 10, 172)"],
[0.35, "rgb(40, 60, 190)"],
[0.5, "rgb(70, 100, 245)"],
[0.6, "rgb(90, 120, 245)"],
[0.7, "rgb(106, 137, 247)"],
[1, "rgb(220, 220, 220)"]],
reversescale = True,
marker = go.choropleth.Marker(
line = go.choropleth.marker.Line(
color = 'rgb(180,180,180)', width = 0.5)),
colorbar = go.choropleth.ColorBar(
tickprefix = '$',
title = 'GDP<br>Billions US$',
len = .6) # I added this for aesthetics
)]
# Layout: title, map projection, source annotation and the dropdown menu.
layout = go.Layout(
title = go.layout.Title(text = '2014 Global GDP'),
geo = go.layout.Geo(
showframe = False, showcoastlines = False,
projection = go.layout.geo.Projection(
type = 'equirectangular')
),
# The text literal below continues onto the next line via a trailing backslash.
annotations = [go.layout.Annotation(
x = 0.55, y = 0.1, xref = 'paper', yref = 'paper',
text = 'Source: <a href="https://www.cia.gov/library/publications/the-world-factbook/fields/2195.html">\
CIA World Factbook</a>',
showarrow = False
)],
# One dropdown; every button restyles the trace with a different z.
updatemenus = [go.layout.Updatemenu(
x = 1, xanchor = 'right', y = 1.15, type = "dropdown",
pad = {'t': 5, 'r': 20, 'b': 5, 'l': 30},
buttons = list([
dict(
args = [{'z': [df['GDP (BILLIONS)']]}], # original data; nest data in []
label = 'Return to the Original z',
method = 'restyle' # restyle is for trace updates only
),
dict(
args = [{'z': [df['z2']]}], # nest data in []
label = 'A different z',
method = 'restyle'
),
dict(
args = [{'z': [z3]}], # nest data in []
label = 'How about this z?',
method = 'restyle'
),
dict(
args = [{'z': [z4]}], # nest data in []
label = 'Last option for z',
method = 'restyle'
)])
)]
)
fig = go.Figure(data = data, layout = layout)
fig.show()

Bokeh – ColumnDataSource not updating whiskered-plot

I’m having issues with the following code (I’ve cut out large pieces but I can add them back in – these seemed like the important parts). In my main code, I set up a plot (“sectionizePlot”) which is a simple variation on another whiskered-plot
I’m looking to update them on the fly. In the same script, I’m using a heatmap (“ModifiedGenericHeatMap”) which updates fine.
Any ideas how I might update my whiskered-plot? Updating the ColumnDataSource doesn’t seem to work (which makes sense). I’m guessing that I am running into issues with adding each circle/point individually onto the plot.
One idea would be to clear the plot each time and manually add the points onto the plot, but it would need to be cleared each time, which I’m unsure of how to do.
Any help would be appreciated. I’m just a lowly Scientist trying to utilize Bokeh in Pharma research.
def ModifiedgenericHeatMap(source, maxPct):
    """Build a heat-map figure whose cells are coloured by the ``count`` column.

    Args:
        source: Data source supplying the ``cols``, ``rows`` and ``count``
            columns referenced by the rect and text glyphs.
        maxPct: Upper bound (``high``) of the linear colour mapper; the
            lower bound is fixed at 0.

    Returns:
        The configured figure, with a colour bar added on the right.

    Relies on the module-level names ``cols``, ``rows`` and ``pi``.
    """
    colors = ["#75968f", "#a5bab7", "#c9d9d3", "#e2e2e2", "#dfccce",
              "#ddb7b1", "#cc7878", "#933b41", "#550b1d"]
    # mapper = LinearColorMapper(palette=colors, low=0, high=data['count'].max())
    mapper = LinearColorMapper(palette=colors, low=0, high=maxPct)
    TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"
    globalDist = figure(title="derp",
                        x_range=cols, y_range=list(reversed(rows)),
                        x_axis_location="above", plot_width=1000, plot_height=400,
                        tools=TOOLS, toolbar_location='below')

    # Strip grid lines, axis lines and major ticks.
    globalDist.grid.grid_line_color = None
    globalDist.axis.axis_line_color = None
    globalDist.axis.major_tick_line_color = None
    globalDist.axis.major_label_text_font_size = "5pt"
    globalDist.axis.major_label_standoff = 0
    globalDist.xaxis.major_label_orientation = pi / 3

    # One unit-sized rectangle per (cols, rows) pair, filled through the mapper.
    globalDist.rect(x="cols", y="rows", width=1, height=1,
                    source=source,
                    fill_color={'field': 'count', 'transform': mapper},
                    line_color=None)
    color_bar = ColorBar(color_mapper=mapper, major_label_text_font_size="5pt",
                         ticker=BasicTicker(desired_num_ticks=len(colors)),
                         # fix this via using a formatter with accounts for
                         formatter=PrintfTickFormatter(format="%d%%"),
                         label_standoff=6, border_line_color=None, location=(0, 0))

    # Print the count inside each cell, offset from the cell centre via dodge.
    text_props = {"source": source, "text_align": "left", "text_baseline": "middle"}
    x = dodge("cols", -0.4, range=globalDist.x_range)
    r = globalDist.text(x=x, y=dodge("rows", 0.3, range=globalDist.y_range),
                        text="count", **text_props)
    r.glyph.text_font_size = "8pt"
    globalDist.add_layout(color_bar, 'right')

    # Hover fields refer to data-source columns with the "@" prefix; the
    # previous "#rows #cols" / "#count" would be displayed as literal text.
    globalDist.select_one(HoverTool).tooltips = [
        ('Well:', '@rows @cols'),
        ('Count:', '@count'),
    ]
    return globalDist
# Builds a 600x300 figure titled "<type>-wise Intensity Distribution" with a
# Whisker layer driven by source_error (base / upper / lower columns) and one
# circle per entry of source.data['base'].
# NOTE(review): indentation was lost in this listing, so block boundaries
# (for example which statements belong to the `if` near the end) must be
# confirmed against the original file.
def sectionizePlot(source, source_error, type, base):
print("sectionize plot created with typ: " + type)
# One colour per element of `base`, each obtained from getRandomColor().
colors = []
for x in range(0, len(base)):
colors.append(getRandomColor())
title = type + "-wise Intensity Distribution"
p = figure(plot_width=600, plot_height=300, title=title)
p.add_layout(
Whisker(source=source_error, base="base", upper="upper", lower="lower"))
# The circles are created from the values held by the sources at call time:
# x and y are passed as literal numbers, not as column names bound to a
# data source.
for i, sec in enumerate(source.data['base']):
p.circle(x=source_error.data["base"][i], y=sec, color=colors[i])
p.xaxis.axis_label = type
p.yaxis.axis_label = "Intensity"
# When the last word of `type` is "Row", tick labels 1..16 are overridden
# with the letters A..P (the leading "n" is a placeholder for key 0 and is
# popped).
if (type.split()[-1] == "Row"):
print("hit a row")
conv = dict(enumerate(list("nABCDEFGHIJKLMNOP")))
conv.pop(0)
p.xaxis.major_label_overrides = conv
p.xaxis.ticker = SingleIntervalTicker(interval=1)
return p
# Module-level setup: empty data sources, the two plots built on them, the
# update() callback that refills the sources, and the document layout.
# NOTE(review): indentation was lost in this listing; where the body of
# update() ends must be confirmed against the original file.
famData = dict()
e1FractSource = ColumnDataSource(dict(count=[], cols=[], rows=[], index=[]))
e1Fract = ModifiedgenericHeatMap(e1FractSource, 100)
rowSectTotSource = ColumnDataSource(data=dict(base=[]))
rowSectTotSource_error = ColumnDataSource(data=dict(base=[], lower=[], upper=[]))
# NOTE(review): rowBase is not assigned anywhere above this call in the
# visible code (it is assigned further down) - confirm where it comes from.
rowSectPlot_tot = sectionizePlot(rowSectTotSource,rowSectTotSource_error, "eSum Row", rowBase)
def update(selected=None):
global famData
famData = getFAMData(file_source_dt1, True)
global e1Stack
# e1Sub as a percentage of eSum, reshaped to rows/cols/count and rounded
# to one decimal.
# NOTE(review): this assigns the name e1Fract, which is also the heat-map
# figure created above; there is no `global e1Fract` here, so presumably this
# is a separate local - confirm.
e1Fract = (famData['e1Sub'] / famData['eSum']) * 100
e1Stack = e1Fract.stack(dropna=False).reset_index()
e1Stack.columns = ["rows", "cols", "count"]
e1Stack['count'] = e1Stack['count'].apply(lambda x: round(x, 1))
e1FractSource.data = dict(cols=e1Stack["cols"], count=(e1Stack["count"]),
rows=e1Stack["rows"], index=e1Stack.index.values, codon=wells, )
rowData, colData = sectionize(famData['eSum'], rows, cols)
rowData_lower, rowData_upper = getLowerUpper(rowData)
rowBase = list(range(1, 17))
# Replace the data of both sources used by the whisker plot.
rowSectTotSource_error.data = dict(base=rowBase, lower=rowData_lower, upper=rowData_upper, )
rowSectTotSource.data = dict(base=rowData)
rowSectPlot_tot.title.text = "plot changed in update"
# NOTE(review): column() receives the data source e1FractSource here rather
# than the e1Fract figure built from it - confirm this is intended.
layout = column(e1FractSource, rowSectPlot_tot)
update()
curdoc().add_root(layout)
curdoc().title = "Specs"
print("ok")

Resources