Statistics with one value per sample - statistics

I would like to run a statistic test across samples, but each sample has one measurement only.
My data frame is the following:
structure(list(Value = c(1.04, 1.48, 0.3, 0.5, 0.66, 0.99, 0.65,
0.62), Samples = c("S1", "S2", "S3", "S4", "S5", "S6", "S7",
"S8"), Concentration = c(20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L
)), class = "data.frame", row.names = c(NA, -8L))
Here are the codes I ran:
library(ggplot2)
library(dplyr)
library(combinat)
Data = read.csv("Stackoverflow_2023-01-29.csv", header = TRUE
p = ggbarplot
(Data, x = "Samples", y = "Value",color = "Samples", fill = "Samples")
p
new_list <- list()
new_list
x<- unique(Data$Samples)
x
m<- combn(x, 2)
m
for(i in 1:ncol(m)){
new_list[[i]] <- m[,i]
}
new_list
my_comparison <- new_list
my_comparison
p1<- p+ stat_compare_means(comparisons = my_comparison)
p1` [enter image description here][1]
The plot that I obtained is attached ad jpg
enter image description here
I would appreciate any help with the statistics. Many thanks.

Related

How to download matplotlib graphs generated in a Streamlit app

Is there any way to allow users to download a graph made using matplotlib/seaborn in a Streamlit app? I managed to get a download button working to download a dataframe as a csv, but I can't figure out how to allow users to download a graph. I have provided a code snippet below of the graph code.
Thank you for your help!
fig_RFC_scatter, ax = plt.subplots(1,1, figsize = (5,4))
ax = sns.scatterplot(data = RFC_results_df_subset, x = X_Element_RM, y = Y_Element_RM, hue = "F_sil_remaining", edgecolor = "k", legend = "full")
ax = sns.scatterplot(data = RFC_results_df_subset, x = X_Element_CM, y = Y_Element_CM, hue = "F_sil_remaining", edgecolor = "k", legend = None, marker = "s")
ax = plt.xlabel(X_Element_RM)
ax = plt.ylabel(Y_Element_RM)
ax = plt.xlim(x_min, x_max)
ax = plt.ylim(y_min, y_max)
ax = plt.xscale(x_scale)
ax = plt.yscale(y_scale)
ax = plt.axhline(y = 1, color = "grey", linewidth = 0.5, linestyle = "--")
ax = plt.axvline(x = 1, color = "grey", linewidth = 0.5, linestyle = "--")
ax = plt.legend(bbox_to_anchor = (1, 0.87), frameon = False, title = "% Sil. Remaining")
st.write(fig_RFC_scatter)
Two approaches first save the image to file and offer for download and second save image to memory (no clutter on disk) and offer for download.
First
"""
Ref: https://docs.streamlit.io/library/api-reference/widgets/st.download_button
"""
import io
import matplotlib.pyplot as plt
import seaborn as sns
import streamlit as st
X = [1, 2, 3, 4, 5, 6, 7, 8]
Y = [1500, 1550, 1600, 1640, 1680, 1700, 1760, 1800]
sns.scatterplot(x=X, y=Y)
# Save to file first or an image file has already existed.
fn = 'scatter.png'
plt.savefig(fn)
with open(fn, "rb") as img:
btn = st.download_button(
label="Download image",
data=img,
file_name=fn,
mime="image/png"
)
Second
Save to memory first.
fn = 'scatter.png'
img = io.BytesIO()
plt.savefig(img, format='png')
btn = st.download_button(
label="Download image",
data=img,
file_name=fn,
mime="image/png"
)
Downloaded file output

How to combine two x axes using subplot

I have two plots (candle and bar). I want to combine them in the same subplot (x-axes in the top for volume and x-axes at the bottom for dates), but when I try to combine both in the same subplot (row=2, col=1), the result it's not the expected.
import plotly from plotly
import subplots
import random
import pandas as pd
import plotly.graph_objects as go
fig = make_subplots(rows=3, cols=2)
high = 40
low = 5
dev = 1
days = 18
fake_market = []
for each in range(days):
ohlc = []
ohlc.append(each)
if each == 0:
o = random.randrange(low, high)
ohlc.append(o)
else:
ohlc.append(c) #I know
h = random.randrange(o, high)
ohlc.append(h)
l = random.randrange(low, o)
ohlc.append(l)
c = random.randrange(l, h)
ohlc.append(c)
fake_market.append(ohlc)
fake_volume = [[x, random.randrange(1, 200)] for x in range(low, (high+1))]
df = pd.DataFrame(fake_market, columns=["Date", "Open", "High", "Low", "Close"])
df2 = pd.DataFrame(fake_volume, columns=["Volume", "Price"])
fecha = ['2019-03-22', '2019-03-23', '2019-03-24', '2019-03-25',
'2019-03-26', '2019-03-27', '2019-03-28', '2019-03-29',
'2019-03-30', '2019-03-31', '2019-04-01', '2019-04-02',
'2019-04-03', '2019-04-04', '2019-04-05', '2019-04-06',
'2019-04-07', '2019-04-08']
candle =go.Candlestick(
x=fecha,
open=[str(x) for x in df.Open.to_list()],
high=[str(x) for x in df.High.to_list()],
low=[str(x) for x in df.Low.to_list()],
close=[str(x) for x in df.Close.to_list()],
visible=True,
showlegend=True,
xaxis='x3',
yaxis ='y3')
vol_bar = go.Bar(
x=[str(x) for x in df2.Price.to_list()],
y=[str(x) for x in df2.Volume.to_list()],
xaxis='x4',
yaxis ='y4',
name='volume',
orientation="h",
opacity=0.4, marker=dict(
color='rgba(246, 78, 139, 0.6)', line=dict(color='rgba(246, 78, 139, 1.0)', width=0.1)))
fig.add_trace(candle, row=2, col=1)
fig.add_trace(vol_bar, row=2, col=1)
fig.update_layout(
yaxis3=dict(
title="yaxis3 title",
titlefont=dict(
color="#1f77b4"
),
tickfont=dict(
color="#1f77b4"
)
),
yaxis4=dict(
title="yaxis4 title", side='right',
titlefont=dict(
color="#1f77b4"
),
tickfont=dict(
color="#1f77b4"
)
),
xaxis3=dict(
title="xaxis3 volume", side='top', overlaying='x',
titlefont=dict(
color="#9467bd"
),
tickfont=dict(
color="#9467bd"
)),
xaxis4=dict(
title="xaxis4 date", side='bottom',
titlefont=dict(
color="#9467bd"
),
tickfont=dict(
color="#9467bd"
))
)
fig.update_xaxes(rangeslider_visible=False)
fig.show()
The plots are showed as follow:
The plots:
The result: The result:
The plot expected: The plot expected:
Apparently the x axes are mixing and that is the reason for getting a distorted graph is obtained.

How do I add vertical moving hover line to my plotly chart

I am trying to achieve what is done here: https://www.quantalys.com/Fonds/120955 with javascript in python plotly. I want to add the hover vertical line and the red annotation on the x axis. I have done some searching on goolgle but I couldn't find the the answer I'm looking for. My current chart looks like this:
trace1 = go.Scatter(
x = df1.x,
y = df1.y,
name = "M&G OPTIMAL INCOME FD EUR AH ACC",
hoverinfo= 'name',
opacity=0.7,
mode = 'lines',
line = dict(
color = ('rgb(2, 12, 245)'),
width = 1,
),
)
trace2 = go.Scatter(
x = df2.x,
y = df2.y,
opacity=0.7,
name = "Alloc Flexible Prudent Monde",
hoverinfo= 'name',
mode = 'lines',
line = dict(
color = ('rgb(67, 45, 24)'),
width = 1,
)
)
trace3 = go.Scatter(
x = df3.x,
y = df3.y,
name = "25% MSCI World + 75% ML Global",
hoverinfo= 'name',
mode = 'lines',
opacity=0.7,
line = dict(
color = ('rgb(205, 12, 24)'),
width = 1,
)
)
layout = go.Layout(
xaxis=dict(
showline=True,
showgrid=True,
showticklabels=True,
linecolor='rgb(204, 204, 204)',
linewidth=2,
mirror=True,
),
yaxis=dict(
showline=True,
showgrid=True,
showticklabels=True,
linecolor='rgb(204, 204, 204)',
linewidth=2,
mirror=True,
),
showlegend=True,
)
data= [trace1, trace2,trace3]
fig = dict(data=data, layout=layout)
iplot(fig, filename='line-mode')
Add this to your layout definition.
showlegend = True,
hovermode = 'x'
Add this to your xaxis definition.
showspikes = True,
spikemode = 'across',
spikesnap = 'cursor',
showline=True,
showgrid=True,
...
And add this to your layout definition:
spikedistance = -1,
xaxis=dict(...
Please refer to this post and the documentation by plotly. :)
EDIT
You ask for the x-axis lable. Please use
spikemode = 'across+toaxis'
Additionally I would suggest to use
spikedash = 'solid'
because it is better fitting your example.

How can I use the plotly dropdown menu feature to update the z value in my choropleth map?

I just want to create a menu on the plot where I'm able to change the z-value in data only. I tried looking at other examples on here: https://plot.ly/python/dropdowns/#restyle-dropdown but it was hard since the examples were not exactly similar to my plot.
import plotly
import plotly.plotly as py
import plotly.graph_objs as go
import pandas as pd
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')
data = [go.Choropleth(
locations = df['CODE'],
z = df['GDP (BILLIONS)'],
text = df['COUNTRY'],
colorscale = [
[0, "rgb(5, 10, 172)"],
[0.35, "rgb(40, 60, 190)"],
[0.5, "rgb(70, 100, 245)"],
[0.6, "rgb(90, 120, 245)"],
[0.7, "rgb(106, 137, 247)"],
[1, "rgb(220, 220, 220)"]
],
autocolorscale = False,
reversescale = True,
marker = go.choropleth.Marker(
line = go.choropleth.marker.Line(
color = 'rgb(180,180,180)',
width = 0.5
)),
colorbar = go.choropleth.ColorBar(
tickprefix = '$',
title = 'GDP<br>Billions US$'),
)]
layout = go.Layout(
title = go.layout.Title(
text = '2014 Global GDP'
),
geo = go.layout.Geo(
showframe = False,
showcoastlines = False,
projection = go.layout.geo.Projection(
type = 'equirectangular'
)
),
annotations = [go.layout.Annotation(
x = 0.55,
y = 0.1,
xref = 'paper',
yref = 'paper',
text = 'Source: <a href="https://www.cia.gov/library/publications/the-world-factbook/fields/2195.html">\
CIA World Factbook</a>',
showarrow = False
)]
)
fig = go.Figure(data = data, layout = layout)
py.iplot(fig, filename = 'd3-world-map')
It's been a while since this was asked, but I figured it was still worth answering. I can't speak to how this might have changed since it was asked in 2019, but this works today.
First, I'll provide the code I used to create the new z values and the dropdown menu, then I'll provide all of the code I used to create these graphs in one chunk (easier to cut and paste...and all that).
This is the data I used for the alternate data in the z field.
import plotly.graph_objects as go
import pandas as pd
import random
z2 = df['GDP (BILLIONS)'] * .667 + 12
random.seed(21)
random.shuffle(z2)
df['z2'] = z2 # example as another column in df
print(df.head()) # validate as expected
z3 = df['GDP (BILLIONS)'] * .2 + 1000
random.seed(231)
random.shuffle(z3) # example as a series outside of df
z4 = df['GDP (BILLIONS)']**(1/3) * df['GDP (BILLIONS)']**(1/2)
random.seed(23)
random.shuffle(z4)
z4 = z4.tolist() # example as a basic Python list
To add buttons to change z, you'll add updatemenus to your layout. Each dict() is a separate dropdown option. At a minimum, each button requires a method, a label, and args. These represent what is changing (method for data, layout, or both), what it's called in the dropdown (label), and the new information (the new z in this example).
args for changes to data (where the method is either restyle or update) can also include the trace the change applies to. So if you had a bar chart and a line graph together, you may have a button that only changes the bar graph.
Using the same structure you have:
updatemenus = [go.layout.Updatemenu(
x = 1, xanchor = 'right', y = 1.15, type = "dropdown",
pad = {'t': 5, 'r': 20, 'b': 5, 'l': 30}, # around all buttons (not indiv buttons)
buttons = list([
dict(
args = [{'z': [df['GDP (BILLIONS)']]}], # original data; nest data in []
label = 'Return to the Original z',
method = 'restyle' # restyle is for trace updates
),
dict(
args = [{'z': [df['z2']]}], # nest data in []
label = 'A different z',
method = 'restyle'
),
dict(
args = [{'z': [z3]}], # nest data in []
label = 'How about this z?',
method = 'restyle'
),
dict(
args = [{'z': [z4]}], # nest data in []
label = 'Last option for z',
method = 'restyle'
)])
)]
All code used to create this graph in one chunk (includes code shown above).
import plotly.graph_objs as go
import pandas as pd
import ssl
import random
# to collect data without an error
ssl._create_default_https_context = ssl._create_unverified_context
# data used in plot
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')
# z values used in buttons
z2 = df['GDP (BILLIONS)'] * .667 + 12
random.seed(21)
random.shuffle(z2)
df['z2'] = z2 # example as another column in the data frame
print(df.head()) # validate as expected
z3 = df['GDP (BILLIONS)'] * .2 + 1000
random.seed(231)
random.shuffle(z3) # example as a series outside of the data frame
z4 = df['GDP (BILLIONS)']**(1/3) * df['GDP (BILLIONS)']**(1/2)
random.seed(23)
random.shuffle(z4)
z4 = z4.tolist() # example as a basic Python list
data = [go.Choropleth(
locations = df['CODE'], z = df['GDP (BILLIONS)'], text = df['COUNTRY'],
colorscale = [
[0, "rgb(5, 10, 172)"],
[0.35, "rgb(40, 60, 190)"],
[0.5, "rgb(70, 100, 245)"],
[0.6, "rgb(90, 120, 245)"],
[0.7, "rgb(106, 137, 247)"],
[1, "rgb(220, 220, 220)"]],
reversescale = True,
marker = go.choropleth.Marker(
line = go.choropleth.marker.Line(
color = 'rgb(180,180,180)', width = 0.5)),
colorbar = go.choropleth.ColorBar(
tickprefix = '$',
title = 'GDP<br>Billions US$',
len = .6) # I added this for aesthetics
)]
layout = go.Layout(
title = go.layout.Title(text = '2014 Global GDP'),
geo = go.layout.Geo(
showframe = False, showcoastlines = False,
projection = go.layout.geo.Projection(
type = 'equirectangular')
),
annotations = [go.layout.Annotation(
x = 0.55, y = 0.1, xref = 'paper', yref = 'paper',
text = 'Source: <a href="https://www.cia.gov/library/publications/the-world-factbook/fields/2195.html">\
CIA World Factbook</a>',
showarrow = False
)],
updatemenus = [go.layout.Updatemenu(
x = 1, xanchor = 'right', y = 1.15, type = "dropdown",
pad = {'t': 5, 'r': 20, 'b': 5, 'l': 30},
buttons = list([
dict(
args = [{'z': [df['GDP (BILLIONS)']]}], # original data; nest data in []
label = 'Return to the Original z',
method = 'restyle' # restyle is for trace updates only
),
dict(
args = [{'z': [df['z2']]}], # nest data in []
label = 'A different z',
method = 'restyle'
),
dict(
args = [{'z': [z3]}], # nest data in []
label = 'How about this z?',
method = 'restyle'
),
dict(
args = [{'z': [z4]}], # nest data in []
label = 'Last option for z',
method = 'restyle'
)])
)]
)
fig = go.Figure(data = data, layout = layout)
fig.show()

How to overlay a line for an lm object on a ggplot2 scatterplot

I have some data,
calvarbyruno.1<-structure(list(Nominal = c(1, 3, 6, 10, 30, 50, 150, 250), Run = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1", "2", "3"), class = "factor"),
PAR = c(1.25000000000000e-05, 0.000960333333333333, 0.00205833333333334,
0.00423333333333333, 0.0322333333333334, 0.614433333333334,
1.24333333333333, 1.86333333333333), PredLin = c(-0.0119152187070942,
0.00375925114245899, 0.0272709559167888, 0.0586198956158952,
0.215364594111427, 0.372109292606959, 1.15583278508462, 1.93955627756228
), PredQuad = c(-0.0615895732702735, -0.0501563307416599,
-0.0330831368244257, -0.0104619953693943, 0.100190275883806,
0.20675348710041, 0.6782336426345, 1.04748729725370)), .Names = c("Nominal",
"Run", "PAR", "PredLin", "PredQuad"), row.names = c(NA, 8L), class = "data.frame")
calweight <- -2
for which I've created both a linear and a quadratic lm model
callin.1<-lm(PAR~Nominal,data=calvarbyruno.1,weight=Nominal^calweight)
calquad.1<-lm(PAR~Nominal+I(Nominal^2),data=calvarbyruno.1,weight=Nominal^calweight)
I can then plot my data values using ggplot2
qplot(PAR,Nominal,data=calvarbyruno.1)
But can't work out how to overlay a line representing the two lm objects... Any ideas ?
The easiest option is to use geom_smooth() and let ggplot2 fit the model for you.
ggplot(calvarbyruno.1, aes(y = PAR, x = Nominal, weight=Nominal^calweight)) +
geom_smooth(method = "lm") +
geom_smooth(method = "lm", formula = y ~ poly(x, 2), colour = "red") +
geom_point() +
coord_flip()
Or you can create a new dataset with the predicted values.
newdata <- data.frame(Nominal = pretty(calvarbyruno.1$Nominal, 100))
newdata$Linear <- predict(callin.1, newdata = newdata)
newdata$Quadratic <- predict(calquad.1, newdata = newdata)
require(reshape2)
newdata <- melt(newdata, id.vars = "Nominal", variable.name = "Model")
ggplot(calvarbyruno.1, aes(x = PAR, y = Nominal, weight=Nominal^calweight)) +
geom_line(data = newdata, aes(x = value, colour = Model)) +
geom_point()
Earlier I asked a related question and Hadley had this good answer. Using the predict function from that post you can add two columns to your data. One for each model:
calvarbyruno.1$calQuad <- predict(calquad.1)
calvarbyruno.1$callin <- predict(callin.1)
Then it's a matter of plotting the point and adding each model in as a line:
ggplot() +
geom_point(data=calvarbyruno.1, aes(PAR, Nominal), colour="green") +
geom_line(data=calvarbyruno.1, aes(calQuad, Nominal), colour="red" ) +
geom_line(data=calvarbyruno.1, aes(callin, Nominal), colour="blue" ) +
opts(aspect.ratio = 1)
And that results in this nice picture (yeah the colors could use some work):
(source: cerebralmastication.com)

Resources