Incorrect number of dimensions when switching between 2 group and more than 2 group LDA in Shiny - switch-statement

I have been teaching myself how to make shiny apps to include wit research articles to make methods more available to practitioners. I am using shiny to make a web app that does discriminant function analysis on a certain set of variables. The application works just fine when 3 or more groups are selected from the analysis, but when I used the switch function to alter the code to 2 groups, I am running into an incorrect number of dimensions error. The code was working fine and then I updated R and rStudio and now whenever I try the 2 group comparisons, I received "incorrect number of dimensions". I have been trying to debug for a few hours to no avail.
In the shiny interface the user can select between a two group analysis and a more than 2 group analysis, select the groups and then enter the data into a data entry table. I have the table set up so that the reference data is subset according to whichever variables are entered.
Below is the server.R code - please forgive the awful state of the coding - this is my first attempt and I am learning as I go.
And the server.R
server.R
mand<-read.csv("data/berg_full.csv", sep=',', header = T)
library(shiny)
library(knitr)
library(httr)
library(fields)
library(psych)
library(dplyr)
library(PerformanceAnalytics)
library(caret)
library(e1071)
library(DT)
library(MASS)
library(stats)
library(klaR)
library(Morpho)
shinyServer(function(input, output) {
# get the reference data from the selectize input
refdata <- reactive({
input$evaluate
isolate({
if(length(input$refsamp) == 0) return(NULL)
switch(input$refsamp,
"mandible" = mand,
NULL)
})
})
getdata<-reactive({
input$evaluate
filtereddata<-refdata()
filtereddata<- filtereddata %>% filter(Group %in% input$group) %>% droplevels()
return(filtereddata)
})
elements <- reactive({
input$evaluate
isolate({
elements <- c()
if(!is.na(input$GNI)) elements <- c(elements, "GNI" = input$GNI)
if(!is.na(input$HML)) elements <- c(elements, "HML" = input$HML)
if(!is.na(input$TML)) elements <- c(elements, "TML" = input$TML)
if(!is.na(input$GOG)) elements <- c(elements, "GOG" = input$GOG)
if(!is.na(input$CDL)) elements <- c(elements, "CDL" = input$CDL)
if(!is.na(input$WRB)) elements <- c(elements, "WRB" = input$WRB)
if(!is.na(input$XRH)) elements <- c(elements, "XRH" = input$XRH)
if(!is.na(input$MLT)) elements <- c(elements, "MLT" = input$MLT)
if(!is.na(input$MAN)) elements <- c(elements, "MAN" = input$MAN)
if(!is.na(input$XDA)) elements <- c(elements, "XDA" = input$XDA)
if(!is.na(input$TLM23)) elements <- c(elements, "TLM23" = input$TLM23)
if(!is.na(input$CS)) elements <- c(elements, "CS" = input$CS)
if(!is.na(input$L_Bord)) elements <- c(elements, "L_Bord" = input$L_Bord)
if(!is.na(input$AscRam)) elements <- c(elements, "AscRam" = input$AscRam)
if(!is.na(input$GF)) elements <- c(elements, "GF" = input$GF)
if(!is.na(input$MT)) elements <- c(elements, "MT" = input$MT)
if(!is.na(input$PREI)) elements <- c(elements, "PREI" = input$PREI)
if(length(elements) == 0) return(NULL)
return(data.frame(as.list(elements)))
})
})
##create elements input table
el_names <- c("<h4>Metric</h4>", "<h5>New Data</h5>")
GNI <- c("GNI",
"<input id='GNI' class='shiny-bound-input' type='number' value='NA' min='0' max='50'>"
)
HML <- c("HML",
"<input id='HML' class='shiny-bound-input' type='number' value='NA' min='0' max='50'>"
)
TML <- c("TML",
"<input id='TML' class='shiny-bound-input' type='number' value='NA' min='0' max='25'>"
)
GOG <- c("GOG",
"<input id='GOG' class='shiny-bound-input' type='number' value='NA' min='0' max='150'>"
)
CDL <- c("CDL",
"<input id='CDL' class='shiny-bound-input' type='number' value='NA' min='0' max='160'>"
)
WRB <- c("WRB",
"<input id='WRB' class='shiny-bound-input' type='number' value='NA' min='0' max='100'>"
)
XRH <- c("XRH",
"<input id='XRH' class='shiny-bound-input' type='number' value='NA' min='0' max='100'>"
)
MLT <- c("MLT",
"<input id='MLT' class='shiny-bound-input' type='number' value='NA' min='0' max='150'>"
)
MAN <- c("MAN",
"<input id='MAN' class='shiny-bound-input' type='number' value='NA' min='0' max='180'>"
)
XDA <- c("XDA",
"<input id='XDA' class='shiny-bound-input' type='number' value='NA' min='0' max='100'>"
)
TLM23 <- c("TLM23",
"<input id='TLM23' class='shiny-bound-input' type='number' value='NA' min='0' max='100'>"
)
output$el_table <- renderTable({
data.frame(el_names, GNI, HML, TML, GOG, CDL, WRB, XRH, MLT, MAN, XDA, TLM23)
}, sanitize.text.function = function(x) x, sanitize.rownames.function = function(x) x, sanitize.colnames.function = function(x) x, include.rownames = FALSE, include.colnames = FALSE)
el_names1 <- c("<h4>Morphoscopic</h4>", "<h5>New Data</h5>")
CS <- c("Chin Shape",
"<input id='CS' class='shiny-bound-input' type='number' value='NA' min='1' max='4'>"
)
L_Bord <- c("LBM",
"<input id='L_Bord' class='shiny-bound-input' type='number' value='NA' min='1' max='4'>"
)
AscRam <- c("Ascending Ramus",
"<input id='AscRam' class='shiny-bound-input' type='number' value='NA' min='1' max='4'>"
)
GF <- c("Gonial Flare",
"<input id='GF' class='shiny-bound-input' type='number' value='NA' min='1' max='5'>"
)
MT <- c("Mand. Torus",
"<input id='MT' class='shiny-bound-input' type='number' value='NA' min='1' max='2'>"
)
PREI <- c("PREI",
"<input id='PREI' class='shiny-bound-input' type='number' value='NA' min='1' max='4'>"
)
output$el_table1 <- renderTable({
data.frame(el_names1, CS, L_Bord, AscRam, GF, MT, PREI)
}, sanitize.text.function = function(x) x, sanitize.rownames.function = function(x) x, sanitize.colnames.function = function(x) x, include.rownames = FALSE, include.colnames = FALSE)
## create reference data from new data
refsamp <- reactive({
if (is.null(getdata()) | is.null(elements())) return()
ref <- dplyr::select_(getdata(), .dots = c("Group", names(elements()))) %>% droplevels()
return(ref)
})
## create lda model, plot, and typicality probabilities
lda_mod <- eventReactive(input$evaluate, {
lda_data<-na.omit(refsamp()) %>% droplevels()
ngroups<-nlevels(lda_data$Group)
lda_formula<-as.formula(Group ~ .)
if(length(input$numgroups) == 0) return(NULL)
switch(input$numgroups,
"multigroup" = {
model_group<-MASS::lda(lda_formula, data = lda_data, prior= rep(1, ngroups)/ngroups)
model_group1<-MASS::lda(lda_formula, data = lda_data, prior= rep(1, ngroups)/ngroups, CV=TRUE)
tracetab<-prop.table(model_group$svd^2)
df1v<-round((tracetab[1]), digits=3)
df2v<-round((tracetab[2]), digits=3)
estgroup<-data.frame(predict(model_group, newdata = elements(), type="class", CV=TRUE))
groupprob<-predict(model_group, newdata=elements(), type="posterior", CV=TRUE)
pp<-as.data.frame(round(groupprob$posterior, digits=3))
p<-predict(model_group, lda_data, CV=T)
ct<-table(lda_data$Group, model_group1$class)
cm<-caret::confusionMatrix(ct)
con<-cm
n<-as.matrix(model_group$counts)
colnames(n)<-c("n")
classmat<-cbind(n, ct)
tcc<-paste(sum(diag(ct)), "out of", sum(ct), "correct", "=", (100*(round(sum(diag(prop.table(ct))), digits=3))), "%", "Total Correct Classification Cross-validated")
percenttab<-tcc
ppv<-as.data.frame(con$byClass[,3])
colnames(ppv)<-c("PPV")
x<-p$x[,1]
y<-p$x[,2]
Group<-lda_data$Group
df<-data.frame(Group,x,y)
centroids<-aggregate(cbind(x,y)~Group,df,mean)
cen<-as.matrix(centroids)
qx<-as.numeric(estgroup$x.LD1)
qy<-as.numeric(estgroup$x.LD2)
inddist<-data.frame(qx, qy)
indie<-as.matrix(inddist)
eucdist<-fields::rdist(cen[,2:3], indie)
grouplev<-data.frame(model_group$lev)
eucdist1<-cbind(grouplev, round(eucdist, digits=3))
colnames(eucdist1)<-c("Group", "Dist.")
ldaplot<-ggplot2::ggplot(data=df, aes(x, y, color=Group)) + geom_point(alpha=0.5) + labs(x="DF1", y="DF2") + geom_point(data=centroids, size=5) + geom_point(aes(x=estgroup$x.LD1, y=estgroup$x.LD2), size=6, col="black", pch=8) + stat_ellipse(type="norm", level=0.90)
},
"twogroup" = {
model_group<-MASS::lda(lda_formula, data = lda_data, prior= rep(1, ngroups)/ngroups, na.action=na.omit)
model_group1<-MASS::lda(lda_formula, data = lda_data, prior= rep(1, ngroups)/ngroups, CV=TRUE)
tracetab<-prop.table(model_group$svd^2)
df1v<-round((tracetab[1]), digits=3)
estgroup<-data.frame(predict(model_group, newdata = elements(), type="class", CV=TRUE))
groupprob<-predict(model_group, newdata=elements(), type="posterior", CV=TRUE)
pp<-as.data.frame(round(groupprob$posterior, digits=3))
p<-predict(model_group, lda_data, CV=T)
ct<-table(lda_data$Group, model_group1$class)
cm<-caret::confusionMatrix(ct)
con<-cm
n<-as.matrix(model_group$counts)
colnames(n)<-c("n")
classmat<-cbind(n, ct)
tcc<-paste(sum(diag(ct)), "out of", sum(ct), "correct", "=", (100*(round(sum(diag(prop.table(ct))), digits=3))), "%", "Total Correct Classification Cross-validated")
percenttab<-tcc
ppv<-as.data.frame(con$byClass[,3])
colnames(ppv)<-c("PPV")
x<-p$x[,1]
Group<-lda_data$Group
df<-data.frame(Group,x)
centroids<-aggregate(cbind(x)~Group,df,mean)
cen<-as.matrix(centroids)
qx<-as.numeric(estgroup$LD1)
inddist<-data.frame(qx)
indie<-as.matrix(inddist)
eucdist<-fields::rdist(cen[,2], indie)
grouplev<-data.frame(model_group$lev)
eucdist1<-cbind(grouplev, round(eucdist, digits=3))
colnames(eucdist1)<-c("Group", "Dist.")
ldaplot<-ggplot2::ggplot(data=df, aes(x=x, fill=Group)) + geom_histogram() + labs(x="Discriminant Function Score", y="Count") + geom_vline(aes(xintercept=qx))
}
)
return(list(model_group, estgroup,groupprob, p, ct, cm, ldaplot, qx, qy, pp, df1v, df2v, classmat, percenttab, tcc, ppv, eucdist1, model_group1))
})
classmatrix <- eventReactive(input$evaluate, {
fit<-lda_mod()[[1]]
ctab<-lda_mod()[[5]]
n<-as.matrix(fit$counts)
colnames(n)<-c("n")
nclassmat<-cbind(n, ctab)
classperc<-(100*round(prop.table(ctab,1), digits=3))
right<-sum(diag(ctab))
of<-sum(ctab)
totalcorrect<-100*(round(sum(diag(prop.table(ctab))), digits=3))
return(list(nclassmat, classperc, right, of, totalcorrect))
})
tps <- eventReactive(input$evaluate, {
tdat<-refsamp()
sub<-na.omit(tdat)
ngroups<-nlevels(tdat$Group)
g<-sub$Group
g<-as.vector(g)
fit<-MASS::lda(Group ~., data=sub, prior= rep(1, ngroups)/ngroups)
p<-predict(fit, sub)
ref<-as.matrix(p$x[,1], p$x[,2])
ind<-elements()
est<-predict(fit, ind)
pred<-as.matrix(est$x[,1], est$x[,2])
typClass<-typprobClass(pred, ref, groups = g, method="chisquare", cv=TRUE, sep=T, robust="mcd")
tp<-as.data.frame(round(typClass$probs, digits=3))
return(list(tp))
})
elemnames<-eventReactive(input$evaluate, {
enames<-elements()
return(list(enames))
})
Tech <-reactive({
input$tech
})
Case <-reactive({
input$case
})
TECH<-eventReactive(input$evaluate,{
techie<-Tech()
return(list(techie))
})
CASE<-eventReactive(input$evaluate,{
case1<-Case()
return(list(case1))
})
## summary statistics by group
datasummary<-eventReactive(input$evaluate, {
gd<-refsamp()
gd<-na.omit(gd)
groupsummary<-psych::describeBy(gd, group='Group')
return(list(groupsummary))
})
## output group classification
output$lda_pred <- renderText({
if(is.null(lda_mod())) return()
a<-lda_mod()[[2]]
paste("Predicted Group =", a[,1])
})
##output for model summary
output$modsum <- renderPrint({
if(is.null(lda_mod())) return()
lda_mod()[[1]]
})
## output posterior probabilities
output$lda_prob <- renderPrint({
if(is.null(lda_mod())) return()
posteriors<- lda_mod()[[10]]
print(posteriors[order(-posteriors[1,])], row.names=FALSE)
})
## output typicality probabilities
output$typs <- renderPrint({
if(is.null(tps())) return()
typsy<- tps()[[1]]
print(typsy[order(-typsy[1,])], row.names=FALSE)
})
##output distance from centroids
output$cendist<-renderPrint({
if(is.null(lda_mod())) return()
distcen<-lda_mod()[[17]]
print(distcen[order(distcen[,2]),], row.names=FALSE)
})
## output confusion matrix
output$confusionm<-renderPrint({
if(is.null(classmatrix())) return()
classmatrix()[[1]]
})
## output percent confusion matrix
output$confusionm1<-renderPrint({
if(is.null(classmatrix())) return()
classmatrix()[[2]]
})
## output total correct classification
output$confusionm2<-renderText({
if(is.null(classmatrix())) return()
paste(classmatrix()[[3]], "out of", classmatrix()[[4]], "=", classmatrix()[[5]],"%", "Total Correct Classification Cross-validated")
})
## output positive predictive value
output$confusionm3<-renderPrint({
if(is.null(lda_mod())) return()
pospred<-lda_mod()[[16]]
round(pospred, digits=3)
})
## output summary statistics
output$summarystat<-renderPrint({
if(is.null(datasummary())) return()
datasummary()[[1]]
})
#scatterplot output
output$ldaplot<- renderPlot({
if(is.null(lda_mod())) return()
lda_mod()[[7]]
})
# New data LD scores
output$number1 <- renderText({
if(is.null(lda_mod())) return()
ld1<-lda_mod()[[8]]
ldv1<-lda_mod()[[11]]
paste("Classified Individual's DF1 Score = ", round(ld1, digits=3), "Variation Accounted For in DF1:", round((ldv1*100), digits=2),"%")
})
# New data LD scores
output$number2 <- renderText({
if(is.null(lda_mod())) return()
ld2<-lda_mod()[[9]]
ldv2<-lda_mod()[[12]]
paste("Classified Individual's DF2 Score = ", round(ld2, digits=3), "Variation Accounted For in DF2:", round((ldv2*100), digits=2),"%")
})
## output model specs
output$modelspec<-renderPrint({
if(is.null(lda_mod())) return()
lda_mod()[[1]]
})
##case number
output$casenum<- renderPrint({
cake<-CASE()[[1]]
print(as.name(cake), row.names=FALSE)
})
##case analyst
output$analyst<- renderPrint({
tach<-TECH()[[1]]
print(as.name(tach), row.names=FALSE)
})
##output Title and Date
output$title<-renderPrint({
today<-Sys.Date()
cat(sprintf('Sex and Ancestry Estimation Report %s\n', today))
})
##output variables and measures for case
output$elnamez<-renderPrint({
e<-elemnames()[[1]]
print(e, row.names=FALSE)
})
## output confusion matrix print
output$confusionmp<-renderPrint({
if(is.null(classmatrix())) return()
classmatrix()[[1]]
})
## output total correct classificationprint
output$confusionm2p<-renderText({
if(is.null(classmatrix())) return()
paste(classmatrix()[[3]], "out of", classmatrix()[[4]], "=", classmatrix()[[5]], "%", "Total Correct Classification Cross-validated")
})
## output percent confusion matrix print
output$confusionm1p<-renderPrint({
if(is.null(classmatrix())) return()
classmatrix()[[2]]
})
## output posterior probabilities print
output$lda_probp <- renderPrint({
if(is.null(lda_mod())) return()
posteriors1<-lda_mod()[[10]]
print(posteriors1[order(-posteriors1[1,])], row.names=FALSE)
})
## output typicality probabilities print
output$typsp <- renderPrint({
if(is.null(tps())) return()
typsy1<- tps()[[1]]
print(typsy1[order(-typsy1[1,])], row.names=FALSE)
})
#scatterplot output print
output$ldaplotp<- renderPlot({
if(is.null(lda_mod())) return()
lda_mod()[[7]]
})
## output group classification
output$ldapredp <- renderText({
if(is.null(lda_mod())) return()
a<-lda_mod()[[2]]
paste("Predicted Group =", a[,1])
})
})
I have tried changing where I put the switch function in the lda_mod but the same issue keeps coming up. Any suggestions would be much appreciated.

Figured it out. When calling on the output value for positive predictive value in confusionMatrix, the subset [,3] worked in the multigroup case and for the 2 group case, it would only accept [3].

Related

Statistics with one value per sample

I would like to run a statistic test across samples, but each sample has one measurement only.
My data frame is the following:
structure(list(Value = c(1.04, 1.48, 0.3, 0.5, 0.66, 0.99, 0.65,
0.62), Samples = c("S1", "S2", "S3", "S4", "S5", "S6", "S7",
"S8"), Concentration = c(20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L
)), class = "data.frame", row.names = c(NA, -8L))
Here are the codes I ran:
library(ggplot2)
library(dplyr)
library(combinat)
Data = read.csv("Stackoverflow_2023-01-29.csv", header = TRUE
p = ggbarplot
(Data, x = "Samples", y = "Value",color = "Samples", fill = "Samples")
p
new_list <- list()
new_list
x<- unique(Data$Samples)
x
m<- combn(x, 2)
m
for(i in 1:ncol(m)){
new_list[[i]] <- m[,i]
}
new_list
my_comparison <- new_list
my_comparison
p1<- p+ stat_compare_means(comparisons = my_comparison)
p1` [enter image description here][1]
The plot that I obtained is attached ad jpg
enter image description here
I would appreciate any help with the statistics. Many thanks.

How to load output data from a print function into a pandas data frame?

I have a function that provides me with output values. However, I want those values to be displayed in a pandas data frame.
Function:
def print_response(response):
for report in response.get('reports', []):
columnHeader = report.get('columnHeader', {})
dimensionHeaders = columnHeader.get('dimensions', [])
metricHeaders = columnHeader.get('metricHeader', {}).get('metricHeaderEntries', [])
rows = report.get('data', {}).get('rows', [])
for row in rows:
dimensions = row.get('dimensions', [])
dateRangeValues = row.get('metrics', [])
for header, dimension in zip(dimensionHeaders, dimensions):
print ( header + ': ' + dimension )
for i, values in enumerate(dateRangeValues):
print ('Date range (' + str(i) + ')' )
for metricHeader, value in zip(metricHeaders, values.get('values')):
print ( metricHeader.get('name') + ': ' + value )
When running the Code, it gives me this Output (I posted only 1 row):
Date: 20180430
Customers: 257917
I want this data a pandas data frame though.
PLEASE HELP, I AM NEW TO THIS!

iterative string substitution

I have a string and I want to add swap in a substring every 3 characters and generate 1 output for each iteration: for example
print(str1)
DDDDDDDDDDDDDDDDDDDDDDDD
print(str2)
nnn
the output should look like this
nnnDDDDDDDDDDDDDDDDDDDDDD
DDDnnnDDDDDDDDDDDDDDDDDDD
DDDDDDnnnDDDDDDDDDDDDDDDD
DDDDDDDDDnnnDDDDDDDDDDDDD
DDDDDDDDDDDDnnnDDDDDDDDDD
DDDDDDDDDDDDDDDDnnnDDDDDD
DDDDDDDDDDDDDDDDDDDnnnDDD
DDDDDDDDDDDDDDDDDDDDDDnnn
any ideas?
positions <- seq(1, nchar(str1), by=nchar(str2))
split.str1 <- substring(str1, positions, positions+nchar(str2)-1)
result <- sapply(seq(nchar(str1)/nchar(str2))
, function(i) {
split.str1[i] <- str2
paste(split.str1, collapse='')
})
result
## [1] "nnnDDDDDDDDDDDDDDDDDDDDD" "DDDnnnDDDDDDDDDDDDDDDDDD" "DDDDDDnnnDDDDDDDDDDDDDDD" "DDDDDDDDDnnnDDDDDDDDDDDD"
## [5] "DDDDDDDDDDDDnnnDDDDDDDDD" "DDDDDDDDDDDDDDDnnnDDDDDD" "DDDDDDDDDDDDDDDDDDnnnDDD" "DDDDDDDDDDDDDDDDDDDDDnnn"
Thanks #BondedDust for the matrix suggestion for presentation:
as.matrix(result)
## [,1]
## [1,] "nnnDDDDDDDDDDDDDDDDDDDDD"
## [2,] "DDDnnnDDDDDDDDDDDDDDDDDD"
## [3,] "DDDDDDnnnDDDDDDDDDDDDDDD"
## [4,] "DDDDDDDDDnnnDDDDDDDDDDDD"
## [5,] "DDDDDDDDDDDDnnnDDDDDDDDD"
## [6,] "DDDDDDDDDDDDDDDnnnDDDDDD"
## [7,] "DDDDDDDDDDDDDDDDDDnnnDDD"
## [8,] "DDDDDDDDDDDDDDDDDDDDDnnn"

How to overlay a line for an lm object on a ggplot2 scatterplot

I have some data,
calvarbyruno.1<-structure(list(Nominal = c(1, 3, 6, 10, 30, 50, 150, 250), Run = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1", "2", "3"), class = "factor"),
PAR = c(1.25000000000000e-05, 0.000960333333333333, 0.00205833333333334,
0.00423333333333333, 0.0322333333333334, 0.614433333333334,
1.24333333333333, 1.86333333333333), PredLin = c(-0.0119152187070942,
0.00375925114245899, 0.0272709559167888, 0.0586198956158952,
0.215364594111427, 0.372109292606959, 1.15583278508462, 1.93955627756228
), PredQuad = c(-0.0615895732702735, -0.0501563307416599,
-0.0330831368244257, -0.0104619953693943, 0.100190275883806,
0.20675348710041, 0.6782336426345, 1.04748729725370)), .Names = c("Nominal",
"Run", "PAR", "PredLin", "PredQuad"), row.names = c(NA, 8L), class = "data.frame")
calweight <- -2
for which I've created both a linear and a quadratic lm model
callin.1<-lm(PAR~Nominal,data=calvarbyruno.1,weight=Nominal^calweight)
calquad.1<-lm(PAR~Nominal+I(Nominal^2),data=calvarbyruno.1,weight=Nominal^calweight)
I can then plot my data values using ggplot2
qplot(PAR,Nominal,data=calvarbyruno.1)
But can't work out how to overlay a line representing the two lm objects... Any ideas ?
The easiest option is to use geom_smooth() and let ggplot2 fit the model for you.
ggplot(calvarbyruno.1, aes(y = PAR, x = Nominal, weight=Nominal^calweight)) +
geom_smooth(method = "lm") +
geom_smooth(method = "lm", formula = y ~ poly(x, 2), colour = "red") +
geom_point() +
coord_flip()
Or you can create a new dataset with the predicted values.
newdata <- data.frame(Nominal = pretty(calvarbyruno.1$Nominal, 100))
newdata$Linear <- predict(callin.1, newdata = newdata)
newdata$Quadratic <- predict(calquad.1, newdata = newdata)
require(reshape2)
newdata <- melt(newdata, id.vars = "Nominal", variable.name = "Model")
ggplot(calvarbyruno.1, aes(x = PAR, y = Nominal, weight=Nominal^calweight)) +
geom_line(data = newdata, aes(x = value, colour = Model)) +
geom_point()
Earlier I asked a related question and Hadley had this good answer. Using the predict function from that post you can add two columns to your data. One for each model:
calvarbyruno.1$calQuad <- predict(calquad.1)
calvarbyruno.1$callin <- predict(callin.1)
Then it's a matter of plotting the point and adding each model in as a line:
ggplot() +
geom_point(data=calvarbyruno.1, aes(PAR, Nominal), colour="green") +
geom_line(data=calvarbyruno.1, aes(calQuad, Nominal), colour="red" ) +
geom_line(data=calvarbyruno.1, aes(callin, Nominal), colour="blue" ) +
opts(aspect.ratio = 1)
And that results in this nice picture (yeah the colors could use some work):
(source: cerebralmastication.com)

How to create an "inkblot" chart with R?

How can I create a chart like
http://junkcharts.typepad.com/junk_charts/2010/01/leaving-ink-traces.html
where several time series (one per country) are displayed horizontally as symmetric areas?
I think if I could display one time series in this way, it is easy to generalize to several using mfrow.
Sample data:
#Solar energy production in Europe, by country (EC),(1 000 toe)
Country,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007
Belgium,1,1,1,1,1,1,2,2,3,3,3,5
Bulgaria,-,-,-,-,-,-,-,-,-,-,-,-
Czech Republic,0,0,0,0,0,0,0,0,2,2,3,4
Denmark,6,7,7,8,8,8,9,9,9,10,10,11
Germany (including ex-GDR from 1991),57,70,83,78,96,150,184,216,262,353,472,580
Estonia,-,-,-,-,-,-,-,-,-,-,-,-
Ireland,0,0,0,0,0,0,0,0,0,0,1,1
Greece,86,89,93,97,99,100,99,99,101,102,109,160
Spain,26,23,26,29,33,38,43,48,58,65,83,137
France,15,16,17,18,26,19,19,18,19,22,29,37
Italy,8,9,11,11,12,14,16,18,21,30,38,56
Cyprus,32,33,34,35,35,34,35,36,40,41,43,54
Latvia,-,-,-,-,-,-,-,-,-,-,-,-
Lithuania,-,-,-,-,-,-,-,-,-,-,-,-
Luxembourg (Grand-Duché),0,0,0,0,0,0,0,0,1,2,2,2
Hungary,0,0,0,0,0,1,2,2,2,2,2,3
Netherlands,6,7,8,10,12,14,16,19,20,22,22,23
Austria,42,48,55,58,64,69,74,80,86,92,101,108
Poland,0,0,0,0,0,0,0,0,0,0,0,0
Portugal,16,16,17,18,18,19,20,21,21,23,24,28
Romania,0,0,0,0,0,0,0,0,0,0,0,0
Slovenia,-,-,-,-,-,-,-,-,-,-,-,-
Slovakia,0,0,0,0,0,0,0,0,0,0,0,0
Finland,0,0,0,0,1,1,1,1,1,1,1,1
Sweden,4,4,5,5,5,6,4,5,5,6,6,9
United Kingdom,6,6,7,7,11,13,16,20,25,30,37,46
Croatia,0,0,0,0,0,0,0,0,0,0,0,1
Turkey,159,179,210,236,262,287,318,350,375,385,402,420
Iceland,-,-,-,-,-,-,-,-,-,-,-,-
Norway,0,0,0,0,0,0,0,0,0,0,0,0
Switzerland,18,19,21,23,24,26,23,24,25,26,28,30
#-='Not applicable' or 'Real zero' or 'Zero by default' :=Not available "
#Source of Data:,Eurostat, http://spreadsheets.google.com/ccc?key=0Agol553XfuDZdFpCQU1CUVdPZ3M0djJBSE1za1NGV0E&hl=en_GB
#Last Update:,30.04.2009
#Date of extraction:,17 Aug 2009 07:41:12 GMT, http://epp.eurostat.ec.europa.eu/tgm/table.do?tab=table&init=1&plugin=1&language=en&pcode=ten00082
You can use polygon in base graphics, for instance
x <- seq(as.POSIXct("1949-01-01", tz="GMT"), length=36, by="months")
y <- rnorm(length(x))
plot(x, y, type="n", ylim=c(-1,1)*max(abs(y)))
polygon(c(x, rev(x)), c(y, -rev(y)), col="cornflowerblue", border=NA)
Update: Using panel.polygon from lattice:
library("lattice")
library("RColorBrewer")
df <- data.frame(x=rep(x,3),
y=rnorm(3*length(x)),
variable=gl(3, length(x)))
p <- xyplot(y~x|variable, data=df,
ylim=c(-1,1)*max(abs(y)),
layout=c(1,3),
fill=brewer.pal(3, "Pastel2"),
panel=function(...) {
args <- list(...)
print(args)
panel.polygon(c(args$x, rev(args$x)),
c(args$y, -rev(args$y)),
fill=args$fill[panel.number()],
border=NA)
})
print(p)
With a little polishing, this ggplot solution will look like what you want:
alt text http://www.imagechicken.com/uploads/1264790429056858700.png
Here's how to make it from your data:
require(ggplot2)
First, let's take your input data and import and restructure it into a form ggplot likes:
rdata = read.csv("data.csv",
# options: load '-' as na, ignore first comment line #Solar,
# strip whitespace that ends line, accept numbers as col headings
na.strings="-", skip=1, strip.white=T, check.names=F)
# Convert to long format and check years are numeric
data = melt(rdata)
data = transform(data,year=as.numeric(as.character(variable)))
# geom_ribbon hates NAs.
data = data[!is.na(data$value),]
> summary(data)
Country variable value year
Austria : 12 1996 : 25 Min. : 0.00 Min. :1996
Belgium : 12 1997 : 25 1st Qu.: 0.00 1st Qu.:1999
Croatia : 12 1998 : 25 Median : 7.00 Median :2002
Cyprus : 12 1999 : 25 Mean : 36.73 Mean :2002
Czech Republic: 12 2000 : 25 3rd Qu.: 30.00 3rd Qu.:2004
Denmark : 12 2001 : 25 Max. :580.00 Max. :2007
(Other) :228 (Other):150
Now let's plot it:
ggplot(data=data, aes(fill=Country)) +
facet_grid(Country~.,space="free", scales="free_y") +
opts(legend.position="none") +
geom_ribbon(aes(x=year,ymin=-value,ymax=+value))
Using rcs' first approach, here a solution for the sample data with base graphics:
rawData <- read.csv("solar.csv", na.strings="-")
data <- ts(t(as.matrix(rawData[,2:13])), names=rawData[,1], start=1996)
inkblot <- function(series, col=NULL, min.height=40, col.value=24, col.category=17, ...) {
# assumes non-negative values
# assumes that series is multivariate series
# assumes that series names are set, i.e. colnames(series) != NULL
x <- as.vector(time(series))
if(length(col)==0){
col <- rainbow(dim(series)[2])
}
ytotal <- 0
for(category in colnames(series)) {
y <- series[, category]
y <- y[!is.na(y)]
ytotal <- ytotal + max(y, min.height)
}
oldpar = par(no.readonly = TRUE)
par(mar=c(2,3,0,10)+0.1, cex=0.7)
plot(x, 1:length(x), type="n", ylim=c(0,1)*ytotal, yaxt="n", xaxt="n", bty="n", ylab="", xlab="", ...)
axis(side=1, at=x)
catNumber <- 1
offset <- 0
for(category in rev(colnames(series))) {
print(paste("working on: ", category))
y <- 0.5 * as.vector(series[,category])
offset <- offset + max(max(abs(y[!is.na(y)])), 0.5*min.height)
print(paste("offset= ", str(offset)))
polygon(c(x, rev(x)), c(offset+y, offset-rev(y)), col=col[catNumber], border=NA)
mtext(text=y[1], side=2, at=offset, las=2, cex=0.7, col=col.value)
mtext(text=y[length(y)], side=4, line=-1, at=offset, las=2, cex=0.7, col=col.value)
mtext(text=category, side=4, line=2, at=offset, las=2, cex=0.7, col=col.category)
offset <- offset + max(max(abs(y[!is.na(y)])), 0.5*min.height)
catNumber <- catNumber + 1
}
}
inkblot(data)
I still need to figure out the vertical grid lines and the transparent coloring.
Late to this game, but I created a stacked "blot" chart using ggplot2 and another set of data. This uses geom_polygon after the data has been smoothed out.
# data: Masaaki Ishida (luna#pos.to)
# http://luna.pos.to/whale/sta.html
head(blue, 2)
## Season Norway U.K. Japan Panama Denmark Germany U.S.A. Netherlands
## ## [1,] 1931 0 6050 0 0 0 0 0 0
## ## [2,] 1932 10128 8496 0 0 0 0 0 0
## ## U.S.S.R. South.Africa TOTAL
## ## [1,] 0 0 6050
## ## [2,] 0 0 18624
hourglass.plot <- function(df) {
stack.df <- df[,-1]
stack.df <- stack.df[,sort(colnames(stack.df))]
stack.df <- apply(stack.df, 1, cumsum)
stack.df <- apply(stack.df, 1, function(x) sapply(x, cumsum))
stack.df <- t(apply(stack.df, 1, function(x) x - mean(x)))
# use this for actual data
## coords.df <- data.frame(x = rep(c(df[,1], rev(df[,1])), times = dim(stack.df)[2]), y = c(apply(stack.df, 1, min), as.numeric(apply(stack.df, 2, function(x) c(rev(x),x)))[1:(length(df[,1])*length(colnames(stack.df))*2-length(df[,1]))]), id = rep(colnames(stack.df), each = 2*length(df[,1])))
## qplot(x = x, y = y, data = coords.df, geom = "polygon", color = I("white"), fill = id)
# use this for smoothed data
density.df <- apply(stack.df, 2, function(x) spline(x = df[,1], y = x))
id.df <- sort(rep(colnames(stack.df), each = as.numeric(lapply(density.df, function(x) length(x$x)))))
density.df <- do.call("rbind", lapply(density.df, as.data.frame))
density.df <- data.frame(density.df, id = id.df)
smooth.df <- data.frame(x = unlist(tapply(density.df$x, density.df$id, function(x) c(x, rev(x)))), y = c(apply(unstack(density.df[,2:3]), 1, min), unlist(tapply(density.df$y, density.df$id, function(x) c(rev(x), x)))[1:(table(density.df$id)[1]+2*max(cumsum(table(density.df$id))[-dim(stack.df)[2]]))]), id = rep(names(table(density.df$id)), each = 2*table(density.df$id)))
qplot(x = x, y = y, data = smooth.df, geom = "polygon", color = I("white"), fill = id)
}
hourglass.plot(blue[,-12]) + opts(title = c("Blue Whale Catch"))
alt text http://probabilitynotes.files.wordpress.com/2010/06/bluewhalecatch.png

Resources