I am trying to use the following code (adapted from the code given in Gelman and Hill's book) to estimate a varying-coefficient/varying-intercept ordered probit model in JAGS. However, it gives me an "Observed node inconsistent with unobserved parents at initialization. Try setting appropriate initial values" error. Where am I going wrong? Could somebody please help me? Thanks in advance!
rm(list=ls(all=TRUE));
options(warn=-1)
library(mvtnorm)
library(arm)
library(foreign)
library("R2jags")
library(MCMCpack)
set.seed(1)
standardizeCols = function( dataMat ) {
  zDataMat = dataMat
  for ( colIdx in 1:NCOL( dataMat ) ) {
    mCol = mean( dataMat[,colIdx] )
    sdCol = sd( dataMat[,colIdx] )
    zDataMat[,colIdx] = ( dataMat[,colIdx] - mCol ) / sdCol
  }
  return( zDataMat )
}
keep<-1
nobs = 150;
nis<-sample(1:40,nobs,replace=T) # number obs per subject
id<-rep(1:nobs,nis)
N<-length(id)
corr_beta = 0.6;
Sigma_beta = matrix(c(1, corr_beta, corr_beta, corr_beta,
corr_beta, 1, corr_beta, corr_beta,
corr_beta, corr_beta, 1, corr_beta,
corr_beta, corr_beta, corr_beta, 1), ncol=4);
betas <- rmvnorm(n=N, mean=c(-1.45, 0.90, 0.25, -2.3), sigma=Sigma_beta);
#Generate the data
x3 = matrix(0, nrow=N,ncol=3);
y3 = matrix(0, nrow=N,ncol=1);
for (i in 1:N) {
  error_v = rnorm(1,0,1);
  x3[i,1] = rnorm(1,0,1);
  x3[i,2] = rnorm(1,0,1);
  x3[i,3] = rnorm(1,0,1);
  y3[i,1] = betas[id[i], 1] + betas[id[i], 2]*x3[i,1] + betas[id[i], 3]*x3[i,2] + betas[id[i], 4]*x3[i,3] + error_v;
}
cutoff=c(-100, 0, 1.5, 2.4, 100)
k=length(cutoff)-1;
Y3<-cut(y3, br = cutoff, right=TRUE, include.lowest = TRUE, labels = FALSE)
Y3=Y3
X3=x3
m1=max(Y3)
y = as.vector( Y3 )
n = length(y)
J<-length(unique(id))
X = cbind(1, standardizeCols( X3 ))
nPred = NCOL(X)
subjects<-as.vector(as.numeric(id))
K=nPred
W <- diag (K)
# MCMC settings
ni <- 5000; nb <- 2500; nt <- 6; nc <- 3
tau1u=c(0,1,2)
jags_data <- list ("n", "J", "K", "y", "subjects", "X", "W", "m1")
inits <- function (){
list (B.raw=array(rnorm(J*K),c(J,K)), mu.raw=rnorm(K), sigma.y=runif(1), Tau.B.raw=rwish(K+1,diag(K)), xi=runif(K))
}
params <- c ("B", "mu", "sigma.B", "rho.B", "tau1u")
cat("model {
for (i in 1:n){
y.hat[i] <- inprod(B[subjects[i],],X[i,])
y[i] ~ dcat(p[i,])
estar[i]~dnorm (y.hat[i], tau.y);
for (j in 1:(m1-1)) {
Q1[i,j]<-pnorm(tau1[j]-estar[i],0,1)
}
p[i,1] <- Q1[i,1]
for(j in 2:(m1-1)) {
p[i,j] <- Q1[i,j] - Q1[i,j-1]
}
p[i,m1] <- 1 - Q1[i,m1-1]
}
tau.y <- pow(sigma.y, -2)
sigma.y ~ dunif (0, 100)
# thresholds (unordered priors)
for(j in 1:(m1-1)){
tau1u[j] ~ dnorm(0,.01)
}
# ordered thresholds
tau1 <- sort(tau1u)
for (j in 1:J){
for (k in 1:K){
B[j,k] <- xi[k]*B.raw[j,k]
}
B.raw[j,1:K] ~ dmnorm (mu.raw[], Tau.B.raw[,])
}
for (k in 1:K){
mu[k] <- xi[k]*mu.raw[k]
mu.raw[k] ~ dnorm (0, .0001)
xi[k] ~ dunif (0, 100)
}
Tau.B.raw[1:K,1:K] ~ dwish (W[,], df)
df <- K+1
Sigma.B.raw[1:K,1:K] <- inverse(Tau.B.raw[,])
for (k in 1:K){
for (k.prime in 1:K){
rho.B[k,k.prime] <- Sigma.B.raw[k,k.prime]/sqrt(Sigma.B.raw[k,k]*Sigma.B.raw[k.prime,k.prime])
}
sigma.B[k] <- abs(xi[k])*sqrt(Sigma.B.raw[k,k])
}
}", fill=TRUE, file="wishart2.txt")
# Start Gibbs sampler
outj <- jags(jags_data, inits=inits, parameters.to.save=params, model.file="wishart2.txt", n.thin=nt, n.chains=nc, n.burnin=nb, n.iter=ni)
Your initial values function returns random numbers from normal and uniform distributions, which, it appears, are not close enough to sensible values to allow a non-zero posterior value to be calculated. I think you need to choose your initial values more carefully, perhaps basing them on the values generated in the data, to ensure that the model compiles. Do Gelman and Hill give initial values for their model that you could start with?
Update: you could also try removing your 'inits=inits' argument to allow JAGS to select its own initial values, which works for most (although not all) models. I don't use R2jags though, so I'm not sure whether this is allowed for the jags function (but it is for rjags and runjags).
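As a rough, untested sketch of both suggestions (not from Gelman and Hill): either drop the inits argument, or extend the inits function with threshold starting values near the cutoffs used to simulate the data (0, 1.5, 2.4).
# Option 1 (untested; assumes R2jags allows omitting inits):
outj <- jags(jags_data, parameters.to.save=params, model.file="wishart2.txt",
             n.thin=nt, n.chains=nc, n.burnin=nb, n.iter=ni)
# Option 2 (untested): keep the inits function, but start the unordered
# thresholds tau1u near the data-generating cutoffs so the initial state
# is not wildly inconsistent with the observed categories
inits <- function (){
  list (B.raw=array(rnorm(J*K), c(J,K)), mu.raw=rnorm(K), sigma.y=runif(1),
        Tau.B.raw=rwish(K+1, diag(K)), xi=runif(K),
        tau1u=c(0, 1.5, 2.4) + rnorm(m1-1, 0, 0.1))
}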
It appears that for larger nnet::multinom multinomial regression models (with a few thousand coefficients), calculating the Hessian (the matrix of second derivatives of the negative log likelihood, also known as the observed Fisher information matrix) becomes very slow, which then prevents me from calculating the variance-covariance matrix and, in turn, confidence intervals on model predictions.
The culprit seems to be the following pure R function, which calculates the Fisher information matrix analytically, using code contributed by David Firth:
https://github.com/cran/nnet/blob/master/R/vcovmultinom.R
multinomHess = function (object, Z = model.matrix(object))
{
    probs <- object$fitted
    coefs <- coef(object)
    if (is.vector(coefs)) {
        coefs <- t(as.matrix(coefs))
        probs <- cbind(1 - probs, probs)
    }
    coefdim <- dim(coefs)
    p <- coefdim[2L]
    k <- coefdim[1L]
    ncoefs <- k * p
    kpees <- rep(p, k)
    n <- dim(Z)[1L]
    ## Now compute the observed (= expected, in this case) information,
    ## e.g. as in T Amemiya "Advanced Econometrics" (1985) pp 295-6.
    ## Here i and j are as in Amemiya, and x, xbar are vectors
    ## specific to (i,j) and to i respectively.
    info <- matrix(0, ncoefs, ncoefs)
    Names <- dimnames(coefs)
    if (is.null(Names[[1L]]))
        Names <- Names[[2L]]
    else Names <- as.vector(outer(Names[[2L]], Names[[1L]], function(name2,
        name1) paste(name1, name2, sep = ":")))
    dimnames(info) <- list(Names, Names)
    x0 <- matrix(0, p, k + 1L)
    row.totals <- object$weights
    for (i in seq_len(n)) {
        Zi <- Z[i, ]
        xbar <- rep(Zi, times = k) * rep(probs[i, -1, drop = FALSE], times = kpees)
        for (j in seq_len(k + 1)) {
            x <- x0
            x[, j] <- Zi
            x <- x[, -1, drop = FALSE]
            x <- x - xbar
            dim(x) <- c(1, ncoefs)
            info <- info + (row.totals[i] * probs[i, j] * crossprod(x))
        }
    }
    info
}
From the explanation in the referenced Advanced Econometrics book (Amemiya 1985, pp. 295-6), we can see that the Hessian is indeed just a sum of crossproducts. I also saw this and this for the derivation of the Hessian matrix of a multinomial regression model, which may be even more elegant and efficient, as the Hessian is there calculated as a sum of Kronecker products.
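For reference, and using the notation of the code further down (p_i the fitted probability vector for observation i with the first category dropped, z_i the i-th row of the model matrix Z, and w_i the row total), the observed (= expected) information from those references can be written as a weighted sum of Kronecker products over observations:
$$I(\beta) \;=\; \sum_{i=1}^{n} w_i \left[\left(\operatorname{diag}(p_i) - p_i p_i^{\top}\right) \otimes z_i z_i^{\top}\right]$$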
For a smallish nnet::multinom model (in which I am modelling the frequency of different SARS-CoV-2 lineages through time), the provided function runs quickly:
library(nnet)
library(splines)
download.file("https://www.dropbox.com/s/gt0yennn2gkg3rd/smallmodel.RData?dl=1",
"smallmodel.RData",
method = "auto", mode="wb")
load("smallmodel.RData")
length(fit_multinom_small$lev) # k=12 outcome levels
dim(coef(fit_multinom_small)) # 11 x 3 = (k-1) x p = 33 coefs
system.time(hess <- nnet:::multinomHess(fit_multinom_small)) # 0.11s
dim(hess) # 33 33
But doing this for a large model (again modelling the frequency of different SARS-CoV-2 lineages through time, but now across different continents/countries) takes more than 2 hours, even though the model itself fits in ca. 1 minute:
download.file("https://www.dropbox.com/s/mpz08jj7fmubd68/bigmodel.RData?dl=1",
"bigmodel.RData",
method = "auto", mode="wb")
load("bigmodel.RData")
length(fit_global_multi_last3m$lev) # k=20 outcome levels
dim(coef(fit_global_multi_last3m)) # 19 x 229 = (k-1) x p = 4351 coefficients
system.time(hess <- nnet:::multinomHess(fit_global_multi_last3m)) # takes forever
I was now looking for ways to speed up the above function.
The obvious approach would be to port it to Rcpp, but unfortunately I am not very experienced with that. Does anybody have any thoughts?
EDIT: From the info here and here, it appears that calculating the Hessian for a multinomial fit should just come down to a sum of Kronecker products, which we can do from R using efficient matrix algebra, but right now I am unsure how to include my total row counts fit$weights. Does anybody have any idea?
download.file("https://www.dropbox.com/s/gt0yennn2gkg3rd/smallmodel.RData?dl=1",
"smallmodel.RData",
method = "auto", mode="wb")
load("smallmodel.RData")
library(nnet)
length(fit_multinom_small$lev) # k=12 outcome levels
dim(coef(fit_multinom_small)) # 11 x 3 = (k-1) x p = 33 coefs
fit = fit_multinom_small
Z = model.matrix(fit)
P = fitted(fit)[, -1, drop=F]
k = ncol(P) # nr of outcome categories-1
p = ncol(Z) # nr of parameters
n = nrow(Z) # nr of observations
ncoefs = k*p
library(fastmatrix)
# Fisher information matrix
info <- matrix(0, ncoefs, ncoefs)
for (i in 1:n) { # sum over observations
  info = info + kronecker.prod(diag(P[i,]) - tcrossprod(P[i,]), tcrossprod(Z[i,]))
}
Figured it out in the end and was able to calculate the observed Fisher information matrix using Kronecker products, as well as port that bit to Rcpp using Armadillo classes. (Full disclosure: I made that Rcpp port just using OpenAI's code-davinci / Codex, https://openai.com/blog/openai-codex/, and surprisingly it worked straight out of the box; AI is getting better every day. parallelReduce could presumably still be used to parallelize the accumulation, and the function was faster than an equivalent RcppEigen implementation I tried.) The mistake I made was that the formula above gives the observed Fisher information for a single observation, so I had to accumulate over observations, and I also had to take my total row counts into account.
Rcpp function:
// RcppArmadillo utility function to calculate observed Fisher
// information matrix of multinomial fit, with
// probs=fitted probabilities (with 1st category/column dropped)
// Z = model matrix
// row_totals = row totals
// We do this using Kronecker products, as in
// https://ieeexplore.ieee.org/abstract/document/1424458
// B. Krishnapuram; L. Carin; M.A.T. Figueiredo; A.J. Hartemink
// Sparse multinomial logistic regression: fast algorithms and
// generalization bounds
// IEEE Transactions on Pattern Analysis and Machine
// Intelligence ( Volume: 27, Issue: 6, June 2005)
#include <RcppArmadillo.h>
using namespace arma;
// [[Rcpp::depends(RcppArmadillo)]]
// [[Rcpp::export]]
arma::mat calc_infmatrix_RcppArma(arma::mat probs, arma::mat Z, arma::vec row_totals) {
    int n = Z.n_rows;
    int p = Z.n_cols;
    int k = probs.n_cols;
    int ncoefs = k * p;
    arma::mat info = arma::zeros<arma::mat>(ncoefs, ncoefs);
    arma::mat diag_probs;
    arma::mat tcrossprod_probs;
    arma::mat tcrossprod_Z;
    arma::mat kronecker_prod;
    for (int i = 0; i < n; i++) {
        diag_probs = arma::diagmat(probs.row(i));
        tcrossprod_probs = arma::trans(probs.row(i)) * probs.row(i);
        tcrossprod_Z = (arma::trans(Z.row(i)) * Z.row(i)) * row_totals(i);
        kronecker_prod = arma::kron(diag_probs - tcrossprod_probs, tcrossprod_Z);
        info += kronecker_prod;
    }
    return info;
}
saved as "calc_infmatrix_arma.cpp".
library(Rcpp)
library(RcppArmadillo)
sourceCpp("calc_infmatrix_arma.cpp")
R wrapper function:
# Function to calculate Hessian / observed Fisher information
# matrix of nnet::multinom multinomial fit object
fastmultinomHess <- function(object, Z = model.matrix(object)) {
  probs <- object$fitted # predicted probabilities, avoid napredict from fitted.default
  coefs <- coef(object)
  if (is.vector(coefs)){ # ie there are only 2 response categories
    coefs <- t(as.matrix(coefs))
    probs <- cbind(1 - probs, probs)
  }
  coefdim <- dim(coefs)
  p <- coefdim[2L] # nr of parameters
  k <- coefdim[1L] # nr of outcome categories-1
  ncoefs <- k * p # nr of coefficients
  n <- dim(Z)[1L] # nr of observations
  # Now compute the Hessian = the observed
  # (= expected, in this case)
  # Fisher information matrix
  info <- calc_infmatrix_RcppArma(probs = probs[, -1, drop=F],
                                  Z = Z,
                                  row_totals = object$weights)
  Names <- dimnames(coefs)
  if (is.null(Names[[1L]])) {
    Names <- Names[[2L]]
  } else {
    Names <- as.vector(outer(Names[[2L]], Names[[1L]],
                             function(name2, name1) paste(name1, name2, sep = ":")))
  }
  dimnames(info) <- list(Names, Names)
  return(info)
}
For my larger model this now calculates in 100s instead of >2 hours, so almost 80 times faster:
download.file("https://www.dropbox.com/s/mpz08jj7fmubd68/bigmodel.RData?dl=1",
"bigmodel.RData",
method = "auto", mode="wb")
load("bigmodel.RData")
object = fit_global_multi_last3m # large nnet::multinom fit
system.time(info <- fastmultinomHess(object, Z = model.matrix(object))) # 103s
system.time(info <- nnet:::multinomHess(object, Z = model.matrix(object))) # 8127s = 2.25h
A pure R version of the calc_infmatrix function (ca. 5x slower than the Rcpp function above) would be
# Utility function to calculate observed Fisher information matrix
# of multinomial fit, with
# probs=fitted probabilities (with 1st category/column dropped)
# Z = model matrix
# row_totals = row totals
calc_infmatrix = function(probs, Z, row_totals) {
  require(fastmatrix) # for kronecker.prod Kronecker product function
  n <- nrow(Z)
  p <- ncol(Z)
  k <- ncol(probs)
  ncoefs <- k * p
  info <- matrix(0, ncoefs, ncoefs)
  for (i in 1:n) {
    info <- info + kronecker.prod((diag(probs[i,]) - tcrossprod(probs[i,])), tcrossprod(Z[i,])*row_totals[i])
  }
  return(info)
}
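For completeness, a small usage sketch (my own addition, assuming smallmodel.RData from above is loaded): the resulting information matrix can then be inverted (assuming it is non-singular) to get the variance-covariance matrix of the coefficients mentioned at the start of the question.
fit <- fit_multinom_small
info <- calc_infmatrix(probs = fitted(fit)[, -1, drop = FALSE],
                       Z = model.matrix(fit),
                       row_totals = fit$weights)
vcov_fit <- solve(info) # variance-covariance matrix of the coefficients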
I have used the equation of motion (Newton's law) for a simple spring-and-mass scenario, incorporating it into the given second-order ODE x'' + (k/m)x = 0 with x(0) = 3, x'(0) = 0.
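For reference, with ω = √(k/m) the exact solution of this initial value problem is
$$x(t) = x_0 \cos(\omega t), \qquad v(t) = x'(t) = -x_0\,\omega\,\sin(\omega t),$$
which is what the x_exact and v_exact values in the code below are meant to hold.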
Using the Euler method alongside the exact solution, I have been able to get some reasonable results. However, when I plot the results I get a diagonal line running across the oscillating curves that I am after.
Current plot output with diagonal line
Can anyone help point out what is causing this issue, and how I can fix it please?
MY CODE:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from sympy import Function, dsolve, Eq, Derivative, sin, cos, symbols
from sympy.abc import x, i
import math
# Given is y" + (k/m)x = 0; y(0) = 3; y'(0) = 0
# Parameters
h = 0.01; #Step Size
t = 50.0; #Time(sec)
k = 1; #Spring Stiffness
m = 1; #Mass
x0 = 3;
v0 = 0;
# Exact Analytical Solution
x_exact = x0*cos(math.sqrt(k/m)*t);
v_exact = -x0*math.sqrt(k/m)*sin(math.sqrt(k/m)*t);
# Eulers Method
x = np.zeros( int( t/h ) );
v = np.zeros( int( t/h ) );
x[1] = x0;
v[1] = v0;
x_exact = np.zeros( int( t/h ) );
v_exact = np.zeros( int( t/h ) );
te = np.zeros( int( t/h ) );
x_exact[1] = x0;
v_exact[1] = v0;
#print(len(x));
for i in range(1, int(t/h) - 1): #MAIN LOOP
    x[i+1] = x[i] + h*v[i];
    v[i+1] = v[i] - h*k/m*x[i];
    te[i] = i * h
    x_exact[i] = x0*cos(math.sqrt(k/m)* te[i]);
    v_exact[i] = -x0*math.sqrt(k/m)*sin(math.sqrt(k/m)* te[i]);
    # print(x_exact[i], '\t'*2, x[i]);
#plot
%config InlineBackend.figure_format = 'svg'
plt.plot(te, x_exact, te ,v_exact)
plt.title("DISPLACEMENT")
plt.xlabel("Time (s)")
plt.ylabel("Displacement (m)")
plt.grid(linewidth=0.3)
A computation that is more direct in some of the details is:
te = np.arange(0,t,h)
N = len(te)
w = (k/m)**0.5
x_exact = x0*np.cos(w*te);
v_exact = -x0*w*np.sin(w*te);
plt.plot(te, x_exact, te ,v_exact)
resulting in a clean plot of the oscillations (no diagonal line).
Note that arrays in Python start at index zero, so the initialization and the main loop should be
x = np.empty(N)
v = np.empty(N)
x[0] = x0;
v[0] = v0;
for i in range(N - 1): #MAIN LOOP
    x[i+1] = x[i] + h*v[i];
    v[i+1] = v[i] - h*k/m*x[i];
plt.plot(te, x, te, v)
This then gives a plot with the expected increasing amplitude.
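To see why the amplitude grows: with ω² = k/m, one explicit Euler step is the linear map
$$\begin{pmatrix} x_{n+1} \\ v_{n+1} \end{pmatrix} = \begin{pmatrix} 1 & h \\ -h\omega^{2} & 1 \end{pmatrix}\begin{pmatrix} x_{n} \\ v_{n} \end{pmatrix},$$
whose eigenvalues 1 ± ihω have modulus √(1 + h²ω²) > 1, so the numerical solution spirals outward by that factor at every step.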
I am a fairly new learner of Rcpp, primarily needing it to speed up slow R code that is not easily parallelized because of dependencies within for-loop iterations.
I wish to convert the following R code to C++ code to be directly used via Rcpp.
migrate_r <- function(pop) {
  if (m != 0) {
    if (model == "Step") {
      for (i in 1:K) {
        for (j in 1:K) {
          for (k in 2:(K - 1)) {
            i <- sample(perms, size = ceiling(perms * m/2), replace = FALSE)
            j <- sample(perms, size = ceiling(perms * m/2), replace = FALSE)
            tmp <- pop[i,, sample(k)]
            pop[i,, sample(k)] <- pop[j,, sample(k)]
            pop[j,, sample(k)] <- tmp
          }
        }
      }
    }
  }
  pop
}
My attempt is as follows:
// [[Rcpp::depends(RcppArmadillo)]]
#define ARMA_DONT_PRINT_OPENMP_WARNING
#include <RcppArmadillo.h>
#include <RcppArmadilloExtensions/sample.h>
#include <set>
using namespace Rcpp;
// [[Rcpp::export]]
arma::Cube<int> migrate_cpp(arma::Cube<int> pop) {
    String model;
    int i, j, k, K, perms, tmp;
    double m;
    if (m != 0) {
        if (model == "Step") {
            for (i = 0; i < K; i++) {
                for (j = 0; j < K; j++) {
                    for (k = 1; k < (K - 1); k++) {
                        i = RcppArmadillo::sample(perms, ceil(perms * m / 2), false);
                        j = RcppArmadillo::sample(perms, ceil(perms * m / 2), false);
                        tmp = pop[i, RcppArmadillo::sample(k, K, true)];
                        pop[i, RcppArmadillo::sample(k, K, true)] = pop[j, RcppArmadillo::sample(k, K, true)];
                        pop[j, RcppArmadillo::sample(k, K, true)] = tmp;
                    }
                }
            }
        }
    }
    return pop;
}
Essentially, both functions swap random rows in a 3-dimensional array ('pop') via a temporary variable. The C++ code doesn't run.
I know I am close to getting the C++ code to work, which will result in massive speedup compared to the R for loop.
Is there something I am missing here? Any assistance is greatly appreciated and warmly welcomed.
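For anyone trying to follow what the functions are meant to do, here is a stripped-down R illustration of the core operation (my own sketch, ignoring the m, model and perms machinery): swapping a row between two sub-arrays of a 3-dimensional array via a temporary copy, using an array shaped like the pop object from the reproducible example below.
# hypothetical helper, not part of the original code:
# swap row r1 of sub-array s1 with row r2 of sub-array s2
swap_rows <- function(pop, r1, s1, r2, s2) {
  tmp <- pop[r1, , s1]
  pop[r1, , s1] <- pop[r2, , s2]
  pop[r2, , s2] <- tmp
  pop
}
# e.g. pop <- swap_rows(pop, r1 = 1, s1 = 1, r2 = 2, s2 = 2)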
A reproducible example
##### Load packages #####
library(Rcpp)
library(RcppArmadillo)
### Set parameters ###
K <- 2
N <- 6
Hstar <- 5
probs <- rep(1/Hstar, Hstar)
m <- 0.20
perms <- 2 # number of permutations
num.specs <- ceiling(N / K)
haps <- 1:Hstar
specs <- 1:num.specs
gen.perms <- function() {
sample(haps, size = num.specs, replace = TRUE, prob = probs)
}
pop <- array(dim = c(perms, num.specs, K))
for (i in 1:K) {
pop[,, i] <- replicate(perms, gen.perms())
}
pop
, , 1
[,1] [,2] [,3]
[1,] 3 5 1
[2,] 2 3 3
, , 2
[,1] [,2] [,3]
[1,] 2 5 3
[2,] 3 5 3
migrate_r(pop) # notice rows have been swapped between subarrays
, , 1
[,1] [,2] [,3]
[1,] 3 5 1
[2,] 2 5 3
, , 2
[,1] [,2] [,3]
[1,] 3 5 3
[2,] 2 3 3
Each time I run my JAGS model using the jags() function, I get very different values of fitted parameters. However, I want other people to reproduce my results.
I tried to add set.seed(123), but it didn't help. This link describes how to achieve my goal using the run.jags() function. I wonder how I can do similar things using jags(). Thank you!
Below is my model in R:
##------------- read data -------------##
m <- 6
l <- 3
node <- read.csv("answer.csv", header = F)
n <- nrow(node)
# values of nodes
## IG
IG <- c(c(0.0, 1.0, 0.0), c(0.0, 0.0, 1.0), c(1.0, 0.0, 0.0), c(1.0, 0.0, 0.0), c(0.0, 1.0, 0.0), c(0.0, 0.0, 1.0))
IG <- matrix(IG, nrow=6, ncol=3, byrow=T)
V_IG <- array(0, dim=c(n, m, l))
for (i in 1:n){
  for (j in 1:m){
    for (k in 1:l){
      V_IG[i,j,k] <- IG[j,k] # alternatively, V[i,j,k] <- PTS[j,k]
    }
  }
}
## PTS
PTS <- c(c(1.0, 0.5, 0.0), c(1.0, 0.0, 0.5), c(1.0, 1.0, 0.0), c(1.0, 0.0, 1.0), c(0.0, 0.5, 1.0), c(0.0, 1.0, 0.5))
PTS <- matrix(PTS, nrow=m, ncol=3, byrow=T)
V_PTS <- array(0, dim=c(n, m, l))
for (i in 1:n){
  for (j in 1:m){
    for (k in 1:l){
      V_PTS[i,j,k] <- PTS[j,k]
    }
  }
}
##------------- fit model -------------##
set.seed(123)
data <- list("n", "m", "V_IG", "V_PTS", "node")
myinits <- list(list(tau = rep(1,n), theta = rep(0.5,n)))
parameters <- c("tau", "theta")
samples <- jags(data, inits=myinits, parameters,
model.file ="model.txt", n.chains=1, n.iter=10000,
n.burnin=1, n.thin=1, DIC=T)
And my model file model.txt:
model{
  # data: which node (1, 2, 3) was chosen by each child in each puzzle
  for (i in 1:n) # for each child
  {
    for (j in 1:m) # for each problem
    {
      # node chosen
      node[i,j] ~ dcat(mu[i,j,1:3])
      mu[i,j,1:3] <- exp_v[i,j,1:3] / sum(exp_v[i,j,1:3])
      for (k in 1:3) {
        exp_v[i,j,k] <- exp((V_IG[i,j,k]*theta[i] + V_PTS[i,j,k]*(1-theta[i]))/tau[i])
      }
    }
  }
  # priors on tau and theta
  for (i in 1:n)
  {
    tau[i] ~ dgamma(0.001,0.001)
    theta[i] ~ dbeta(1,1)
  }
}
I know this is an older question, but for anyone using the jagsUI package, the jags() function has an argument for setting the seed, 'seed = ####'. So, for example, a JAGS call could be:
np.sim1 <- jags(data = data1, parameters.to.save = params1, model.file = "mod1_all.txt",
n.chains = nc, n.iter = ni, n.burnin = nb, n.thin = nt, seed = 4879)
summary(np.sim1)
Here is a toy example for linear regression. First the model:
model{
  a0 ~ dnorm(0, 0.0001)
  a1 ~ dnorm(0, 0.0001)
  tau ~ dgamma(0.001,0.001)
  for (i in 1:100) {
    y[i] ~ dnorm(mu[i], tau)
    mu[i] <- a0 + a1 * x[i]
  }
}
Now we generate some data and use the set.seed function to generate identical results from multiple calls to the jags function.
# make the data and prepare what we need to fit the model
x <- rnorm(100)
y <- 1 + 1.2 * x + rnorm(100)
data <- list("x", "y")
parameters <- c("a0", "a1", "tau")
inits = list(list(a0 = 1, a1=0.5, tau = 1))
# First fit
set.seed(121)
samples <- jags(data, inits,
parameters,model.file = "./sov/lin_reg.R",
n.chains = 1, n.iter = 5000, n.burnin = 1, n.thin = 1)
# second fit
set.seed(121) # with set.seed at same value
samples2 <- jags(data, inits,
parameters,model.file = "./sov/lin_reg.R",
n.chains = 1, n.iter = 5000, n.burnin = 1, n.thin = 1)
If we pull out the draws for one of the parameters from samples and samples2 we can see that they have generated the same values.
a0_1 <- samples$BUGSoutput$sims.list$a0
a0_2 <- samples2$BUGSoutput$sims.list$a0
head(cbind(a0_1, a0_2))
[,1] [,2]
[1,] 1.0392019 1.0392019
[2,] 0.9155636 0.9155636
[3,] 0.9497509 0.9497509
[4,] 1.0706620 1.0706620
[5,] 0.9901852 0.9901852
[6,] 0.9307072 0.9307072
I am working on a Shiny app design and I wonder how to handle clicking a point on a dot-plot in Shiny R. I want to design an app such that, if the user selects a point on the dot-plot, they can see the corresponding table (3 columns). The plot is a p-value distribution. The following is my code:
ui:
library(shiny)
library(shinythemes)
library(pander)
fluidPage(theme = shinytheme("flatly"),
titlePanel(h3("Chi-Squared Goodness-fit-Test and Simulation ")),
fluidRow(
column(3, offset = 0.5,wellPanel(
sliderInput("n", "Number of Samples:", min = 10, max = 1000, value = 50 ,
step = 1),
sliderInput("n2", "The number of Categories:", min = 1, max = 8, value = 5 ,
step = 1) ,
sliderInput("n3", "The number of Simulation:", min = 1, max = 1000, value = 5 ,
step = 1),
submitButton("Submit")
)),
column(7,align="center", tableOutput("values")),
column(5,offset=1, align="center",
plotOutput("plot1", click=" Click" ,width = 600, height = 430)
)
)
)
server:
library(shiny)
library(shinythemes)
library(pander)
function(input, output) {
  output$plot1 <- renderPlot({
    num_of_samples = input$n
    nn = input$n2
    ss = input$n3
    pp = numeric(0)
    for (i in 1:ss){
      x <- sample(1:nn, num_of_samples, replace=T)
      nulls = 1/(1:nn)
      total = table(x)
      expected = nulls*total
      a <- chisq.test(table(x), correct= FALSE, rescale.p=TRUE )
      pp[i] = a$p.value
    }
    if (ss<=50) {
      stripchart(pp, method = "stack", offset = 0.8, at = .15, pch = 19,
                 main="P-value Distribution of Chi-Squared Test", xlab="P Value")
    } else {
      hist(pp, breaks=5, main="P-value Distribution of Chi-Squared Test", xlab="P Value")
    }
  })
  sliderValues <- reactive({
    num_of_samples = input$n
    nn = input$n2
    # pp=numeric(0)
    x <- sample(1:nn, num_of_samples, replace=T)
    # Compose data frame
    xx = cbind(paste0(LETTERS[1:nn]), table(x), round(rep(num_of_samples/nn, nn), 2))
    xx = as.data.frame(xx)
    colnames(xx) = c("Categories", "Observed Value", "Expected Value")
    xx
  })
  output$values <- renderTable({
    sliderValues()},
    align="c"
  )
}