### In order to make the code work across both JAGS and Stan, we need
### to first convert Stan and JAGS output to coda output.  The file
### coda2.R has the necessary conversion routines.

## Also, we want to make sure that the code is safe in the sense that
## if it is passed a NULL sample, it will do something sensible.

### Assume we are looking at the output of an MCMC run (this is the
### permuted output, i.e., data from each chain is appended).

### Define the following constants/data:
### J -- number of observations
### K -- number of components
### R -- (total) number of MCMC simulations (discarding warmup)
###
### Y[J]       -- data for person i (log pause times)
### pi[R,K]    -- component probabilities for person i
### mu[R,K]    -- component means for person i
### sigma[R,K] -- component sds for person i

### The function combineChains (in coda2.R) is useful for moving an
### mcmc.list object into this shape.  Note that the function
### combineChains is applied to pi, mu and sigma, collapsing across
### chains when an mcmc.list object is supplied instead of a matrix.

### Assume for most applications K < J < R, so we want to vectorize on R.

### Calculate likelihoods of observations.
## This function returns an R x J x K array of
## p(Y[j]|mu[r,k],sigma[r,k])*pi[r,k]
mcmixZPost <- function (Y, pi, mu, sigma) {
  J <- length(Y)
  R <- nrow(pi)
  K <- ncol(pi)
  post <- array(NA, c(R, J, K))
  for (j in 1:J) {
    for (k in 1:K) {
      post[, j, k] <- dnorm(Y[j], mu[, k], sigma[, k]) * pi[, k]
    }
  }
  post
}

## The posterior predictive density.
## This is an R x J matrix of p(Y[j]|pi[r,],mu[r,],sigma[r,])
mcmixPost <- function (Y, pi, mu, sigma) {
  J <- length(Y)
  R <- nrow(pi)
  K <- ncol(pi)
  post <- array(0, c(R, J))
  for (j in 1:J) {
    for (k in 1:K) {
      post[, j] <- post[, j] + dnorm(Y[j], mu[, k], sigma[, k]) * pi[, k]
    }
  }
  post
}

### Assign observations to components.
## This function returns an R x J x K array of
## p(Z[j]|Y[j],mu[r,k],sigma[r,k],pi[r,k])
mcmixZprob <- function (Y, pi, mu, sigma) {
  J <- length(Y)
  R <- nrow(pi)
  K <- ncol(pi)
  post <- array(NA, c(R, J, K))
  for (j in 1:J) {
    for (k in 1:K) {
      post[, j, k] <- dnorm(Y[j], mu[, k], sigma[, k]) * pi[, k]
    }
    post[, j, ] <- post[, j, ] / apply(post[, j, ], 1, sum)
  }
  post
}

## Returns an R x J data frame of factors with levels 1:K assigning each
## observation to the modal component.
mcmixZmode <- function (Y, pi, mu, sigma) {
  J <- length(Y)
  R <- nrow(pi)
  K <- ncol(pi)
  mode <- as.data.frame(matrix(NA, R, J))  # Need a data frame or we lose
                                           # the factorness of the result.
  for (j in 1:J) {
    post <- array(NA, c(R, K))
    for (k in 1:K) {
      post[, k] <- dnorm(Y[j], mu[, k], sigma[, k]) * pi[, k]
    }
    post <- post / apply(post, 1, sum)
    mode[[j]] <- factor(apply(post, 1, which.max), levels = 1:K)
  }
  names(mode) <- paste("Obs", 1:J, sep = "")
  mode
}

## WAIC & DIC
mcmixWAIC <- function (Y, pi, mu, sigma) {
  ppd <- mcmixPost(Y, pi, mu, sigma)       # R x J matrix
  lppd <- sum(log(apply(ppd, 2, mean)))
  pWAIC1 <- 2 * sum(log(apply(ppd, 2, mean)) - apply(log(ppd), 2, mean))
  pWAIC2 <- sum(apply(log(ppd), 2, var))
  WAIC1 <- -2 * (lppd - pWAIC1)
  WAIC2 <- -2 * (lppd - pWAIC2)
  c(lppd = lppd, pWAIC1 = pWAIC1, WAIC1 = WAIC1, pWAIC2 = pWAIC2, WAIC2 = WAIC2)
}

mcmixDIC <- function (Y, pi, mu, sigma) {
  ppd <- mcmixPost(Y, pi, mu, sigma)       # R x J matrix
  sppd <- apply(log(ppd), 1, sum)          # Sum across observations.
  pi.bayes <- matrix(apply(pi, 2, mean), 1)
  mu.bayes <- matrix(apply(mu, 2, mean), 1)
  sigma.bayes <- matrix(sqrt(apply(sigma^2, 2, mean)), 1)  # Take the average of
                                           # the variances, not the sds.
  lppd.bayes <- sum(log(mcmixPost(Y, pi.bayes, mu.bayes, sigma.bayes)))
  pDIC <- 2 * (lppd.bayes - mean(sppd))
  pDICalt <- 2 * var(sppd)
  DIC <- -2 * (lppd.bayes - pDIC)
  DICalt <- -2 * (lppd.bayes - pDICalt)
  c(lppd = mean(sppd), lppd.bayes = lppd.bayes, pDIC = pDIC, DIC = DIC,
    pDICalt = pDICalt, DICalt = DICalt)
}
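### Usage sketch (an assumption added for illustration, not part of the
### original analysis).  The summaries above expect Y as a numeric vector
### and pi, mu, sigma as R x K matrices; with real output those matrices
### would come from combineChains (coda2.R) applied to the fitted
### mcmc.list.  Here a tiny fake posterior sample is generated so the
### expected shapes and call pattern are clear.
demoMcmixSummaries <- function () {
  R <- 100; K <- 2                        # 100 draws, 2 components
  Y <- log(c(1.2, 0.8, 2.5, 3.1, 0.6))    # J = 5 fake log pause times
  pi <- matrix(rep(c(0.4, 0.6), each = R), R, K)   # Component probabilities
  mu <- cbind(rnorm(R, 0, 0.1), rnorm(R, 1, 0.1))  # Component means
  sigma <- matrix(rexp(R * K, 2) + 0.5, R, K)      # Component sds
  list(zprob = mcmixZprob(Y, pi, mu, sigma),  # R x J x K membership probs
       zmode = mcmixZmode(Y, pi, mu, sigma),  # Modal component per draw
       waic  = mcmixWAIC(Y, pi, mu, sigma),
       dic   = mcmixDIC(Y, pi, mu, sigma))
}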
### Hierarchical model
### Y[I,J]       -- data for person i (log pause times)
### pi[R,I,K]    -- component probabilities for person i
### mu[R,I,K]    -- component means for person i
### sigma[R,I,K] -- component sds for person i
### The expression Karray(combineChains(mcmc),K) is useful for
### reshaping the data into this form.
mchmixWAIC <- function (Y, J, pi, mu, sigma) {
  pi <- combineChains(pi)
  mu <- combineChains(mu)
  sigma <- combineChains(sigma)
  lppd <- 0
  pWAIC1 <- 0
  pWAIC2 <- 0
  I <- nrow(Y)
  for (i in 1:I) {
    waic <- mcmixWAIC(Y[i, 1:J[i]], pi[, i, ], mu[, i, ], sigma[, i, ])
    lppd <- lppd + waic["lppd"]
    pWAIC1 <- pWAIC1 + waic["pWAIC1"]
    pWAIC2 <- pWAIC2 + waic["pWAIC2"]
  }
  WAIC1 <- -2 * (lppd - pWAIC1)
  WAIC2 <- -2 * (lppd - pWAIC2)
  result <- c(lppd = lppd, pWAIC1 = pWAIC1, WAIC1 = WAIC1,
              pWAIC2 = pWAIC2, WAIC2 = WAIC2)
  ## R drops names when I want them and preserves them when I don't.  Argh!
  names(result) <- c("lppd", "pWAIC1", "WAIC1", "pWAIC2", "WAIC2")
  result
}

## For DIC, we need to get the total deviance before summarizing, so the
## looping is organized somewhat differently.
mchmixDIC <- function (Y, J, pi, mu, sigma) {
  lppd <- 0
  lppd.bayes <- 0
  pDIC1 <- 0
  pDIC2 <- 0
  I <- nrow(Y)
  R <- dim(pi)[1]                          # Number of MCMC draws.
  sppd <- rep(0, R)
  for (i in 1:I) {
    ppd <- mcmixPost(Y[i, 1:J[i]], pi[, i, ], mu[, i, ], sigma[, i, ])  # R x J matrix
    sppd <- sppd + apply(log(ppd), 1, sum) # Sum across observations.
    pi.bayes <- matrix(apply(pi[, i, ], 2, mean), 1)
    mu.bayes <- matrix(apply(mu[, i, ], 2, mean), 1)
    sigma.bayes <- matrix(sqrt(apply(sigma[, i, ]^2, 2, mean)), 1)  # Take the average
                                           # of the variances, not the sds.
    lppd.bayes <- lppd.bayes +
      sum(log(mcmixPost(Y[i, 1:J[i]], pi.bayes, mu.bayes, sigma.bayes)))
  }
  pDIC <- 2 * (lppd.bayes - mean(sppd))
  pDICalt <- 2 * var(sppd)
  DIC <- -2 * (lppd.bayes - pDIC)
  DICalt <- -2 * (lppd.bayes - pDICalt)
  c(lppd = mean(sppd), lppd.bayes = lppd.bayes, pDIC = pDIC, DIC = DIC,
    pDICalt = pDICalt, DICalt = DICalt)
}
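### Usage sketch for the hierarchical summaries (an assumption added for
### illustration, not part of the original analysis).  Here pi, mu and
### sigma are R x I x K arrays and J[i] gives the number of observations
### for person i; with real output the arrays would come from
### Karray(combineChains(mcmc), K).  This sketch assumes combineChains
### (coda2.R) passes a plain array through unchanged, as described above.
demoMchmixSummaries <- function () {
  R <- 100; I <- 3; K <- 2                # 100 draws, 3 persons, 2 components
  J <- c(5, 4, 6)                         # Observations per person
  Y <- matrix(NA, I, max(J))              # Ragged data padded with NA
  for (i in 1:I) Y[i, 1:J[i]] <- rnorm(J[i], 0.5, 1)
  pi <- array(rep(c(0.4, 0.6), each = R * I), c(R, I, K))
  mu <- array(rnorm(R * I * K, rep(c(0, 1), each = R * I), 0.1), c(R, I, K))
  sigma <- array(rexp(R * I * K, 2) + 0.5, c(R, I, K))
  list(waic = mchmixWAIC(Y, J, pi, mu, sigma),
       dic  = mchmixDIC(Y, J, pi, mu, sigma))
}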