library("mixtools") #library("coda") ## This function generates the inits. ## Chain is the chain number ## Y is the data as a ragged array, Level 2 index is the first index. ## J is the number of Level 1 units for each Level 2 unit. ## resample -- logical flag, if true, data should be a sample ## frac -- fraction of the original data to use in the sample ## replace -- should the sample be with replacement (frac=1,replace=TRUE is a bootstrap). ## n.retry -- number of times to resample if EM does not converge. hmmInits<- function (chain,Y,J,resample=chain>1, frac=chain.frac[chain], replace=chain.boot & (chain >1), n.retry=3,decreasing=FALSE) { I <- nrow(Y) init.mu <- matrix(NA,I,K) init.tau <- matrix(NA,I,K) init.pi <- matrix(NA,I,K) for (n in 1:I) { dat <- Y[n,1:J[n]] ## We will retry the sampling when EM does not converge. We will ## give up if retry reaches 0 without convergence success <- FALSE retry <- n.retry while (!success && retry >0) { if (resample) { ## generate random starting points consistent with the observed ## data by running EM on a random subset of the data subset <- sample(dat,round(frac*length(dat)),replace=replace) } else { subset <- dat retry <- 0 } nm <- try(normalmixEM(subset,k=K)) if (class(nm) == "try-error") { retry <- retry -1 } else { success <- TRUE } } if (!success) { cat("Could not find an adequate sample for subject ",n,"chain ",d,"\n") next } ## Okay, successfully fit the model. ## change variabes into the parameterization we use. nm$pi <- nm$lambda nm$tau <- 1/nm$sigma^2 ## Identify the clusters ord <- order(nm[[level1.idparam]],decreasing=decreasing) init.pi[n,] <- nm$pi[ord] init.mu[n,] <- nm$mu[ord] init.tau[n,] <- nm$tau[ord] } pi0 <- apply(init.pi,2,mean,na.rm=TRUE) mu0 <- apply(init.mu,2,mean,na.rm=TRUE) beta0 <- apply(init.mu,2,sd,na.rm=TRUE) tau0 <- apply(log(init.tau),2,mean,na.rm=TRUE) gamma0 <- apply(log(init.tau),2,sd,na.rm=TRUE) ## Mean imputation for in any NAs because model did not fit. if (any (is.na(init.pi[1,]))) { init.pi[is.na(init.pi[1,]),] <- pi0 init.tau[is.na(init.pi[1,]),] <- exp(tau0) init.mu[is.na(init.pi[1,]),] <- mu0 } theta <- sweep(sweep(init.mu,2,mu0),2,beta0,"/") eta <- sweep(sweep(log(init.tau),2,tau0),2,gamma0,"/") list(pi=init.pi, ## mu=init.mu, ## Don't think we can ## initialize this as it is now a derived ## parameter ## tau=init.tau,## Same problem ## But we can initialize theta and eta theta=theta, eta=eta, alpha0=pi0, alphaN=I/2, mu0 = mu0, beta0 = beta0, tau0=tau0, gamma0=gamma0 ) } ### In order to make the code work across both JAGS and Stan, we need ### to first convert Stan and JAGS output to coda output. The file ### stancoda.R has the necessary conversion routines. ## Also, we want to make sure that the code is safe in the sense that ## if it passed a NULL sample, it will do something sensible. ### sortindex --- this function generates a set of indexes, one for ### each MCMC sample in each chain, that will be used to sort the ### other samples. ## samples -- an mcmc.list object containing the results for the idparam ## decreasing -- if true, sort in decreasing instead of increasing ## order. sortindex <- function(samples, decreasing=FALSE) { lapply(samples, function (theta) t(apply(theta,1,order,decreasing=decreasing)) ) } ## index is output of previous function. Might want to cache this if ## sorting both level1 and level2. level2sort <- function(samples,params=names(samples), index=sortindex(samples[[level2.idparam]],decreasing), verbose=TRUE) { if (length(index)==0) return(samples) #Sort failed for some reason result <- lapply(params, function (param) { if (verbose) cat("Labeling components for",param,"\n") mcmc <- samples[[param]] for (d in 1:length(mcmc)) { for (r in 1:nrow(mcmc[[d]])) { mcmc[[d]][r,] <- mcmc[[d]][r,index[[d]][r,]] } } ## varnames(mcmc) <- paste(param,"[",1:nvar(mcmc), ## "]",sep="") mcmc}) names(result) <- params result } level1sort <- function(samples,params=names(samples), index=sortindex(samples[[level2.idparam]],decreasing), K=K, verbose=TRUE) { if (length(index)==0) return(samples) #Sort failed for some reason result <- lapply(params, function (param) { cat("Labeling components for ",param,"\n") mclist <- samples[[param]] N <- nvar(mclist)/K for (d in 1:length(mclist)) { for (r in 1:nrow(mclist[[d]])) { ## The expression below is essentally this ## inner loop vectorized for speed ## for (n in 0:(N-1)) { ## mclist[[d]][r,n*K+1:K] <- ## mclist[[d]][r,n*K+index[[d]][r,]] ## } mclist[[d]][r,] <- mclist[[d]][r,rep(1:N,K)+ rep((index[[d]][r,]-1)*N,each=N)] } } ## Not sure if we need this, should not be taken ## care of in stan2coda or jags2coda ## varnames(mclist) <- paste(param,"[", ## rep(1:N,each=K),",", ## rep(1:K,N), ## "]",sep="") mclist}) names(result) <- params result }