--- title: "Gellman and Hill 13.2" output: html_document --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) library(nlme) ``` _Models for adjusting individual ratings: a committee of 10 persons is evaluating 100 job applicants. Each person on the committee reads 30 applications (structured so that each application is read by 3 people) and gives a numerical rating between 1 and 10._ _(a) It would be nautal to rate the applications based on their combined scores; however, there is aworry that different raters use different standards (severity) and we would like to correct for this. Set up a model for the ratings (with parameters for the applicants and the raters)._ ### Problem setup I <- 100L # applicants J <- 10L # raters W <- 3L # ratings per applicant theta[i] -- Ability of Applicant i theta ~ unif(1,10) _1 and 10 are hyperparameters_ eta[j] -- Severity of Rater j eta ~ norm(0,tau) _0 and tau are hypeparameters_ tau <- 2 epsilon[i,j] -- measurement error epsilon ~ norm(0,sigma) sigma <- 1 Y[i,j] = theta[i]+eta[j] +epsilon[i,j] Score ~ (1 | applicant) + (1 | rater) ### Design Matrix There are `r choose(10,3)` ways of assigning 3 raters, so more patterns than we have rows. ```{r} I <- 100L # applicants J <- 10L # raters W <- 3L # ratings per applicant tau <- 2 # SD of raters sigma <- 1 # Error SD assignment <- matrix(0L,I,J) for (i in 1L:I) { workload <- colSums(assignment) available <- which (workload < W*I/J) if (i > 75L) cat("Round ",i,": available = ", paste(available,collapse=", "),"\n") ## If not W raters available, then swap until we have enough free raters. while (length(available) < W) { slacker <- which.min(workload) pswaps <- which(!assignment[1L:(i-1L),slacker]) swaprow <- sample(pswaps,1L) swapcol <- sample(which(as.logical(assignment[swaprow,])),1L) assignment[swaprow,swapcol] <- 0L assignment[swaprow,slacker] <- 1L workload <- colSums(assignment) available <- which(workload < W*I/J) cat("Round ",i,"x: availble=",paste(available,collapse=", "), "\n") } assignment[i,sample(available,W)] <- 1L } colSums(assignment) rowSums(assignment) write.csv(assignment,"assignment.csv") ``` ## Simulating parameters ability[i] -- random value between 1 and 10 severity[j] -- random value with mean 0 and SD tau=2 rating[i,j] = ability[i] + severity[j] + error[i,j] ```{r} ## simulate parameters/latent variables ability <- runif(I,1,10) severity <- rnorm(J,0,tau) applicant <- rep(1L:I,each=W) rater <- sapply(1L:I, function (i) which(as.logical(assignment[i,]))) str(rater) rating <- ability[applicant] + severity[rater] + rnorm(I*W,0,sigma) rating <- pmax(1,pmin(rating,10)) ## between 1 and 10 ratings.df <- data.frame(applicant=applicant, rater=as.vector(rater), rating=rating) ratings.df write.csv(ratings.df,"ratings.csv") ``` # Plot the data to see if it worked ```{r} library(lattice) ratings.df1 <- data.frame(ratings.df, ability=ability[applicant], severity=severity[rater]) ``` ```{r} xyplot(rating~ability,data=ratings.df1) xyplot(rating~ability|rater,data=ratings.df1) boxplot(rating~rater,data=ratings.df1) ``` Fit the model ```{r} library(arm) fit <- lmer(rating ~ (1|applicant) + (1|rater), data=ratings.df) display(fit) sqrt(9^2/12) # sd of unif(1,10) ``` ## Check our simulation. ```{r} plot(ability,coef(fit)$applicant[,1]) plot(severity,ranef(fit)$rater[,1]) plot(fit) boxplot(resid(fit)~as.vector(rater)) ``` _(b) It is possible that some persons on the committee show more variation than others in their ratings. Expand your model to allow this._ Change this part of the model: epsilon[i,j] -- measurement error epsilon ~ norm(0,sigma[j]) sigma[j] ~ gamma(alpha,scale) alpha <- 2 scale <- .5 ```{r} alpha <- 2 scale <- .5 curve(dgamma(x,alpha,scale=scale),xlim=c(0,5)) ``` Simulation is almost the same, and we can reuse the assignment matrix. ```{r} ## simulate parameters/latent variables #ability <- runif(I,1,10) #severity <- rnorm(J,0,tau) #applicant <- rep(1L:I,each=W) #rater <- # sapply(1L:I, # function (i) # which(as.logical(assignment[i,]))) #str(rater) sigma2 <- rgamma(J,alpha,scale=scale) rating2 <- ability[applicant] + severity[rater] + rnorm(I*W,0,sigma2[rater]) rating2 <- pmax(1,pmin(rating2,10)) ## between 1 and 10 ratings2.df <- data.frame(applicant=applicant, rater=as.vector(rater), rating=rating2, severity=severity[rater], ability=ability[applicant], sigma2=sigma2[rater]) ratings2.df ``` ```{r} xyplot(rating~ability,data=ratings2.df) xyplot(rating~ability|rater,data=ratings2.df) boxplot(rating~rater,data=ratings2.df) ``` Go ahead and fit the wrong model (we know this doesn't have the right variance structure). ```{r} fit2 <- lmer(rating ~ (1|applicant) + (1|rater), data=ratings2.df) display(fit2) ``` ```{r} plot(ability,coef(fit2)$applicant[,1]) plot(severity,ranef(fit2)$rater[,1]) plot(fit2) boxplot(resid(fit2)~as.vector(rater)) ```