---
title: "Gellman and Hill 13.2"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(nlme)
```
_Models for adjusting individual ratings: a committee of 10 persons is evaluating 100 job applicants. Each person on the committee reads 30 applications (structured so that each application is read by 3 people) and gives a numerical rating between 1 and 10._
_(a) It would be nautal to rate the applications based on their combined scores; however, there is aworry that different raters use different standards (severity) and we would like to correct for this. Set up a model for the ratings (with parameters for the applicants and the raters)._
### Problem setup
I <- 100L # applicants
J <- 10L # raters
W <- 3L # ratings per applicant
theta[i] -- Ability of Applicant i
theta ~ unif(1,10) _1 and 10 are hyperparameters_
eta[j] -- Severity of Rater j
eta ~ norm(0,tau) _0 and tau are hypeparameters_
tau <- 2
epsilon[i,j] -- measurement error
epsilon ~ norm(0,sigma)
sigma <- 1
Y[i,j] = theta[i]+eta[j] +epsilon[i,j]
Score ~ (1 | applicant) + (1 | rater)
### Design Matrix
There are `r choose(10,3)` ways of assigning 3 raters, so more patterns than we have rows.
```{r}
I <- 100L # applicants
J <- 10L # raters
W <- 3L # ratings per applicant
tau <- 2 # SD of raters
sigma <- 1 # Error SD
assignment <- matrix(0L,I,J)
for (i in 1L:I) {
workload <- colSums(assignment)
available <- which (workload < W*I/J)
if (i > 75L)
cat("Round ",i,": available = ",
paste(available,collapse=", "),"\n")
## If not W raters available, then swap until we have enough free raters.
while (length(available) < W) {
slacker <- which.min(workload)
pswaps <- which(!assignment[1L:(i-1L),slacker])
swaprow <- sample(pswaps,1L)
swapcol <- sample(which(as.logical(assignment[swaprow,])),1L)
assignment[swaprow,swapcol] <- 0L
assignment[swaprow,slacker] <- 1L
workload <- colSums(assignment)
available <- which(workload < W*I/J)
cat("Round ",i,"x: availble=",paste(available,collapse=", "),
"\n")
}
assignment[i,sample(available,W)] <- 1L
}
colSums(assignment)
rowSums(assignment)
write.csv(assignment,"assignment.csv")
```
## Simulating parameters
ability[i] -- random value between 1 and 10
severity[j] -- random value with mean 0 and SD tau=2
rating[i,j] = ability[i] + severity[j] + error[i,j]
```{r}
## simulate parameters/latent variables
ability <- runif(I,1,10)
severity <- rnorm(J,0,tau)
applicant <- rep(1L:I,each=W)
rater <-
sapply(1L:I,
function (i)
which(as.logical(assignment[i,])))
str(rater)
rating <- ability[applicant] + severity[rater] + rnorm(I*W,0,sigma)
rating <- pmax(1,pmin(rating,10)) ## between 1 and 10
ratings.df <- data.frame(applicant=applicant,
rater=as.vector(rater),
rating=rating)
ratings.df
write.csv(ratings.df,"ratings.csv")
```
# Plot the data to see if it worked
```{r}
library(lattice)
ratings.df1 <- data.frame(ratings.df, ability=ability[applicant], severity=severity[rater])
```
```{r}
xyplot(rating~ability,data=ratings.df1)
xyplot(rating~ability|rater,data=ratings.df1)
boxplot(rating~rater,data=ratings.df1)
```
Fit the model
```{r}
library(arm)
fit <- lmer(rating ~ (1|applicant) + (1|rater), data=ratings.df)
display(fit)
sqrt(9^2/12) # sd of unif(1,10)
```
## Check our simulation.
```{r}
plot(ability,coef(fit)$applicant[,1])
plot(severity,ranef(fit)$rater[,1])
plot(fit)
boxplot(resid(fit)~as.vector(rater))
```
_(b) It is possible that some persons on the committee show more variation than others in their ratings. Expand your model to allow this._
Change this part of the model:
epsilon[i,j] -- measurement error
epsilon ~ norm(0,sigma[j])
sigma[j] ~ gamma(alpha,scale)
alpha <- 2
scale <- .5
```{r}
alpha <- 2
scale <- .5
curve(dgamma(x,alpha,scale=scale),xlim=c(0,5))
```
Simulation is almost the same, and we can reuse the assignment matrix.
```{r}
## simulate parameters/latent variables
#ability <- runif(I,1,10)
#severity <- rnorm(J,0,tau)
#applicant <- rep(1L:I,each=W)
#rater <-
# sapply(1L:I,
# function (i)
# which(as.logical(assignment[i,])))
#str(rater)
sigma2 <- rgamma(J,alpha,scale=scale)
rating2 <- ability[applicant] + severity[rater] + rnorm(I*W,0,sigma2[rater])
rating2 <- pmax(1,pmin(rating2,10)) ## between 1 and 10
ratings2.df <- data.frame(applicant=applicant,
rater=as.vector(rater),
rating=rating2,
severity=severity[rater],
ability=ability[applicant],
sigma2=sigma2[rater])
ratings2.df
```
```{r}
xyplot(rating~ability,data=ratings2.df)
xyplot(rating~ability|rater,data=ratings2.df)
boxplot(rating~rater,data=ratings2.df)
```
Go ahead and fit the wrong model (we know this doesn't have the right variance structure).
```{r}
fit2 <- lmer(rating ~ (1|applicant) + (1|rater), data=ratings2.df)
display(fit2)
```
```{r}
plot(ability,coef(fit2)$applicant[,1])
plot(severity,ranef(fit2)$rater[,1])
plot(fit2)
boxplot(resid(fit2)~as.vector(rater))
```