## Friday, November 20, 2009

### My implementation of Berry and Berry's hierarchical Bayes algorithm for adverse events

I've been working on this for quite some time (see here for a little background), so I'm pleased that it looks close to done at least as far as the core algorithm. It uses global variables for now, and I'm sure there are a couple of other bugs lurking, but here it is, after the jump.

```const.sqrt2pi <- sqrt(2*3.14159265358979)
logit <- function(x) return(x/(1-x))
thetacond.log <- function(b,j,thet=theta[b,j]) {
res <- y[b,j]*(thet+gamm[b,j])-nt*log(1+exp(thet+gamm[b,j]))+
log(pi[b]*(thet==0) +(1-pi[b])/
(const.sqrt2pi*sigma.theta[b])*exp(-0.5*(thet-mu.theta[b])^2/sigma.theta[b]^2))
return(res)
}
thetabj.update <- function() {
# this is a mixture MH step, as described in Berry and Berry (2004)
for (b in 1:nbodysys) {
for (j in 1:nae[b]) {
# simulate new candidate - point mass at 0 and normal mixture
if (runif(1)<0.5) {
thetabj.cand <- 0
} else {
thetabj.cand <- rnorm(1,mean=theta[b,j],sd=sigma.mh.theta)
}
#cat("b=",b,", j=",j,sep="")
ccond.ratio <- exp(thetacond.log(b,j,thet=thetabj.cand)-thetacond.log(b,j))
if (theta[b,j]==0 && thetabj.cand==0 && runif(1)
theta[b,j]<<-thetabj.cand
#cat("cond=1, acceptance probability=",ccond.ratio,'\n',sep="")
}
else if (thetabj.cand==0 && theta[b,j]!=0 && runif(1)<
ccond.ratio*exp(-0.5*theta[b,j]^2/sigma.mh.theta^2)/sigma.mh.theta/const.sqrt2pi)
{
theta[b,j]<<-thetabj.cand
#cat("cond=2, acc prob=",ccond.ratio*exp(-0.5*theta[b,j]^2/sigma.mh.theta^2)/sigma.mh.theta/const.sqrt2pi,'\n',sep="")
}
else if (thetabj.cand!=0 && theta[b,j]==0 &&
runif(1))
{
theta[b,j]<<-thetabj.cand
#cat("cond=3, acc prob=",ccond.ratio/exp(-0.5*theta[b,j]^2/sigma.mh.theta^2)*sigma.mh.theta*const.sqrt2pi,'\n',sep="")
}
else if (thetabj.cand!=0 && theta[b,j]!=0 && runif(1)
{
theta[b,j]<<-thetabj.cand
#cat("cond=4, acceptance probability=",ccond.ratio,'\n',sep="")
}
#else cat('\n')
}
}
}
gammabj.cond.log <- function(b,j,g=gamm[b,j]) {
tmp1 <- exp(theta[b,j]+g)
tmp2 <- exp(g)
return(y[b,j]*(theta[b,j]+g)-nt*log(1+tmp1)+x[b,j]*g-nc*log(1+tmp2)
-0.5/sigma.gamma^2*(g-mu.gamma[b])^2)
}
gammabj.update <- function() {
for (b in 1:nbodysys) {
for (j in 1:nae[b]) {
cand <- rnorm(1,mean=gamm[b,j],sd=sigma.mh.gamma)
acc.prob <- exp(gammabj.cond.log(b,j,g=cand)-gammabj.cond.log(b,j))
#if (acc.prob>1) then acc.prob<-1 # not sure we need this for program
if (runif(1)<<-cand
}
}
}
pib.update <- function() {
for (b in 1:nbodysys) {
pi[b] <<- rbeta(1,alpha.pi+sum(theta[b,1:nae[b]]==0,na.rm=T),beta.pi+nae[b]-sum(theta[b,1:nae[b]]==0,na.rm=T))
}
}
sigma.theta.update <- function() {
for (b in 1:nbodysys) {
sigma2.rand <- rgamma(1,nae[b]/2+alpha.theta,
scale=1/beta.theta+0.5*sum((theta[b,1:nae[b]]-mu.theta[b])^2,na.rm=T)) # fixed
sigma.theta[b] <<- 1/sqrt(sigma2.rand)
}
}
sigma.gamma.update <- function()
{
tmp<-0
for (b in 1:nbodysys) {
tmp <- tmp+sum((gamm[b,1:nae[b]]-mu.gamma[b])^2,na.rm=T)
}
sigma2.rand <- rgamma(1,0.5*sum(nae)+alpha.sigma.gamma,scale=1/beta.sigma.gamma+
0.5*tmp) #fixed
sigma.gamma <<- 1/sqrt(sigma.gamma)
}
tau.gamma0.update <- function()
{
tau2.gamma0 <- rgamma(1,0.5*nbodysys+alpha.tau.gamma,scale=1/beta.tau.gamma+0.5*
sum((mu.gamma-mu.gamma0)^2)) #fixed
tau.gamma0 <<- 1/sqrt(tau2.gamma0)
}
tau.theta0.update <- function()
{
tau2.theta0 <- rgamma(1,0.5*nbodysys+alpha.tau.theta,scale=1/beta.tau.theta+0.5*
sum((mu.theta-mu.theta0)^2)) #fixed
tau.theta0 <<- 1/sqrt(tau2.theta0)
}
mu.gamma.update <- function() {
for (b in 1:nbodysys) {
gibbs.mu <- (tau.gamma0^2*sum(gamm[b,1:nae[b]],na.rm=T)+sigma.gamma^2*mu.gamma0)/
(tau.gamma0^2*nae[b]+sigma.gamma^2)
gibbs.sigma2 <- tau.gamma0^2*sigma.gamma^2/(tau.gamma0^2*nae[b]+sigma.gamma^2)
mu.gamma[b] <<- rnorm(1,mean=gibbs.mu,sd=sqrt(gibbs.sigma2))
}
}
mu.theta.update <- function() {
for (b in 1:nbodysys) {
gibbs.mu <- (tau.theta0^2*sum(theta[b,1:nae[b]],na.rm=T)+sigma.theta^2*mu.theta0)/
(tau.theta0^2*nae[b]+sigma.theta^2)
gibbs.sigma2 <- tau.theta0^2*sigma.theta^2/(tau.theta0^2*nae[b]+sigma.theta^2)
mu.theta[b] <<- rnorm(1,mean=gibbs.mu,sd=sqrt(gibbs.sigma2))
}
}
mu.gamma0.update <- function()
{
gibbs.mu <- (tau.gamma00^2*sum(mu.gamma)+tau.gamma0^2*mu.gamma00) /
(tau.gamma00^2*nbodysys+tau.gamma0^2)
gibbs.sigma2 <- tau.gamma00^2*tau.gamma0^2/(tau.gamma00^2*nbodysys +
tau.gamma0^2)
mu.gamma0 <<- rnorm(1,mean=gibbs.mu,sd=sqrt(gibbs.sigma2))
}
mu.theta0.update <- function()
{
gibbs.mu <- (tau.theta00^2*sum(mu.theta)+tau.theta0^2*mu.theta00) /
(tau.theta00^2*nbodysys+tau.theta0^2)
gibbs.sigma2 <- tau.theta00^2*tau.theta0^2/(tau.theta00^2*nbodysys +
tau.theta0^2)
mu.theta0 <<- rnorm(1,mean=gibbs.mu,sd=sqrt(gibbs.sigma2))
}
alpha.pi.logcond <- function(alpha=alpha.pi) {
if (alpha<=1) return(-Inf)
else {
return(nbodysys*(lgamma(alpha+beta.pi)-lgamma(alpha))+
(alpha+1)*sum(log(pi))-alpha*lambda.alpha)
}
}
alpha.pi.update <- function() {
cand <- rnorm(1,mean=alpha.pi,sd=sigma.mh.theta)
acc.prob <- exp(alpha.pi.logcond(alpha=cand)-alpha.pi.logcond())
if (runif(1)<<-cand
}
beta.pi.logcond <- function(beta=beta.pi) {
if (beta<=1) return(-Inf)
else {
return(nbodysys*(lgamma(beta+alpha.pi)-lgamma(beta))+
(beta+1)*sum(log(1-pi))-beta*lambda.beta)
}
}
beta.pi.update <- function() {
cand <- rnorm(1,mean=beta.pi,sd=sigma.mh.theta)
acc.prob <- exp(beta.pi.logcond(beta=cand)-beta.pi.logcond())
if (runif(1)<<-cand
}
# set up constants as in sec 4
mu.theta00<-0
tau.theta00 <- sqrt(10)
mu.gamma00 <- 0
tau.gamma00 <- sqrt(10)
alpha.theta <- 3
beta.theta <- 1
alpha.theta0 <- 3
beta.theta0 <- 1
alpha.tau.gamma<-3
beta.tau.gamma <- 1
alpha.tau.theta <- 3
beta.tau.theta <- 1
alpha.sigma.gamma<-3
beta.sigma.gamma <- 1
lambda.alpha<-1
lambda.beta <- 1
# parms for the example in berry and berry
ae.dat\$bodysysf <- factor(ae.dat\$bodysys)
nt <- 148
nc <- 132
nae <- tapply(ae.dat\$aenum,ae.dat\$bodysysf,length)
nbodysys <- length(levels(ae.dat\$bodysysf))
y <- matrix(NA,nrow=nbodysys,ncol=max(nae))
x <- matrix(NA,nrow=nbodysys,ncol=max(nae))
for (i in 1:nbodysys) {
y[i,1:nae[i]] <- ae.dat\$trtct[ae.dat\$bodysysf==as.numeric(levels(ae.dat\$bodysysf)[i])]
x[i,1:nae[i]] <- ae.dat\$contct[ae.dat\$bodysysf==as.numeric(levels(ae.dat\$bodysysf)[i])]
}
sigma.mh.theta <- 2
sigma.mh.gamma <- 2
# set up containers
theta <- matrix(NA,nrow=nbodysys,ncol=max(nae))
gamm <- matrix(NA,nrow=nbodysys,ncol=max(nae))
pi <- rep(NA,length=nbodysys)
sigma.theta <- rep(NA,length=nbodysys)
sigma.gamma <- NA
tau.gamma0 <- NA
tau.theta0 <- NA
mu.gamma <- rep(NA,length=nbodysys)
mu.theta <- rep(NA,length=nbodysys)
mu.gamma0 <- NA
mu.theta0 <- NA
alpha.pi <- NA
beta.pi <- NA

# initial values
gamm <- logit(x/nc)
theta <- logit(y/nt)-gamm
pi <- rep(.5,length=nbodysys)
sigma.theta <- rep(1,length=nbodysys)
sigma.gamma <- 1
tau.gamma0 <- 1
tau.theta0 <- 1
mu.gamma <- rep(0,length=nbodysys)
mu.theta <- rep(0,length=nbodysys)
mu.gamma0 <- 0
mu.theta0 <- 1
alpha.pi <- 1.5
beta.pi <- 1.5
# chains
n.iter <- 11000
n.burnin <- 1000
theta.chain <- array(NA,dim=c(nbodysys,max(nae),n.iter))
gammabj.chain <- array(NA,dim=c(nbodysys,max(nae),n.iter))
for (i in 1:n.iter)
{
thetabj.update()
theta.chain[,,i] <- theta
gammabj.update()
gammabj.chain[,,i] <- gamm
pib.update()
sigma.theta.update()
sigma.gamma.update()
tau.gamma0.update()
tau.theta0.update()
mu.gamma.update()
mu.theta.update()
mu.gamma0.update()
mu.theta0.update()
alpha.pi.update()
beta.pi.update()
}
for (b in 1:nbodysys)
{
for (j in 1:nae[b])
{
if (b==1 && j==1) {
cat(sprintf("%5s%5s%10s%10s\n","b","j","P(th=0)","P(th>0)"))
cat(sprintf("%5s%5s%10s%10s\n","---","---","------","------"))
}
cat(sprintf("%5s%5d%10.3f%10.3f\n",names(nae)[b],j,sum(theta.chain[b,j,(n.burnin+1):n.iter]==0)/sum(!is.na(theta.chain[b,j,(n.burnin+1):n.iter])),sum(theta.chain[b,j,(n.burnin+1):n.iter]>0)/sum(!is.na(theta.chain[b,j,(n.burnin+1):n.iter]))))
}
}
#mean(theta.chain[1,1,(n.burnin+1):n.iter],na.rm=T)
plot(theta.chain[1,1,(n.burnin+1):n.iter],type='b',pch=0)
plot(theta.chain[2,4,(n.burnin+1):n.iter],type='b')
plot(theta.chain[2,4,1:n.iter],type='b')
sum(theta.chain[2,4,])/n.iter
gammabj.chain[1,1,(n.burnin+1):n.iter]
plot(theta.chain[6,2,(n.burnin+1):n.iter],type='b')
hist(theta.chain[1,1,(n.burnin+1):n.iter])
#acceptance rate
foo <- theta.chain[1,1,(n.burnin+1):n.iter]
sum(foo[2:(n.iter-n.burnin)]!=foo[1:(n.iter-n.burnin-1)])/(n.iter-n.burnin+1)
```

The datafile ae.dat copies the vaccine data from the article and looks like the following:

```# data for Berry and Berry AE model
bodysys aenum ae trtct contct
1 1 astenia/fatigue 57 40
1 2 fever 34 26
1 3 infection/fungal 2 0
1 4 infection/viral 3 1
1 5 malaise 27 20
3 1 anorexia 7 2
3 2 cendisiasis/oral 2 0
3 3 constipation 2 0
3 4 diarrhea 24 10
3 5 castroenteritis 3 1
3 6 nausea 2 7
3 7 vomiting 19 19
6 1 dehydration 0 2
8 1 crying 2 0
8 2 insomnia 2 2
8 3 irritability  75 43
9 1 bronchitis 4 1
9 2 congestion/nasal 4 2
9 3 congestion/respiratory 1 2
9 4 cough 13 8
9 5 infection/upper_respiratory 28 20
9 6 laryngotracheobronchitis 2 1
9 7 pharyngitis 13 8
9 8 rhinorrhea 15 14
9 9 sinusitis 3 1
9 10 tonsillitis 2 1
9 11 wheezing 3 1
10 1 bite/sting 4 0
10 2 eczema 2 0
10 3 pruritis 2 1
10 4 rash 13 3
10 5 rash/diaper 6 2
10 6 rash/measles/rubella-like 8 1
10 7 rash/varicella-like 4 2
10 8 urticaria 0 2
10 9 viral/exanthema 1 2
11 1 conjunctivitis 0 2
11 2 otitis/media 18 14
11 3 otorrhea 2 1
```

I make no claims to the correctness of this code, but I have gotten results that are close to the Berry and Berry article.