Realizations in Biostatistics: My implementation of Berry and Berry's hierarchical Bayes algorithm for adverse events

I've been working on this for quite some time (see here for a little background), so I'm pleased that it looks close to done at least as far as the core algorithm. It uses global variables for now, and I'm sure there are a couple of other bugs lurking, but here it is, after the jump.

const.sqrt2pi <- sqrt(2*3.14159265358979)
logit <- function(x) return(x/(1-x))
thetacond.log <- function(b,j,thet=theta[b,j]) {
    res <- y[b,j]*(thet+gamm[b,j])-nt*log(1+exp(thet+gamm[b,j]))+
        log(pi[b]*(thet==0) +(1-pi[b])/
            (const.sqrt2pi*sigma.theta[b])*exp(-0.5*(thet-mu.theta[b])^2/sigma.theta[b]^2))
    return(res)
}
thetabj.update <- function() {
    # this is a mixture MH step, as described in Berry and Berry (2004)
    for (b in 1:nbodysys) {
        for (j in 1:nae[b]) {
                                        # simulate new candidate - point mass at 0 and normal mixture
            if (runif(1)<0.5) {
                thetabj.cand <- 0
            } else {
                thetabj.cand <- rnorm(1,mean=theta[b,j],sd=sigma.mh.theta)
            }
            #cat("b=",b,", j=",j,sep="")
            ccond.ratio <- exp(thetacond.log(b,j,thet=thetabj.cand)-thetacond.log(b,j))
            if (theta[b,j]==0 && thetabj.cand==0 && runif(1)
                theta[b,j]<<-thetabj.cand
                #cat("cond=1, acceptance probability=",ccond.ratio,'\n',sep="")
            }
            else if (thetabj.cand==0 && theta[b,j]!=0 && runif(1)<
                     ccond.ratio*exp(-0.5*theta[b,j]^2/sigma.mh.theta^2)/sigma.mh.theta/const.sqrt2pi)
            {
                theta[b,j]<<-thetabj.cand
                #cat("cond=2, acc prob=",ccond.ratio*exp(-0.5*theta[b,j]^2/sigma.mh.theta^2)/sigma.mh.theta/const.sqrt2pi,'\n',sep="")
            }
            else if (thetabj.cand!=0 && theta[b,j]==0 &&
                     runif(1))
            {
                theta[b,j]<<-thetabj.cand
                #cat("cond=3, acc prob=",ccond.ratio/exp(-0.5*theta[b,j]^2/sigma.mh.theta^2)*sigma.mh.theta*const.sqrt2pi,'\n',sep="")
            }
            else if (thetabj.cand!=0 && theta[b,j]!=0 && runif(1)
            {
                theta[b,j]<<-thetabj.cand
                #cat("cond=4, acceptance probability=",ccond.ratio,'\n',sep="")
            }
            #else cat('\n')
        }
    }
}
gammabj.cond.log <- function(b,j,g=gamm[b,j]) {
    tmp1 <- exp(theta[b,j]+g)
    tmp2 <- exp(g)
    return(y[b,j]*(theta[b,j]+g)-nt*log(1+tmp1)+x[b,j]*g-nc*log(1+tmp2)
           -0.5/sigma.gamma^2*(g-mu.gamma[b])^2)
}
gammabj.update <- function() {
    for (b in 1:nbodysys) {
        for (j in 1:nae[b]) {
            cand <- rnorm(1,mean=gamm[b,j],sd=sigma.mh.gamma)
            acc.prob <- exp(gammabj.cond.log(b,j,g=cand)-gammabj.cond.log(b,j))
                                        #if (acc.prob>1) then acc.prob<-1 # not sure we need this for program
            if (runif(1)<<-cand
        }
    }
}
pib.update <- function() {
    for (b in 1:nbodysys) {
        pi[b] <<- rbeta(1,alpha.pi+sum(theta[b,1:nae[b]]==0,na.rm=T),beta.pi+nae[b]-sum(theta[b,1:nae[b]]==0,na.rm=T))
    }
}
sigma.theta.update <- function() {
    for (b in 1:nbodysys) {
        sigma2.rand <- rgamma(1,nae[b]/2+alpha.theta,
                              scale=1/beta.theta+0.5*sum((theta[b,1:nae[b]]-mu.theta[b])^2,na.rm=T)) # fixed
        sigma.theta[b] <<- 1/sqrt(sigma2.rand)
    }
}
sigma.gamma.update <- function()
{
    tmp<-0
    for (b in 1:nbodysys) {
        tmp <- tmp+sum((gamm[b,1:nae[b]]-mu.gamma[b])^2,na.rm=T)
    }
    sigma2.rand <- rgamma(1,0.5*sum(nae)+alpha.sigma.gamma,scale=1/beta.sigma.gamma+
                          0.5*tmp) #fixed
    sigma.gamma <<- 1/sqrt(sigma.gamma)
}
tau.gamma0.update <- function()
{
    tau2.gamma0 <- rgamma(1,0.5*nbodysys+alpha.tau.gamma,scale=1/beta.tau.gamma+0.5*
                          sum((mu.gamma-mu.gamma0)^2)) #fixed
    tau.gamma0 <<- 1/sqrt(tau2.gamma0)
}
tau.theta0.update <- function()
{
    tau2.theta0 <- rgamma(1,0.5*nbodysys+alpha.tau.theta,scale=1/beta.tau.theta+0.5*
                          sum((mu.theta-mu.theta0)^2)) #fixed
    tau.theta0 <<- 1/sqrt(tau2.theta0)
}
mu.gamma.update <- function() {
    for (b in 1:nbodysys) {
        gibbs.mu <- (tau.gamma0^2*sum(gamm[b,1:nae[b]],na.rm=T)+sigma.gamma^2*mu.gamma0)/
            (tau.gamma0^2*nae[b]+sigma.gamma^2)
        gibbs.sigma2 <- tau.gamma0^2*sigma.gamma^2/(tau.gamma0^2*nae[b]+sigma.gamma^2)
        mu.gamma[b] <<- rnorm(1,mean=gibbs.mu,sd=sqrt(gibbs.sigma2))
    }
}
mu.theta.update <- function() {
    for (b in 1:nbodysys) {
        gibbs.mu <- (tau.theta0^2*sum(theta[b,1:nae[b]],na.rm=T)+sigma.theta^2*mu.theta0)/
            (tau.theta0^2*nae[b]+sigma.theta^2)
        gibbs.sigma2 <- tau.theta0^2*sigma.theta^2/(tau.theta0^2*nae[b]+sigma.theta^2)
        mu.theta[b] <<- rnorm(1,mean=gibbs.mu,sd=sqrt(gibbs.sigma2))
    }
}
mu.gamma0.update <- function()
{
    gibbs.mu <- (tau.gamma00^2*sum(mu.gamma)+tau.gamma0^2*mu.gamma00) /
        (tau.gamma00^2*nbodysys+tau.gamma0^2)
    gibbs.sigma2 <- tau.gamma00^2*tau.gamma0^2/(tau.gamma00^2*nbodysys +
                                                tau.gamma0^2)
    mu.gamma0 <<- rnorm(1,mean=gibbs.mu,sd=sqrt(gibbs.sigma2))
}
mu.theta0.update <- function()
{
    gibbs.mu <- (tau.theta00^2*sum(mu.theta)+tau.theta0^2*mu.theta00) /
        (tau.theta00^2*nbodysys+tau.theta0^2)
    gibbs.sigma2 <- tau.theta00^2*tau.theta0^2/(tau.theta00^2*nbodysys +
                                                tau.theta0^2)
    mu.theta0 <<- rnorm(1,mean=gibbs.mu,sd=sqrt(gibbs.sigma2))
}
alpha.pi.logcond <- function(alpha=alpha.pi) {
    if (alpha<=1) return(-Inf)
    else {
        return(nbodysys*(lgamma(alpha+beta.pi)-lgamma(alpha))+
               (alpha+1)*sum(log(pi))-alpha*lambda.alpha)
    }
}
alpha.pi.update <- function() {
    cand <- rnorm(1,mean=alpha.pi,sd=sigma.mh.theta)
    acc.prob <- exp(alpha.pi.logcond(alpha=cand)-alpha.pi.logcond())
    if (runif(1)<<-cand
}
beta.pi.logcond <- function(beta=beta.pi) {
    if (beta<=1) return(-Inf)
    else {
        return(nbodysys*(lgamma(beta+alpha.pi)-lgamma(beta))+
               (beta+1)*sum(log(1-pi))-beta*lambda.beta)
    }
}
beta.pi.update <- function() {
    cand <- rnorm(1,mean=beta.pi,sd=sigma.mh.theta)
    acc.prob <- exp(beta.pi.logcond(beta=cand)-beta.pi.logcond())
    if (runif(1)<<-cand
}
                                        # set up constants as in sec 4
mu.theta00<-0
tau.theta00 <- sqrt(10)
mu.gamma00 <- 0
tau.gamma00 <- sqrt(10)
alpha.theta <- 3
beta.theta <- 1
alpha.theta0 <- 3
beta.theta0 <- 1
alpha.tau.gamma<-3
beta.tau.gamma <- 1
alpha.tau.theta <- 3
beta.tau.theta <- 1
alpha.sigma.gamma<-3
beta.sigma.gamma <- 1
lambda.alpha<-1
lambda.beta <- 1
                                        # parms for the example in berry and berry
ae.dat <- read.table(file="ae.dat",header=T)
ae.dat$bodysysf <- factor(ae.dat$bodysys)
nt <- 148
nc <- 132
nae <- tapply(ae.dat$aenum,ae.dat$bodysysf,length)
nbodysys <- length(levels(ae.dat$bodysysf))
y <- matrix(NA,nrow=nbodysys,ncol=max(nae))
x <- matrix(NA,nrow=nbodysys,ncol=max(nae))
for (i in 1:nbodysys) {
y[i,1:nae[i]] <- ae.dat$trtct[ae.dat$bodysysf==as.numeric(levels(ae.dat$bodysysf)[i])]
x[i,1:nae[i]] <- ae.dat$contct[ae.dat$bodysysf==as.numeric(levels(ae.dat$bodysysf)[i])]
}
sigma.mh.theta <- 2
sigma.mh.gamma <- 2
                                        # set up containers
theta <- matrix(NA,nrow=nbodysys,ncol=max(nae))
gamm <- matrix(NA,nrow=nbodysys,ncol=max(nae))
pi <- rep(NA,length=nbodysys)
sigma.theta <- rep(NA,length=nbodysys)
sigma.gamma <- NA
tau.gamma0 <- NA
tau.theta0 <- NA
mu.gamma <- rep(NA,length=nbodysys)
mu.theta <- rep(NA,length=nbodysys)
mu.gamma0 <- NA
mu.theta0 <- NA
alpha.pi <- NA
beta.pi <- NA


                                        # initial values
gamm <- logit(x/nc)
theta <- logit(y/nt)-gamm
pi <- rep(.5,length=nbodysys)
sigma.theta <- rep(1,length=nbodysys)
sigma.gamma <- 1
tau.gamma0 <- 1
tau.theta0 <- 1
mu.gamma <- rep(0,length=nbodysys)
mu.theta <- rep(0,length=nbodysys)
mu.gamma0 <- 0
mu.theta0 <- 1
alpha.pi <- 1.5
beta.pi <- 1.5
                                        # chains
n.iter <- 11000
n.burnin <- 1000
theta.chain <- array(NA,dim=c(nbodysys,max(nae),n.iter))
gammabj.chain <- array(NA,dim=c(nbodysys,max(nae),n.iter))
for (i in 1:n.iter)
{
    thetabj.update()
    theta.chain[,,i] <- theta
    gammabj.update()
    gammabj.chain[,,i] <- gamm
    pib.update()
    sigma.theta.update()
    sigma.gamma.update()
    tau.gamma0.update()
    tau.theta0.update()
    mu.gamma.update()
    mu.theta.update()
    mu.gamma0.update()
    mu.theta0.update()
    alpha.pi.update()
    beta.pi.update()
}
for (b in 1:nbodysys)
{
    for (j in 1:nae[b])
    {
        if (b==1 && j==1) {
            cat(sprintf("%5s%5s%10s%10s\n","b","j","P(th=0)","P(th>0)"))
            cat(sprintf("%5s%5s%10s%10s\n","---","---","------","------"))
        }
        cat(sprintf("%5s%5d%10.3f%10.3f\n",names(nae)[b],j,sum(theta.chain[b,j,(n.burnin+1):n.iter]==0)/sum(!is.na(theta.chain[b,j,(n.burnin+1):n.iter])),sum(theta.chain[b,j,(n.burnin+1):n.iter]>0)/sum(!is.na(theta.chain[b,j,(n.burnin+1):n.iter]))))
    }
}
#mean(theta.chain[1,1,(n.burnin+1):n.iter],na.rm=T)
plot(theta.chain[1,1,(n.burnin+1):n.iter],type='b',pch=0)
plot(theta.chain[2,4,(n.burnin+1):n.iter],type='b')
plot(theta.chain[2,4,1:n.iter],type='b')
sum(theta.chain[2,4,])/n.iter
gammabj.chain[1,1,(n.burnin+1):n.iter]
plot(theta.chain[6,2,(n.burnin+1):n.iter],type='b')
hist(theta.chain[1,1,(n.burnin+1):n.iter])
#acceptance rate
foo <- theta.chain[1,1,(n.burnin+1):n.iter]
sum(foo[2:(n.iter-n.burnin)]!=foo[1:(n.iter-n.burnin-1)])/(n.iter-n.burnin+1)

The datafile ae.dat copies the vaccine data from the article and looks like the following:

# data for Berry and Berry AE model
bodysys aenum ae trtct contct
1 1 astenia/fatigue 57 40
1 2 fever 34 26
1 3 infection/fungal 2 0
1 4 infection/viral 3 1
1 5 malaise 27 20
3 1 anorexia 7 2
3 2 cendisiasis/oral 2 0
3 3 constipation 2 0
3 4 diarrhea 24 10
3 5 castroenteritis 3 1
3 6 nausea 2 7
3 7 vomiting 19 19
5 1 lymphadenopathy 3 2
6 1 dehydration 0 2
8 1 crying 2 0
8 2 insomnia 2 2
8 3 irritability  75 43
9 1 bronchitis 4 1
9 2 congestion/nasal 4 2
9 3 congestion/respiratory 1 2
9 4 cough 13 8
9 5 infection/upper_respiratory 28 20
9 6 laryngotracheobronchitis 2 1
9 7 pharyngitis 13 8
9 8 rhinorrhea 15 14
9 9 sinusitis 3 1
9 10 tonsillitis 2 1
9 11 wheezing 3 1
10 1 bite/sting 4 0
10 2 eczema 2 0
10 3 pruritis 2 1
10 4 rash 13 3
10 5 rash/diaper 6 2
10 6 rash/measles/rubella-like 8 1
10 7 rash/varicella-like 4 2
10 8 urticaria 0 2
10 9 viral/exanthema 1 2
11 1 conjunctivitis 0 2
11 2 otitis/media 18 14
11 3 otorrhea 2 1

I make no claims to the correctness of this code, but I have gotten results that are close to the Berry and Berry article.

Friday, November 20, 2009

My implementation of Berry and Berry's hierarchical Bayes algorithm for adverse events