<- function(par){
stochasticModel if (par[2] + par[1] <= 0) return(rep(-9999,20))
else return(rnorm(20, mean = (2.7*par[1] * par[2]), sd = par[2] + par[1] ))
}
15 Approximate Bayesian Inference
15.1 Motivation
In Hartig, F.; Calabrese, J. M.; Reineking, B.; Wiegand, T. & Huth, A. (2011) Statistical inference for stochastic simulation models - theory and application. Ecol. Lett., 14, 816-827, we classify two main competing methods for models via simulation-based likelihood approximation
Likelihood-approximations based on local, non-parametric approximations of the variance of the simulation outputs, particularly Approximate Bayesian Computation (ABC, Beaumont, M. A. (2010) Approximate Bayesian computation in evolution and ecology. Annu. Rev. Ecol. Evol. Syst., 41, 379-406.)
Likelihood-approximations based on parametric, typically global approximation of the simulation output such as Synthetic Likelihood, see Wood, S. N. (2010) Statistical inference for noisy nonlinear ecological dynamic systems. Nature, 466, 1102-1104. An example for fitting a stochastic forest gap model do data via this method is Hartig, F.; Dislich, C.; Wiegand, T. & Huth, A. (2014) Technical Note: Approximate Bayesian parameterization of a process-based tropical forest model. Biogeosciences, 11, 1261-1272.
15.2 Example
Assume we have a stochastic model that we want to fit. It takes one parameter, and has an output of 10 values which happen to be around the mean of the parameter that we put in
Lets’s create some data with known parameters
<- stochasticModel(c(3,-2)) data
15.2.1 Summary statistics
We want to use ABC / synthetic likelihood to infer the parameters that were used. Both ABC and synthetic likelihoods require summary statistics, we use mean and sd of the data.
<- mean(data)
meandata <- sd(data) standarddeviationdata
15.2.2 ABC-MCMC solution
Following Marjoram, P.; Molitor, J.; Plagnol, V. & Tavare, S. (2003) Markov chain Monte Carlo without likelihoods. Proc. Natl. Acad. Sci. USA, 100, 15324-15328, we plug the ABC acceptance into a standard metropolis hastings MCMC.
library(coda)
<- function(startvalue, iterations){
run_MCMC_ABC
= array(dim = c(iterations+1,2))
chain 1,] = startvalue
chain[
for (i in 1:iterations){
# proposalfunction
= rnorm(2,mean = chain[i,], sd= c(0.2,0.2))
proposal
<- stochasticModel(proposal)
simulation
# comparison with the observed summary statistics
<- abs(mean(simulation) - meandata)
diffmean <- abs(sd(simulation) - standarddeviationdata)
diffsd
if((diffmean < 0.3) & (diffsd < 0.3)){
+1,] = proposal
chain[ielse{
}+1,] = chain[i,]
chain[i
}
}return(mcmc(chain))
}
<- run_MCMC_ABC(c(3,-2),50000)
posterior plot(posterior)
15.2.3 Synthetic likelihood
Following Wood, S. N. (2010) Statistical inference for noisy nonlinear ecological dynamic systems. Nature, 466, 1102-1104 and Hartig, F.; Dislich, C.; Wiegand, T. & Huth, A. (2014) Technical Note: Approximate Bayesian parameterization of a process-based tropical forest model. Biogeosciences, 11, 1261-1272, the synthetic likelihood approach is based on sampling a few times from the model, and approximating the likelihood by fitting a Gaussian distribution to the simulation outputs:
<- function(startvalue, iterations){
run_MCMC_Synthetic
= array(dim = c(iterations+1,2))
chain 1,] = startvalue
chain[
for (i in 1:iterations){
# proposalfunction
= rnorm(2,mean = chain[i,], sd= c(0.2,0.2))
proposal
# simulate several model runs
<- matrix(NA, nrow = 100, ncol = 2)
simualatedData for (i in 1:100){
<- stochasticModel(c(3,-2))
simulation <- c(mean(simulation) , sd(simulation))
simualatedData[i,]
}<- fitdistr(simualatedData[1,], "normal")
syntheticLikelihood1 <- fitdistr(simualatedData[2,], "normal")
syntheticLikelihood2
<- dnorm(meandata-syntheticLikelihood1$estimate[1], sd = syntheticLikelihood1$estimate[2], log = T)
prob1
<- dnorm(standarddeviationdata-syntheticLikelihood2$estimate[1], sd = syntheticLikelihood2$estimate[2], log = T)
prob2
if(prob < runif(1)){
+1,] = proposal
chain[ielse{
}+1,] = chain[i,]
chain[i
}
}return(mcmc(chain))
}
<- run_MCMC_ABC(c(3,-2),20000)
posterior plot(posterior)
15.3 A movement model fit with ABC-Rejection and ABC-MCMC
The ABC rejection was originally proposed by Tavare, 1997. The ABC-MCMC was suggested by Marjoram, P.; Molitor, J.; Plagnol, V. & Tavare, S. (2003) Markov chain Monte Carlo without likelihoods. Proc. Natl. Acad. Sci. USA, 100, 15324-15328.
Code implemented by Florian Hartig, following the pseudocode from Hartig, F.; Calabrese, J. M.; Reineking, B.; Wiegand, T. & Huth, A. (2011) Statistical inference for stochastic simulation models - theory and application. Ecol. Lett., 14, 816-827., supporting information.
15.3.1 The model and data
15.3.1.1 Process-model
library(compiler)
<- function(params=2, startvalues = c(0,0,0), steps = 200){
model = startvalues[1]
x = startvalues[2]
y = startvalues[3]
direction
= params[1]
movementLength = 1
turningWidth
= data.frame(x = rep(NA, steps+1), y = rep(NA, steps+1))
output 1, ] = c(x,y)
output[for (i in 1:steps){
= direction + rnorm(1,0,turningWidth)
direction = rexp(1, 1/movementLength)
length = x + sin(direction) * length
x = y + cos(direction) * length
y +1, ] = c(x,y)
output[i
}return(output)
}<- cmpfun(model) model
Let’s see what the model does
<- model()
data plot(data, type = "l")
15.3.1.2 Observation model
Assume we have recorded the test data. In fact, let’s do it a bit harder. Assume we observe with error, and our measurement device has a problem - if the x-values have a digit larger than 0.7, we get an NA
<- function(realData, sd=1){
observationModel $xobs = rnorm(length(realData$x), mean = realData$x, sd = sd)
realData$yobs = rnorm(length(realData$x), mean = realData$y, sd = sd)
realData$xobs[realData$xobs - floor(realData$xobs) > 0.7 ] = NA
realDatareturn(realData)
}
<- observationModel(data)
obsdata plot(data, type = "l")
points(obsdata$xobs, obsdata$yobs, col = "red", pch = 4)
15.3.2 Summary statistics
<- function(dat){
summaryStatistics = mean(sqrt(diff(dat$xobs)^2 + diff(dat$yobs)^2), na.rm = T)
meandisplacement
= mean(sqrt(diff(dat$xobs, lag = 2)^2 + diff(dat$yobs, lag = 2)^2), na.rm = T)/3
meandisplacement10
#meanturning = mean(abs(diff(atan2(diff(dat$yobs),diff(dat$xobs)))), na.rm = T)
return(c(meandisplacement, meandisplacement10))
}
<- summaryStatistics(obsdata)
dataSummary dataSummary
[1] 2.803799 1.433168
15.3.3 ABC rejection algorithm
= 10000
n = data.frame(movementLength = runif(n, 0, 5), error = runif(n, 0,5), distance = rep(NA, n))
fit
for (i in 1:n){
<- model(fit[i,1])
simulatedPrediction <- observationModel(simulatedPrediction, fit[i,2])
simulatedObservation<- summaryStatistics(simulatedObservation)
simulatedSummary
simulatedSummary#deviation = max( simulatedSummary - dataSummary)
= sqrt(sum((simulatedSummary - dataSummary)^2))
deviation 3] = deviation
fit[i, }
I had already calculated the euclidian distance between observed and simulated summaries. We now plot parameters for different acceptance intervals
plot(fit[fit[,3] < 1, 1:2], xlim = c(0,6), ylim = c(0,5), col = "lightgrey", main = "Accepted parameters for \n different values of epsilon")
points(fit[fit[,3] < 0.2, 1:2], pch = 18, col = "gray")
points(fit[fit[,3] < 0.1, 1:2], pch = 8, col = "red")
legend("topright", c("< 1", "< 0.2", "< 0.1"), pch = c(1,18,8), col = c("lightgrey", "gray", "red"))
abline(v = 2)
abline(h = 1)
15.3.4 ABC-MCMC Algorithm
= 10000
n = data.frame(movementLength = rep(NA, n), error = rep(NA, n), distance = rep(NA, n))
fit
= c(2,0.9)
currentPar for (i in 1:n){
= rnorm(2,currentPar, sd = c(0.2,0.2))
newPar if (min(newPar) < 0 ) fit[i,] = c(currentPar, deviation)
else{
<- model(newPar[1])
simulatedPrediction <- observationModel(simulatedPrediction, newPar[2])
simulatedObservation<- summaryStatistics(simulatedObservation)
simulatedSummary = sqrt(sum( simulatedSummary - dataSummary)^2)
deviation
if (deviation < 0.2){
= c(newPar, deviation)
fit[i,] = newPar
currentPar
} else fit[i,] = c(currentPar, deviation)
} }
plot(fit[, 1:2], xlim = c(0,6), ylim = c(0,5), col = "#00000022", main = "Accepted parameters")
abline(v = 2)
abline(h = 1)