<- 10
meanSize <- 1
trueLogSd <- 500
sampleSize = rexp(rate = 1/meanSize, n = sampleSize)
truevalues = rlnorm(n = length(truevalues), mean = log(truevalues), sd = trueLogSd) observations
8 Error in variable models
8.1 Regression dillution in distribution estimates
8.1.1 Creation of the data
Assume we observe data from an ecological system that creates an exponential size distribution (e.g. tree sizes, see Taubert, F.; Hartig, F.; Dobner, H.-J. & Huth, A. (2013) On the Challenge of Fitting Tree Size Distributions in Ecology. PLoS ONE, 8, e58036-), but our measurments are performed with a substantial lognormal observation error
Plotting true and observed data
<- ceiling(max(observations,truevalues))
maxV <- rbind(
counts obs = hist(observations, breaks = 0:maxV, plot = F)$counts,
true = hist(truevalues, breaks = 0:maxV, plot = F)$counts
)barplot(log(t(counts)+1), beside=T)
8.1.2 Fitting a non-hierarchical model leads to bias
= textConnection('
normalModel model {
# Priors
meanSize ~ dunif(1,100)
# Likelihood
for(i in 1:nObs){
true[i] ~ dexp(1/meanSize)
}
}
')
# Bundle data
<- observations[observations>0]
positiveObservations = list(true = positiveObservations, nObs=length(positiveObservations))
data
# Parameters to be monitored (= to estimate)
= c("meanSize")
params
= jags.model( file= normalModel , data=data, n.chains = 3, n.adapt= 500) jagsModel
Compiling model graph
Resolving undeclared variables
Allocating nodes
Graph information:
Observed stochastic nodes: 500
Unobserved stochastic nodes: 1
Total graph size: 505
Initializing model
= coda.samples( jagsModel , variable.names=params,n.iter=5000)
results plot(results)
The main thing to note about this is that parameter estimates are heavily biased.
8.1.3 Fitting a hierarchical model removes the bias
Model specification if hierarchical model that accounts for the observation error in Jags
= textConnection('
hierarchicalModel model {
# Priors
meanSize ~ dunif(1,100)
sigma ~ dunif(0,20) # Precision 1/variance JAGS and BUGS use prec instead of sd
precision <- pow(sigma, -2)
# Likelihood
for(i in 1:nObs){
true[i] ~ dexp(1/meanSize)
observed[i] ~ dlnorm(log(true[i]), precision)
}
}
')
# Bundle data
= list(observed = observations, nObs=length(observations))
data # Parameters to be monitored (= to estimate)
= c("meanSize", "sigma")
params
= jags.model( file= hierarchicalModel , data=data, n.chains = 3, n.adapt= 500) jagsModel
Compiling model graph
Resolving undeclared variables
Allocating nodes
Graph information:
Observed stochastic nodes: 500
Unobserved stochastic nodes: 502
Total graph size: 1511
Initializing model
#update(jagsModel, 2500) # updating without sampling
= coda.samples( jagsModel , variable.names=params,n.iter=5000)
results plot(results)
It’s always good to check the correlation structure in the posterior
8.2 Regression dilution in slope estimates
library(EcoData)
library(rjags)
= nrow(volcanoisland)
nobs
# imagine we had a very bad measurement devide for the altitude
$sAltitudeR = volcanoisland$sAltitude + rnorm(nobs)
volcanoisland
plot(log(windObs) ~ sAltitude, data = volcanoisland)
= lm(log(windObs) ~ sAltitude, data = volcanoisland)
fit summary(fit)
Call:
lm(formula = log(windObs) ~ sAltitude, data = volcanoisland)
Residuals:
Min 1Q Median 3Q Max
-3.5295 -0.5738 0.0083 0.5897 2.3810
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.74308 0.02687 64.87 <2e-16 ***
sAltitude 0.41156 0.02688 15.31 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.8498 on 998 degrees of freedom
Multiple R-squared: 0.1902, Adjusted R-squared: 0.1894
F-statistic: 234.3 on 1 and 998 DF, p-value: < 2.2e-16
abline(fit, col = "red")
= lm(log(windObs) ~ sAltitudeR, data = volcanoisland)
fit summary(fit)
Call:
lm(formula = log(windObs) ~ sAltitudeR, data = volcanoisland)
Residuals:
Min 1Q Median 3Q Max
-3.3499 -0.5476 0.0075 0.6033 2.5089
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.75802 0.02821 62.31 <2e-16 ***
sAltitudeR 0.21449 0.01938 11.07 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.8911 on 998 degrees of freedom
Multiple R-squared: 0.1094, Adjusted R-squared: 0.1085
F-statistic: 122.5 on 1 and 998 DF, p-value: < 2.2e-16
abline(fit, col = "blue")
let’s see if we can correct the error
= list(WindObs = log(volcanoisland$windObs),
data Altitude = volcanoisland$sAltitudeR,
plot = as.numeric(volcanoisland$plot),
nobs = nobs,
nplots = length(unique(volcanoisland$plot)))
= "model{
modelCode
# Likelihood
for(i in 1:nobs){
# error on y
WindObs[i] ~ dnorm(mu[i],tau)
# error on x
Altitude[i] ~ dnorm(TrueAltitude[plot[i]],tauMeasure)
mu[i] <- AltitudeEffect*TrueAltitude[plot[i]]+ Intercept
}
# Prior distributions
# For location parameters, normal choice is wide normal
AltitudeEffect ~ dnorm(0,0.0001)
Intercept ~ dnorm(0,0.0001)
for(i in 1:nplots){
TrueAltitude[i] ~ dnorm(0,0.0001)
}
# For scale parameters, normal choice is decaying
tau ~ dgamma(0.001, 0.001)
sigma <- 1/sqrt(tau)
tauMeasure ~ dgamma(0.001, 0.001)
sdMeasure <- 1/sqrt(tauMeasure)
}
"
<- jags.model(file= textConnection(modelCode), data=data, n.chains = 3) jagsModel
Compiling model graph
Resolving undeclared variables
Allocating nodes
Graph information:
Observed stochastic nodes: 2000
Unobserved stochastic nodes: 104
Total graph size: 3314
Initializing model
<- c("AltitudeEffect","Intercept","sigma", "sdMeasure")
para.names <- coda.samples(jagsModel, variable.names = para.names, n.iter = 5000)
Samples plot(Samples)
summary(Samples)
Iterations = 1:5000
Thinning interval = 1
Number of chains = 3
Sample size per chain = 5000
1. Empirical mean and standard deviation for each variable,
plus standard error of the mean:
Mean SD Naive SE Time-series SE
AltitudeEffect 0.4789 0.03043 0.0002485 0.0004178
Intercept 1.7767 0.02995 0.0002445 0.0003376
sdMeasure 1.0588 0.02678 0.0002186 0.0002798
sigma 0.7837 0.01972 0.0001610 0.0002141
2. Quantiles for each variable:
2.5% 25% 50% 75% 97.5%
AltitudeEffect 0.4215 0.4585 0.4782 0.4986 0.5408
Intercept 1.7191 1.7565 1.7767 1.7970 1.8352
sdMeasure 1.0085 1.0405 1.0580 1.0763 1.1137
sigma 0.7463 0.7701 0.7833 0.7968 0.8233