# Normal vs. Poisson errors
# Yvonne Buckley 13/07/2014
# Here I produce two figures (Fig. 2 (a) & (b)), one showing data where the
# errors are normally distributed and where the variance is constant with
# regards to the mean and another where the errors are not necessarily normally
# distributed and are not constant, but the variance increases with the mean.
## Thanks to Corey Merow for some example code from: Merow et al. 2014
# First simulate some data based on the sizes of plants
min_size <- 0.01; max_size = 3
int <- 0
slope <- 5
# choose 100 random numbers between min and max sizes
x_sim <- runif(500,min_size,max_size)
# generate simulated y-values from the Normal distribution, with a mean equal
# to the fitted relationship and a constant standard deviation.
# The sd does not change with the fitted values, it is constant.
# The errors of this relationship will be normally distributed, with constant
# variance and with a mean=0
y_sim_norm <- rnorm(length(x_sim), int + slope * x_sim, sd=1.5)
# Generate simulated y-values from the Poisson distribution, with a mean equal
# to the fitted relationship and a standard deviation equal to the mean of the
# fitted values the errors of this relationship will have a mean = 0 but will
# increase with the fitted relationship
y_sim_pois <- rpois(length(x_sim),int + slope * x_sim)
lm_norm <- lm(y_sim_norm ~ x_sim)
summary(lm_norm)
par(mfrow=c(2,2))
plot(lm_norm)
## Linear model of data with normally distributed errors
lm_pois <- lm(y_sim_pois ~ x_sim)
summary(lm_pois)
plot(lm_pois)
# Linear model of data with Poisson distributed errors
# you can see evidence of non-normality and heteroscedasticity of errors here,
# errors increase with the fitted values, the linear pattern in negative
# residuals at low fitted values is due to the bounding at zero.
glm1 <- glm(y_sim_pois ~ x_sim, family=poisson)
summary(glm1)
# very mild overdispersion, close to 1, happy with that
# Set up data and models to reproduce Fig. 2 comparing distributions of errors
# from normal vs. poisson generated data
width_hist <- (max(x_sim) - min(x_sim))/6
hist_spots <- c(min(x_sim), median(x_sim), max(x_sim)-width_hist)
upper_hist <- hist_spots + width_hist
# Set up the size (x) values where where error distributions will be evaluated
# over
# Estimate standard deviation of y data within each sector of the data
stdev_norm <- rep(NA,length(hist_spots)) ## set up standard deviation estimate vector
stdev_pois <- rep(NA,length(hist_spots))
ct <- ifelse(x_sim >= hist_spots[1] & x_sim < upper_hist[1],1,
ifelse(x_sim >= hist_spots[2] & x_sim < upper_hist[2],2,
ifelse(x_sim >= hist_spots[3] & x_sim < upper_hist[3],3,NA)))
y_errs_norm <- residuals(lm_norm)
y_errs_pois <- residuals(lm_pois)
for(i in 1:3) {
stdev_norm[i] <- sd(y_errs_norm[ct==i], na.rm=T )
stdev_pois[i] <- sd(y_errs_pois[ct==i], na.rm=T )
}
stdev_norm; stdev_pois
# you should notice here that the st dev. of the normally generated data should
# be similar across the range of x values, the st. dev. of the poisson generated
# data should increase with the range of x values.
par(mfrow=c(2,1), cex=0.8, pch=1, bty="l", mar=c(4, 4, 2, 1) + 0.1)
# PLOT data generated with normal errors
plot(x_sim,y_sim_norm, xlab="", ylab='Offspring', cex.lab=1.2,
xlim=c(min(x_sim), max(x_sim)+width_hist/2),
ylim=c(0, max(y_sim_norm, y_sim_pois)), main="(a) Normal")
# plot x and y data with normal errors
lines(seq(min_size,max_size,by=.01),
predict(lm_norm,data.frame(x_sim=seq(min_size,max_size,by=.01))),lwd=3)
## plot fitted relationship as predicted by the lm.norm model
abline(v=hist_spots)
# Generate denisty histograms for the variance of errors at each of the
# specified points
for(i in 1:length(hist.spots)){
temp.dens=density(y.errs.norm[x.sim>hist.spots[i] & x.simhist.spots[i] & x.sim