# binomial.r
# Yvonne Buckley, 15/08/2014
# reproduces Fig. 6.3 the variance in proabability of survival from Binomial
# distributions, demonstrates use of binomial denominator
# The function v1 produces the variance of the binomial distribution
# Inputs: None
# Outputs: None
# BINOMIAL DISTRIBUTION
# Showing how variance changes with the mean
v1 <- function(n, p) {
vv <- n * p * (1-p)
return(vv)
}
# the variance of a binomial distribution is np(1-p),
# the variance is bounded at p=0 and p=1 and is at
# its maximum at p=0.5 where an event is as likely to happen as not.
par(mfrow = c(1,1))
n <- 5 # no. of trials
p <- seq(0.01,1,0.01) # p is the probability of success
plot(p, v1(n, p), type = "l", bty = "l", xlab = "Probability of survival",
ylab = "Variance", lwd = 2, ylim=c(0, n*0.5^2+0.2) )
n <- 1
lines(p, v1(n, p), type = "l", bty = "l", xlab = "Probability of survival",
ylab = "Variance", lwd = 2, lty = 3)
legend("topright", legend = c("trials = 5", "trials = 1"), lty = c(1,3),
lwd = 2, bty = "n")
# THE BINOMIAL DENOMINATOR
# Here I demonstrate how ignoring the denominator in success/failure data can
# influence the estimated mean proportion
success <- c(2,4,5,5,13,18,55,103)
# no. of successes in a particular trial, e.g. no. of fish in a tank surviving
fail <- c(0, 0, 1, 5,17, 22, 45, 97)
# no. of failures in a particular trial, no. of fish in the same tank dying
sample_size <- success + fail
# the sample size or binomial denominator, the total number of fish in each tank
sample_size
y <- cbind(success, fail) # creates the response variable for a binomial GLM
y
proportion <- success/(sample_size)
# calculate the proportion of fish alive from each tank
cbind(success, fail, sample_size, proportion)
# you can see that the proportion surviving from the lower sample sizes is very
# high but for larger sample sizes it's approx. 50%
mean(proportion) # mean survival, ignoring sample size
mod1 <- glm(y~1, family = binomial(link = logit))
# a GLM which calculates average survival taking sample size (binomial
# denominator) into account
prop_bin <- exp(coef(mod1))/(exp(coef(mod1))+1)
# taking the parameter estimate from the model and applying the inverse link
# function to find the mean proportion surviving
prop_bin; mean(proportion)
# contrasting the two average probabilities of survival,
# the binomial proportion weights towards the larger sample sizes where survival
# was approx. 50%. The mean proportion is higher as it weights all samples
# equally regardless of the total number within each sample.