################################################################################
### R BASICS WORKSHOP ###
### EXERCISE 7.3: Generation of random and regular sequences ###
### (and a bit on graphics) ###
### ###
### Center for Conservation and Sustainable Development ###
### Missouri Botanical Garden ###
### Website: rbasicsworkshop.weebly.com ###
################################################################################
### INTRODUCTION ###############################################################
# The IGM2 model (Field et al. 2005, see exercises 6.1 and 7.1) was used in a
# study on the spatial variation of plant richness across Northwestern South
# America (Jiménez et al. 2009. Ecography 32: 433-448). This study estimated
# values for coefficients b0, b1, b2, b3, and b4 (see exercise 5.1).
# Particularly intersting were values obtained for b1 and b4 (the effects of Ran
# and TOPOG on plant richness, respectively): b1 = 0.00883 (standard error =
# 0.00331) and b4 = 8.33077 (standard error = 2.50506). In contrast, the values
# obtained by Field et al. were: b1 = 0.2987 and b4 = 42.7155 (Table 1 in Field
# et al. 2005). These coefficients cannot be directly compared because the scale
# of measurement of the response variable differs between studies. But the ratio
# b4/b1 is comparable across studies. It reflects the relative importance of
# TOPOG and Ran as determinants of plant species richness. There are obvious
# differences in the ratio between studies: 42.7155/0.2987 = 143.0047 according
# to Field et al., and 8.33077/0.00883 = 943.4621 according to Jiménez et al..
# Estimation of b1 and b4 in the last study is associated with relatively large
# sampling variances as measured by the standard error (see above). How to
# account for this uncertainty when comparing studies in terms of the ratio
# b4/b1? One could use a parametric bootstrap approach, assuming that b1 has a
# normal distribution with mean = 0.00883 and standard deviation = 0.00331, and
# b4 a normal distribution with mean = 8.33077 and standard deviation = 2.50506.
# The code below runs a parametric bootstrap to generate 95% confidence intervals
# for the ratio b4/b1.
## TASK 1 ##
# Use and carefully consider the code below, visiting the
# help pages as needed. By example, type "?quantile" or "help(quantile)" to
# learn about function "quantile". Using the confidence intervals obtained with
# the code below, answer this question: is it possible to conclude that the
# ratio b4/b1 differs between the study by Field et al. and that by Jiménez
# et al.?
# generates a bootstrap distribution for b1
b1_bootstrap <- rnorm(10000, mean = 0.00883, sd = 0.00331)
# generates a bootstrap distribution for b4
b4_bootstrap <- rnorm(10000, mean = 8.33077, sd = 2.50506)
# generates a bootstrap distribution for ratio b4/b1
ratio_bootstrap <- b4_bootstrap/b1_bootstrap
# generates a 95% confidence interval for ratio b4/b1.
quantile(ratio_bootstrap, probs=c(0.025, 0.975))
## TASK 2 ##
# Consider the following (fake) data on the relationship between age (in years)
# and change in body mass (in grams) during the season of low fruit availability
# for a parrot population.
age <- c(7, 12, 12, 26, 33, 28, 36, 44, 39, 46, 39, 46, 8, 3, 17, 19, 25, 24,
40, 42, 35, 54, 47, 55, 10, 12, 24, 18, 25, 31, 40, 44, 41, 51, 56, 52, 10,
16, 27, 26, 28, 31, 34, 35, 39, 49, 51, 47, 18, 7, 18, 21, 33, 29, 33, 38,
38, 50, 52, 47, 16, 12, 20, 19, 32, 18, 39, 43, 35, 49, 53, 53)
body.mass.change <- c(-1.3, -4.8, 1.5, 3.6, 2.5, 5.9, 0.8, 0.3, 1.5, -3.9, -3.9,
-3.1, -2.0, -4.3, -1.4, 1.3, 3.2, 5.5, 0.4, -1.1, 1.6, -2.7, -4.7, -4.3,
-2.8, -3.6, 1.4, 3.1, 1.6, 4.3, 0.0, -0.1, 2.4, -2.1, -4.1, -3.0, -1.7,
-4.6, 1.9, 3.2, 3.1, 4.4, 0.7, 0.5, 2.1, -1.1, -3.9, -3.9, -1.5, -3.0, 0.9,
3.9, 1.4, 5.1, -0.7, 1.8, 1.1, -2.0, -2.5, -3.1, -0.7, -3.4, 0.6, 3.2, 1.5,
6.2, -0.1, 1.8, 1.7, -1.7, -2.0, -4.6)
# Use function "plot" to examine the relationship between age and body mass change.
# Use argument "xlab" and "ylab" to label the axes with the variable names and
# respective measurement units.
## TASK 3 ##
# The relationship looks quadratic, right? Use function "lm" to fit a quadratic
# model to the data, and name the result "model.1" using this code:
model.1 <- lm(body.mass.change ~ age + I(age^2))
# notice the use of "I" in the code line above. Go to the help page of function
# "lm". You will see that the first argument is "formula". Follow the link that
# takes you to the help page for formula. In the "Details" section of that page
# the use of "I" is explained. Now see the summary of the model using this code:
summary(model.1)
## TASK 4 ##
# use the function "points" to add predicted values (as points) to the plot you
# created in exercise 5.23. You can type "attributes(model.1)" to see the list
# of attributes of object model.1. Note that you can get predicted values by
# typing "model.1$fitted.values".
## TASK 5 ##
# use the function "points" to add a line representing the quadratic function to
# the plot you created in exercise 5.24.
x <- seq(min(age), max(age), 1)
y <- model.1$coefficients[1] + model.1$coefficients[2]*x + model.1$coefficients[3]*(x^2)
points(x, y, type="l", col="red", lwd=2)
# you can change the limits for the y axis so as to see all predicted values:
plot(age, body.mass.change, xlab="Age (years)", ylab="Change in body mass (g)", cex.axis=1.5, cex.lab=1.5, pch=19, cex=1.5, ylim=c(-6,6))
points(age, model.1$fitted.values, col="red", pch=19, cex=1)
points(x, y, type="l", col="red", lwd=2)
## TASK 6 ##
# According to the results obtained from model.1, what is the age at which the
# parrots experience (on average) the highest gain (i.e., most positive change)
# in body mass? The value of age at the maximum of the function (i.e., when the
# derivative is zero) is:
as.numeric(-(model.1$coefficients[2])/(2*model.1$coefficients[3]))
# it is easy to do the derivative by hand, but you could use function "D" to
# obtain derivatives. If this is of interest, see the help page for function
#"D" and try this code:
D(expression(b0 + b1*x + b2*x^2), "x")
## TASK 7 ##
# See the help page for the function "vcov" by typing "help(vcov)" or "?vcov".
# Then obtain the variance-covariance matrix of the regression coefficients
#in model.1 using this code:
vcov(model.1)
# use the variance-covariance, along with parametric bootstrap, to calculate 95%
# confidence intervals for the age at which the parrots experience (on average)
# the highest gain (i.e., most positive change) in body mass. To do this part of
# the exercise you need to install package "mvtnorm" in your computer. You can
# install packages using function "install.packages" (see the respective help
# page) or going to the drop-down menu at the top of the R display when the
# console window is active. Once package "mvtnorm" is installed in your computer,
# you can load it using function "library" (see respective help page) by typing:
library(mvtnorm)
# See the help page for the function "rmvnorm" by typing "help(rmvnorm)" or
# "?rmvnorm". Then use function "rmvnorm" (in package mvtnorm) to generate 1000
# values of the second and third regression coefficients (i.e., the coefficients
# for "age" and "age^2") using parametric bootstrap, assuming that the sampling
# distribution of the coefficients is multivariate normal. You will see that one
# of the arguments required by this function is the variance-covariance of the
# multivariate normal distribution of interest. Define this variance-covariance
# by extracting the relevant values of the variance-covariance matrix of the
# regression coefficients in model.1:
VCV <- vcov(model.1)[2:3,2:3]
# now use function "rmvnorm" to generate 1000 values of the second and third
# regression coefficients, that we will call b2 and b3:
boo.b2b3 <- rmvnorm(1000, mean = c(model.1$coefficients[2], model.1$coefficients[3]), sigma = VCV)
# next, generate 1000 bootstrap values of the value of age at the maximum of the
# function (i.e., when the derivative is zero, see Exercise 5.26):
boo.age.max.func <- -boo.b2b3[,1]/(2*boo.b2b3[,2])
#examine your results by creating a histogram with function "hist"
hist(boo.age.max.func, breaks=100)
#calculate the lower limit of 95% confidence interval unsing function "quantile"
quantile(boo.age.max.func, probs = c(0.025))
#add the lower limit to the histogram
abline(v=quantile(boo.age.max.func, probs = c(0.025)), col="red", lty=3)
#calculate the upper limit of 95% confidence interval unsing function "quantile"
quantile(boo.age.max.func, probs = c(0.975))
#add the upper limit to the histogram
abline(v=quantile(boo.age.max.func, probs = c(0.975)), col="red", lty=3)
#last, create the plot of the original data, and add to it the regression line,
# the age at which the parrots experience (on average) the highest gain (i.e.,
# most positive change) in body mass and the respective confidence intervals.
plot(age, body.mass.change, xlab="Age (years)", ylab="Change in body mass (g)", cex.axis=1.5, cex.lab=1.5, pch=19, cex=1.5, ylim=c(-6,6))
points(age, model.1$fitted.values, col="red", pch=19, cex=1)
points(x, y, type="l", col="red", lwd=2)
abline(v=as.numeric(-(model.1$coefficients[2])/(2*model.1$coefficients[3])), col="red", lty=1) #this is the point estimate of the age at which parrots experience highest gain in body mass
abline(v=quantile(boo.age.max.func, probs = c(0.025)), col="red", lty=3) #this is the lower limit of the 95% confidence interval
abline(v=quantile(boo.age.max.func, probs = c(0.975)), col="red", lty=3) #this is the upper limit of the 95% confidence interval
################################################################################
################################################################################
################################################################################
################################################################################
### TASK ANSWERS ###############################################################
################################################################################
################################################################################
################################################################################
################################################################################
## TASK 1 ##
# Yes
## TASK 2 ##
plot(age, body.mass.change, xlab="Age (years)", ylab="Body mass change (g)")
## TASK 3 ##
# you already have the answer
## TASK 3 ##
points(age, model.1$fitted.values, col="red", pch=19)
## TASK 5 ##
# you already have the answer
## TASK 6 ##
# you already have the answer
## TASK 7 ##
# you already have the answer