################################################################################
### R BASICS WORKSHOP ###
### PRESENTATION 6: OBJECT MANIPULATION ###
### ###
### Center for Conservation and Sustainable Development ###
### Missouri Botanical Garden ###
### Website: rbasicsworkshop.weebly.com ###
################################################################################
# The material below is largely based on sections 3.5.4 to 3.5.8 of "R for beginners".
# It is divided into the following sections:
# A) Introduction to the indexing system
# B) Indexing character vectors
# C) Indexing numerical vectors
# D) Indexing matrices
# E) Indexing arrays
# F) Indexing lists
# G) Indexing data frames
# H) Accessing values of an object with names
# I) Pop quiz 1: accessing values in an "lm" object
# J) Pop quiz 2: study and correct indexing mistakes
# K) Indexing a phylogeny
# This presentation will cover sections A, B, C, D, F, G and H. After the presentation,
# participants will study sections E, I, J, and optionally K.
#################################################################################################################################
# A) Introduction to the indexing system
#################################################################################################################################
#The indexing system is an efficient and flexible way to selectively access
#the elements of an object; it can be either numeric or logical. For indexing
#we will use square brackets, the operators in the Table in page 25 of
#"R for beginners," and the functions "c", "which", "is.na", "lower.tri",
#"upper.tri", "diag", and "unique". We will also illustrate the use of function
#"str" to display the structure of R objects.
#################################################################################################################################
#B) Indexing character vectors: this section is divided into i) generation of vectors, ii) numeric indexing and iii) logical indexing
#################################################################################################################################
### Generate character vectors to illustrate indexing
#genus names: "G1", "G2", ..., "G10"
genus.epithet <- paste0("G", 1:10)
#number of species in each of the ten genera
species.per.genus <- c(35, 71, 40, 432, 45, 24, 14, 201, 5, 33)
#specific epithets: function "sequence" concatenates the sequences 1:35, 1:71, ..., 1:33,
#so the numbering of the epithets restarts at "S1" for every genus
species.epithet <- paste0("S", sequence(species.per.genus))
#genus of each species: every genus name is repeated as many times as the genus has species
genus <- rep(genus.epithet, times = species.per.genus)
#both vectors have one element per species, so their lengths must be equal
length(genus)
length(species.epithet)
#full species names: genus and specific epithet joined by an underscore
species.name <- paste(genus, species.epithet, sep = "_")
length(species.name)
#enter (fake) data on the geographic distribution of each species: occurrence in Eastern Asia (EA),
#Eastern North America (ENA), Western North America (WNA)
geographic.distribution <- c("EA","WNA","WNA","EA","WNA","WNA","WNA","ENA","EA","WNA","EA","EA","WNA","WNA","ENA","WNA","WNA","EA","ENA","ENA","WNA","ENA","EA","WNA",
"EA","ENA","ENA","EA","EA","ENA","ENA","ENA","WNA","WNA","EA","WNA","ENA","EA","ENA","EA","WNA","WNA","EA","EA","ENA","ENA","WNA","WNA",
"WNA","EA","EA","WNA","WNA","ENA","WNA","ENA","EA","WNA","WNA","ENA","ENA","ENA","EA","WNA","WNA","ENA","ENA","EA","EA","EA","ENA","WNA",
"EA","WNA","ENA","ENA","EA","ENA","WNA","ENA","ENA","EA","EA","ENA","EA","EA","EA","ENA","WNA","ENA","WNA","WNA","EA","WNA","WNA","ENA",
"EA","ENA","WNA","WNA","EA","ENA","EA","WNA","EA","EA","EA","EA","WNA","WNA","EA","EA","WNA","WNA","ENA","ENA","WNA","EA","ENA","EA","WNA",
"ENA","ENA","WNA","ENA","ENA","WNA","WNA","ENA","ENA","ENA","EA","EA","EA","ENA","EA","ENA","WNA","ENA","WNA","WNA","WNA","ENA","ENA","WNA",
"ENA","ENA","ENA","ENA","ENA","EA","EA","ENA","EA","ENA","EA","WNA","WNA","EA","ENA","ENA","ENA","EA","ENA","EA","ENA","EA","ENA","EA","EA",
"EA","EA","WNA","EA","ENA","EA","ENA","EA","EA","ENA","ENA","ENA","ENA","EA","EA","WNA","EA","WNA","EA","WNA","WNA","WNA","WNA","ENA","EA",
"EA","WNA","ENA","EA","EA","EA","EA","ENA","ENA","WNA","WNA","WNA","WNA","WNA","WNA","ENA","EA","ENA","ENA","EA","WNA","WNA","WNA","EA","ENA",
"ENA","ENA","ENA","ENA","EA","EA","ENA","EA","ENA","WNA","ENA","WNA","EA","EA","EA","ENA","EA","WNA","ENA","WNA","ENA","EA","WNA","WNA","EA",
"WNA","EA","ENA","WNA","ENA","EA","ENA","EA","ENA","WNA","WNA","WNA","ENA","EA","WNA","EA","WNA","ENA","ENA","EA","WNA","WNA","WNA","EA","ENA",
"WNA","WNA","EA","ENA","WNA","EA","WNA","EA","ENA","ENA","EA","ENA","EA","ENA","WNA","ENA","WNA","EA","ENA","ENA","WNA","ENA","ENA","EA","ENA",
"ENA","EA","ENA","EA","ENA","ENA","EA","ENA","ENA","WNA","WNA","WNA","WNA","WNA","EA","EA","ENA","WNA","EA","WNA","WNA","WNA","EA","EA","ENA",
"EA","ENA","ENA","ENA","ENA","ENA","EA","WNA","ENA","ENA","WNA","EA","ENA","WNA","EA","EA","WNA","ENA","WNA","WNA","ENA","ENA","EA","WNA","EA",
"WNA","EA","ENA","WNA","EA","ENA","EA","EA","EA","WNA","EA","EA","ENA","EA","WNA","ENA","EA","EA","ENA","WNA","EA","ENA","WNA","WNA","ENA",
"WNA","ENA","EA","ENA","ENA","ENA","EA","EA","ENA","ENA","ENA","WNA","EA","ENA","EA","EA","WNA","ENA","WNA","EA","EA","WNA","WNA","ENA","WNA",
"ENA","WNA","EA","WNA","WNA","WNA","ENA","WNA","WNA","ENA","EA","ENA","EA","ENA","ENA","EA","WNA","ENA","ENA","ENA","WNA","ENA","WNA","WNA",
"EA","WNA","ENA","EA","WNA","WNA","ENA","EA","WNA","EA","ENA","WNA","EA","EA","EA","ENA","WNA","WNA","EA","WNA","ENA","ENA","WNA","EA","ENA",
"WNA","EA","ENA","ENA","ENA","WNA","EA","EA","EA","ENA","EA","WNA","WNA","ENA","WNA","ENA","WNA","EA","EA","ENA","WNA","EA","EA","ENA","WNA",
"EA","ENA","ENA","ENA","ENA","WNA","ENA","ENA","WNA","ENA","EA","WNA","ENA","ENA","ENA","WNA","ENA","WNA","EA","EA","EA","WNA","EA","ENA",
"ENA","ENA","WNA","EA","ENA","ENA","ENA","WNA","ENA","EA","WNA","EA","EA","ENA","ENA","EA","WNA","EA","EA","ENA","ENA","WNA","ENA","ENA","EA",
"EA","EA","EA","WNA","EA","WNA","ENA","EA","ENA","EA","WNA","WNA","EA","ENA","EA","WNA","EA","ENA","ENA","WNA","EA","EA","EA","ENA","EA","EA",
"WNA","EA","WNA","ENA","WNA","ENA","WNA","WNA","EA","ENA","ENA","WNA","ENA","ENA","WNA","WNA","EA","WNA","EA","EA","ENA","EA","WNA","WNA","ENA",
"EA","ENA","WNA","ENA","ENA","WNA","ENA","ENA","ENA","ENA","WNA","WNA","ENA","ENA","ENA","ENA","WNA","EA","ENA","ENA","ENA","EA","ENA","EA","EA",
"ENA","EA","ENA","WNA","WNA","ENA","ENA","WNA","EA","EA","WNA","EA","WNA","WNA","WNA","ENA","ENA","ENA","WNA","ENA","ENA","EA","EA","EA","WNA",
"EA","ENA","ENA","EA","EA","EA","WNA","ENA","WNA","WNA","EA","ENA","WNA","WNA","WNA","WNA","EA","WNA","EA","WNA","ENA","ENA","ENA","WNA","EA",
"EA","WNA","EA","ENA","EA","WNA","EA","EA","ENA","EA","WNA","EA","ENA","WNA","EA","EA","ENA","EA","ENA","WNA","EA","EA","EA","EA","EA","ENA",
"ENA","WNA","WNA","ENA","WNA","WNA","ENA","EA","EA","WNA","WNA","WNA","ENA","ENA","WNA","WNA","WNA","EA","EA","WNA","WNA","ENA","WNA","WNA",
"ENA","ENA","EA","WNA","WNA","EA","ENA","ENA","EA","EA","WNA","WNA","ENA","ENA","ENA","ENA","WNA","EA","WNA","EA","WNA","ENA","WNA","WNA","EA",
"EA","WNA","WNA","EA","WNA","WNA","WNA","EA","ENA","EA","ENA","EA","EA","EA","EA","EA","ENA","WNA","WNA","EA","WNA","WNA","EA","WNA","ENA","WNA",
"ENA","WNA","WNA","WNA","ENA","EA","ENA","ENA","WNA","EA","ENA","ENA","WNA","ENA","ENA","ENA","ENA","EA","WNA","EA","ENA","EA","ENA","ENA","WNA",
"EA","ENA","ENA","ENA","ENA","ENA","EA","EA","WNA","EA","ENA","ENA","ENA","EA","EA","EA","WNA","ENA","WNA","ENA","WNA","WNA","EA","WNA","WNA","EA",
"WNA","ENA","WNA","EA","ENA","EA","ENA","ENA","WNA","WNA","EA","ENA","EA","WNA","WNA","WNA","EA","WNA","WNA","ENA","ENA","ENA","ENA","WNA","WNA",
"WNA","ENA","WNA","EA","ENA","ENA","WNA","WNA","EA","ENA","ENA","WNA","WNA","EA","ENA","EA","ENA","ENA","EA","WNA","WNA","ENA","EA","EA","WNA","EA",
"ENA","ENA","EA","EA","WNA","ENA","WNA","ENA","ENA","EA","WNA","WNA","ENA","ENA","WNA","ENA","EA","ENA","WNA","EA","ENA","ENA","EA","WNA","ENA","WNA",
"ENA","ENA","ENA","EA","WNA","EA","WNA","EA","WNA","EA","EA","ENA","EA","WNA","ENA","ENA","WNA","ENA","EA","EA","ENA","WNA","EA","ENA","WNA","WNA","WNA")
#examine some properties of the resulting vectors
str(species.name)
str(geographic.distribution)
attributes(species.name) #returns NULL because vector "species.name" has only intrinsic attributes: mode and length
attributes(geographic.distribution) #returns NULL because vector "geographic.distribution" has only intrinsic attributes: mode and length
mode(species.name) #the mode is the basic type of the elements of the object
length(species.name) #the length is the number of elements of the object
mode(geographic.distribution) #the mode is the basic type of the elements of the object
length(geographic.distribution) #the length is the number of elements of the object
### Numeric indexing
species.name[4] #the fourth element
species.name[4:7] #the fourth to the seventh elements
species.name[7:4] #the same elements, in reverse order
species.name[c(4,7)] #only the fourth and the seventh elements
species.name[c(7,4)] #the same two elements, in reverse order
### Logical indexing
species.name[species.name!="G1_S4"] #all the elements except "G1_S4"
#all the elements except "G1_S4", "G1_S7" and "G1_S13"; each name has to match an element
#of the vector exactly, otherwise the respective comparison does not exclude anything
species.name[species.name!="G1_S4" & species.name!="G1_S7" & species.name!="G1_S13"]
cbind(species.name, geographic.distribution) #this is not indexing, but a way to place character vectors side by side
#use one vector to index the other: names of the species occurring in each region
species.name[geographic.distribution=="EA"]
species.name[geographic.distribution=="WNA"]
species.name[geographic.distribution=="ENA"]
species.name[geographic.distribution=="ENA" | geographic.distribution=="WNA"]
#geographic distribution of particular species
geographic.distribution[species.name=="G4_S1"]
geographic.distribution[species.name=="G1_S3"]
geographic.distribution[species.name=="G1_S12"]
#indexing can also be used to replace values: assign a new distribution to species "G1_S12"
geographic.distribution[species.name=="G1_S12"] <- "WNA"
geographic.distribution[species.name=="G1_S12"]
cbind(species.name, geographic.distribution) #this is not indexing, but a way to place character vectors side by side
#################################################################################################################################
#C) Indexing numeric vectors: this section is divided into i) generation of vectors, ii) numeric indexing and iii) logical indexing
#################################################################################################################################
### Generate two numeric vectors to illustrate indexing
x <- c(1, 45, 89.6, 34.2, 23.2, 456, 2, 32, 90, 8)
x.3 <- c(2, 7, 5, 4, 3, 6, 9, 5, 2, 1, 3, 1, 4, 5, 4, 3, 2, 2, 4, 5,
         2, 3, 8, 4, 6, 5, 6, 3, 3, 3, 2, 2, 6, 8, 6, 3, 2, 3, 7, 5)
#examine some properties of the resulting vectors
str(x)
str(x.3)
attributes(x)   #NULL: vector "x" has only the intrinsic attributes mode and length
attributes(x.3) #NULL: vector "x.3" has only the intrinsic attributes mode and length
mode(x)     #basic type of the elements of the object
length(x)   #number of elements of the object
mode(x.3)   #basic type of the elements of the object
length(x.3) #number of elements of the object
### Numeric indexing
x[3]               #the third element
x[3] <- 10.2       #replace the third element
x[-6]              #a negative index excludes an element: all but the sixth
x.2 <- x[-6]       #the result of indexing can be assigned to a new object
x[3:9]             #the third to the ninth elements
x[c(3, 5, 8)]      #the third, fifth and eighth elements
### Logical indexing
w <- x > 32        #logical vector: TRUE where the element of x is greater than 32
x[w]               #elements of x for which w is TRUE
x[x > 32]          #the same, without creating an intermediate object
which(x > 32)      #positions (not values) of the elements greater than 32
x
x[x > 32 & x < 90]     #"&" is the logical AND
x[x < 32 | x > 90]     #"|" is the logical OR
x[x >= 32 & x <= 90]
x[x <= 32 | x >= 90]
x[x <= 32 | x >= 90] <- NA   #replace the selected elements with missing values
x.3
x.3[x.3 %% 2 == 0]     #even numbers: the remainder of the division by 2 is zero
x.3[x.3 > 5]
x.3[x.3 > 5] <- NA     #replace values greater than 5 with missing values
x.3
is.na(x.3)             #TRUE for missing values
!is.na(x.3)            #"!" negates: TRUE for values that are not missing
x.3[is.na(x.3)]
x.3[!is.na(x.3)]
#################################################################################################################################
#D) Indexing matrices: this section is divided into i) generation of a matrix, ii) numeric indexing and iii) logical indexing;
#a few functions to manipulate matrices are opportunistically provided.
#################################################################################################################################
### Generate a matrix to illustrate indexing
a <- c(1.72, -0.99, -0.05, -0.53, -0.04, 0.62, 0.44, 0.77, -0.24, 0.36, -0.16, -1.47, 0.66, 1.10, -1.80, -0.21, -1.65, 1.14, -0.57, 0.20)
X <- matrix(a, 5, 4) #a matrix with 5 rows and 4 columns, filled column by column
X
#examine properties of the resulting matrix
class(X)
mode(X)
attributes(X) #in addition to the intrinsic attributes, a matrix has the attribute "dim"
str(X)
### Numeric indexing
#the first index refers to rows and the second to columns
X[3,4] #row 3, column 4
X[4,3] #row 4, column 3
X[1:3,4] #rows 1 to 3 of column 4
X[1:3,2:4] #rows 1 to 3 of columns 2 to 4
X[1:3,c(2,4)] #rows 1 to 3 of columns 2 and 4
X[4,3] <- 5000 #replace a single value
X[4,] #an empty index selects everything: all the columns of row 4
X[-5,] #negative indices exclude: all the rows but the fifth
X[-c(1,2),] #all the rows but the first and the second
X[5,1]
X[-5,-1] #all but the fifth row and the first column
X.2 <- X[-5,] #a square (4 by 4) matrix
X.2
### Logical indexing
X>0
X[X>0]
#"arr.ind=TRUE" makes "which" return row and column indices; the logical constant
#is spelled out because "T" is an ordinary variable that can be reassigned
which(X>0, arr.ind=TRUE)
X[which(X>0, arr.ind=TRUE)]
X[X>0] #same values as the previous line
X.2
lower.tri(X.2) #TRUE for the elements below the diagonal
X.2[lower.tri(X.2)]
upper.tri(X.2) #TRUE for the elements above the diagonal
X.2[upper.tri(X.2)]
diag(X.2) #the elements in the diagonal
X.2[4,] <- NA #replace the whole fourth row with missing values
X.2
is.na(X.2)
!is.na(X.2)
X.2[is.na(X.2)]
X.2[!is.na(X.2)]
#################################################################################################################################
#E) Indexing arrays: this section is divided into i) generation of an array, ii) numeric indexing and iii) logical indexing
#################################################################################################################################
### Generate arrays
#arrays are used in package MCLUST to store multiple variance-covariance matrices,
#useful for species delimitation (e.g., Edwards and Knowles, 2014, Proceedings of the Royal Society
#of London B: Biological Sciences 281: 20132765):
#a 2 x 2 x 2 array holds eight values: two 2 x 2 matrices, each filled column by column
Sigma <- array(c(4.79, 2.55, 2.55, 2.62, 0.95, -0.55, -0.55, 1.30), dim=c(2,2,2))
Sigma
#examine properties of the resulting array
class(Sigma)
mode(Sigma)
attributes(Sigma)
str(Sigma)
#another array: two matrices, each with 2 rows and 5 columns
Y <- array(c(34,33,12,21,45,65,89,8,65,10,74,32,23,69,78,76,29,21,54,49) , dim=c(2,5,2))
Y
#examine properties of the resulting array
class(Y)
mode(Y)
attributes(Y)
str(Y)
### Numeric indexing
#there is one index per dimension; an empty index selects everything in that dimension
Sigma
Sigma[,,1] #the first matrix
Sigma[,,2] #the second matrix
Sigma[1,2,] #the element in row 1, column 2 of each matrix
Y[1,,] #the first row of each matrix
Y[,,1] #the first matrix
Y[,2:5,1] #columns 2 to 5 of the first matrix
Y[,,1]
Y[,,-1] #all the matrices but the first
### Logical indexing
Y>15
Y[Y>15]
#"arr.ind=TRUE" makes "which" return one index per dimension; the logical constant
#is spelled out because "T" is an ordinary variable that can be reassigned
which(Y>15, arr.ind=TRUE)
Y[which(Y>15, arr.ind=TRUE)]
Y[Y>15] #same values as the previous line
Y[Y>15]<-NA #replace values greater than 15 with missing values
is.na(Y)
!is.na(Y)
Y[is.na(Y)]
Y[!is.na(Y)]
#################################################################################################################################
#F) Indexing lists: this section is divided into i) generation of a list, ii) numeric indexing and iii) logical indexing
#################################################################################################################################
### Generate a list to illustrate indexing
#a list with six elements, each of them a numeric vector of length two
L1 <- list(c(0.01, 3.1), c(0.02, 4.0), c(0.05, 3.5),
           c(0.01, 2.9), c(0.03, 3.1), c(0.04, 3.4))
#examine properties of the resulting list
attributes(L1) #NULL: list "L1" has only the intrinsic attributes mode and length
class(L1)
mode(L1)
length(L1)
str(L1)
### Numeric indexing
L1
L1[1]            #single brackets return a list: here a list with the first element of L1
L1[-1]           #a list with all but the first element of L1
L1[1:3]          #a list with the first three elements of L1
L1[1:3][1]       #the result of indexing can be indexed again
L1[1:3][-1]
L1[[1]]          #double brackets return the element itself: here a numeric vector
class(L1[1])     #"list"
class(L1[[1]])   #"numeric"
L1[[1]][2]       #the second value of the vector that is the first element of L1
L1[[1]]
L1[[1]][1]
L1[[1]] <- 3     #replace the first element of L1
L1
attributes(L1[1]) #NULL: list "L1[1]" has only the intrinsic attributes mode and length
mode(L1[1])       #basic type of the elements of the object
length(L1[1])
str(L1[1])
### Logical indexing
L1[[2]]
L1[[2]] > 2
L1[[2]][L1[[2]] > 2]   #values greater than 2 in the second element of L1
#################################################################################################################################
#G) Indexing data frames: this section is divided into i) loading a data frame, ii) numeric indexing and iii) logical indexing
#################################################################################################################################
### Load the "Iris" data frame to illustrate indexing. This famous data frame contains data on flower morphology for Iris species
#collected by Edgar Anderson and used by Ronald Fisher to introduce discriminant function analysis
#(http://en.wikipedia.org/wiki/Iris_flower_data_set).
data(iris)
?iris #open the help page of the data set
#examine properties of the data frame
class(iris)
attributes(iris)
str(iris)
### Numeric indexing
#as in matrices, the first index refers to rows and the second to columns
iris[1:5,] #the first five rows
iris[,5] #the fifth column
### Logical indexing
#rows for which the value in the fifth column equals the name of a species
iris[iris[,5]=="setosa",]
iris[iris[,5]=="virginica",]
#################################################################################################################################
#H) Accessing values of an object with names
#################################################################################################################################
#Names are labels of the elements of an object. If the elements of an object have names, they can be extracted by using them as
#indices. This may be termed `subsetting' (rather than `extraction') because the attributes of the original object are kept.
attributes(iris)
iris["Species"] #single brackets and a name return a data frame with one column
class(iris["Species"])
iris["Sepal.Width"]
iris[c("Sepal.Length", "Sepal.Width")] #several names can be used at once
#add names to a vector, and use them to extract elements
names(species.name) #NULL while the vector has no names
#"seq_along" numbers the elements from 1 to the length of the vector and, unlike
#"1:length(...)", it returns an empty sequence when the vector is empty
names(species.name) <- paste("species", seq_along(species.name), sep=".")
attributes(species.name) #the names are now an attribute of the vector
species.name[1:5]
species.name["species.3"]
species.name["species.637"]
#add names to a list, and use them to extract elements
names(L1)
names(L1) <- c("rita", "thomas", "carl", "andrea", "sleeping", "beauty")
L1
attributes(L1)
L1["rita"]
L1["carl"]
#To extract a vector or a factor from a data frame, one may use the operator "$".
iris$Species
class(iris$Species)
as.vector(iris$Species) #converts a factor into a character vector
unique(iris$Species) #extracts unique elements in a factor
unique(as.vector(iris$Species)) #extracts unique elements in a vector
iris$Sepal.Width
iris$Sepal.Length
#The operator "$" also works with lists.
L1$sleeping
L1$beauty
#################################################################################################################################
#I) Pop quiz 1: accessing values in an "lm" object. Answers are at the end of this script.
#################################################################################################################################
#The two vectors below contain (fake) data for a parrot population: the age (in years) of each individual and its
#change in body mass (in grams) during the season of low fruit availability.
age <- c(7, 12, 12, 26, 33, 28, 36, 44, 39, 46, 39, 46, 8, 3, 17, 19, 25, 24, 40, 42, 35, 54, 47, 55, 10, 12, 24, 18, 25, 31, 40, 44, 41, 51, 56, 52, 10, 16, 27, 26, 28,
31, 34, 35, 39, 49, 51, 47, 18, 7, 18, 21, 33, 29, 33, 38, 38, 50, 52, 47, 16, 12, 20, 19, 32, 18, 39, 43, 35, 49, 53, 53)
body.mass.change <- c(-1.3, -4.8, 1.5, 3.6, 2.5, 5.9, 0.8, 0.3, 1.5, -3.9, -3.9, -3.1, -2.0, -4.3, -1.4, 1.3, 3.2, 5.5, 0.4, -1.1, 1.6, -2.7, -4.7, -4.3, -2.8, -3.6, 1.4, 3.1,
1.6, 4.3, 0.0, -0.1, 2.4, -2.1, -4.1, -3.0, -1.7, -4.6, 1.9, 3.2, 3.1, 4.4, 0.7, 0.5, 2.1, -1.1, -3.9, -3.9, -1.5, -3.0, 0.9, 3.9, 1.4, 5.1, -0.7, 1.8, 1.1, -2.0, -2.5, -3.1,
-0.7, -3.4, 0.6, 3.2, 1.5, 6.2, -0.1, 1.8, 1.7, -1.7, -2.0, -4.6)
#scatter plot of change in body mass (y axis) against age (x axis)
plot(x = age, y = body.mass.change)
#fit an (ordinary least squares) regression of change in body mass (the response variable)
#on age and age squared (the independent variables) with function "lm"; its help page
#is available by typing "?lm". The fitted model is stored in an object named "model.1":
model.1 <- lm(formula = body.mass.change ~ age + I(age^2))
#summary of the regression results
summary(model.1)
#examine some properties of object "model.1"
class(model.1)
attributes(model.1)
str(model.1)
#Task 1
#use the operator "$" in combination with numerical indexing to
#extract the first regression coefficient (i.e, the intercept)
#in the "coefficients" attribute of object model.1
#Task 2
#use the operator "$" in combination with numerical indexing to
#extract the second regression coefficient (for age)
#in the "coefficients" attribute of object model.1
#Task 3
#use the operator "$" in combination with numerical indexing to
#extract the third regression coefficient (for age squared)
#in the "coefficients" attribute of object model.1
#Task 4
#use the operator "$" to access the residuals of the regression
#Task 5
#use the operator "$" in combination with numerical indexing
#to access the first 10 residuals of the regression
#Task 6
#use the operator "$" to access the fitted values of the regression
#Task 7
#plot the fitted values of the regression (in the y axis) against
#the variable age. Use the operator "$" to access the fitted values
#of the regression
#Task 8, optional:
#plot observed and fitted values against age in a single figure
#################################################################################################################################
#J) Pop quiz 2: study and correct the following indexing mistakes. Make an effort to read the error messages, they are often
# useful. Answers are at the end of this script.
#################################################################################################################################
#Every line of code in this section contains a deliberate mistake: run each line, read the
#message returned by R, and work out how to correct the code.
#Task 1
#a researcher is trying to extract the third element of vector x with the following code:
x(3)
#What is wrong? Why is the error message referring to a function?
#Task 2
#a researcher is trying to extract the second and third elements of vector x using code:
x[2,3]
#What is wrong?
#Task 3
#a researcher is trying to extract the element in the third row and fourth column of matrix X:
X(3,4)
#What is wrong?
#Task 4
#a researcher is trying to extract the element in the sixth row and second column of matrix X:
X[6,2]
#What is wrong?
#Task 5
#a researcher is trying to extract the element in the fifth row and fourth column of matrix X:
X[4,5]
#What is wrong?
#Task 6
#a student is trying to extract the first three elements of list L1:
L1[[1:3]]
#What is wrong?
#Task 7
#a student is trying to extract a list with all but the first element of L1:
L1[[-1]]
#What is wrong?
#Task 8
#a professor is trying to determine if the numbers in list L1 are higher than 2:
L1>2
#What is wrong?
#Task 9
#now the professor is trying to determine if the numbers in the second element of list L1 are higher than 2:
L1[2]>2
#What is wrong?
#################################################################################################################################
#K) Indexing a phylogeny.
#################################################################################################################################
#load package "ape" (install the package beforehand)
library(ape)
#set working directory; the path below is specific to one computer, replace it with the
#path to the folder where you saved the workshop data sets
setwd("C:/_transfer/R_Basics_Workshop/St_louis_May_2017/Datasets") #working directory at Ivan's laptop
#read file "tree_central_ages.new" (available at the workshop website) with a dated phylogeny of plant
#families (in Newick format):
phylo.fam.world <- read.tree("tree_central_ages.new")
#Task 1
#plot the phylogeny
plot(phylo.fam.world, type="fan", cex=0.6)
#Task 2
#examine properties of object "phylo.fam.world"
class(phylo.fam.world)
attributes(phylo.fam.world)
str(phylo.fam.world) #note that objects of class "phylo" are lists
#Task 3
#Read the section on class "phylo" in page 30 of Paradis (2012, "Analysis of phylogenetics
#and evolution with R", available at http://rbasicsworkshop.weebly.com/books.html). Use
#operator "$" to explore the components of class phylo: "edge", "edge.length", "tip.label",
#"Nnode", "node.label", "root.edge". For example:
phylo.fam.world$edge
#note that you can also index each component, e.g.,
phylo.fam.world$edge[1:7,]
#Task 4
#examine the class of objects obtained by indexing with single and double square brackets,
#compare to section F (above)
class(phylo.fam.world[1])
class(phylo.fam.world[[1]])
class(phylo.fam.world[2])
class(phylo.fam.world[[2]])
class(phylo.fam.world[3])
class(phylo.fam.world[[3]])
#Task 5
#examine the tip labels
phylo.fam.world$tip.label
#study the code below, it is used to capitalize the first letter of the tip labels;
#visit the help pages of functions "paste", "toupper", and "substring":
phylo.fam.world$tip.label <- paste(toupper(substring(phylo.fam.world$tip.label, 1, 1)), substring(phylo.fam.world$tip.label, 2), sep="")
#examine the result
phylo.fam.world$tip.label
#Task 6 (optional, not about indexing)
#visit the help page of function "drop.tip". Use this function to plot selected tips of the phylogeny.
#NOTE: the tip indices below presumably assume that the phylogeny has 476 tips; this can be
#confirmed with "length(phylo.fam.world$tip.label)".
#The logical constant FALSE is spelled out because "F" is an ordinary variable that can be reassigned.
phylo.fam.world.1 <- drop.tip(phylo.fam.world, 25:476, subtree=TRUE)
plot(phylo.fam.world.1, type="p", cex=0.8, show.node.label=FALSE)
axisPhylo(1)
phylo.fam.world.2 <- drop.tip(phylo.fam.world, c(1:24,51:476), subtree=TRUE)
plot(as.phylo(phylo.fam.world.2), type="p", cex=0.8,show.node.label=FALSE)
axisPhylo(1)
phylo.fam.world.3 <- drop.tip(phylo.fam.world, c(1:50,70:476), subtree=TRUE)
plot(as.phylo(phylo.fam.world.3), type="p", cex=0.8,show.node.label=FALSE)
axisPhylo(1)
#################################################################################################################################
#NOTE
#################################################################################################################################
#the (fake) data on geographic distributions was generated using function "sample".
#WARNING: running the line below replaces the values of "geographic.distribution" entered
#in section B with a new random sample.
geographic.distribution <- sample(c("EA", "ENA", "WNA"), size=length(species.name), replace=TRUE)
#However, if each workshop participant were to use the code above, the participants would have
#several different data sets, preventing comparison of results. To print out the data generated
#from a single run of the line above, in a way that can be pasted and directly used in a script,
#one can use argument "useFancyQuotes" in function "options", as
#well as functions "cat" and "dQuote" (see the respective help functions if interested) in this way:
options(useFancyQuotes = FALSE) #plain quotes, which R can read back, instead of typographic quotes
cat(dQuote(geographic.distribution), sep=",")
#similarly, the data for numeric vector "x.3" was generated this way
#(running the line below replaces the values of "x.3" entered in section C):
x.3 <- rpois(40, lambda=5)
cat(x.3, sep=",")
#################################################################################################################################
#Answers to pop quiz 1
#################################################################################################################################
#Task 1: the intercept is the first regression coefficient
model.1$coefficients[1]
#Task 2: the coefficient for age is the second one
model.1$coefficients[2]
#Task 3: the coefficient for age squared is the third one
model.1$coefficients[3]
#Task 4: all the residuals
model.1$residuals
#Task 5: the first ten residuals
model.1$residuals[1:10]
#Task 6: all the fitted values
model.1$fitted.values
#Task 7: fitted values (y axis) against age (x axis)
plot(age, model.1$fitted.values, col = "red", pch = 19)
#Task 8
#start by examining the range of the observed and of the fitted values with function
#"range"; its help page is available by typing "?range":
range(model.1$fitted.values)
range(body.mass.change)
#the range of fitted and observed values taken together can be obtained this way:
range(c(model.1$fitted.values, body.mass.change))
#plot observed values against age, using argument "ylim" of the "plot" function
#to make sure the range of the y axis is enough to accommodate the fitted values;
#the help page for "plot.default" is available by typing "?plot.default"
plot(age, body.mass.change, ylim = range(c(model.1$fitted.values, body.mass.change)))
points(age, model.1$fitted.values, col = "red", pch = 19)
#imagine you wanted to plot the fitted values as a line, and you tried fiddling
#with argument "type" of the "plot" function:
plot(age, body.mass.change)
points(age, model.1$fitted.values, col = "red", type = "l", pch = 19)
#clearly, that would not work, because the line joins the points in the order in which
#they appear in the vectors; but you can use your indexing skills to sort both
#variables by age with function "order" (see respective help page)
o <- order(age)
plot(age, body.mass.change, ylim = range(c(model.1$fitted.values, body.mass.change)))
points(age[o], model.1$fitted.values[o], col = "red", type = "l", pch = 19, lwd = 2)
#################################################################################################################################
#Answers to pop quiz 2
#################################################################################################################################
#In each task the first line of code is the incorrect code from the quiz, and it is
#followed by the corrected code.
#Task 1
x(3)
#never use round brackets for indexing! Always use square brackets. The error message
#refers to a function because round brackets are used to provide arguments for a function.
#Here is the correct code:
x[3]
#Task 2
x[2,3]
#a vector has a single dimension, so it takes a single index; two indices separated by
#a comma are used for objects with two dimensions, such as matrices. The two elements
#can be extracted one at a time:
x[2]
x[3]
#or together, by placing both positions in a single index built with function "c":
x[c(2,3)]
#Task 3
X(3,4)
#as in task 1, square (not round) brackets are needed for indexing:
X[3,4]
#Task 4
X[6,2]
X[5,2] #within the bounds of the number of rows and columns of X
#Task 5
X[4,5]
#the first index refers to rows and the second to columns:
X[5,4] #within the bounds of the number of rows and columns of X
#Task 6
L1[[1:3]]
#use single square brackets to obtain a list with several elements of a list:
L1[1:3]
#Task 7
L1[[-1]]
#again, single square brackets return a list:
L1[-1]
#note that negative numbers can also be used to index particular elements of a list,
#by example, this code extracts all but the first number in the vector that is the second
#element of L1:
L1[[2]][-1] #produces a numeric vector with only one element
#Task 8
L1>2
#use function "unlist":
unlist(L1)>2
#Task 9
L1[2]>2
#"L1[2]" is a list; double square brackets are needed to extract the numeric vector it contains:
L1[[2]]>2