################################################################################ ### R BASICS WORKSHOP ### ### EXERCISE 3.1: OBJECTS ### ### ### ### Center for Conservation and Sustainable Development ### ### Missouri Botanical Garden ### ### Website: rbasicsworkshop.weebly.com ### ################################################################################ ## OBJECTIVE: ## The objective of this exercise is to become familiar with different types of ## objects in R. rand.1 <- rnorm(100, mean=5, sd=1.5) # Creates an object with 100 random values taken from a normal distribution with # an average of 5 and a standard deviation of 1.5. ## TASK 1: What class of object is this (hint: use function 'class')? ## TASK 2: What type of data does it contain? ## TASK 3: What is the length of this object? ## TASK 4: What are the mean and standard deviation of rand.1? ## TASK 5: Re-write "rand.1" with another set of 100 random values taken from a ## normal distribution with an average of 50 and a standard deviation of 15. ## TASK 6: What are the mean and standard deviation of this new version of ## rand.1? ?as.character # Opens the help file for function 'as.character' ## TASK 7: Use the function 'as.character' to transform the values in 'rand.1' ## to characters and save them into an object called 'rand.2'. class(rand.2) mode(rand.2) length(rand.2) # Confirms the changes made to the object. rand.1 rand.2 ## When printing these two objects on the screen, pay attention at how ## they look different. identical(rand.1, rand.2) ## TASK 8: What does the function 'identical' do, and why is this FALSE? ## TASK 9: Use the function 'as.numeric' to create a new object rand.3 ## back-transforming rand.2 from text to numbers. class(rand.1) class(rand.3) identical(class(rand.1), class(rand.3)) # This confirms the change identical(rand.1, rand.3) ## TASK 10: Can you figure it out why this is still FALSE? What is the result of ## 'rand.1' minus 'rand.3'? In computers, you have to be careful when working ## with numbers with a large number of decimals. seq.1 <- 1:5 seq.1 ## Creates a vector with the sequence 1, 2, 3, 4, 5. ## TASK 11: We want to create a vector of length 10 that is a random sample of ## numbers from the sequence in 'seq.1'. To do this, we use the function 'sample'. ## The line below, however, has an error. Fix the problem using the help and the ## error the line returns. sample(x=seq.1, size=10) ## TASK 12: Create an object of name 'rand.3' (over-writing the previously created ## object) that is (1) a character vector of length 300 that is (2) a random ## sample of integers from 1 to 10 (Note that this requires multiple ## functions in the same command). is.numeric(rand.3) # This should be FALSE. It tests whether the mode of the object is 'numeric', # meaning that the data it contains are numbers. is.character(rand.3) # This should be TRUE. class(rand.3) mode(rand.3) length(rand.3) # Confirms the class, mode and length of the object you created. rand.4 <- as.numeric(rand.3) # Creates a numeric vector transforming the values in 'rand.3'. ## TASK 13: Create a vector 'rand.5' by applying the function 'as.factor' ## to 'rand.4' ## TASK 14: What is the class of 'rand.5'? plot(rand.4, col="lightgreen") plot(rand.5, col="blue") # Although in essence both objects have numbers, whether the object is of class # numeric or class factor determines the way the 'plot' function will handle # the data. M.abund <- matrix(rpois(1000, 1)*rpois(1000, 10), ncol=50) M.abund ## TASK 15: What does the function 'rpois' do? What is this line of code doing? class(M.abund) ## TASK 16: Without using R, can you predict what the dimensions of this matrix ## will be? What its length will be? Confirm your predictions using the ## functions 'dim' and 'length'. # Let us suppose that the matrix 'M.abund' is a matrix of abundances of species # (columns) at various sites (rows). In these matrices, it is useful to have # names for the rows and the columns. The following line of code creates names # for the columns that represent different species. ## TASK 17: There is a mistake in the line below, read the error message, find ## the mistake and fix it. colnames(M.abund) <- paste("sp", 1:ncol("M.abund"), sep="_") colnames(M.abund) M.abund ## TASK 18: Can you predict what will be the output of the following line of code? class(colnames(M.abund)) ## TASK 19: Now, write code to give names to the rows (i.e. sites) of the matrix. M.presence <- M.abund > 0 ## You will learn more about these operators, but for now, what this line ## of code is doing is comparing each element of the matrix 'M.abund' to the value ## of 0, and producing a new matrix of identical dimensions. This new matrix is ## filled with TRUE and FALSE values, depending on whether the values in the ## original matrix 'M.abund' are greater than zero or not. M.presence ## Confirms that the object looks the way it should. Notice that it ## inherits the column and row names from the matrix 'M.abund'. ## TASK 20: What will be the class of this object? What will be its mode? spp.abund <- colMeans(M.abund) # The function 'colMeans' calculates the mean values of each column. This # produces a vector with the mean number of individuals for each species. ## TASK 21: What are the names of the elements in this vector? Where did they ## come from? spp.occup <- colSums(M.presence) # This is a very useful trick: R handles TRUEs like 1s and FALSEs like 0s. So # making a sum of the columns in the TRUE/FALSE matrix produces a vector that # counts the number of sites where a species is present. # Now, lets see if there is a relationship between abundance and occupancy in # these simulated data. LM.abund.occup <- lm(spp.abund~spp.occup) ## TASK 22: What does the function 'lm' do? ## TASK 23: What is the class of this object? ## TASK 24: Investigate the structure of this object with the function 'str'. ## TASK 25: Use the function 'summary' on this object to get the results. ## TASK 26: Make a simple scatter-plot that corresponds to this regression using ## the function 'plot'. ## TASK 27: Make a similar analysis that investigates the relationship between ## total numbers of individuals at a site with species richness (use 'rowSums' ## to obtain the total number of individuals per site). rm(M.abund) ## TASK 28: What is the function 'rm' doing? sim.matrix <- matrix(rpois(50, 5), nrow=10, ncol=5) # Creates a matrix of 5 columns and 10 rows, filling it with random values from # a Poisson distribution with a mean of 5. rownames(sim.matrix) <- paste("site", 1:nrow(sim.matrix), sep="_") colnames(sim.matrix) <- paste("sp", 1:ncol(sim.matrix), sep="_") # Creates names for rows and columns. letters # 'letters' is a vector containing the alphabet available in R. L.vector <- sample(c(letters, LETTERS), 50, replace=TRUE) ## TASK 29: What are the class, mode and length of this vector? ## TASK 30: Create a vector with integers from 1 to the length of vector 'L.vector'. ## TASK 31: Use the vector in the previous step (TASK 30) to put names to the ## elements in the vector 'L.vector' ## TASK 32: Create matrix named 'L' that contains the values in 'L.vector', and ## that has 5 rows (and as many columns as necessary to hold the data). ###**Download the file: StreblidaeOnBats.txt from the workshop website**### # Using the following line of code, open the file "StreblidaeOnBats.txt", # and save the data into an object named 'streb'. streb <- read.table(file.choose(), header=TRUE, sep="\t") # This dataset contains information on the abundance and richness of three # species of bat ectoparasites on individual hosts (rows) collected at various # localities. ## TASK 33: Without using R, do you know what class of object 'streb' is? Confirm ## your prediction using R. dim(streb) # Gives the dimensions of 'streb'. ## TASK 34: How many variables and how many observations are there in this dataset? ## TASK 35: Get the names of the variables in this dataset. rownames(streb) # Obtains the names of the rows/observations in the dataset. ## TASK 36: Change the names of the rows by combining the characters "obs_" with ## the number of the observation; for example 'obs_1', 'obs_2', etc. ## TASK 37: What is the structure of the dataset. HINT: use the function 'str'. ## TASK 38: Make a summary of the dataset using the function 'summary'. streb_sex <- streb$Sex # You will learn more about indexing, but for now, the line above takes the # column named 'Sex' in the dataframe 'streb', and puts that data into a new # object 'streb_sex'. ## TASK 39: What is the class of 'streb_sex'? streb_sex.2 <- as.character(streb$Sex) ## TASK 40: What is the class of 'streb_sex.2'? ## TASK 41: Can you predict what the following lines of code will produce? levels(streb_sex) levels (streb_sex.2) ## TASK 42: Can you predict what the following lines of code will produce? plot(streb_sex) plot(streb_sex.2) ## Notice how these two lines print slightly different data streb_sex as.character(streb_sex) ## TASK 43: Use the function 'identical' to test that 'streb_sex' and ## 'streb_sex.2' have the same data. Do they? SexMatrix <- cbind(streb_sex, as.character(streb_sex)) # The 'cbind' function concatenates vectors to make matrices, each vector becomes # a column ('cbind' = bind columns). SexMatrix ## TASK 44: What happened with the data in the factor object 'streb_sex' when ## making a matrix with it? ## Run the following code, which creates a number of vectors: V1 <- rnorm(10) class(V1) V2 <- rpois(10, 5) class(V2) V3 <- sample(letters, 10) class(V3) V4 <- sample(c(TRUE, FALSE), 11, replace=TRUE) class(V4) ## TASK 45: Can you predict why the following will line of code produces an error? DF <- data.frame(V1, V2, V3, V4) ## TASK 46: Change how 'V4' was created making it have only 10 elements. Then ## produce a data frame with vectors 'V1' to 'V4'. ## TASK 47: Produce a list with the name 'DF.list' that has the same data as DF, ## and where each element in the list is one of the vectors 'V1' to 'V4'. ################################################################################ ### TASK SOLUTIONS ############################################################# ################################################################################ ## TASK 1 ## class(rand.1) # Numeric vector ## TASK 2 ## mode(rand.1) ## TASK 3 ## length(rand.1) ## TASK 4 ## mean(rand.1) sd(rand.1) ## TASK 5 ## rand.1 <- rand(100, mean=5, sd=1.5) ## TASK 6 ## mean(rand.1) sd(rand.1) # No, because these are independent random draws ## TASK 7 ## rand.2 <- as.character(rand.1) ## TASK 8 ## help(identical) # Tests whether two objects are exactly equal. It is false because one is a # numeric vector and the other is a character vector ## TASK 9 ## rand.3 <- as.numeric(rand.2) ## TASK 10 ## rand.1 - rand.3 # Differences are close to, but not exactly, zero. In the change of data type # some precision is lost ## TASK 11 ## help(sample) sample(x=seq.1, size=10, replace=TRUE) ## TASK 12 ## rand.3 <- as.character(sample(1:10, size=300, replace=TRUE)) ## TASK 13 ## rand.5 <- as.factor(rand.4) ## TASK 14 ## class(rand.5) ## TASK 15 ## help(rpois) # rpois draws random values from a Poisson distribution M.abund <- matrix(rpois(1000, 1)*rpois(1000, 10), ncol=50) # 1. Create a vector of 1000 values from a Poisson distribution # with lambda=1. # 2. Create another vector like in step 1. # 3. Multiply vectors created in steps 1 and 2. # 4. Put the result of step 3 into a matrix of 50 columns, # and as many rows as necessary. ## TASK 16 ## # 50 columns determined when setting parameter ncol=50 # 20 rows because, with 50 columns, 20 rows are needed to store 1000 values # length of 1000 because there are 1000 values in this matrix dim(M.abund) length(M.abund) ## TASK 17 ## # When using *M.abund*, we hope to be accessing the values within the object (matrix). # Because it is between quotation marks, we are telling this is a piece of text, # not the name of an object colnames(M.abund) <- paste("sp", 1:ncol(M.abund), sep="_") ## TASK 18 ## # It will be character because, even though the values inside of *M.abund* are numbers # the names of *M.abund* are text. Here we are asking for the class of the names, # not the class of the values inside the matrix class(colnames(M.abund)) ## TASK 19 ## rownames(M.abund) <- paste("site", 1:nrow(M.abund), sep="_") ## TASK 20 ## class(M.presence) mode(M.presence) ## TASK 21 ## names(spp.abund) # These are the sums per column, so the names came from the column names of M.abund ## TASK 22 ## help(lm) # lm fits linear models ## TASK 23 ## class(LM.abund.occup) ## TASK 24 ## str(LM.abund.occup) ## TASK 25 ## summary(LM.abund.occup) ## TASK 26 ## plot(spp.occup, spp.abund, cex=1.5, pch=21, col="black", bg="grey30", xlab="Species Occupancy", ylab="Species Mean Abundance") ## TASK 27 ## site.density <- rowSums(M.abund) site.rich <- rowSums(M.presence) plot(site.density, site.rich, cex=1.5, pch=21, col="black", bg="grey30", xlab="Site Density", ylab="Site Richness") LM.rich.dens <- lm(site.rich~site.density) summary(LM.rich.dens) ## TASK 28 ## help(rm) # rm removes objects from the R session ## TASK 29 ## class(L.vector) mode(L.vector) length(L.vector) ## TASK 30 ## seq(from=1, to=length(L.vector), by=1) # or 1:length(L.vector) ## TASK 31 ## names(L.vector) <- 1:length(L.vector) ## TASK 32 ## L <- matrix(L.vector, nrow=5) ## TASK 33 ## # data.frame. The function *read.table* always produces data.frames class(streb) ## TASK 34 ## dim(streb) # 139 observations # 13 variables ## TASK 35 ## colnames(streb) ## TASK 36 ## rownames(streb) <- paste("obs", 1:nrow(streb), sep="_") ## TASK 37 ## str(streb) ## TASK 38 ## summary(streb) ## TASK 39 ## class(streb_sex) ## TASK 40 ## class(streb_sex.2) ## TASK 41 ## levels(streb_sex) # Will return the levels of the variable levels(streb_sex.2) # Will return nothing because this is not a factor ## TASK 42 ## plot(streb_sex) # Will plot the number of observations per level plot(streb_sex.2) # Will plot nothing because this is a character vector ## TASK 43 ## identical(streb_sex, streb_sex.2) ## TASK 44 ## SexMatrix # Because matrices can contain only one type of data, numbers were transformed # into text ## TASK 45 ## # Because the last element used to build the data.frame contains 11 rows, when # all the others contain 10 ## TASK 46 ## V4 <- sample(c(TRUE, FALSE), 10, replace=TRUE) DF <- data.frame(V1, V2, V3, V4) ## TASK 47 ## DF.list <- list(V1, V2, V3, V4) str(DF.list)