April 13, 2017
# Two vectors of the same length: arithmetic and comparison operators
# work on matching positions.
x <- seq_len(5)
y <- c(1, 2, 6, 7, 10)
x + y # element-by-element sum
## [1] 2 4 9 11 15
x < y # element-by-element comparison
## [1] FALSE FALSE TRUE TRUE TRUE
# The same operators applied to two 3 x 3 matrices (filled column by column).
x <- matrix(seq_len(9), nrow = 3)
y <- matrix(rep(c(5, 6, 7), times = 3), nrow = 3)
x + y # element-by-element sum of the two matrices
## [,1] [,2] [,3]
## [1,] 6 9 12
## [2,] 8 11 14
## [3,] 10 13 16
x < y # element-by-element comparison, giving a logical matrix
## [,1] [,2] [,3]
## [1,] TRUE TRUE FALSE
## [2,] TRUE TRUE FALSE
## [3,] TRUE TRUE FALSE
The slides for the “Reading and Writing Data” section were mainly adapted from materials by Dr. Roger D. Peng, Associate Professor at Johns Hopkins
Main functions for reading data into R:
- read.table(), read.csv(): to read tabular data
- readLines(): to read lines of a text file
- source(), dget(): to read R code
- load(): to read saved workspaces
read.table() and read.csv() are covered in this lecture.
Main functions for writing data from R:
- write.table(), write.csv(): to write tabular data to file
- writeLines(): to write lines to a text file
- dump(), dput(): to write R code to a file
- save(): to save a workspace
write.table() is covered in this lecture.
read.table() is the most commonly used function to read data in R.
Type ?read.table in your R console to see the important arguments in the function.
read.csv() is intended for reading comma separated value files.
# Read a comma-separated file whose first line holds the column names.
irisFile <- read.table(file = "iris.csv", sep = ",", header = TRUE)
head(irisFile)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 Iris-setosa
## 2 4.9 3.0 1.4 0.2 Iris-setosa
## 3 4.7 3.2 1.3 0.2 Iris-setosa
## 4 4.6 3.1 1.5 0.2 Iris-setosa
## 5 5.0 3.6 1.4 0.2 Iris-setosa
## 6 5.4 3.9 1.7 0.4 Iris-setosa
# Write the data back out as CSV. row.names = FALSE keeps the header and the
# data rows at the same number of fields; with the default row.names = TRUE
# every data row gains a leading row-name field that the header line lacks.
write.table(irisFile, file = "new_iris.csv", sep = ",", col.names = TRUE,
            row.names = FALSE)
# Without a loop, the same call has to be written once per year ...
print(paste("The year is", 2014))
## [1] "The year is 2014"
print(paste("The year is", 2015))
## [1] "The year is 2015"
print(paste("The year is", 2016))
## [1] "The year is 2016"
# ... whereas a for loop runs the call once for each value of i.
for (i in 2014:2016) {
  print(paste("The year is", i))
}
## [1] "The year is 2014"
## [1] "The year is 2015"
## [1] "The year is 2016"
# Square every element of vec with an explicit for loop.
# (The vectorized equivalent is simply vec^2; the loop is shown to
# illustrate the for construct.)
vec <- seq(2, 20, by = 2)
vec
# Preallocate the result so it is not grown inside the loop.
newvec <- vector("numeric", length = length(vec))
# seq_along() yields an empty sequence for empty input, whereas
# 1:length(vec) would iterate over c(1, 0).
for (i in seq_along(vec)) {
  newvec[i] <- vec[i]^2
}
newvec
## [1] 4 16 36 64 100 144 196 256 324 400
# Branch on a single (scalar) condition.
x <- 7
if (x < 10) {
  print("x is less than 10")
} else {
  # x == 10 also reaches this branch, so the message has to cover equality.
  print("x is greater than or equal to 10")
}
## [1] "x is less than 10"
# Classify ten randomly drawn ages into three categories using
# if / else if / else inside a for loop.
age <- sample(1:100, 10)
# Preallocate a character vector (NA_character_ rather than logical NA)
# so the type does not change on the first assignment.
ageCat <- rep(NA_character_, length(age))
# seq_along() is safe for zero-length input, unlike 1:length(age).
for (i in seq_along(age)) {
  if (age[i] <= 35) {
    ageCat[i] <- "Young"
  } else if (age[i] <= 55) {
    ageCat[i] <- "Middle-Aged"
  } else {
    ageCat[i] <- "Old"
  }
}
age.df <- data.frame(age = age, ageCat = ageCat)
age.df[1:3,]
## age ageCat
## 1 30 Young
## 2 26 Young
## 3 41 Middle-Aged
mean()
Terminology:
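There are two main ways to install a package in R:
- From a repository such as CRAN: install.packages("package_name")
- From a local source file: install.packages("path_to_file", repos = NULL, type = "source")
After installation, load the package with library(package_name).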
str(sample)
## function (x, size, replace = FALSE, prob = NULL)
Function arguments can either be matched by position within the parentheses or by name
# Three equivalent ways of drawing one value from 1:6 with sample().
# The draws are random, so the printed values differ between calls.
sampSpace <- 1:6
sample(sampSpace, 1) # matched by position (x, size); arguments with default values can be omitted
## [1] 1
sample(size = 1, x = sampSpace) # matched by name: no need to remember the order
## [1] 5
sample(size = 1, sampSpace) # mixed: size is matched by name, sampSpace fills the remaining first argument x
## [1] 5
# General shape of a user-defined function.
# NOTE(review): `statements` and `object` look like placeholders rather than
# real objects, so this sketch is presumably not meant to be run as-is.
yourFnName <- function(arg1, arg2, ...){
statements # body of your code
return(object) # what is to be returned
}
# Calling the function: supply values for its arguments.
yourFnName(arg1, arg2, ...)
# Return the smallest of three values.
#
# Arguments:
#   a, b, c  values to compare.
#   na.rm    if TRUE, missing values are ignored; the default FALSE keeps
#            the original behaviour of returning NA when any input is NA.
# Returns: the minimum of a, b and c as computed by min().
myMin <- function(a, b, c, na.rm = FALSE){
  myMinVal <- min(a, b, c, na.rm = na.rm)
  return(myMinVal)
}
myMin(10, 20, 30)
## [1] 10
myMin(10, NA, 20) # how to fix this so it returns 10?
## [1] NA
myMin(10, NA, 20, na.rm = TRUE) # one fix: ask min() to drop missing values
## [1] 10
# str() gives a compact summary of any R object.
# On a function it shows the argument list:
str(str)
## function (object, ...)
str(sample)
## function (x, size, replace = FALSE, prob = NULL)
# On a factor it shows the levels and the underlying integer codes:
genderF <- factor(sample(c("Male", "Female"), 20, replace = TRUE))
str(genderF)
## Factor w/ 2 levels "Female","Male": 1 2 1 1 2 1 2 2 2 1 ...
# On a matrix it shows the type, the dimensions and the first values:
myMat <- matrix(1:10, ncol = 5)
str(myMat)
## int [1:2, 1:5] 1 2 3 4 5 6 7 8 9 10
# On a list it summarises every element in turn.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
myList <- list(numVec = 1:3, logVec = FALSE, charVec = LETTERS[1:4])
str(myList)
## List of 3
## $ numVec : int [1:3] 1 2 3
## $ logVec : logi FALSE
## $ charVec: chr [1:4] "A" "B" "C" "D"
str(apply) # try ?apply for more info
## function (X, MARGIN, FUN, ...)
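apply() is a function that applies a function (FUN) over a MARGIN of a matrix or data frame (X)
X: the matrix or data frame to operate on
MARGIN: 1 to apply FUN over rows, 2 to apply FUN over columns
FUN: refers to the function that we want to apply on the dataset
“…” : additional arguments passed on to FUN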
# Column sums of a 2 x 5 integer matrix, with every apply() argument named.
myMat <- matrix(seq_len(10), nrow = 2)
myMat
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 3 5 7 9
## [2,] 2 4 6 8 10
apply(X = myMat, MARGIN = 2, FUN = sum)
## [1] 3 7 11 15 19
# The same matrix with two missing entries in its second row; na.rm = TRUE
# is handed on to sum() through apply()'s "..." argument.
myMat <- matrix(seq_len(10), nrow = 2)
myMat[2, c(2, 5)] <- NA
myMat
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 3 5 7 9
## [2,] 2 NA 6 8 NA
apply(X = myMat, MARGIN = 2, FUN = sum, na.rm = TRUE)
## [1] 3 3 11 15 9
head(iris) # more info ?iris
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
Suppose we are interested in getting the 25% and 75% quantiles of each numeric column
Check the help page of quantile() to see what arguments should be included.
# Lower and upper quartiles of the four numeric iris columns (the fifth
# column, Species, is dropped); probs is passed on to quantile().
apply(X = iris[, -5], MARGIN = 2, FUN = quantile, probs = c(0.25, 0.75))
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 25% 5.1 2.8 1.6 0.3
## 75% 6.4 3.3 5.1 1.8