9/25/2018 & 9/27/2018
command line
terminal
application running on your computer
list
command to show all files in long form
less
to see the contents of the file
nano
R
?R
analyses in dynamic, polished files using R markdown
Rstudio
R
#
symbols and the number of output items is in []
R
follows the normal priority of mathematical evaluation
4*4
## [1] 16
# Operator precedence: the power is evaluated first (2^2 = 4), then the
# product (3 * 4 = 12), and finally the sum (4 + 12 = 16).
(4 + 3 * 2^2)
## [1] 16
RMarkdown
R chunks
into Rmarkdown
documents
<-
operator.
R
is case sensitive.
x <- 2
x*3
## [1] 6
# Store three times x in y, then display y minus 2 (y itself is not changed
# by the second line). Each statement needs its own line to parse.
y <- x * 3
y - 2
## [1] 4
These do not work
# Deliberately invalid examples: R rejects both of the statements below.
# An object name cannot begin with a digit, so `3y` is not a legal name.
3y <- 3
# The target of `<-` must be a name, not an arithmetic expression like `3*y`.
3*y <- 3
# Three valid expressions using x: addition, squaring, and the natural log.
# Each one is a separate statement and must sit on its own line.
x + 2
x^2
log(x)
log
- is a built in function of R
, and therefore the object of the function needs to be put in parentheses
y <- 67
print(y)
## [1] 67
# Rebind x, square the product of x and the existing y, and display it.
x <- 124
z <- (x * y)^2
print(z)
## [1] 69022864
c
stands for concatenate
# Character strings are stored in objects the same way numbers are.
x <- "I Love"
print(x)
## [1] "I Love"
y <- "Biostatistics"
print(y)
## [1] "Biostatistics"
# c() combines its arguments into one vector, here with two elements.
z <- c(x, y)
print(z)
## [1] "I Love" "Biostatistics"
z
is now what is called a list of character values.
factors
, and we can redefine our character variables as factors.
z_factor <- as.factor(z)
print (z_factor)
R
thinks in terms of vectors (a list of characters, factors or numerical values) and it will benefit any R
user to try to write programs with that in mind, as it will simplify most things.
x <- c(2,3,4,2,1,2,4,5,10,8,9)
print(x)
## [1] 2 3 4 2 1 2 4 5 10 8 9
# Summary statistics and element-wise transforms of the numeric vector x.
mean(x)
median(x)
var(x)
# log() is the natural logarithm. Base R has no ln() function, so the
# original ln(x) call (which fails with "could not find function") is omitted.
log(x)
sqrt(x)
sum(x)
length(x)
# With no size given, sample() draws length(x) values; replace = TRUE allows
# the same element to be drawn more than once.
sample(x, replace = TRUE)
sample
) has an argument (replace=T
)
R
and it is easy enough to write your own functions if none already exist to do what you want to do.
- help(mean)
- ?mean
- example(mean)
- help.search("mean")
- apropos("mean")
- args(mean)
seq
and sample
# Ascending grid from 0 to 10 in steps of 0.1 (101 values).
seq_1 <- seq(0.0, 10.0, by = 0.1)
print(seq_1)
## [1] 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 ## [15] 1.4 1.5 1.6 1.7 1.8 1.9 2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7 ## [29] 2.8 2.9 3.0 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 4.0 4.1 ## [43] 4.2 4.3 4.4 4.5 4.6 4.7 4.8 4.9 5.0 5.1 5.2 5.3 5.4 5.5 ## [57] 5.6 5.7 5.8 5.9 6.0 6.1 6.2 6.3 6.4 6.5 6.6 6.7 6.8 6.9 ## [71] 7.0 7.1 7.2 7.3 7.4 7.5 7.6 7.7 7.8 7.9 8.0 8.1 8.2 8.3 ## [85] 8.4 8.5 8.6 8.7 8.8 8.9 9.0 9.1 9.2 9.3 9.4 9.5 9.6 9.7 ## [99] 9.8 9.9 10.0
# Descending grid from 10 to 0 in steps of -0.1 (101 values).
seq_2 <- seq(10.0, 0.0, by = -0.1)
print(seq_2)
## [1] 10.0 9.9 9.8 9.7 9.6 9.5 9.4 9.3 9.2 9.1 9.0 8.9 8.8 8.7 ## [15] 8.6 8.5 8.4 8.3 8.2 8.1 8.0 7.9 7.8 7.7 7.6 7.5 7.4 7.3 ## [29] 7.2 7.1 7.0 6.9 6.8 6.7 6.6 6.5 6.4 6.3 6.2 6.1 6.0 5.9 ## [43] 5.8 5.7 5.6 5.5 5.4 5.3 5.2 5.1 5.0 4.9 4.8 4.7 4.6 4.5 ## [57] 4.4 4.3 4.2 4.1 4.0 3.9 3.8 3.7 3.6 3.5 3.4 3.3 3.2 3.1 ## [71] 3.0 2.9 2.8 2.7 2.6 2.5 2.4 2.3 2.2 2.1 2.0 1.9 1.8 1.7 ## [85] 1.6 1.5 1.4 1.3 1.2 1.1 1.0 0.9 0.8 0.7 0.6 0.5 0.4 0.3 ## [99] 0.2 0.1 0.0
# Element-wise square of seq_2, written as a product: arithmetic on vectors
# is applied to each element in turn.
seq_square <- seq_2 * seq_2
print(seq_square)
## [1] 100.00 98.01 96.04 94.09 92.16 90.25 88.36 86.49 84.64 82.81 ## [11] 81.00 79.21 77.44 75.69 73.96 72.25 70.56 68.89 67.24 65.61 ## [21] 64.00 62.41 60.84 59.29 57.76 56.25 54.76 53.29 51.84 50.41 ## [31] 49.00 47.61 46.24 44.89 43.56 42.25 40.96 39.69 38.44 37.21 ## [41] 36.00 34.81 33.64 32.49 31.36 30.25 29.16 28.09 27.04 26.01 ## [51] 25.00 24.01 23.04 22.09 21.16 20.25 19.36 18.49 17.64 16.81 ## [61] 16.00 15.21 14.44 13.69 12.96 12.25 11.56 10.89 10.24 9.61 ## [71] 9.00 8.41 7.84 7.29 6.76 6.25 5.76 5.29 4.84 4.41 ## [81] 4.00 3.61 3.24 2.89 2.56 2.25 1.96 1.69 1.44 1.21 ## [91] 1.00 0.81 0.64 0.49 0.36 0.25 0.16 0.09 0.04 0.01 ## [101] 0.00
# The same element-wise square of seq_2, written with the power operator.
seq_square_new <- seq_2^2
print(seq_square_new)
## [1] 100.00 98.01 96.04 94.09 92.16 90.25 88.36 86.49 84.64 82.81 ## [11] 81.00 79.21 77.44 75.69 73.96 72.25 70.56 68.89 67.24 65.61 ## [21] 64.00 62.41 60.84 59.29 57.76 56.25 54.76 53.29 51.84 50.41 ## [31] 49.00 47.61 46.24 44.89 43.56 42.25 40.96 39.69 38.44 37.21 ## [41] 36.00 34.81 33.64 32.49 31.36 30.25 29.16 28.09 27.04 26.01 ## [51] 25.00 24.01 23.04 22.09 21.16 20.25 19.36 18.49 17.64 16.81 ## [61] 16.00 15.21 14.44 13.69 12.96 12.25 11.56 10.89 10.24 9.61 ## [71] 9.00 8.41 7.84 7.29 6.76 6.25 5.76 5.29 4.84 4.41 ## [81] 4.00 3.61 3.24 2.89 2.56 2.25 1.96 1.69 1.44 1.21 ## [91] 1.00 0.81 0.64 0.49 0.36 0.25 0.16 0.09 0.04 0.01 ## [101] 0.00
# Scatterplot of 10,000 normal draws (mean 0, sd 10) against 10,000 integers
# sampled with replacement from 1:10000, passing the two vectors separately.
x <- rnorm(10000, 0, 10)
y <- sample(1:10000, 10000, replace = TRUE)
xy <- cbind(x, y)
plot(x, y)

# Fresh draws of the same kind, plotted from the two-column matrix instead.
x <- rnorm(10000, 0, 10)
y <- sample(1:10000, 10000, replace = TRUE)
xy <- cbind(x, y)
plot(xy)

# Fresh draws again; this time show the distribution of x on its own.
x <- rnorm(10000, 0, 10)
y <- sample(1:10000, 10000, replace = TRUE)
xy <- cbind(x, y)
hist(x)
# Histogram of 1,000 normal draws (mean 0, sd 100) with a normal density
# curve drawn over it.
x <- rnorm(1000, 0, 100)
hist(x, xlim = c(-500, 500))
# dnorm() gives a density, so it is scaled by 50000 before being added to the
# existing plot. NOTE(review): 50000 is presumably sample size x bin width
# (1000 x 50) to match the count axis -- confirm against the bins hist() picks.
curve(
  50000 * dnorm(x, 0, 100),
  xlim = c(-500, 500),
  add = TRUE,
  col = "Red"
)
- dnorm()
generates the probability density, which can be plotted using the curve()
function.
- Note that the curve is added to the plot using add=TRUE
hist
function.
plot
function (as well as a number of more sophisticated plotting functions).
high level
plotting function, which sets the stage
Low level
plotting functions will tweak the plots and make them beautiful
R
is that the options for the arguments make sense.
GGPlot2
# Plot the ascending grid against its index as red points, with axis labels.
seq_1 <- seq(0.0, 10.0, by = 0.1)
plot(
  seq_1,
  xlab = "space",
  ylab = "function of space",
  type = "p",
  col = "red"
)
# Rebuild the ascending and descending grids used by the panels below.
seq_1 <- seq(0.0, 10.0, by = 0.1)
seq_2 <- seq(10.0, 0.0, by = -0.1)

# Draw four panels on a 2 x 2 grid. par() returns the previous settings, which
# are restored at the end so later plots are not squeezed into this layout.
# seq_square and seq_square_new must already exist in the workspace.
old_par <- par(mfrow = c(2, 2))
plot(seq_1, xlab = "time", ylab = "p in population 1",
     type = "p", col = "red")
plot(seq_2, xlab = "time", ylab = "p in population 2",
     type = "p", col = "green")
plot(seq_square, xlab = "time", ylab = "p2 in population 2",
     type = "p", col = "blue")
# type = "l" draws this last panel as a line rather than points.
plot(seq_square_new, xlab = "time", ylab = "p in population 1",
     type = "l", col = "yellow")
par(old_par)
# Draw 1,000 binomial counts (20 trials each, success probability 0.5) and
# plot their distribution.
heads <- rbinom(n = 1000, size = 20, prob = 0.5)
hist(heads)

# The same plot without storing the draws; a fresh random sample is used.
hist(rbinom(n = 1000, size = 20, prob = 0.5))
# Three parallel vectors, one value per site: a categorical habitat type and
# two numeric measurements.
habitat <- factor(
  c("mixed", "wet", "wet", "wet", "dry", "dry", "dry", "mixed")
)
temp <- c(3.4, 3.4, 8.4, 3, 5.6, 8.1, 8.3, 4.5)
elevation <- c(0, 9.2, 3.8, 5, 5.6, 4.1, 7.1, 5.3)

# Bind the vectors column-wise into a data frame and label each row with its
# site name (the names are given in the same order as the values above).
mydata <- data.frame(habitat, temp, elevation)
row.names(mydata) <- c(
  "Reedy Lake", "Pearcadale", "Warneet", "Cranbourne",
  "Lysterfield", "Red Hill", "Devilbend", "Olinda"
)
R
is being able to import data from an external source
R
.
YourFile <- read.table('yourfile.csv', header=T, row.names=1, sep=',')
YourFile <- read.table('yourfile.txt', header=T, row.names=1, sep='\t')
# Save the data frame as comma- and tab-delimited text, keeping the row names
# and leaving strings unquoted.
write.table(
  YourFile, "yourfile.csv",
  quote = FALSE, row.names = TRUE, sep = ","
)
write.table(
  YourFile, "yourfile.txt",
  quote = FALSE, row.names = TRUE, sep = "\t"
)

# Index the data frame by position or by column name.
print(YourFile[, 2])   # second column
print(YourFile$temp)   # column named temp
print(YourFile[2, ])   # second row
plot(YourFile$temp, YourFile$elevation)

# Mean and variance of temp within each level of habitat.
tapply(YourFile$temp, YourFile$habitat, mean)
tapply(YourFile$temp, YourFile$habitat, var)
GacuRNAseq_Subset.csv
# Read the comma-separated expression table; the first line holds the column
# names.
# NOTE(review): the file name is spelled with a lower-case "subset" here --
# confirm it matches the file on disk, since paths are case-sensitive on many
# systems.
RNAseq_Data <- read.table("GacuRNAseq_subset.csv", header = TRUE, sep = ",")

# Look at the whole table, then only its first and last rows.
print(RNAseq_Data)
head(RNAseq_Data)
tail(RNAseq_Data)

# Index by position: one column, one row, one cell.
print(RNAseq_Data[, 2])
print(RNAseq_Data[1, ])
print(RNAseq_Data[1, 2])

# Index by column name; the comparison gives one TRUE/FALSE per row.
print(RNAseq_Data$ENSGACG00000000010)
print(RNAseq_Data$ENSGACG00000000010 > 45.0)

# Numerical and graphical summaries of a single column.
summary1 <- summary(RNAseq_Data$ENSGACG00000000003)
print(summary1)
hist(RNAseq_Data$ENSGACG00000000003)
boxplot(RNAseq_Data$ENSGACG00000000003)

# The formula interface draws one box per level of the grouping column.
boxplot(RNAseq_Data$ENSGACG00000000003 ~ RNAseq_Data$Population)

# NOTE(review): both axes use the same column, so every point falls on the
# diagonal -- confirm whether a second gene was intended for one of them.
plot(RNAseq_Data$ENSGACG00000000003, RNAseq_Data$ENSGACG00000000003)

# The same grouped boxplot by Treatment, with colours, axis labels and a title.
boxplot(
  RNAseq_Data$ENSGACG00000000003 ~ RNAseq_Data$Treatment,
  col = "red",
  ylab = "Expression Level",
  xlab = "Treatment level",
  border = "orange",
  main = "Boxplot of variation in gene expression across microbiota treatments"
)