9/25/2018 & 9/27/2018
command lineterminal application running on your computerlist command to show all files in long formless to see the contents of the filenanoR?R analyses in dynamic, polished files using R markdownRstudioR# symbols and the number of output items is in []R follows the normal priority of mathematical evaluation4*4
## [1] 16
(4+3*2^2)
## [1] 16
RMarkdownR chunks into Rmarkdown documents<- operator.R is case sensitive.x <- 2 x*3
## [1] 6
y <- x * 3 y-2
## [1] 4
These do not work
3y <- 3 3*y <- 3
x+2 x^2 log(x)
log - is a built in function of R, and therefore the object of the function needs to be put in parenthesesy <- 67 print(y)
## [1] 67
x <- 124 z <- (x*y)^2 print(z)
## [1] 69022864
c stands for concatenatex <- "I Love" print (x)
## [1] "I Love"
y <- "Biostatistics" print (y)
## [1] "Biostatistics"
z <- c(x,y) print (z)
## [1] "I Love" "Biostatistics"
z is now what is called a list of character values.factors, and we can redefine our character variables as factors.z_factor <- as.factor(z) print (z_factor)
R thinks in terms of vectors (a list of characters, factors or numerical values) and it will benefit any R user to try to write programs with that in mind, as it will simplify most things.x <- c(2,3,4,2,1,2,4,5,10,8,9) print(x)
## [1] 2 3 4 2 1 2 4 5 10 8 9
mean(x) median(x) var(x) log(x) ln(x) sqrt(x) sum(x) length(x) sample(x, replace = T)
sample) has an argument (replace=T)R and it is easy enough to write your own functions if none already exist to do what you want to do.- help(mean)
- ?mean
- example(mean)
- help.search("mean")
- apropos("mean")
- args(mean)
seq and sampleseq_1 <- seq(0.0, 10.0, by = 0.1) print(seq_1)
## [1] 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 ## [15] 1.4 1.5 1.6 1.7 1.8 1.9 2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7 ## [29] 2.8 2.9 3.0 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 4.0 4.1 ## [43] 4.2 4.3 4.4 4.5 4.6 4.7 4.8 4.9 5.0 5.1 5.2 5.3 5.4 5.5 ## [57] 5.6 5.7 5.8 5.9 6.0 6.1 6.2 6.3 6.4 6.5 6.6 6.7 6.8 6.9 ## [71] 7.0 7.1 7.2 7.3 7.4 7.5 7.6 7.7 7.8 7.9 8.0 8.1 8.2 8.3 ## [85] 8.4 8.5 8.6 8.7 8.8 8.9 9.0 9.1 9.2 9.3 9.4 9.5 9.6 9.7 ## [99] 9.8 9.9 10.0
seq_2 <- seq(10.0, 0.0, by = -0.1) print(seq_2)
## [1] 10.0 9.9 9.8 9.7 9.6 9.5 9.4 9.3 9.2 9.1 9.0 8.9 8.8 8.7 ## [15] 8.6 8.5 8.4 8.3 8.2 8.1 8.0 7.9 7.8 7.7 7.6 7.5 7.4 7.3 ## [29] 7.2 7.1 7.0 6.9 6.8 6.7 6.6 6.5 6.4 6.3 6.2 6.1 6.0 5.9 ## [43] 5.8 5.7 5.6 5.5 5.4 5.3 5.2 5.1 5.0 4.9 4.8 4.7 4.6 4.5 ## [57] 4.4 4.3 4.2 4.1 4.0 3.9 3.8 3.7 3.6 3.5 3.4 3.3 3.2 3.1 ## [71] 3.0 2.9 2.8 2.7 2.6 2.5 2.4 2.3 2.2 2.1 2.0 1.9 1.8 1.7 ## [85] 1.6 1.5 1.4 1.3 1.2 1.1 1.0 0.9 0.8 0.7 0.6 0.5 0.4 0.3 ## [99] 0.2 0.1 0.0
seq_square <- (seq_2)*(seq_2) print(seq_square)
## [1] 100.00 98.01 96.04 94.09 92.16 90.25 88.36 86.49 84.64 82.81 ## [11] 81.00 79.21 77.44 75.69 73.96 72.25 70.56 68.89 67.24 65.61 ## [21] 64.00 62.41 60.84 59.29 57.76 56.25 54.76 53.29 51.84 50.41 ## [31] 49.00 47.61 46.24 44.89 43.56 42.25 40.96 39.69 38.44 37.21 ## [41] 36.00 34.81 33.64 32.49 31.36 30.25 29.16 28.09 27.04 26.01 ## [51] 25.00 24.01 23.04 22.09 21.16 20.25 19.36 18.49 17.64 16.81 ## [61] 16.00 15.21 14.44 13.69 12.96 12.25 11.56 10.89 10.24 9.61 ## [71] 9.00 8.41 7.84 7.29 6.76 6.25 5.76 5.29 4.84 4.41 ## [81] 4.00 3.61 3.24 2.89 2.56 2.25 1.96 1.69 1.44 1.21 ## [91] 1.00 0.81 0.64 0.49 0.36 0.25 0.16 0.09 0.04 0.01 ## [101] 0.00
seq_square_new <- (seq_2)^2 print(seq_square_new)
## [1] 100.00 98.01 96.04 94.09 92.16 90.25 88.36 86.49 84.64 82.81 ## [11] 81.00 79.21 77.44 75.69 73.96 72.25 70.56 68.89 67.24 65.61 ## [21] 64.00 62.41 60.84 59.29 57.76 56.25 54.76 53.29 51.84 50.41 ## [31] 49.00 47.61 46.24 44.89 43.56 42.25 40.96 39.69 38.44 37.21 ## [41] 36.00 34.81 33.64 32.49 31.36 30.25 29.16 28.09 27.04 26.01 ## [51] 25.00 24.01 23.04 22.09 21.16 20.25 19.36 18.49 17.64 16.81 ## [61] 16.00 15.21 14.44 13.69 12.96 12.25 11.56 10.89 10.24 9.61 ## [71] 9.00 8.41 7.84 7.29 6.76 6.25 5.76 5.29 4.84 4.41 ## [81] 4.00 3.61 3.24 2.89 2.56 2.25 1.96 1.69 1.44 1.21 ## [91] 1.00 0.81 0.64 0.49 0.36 0.25 0.16 0.09 0.04 0.01 ## [101] 0.00
x <- rnorm (10000, 0, 10) y <- sample (1:10000, 10000, replace = T) xy <- cbind(x,y) plot(x,y)
x <- rnorm (10000, 0, 10) y <- sample (1:10000, 10000, replace = T) xy <- cbind(x,y) plot(xy)
x <- rnorm (10000, 0, 10) y <- sample (1:10000, 10000, replace = T) xy <- cbind(x,y) hist(x)
x <-rnorm(1000, 0, 100) hist(x, xlim = c(-500,500)) curve(50000*dnorm(x, 0, 100), xlim = c(-500,500), add=TRUE, col='Red')
-
dnorm() generates the probability density, which can be plotted using the curve() function. - Note that is curve is added to the plot using add=TRUE
hist function.plot function (as well as a number of more sophisticated plotting functions).high level plotting function, which sets the stageLow level plotting functions will tweak the plots and make them beautifulR is that the options for the arguments make sense.GGPlot2seq_1 <- seq(0.0, 10.0, by = 0.1) plot (seq_1, xlab="space", ylab ="function of space", type = "p", col = "red")
seq_1 <- seq(0.0, 10.0, by = 0.1) seq_2 <- seq(10.0, 0.0, by = -0.1)
par(mfrow=c(2,2)) plot (seq_1, xlab="time", ylab ="p in population 1", type = "p", col = 'red') plot (seq_2, xlab="time", ylab ="p in population 2", type = "p", col = 'green') plot (seq_square, xlab="time", ylab ="p2 in population 2", type = "p", col = 'blue') plot (seq_square_new, xlab="time", ylab ="p in population 1", type = "l", col = 'yellow')
heads <- rbinom(n=1000, size=20, prob=0.5) hist(heads)
hist(rbinom(n=1000, size=20, prob=0.5))
habitat <- factor(c("mixed", "wet", "wet", "wet", "dry", "dry", "dry","mixed"))
temp <- c(3.4, 3.4, 8.4, 3, 5.6, 8.1, 8.3, 4.5)
elevation <- c(0, 9.2, 3.8, 5, 5.6, 4.1, 7.1, 5.3)
mydata <- data.frame(habitat, temp, elevation)
row.names(mydata) <- c("Reedy Lake", "Pearcadale", "Warneet", "Cranbourne",
"Lysterfield", "Red Hill", "Devilbend", "Olinda")
R is being able to import data from an external sourceR.YourFile <- read.table('yourfile.csv', header=T, row.names=1, sep=',')
YourFile <- read.table('yourfile.txt', header=T, row.names=1, sep='\t')
write.table(YourFile, "yourfile.csv", quote=F, row.names=T, sep=",") write.table(YourFile, "yourfile.txt", quote=F, row.names=T, sep="\t")
print (YourFile[,2]) print (YourFile$temp) print (YourFile[2,]) plot (YourFile$temp, YourFile$elevation)
tapply(YourFile$temp, YourFile$habitat, mean) tapply(YourFile$temp, YourFile$habitat, var)
GacuRNAseq_Subset.csvRNAseq_Data <- read.table('GacuRNAseq_subset.csv', header=TRUE, sep=',')
print (RNAseq_Data)
head (RNAseq_Data)
tail (RNAseq_Data)
print (RNAseq_Data[,2])
print (RNAseq_Data[1,])
print (RNAseq_Data[1,2])
print (RNAseq_Data$ENSGACG00000000010)
print (RNAseq_Data$ENSGACG00000000010>45.0)
summary1 <- summary(RNAseq_Data $ENSGACG00000000003)
print (summary1)
hist(RNAseq_Data $ENSGACG00000000003)
boxplot(RNAseq_Data$ENSGACG00000000003)
boxplot(RNAseq_Data$ENSGACG00000000003~RNAseq_Data$Population)
plot(RNAseq_Data $ENSGACG00000000003, RNAseq_Data$ENSGACG00000000003)
boxplot(RNAseq_Data $ENSGACG00000000003~RNAseq_Data$Treatment,
col = "red", ylab = "Expression Level", xlab = "Treatment level",
border ="orange",
main = "Boxplot of variation in gene expression across microbiota treatments")