You installed Bioconductor Package in week 2, R Tutorial Class. We will practice the R code, with probability concept. Before we start tutorial, you may install a probability package. In R, each project have its workspace. Before you use functions in packages, you may call functions from the library.
# install.packages("prob")
library(prob)
# Discrete sample space & Events
library(prob)
# Sample space of two coin toss
two.coin <- tosscoin(2)
two.coin
## toss1 toss2
## 1 H H
## 2 T H
## 3 H T
## 4 T T
# event of obtaining exactly one head
subset(two.coin, (toss1 == 'H' | toss2 == 'H') & !(toss1 == 'H' & toss2 == 'H'))
## toss1 toss2
## 2 T H
## 3 H T
# Sample of rolling two dice
two.dice <- rolldie(2)
two.dice
## X1 X2
## 1 1 1
## 2 2 1
## 3 3 1
## 4 4 1
## 5 5 1
## 6 6 1
## 7 1 2
## 8 2 2
## 9 3 2
## 10 4 2
## 11 5 2
## 12 6 2
## 13 1 3
## 14 2 3
## 15 3 3
## 16 4 3
## 17 5 3
## 18 6 3
## 19 1 4
## 20 2 4
## 21 3 4
## 22 4 4
## 23 5 4
## 24 6 4
## 25 1 5
## 26 2 5
## 27 3 5
## 28 4 5
## 29 5 5
## 30 6 5
## 31 1 6
## 32 2 6
## 33 3 6
## 34 4 6
## 35 5 6
## 36 6 6
# Event that the sum of the number on the two dice equals 3
subset(two.dice, X1 + X2 == 3)
## X1 X2
## 2 2 1
## 7 1 2
# Continuous sample space & Events
# Sample space of measurement the relative humidity of air (assume uniform distribution)
f <- function(x){x/x}
S <- integrate(f, 0, 100)
S
## 100 with absolute error < 1.1e-12
# Event that the relative humidity is at least 90%
Event <- integrate(f, 0, 90)
Event
## 90 with absolute error < 1e-12
# Two coin toss event
two.coin <- tosscoin(2, makespace = TRUE)
# P(one head)
p1h <- Prob(two.coin, event = (toss1 == 'H' | toss2 == 'H') & !(toss1 == 'H' & toss2 == 'H'))
p1h
## [1] 0.5
# P(one tail)
p1t <- Prob(two.coin, event = (toss1 == 'T' | toss2 == 'T') & !(toss1 == 'T' & toss2 == 'T'))
p1t
## [1] 0.5
# P(one head U one tail)
p1h1t <- Prob(two.coin, event = (toss1 == 'H' | toss2 == 'T') | (toss1 == 'T' & toss2 == 'H'))
p1h1t
## [1] 1
# P(head)
ph <- Prob(two.coin, event = (toss1 == 'H' | toss2 == 'H'))
ph
## [1] 0.75
# P(tail)
pt <- Prob(two.coin, event = (toss1 == 'T' | toss2 == 'T'))
pt
## [1] 0.75
# P(head n tail)
phnt <- Prob(two.coin, event = !(toss1 == 'H' | toss2 == 'H') | !(toss1 == 'T' | toss2 == 'T'))
phnt
## [1] 0.5
# P(head U tail)
phut <- ph + pt - phnt
phut
## [1] 1
# Tossing two dice
two.dice <- rolldie(2, makespace = TRUE)
# A := {difference is 3}
A <- subset(two.dice, abs(X1 - X2) == 3)
A
## X1 X2 probs
## 4 4 1 0.02777778
## 11 5 2 0.02777778
## 18 6 3 0.02777778
## 19 1 4 0.02777778
## 26 2 5 0.02777778
## 33 3 6 0.02777778
pA <- Prob(A)
pA
## [1] 0.1666667
# B := {sum is a multiple of 3}
B <- subset(two.dice, abs(X1 + X2) %in% c(3,6,9,12))
B
## X1 X2 probs
## 2 2 1 0.02777778
## 5 5 1 0.02777778
## 7 1 2 0.02777778
## 10 4 2 0.02777778
## 15 3 3 0.02777778
## 18 6 3 0.02777778
## 20 2 4 0.02777778
## 23 5 4 0.02777778
## 25 1 5 0.02777778
## 28 4 5 0.02777778
## 33 3 6 0.02777778
## 36 6 6 0.02777778
pB <- Prob(B)
pB
## [1] 0.3333333
# P(A|B)
Prob(two.dice, event = (abs(X1 - X2) == 3), (given = abs(X1 + X2) %in% c(3,6,9,12)))
## [1] 0.1666667
# P(AnB)/P(B)
Prob(intersect(A, B))/Prob(B)
## [1] 0.1666667
# Dice example
one.die <- rolldie(1, makespace = TRUE)
one.die
## X1 probs
## 1 1 0.1666667
## 2 2 0.1666667
## 3 3 0.1666667
## 4 4 0.1666667
## 5 5 0.1666667
## 6 6 0.1666667
# A := the event that the number is even = {2,4,6}
A <- subset(one.die, (X1 %% 2 == 0))
A
## X1 probs
## 2 2 0.1666667
## 4 4 0.1666667
## 6 6 0.1666667
# B := the event that the number is greater than or equal to 3 = {3,4,5,6}
B <- subset(one.die, X1 >= 3)
B
## X1 probs
## 3 3 0.1666667
## 4 4 0.1666667
## 5 5 0.1666667
## 6 6 0.1666667
# P(A)
Prob(A)
## [1] 0.5
# P(B)
Prob(B)
## [1] 0.6666667
# P(A n B)
Prob(intersect(A,B))
## [1] 0.3333333
# P(B|A)
Prob(B, given = A)
## [1] 0.6666667
# P(B) = P(B|A) ?
identical(Prob(B), Prob(B, given = A))
## [1] TRUE
# P(A n B) = P(A)P(B) ?
identical(Prob(intersect(A,B)), Prob(A)*Prob(B))
## [1] TRUE
# Probability of test positive when RA is given, P(+|RA) = 0.95
# Probability of test positive when RA is not given, P(+|!RA) = 0.01
# background probability of RA and Normal, P(RA) = 0.005, P(Normal) = 0.995
test <- matrix(c(0.95, 0.01, 0.05, 0.99), nrow = 2, ncol = 2, byrow = TRUE)
rownames(test) <- c("TP", "TN")
colnames(test) <- c("RA", "Normal")
prevalance <- c(0.005, 0.995)
# considering prevalence
test_corrected <- matrix(c(test[,1] * prevalance[1], test[,2] * prevalance[2]),
nrow = 2, ncol = 2, byrow = FALSE)
rownames(test_corrected) <- c("TP", "TN")
colnames(test_corrected) <- c("RA", "Normal")
test_corrected <- as.data.frame(test_corrected)
test_corrected$Sum <- rowSums(test_corrected)
test_corrected$P_correct_Diagnosis[1] <- test_corrected$RA[1]/test_corrected$Sum[1]
test_corrected$P_correct_Diagnosis[2] <- test_corrected$Normal[2]/test_corrected$Sum[2]
# percent
test_corrected * 100
## RA Normal Sum P_correct_Diagnosis
## TP 0.475 0.995 1.47 32.31293
## TN 0.025 98.505 98.53 99.97463
# toss 2 coins
library(prob)
two.coin <- tosscoin(2, makespace = TRUE)
two.coin <- addrv(two.coin, X = ifelse(toss1 == "H", 1, 0) + ifelse(toss2 == "H", 1, 0))
pmf <- marginal(two.coin, vars = "X")
pmf
## X probs
## 1 0 0.25
## 2 1 0.50
## 3 2 0.25
barplot(pmf$probs, names.arg = pmf$X, main = "Histogram of X",
xlab = "X", ylab = "f(X)", ylim = c(0, 0.5))
# roll 2 dices
two.dice <- rolldie(2, makespace = TRUE)
two.dice <- addrv(two.dice, X = X1 + X2)
pmf <- marginal(two.dice, vars = "X")
pmf
## X probs
## 1 2 0.02777778
## 2 3 0.05555556
## 3 4 0.08333333
## 4 5 0.11111111
## 5 6 0.13888889
## 6 7 0.16666667
## 7 8 0.13888889
## 8 9 0.11111111
## 9 10 0.08333333
## 10 11 0.05555556
## 11 12 0.02777778
barplot(pmf$probs, names.arg = pmf$X, main = "Histogram of X",
xlab = "X", ylab = "f(X)", ylim = c(0, 0.2))
f <- function(x) {1.5 - 6 * (x - 50.0)^2}
integrate(f, 49.8, 50.1)
## 0.432 with absolute error < 4.8e-15
two.dice <- rolldie(2, makespace = TRUE)
two.dice <- addrv(two.dice, X = X1 + X2)
pmf <- marginal(two.dice, vars = "X")
Ex <- sum(pmf$X*pmf$probs)
# install.packages("psych")
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:prob':
##
## sim
## The following object is masked from 'package:fBasics':
##
## tr
## The following object is masked from 'package:timeSeries':
##
## outlier
# Arithmatic mean
vec <- c(10,2,19,24,6,23,47,24,54,77)
sum(vec)/length(vec)
## [1] 28.6
mean(vec)
## [1] 28.6
# Harmonic mean
x <- c(40, 20) # x1, x2, km/hr
length(x)/sum(1/x)
## [1] 26.66667
harmonic.mean(x)
## [1] 26.66667
mean(x) # cf
## [1] 30
geometric.mean(x) # cf
## [1] 28.28427
# Geometric mean
pop <- c(10000, 10500, 11550, 13860, 18156)
x <- c(1.05, 1.10, 1.20, 1.31)
# calculation of geometric mean
prod(x)^(1 / length(x))
## [1] 1.160803
geometric.mean(x)
## [1] 1.160803
# Population change over 4 decade
pop[1]*mean(x)^4
## [1] 18420.6
pop[1]*harmonic.mean(x)^4
## [1] 17900.19
pop[1]*geometric.mean(x)^4
## [1] 18156.6
two.dice <- rolldie(2, makespace = TRUE)
two.dice <- addrv(two.dice, X = X1 + X2)
pmf <- marginal(two.dice, vars = "X")
Ex2 <- sum(pmf$X^2 * pmf$probs)
Ex <- sum(pmf$X * pmf$probs)
Var <- Ex2 - Ex^2
SD <- sqrt(Var)
tbl <- matrix(c(rep(0,3),rep(1/32,2),rep(3/32,2),rep(1/16,3),1/8,2/8,0,1/8,rep(1/64,4),rep(0,3)),
nrow = 3, byrow = TRUE)
rownames(tbl) <- c(seq(0,2))
colnames(tbl) <- c(seq(0,6))
tbl <- as.data.frame(tbl)
tbl$py <- rowSums(tbl)
px <- colSums(tbl)
tbl <- rbind(tbl, px)
rownames(tbl) <- c(seq(0,2), "px")
barplot(tbl$py[1:3], names.arg = rownames(tbl)[1:3], main = "probability mass function of y",
xlab = "y", ylab = "p(y)", ylim = c(0, 1))
barplot(t(tbl)[1:7,4], names.arg = colnames(tbl)[1:7], main = "probability mass function of x",
xlab = "x", ylab = "p(x)", ylim = c(0, 1))
Ex <- sum(seq(0,6)*tbl[4,1:7])
Ex2 <- sum(seq(0,6)^2*tbl[4,1:7])
Var_x <- Ex2 - Ex^2
SD_x <- sqrt(Var_x)
Ey <- sum(seq(0,2)*tbl[1:3,8])
Ey2 <- sum(seq(0,2)^2*tbl[1:3,8])
Var_y <- Ey2 - Ey^2
SD_y <- sqrt(Var_y)
# Cov = E(XY) - E(X)E(Y)
Exy <- sum(0*seq(0,6)*tbl[1,1:7], 1*seq(0,6)*tbl[2,1:7], 2*seq(0,6)*tbl[3,1:7])
ExEy <- Ex*Ey
Cov <- Exy - ExEy
# Correlation Coefficient (Pearson Correlation Coefficient)
PCC <- Cov / sqrt(Var_x*Var_y)
PCC
## [1] -0.4929085