EM Algorithm
EM Algorithm
0.25
Density
0.20
0.15
0.10
0.05
0.00
0
x
## Starting values for the EM iterations.
## Initial guesses for the means of the two normal components:
mu_1 <- 0
mu_2 <- 1
## Mixing proportions (the latent-variable parameters): start from an even split.
tau_1 <- tau_2 <- 0.5
10
## Ten EM sweeps over the observed vector `x`, starting from the current
## values of mu_1, mu_2, tau_1 and tau_2. Each sweep prints the updated
## c(mu_1, mu_2, tau_1).
for (i in seq_len(10)) {
  ## E-step: weight each component's unit-variance normal density by its
  ## mixing proportion, then normalise to get per-observation membership
  ## probabilities.
  T_1 <- dnorm(x, mean = mu_1) * tau_1
  T_2 <- dnorm(x, mean = mu_2) * tau_2
  mix_density <- T_1 + T_2
  P_1 <- T_1 / mix_density
  P_2 <- T_2 / mix_density ## note: P_2 = 1 - P_1

  ## M-step, mixing proportions: average membership probability.
  tau_1 <- mean(P_1)
  tau_2 <- mean(P_2)

  ## M-step, component means: membership-weighted averages of x.
  mu_1 <- sum(x * P_1) / sum(P_1)
  mu_2 <- sum(x * P_2) / sum(P_2)

  ## tau_1 was just set to mean(P_1), so this prints the same triple as before.
  print(c(mu_1, mu_2, tau_1))
}
## [1] 0.5045618 6.1011529 0.1002794
## [1] 0.8546336 6.9403680 0.2301181
## [1] 0.9732251 7.0006108 0.2423406
## [1] 0.9853947 7.0054109 0.2434347
## [1] 0.9864849 7.0058260 0.2435309
## [1] 0.9865811 7.0058624 0.2435394
## [1] 0.9865895 7.0058656 0.2435401
## [1] 0.9865903 7.0058659 0.2435402
## [1] 0.9865903 7.0058660 0.2435402
## [1] 0.9865904 7.0058660 0.2435402
## Simulate 1000 draws from a two-component normal mixture and plot them.
## Each observation is N(1, 1) with probability tau_true ("heads") and
## N(4, 1) otherwise ("tails"); the label is recorded in y.
set.seed(123)
tau_true <- 0.25
n_obs <- 1000
x <- numeric(n_obs)
y <- character(n_obs)
for (i in seq_len(n_obs)) {
  ## One uniform draw picks the component, then one normal draw gives the
  ## value; the order of these two calls is kept so the seeded random stream
  ## is unchanged.
  is_heads <- runif(1) < tau_true
  x[i] <- rnorm(1, mean = if (is_heads) 1 else 4)
  y[i] <- if (is_heads) "heads" else "tails"
}
## Density plot of x; the component labels are passed as the plot-symbol colour.
symbol_col <- as.factor(y)
densityplot(~x, par.settings = list(plot.symbol = list(col = symbol_col)))
0.25
Density
0.20
0.15
0.10
0.05
0.00
2
x
## EM fit of a two-component, unit-variance normal mixture to `x`.
## Starting values: component means 0 and 1, equal mixing proportions.
mu_1 <- 0
mu_2 <- 1
tau_1 <- tau_2 <- 0.5

## Thirty EM sweeps; each one prints the updated c(mu_1, mu_2, tau_1).
for (i in seq_len(30)) {
  ## E-step: given the data and the current parameters, compute each
  ## observation's membership probability for the two components.
  T_1 <- tau_1 * dnorm(x, mean = mu_1)
  T_2 <- tau_2 * dnorm(x, mean = mu_2)
  T_sum <- T_1 + T_2
  P_1 <- T_1 / T_sum
  P_2 <- T_2 / T_sum ## note: P_2 = 1 - P_1

  ## M-step: given the data and the membership probabilities, re-estimate
  ## the mixing proportions and the component means.
  tau_1 <- mean(P_1)
  tau_2 <- mean(P_2)
  mu_1 <- sum(P_1 * x) / sum(P_1)
  mu_2 <- sum(P_2 * x) / sum(P_2)

  ## tau_1 equals mean(P_1) at this point, so the printed triple is unchanged.
  print(c(mu_1, mu_2, tau_1))
}
## [1] 1.0835357 3.6048714 0.1320495
## [1] 0.6797230 3.8663167 0.1865272
## [1] 0.7320122 3.9306341 0.2059336
## [1] 0.7910984 3.9574819 0.2165093
## [1] 0.8298998 3.9730967 0.2230743
## [1] 0.8545108 3.9827182 0.2272189
## [1] 0.8701122 3.9887344 0.2298464
## [1] 0.8800221 3.9925240 0.2315159
## [1] 0.8863270 3.9949222 0.2325783
## [1] 0.8903429 3.9964445 0.2332551
## [1] 0.8929026 3.9974127 0.2336866
## [1] 0.8945350 3.9980293 0.2339618
## [1] 0.8955764 3.9984223 0.2341373
## [1] 0.8962408 3.9986729 0.2342493
## [1] 0.8966648 3.9988327 0.2343208
## [1] 0.8969354 3.9989347 0.2343664
## [1] 0.8971081 3.9989998 0.2343955
## [1] 0.8972184 3.9990414 0.2344141
## [1] 0.8972887 3.9990679 0.2344260
## [1] 0.8973336 3.9990848 0.2344335
## [1] 0.8973623 3.9990956 0.2344384
## [1] 0.8973806 3.9991025 0.2344414
## [1] 0.8973922 3.9991069 0.2344434
## [1] 0.8973997 3.9991097 0.2344447
## [1] 0.8974045 3.9991115 0.2344455
## [1] 0.8974075 3.9991126 0.2344460
## [1] 0.8974094 3.9991134 0.2344463
## [1] 0.8974107 3.9991138 0.2344465
## [1] 0.8974115 3.9991141 0.2344466
## [1] 0.8974120 3.9991143 0.2344467
## number of iterations= 21
## NOTE(review): `myEM` is not defined in the visible part of this file; presumably it is a
## fitted mixture-model object created by an earlier call -- confirm against the full source.
## Extracts (and, at top level, prints) the `mu` component of `myEM`.
myEM$mu ## the means of the two distributions