--- title: "EM Simulations" author: "Math 152" date: "Fall 2018" output: html_document: df_print: paged subtitle: An implementation of a Gibbs Sampler --- ```{r global_options, include=FALSE} knitr::opts_chunk$set(message=FALSE, warning=FALSE, fig.height=5, fig.width=8, fig.align = "center") library(dplyr) library(ggplot2) library(readr) library(broom) library(gridExtra) library(grid) options(digits=5) ``` ## Censored Data Below we use example data based on a prostate cancer study. A few caveats include (1) survival data might not be best modeled by an exponential distribution, (2) this is a randomized study with two groups (`Treatment`) that are being ignored. ```{r} prostate <- read.csv("prostate.csv") prostate_stats <- prostate %>% summarize(MLE = sum(Time) / sum(Status), aveTime = mean(Time), sumTime = sum(Time), numFull = sum(Status), numObs = n()) prostate_stats # 0 is censored # 1 is uncensored prostate %>% group_by(Status) %>% summarize(mean(Time) , n()) reps <- 500 mu_i <- 50 mus <- c() for(i in 1:reps){ mu_i <- prostate_stats %>% summarize((sumTime + (numObs - numFull) * mu_i)/numObs) %>% pull() mus[i] <- mu_i } tail(mus) plot(mus) ``` ## Mixture Models (Geyser Data) ```{r} data(faithful) waiting <- faithful$waiting mu1_j = 50 mu2_j = 80 sig1_j = 5 sig2_j = 5 pi_j = 0.5 reps <- 500 mu1s <- c() mu2s <- c() sig1s <- c() sig2s <- c() pis <- c() for(i in 1:reps){ zs <- pi_j * dnorm(waiting, mu2_j, sig2_j) / ((1 - pi_j) * dnorm(waiting, mu1_j, sig1_j) + pi_j * dnorm(waiting, mu2_j, sig2_j)) mu1_j = sum((1-zs) * waiting) / sum(1 - zs) mu2_j = sum(zs * waiting) / sum(zs) sig1_j = sqrt(sum((1-zs)*(waiting - mu1_j)^2) / sum(1-zs)) sig2_j = sqrt(sum(zs*(waiting - mu2_j)^2) / sum(zs)) pi_j = sum(zs) / length(waiting) mu1s[i] <- mu1_j mu2s[i] <- mu2_j sig1s[i] <- sig1_j sig2s[i] <- sig2_j pis[i] <- pi_j } plot(mu1s) plot(mu2s) plot(pis) ```