Censored Data

Below we use example data based on a prostate cancer study. A few caveats include (1) survival data might not be best modeled by an exponential distribution, (2) this is a randomized study with two groups (Treatment) that are being ignored.

prostate <- read.csv("prostate.csv")

prostate_stats <- prostate %>% 
  summarize(MLE = sum(Time) / sum(Status),
            aveTime = mean(Time),
            sumTime = sum(Time), 
            numFull = sum(Status), numObs = n()) 
prostate_stats
# 0 is censored
# 1 is uncensored
prostate %>% group_by(Status) %>%
  summarize(mean(Time) , n())
reps <- 500
mu_i <- 50
mus <- c()
for(i in 1:reps){
  mu_i <- prostate_stats %>% 
    summarize((sumTime + (numObs - numFull) * mu_i)/numObs) %>% pull()
  mus[i] <- mu_i
}
tail(mus)
## [1] 315 315 315 315 315 315
plot(mus)

Mixture Models (Geyser Data)

data(faithful)

waiting <- faithful$waiting

mu1_j = 50
mu2_j = 80
sig1_j = 5
sig2_j = 5
pi_j = 0.5

reps <- 500
mu1s <- c()
mu2s <- c()
sig1s <- c()
sig2s <- c()
pis <- c()

for(i in 1:reps){
  zs <- pi_j * dnorm(waiting, mu2_j, sig2_j) / 
    ((1 - pi_j) * dnorm(waiting, mu1_j, sig1_j) + pi_j * dnorm(waiting, mu2_j, sig2_j))
  mu1_j = sum((1-zs) * waiting) / sum(1 - zs)
  mu2_j = sum(zs * waiting) / sum(zs)
  sig1_j = sqrt(sum((1-zs)*(waiting - mu1_j)^2) / sum(1-zs))
  sig2_j = sqrt(sum(zs*(waiting - mu2_j)^2) / sum(zs))
  pi_j = sum(zs) / length(waiting)
  
  mu1s[i] <- mu1_j
  mu2s[i] <- mu2_j
  sig1s[i] <- sig1_j
  sig2s[i] <- sig2_j
  pis[i] <- pi_j
  
}

plot(mu1s)

plot(mu2s)

plot(pis)