September 18 & 20, 2017

A few important R functions (ifelse)

temp <- c(-2:2); temp
## [1] -2 -1  0  1  2
ifelse(temp >=0, temp, -temp)  # absolute value
## [1] 2 1 0 1 2

A few important R functions (ifelse)

set.seed(4747)
diamonds %>% select(carat, cut, color, price) %>%
  sample_n(20) %>%
  mutate(price_cat = ifelse(price > 10000, "expensive",
                            ifelse( price > 1500, "medium", "cheap")))
## # A tibble: 20 x 5
##    carat       cut color price price_cat
##    <dbl>     <ord> <ord> <int>     <chr>
##  1  0.90   Premium     F  4801    medium
##  2  1.03     Ideal     F  7429    medium
##  3  0.32   Premium     H   648     cheap
##  4  0.26     Ideal     F   635     cheap
##  5  1.01   Premium     G  6529    medium
##  6  0.70      Good     F  2161    medium
##  7  1.21 Very Good     D 13630 expensive
##  8  0.73      Good     F  2191    medium
##  9  0.40     Ideal     G   702     cheap
## 10  0.30 Very Good     G   541     cheap
## 11  1.25   Premium     I  6300    medium
## 12  0.72     Ideal     G  2776    medium
## 13  0.57     Ideal     H  1077     cheap
## 14  0.40 Very Good     D   967     cheap
## 15  0.42   Premium     G  1179     cheap
## 16  0.54   Premium     F  1770    medium
## 17  0.31 Very Good     E   698     cheap
## 18  0.73 Very Good     F  2360    medium
## 19  0.69   Premium     H  1863    medium
## 20  0.30     Ideal     G   863     cheap

A few important R functions (and/or)

diamonds %>% 
  select(carat, cut, color, clarity, price) %>% 
  head()
## # A tibble: 6 x 5
##   carat       cut color clarity price
##   <dbl>     <ord> <ord>   <ord> <int>
## 1  0.23     Ideal     E     SI2   326
## 2  0.21   Premium     E     SI1   326
## 3  0.23      Good     E     VS1   327
## 4  0.29   Premium     I     VS2   334
## 5  0.31      Good     J     SI2   335
## 6  0.24 Very Good     J    VVS2   336

(diamonds$color == "E" & diamonds$cut == "Premium") %>% head()
## [1] FALSE  TRUE FALSE FALSE FALSE FALSE
(diamonds$color == "E" | diamonds$cut == "Premium") %>% head()
## [1]  TRUE  TRUE  TRUE  TRUE FALSE FALSE
# first element ONLY
(diamonds$color == "E" && diamonds$cut == "Premium") 
## [1] FALSE
# first element ONLY
(diamonds$color == "E" || diamonds$cut == "Premium") 
## [1] TRUE

Small simulation problem

Estimate \(E(X)\) where \(X=\max \{ k: \sum_{i=1}^k U_i < 1 \}\) and \(U_i\) are uniform(0,1).

k <- 0; sumU <- 0
while(sumU < 1) {
  sumU <- sumU + runif(1)
  k <- k+1
}
k; sumU
## [1] 2
## [1] 1.09

Small simulation problem

Estimate \(E(X)\) where \(X=\max \{ k: \sum_{i=1}^k U_i < 1 \}\) and \(U_i\) are uniform(0,1).

set.seed(4747)
allk <- c()
for(i in 1:1000){
  k <- 0; sumU <- 0  
  while(sumU < 1) {
    sumU <- sumU + runif(1)
    k <- k+1 }
  allk <- c(allk, k-1) }
mean(allk)
## [1] 1.74

Basic Blackjack

  • Card game, goal: sum cards as close to 21 without going over
  • A few nuances to card value (e.g., Ace can be 1 or 11)
  • Start with 2 cards, build up one card at a time
  • Lots of different strategies (also based on dealer's cards)

Thoughts on simulating

  • how to break down problem into small steps
  • check to see what works
  • simple is better

What do we need to simulate poker?

  • set-up of cards, dealing, hands
  • "score" (both sum of cards and payout)
  • strategies
  • result of strategies (summary of outcomes)

Source

  • Example and code come from Data Science in R: a case studies approach to computational reasoning and problem solving by Nolan and Temple Lang.
  • Chapter 9 Simulating Blackjack by Hadley Wickham
  • All R code is online at http://rdatasciencecases.org/
  • Link is also on the HW4 assignment

Setting up the Game in R

deck = rep(c(1:10, 10, 10, 10), 4)

shuffle_decks = function(ndecks){sample(rep(deck, ndecks))}

head(shuffle_decks(4), 10)
##  [1]  5  2 10  5  4  6 10  7  2 10

Outcome of cards in hand

handValue = function(cards) {
  value = sum(cards)
  
       # Check for an Ace and change value if it doesn't bust
  if (any(cards == 1) && value <= 11) 
    value = value + 10
  
       # Check bust (set to 0); check Blackjack (set to 21.5)
  if(value > 21)  
    0 
  else if (value == 21 && length(cards) == 2)  
    21.5 # Blackjack
  else 
    value
}
handValue(c(10,4))
## [1] 14

$ of cards in hand

winnings = function(dealer, players) {
  if (dealer > 21) {  # Dealer=Blackjack, ties players with Blackjack
    -1 * (players <= 21)
  } else if (dealer == 0) { # Dealer busts - all non-busted players win
    1.5 * (players > 21) +
      1 * (players <= 21 & players > 0) +
     -1 * (players == 0) 
  } else {            # Dealer 21 or below, all players > dealer win
    1.5 * (players > 21) +  
      1 * (players <= 21 & players > dealer) +
     -1 * (players <= 21 & players < dealer) 
  }
}
winnings(17,c(20, 21.5, 14, 0, 21))
## [1]  1.0  1.5 -1.0 -1.0  1.0

Better $ of cards in hand

winnings = function(dealer, players){
  (players > dealer & players > 21) * 1.5 + # Blackjack
  (players > dealer & players <= 21) * 1 +  # win
  (players < dealer | players == 0) * -1    # lose
}

winnings(17,c(20, 21.5, 14, 0, 21))
## [1]  1.0  1.5 -1.0 -1.0  1.0
winnings(21.5,c(20, 21.5, 14, 0, 21))
## [1] -1  0 -1 -1 -1

How well does handValue work?

test_cards = list( c(10, 1), c(10, 5, 6), c(10, 1, 1), 
                   c(7, 6, 1, 5), c(3, 6, 1, 1), 
                   c(2, 3, 4, 10), c(5, 1, 9, 1, 1),
                   c(5, 10, 7), c(10, 9, 1, 1, 1)) 

test_cards_val = c(21.5, 21, 12, 19, 21, 19, 17, 0, 0)
sapply(test_cards, handValue)  # apply the function handValue to test_cards
## [1] 21.5 21.0 12.0 19.0 21.0 19.0 17.0  0.0  0.0
identical(test_cards_val, sapply(test_cards, handValue))
## [1] TRUE

Testing winnings (create known)

test_vals = c(0, 16, 19, 20, 21, 21.5)

testWinnings =
  matrix(c( -1,  1,  1,  1,  1, 1.5,
            -1,  0,  1,  1,  1, 1.5,
            -1, -1,  0,  1,  1, 1.5,
            -1, -1, -1,  0,  1, 1.5,
            -1, -1, -1, -1,  0, 1.5,
            -1, -1, -1, -1, -1, 0), 
         nrow = length(test_vals), byrow = TRUE)
dimnames(testWinnings) = list(dealer = test_vals, 
                              player = test_vals)

testWinnings
##       player
## dealer  0 16 19 20 21 21.5
##   0    -1  1  1  1  1  1.5
##   16   -1  0  1  1  1  1.5
##   19   -1 -1  0  1  1  1.5
##   20   -1 -1 -1  0  1  1.5
##   21   -1 -1 -1 -1  0  1.5
##   21.5 -1 -1 -1 -1 -1  0.0

Does winnings work?

check = testWinnings  # make the matrix the right size
check[] = NA  # make all entries NA
 
for(i in seq_along(test_vals)) {
  for(j in seq_along(test_vals)) {
    check[i, j] = winnings(test_vals[i], test_vals[j])
  }
}

identical(check, testWinnings)
## [1] TRUE

Function for getting more cards

shoe = function(m = 1) sample(deck, m, replace = TRUE)

new_hand = function(shoe, cards = shoe(2), bet = 1) {
  list(bet = bet, shoe = shoe, cards = cards)
}
myCards = new_hand(shoe, bet = 7)
myCards
## $bet
## [1] 7
## 
## $shoe
## function (m = 1) 
## sample(deck, m, replace = TRUE)
## 
## $cards
## [1]  9 10

First action: hit

receive another card

hit = function(hand) {
  hand$cards = c(hand$cards, hand$shoe(1))
  hand
}

hit(myCards)$cards
## [1]  9 10  1

Second action: stand

stay with current cards

stand = function(hand) hand

stand(myCards)$cards
## [1]  9 10

Third action: double down

double the bet after receiving exactly one more card

dd =  function(hand) {
  hand$bet = hand$bet * 2
  hand = hit(hand)
  stand(hand)
}

dd(myCards)$cards
## [1]  9 10 10

Fourth action: split a pair

create two different hands from initial hand with two cards of the same value

splitPair = function(hand) {
  list( new_hand(hand$shoe, 
             cards = c(hand$cards[1], hand$shoe(1)),
             bet = hand$bet),
        new_hand(hand$shoe, 
             cards = c(hand$cards[2], hand$shoe(1)),
             bet = hand$bet))   }
splitHand = splitPair(myCards)

Results of splitting

(can we always split?)

splitHand[[1]]$cards
## [1]  9 10
splitHand[[2]]$cards
## [1] 10  3

Let's play! Not yet automated…

set.seed(470); dealer = new_hand(shoe); player = new_hand(shoe); 
dealer$cards[1]
## [1] 10
player$cards; player = hit(player); player$cards
## [1]  6 10
## [1]  6 10 10
dealer$cards; dealer = hit(dealer); dealer$cards
## [1] 10  8
## [1] 10  8  1

Who won?

dealer$cards; player$cards
## [1] 10  8  1
## [1]  6 10 10
handValue(dealer$cards); handValue(player$cards)
## [1] 19
## [1] 0
winnings(handValue(dealer$cards), handValue(player$cards))
## [1] -1

Simply strategy

recall the handValue function – what if player busts?

strategy_simple = function(mine, dealerFaceUp) {
  if (handValue(dealerFaceUp) > 6 && handValue(mine) < 17) 
     "H" 
  else 
     "S"
}

Better simple strategy

strategy_simple = function(mine, dealerFaceUp) {
  if (handValue(mine) == 0) return("S")
  if (handValue(dealerFaceUp) > 6 && handValue(mine) < 17) 
     "H" 
  else 
     "S"
}

Dealer

The dealer gets cards regardless of what the player does

dealer_cards = function(shoe) {
  cards = shoe(2)
  while(handValue(cards) < 17 && handValue(cards) > 0) {
    cards = c(cards, shoe(1))
  }
  cards
}

dealer_cards(shoe)
## [1]  2  9 10
dealer_cards(shoe)
## [1] 10  3  3  2

Playing a hand

play_hand = function(shoe, strategy, 
                      hand = new_hand(shoe), 
                      dealer = dealer_cards(shoe)) {
  
  face_up_card = dealer[1]
  
  action = strategy(hand$cards, face_up_card)
  while(action != "S" && handValue(hand$cards) != 0) {
    if (action == "H") {
      hand = hit(hand)
      action = strategy(hand$cards, face_up_card)
    } else {
      stop("Unknown action: should be one of S, H")
    }
  }  

  winnings(handValue(dealer), handValue(hand$cards)) * hand$bet
}

Play a few hands

play_hand(shoe, strategy_simple)
## [1] 1
play_hand(shoe, strategy_simple)
## [1] -1
play_hand(shoe, strategy_simple, new_hand(shoe, bet=7))
## [1] 7
play_hand(shoe, strategy_simple, new_hand(shoe, bet=7))
## [1] 7

Repeated games

To repeat the game, we simply repeat the play_hand function and keep track of the dollars gained or lost.

reps=10
money=20
for(i in 1:reps){
  money <- money + play_hand(shoe, strategy_simple)
  print(money)}
## [1] 21
## [1] 20
## [1] 19
## [1] 20.5
## [1] 21.5
## [1] 22.5
## [1] 23.5
## [1] 22.5
## [1] 21.5
## [1] 21.5

Pass the Pigs

A game by Winning Moves (previously Hasboro) involving rolling pigs

Pig Rules

  • Roll pigs, accumulate points
  • Stop if you want or if you Pig Out (no Oinker)
  • First player to 100 points wins

What is your best strategy for winning???

Generating U[0,1] variables

 a=31541435235; b=23462146143; m=423514351351

xval=47; reps=10000; xval.all = c()

for(i in 1:reps){   xval = (a*xval + b) %% m
  xval.all = c(xval.all, xval)   }

Plot of U[0,1] values

hist(xval.all/m)

Generating random Weibull(2,1) variables

unifdata = runif(10000,0,1)
weib1data = sqrt(-log(1-unifdata))
weib2data = rweibull(10000,2,1)

Plot of Weibull data

par(mfrow=c(1,2))
hist(weib1data, freq=FALSE, xlab="Weibull(2,1)", main="Using Inverse Transform")
hist(weib2data, freq=FALSE, xlab="Weibull(2,1)", main="Using R Function")

Sensitivity of CI to tech conditions

Consider the following set up (as in the WU):

set.seed(4747)
n<- 100 
x1 <- rep(c(0,1), each=n/2)
x2 <- runif(n, min=-1, max=1)

beta0 <- -1; beta1 <- 0.5; beta2 <- 1.5

y <- beta0 + beta1*x1 + beta2*x2 + rnorm(n, mean=0, sd = 1)

eqVar <- data.frame(why=y, ex1 = x1, ex2 = x2)

Plot of data

ggplot(eqVar, aes(x=ex2, y=why, color=as.factor(ex1))) + geom_point() +
    stat_smooth(method="lm", se=FALSE)  + scale_colour_tableau("colorblind10")

Running a linear model

CI <- lm(y~x1+x2) %>% tidy(conf.int=TRUE) 

CI
##          term estimate std.error statistic  p.value conf.low
## 1 (Intercept)   -0.908     0.126     -7.23 1.14e-10   -1.157
## 2          x1    0.585     0.177      3.31 1.33e-03    0.234
## 3          x2    1.031     0.161      6.42 5.11e-09    0.712
##   conf.high
## 1    -0.658
## 2     0.936
## 3     1.350
between(beta2, CI[3,6], CI[3,7])
## [1] FALSE

What happens to the CI of coefficients in repeated samples? (eq var)

beta2.in <- c()
beta0 <- -1; beta1 <- 0.5; beta2 <- 1.5
n<- 100; reps=10000

set.seed(4747)
for(i in 1:reps){
  x1 <- rep(c(0,1), each=n/2)
  x2 <- runif(n, min=-1, max=1)
  y <- beta0 + beta1*x1 + beta2*x2 + rnorm(n, mean=0, sd = 1)

  CI <- lm(y~x1+x2) %>% tidy(conf.int=TRUE) 
  
  beta2.in <- c(beta2.in, between(beta2, CI[3,6], CI[3,7]))
}
sum(beta2.in)/reps
## [1] 0.952

Sensitivity of CI to tech conditions

Consider the following set up (note the difference in variability):

set.seed(470)
n<- 100 
x1 <- rep(c(0,1), each=n/2)
x2 <- runif(n, min=-1, max=1)

beta0 <- -1; beta1 <- 0.5; beta2 <- 1.5

y <- beta0 + beta1*x1 + beta2*x2 + 
  rnorm(n, mean=0, sd = 1+ x1 + 10*abs(x2))

uneqVar <- data.frame(why=y, ex1 = x1, ex2 = x2)

Plot of data

ggplot(uneqVar, aes(x=ex2, y=why, color=as.factor(ex1))) + geom_point() +
    stat_smooth(method="lm", se=FALSE)  + scale_colour_tableau("colorblind10")

What happens to the CI of coefficients in repeated samples? (uneq var)

beta2.in <- c()
beta0 <- -1; beta1 <- 0.5; beta2 <- 1.5
n<- 100; reps=10000

set.seed(4747)
for(i in 1:reps){
  x1 <- rep(c(0,1), each=n/2)
  x2 <- runif(n, min=-1, max=1)
  y <- beta0 + beta1*x1 + beta2*x2 + rnorm(n, mean=0, sd = 1+ x1 + 10*abs(x2))

  CI <- lm(y~x1+x2) %>% tidy(conf.int=TRUE) 
  
  beta2.in <- c(beta2.in, between(beta2, CI[3,6], CI[3,7]))
}
sum(beta2.in)/reps
## [1] 0.872

Equal variance: type I error?

set.seed(4747)
pvals = c()
reps=10000
for(i in 1:reps){
  x1 <- rnorm(10, mean=47, sd=1)
  x2 <- rnorm(10, mean=47, sd=1)
  pvals <- c(pvals, t.test(x1,x2, var.equal=TRUE)$p.value) }
sum(pvals < .05)/reps
## [1] 0.0484

Unequal variance: type I error?

set.seed(4747)
pvals = c()
reps=10000
for(i in 1:reps){
  x1 <- rnorm(10, mean=47, sd=1)
  x2 <- rnorm(10, mean=47, sd=100)
  pvals <- c(pvals, t.test(x1,x2, var.equal=TRUE)$p.value) }
sum(pvals < .05)/reps
## [1] 0.0617