temp <- c(-2:2); temp
## [1] -2 -1 0 1 2
ifelse(temp >=0, temp, -temp) # absolute value
## [1] 2 1 0 1 2
September 18 & 20, 2017
temp <- c(-2:2); temp
## [1] -2 -1 0 1 2
ifelse(temp >=0, temp, -temp) # absolute value
## [1] 2 1 0 1 2
set.seed(4747)
diamonds %>% select(carat, cut, color, price) %>%
sample_n(20) %>%
mutate(price_cat = ifelse(price > 10000, "expensive",
ifelse( price > 1500, "medium", "cheap")))
## # A tibble: 20 x 5 ## carat cut color price price_cat ## <dbl> <ord> <ord> <int> <chr> ## 1 0.90 Premium F 4801 medium ## 2 1.03 Ideal F 7429 medium ## 3 0.32 Premium H 648 cheap ## 4 0.26 Ideal F 635 cheap ## 5 1.01 Premium G 6529 medium ## 6 0.70 Good F 2161 medium ## 7 1.21 Very Good D 13630 expensive ## 8 0.73 Good F 2191 medium ## 9 0.40 Ideal G 702 cheap ## 10 0.30 Very Good G 541 cheap ## 11 1.25 Premium I 6300 medium ## 12 0.72 Ideal G 2776 medium ## 13 0.57 Ideal H 1077 cheap ## 14 0.40 Very Good D 967 cheap ## 15 0.42 Premium G 1179 cheap ## 16 0.54 Premium F 1770 medium ## 17 0.31 Very Good E 698 cheap ## 18 0.73 Very Good F 2360 medium ## 19 0.69 Premium H 1863 medium ## 20 0.30 Ideal G 863 cheap
diamonds %>% select(carat, cut, color, clarity, price) %>% head()
## # A tibble: 6 x 5 ## carat cut color clarity price ## <dbl> <ord> <ord> <ord> <int> ## 1 0.23 Ideal E SI2 326 ## 2 0.21 Premium E SI1 326 ## 3 0.23 Good E VS1 327 ## 4 0.29 Premium I VS2 334 ## 5 0.31 Good J SI2 335 ## 6 0.24 Very Good J VVS2 336
(diamonds$color == "E" & diamonds$cut == "Premium") %>% head()
## [1] FALSE TRUE FALSE FALSE FALSE FALSE
(diamonds$color == "E" | diamonds$cut == "Premium") %>% head()
## [1] TRUE TRUE TRUE TRUE FALSE FALSE
# first element ONLY (diamonds$color == "E" && diamonds$cut == "Premium")
## [1] FALSE
# first element ONLY (diamonds$color == "E" || diamonds$cut == "Premium")
## [1] TRUE
Estimate \(E(X)\) where \(X=\max \{ k: \sum_{i=1}^k U_i < 1 \}\) and \(U_i\) are uniform(0,1).
k <- 0; sumU <- 0
while(sumU < 1) {
sumU <- sumU + runif(1)
k <- k+1
}
k; sumU
## [1] 2
## [1] 1.09
Estimate \(E(X)\) where \(X=\max \{ k: \sum_{i=1}^k U_i < 1 \}\) and \(U_i\) are uniform(0,1).
set.seed(4747)
allk <- c()
for(i in 1:1000){
k <- 0; sumU <- 0
while(sumU < 1) {
sumU <- sumU + runif(1)
k <- k+1 }
allk <- c(allk, k-1) }
mean(allk)
## [1] 1.74
deck = rep(c(1:10, 10, 10, 10), 4)
shuffle_decks = function(ndecks){sample(rep(deck, ndecks))}
head(shuffle_decks(4), 10)
## [1] 5 2 10 5 4 6 10 7 2 10
handValue = function(cards) {
value = sum(cards)
# Check for an Ace and change value if it doesn't bust
if (any(cards == 1) && value <= 11)
value = value + 10
# Check bust (set to 0); check Blackjack (set to 21.5)
if(value > 21)
0
else if (value == 21 && length(cards) == 2)
21.5 # Blackjack
else
value
}
handValue(c(10,4))
## [1] 14
winnings = function(dealer, players) {
if (dealer > 21) { # Dealer=Blackjack, ties players with Blackjack
-1 * (players <= 21)
} else if (dealer == 0) { # Dealer busts - all non-busted players win
1.5 * (players > 21) +
1 * (players <= 21 & players > 0) +
-1 * (players == 0)
} else { # Dealer 21 or below, all players > dealer win
1.5 * (players > 21) +
1 * (players <= 21 & players > dealer) +
-1 * (players <= 21 & players < dealer)
}
}
winnings(17,c(20, 21.5, 14, 0, 21))
## [1] 1.0 1.5 -1.0 -1.0 1.0
winnings = function(dealer, players){
(players > dealer & players > 21) * 1.5 + # Blackjack
(players > dealer & players <= 21) * 1 + # win
(players < dealer | players == 0) * -1 # lose
}
winnings(17,c(20, 21.5, 14, 0, 21))
## [1] 1.0 1.5 -1.0 -1.0 1.0
winnings(21.5,c(20, 21.5, 14, 0, 21))
## [1] -1 0 -1 -1 -1
handValue work?test_cards = list( c(10, 1), c(10, 5, 6), c(10, 1, 1),
c(7, 6, 1, 5), c(3, 6, 1, 1),
c(2, 3, 4, 10), c(5, 1, 9, 1, 1),
c(5, 10, 7), c(10, 9, 1, 1, 1))
test_cards_val = c(21.5, 21, 12, 19, 21, 19, 17, 0, 0)
sapply(test_cards, handValue) # apply the function handValue to test_cards
## [1] 21.5 21.0 12.0 19.0 21.0 19.0 17.0 0.0 0.0
identical(test_cards_val, sapply(test_cards, handValue))
## [1] TRUE
test_vals = c(0, 16, 19, 20, 21, 21.5)
testWinnings =
matrix(c( -1, 1, 1, 1, 1, 1.5,
-1, 0, 1, 1, 1, 1.5,
-1, -1, 0, 1, 1, 1.5,
-1, -1, -1, 0, 1, 1.5,
-1, -1, -1, -1, 0, 1.5,
-1, -1, -1, -1, -1, 0),
nrow = length(test_vals), byrow = TRUE)
dimnames(testWinnings) = list(dealer = test_vals,
player = test_vals)
testWinnings
## player ## dealer 0 16 19 20 21 21.5 ## 0 -1 1 1 1 1 1.5 ## 16 -1 0 1 1 1 1.5 ## 19 -1 -1 0 1 1 1.5 ## 20 -1 -1 -1 0 1 1.5 ## 21 -1 -1 -1 -1 0 1.5 ## 21.5 -1 -1 -1 -1 -1 0.0
winnings work?check = testWinnings # make the matrix the right size
check[] = NA # make all entries NA
for(i in seq_along(test_vals)) {
for(j in seq_along(test_vals)) {
check[i, j] = winnings(test_vals[i], test_vals[j])
}
}
identical(check, testWinnings)
## [1] TRUE
shoe = function(m = 1) sample(deck, m, replace = TRUE)
new_hand = function(shoe, cards = shoe(2), bet = 1) {
list(bet = bet, shoe = shoe, cards = cards)
}
myCards = new_hand(shoe, bet = 7)
myCards
## $bet ## [1] 7 ## ## $shoe ## function (m = 1) ## sample(deck, m, replace = TRUE) ## ## $cards ## [1] 9 10
receive another card
hit = function(hand) {
hand$cards = c(hand$cards, hand$shoe(1))
hand
}
hit(myCards)$cards
## [1] 9 10 1
stay with current cards
stand = function(hand) hand stand(myCards)$cards
## [1] 9 10
double the bet after receiving exactly one more card
dd = function(hand) {
hand$bet = hand$bet * 2
hand = hit(hand)
stand(hand)
}
dd(myCards)$cards
## [1] 9 10 10
create two different hands from initial hand with two cards of the same value
splitPair = function(hand) {
list( new_hand(hand$shoe,
cards = c(hand$cards[1], hand$shoe(1)),
bet = hand$bet),
new_hand(hand$shoe,
cards = c(hand$cards[2], hand$shoe(1)),
bet = hand$bet)) }
splitHand = splitPair(myCards)
(can we always split?)
splitHand[[1]]$cards
## [1] 9 10
splitHand[[2]]$cards
## [1] 10 3
set.seed(470); dealer = new_hand(shoe); player = new_hand(shoe); dealer$cards[1]
## [1] 10
player$cards; player = hit(player); player$cards
## [1] 6 10
## [1] 6 10 10
dealer$cards; dealer = hit(dealer); dealer$cards
## [1] 10 8
## [1] 10 8 1
dealer$cards; player$cards
## [1] 10 8 1
## [1] 6 10 10
handValue(dealer$cards); handValue(player$cards)
## [1] 19
## [1] 0
winnings(handValue(dealer$cards), handValue(player$cards))
## [1] -1
recall the handValue function – what if player busts?
strategy_simple = function(mine, dealerFaceUp) {
if (handValue(dealerFaceUp) > 6 && handValue(mine) < 17)
"H"
else
"S"
}
strategy_simple = function(mine, dealerFaceUp) {
if (handValue(mine) == 0) return("S")
if (handValue(dealerFaceUp) > 6 && handValue(mine) < 17)
"H"
else
"S"
}
The dealer gets cards regardless of what the player does
dealer_cards = function(shoe) {
cards = shoe(2)
while(handValue(cards) < 17 && handValue(cards) > 0) {
cards = c(cards, shoe(1))
}
cards
}
dealer_cards(shoe)
## [1] 2 9 10
dealer_cards(shoe)
## [1] 10 3 3 2
play_hand = function(shoe, strategy,
hand = new_hand(shoe),
dealer = dealer_cards(shoe)) {
face_up_card = dealer[1]
action = strategy(hand$cards, face_up_card)
while(action != "S" && handValue(hand$cards) != 0) {
if (action == "H") {
hand = hit(hand)
action = strategy(hand$cards, face_up_card)
} else {
stop("Unknown action: should be one of S, H")
}
}
winnings(handValue(dealer), handValue(hand$cards)) * hand$bet
}
play_hand(shoe, strategy_simple)
## [1] 1
play_hand(shoe, strategy_simple)
## [1] -1
play_hand(shoe, strategy_simple, new_hand(shoe, bet=7))
## [1] 7
play_hand(shoe, strategy_simple, new_hand(shoe, bet=7))
## [1] 7
To repeat the game, we simply repeat the play_hand function and keep track of the dollars gained or lost.
reps=10
money=20
for(i in 1:reps){
money <- money + play_hand(shoe, strategy_simple)
print(money)}
## [1] 21 ## [1] 20 ## [1] 19 ## [1] 20.5 ## [1] 21.5 ## [1] 22.5 ## [1] 23.5 ## [1] 22.5 ## [1] 21.5 ## [1] 21.5
A game by Winning Moves (previously Hasboro) involving rolling pigs
What is your best strategy for winning???
a=31541435235; b=23462146143; m=423514351351
xval=47; reps=10000; xval.all = c()
for(i in 1:reps){ xval = (a*xval + b) %% m
xval.all = c(xval.all, xval) }
hist(xval.all/m)
unifdata = runif(10000,0,1) weib1data = sqrt(-log(1-unifdata)) weib2data = rweibull(10000,2,1)
par(mfrow=c(1,2)) hist(weib1data, freq=FALSE, xlab="Weibull(2,1)", main="Using Inverse Transform") hist(weib2data, freq=FALSE, xlab="Weibull(2,1)", main="Using R Function")
Consider the following set up (as in the WU):
set.seed(4747) n<- 100 x1 <- rep(c(0,1), each=n/2) x2 <- runif(n, min=-1, max=1) beta0 <- -1; beta1 <- 0.5; beta2 <- 1.5 y <- beta0 + beta1*x1 + beta2*x2 + rnorm(n, mean=0, sd = 1) eqVar <- data.frame(why=y, ex1 = x1, ex2 = x2)
ggplot(eqVar, aes(x=ex2, y=why, color=as.factor(ex1))) + geom_point() +
stat_smooth(method="lm", se=FALSE) + scale_colour_tableau("colorblind10")
CI <- lm(y~x1+x2) %>% tidy(conf.int=TRUE) CI
## term estimate std.error statistic p.value conf.low ## 1 (Intercept) -0.908 0.126 -7.23 1.14e-10 -1.157 ## 2 x1 0.585 0.177 3.31 1.33e-03 0.234 ## 3 x2 1.031 0.161 6.42 5.11e-09 0.712 ## conf.high ## 1 -0.658 ## 2 0.936 ## 3 1.350
between(beta2, CI[3,6], CI[3,7])
## [1] FALSE
beta2.in <- c()
beta0 <- -1; beta1 <- 0.5; beta2 <- 1.5
n<- 100; reps=10000
set.seed(4747)
for(i in 1:reps){
x1 <- rep(c(0,1), each=n/2)
x2 <- runif(n, min=-1, max=1)
y <- beta0 + beta1*x1 + beta2*x2 + rnorm(n, mean=0, sd = 1)
CI <- lm(y~x1+x2) %>% tidy(conf.int=TRUE)
beta2.in <- c(beta2.in, between(beta2, CI[3,6], CI[3,7]))
}
sum(beta2.in)/reps
## [1] 0.952
Consider the following set up (note the difference in variability):
set.seed(470) n<- 100 x1 <- rep(c(0,1), each=n/2) x2 <- runif(n, min=-1, max=1) beta0 <- -1; beta1 <- 0.5; beta2 <- 1.5 y <- beta0 + beta1*x1 + beta2*x2 + rnorm(n, mean=0, sd = 1+ x1 + 10*abs(x2)) uneqVar <- data.frame(why=y, ex1 = x1, ex2 = x2)
ggplot(uneqVar, aes(x=ex2, y=why, color=as.factor(ex1))) + geom_point() +
stat_smooth(method="lm", se=FALSE) + scale_colour_tableau("colorblind10")
beta2.in <- c()
beta0 <- -1; beta1 <- 0.5; beta2 <- 1.5
n<- 100; reps=10000
set.seed(4747)
for(i in 1:reps){
x1 <- rep(c(0,1), each=n/2)
x2 <- runif(n, min=-1, max=1)
y <- beta0 + beta1*x1 + beta2*x2 + rnorm(n, mean=0, sd = 1+ x1 + 10*abs(x2))
CI <- lm(y~x1+x2) %>% tidy(conf.int=TRUE)
beta2.in <- c(beta2.in, between(beta2, CI[3,6], CI[3,7]))
}
sum(beta2.in)/reps
## [1] 0.872
set.seed(4747)
pvals = c()
reps=10000
for(i in 1:reps){
x1 <- rnorm(10, mean=47, sd=1)
x2 <- rnorm(10, mean=47, sd=1)
pvals <- c(pvals, t.test(x1,x2, var.equal=TRUE)$p.value) }
sum(pvals < .05)/reps
## [1] 0.0484
set.seed(4747)
pvals = c()
reps=10000
for(i in 1:reps){
x1 <- rnorm(10, mean=47, sd=1)
x2 <- rnorm(10, mean=47, sd=100)
pvals <- c(pvals, t.test(x1,x2, var.equal=TRUE)$p.value) }
sum(pvals < .05)/reps
## [1] 0.0617