temp <- c(-2:2); temp
## [1] -2 -1 0 1 2
ifelse(temp >=0, temp, -temp) # absolute value
## [1] 2 1 0 1 2
September 18 & 20, 2017
temp <- c(-2:2); temp
## [1] -2 -1 0 1 2
ifelse(temp >=0, temp, -temp) # absolute value
## [1] 2 1 0 1 2
set.seed(4747) diamonds %>% select(carat, cut, color, price) %>% sample_n(20) %>% mutate(price_cat = ifelse(price > 10000, "expensive", ifelse( price > 1500, "medium", "cheap")))
## # A tibble: 20 x 5 ## carat cut color price price_cat ## <dbl> <ord> <ord> <int> <chr> ## 1 0.90 Premium F 4801 medium ## 2 1.03 Ideal F 7429 medium ## 3 0.32 Premium H 648 cheap ## 4 0.26 Ideal F 635 cheap ## 5 1.01 Premium G 6529 medium ## 6 0.70 Good F 2161 medium ## 7 1.21 Very Good D 13630 expensive ## 8 0.73 Good F 2191 medium ## 9 0.40 Ideal G 702 cheap ## 10 0.30 Very Good G 541 cheap ## 11 1.25 Premium I 6300 medium ## 12 0.72 Ideal G 2776 medium ## 13 0.57 Ideal H 1077 cheap ## 14 0.40 Very Good D 967 cheap ## 15 0.42 Premium G 1179 cheap ## 16 0.54 Premium F 1770 medium ## 17 0.31 Very Good E 698 cheap ## 18 0.73 Very Good F 2360 medium ## 19 0.69 Premium H 1863 medium ## 20 0.30 Ideal G 863 cheap
diamonds %>% select(carat, cut, color, clarity, price) %>% head()
## # A tibble: 6 x 5 ## carat cut color clarity price ## <dbl> <ord> <ord> <ord> <int> ## 1 0.23 Ideal E SI2 326 ## 2 0.21 Premium E SI1 326 ## 3 0.23 Good E VS1 327 ## 4 0.29 Premium I VS2 334 ## 5 0.31 Good J SI2 335 ## 6 0.24 Very Good J VVS2 336
(diamonds$color == "E" & diamonds$cut == "Premium") %>% head()
## [1] FALSE TRUE FALSE FALSE FALSE FALSE
(diamonds$color == "E" | diamonds$cut == "Premium") %>% head()
## [1] TRUE TRUE TRUE TRUE FALSE FALSE
# first element ONLY (diamonds$color == "E" && diamonds$cut == "Premium")
## [1] FALSE
# first element ONLY (diamonds$color == "E" || diamonds$cut == "Premium")
## [1] TRUE
Estimate \(E(X)\) where \(X=\max \{ k: \sum_{i=1}^k U_i < 1 \}\) and \(U_i\) are uniform(0,1).
k <- 0; sumU <- 0 while(sumU < 1) { sumU <- sumU + runif(1) k <- k+1 } k; sumU
## [1] 2
## [1] 1.09
Estimate \(E(X)\) where \(X=\max \{ k: \sum_{i=1}^k U_i < 1 \}\) and \(U_i\) are uniform(0,1).
set.seed(4747) allk <- c() for(i in 1:1000){ k <- 0; sumU <- 0 while(sumU < 1) { sumU <- sumU + runif(1) k <- k+1 } allk <- c(allk, k-1) } mean(allk)
## [1] 1.74
deck = rep(c(1:10, 10, 10, 10), 4) shuffle_decks = function(ndecks){sample(rep(deck, ndecks))} head(shuffle_decks(4), 10)
## [1] 5 2 10 5 4 6 10 7 2 10
handValue = function(cards) { value = sum(cards) # Check for an Ace and change value if it doesn't bust if (any(cards == 1) && value <= 11) value = value + 10 # Check bust (set to 0); check Blackjack (set to 21.5) if(value > 21) 0 else if (value == 21 && length(cards) == 2) 21.5 # Blackjack else value } handValue(c(10,4))
## [1] 14
winnings = function(dealer, players) { if (dealer > 21) { # Dealer=Blackjack, ties players with Blackjack -1 * (players <= 21) } else if (dealer == 0) { # Dealer busts - all non-busted players win 1.5 * (players > 21) + 1 * (players <= 21 & players > 0) + -1 * (players == 0) } else { # Dealer 21 or below, all players > dealer win 1.5 * (players > 21) + 1 * (players <= 21 & players > dealer) + -1 * (players <= 21 & players < dealer) } } winnings(17,c(20, 21.5, 14, 0, 21))
## [1] 1.0 1.5 -1.0 -1.0 1.0
winnings = function(dealer, players){ (players > dealer & players > 21) * 1.5 + # Blackjack (players > dealer & players <= 21) * 1 + # win (players < dealer | players == 0) * -1 # lose } winnings(17,c(20, 21.5, 14, 0, 21))
## [1] 1.0 1.5 -1.0 -1.0 1.0
winnings(21.5,c(20, 21.5, 14, 0, 21))
## [1] -1 0 -1 -1 -1
handValue
work?test_cards = list( c(10, 1), c(10, 5, 6), c(10, 1, 1), c(7, 6, 1, 5), c(3, 6, 1, 1), c(2, 3, 4, 10), c(5, 1, 9, 1, 1), c(5, 10, 7), c(10, 9, 1, 1, 1)) test_cards_val = c(21.5, 21, 12, 19, 21, 19, 17, 0, 0) sapply(test_cards, handValue) # apply the function handValue to test_cards
## [1] 21.5 21.0 12.0 19.0 21.0 19.0 17.0 0.0 0.0
identical(test_cards_val, sapply(test_cards, handValue))
## [1] TRUE
test_vals = c(0, 16, 19, 20, 21, 21.5) testWinnings = matrix(c( -1, 1, 1, 1, 1, 1.5, -1, 0, 1, 1, 1, 1.5, -1, -1, 0, 1, 1, 1.5, -1, -1, -1, 0, 1, 1.5, -1, -1, -1, -1, 0, 1.5, -1, -1, -1, -1, -1, 0), nrow = length(test_vals), byrow = TRUE) dimnames(testWinnings) = list(dealer = test_vals, player = test_vals) testWinnings
## player ## dealer 0 16 19 20 21 21.5 ## 0 -1 1 1 1 1 1.5 ## 16 -1 0 1 1 1 1.5 ## 19 -1 -1 0 1 1 1.5 ## 20 -1 -1 -1 0 1 1.5 ## 21 -1 -1 -1 -1 0 1.5 ## 21.5 -1 -1 -1 -1 -1 0.0
winnings
work?check = testWinnings # make the matrix the right size check[] = NA # make all entries NA for(i in seq_along(test_vals)) { for(j in seq_along(test_vals)) { check[i, j] = winnings(test_vals[i], test_vals[j]) } } identical(check, testWinnings)
## [1] TRUE
shoe = function(m = 1) sample(deck, m, replace = TRUE) new_hand = function(shoe, cards = shoe(2), bet = 1) { list(bet = bet, shoe = shoe, cards = cards) } myCards = new_hand(shoe, bet = 7) myCards
## $bet ## [1] 7 ## ## $shoe ## function (m = 1) ## sample(deck, m, replace = TRUE) ## ## $cards ## [1] 9 10
receive another card
hit = function(hand) { hand$cards = c(hand$cards, hand$shoe(1)) hand } hit(myCards)$cards
## [1] 9 10 1
stay with current cards
stand = function(hand) hand stand(myCards)$cards
## [1] 9 10
double the bet after receiving exactly one more card
dd = function(hand) { hand$bet = hand$bet * 2 hand = hit(hand) stand(hand) } dd(myCards)$cards
## [1] 9 10 10
create two different hands from initial hand with two cards of the same value
splitPair = function(hand) { list( new_hand(hand$shoe, cards = c(hand$cards[1], hand$shoe(1)), bet = hand$bet), new_hand(hand$shoe, cards = c(hand$cards[2], hand$shoe(1)), bet = hand$bet)) } splitHand = splitPair(myCards)
(can we always split?)
splitHand[[1]]$cards
## [1] 9 10
splitHand[[2]]$cards
## [1] 10 3
set.seed(470); dealer = new_hand(shoe); player = new_hand(shoe); dealer$cards[1]
## [1] 10
player$cards; player = hit(player); player$cards
## [1] 6 10
## [1] 6 10 10
dealer$cards; dealer = hit(dealer); dealer$cards
## [1] 10 8
## [1] 10 8 1
dealer$cards; player$cards
## [1] 10 8 1
## [1] 6 10 10
handValue(dealer$cards); handValue(player$cards)
## [1] 19
## [1] 0
winnings(handValue(dealer$cards), handValue(player$cards))
## [1] -1
recall the handValue function – what if player busts?
strategy_simple = function(mine, dealerFaceUp) { if (handValue(dealerFaceUp) > 6 && handValue(mine) < 17) "H" else "S" }
strategy_simple = function(mine, dealerFaceUp) { if (handValue(mine) == 0) return("S") if (handValue(dealerFaceUp) > 6 && handValue(mine) < 17) "H" else "S" }
The dealer gets cards regardless of what the player does
dealer_cards = function(shoe) { cards = shoe(2) while(handValue(cards) < 17 && handValue(cards) > 0) { cards = c(cards, shoe(1)) } cards } dealer_cards(shoe)
## [1] 2 9 10
dealer_cards(shoe)
## [1] 10 3 3 2
play_hand = function(shoe, strategy, hand = new_hand(shoe), dealer = dealer_cards(shoe)) { face_up_card = dealer[1] action = strategy(hand$cards, face_up_card) while(action != "S" && handValue(hand$cards) != 0) { if (action == "H") { hand = hit(hand) action = strategy(hand$cards, face_up_card) } else { stop("Unknown action: should be one of S, H") } } winnings(handValue(dealer), handValue(hand$cards)) * hand$bet }
play_hand(shoe, strategy_simple)
## [1] 1
play_hand(shoe, strategy_simple)
## [1] -1
play_hand(shoe, strategy_simple, new_hand(shoe, bet=7))
## [1] 7
play_hand(shoe, strategy_simple, new_hand(shoe, bet=7))
## [1] 7
To repeat the game, we simply repeat the play_hand
function and keep track of the dollars gained or lost.
reps=10 money=20 for(i in 1:reps){ money <- money + play_hand(shoe, strategy_simple) print(money)}
## [1] 21 ## [1] 20 ## [1] 19 ## [1] 20.5 ## [1] 21.5 ## [1] 22.5 ## [1] 23.5 ## [1] 22.5 ## [1] 21.5 ## [1] 21.5
A game by Winning Moves (previously Hasboro) involving rolling pigs
What is your best strategy for winning???
a=31541435235; b=23462146143; m=423514351351 xval=47; reps=10000; xval.all = c() for(i in 1:reps){ xval = (a*xval + b) %% m xval.all = c(xval.all, xval) }
hist(xval.all/m)
unifdata = runif(10000,0,1) weib1data = sqrt(-log(1-unifdata)) weib2data = rweibull(10000,2,1)
par(mfrow=c(1,2)) hist(weib1data, freq=FALSE, xlab="Weibull(2,1)", main="Using Inverse Transform") hist(weib2data, freq=FALSE, xlab="Weibull(2,1)", main="Using R Function")
Consider the following set up (as in the WU):
set.seed(4747) n<- 100 x1 <- rep(c(0,1), each=n/2) x2 <- runif(n, min=-1, max=1) beta0 <- -1; beta1 <- 0.5; beta2 <- 1.5 y <- beta0 + beta1*x1 + beta2*x2 + rnorm(n, mean=0, sd = 1) eqVar <- data.frame(why=y, ex1 = x1, ex2 = x2)
ggplot(eqVar, aes(x=ex2, y=why, color=as.factor(ex1))) + geom_point() + stat_smooth(method="lm", se=FALSE) + scale_colour_tableau("colorblind10")
CI <- lm(y~x1+x2) %>% tidy(conf.int=TRUE) CI
## term estimate std.error statistic p.value conf.low ## 1 (Intercept) -0.908 0.126 -7.23 1.14e-10 -1.157 ## 2 x1 0.585 0.177 3.31 1.33e-03 0.234 ## 3 x2 1.031 0.161 6.42 5.11e-09 0.712 ## conf.high ## 1 -0.658 ## 2 0.936 ## 3 1.350
between(beta2, CI[3,6], CI[3,7])
## [1] FALSE
beta2.in <- c() beta0 <- -1; beta1 <- 0.5; beta2 <- 1.5 n<- 100; reps=10000 set.seed(4747) for(i in 1:reps){ x1 <- rep(c(0,1), each=n/2) x2 <- runif(n, min=-1, max=1) y <- beta0 + beta1*x1 + beta2*x2 + rnorm(n, mean=0, sd = 1) CI <- lm(y~x1+x2) %>% tidy(conf.int=TRUE) beta2.in <- c(beta2.in, between(beta2, CI[3,6], CI[3,7])) } sum(beta2.in)/reps
## [1] 0.952
Consider the following set up (note the difference in variability):
set.seed(470) n<- 100 x1 <- rep(c(0,1), each=n/2) x2 <- runif(n, min=-1, max=1) beta0 <- -1; beta1 <- 0.5; beta2 <- 1.5 y <- beta0 + beta1*x1 + beta2*x2 + rnorm(n, mean=0, sd = 1+ x1 + 10*abs(x2)) uneqVar <- data.frame(why=y, ex1 = x1, ex2 = x2)
ggplot(uneqVar, aes(x=ex2, y=why, color=as.factor(ex1))) + geom_point() + stat_smooth(method="lm", se=FALSE) + scale_colour_tableau("colorblind10")
beta2.in <- c() beta0 <- -1; beta1 <- 0.5; beta2 <- 1.5 n<- 100; reps=10000 set.seed(4747) for(i in 1:reps){ x1 <- rep(c(0,1), each=n/2) x2 <- runif(n, min=-1, max=1) y <- beta0 + beta1*x1 + beta2*x2 + rnorm(n, mean=0, sd = 1+ x1 + 10*abs(x2)) CI <- lm(y~x1+x2) %>% tidy(conf.int=TRUE) beta2.in <- c(beta2.in, between(beta2, CI[3,6], CI[3,7])) } sum(beta2.in)/reps
## [1] 0.872
set.seed(4747) pvals = c() reps=10000 for(i in 1:reps){ x1 <- rnorm(10, mean=47, sd=1) x2 <- rnorm(10, mean=47, sd=1) pvals <- c(pvals, t.test(x1,x2, var.equal=TRUE)$p.value) } sum(pvals < .05)/reps
## [1] 0.0484
set.seed(4747) pvals = c() reps=10000 for(i in 1:reps){ x1 <- rnorm(10, mean=47, sd=1) x2 <- rnorm(10, mean=47, sd=100) pvals <- c(pvals, t.test(x1,x2, var.equal=TRUE)$p.value) } sum(pvals < .05)/reps
## [1] 0.0617