Jo Hardin
October 25, 2017
library(GGally) # for plotting
library(caret) # for partitioning & classification
# Load the built-in iris data and draw a scatterplot matrix of all columns.
data(iris)
# Aesthetics must be supplied through `mapping`; current GGally ignores
# (with a warning) aesthetics passed as bare extra arguments such as
# `color = "Species"`, which would leave the plot uncoloured.
ggpairs(iris, mapping = ggplot2::aes(color = Species, alpha = 0.4))
First, fit and evaluate the model on the full data set, without separating test and training data:
# Fit a k-nearest-neighbours classifier (k = 3) on the complete iris data.
# No resampling is requested, so exactly one model is fit at the given k.
fitControl <- trainControl(method = "none", classProbs = TRUE)
tr.iris <- train(
  Species ~ .,
  data = iris,
  method = "knn",
  trControl = fitControl,
  tuneGrid = data.frame(k = 3)
)
# Predictions are made on the same rows the model was fit to, then
# cross-tabulated against the true species labels.
confusionMatrix(
  data = predict(tr.iris, newdata = iris),
  reference = iris[["Species"]]
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 47 3
## virginica 0 3 47
##
## Overall Statistics
##
## Accuracy : 0.96
## 95% CI : (0.915, 0.9852)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.94
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.9400 0.9400
## Specificity 1.0000 0.9700 0.9700
## Pos Pred Value 1.0000 0.9400 0.9400
## Neg Pred Value 1.0000 0.9700 0.9700
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3133 0.3133
## Detection Prevalence 0.3333 0.3333 0.3333
## Balanced Accuracy 1.0000 0.9550 0.9550
# Seed the RNG so the random train/test split below is reproducible.
set.seed(4747)
# Draw 70% of the rows for training; list = FALSE returns the selected
# row indices as a matrix rather than a list.
inTrain <- createDataPartition(y = iris[["Species"]], p = 0.7, list = FALSE)
iris.train <- iris[inTrain, ]
# Everything not selected for training becomes the held-out test set.
iris.test <- iris[-as.vector(inTrain), ]
# Fit a single kNN model (k = 5) on the training rows only, no resampling.
fitControl <- trainControl(method = "none")
tr.iris <- train(
  Species ~ .,
  data = iris.train,
  method = "knn",
  trControl = fitControl,
  tuneGrid = data.frame(k = 5)
)
# Evaluate on the held-out rows.
confusionMatrix(
  data = predict(tr.iris, newdata = iris.test),
  reference = iris.test[["Species"]]
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 15 0 0
## versicolor 0 13 1
## virginica 0 2 14
##
## Overall Statistics
##
## Accuracy : 0.9333
## 95% CI : (0.8173, 0.986)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.8667 0.9333
## Specificity 1.0000 0.9667 0.9333
## Pos Pred Value 1.0000 0.9286 0.8750
## Neg Pred Value 1.0000 0.9355 0.9655
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.2889 0.3111
## Detection Prevalence 0.3333 0.3111 0.3556
## Balanced Accuracy 1.0000 0.9167 0.9333
# Seed the RNG so the cross-validation fold assignment is reproducible.
set.seed(47)
# Tune k with 10-fold cross-validation on the training rows, trying the
# odd values 1, 3, ..., 11.
fitControl <- trainControl(method = "cv", number = 10)
tr.iris <- train(
  Species ~ .,
  data = iris.train,
  method = "knn",
  trControl = fitControl,
  tuneGrid = data.frame(k = seq(1, 11, by = 2))
)
# Print the resampling summary and the selected k.
tr.iris
## k-Nearest Neighbors
##
## 105 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 95, 96, 94, 93, 95, 94, ...
## Resampling results across tuning parameters:
##
## k Accuracy Kappa
## 1 0.9818182 0.9724957
## 3 0.9909091 0.9864198
## 5 0.9809091 0.9710351
## 7 0.9709091 0.9561167
## 9 0.9718182 0.9582454
## 11 0.9809091 0.9714944
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 3.
# Evaluate the cross-validated fit on the held-out test rows.
confusionMatrix(
  data = predict(tr.iris, newdata = iris.test),
  reference = iris.test[["Species"]]
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 15 0 0
## versicolor 0 13 0
## virginica 0 2 15
##
## Overall Statistics
##
## Accuracy : 0.9556
## 95% CI : (0.8485, 0.9946)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9333
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.8667 1.0000
## Specificity 1.0000 1.0000 0.9333
## Pos Pred Value 1.0000 1.0000 0.8824
## Neg Pred Value 1.0000 0.9375 1.0000
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.2889 0.3333
## Detection Prevalence 0.3333 0.2889 0.3778
## Balanced Accuracy 1.0000 0.9333 0.9667