---
title: "CART Stuff"
output: html_notebook
---
## Useful packages:
```{r package}
library(rpart)
library(rpart.plot)
#install.packages("e1071")
library(caret)
library(ggplot2)
library(e1071)
library(flexmix)
```
## Classification
<https://insightimi.wordpress.com/2020/03/08/classification-and-regression-tree/>

* Data Set (requires free login): <https://www.kaggle.com/sriharipramod/bank-loan-classification/data>
```{r}
# Load the Universal Bank loan data from the course server.
# `header = TRUE` is spelled out because `T` is an ordinary, reassignable
# variable rather than a reserved word.
bankDf <- read.csv(
  "https://pluto.coe.fsu.edu/svn/common/rgroup-shiny/data/UniversalBank.csv",
  header = TRUE
)
# View() opens a data viewer, so only call it when someone is at the console;
# a non-interactive render of the notebook then skips it.
if (interactive()) {
  View(bankDf)
}
summary(bankDf)
```
```{r partition data}
## Drop ID and Zip (columns 1 and 5).
## NOTE: positional, so re-running this chunk without reloading the data
## removes two further columns.
bankDf <- bankDf[, -c(1, 5)]

## Reproducible random partition: 60% of the rows for training, the rest for
## testing.
set.seed(1)
trainIndex <- sample(seq_len(nrow(bankDf)), size = 0.6 * nrow(bankDf))
trainData <- bankDf[trainIndex, ]
testData <- bankDf[-trainIndex, ]
```
```{r fit tree}
# Fit a classification tree for Personal.Loan using every other column of the
# training data as a candidate predictor, with rpart's default settings.
defaultTree <- rpart(
  formula = Personal.Loan ~ .,
  data = trainData,
  method = "class"
)
# Show the fitted tree in text form.
print(defaultTree)
```
```{r plottree}
# Plot the default tree with rpart.plot::prp(), one display option per line.
prp(
  x = defaultTree,
  type = 1,
  extra = 1,
  under = TRUE,
  split.font = 1,
  varlen = -10
)
```
```{r moreReport}
# Alternative rendering of the same tree via rattle. The function is called
# through its namespace, so rattle must be installed but is not attached.
rattle::fancyRpartPlot(model = defaultTree)
```
```{r prediction}
# Peek at the first few test-set predictions. No `type` is given, so
# predict() uses its default output for this tree.
head(
  predict(object = defaultTree, newdata = testData)
)
```
```{r ConfusionMatrix}
# Predicted class labels for the training rows (resubstitution predictions).
predictionModelTrain <- predict(
  object = defaultTree,
  newdata = trainData,
  type = "class"
)
# Compare predictions with the observed labels; the observed column is
# converted to a factor because confusionMatrix() expects factors.
confusionMatrix(
  data = predictionModelTrain,
  reference = as.factor(trainData$Personal.Loan)
)
```
```{r ConfusionMatrixTest}
# Predicted class labels for the held-out test rows.
predictionModelTest <- predict(
  object = defaultTree,
  newdata = testData,
  type = "class"
)
# Out-of-sample confusion matrix for the default tree.
confusionMatrix(
  data = predictionModelTest,
  reference = as.factor(testData$Personal.Loan)
)
```
```{r complex}
# Grow a deliberately oversized tree: cp = 0 removes the complexity penalty
# and minsplit = 1 lowers the minimum node size required to attempt a split.
complexDefaultTree <- rpart(
  Personal.Loan ~ .,
  data = trainData,
  method = "class",
  cp = 0,
  minsplit = 1
)
# Count the terminal nodes. rpart marks leaves in frame$var with the level
# "<leaf>"; comparing against "" never matches and always reported 0.
sum(complexDefaultTree$frame$var == "<leaf>")
```
```{r drawComplexTree}
# Plot the full tree, shading terminal nodes gray and internal nodes white.
# Leaves are identified by the "<leaf>" level of frame$var; the previous
# comparison against "" matched nothing, so every box was drawn white.
prp(
  complexDefaultTree,
  type = 1,
  extra = 1,
  under = TRUE,
  split.font = 1,
  varlen = -10,
  box.col = ifelse(complexDefaultTree$frame$var == "<leaf>", "gray", "white")
)
# Complexity-parameter table and its plot, used to choose a pruning cp.
printcp(complexDefaultTree)
plotcp(complexDefaultTree)
```
The fit doesn't get better with cp < 0.021 (see the `printcp()`/`plotcp()` output above), so the tree is pruned at that value.
```{r pruning}
# Prune the oversized tree back at the chosen complexity parameter.
prunedComplexDefaultTree <- prune(complexDefaultTree, cp = 0.021)
# Number of terminal nodes left after pruning. Leaves carry the "<leaf>" level
# in frame$var; the previous comparison against "" always counted 0.
sum(prunedComplexDefaultTree$frame$var == "<leaf>")
prp(
  prunedComplexDefaultTree,
  type = 1,
  extra = 1,
  split.font = 1,
  varlen = -10
)
```
```{r fancyplotPruned}
# Draw the pruned tree with rattle's plot. The function is called through its
# namespace, so rattle must be installed but is not attached.
rattle::fancyRpartPlot(model = prunedComplexDefaultTree)
```
```{r ConfusionNew}
# Class predictions from the pruned tree on the held-out test rows.
predictionModelTestNew <- predict(
  object = prunedComplexDefaultTree,
  newdata = testData,
  type = "class"
)
# Out-of-sample confusion matrix for the pruned tree.
confusionMatrix(
  data = predictionModelTestNew,
  reference = as.factor(testData$Personal.Loan)
)
```
## Regression
<https://insightimi.wordpress.com/2020/03/15/cart-regression-tree-from-scratch-with-a-hands-on-examplein-r/>

* Data Set (requires free login): <https://www.kaggle.com/hashroot97/carpriceprediction/version/1>
```{r}
# Import the data set.
# NOTE(review): everything below assumes the CSV has a `quality` column to use
# as the response -- confirm against the file.
carprices <- read.csv(
  "https://pluto.coe.fsu.edu/svn/common/rgroup-shiny/data/CarPricePrediction.csv",
  header = TRUE
)

# Partition the data: 4/5 for training, 1/5 for testing.
# sample.split() comes from caTools, which this notebook never attaches, so it
# is called through its namespace (caTools must be installed).
set.seed(123)
split <- caTools::sample.split(carprices$quality, SplitRatio = 4 / 5)
training_set <- subset(carprices, split == TRUE)
test_set <- subset(carprices, split == FALSE)
training_set
test_set

# Fit the regression tree on the training rows only.
library(rpart)
library(rpart.plot)
regressor <- rpart(formula = quality ~ ., data = training_set)
summary(regressor)
prp(regressor, type = 1, extra = 1, under = TRUE, split.font = 1, varlen = -10)
predict(regressor, test_set)

# Inspect the complexity table of the training-set tree. The model is NOT
# refit on test_set here: growing and pruning the tree on the same rows it is
# later evaluated on would leak the test data into the model.
printcp(regressor)
plotcp(regressor)

# Prune at the cp with the lowest cross-validated error instead of a
# hard-coded value that had been read off a tree fitted to the test set.
cp_table <- regressor$cptable
best_cp <- cp_table[which.min(cp_table[, "xerror"]), "CP"]
Final_pruned_Tree <- prune(regressor, cp = best_cp)

# Number of terminal nodes; rpart marks leaves with "<leaf>" in frame$var
# (comparing against "" always counted 0).
sum(Final_pruned_Tree$frame$var == "<leaf>")
prp(Final_pruned_Tree, type = 1, extra = 1, split.font = 1, varlen = -10)

# Predictions of the pruned tree on the held-out test rows.
New_predict <- predict(Final_pruned_Tree, test_set)
```
## Mixture Models