#install.packages("rpart")
 #install.packages("rpart.plot") - when you run this in your own RStudio you need to install these packages first; here they are already pre-installed, which is why they are commented out.

 library(rpart)
 moody <- read.csv("https://raw.githubusercontent.com/kunal0895/RDatasets/master/Moody2018.csv")
 tree <- rpart(GRADE ~ SCORE+ON_SMARTPHONE+ASKS_QUESTIONS+LEAVES_EARLY+LATE_IN_CLASS+FINALEXAM,data=moody)
 #GRADE is the data we want to analyze and the rest of the data is what we use to accomplish this analysis.

 tree

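 # A trailing "*" marks a terminal node (a leaf, where no further split is made); the number in front of each line is the node's id (the children of node n are numbered 2n and 2n+1), not the order in which the decisions were made.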

The text output may be hard to interpret, so you may want to use a plot to visualize the decision tree.

Before doing that, you need to install the "rpart.plot" package.

 library(rpart)
 moody <- read.csv("https://raw.githubusercontent.com/kunal0895/RDatasets/master/Moody2018.csv")
 tree <- rpart(GRADE ~ SCORE+ON_SMARTPHONE+ASKS_QUESTIONS+LEAVES_EARLY+LATE_IN_CLASS+FINALEXAM,data=moody)
 library(rpart.plot)
 rpart.plot(tree)

The plot looks messy, right? Unless you control it manually, rpart will sometimes make splits based on trivial details of the data.

There are two basic ways to control the decision making.

1. minsplit

The minimum number of observations that must exist in a node in order for a split to be attempted.

 library(rpart)
 moody <- read.csv("https://raw.githubusercontent.com/kunal0895/RDatasets/master/Moody2018.csv")
 tree <- rpart(GRADE ~ SCORE+ON_SMARTPHONE+ASKS_QUESTIONS+LEAVES_EARLY+LATE_IN_CLASS+FINALEXAM,data=moody,control=rpart.control(minsplit = 300))
 #the minimum number of observatiions that must exist in a node is now 300.
 library(rpart.plot)
 rpart.plot(tree)

2. minbucket

The minimum number of observations in any terminal node.

 library(rpart)
 moody <- read.csv("https://raw.githubusercontent.com/kunal0895/RDatasets/master/Moody2018.csv")
 tree <- rpart(GRADE ~ SCORE+ON_SMARTPHONE+ASKS_QUESTIONS+LEAVES_EARLY+LATE_IN_CLASS+FINALEXAM,data=moody,control=rpart.control(minbucket =100))
 #the minimum number of observation in any terminal node is now 100.
 library(rpart.plot)
 rpart.plot(tree)

Also, you can use the "type" argument of "prp" (short for "plot rpart model") to change the type of the rpart plot.

 library(rpart)
 moody <- read.csv("https://raw.githubusercontent.com/kunal0895/RDatasets/master/Moody2018.csv")
 tree <- rpart(GRADE ~ SCORE+ON_SMARTPHONE+ASKS_QUESTIONS+LEAVES_EARLY+LATE_IN_CLASS+FINALEXAM,data=moody,control=rpart.control(minbucket =100))
 library(rpart.plot)
 rpart.plot(tree)
 prp(tree,type=1)
 #try to change the value of type and see what would happen.

You can also change what information is displayed at each node by using the "extra" argument.

 library(rpart)
 moody <- read.csv("https://raw.githubusercontent.com/kunal0895/RDatasets/master/Moody2018.csv")
 tree <- rpart(GRADE ~ SCORE+ON_SMARTPHONE+ASKS_QUESTIONS+LEAVES_EARLY+LATE_IN_CLASS+FINALEXAM,data=moody,control=rpart.control(minbucket =100))
 library(rpart.plot)
 rpart.plot(tree)
 prp(tree, extra=100)
 #Try different values of extra and see what would happen.

You can also change the shape of leaves in rpart plot to a rectangle by using "round=0"

 library(rpart)
 moody <- read.csv("https://raw.githubusercontent.com/kunal0895/RDatasets/master/Moody2018.csv")
 tree <- rpart(GRADE ~ SCORE+ON_SMARTPHONE+ASKS_QUESTIONS+LEAVES_EARLY+LATE_IN_CLASS+FINALEXAM,data=moody,control=rpart.control(minbucket =100))
 library(rpart.plot)
 rpart.plot(tree)
 prp(tree,round=0)
 #round = 0 means not round at all.

Next, you may want to know how to change the color of the leaf box.

 library(rpart)
 moody <- read.csv("https://raw.githubusercontent.com/kunal0895/RDatasets/master/Moody2018.csv")
 tree <- rpart(GRADE ~ SCORE+ON_SMARTPHONE+ASKS_QUESTIONS+LEAVES_EARLY+LATE_IN_CLASS+FINALEXAM,data=moody,control=rpart.control(minbucket =100))
 library(rpart.plot)
 rpart.plot(tree,box.col="pink")
 #change the color of the leaves into blue

If you want to paint your leaf boxes with different colors, you can use prp(tree, box.palette= "auto"). (It won't show up in the online laboratory, maybe due to a different package version, but you can try it in your own R installation.)

Then how can we change the line style of the branch?

 library(rpart)
 moody <- read.csv("https://raw.githubusercontent.com/kunal0895/RDatasets/master/Moody2018.csv")
 tree <- rpart(GRADE ~ SCORE+ON_SMARTPHONE+ASKS_QUESTIONS+LEAVES_EARLY+LATE_IN_CLASS+FINALEXAM,data=moody,control=rpart.control(minbucket =100))
 library(rpart.plot)
 rpart.plot(tree,branch.lty=3)

There are still some advanced parameters to refine your rpart plot and you can check those by yourself, but here is an easier way to make your plot beautiful.

We can use the "fancyRpartPlot" function from the "rattle" package.

Before using it, you may need to install "rattle" and "RColorBrewer" packages.

 library(rattle)
 library(rpart.plot)
 library(RColorBrewer)
 library(rpart)
 moody <- read.csv("https://raw.githubusercontent.com/kunal0895/RDatasets/master/Moody2018.csv")
 tree <- rpart(GRADE ~ SCORE+ON_SMARTPHONE+ASKS_QUESTIONS+LEAVES_EARLY+LATE_IN_CLASS+FINALEXAM,data=moody,control=rpart.control(minbucket =100))
 fancyRpartPlot(tree)

Now, you can start creating your own beautiful plot!

rpart(formula, data=, method=,control= )