-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathRandomForest.R
79 lines (59 loc) · 2.77 KB
/
RandomForest.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#Random Forest for classification
#-------------------------------------------------------------------#
#Library Required
#-------------------------------------------------------------------#
library(randomForest)
#-------------------------------------------------------------------#
#input parameters
#-------------------------------------------------------------------#
#c_path_out <- "D:/temp"
#c_path_in <- "D:/temp/sample.csv"
#c_var_in_dependent <- "var_1"
#c_var_in_independent <- c("var_2","var_3","var_4","var_5")
#n_tree <- 101
#-------------------------------------------------------------------#
#Functions
#-------------------------------------------------------------------#
# Misclassification rate: the share of observations whose predicted label
# differs from the actual label.
#
# actual:    vector of observed labels.
# predicted: vector of predicted labels, same length as `actual`.
#
# Returns a single number in [0, 1] (NaN when `actual` is empty). Pairs whose
# comparison is NA are not counted as errors but still count in the
# denominator.
classificationError <- function (actual, predicted)
{
  # Guard against silent recycling when the two vectors are misaligned.
  stopifnot(length(actual) == length(predicted))
  # The last expression is the (visible) return value.
  sum(actual != predicted, na.rm = TRUE) / length(actual)
}
#-------------------------------------------------------------------#
#load dataset
#-------------------------------------------------------------------#
# Read the input CSV. The dataset lives at c_path_in; c_path_out names the
# output location (see the input parameters above), not a readable file.
data <- read.csv(c_path_in)
#-------------------------------------------------------------------#
#explore data
#-------------------------------------------------------------------#
#dim(data)
#summary(data)
#head(data,2)
#-------------------------------------------------------------------#
#subset
#-------------------------------------------------------------------#
# Drop every row that contains at least one missing value.
data <- na.omit(data)
#-------------------------------------------------------------------#
#Split function for validation
#-------------------------------------------------------------------#
# Draw a reproducible random sample of row indices for the training set.
#
# numRow: number of rows available to sample from.
# seed:   value passed to set.seed() so the same split can be reproduced.
#
# Returns trunc(numRow * 2/3) distinct indices drawn from 1..numRow.
# Side effect: resets the global RNG state via set.seed().
partition <- function(numRow,seed) {
  set.seed(seed)
  # sample.int() always samples from 1..numRow, including numRow == 0,
  # and the call is the function's visible return value.
  sample.int(n = numRow,
             size = trunc(numRow * (2 / 3)),
             replace = FALSE)
}
# Reproducible split: the sampled rows form the training set, the remaining
# rows form the test set.
index <- partition(numRow = nrow(data),seed = 123)
train <- data[index,]
# Select the hold-out rows by positive index: with an empty `index`,
# data[-index, ] would return zero rows instead of every remaining row.
test <- data[setdiff(seq_len(nrow(data)), index), ]
#-------------------------------------------------------------------#
#Model Building
#-------------------------------------------------------------------#
# Build "<response>~<x1>+<x2>+..." as ONE string. The predictors are collapsed
# with "+" first: pasting the predictor vector directly yields one formula
# string per predictor, which as.formula() cannot turn into a single
# multi-predictor model.
formula <- paste0(c_var_in_dependent, "~",
                  paste(c_var_in_independent, collapse = "+"))
# NOTE(review): randomForest() treats the model as classification only when
# the response column is a factor; confirm train[[c_var_in_dependent]] is one.
rf_fit <- randomForest(formula = as.formula(formula),
                       data = train,
                       ntree = n_tree)
#-------------------------------------------------------------------#
#Validation
#-------------------------------------------------------------------#
# Score the held-out rows with the fitted forest and keep the predictions
# next to the observed values.
test$predicted <- predict(rf_fit, newdata = test)
# NOTE: despite its name, `accuracy` holds the value returned by
# classificationError(), i.e. the share of mismatches, not the share of hits.
accuracy <- classificationError(actual = test[[c_var_in_dependent]],
                                predicted = test$predicted)