Random forests or random decision forests are an ensemble learning method for classification, regression and other tasks that operates by constructing a multitude of decision trees at training time. For classification tasks, the output of the random forest is the class selected by most trees. For regression tasks, the mean or average prediction of the individual trees is returned.
Implementation:
import numpy as np
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_blobs
from DecisionTree import DecisionTree
class RandomForest:
    """Bagging ensemble of ``DecisionTree`` classifiers with majority voting.

    ``fit`` trains every tree on its own bootstrap sample (rows drawn with
    replacement) of the training data. ``predict`` returns, for each sample,
    the label predicted by the largest number of trees.

    Args:
        nbr_trees: Number of trees built by ``fit``.
        min_Samples_to_Split: Forwarded unchanged to every ``DecisionTree``.
        maximum_Depth: Forwarded unchanged to every ``DecisionTree``.
        nbr_features: Forwarded unchanged to every ``DecisionTree``.
    """

    def __init__(
        self,
        nbr_trees,
        min_Samples_to_Split=2,
        maximum_Depth=100,
        nbr_features=None,
    ):
        self.nbr_trees = nbr_trees
        self.min_Samples_to_Split = min_Samples_to_Split
        self.maximum_Depth = maximum_Depth
        self.nbr_features = nbr_features
        # Created here (not only in fit) so predict() can detect an
        # unfitted forest instead of failing with an AttributeError.
        self.trees = []

    def fit(self, x, y):
        """Train ``nbr_trees`` trees, each on a fresh bootstrap sample.

        Any previously fitted trees are discarded.

        Args:
            x: Training samples, one row per sample (array-like).
            y: Training labels, one per row of ``x`` (array-like).

        Raises:
            ValueError: If ``x`` and ``y`` have different lengths.
        """
        # Coerce once so lists and other array-likes support the shape
        # lookup and integer-array indexing used while sampling.
        x = np.asarray(x)
        y = np.asarray(y)

        self.trees = []
        for _ in range(self.nbr_trees):
            tree = DecisionTree(
                min_Samples_to_Split=self.min_Samples_to_Split,
                maximum_Depth=self.maximum_Depth,
                nbr_features=self.nbr_features,
            )
            x_i, y_i = self.randomSubSet(x, y)
            tree.fit(x_i, y_i)
            self.trees.append(tree)

    def randomSubSet(self, x, y):
        """Return a bootstrap sample of ``(x, y)``.

        As many rows as ``x`` contains are drawn with replacement, so some
        rows may appear several times and others not at all. Rows of ``x``
        stay paired with their labels in ``y``.

        Args:
            x: Samples, one row per sample (array-like).
            y: Labels, one per row of ``x`` (array-like).

        Returns:
            Tuple ``(x_sample, y_sample)`` of NumPy arrays.

        Raises:
            ValueError: If ``x`` and ``y`` have different lengths.
        """
        x = np.asarray(x)
        y = np.asarray(y)
        nbr_samples = x.shape[0]
        if len(y) != nbr_samples:
            # Otherwise a longer y would be silently mis-paired and a
            # shorter one would fail later with an unrelated IndexError.
            raise ValueError(
                f"x and y must have the same length, got {nbr_samples} and {len(y)}"
            )
        indexes = np.random.choice(nbr_samples, nbr_samples, replace=True)
        return x[indexes], y[indexes]

    def predict(self, x):
        """Predict one label per sample by majority vote over all trees.

        Args:
            x: Samples to classify; passed unchanged to each tree's
                ``predict``.

        Returns:
            NumPy array holding the most frequent tree prediction for each
            sample. On a tie, the label ``Counter.most_common`` lists first
            is returned.

        Raises:
            RuntimeError: If the forest holds no trees (``fit`` has not been
                called, or ``nbr_trees`` was 0).
        """
        if not self.trees:
            raise RuntimeError(
                "RandomForest has no fitted trees; call fit() before predict()"
            )
        # Shape (n_trees, n_samples) -> (n_samples, n_trees), so that each
        # row holds every tree's vote for a single sample.
        trees_predictions = np.array([tree.predict(x) for tree in self.trees])
        votes_per_sample = np.swapaxes(trees_predictions, 0, 1)
        return np.array(
            [Counter(votes).most_common(1)[0][0] for votes in votes_per_sample]
        )
Testing the model:
def accuracy(y_true, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Args:
        y_true: Ground-truth labels (array-like).
        y_pred: Predicted labels (array-like) with the same shape as
            ``y_true``.

    Returns:
        Share of positions where ``y_true`` and ``y_pred`` agree.

    Raises:
        ValueError: If the two inputs have different shapes.
    """
    # Coerce to arrays: comparing two plain lists with == yields a single
    # bool rather than an element-wise result.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        # Guard against silent broadcasting, e.g. (n,) against (n, 1).
        raise ValueError(
            f"y_true and y_pred must have the same shape, "
            f"got {y_true.shape} and {y_pred.shape}"
        )
    return np.mean(y_true == y_pred)
# Demo: train a 10-tree forest on a synthetic 10-class dataset and report
# accuracy on a 10% hold-out split. Guarded so that importing this module
# does not trigger training and printing as a side effect.
if __name__ == "__main__":
    # Only the dataset generation is seeded; the train/test split and the
    # bootstrap sampling are not, so the printed accuracy varies per run.
    x, y = make_blobs(n_samples=100, n_features=10, centers=10, random_state=0)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1)

    R_Forest = RandomForest(nbr_trees=10)
    R_Forest.fit(x_train, y_train)
    y_hat = R_Forest.predict(x_test)

    print("Model Accuracy : ", accuracy(y_test, y_hat))