-
Notifications
You must be signed in to change notification settings - Fork 6
/
repeatedRandomSubSamplingValidation_extra_tree_ensemble.m
143 lines (102 loc) · 4.12 KB
/
repeatedRandomSubSamplingValidation_extra_tree_ensemble.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
function [model] = repeatedRandomSubSamplingValidation_extra_tree_ensemble(subset,M,k,nmin,ns,flag)
% This function runs a repeated random sub-sampling validation of an
% ensemble of Extra-Trees
%
% Inputs:
% subset = observations (one row per observation; last column is the target)
% M = number of trees in the ensemble
% k = number of random cut-directions
% nmin = minimum number of points per leaf
% ns = number of repetitions
% flag = if flag == 1, the model is then evaluated (and saved) on the
% full dataset
%
% Output:
% model = structure containing models and performance
%
%
% Copyright 2014 Matteo Giuliani
% Research Fellow, Politecnico di Milano
% matteo.giuliani@polimi.it
% http://giuliani.faculty.polimi.it
%
%
% Please refer to README.txt for further information.
%
%
% This file is part of MATLAB_IterativeInputSelection.
%
% MATLAB_IterativeInputSelection is free software: you can redistribute
% it and/or modify it under the terms of the GNU General Public License
% as published by the Free Software Foundation, either version 3 of the
% License, or (at your option) any later version.
%
% This code is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with MATLAB_IterativeInputSelection.
% If not, see <http://www.gnu.org/licenses/>.
%
% 0) SET THE PROBLEM PARAMETERS FOR THE ENSEMBLE CROSS-VALIDATION
% Number of observations used for validation in each repetition.
% BUGFIX: use size(subset,1), not length(subset): length() returns the
% LARGEST dimension of a matrix, so with more attributes than observations
% the validation-set size would be computed from the number of columns.
lv = floor(size(subset,1)/ns);
% C-ExtraTrees algorithm parameters
rtensparam = init_extra_trees();
rtensparam.nbterms = M;
rtensparam.rtparam.extratreesk = k;
rtensparam.rtparam.nmin = nmin;
% 1) INITIALIZATION OF THE OUTPUT VARIABLES
% Per-repetition R2 on the calibration and validation sets
Rt2_cal_pred = zeros(ns,1);
Rt2_val_pred = zeros(ns,1);
% Initialize the function output
model.cross_validation.performance = [];
% 2) MODEL CONSTRUCTION AND EVALUATION OF THE PERFORMANCES
% (repeated random sub-sampling validation, ns repetitions)
for i = 1:ns
    % Randomly split the observations into calibration (size(subset,1)-lv
    % rows) and validation (lv rows) data-sets
    [subset_tar, subset_val] = randomSampling_CalVal( subset, lv ) ;
    % Inputs/target for the Extra-Trees routine (last column = target)
    X1 = single(subset_tar(:,1:end-1));
    Y1 = single(subset_tar(:,end));
    ls1 = int32(1:size(subset_tar,1));
    X2 = single(subset_val(:,1:end-1));
    % Build the ensemble and predict on the validation set (1st output)
    % and the calibration set (4th output); evalc suppresses the verbose
    % console output of rtenslearn_c.
    evalc('[finalResult_val_pred temp1 temp2 finalResult_cal_pred] = rtenslearn_c(X1,Y1,ls1,[],rtensparam,X2,0)');
    Rt2_cal_pred(i) = Rt2_fit(subset_tar(:,end),finalResult_cal_pred);
    Rt2_val_pred(i) = Rt2_fit(subset_val(:,end),finalResult_val_pred);
end
% Per-repetition and average R2
model.cross_validation.performance.Rt2_cal_pred = Rt2_cal_pred;
model.cross_validation.performance.Rt2_val_pred = Rt2_val_pred;
model.cross_validation.performance.Rt2_cal_pred_mean = mean(Rt2_cal_pred);
model.cross_validation.performance.Rt2_val_pred_mean = mean(Rt2_val_pred);
% 3) MODEL CONSTRUCTION ON THE WHOLE DATA-SET
% Check if it is necessary to build and test the model on the whole data-set
if flag == 1
    % Add new fields to the function output
    model.complete_model.ensemble = [];
    model.complete_model.trajectories = [];
    model.complete_model.performance = [];
    % Model construction on all observations
    X1 = single(subset(:,1:end-1));
    Y1 = single(subset(:,end));
    ls1 = int32(1:size(subset,1));
    % BUGFIX: pass X1 (the full dataset) as the test matrix instead of the
    % stale X2 left over from the last cross-validation repetition. The
    % 4th output (calibration predictions) is kept as before; only the
    % ignored 1st output is affected by the test matrix.
    evalc('[temp0 temp1 ensemble finalResult_pred] = rtenslearn_c(X1,Y1,ls1,[],rtensparam,X1,0)');
    model.complete_model.ensemble = ensemble;
    model.complete_model.trajectories = finalResult_pred;
    % Evaluate R2 on the full dataset
    model.complete_model.performance.Rt2 = Rt2_fit(subset(:,end),finalResult_pred);
end
% This code has been written by Matteo Giuliani.