function [nn_params, cost, best_cv_fscores] = trainNNModel(X, y, num_labels, ...
    num_hidden_layers, hidden_layer_size, lambda, maxIterations, ...
    numTrainingSteps, initial_nn_params, thresholds, train_indexes, ...
    cv_indexes, file, mu, sigma, label_affected_by_threshold, ...
    invertThreshold, U)
% NOTE: due to memory constraints, the raw data is reloaded from disk several
% times inside this function (temporary workaround).
% Some useful variables
m = size(X, 1);
input_layer_size = size(X, 2);
cost = [];
aux_cost = [];
best_cv_fscores = [];
% Add the bias column of ones to the X data matrix
X = [ones(m, 1) X];
% Expand the label vector y into a binary (one-hot) class matrix Y
I = eye(num_labels);
Y = zeros(m, num_labels);
for i=1:m
Y(i,:) = I(y(i),:);
end
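% A vectorized alternative to the loop above (a sketch; like the loop, it
% assumes y holds integer labels in 1..num_labels):
%   Y = I(y, :); % row-index the identity matrix by the label vector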
fprintf('\nStarting to train the NN model. Number of hidden layers: (%s), hidden layer size: (%s), lambda: (%s)...\n', num2str(num_hidden_layers), num2str(hidden_layer_size),...
num2str(lambda));
if isempty(initial_nn_params)
% The initial parameters are empty, so the network is trained from scratch with
% randomly initialized weights, unrolled into one column vector. A temporary
% variable is used because chained indexing like f(...)(:) is Octave-only,
% so this form also runs in MATLAB.
Theta = randInitializeWeights(input_layer_size, hidden_layer_size);
initial_nn_params = Theta(:); % first theta (input layer -> first hidden layer)
for i=2:num_hidden_layers
% iterates from the second hidden layer to the last hidden layer
Theta = randInitializeWeights(hidden_layer_size, hidden_layer_size);
initial_nn_params = [initial_nn_params ; Theta(:)];
end
Theta = randInitializeWeights(hidden_layer_size, num_labels);
initial_nn_params = [initial_nn_params ; Theta(:)]; % last theta (output theta)
end
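% For reference, randInitializeWeights is assumed to follow the common
% symmetry-breaking scheme below (a sketch, not necessarily this repo's exact
% implementation; the extra column covers the bias unit):
%   function W = randInitializeWeights(L_in, L_out)
%       epsilon_init = 0.12;
%       W = rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init;
%   end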
% Set options for fmincg (a conjugate-gradient minimizer that accepts
% fminunc-style options)
options = optimset('GradObj', 'on', 'MaxIter', maxIterations);
% Create "short hand" for the cost function to be minimized
costFunction = @(p) nnCostFunction(p, ...
input_layer_size, ...
num_hidden_layers, ...
hidden_layer_size, ...
num_labels, X, Y, lambda);
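% For orientation: the unrolled vector p is assumed to pack the weight matrices
% in layer order, so nnCostFunction can recover each one with reshape (a sketch
% of the assumed layout, not this repo's exact code):
%   Theta1 = reshape(p(1:hidden_layer_size * (input_layer_size + 1)), ...
%                    hidden_layer_size, input_layer_size + 1);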
% costFunction now takes a single argument (the unrolled network parameters).
% Note that the anonymous function captures X and Y by value at creation time,
% so the clear/reload of X below does not affect the training data it uses.
for i=1:numTrainingSteps
current_best_cv_fscore = -1;
[nn_params, aux_cost] = fmincg(costFunction, initial_nn_params, options);
initial_nn_params = nn_params;
cost = [cost ; aux_cost];
% Loading the cross-validation data
clear X; clear y;
load(file);
X = X(cv_indexes, :); % selecting our cross-validation rows
y = y(cv_indexes, :); % selecting our cross-validation labels
X = featureNormalize(X, mu, sigma); % normalizing the features
if (~isempty(U))
% Reduce our data to input_layer_size dims
X = projectData(X, U, input_layer_size);
end
for aThreshold = thresholds
% Evaluating this specific model against the CV data
printEval = (aThreshold == 0); % print the evaluation only for threshold == 0 (renamed to avoid shadowing the built-in print)
[fscore, confusionMatrix] = evaluateModel(nn_params, num_hidden_layers,...
hidden_layer_size, lambda, num_labels, X, y,...
aThreshold, label_affected_by_threshold, invertThreshold, printEval);
if(fscore > current_best_cv_fscore)
current_best_cv_fscore = fscore;
end
end
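% (fscore is assumed to be the usual F1 = 2*P*R / (P + R), with precision P and
% recall R derived by evaluateModel from confusionMatrix; sweeping thresholds
% keeps the best F1 this model reaches on the CV set.)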
best_cv_fscores = [best_cv_fscores ; current_best_cv_fscore];
% Reloading the training data (note: costFunction keeps its own captured copy
% of X and Y from when it was created, so this reload only restores the
% workspace X)
clear X; clear y;
load(file);
X = X(train_indexes, :); % selecting our training rows
X = featureNormalize(X, mu, sigma); % normalizing the features
if (~isempty(U))
% Reduce our data to input_layer_size dims
X = projectData(X, U, input_layer_size);
end
end
disp(best_cv_fscores); % auxiliary display to track generalization across training steps
end
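% Example call (a sketch; every value below is illustrative and 'data.mat' is a
% hypothetical file containing the full X and y):
%   thresholds = 0:0.1:0.9;
%   [nn_params, cost, fscores] = trainNNModel(Xtrain, ytrain, 4, 2, 25, 1, ...
%       50, 10, [], thresholds, train_idx, cv_idx, 'data.mat', mu, sigma, ...
%       1, false, []);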