-
Notifications
You must be signed in to change notification settings - Fork 1
/
dobscv.m
80 lines (75 loc) · 3.32 KB
/
dobscv.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
function foldIdx = dobscv(numCls, dataF, dataTNum, numFolds)
% DOBSCV  Stratified, distribution-balanced assignment of instances to folds.
%
%   foldIdx = dobscv(numCls, dataF, dataTNum, numFolds)
%
%   Reference cited by the original author:
%   X. Zeng, T.R. Martinez, Distribution-balanced stratified cross validation
%   for accuracy estimation, Journal of Experimental and Theoretical
%   Artificial Intelligence 12 (1) (2000) 1-12.
%
%   Inputs
%     numCls   - number of classes; labels are expected to be 1..numCls.
%                Instances carrying any other label are not placed in a fold.
%     dataF    - feature matrix, one instance per row (rows are addressed by
%                the indices found in dataTNum).
%     dataTNum - numeric class label of every instance. Row or column
%                orientation is accepted.
%     numFolds - number of folds to build.
%
%   Output
%     foldIdx  - 1-by-numFolds struct array; foldIdx(k).indices is a column
%                vector with the row indices of dataF that belong to fold k.
%
%   Procedure, per class: pick a random still-unassigned instance, put it in
%   fold 1, then hand its nearest still-unassigned same-class neighbours to
%   folds 2, 3, ... numFolds. Repeat until every instance has a fold.
%   A class with no more than numFolds instances gets one instance per fold
%   (folds 1..n) without any neighbour search.
%
%   The seed instance is drawn with randi, so seed the generator (rng) before
%   calling if reproducible folds are required.
%
%   NOTE(review): relies on an external function distance(a, b) that is not
%   defined in this file. It is assumed to take one instance as a column
%   vector and a matrix with one instance per column, and to return the
%   distance from the former to each column of the latter - confirm against
%   the helper actually on the path.

    % Force a column so that find() yields column index vectors regardless of
    % how the caller oriented the labels; everything below counts instances
    % and concatenates vertically.
    labels = dataTNum(:);

    % Per-class bookkeeping: member row indices and the fold of each member.
    % (Named cls rather than class to avoid shadowing the builtin class().)
    cls(numCls).indices = [];
    cls(numCls).foldIndices = [];
    % Row indices of the instances in each fold.
    foldIdx(numFolds).indices = [];

    for c = 1:numCls
        members = find(labels == c);
        n = numel(members);
        cls(c).indices = members;

        % Small classes (including empty ones): spread the instances over
        % distinct folds 1..n. This also keeps repeated (duplicated)
        % instances of a tiny class from sharing a fold.
        if n <= numFolds
            cls(c).foldIndices = (1:n).';
            continue
        end

        auxFold = zeros(n, 1);          % 0 means "not assigned yet"
        unassigned = (1:n).';
        while ~isempty(unassigned)
            % Draw the seed uniformly among the instances that still need a
            % fold; it always goes to fold 1.
            seedPos = unassigned(randi(numel(unassigned)));
            seedRow = members(seedPos);

            % Distance from the seed to every instance of the same class.
            dist = distance(transpose(dataF(seedRow, :)), ...
                            transpose(dataF(members, :)));
            [~, sortId] = sort(dist(:));

            auxFold(seedPos) = 1;
            fold = 2;                   % next fold to hand out

            % Walk the neighbours from nearest to farthest and give each
            % unassigned one the next fold. The scan starts at the very first
            % entry because, with duplicated instances, the closest entry is
            % not guaranteed to be the seed itself; the seed is skipped
            % naturally since it is already assigned. On the last round fewer
            % than numFolds-1 instances remain and the loop simply runs out.
            for j = 1:numel(sortId)
                if fold > numFolds
                    break
                end
                if auxFold(sortId(j)) == 0
                    auxFold(sortId(j)) = fold;
                    fold = fold + 1;
                end
            end

            unassigned = find(auxFold == 0);
        end
        cls(c).foldIndices = auxFold;
    end

    % Gather, for every fold, the row indices contributed by each class.
    for f = 1:numFolds
        for c = 1:numCls
            foldIdx(f).indices = [foldIdx(f).indices; ...
                cls(c).indices(cls(c).foldIndices == f)];
        end
    end
end