
Different results of AutoNN and DagNN? #36

Open · ghost opened this issue Apr 11, 2018 · 1 comment

ghost commented Apr 11, 2018

Hi all.

I used DagNN to train my network for an image auto-white-balancing application and reached a final objective of approximately 0.1 (Euclidean distance, computed with the pdist loss).
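
For reference, a minimal plain-MATLAB sketch of what that per-sample p = 2 distance amounts to for a 1x1x2 prediction and its label; the numbers are made up for illustration:

% per-sample Euclidean (p = 2) distance between prediction and label
pred  = [0.45 0.52] ;   % hypothetical predicted gains
truth = [0.40 0.50] ;   % hypothetical ground-truth gains
dist  = sqrt(sum((pred - truth).^2))   % ~0.054, the scale of the ~0.1 objective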

Yesterday I found AutoNN, an impressive wrapper for MatConvNet, and rebuilt my architecture in it. I use the same layer structure and initialize with the same parameters, but the objective stays at 6~8 after the same number of epochs.

Any idea about what mistake I am making would be appreciated.

Here is the network-construction code in DagNN and in AutoNN:

% DagNN code
opts.batchSize = [];
opts.imageSize = [384 384];
opts.averageImage = zeros(3,1) ;
opts.colorDeviation = zeros(3) ;
opts.cudnnWorkspaceLimit = 4*1024*1024*1024 ; % 4GB
opts = vl_argparse(opts, varargin) ;

net = dagnn.DagNN() ;

% -------------------------------------------------------------------------
% Add input section
% -------------------------------------------------------------------------

% Block #1
net.addLayer('conv1',...
             dagnn.Conv('size', [1 1 3 8], 'hasBias', true, 'stride', [1 1], 'pad', [0 0 0 0]),...
             {'inputimage'},...
             {'conv1'},...
             {'conv1f'  'conv1b'}); 
net.addLayer('relu1',...
             dagnn.ReLU(),...
             {'conv1'},...
             {'relu1'},...
             {});
net.addLayer('pool1',...
             dagnn.Pooling('method', 'max', 'poolSize', [2 2], 'stride', [2 2], 'pad', [0 0 0 0]),...
             {'relu1'},...
             {'pool1'},...
             {});

% Block #2
net.addLayer('conv2',...
             dagnn.Conv('size', [5 5 8 32], 'hasBias', true, 'stride', [1 1], 'pad', [2 2 2 2]),...
             {'pool1'},...
             {'conv2'},...
             {'conv2f'  'conv2b'});
net.addLayer('relu2',...
             dagnn.ReLU(),...
             {'conv2'},...
             {'relu2'},...
             {});
net.addLayer('pool2',...
             dagnn.Pooling('method', 'max', 'poolSize', [2, 2], 'stride', [2 2], 'pad', [0 0 0 0]),...
             {'relu2'},...
             {'pool2'},...
             {});

% Block #3
net.addLayer('conv3',...
             dagnn.Conv('size', [3 3 32 128], 'hasBias', true, 'stride', [3 3], 'pad', [3 3 3 3]),...
             {'pool2'},...
             {'conv3'},...
             {'conv3f'  'conv3b'});
net.addLayer('relu3',...
             dagnn.ReLU(),...
             {'conv3'},...
             {'relu3'},...
             {});
net.addLayer('pool3',...
             dagnn.Pooling('method', 'max', 'poolSize', [2, 2], 'stride', [2 2], 'pad', [0 0 0 0]),...
             {'relu3'},...
             {'pool3'},...
             {});

% Block #4
net.addLayer('conv4',...
             dagnn.Conv('size', [1 1 128 256], 'hasBias', true, 'stride', [2 2], 'pad', [0 0 0 0]),...
             {'pool3'},...
             {'conv4'},...
             {'conv4f'  'conv4b'});
net.addLayer('relu4',...
             dagnn.ReLU(),...
             {'conv4'},...
             {'relu4'},...
             {});

% Block #5
net.addLayer('conv5',...
             dagnn.Conv('size', [9 9 256 64], 'hasBias', true, 'stride', [1 1], 'pad', [0 0 0 0]),...
             {'relu4'},...
             {'conv5'},...
             {'conv5f'  'conv5b'}); 
net.addLayer('relu5',...
             dagnn.ReLU(),...
             {'conv5'},...
             {'relu5'},...
             {});

% Block #6
net.addLayer('cat1',...
             dagnn.Concat('dim', 3),...
             {'relu5', 'inputsensor', 'inputgyro'},...
             {'cat1'});
         
% Block #7: Multi-Layer Perceptron
net.addLayer('fc1',...
             dagnn.Conv('size', [1 1 73 512], 'hasBias', true, 'stride', [1, 1], 'pad', [0 0 0 0]),...
             {'cat1'},...
             {'fc1'},...
             {'conv6f'  'conv6b'});
net.addLayer('relu6',...
             dagnn.ReLU(),...
             {'fc1'},...
             {'relu6'},...
             {});

% Block #8
net.addLayer('prediction',...
             dagnn.Conv('size', [1 1 512 2], 'hasBias', true, 'stride', [1, 1], 'pad', [0 0 0 0]),...
             {'relu6'},...
             {'prediction'},...
             {'conv7f'  'conv7b'});

% Block #9: pdist  
net.addLayer('objective',...
             dagnn.PDist('p', 2, 'aggregate', true),...
             {'prediction', 'label'},...
             {'objective'},...
             {}); 

% -------------------------------------------------------------------------
%                                                           Meta parameters
% -------------------------------------------------------------------------

net.meta.imageSize = opts.imageSize ;
net.meta.averageImage = opts.averageImage ;

lr = [0.001*ones(1,3), 0.0005*ones(1,3), 0.0001*ones(1,3), 0.00005*ones(1,3), 0.00001*ones(1,5)] ;
net.meta.trainOpts.learningRate = lr ;
net.meta.trainOpts.numEpochs = numel(lr) ;
net.meta.trainOpts.momentum = 0.9;
net.meta.trainOpts.batchSize = opts.batchSize ;
net.meta.trainOpts.numSubBatches = 1 ;
net.meta.trainOpts.weightDecay = 0.0001 ;

% params init
f = 1/100;
f_ind = net.layers(1).paramIndexes(1);                                             
b_ind = net.layers(1).paramIndexes(2);                                             
net.params(f_ind).value = 10*f*randn(size(net.params(f_ind).value), 'single');     
net.params(f_ind).learningRate = 1;                                                
net.params(f_ind).weightDecay = 1;                                                 
for l=2:length(net.layers)                                                         
	if(strcmp(class(net.layers(l).block), 'dagnn.Conv'))                           
		f_ind = net.layers(l).paramIndexes(1);                                     
		b_ind = net.layers(l).paramIndexes(2);
		[h,w,in,out] = size(net.params(f_ind).value);
		net.params(f_ind).value = f*randn(size(net.params(f_ind).value), 'single');
		net.params(f_ind).learningRate = 1;                                        
		net.params(f_ind).weightDecay = 1;                                         
		net.params(b_ind).value = f*randn(size(net.params(b_ind).value), 'single');
		net.params(b_ind).learningRate = 0.5;  
		net.params(b_ind).weightDecay = 1;
	end
end
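
For comparison with the AutoNN Params below, here is a minimal sketch (assuming the DagNN model and initialization above, with the model in net as shown) that prints the effective initialization statistics of every Conv parameter:

% print initialization statistics of every Conv parameter for comparison
% note: for layer 1 above, b_ind is read but its parameter is never
% re-initialized, so the conv1 bias keeps its default value and learning rate
for l = 1:numel(net.layers)
    if isa(net.layers(l).block, 'dagnn.Conv')
        for p = net.layers(l).paramIndexes
            v = net.params(p).value ;
            fprintf('%-12s %-10s std=%.4f lr=%.2f wd=%.2f\n', ...
                net.layers(l).name, net.params(p).name, ...
                std(double(v(:))), net.params(p).learningRate, ...
                net.params(p).weightDecay) ;
        end
    end
end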
% AutoNN code
opts.batchSize = 50;
opts.imageSize = [384 384];
opts.averageImage = zeros(3,1) ;
opts.colorDeviation = zeros(3) ;
opts.cudnnWorkspaceLimit = 4*1024*1024*1024 ; % 4GB
opts.learningRate = [0.001*ones(1,3), 0.0005*ones(1,3), 0.0001*ones(1,3), 0.00005*ones(1,3), 0.00001*ones(1,5)] ;
opts = vl_argparse(opts, varargin) ;

f = 1/100; % initialization parameter
% -------------------------------------------------------------------------
% Add input section
% -------------------------------------------------------------------------

inputimage = Input();
inputsensor = Input();
inputgyro = Input();
label = Input();

% Block #1
% create parameters explicitly
filterSize1 = [1 1 3 8];
filters1 = Param('value', 10*f*randn(filterSize1(1),filterSize1(2),filterSize1(3),filterSize1(4), 'single'), 'learningRate', 1, 'weightDecay', 1);
biases1 = Param('value', zeros(filterSize1(4), 1, 'single'), 'learningRate', 1, 'weightDecay', 1);
conv1 = vl_nnconv(inputimage, filters1, biases1, 'stride', [1 1], 'pad', [0 0 0 0]);
relu1 = vl_nnrelu(conv1);
pool1 = vl_nnpool(relu1, 2, 'stride', 2);

% Block #2
filterSize2 = [5 5 8 32];
filters2 = Param('value', f*randn(filterSize2(1),filterSize2(2),filterSize2(3),filterSize2(4), 'single'), 'learningRate', 1, 'weightDecay', 1);
biases2 = Param('value', f*randn(1,filterSize2(4), 'single'), 'learningRate', 0.5, 'weightDecay', 1);
conv2 = vl_nnconv(pool1, filters2, biases2, 'stride', [1 1], 'pad', [2 2 2 2]);
relu2 = vl_nnrelu(conv2);
pool2 = vl_nnpool(relu2, 2, 'stride', 2);

% Block #3
filterSize3 = [3 3 32 128];
filters3 = Param('value', f*randn(filterSize3(1),filterSize3(2),filterSize3(3),filterSize3(4), 'single'), 'learningRate', 1, 'weightDecay', 1);
biases3 = Param('value', f*randn(1, filterSize3(4), 'single'), 'learningRate', 0.5, 'weightDecay', 1);
conv3 = vl_nnconv(pool2, filters3, biases3, 'stride', [3 3], 'pad', [3 3 3 3]);
relu3 = vl_nnrelu(conv3);
pool3 = vl_nnpool(relu3, 2, 'stride', 2);

% Block #4
filterSize4 = [1 1 128 256];
filters4 = Param('value', f*randn(filterSize4(1),filterSize4(2),filterSize4(3),filterSize4(4), 'single'), 'learningRate', 1, 'weightDecay', 1);
biases4 = Param('value', f*randn(1, filterSize4(4), 'single'), 'learningRate', 0.5, 'weightDecay', 1);
conv4 = vl_nnconv(pool3, filters4, biases4, 'stride', [2 2], 'pad', [0 0 0 0]);
relu4 = vl_nnrelu(conv4);

% Block #5
filterSize5 = [9 9 256 64];
filters5 = Param('value', f*randn(filterSize5(1),filterSize5(2),filterSize5(3),filterSize5(4), 'single'), 'learningRate', 1, 'weightDecay', 1);
biases5 = Param('value', f*randn(1, filterSize5(4), 'single'), 'learningRate', 0.5, 'weightDecay', 1);
conv5 = vl_nnconv(relu4, filters5, biases5, 'stride', [1 1], 'pad', [0 0 0 0]);
relu5 = vl_nnrelu(conv5);

% Block #6: concat
cat6 = cat(3, relu5, inputsensor, inputgyro);

% Block #7: Multi-Layer Perceptron
filterSize7 = [1 1 73 512];
filters7 = Param('value', f*randn(filterSize7(1),filterSize7(2),filterSize7(3),filterSize7(4), 'single'), 'learningRate', 1, 'weightDecay', 1);
biases7 = Param('value', f*randn(1, filterSize7(4), 'single'), 'learningRate', 0.5, 'weightDecay', 1);
fc7 = vl_nnconv(cat6, filters7, biases7, 'stride', [1 1], 'pad', [0 0 0 0]);
relu7 = vl_nnrelu(fc7);

% Block #8: prediction
filterSize8 = [1 1 512 2];
filters8 = Param('value', f*randn(filterSize8(1),filterSize8(2),filterSize8(3),filterSize8(4), 'single'), 'learningRate', 1, 'weightDecay', 1);
biases8 = Param('value', f*randn(1, filterSize8(4), 'single'), 'learningRate', 0.5, 'weightDecay', 1);
prediction8 = vl_nnconv(relu7, filters8, biases8, 'stride', [1 1], 'pad', [0 0 0 0]);

% Block #9: pdist
objective = vl_nnpdist(prediction8, label, 2, 'aggregate', true);

% layers name assignment
Layer.workspaceNames();

% compile the network
inputimage.gpu = true;
net = Net(objective);

net.meta.imageSize = opts.imageSize ;
net.meta.averageImage = opts.averageImage ;

net.meta.trainOpts.learningRate = opts.learningRate ;
net.meta.trainOpts.numEpochs = numel(opts.learningRate) ;
net.meta.trainOpts.momentum = 0.85 ;
net.meta.trainOpts.batchSize = opts.batchSize ;
net.meta.trainOpts.numSubBatches = 1 ;
net.meta.trainOpts.weightDecay = 0.0001 ;

try
    layer = Layer.fromCompiledNet(net);
    layer{1}.sequentialNames;
    layer{1}.plotPDF();
catch
end
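
Finally, a minimal sketch of the sanity check I would run before training: copy the DagNN parameters into the compiled AutoNN Net and evaluate both models on one identical random batch. Everything below is an assumption to adapt: dag holding the DagNN model, net the compiled AutoNN Net, the sensor/gyro channel split (they only need to sum to 73 - 64 = 9), and that Net.setValue/getValue accept variable names as strings.

% --- comparison sketch: evaluate both nets on one identical random batch ---
rng(0) ;
im = randn(384, 384, 3, 4, 'single') ;   % hypothetical image batch of 4
se = randn(1, 1, 6, 4, 'single') ;       % hypothetical sensor input
gy = randn(1, 1, 3, 4, 'single') ;       % hypothetical gyro input
y  = randn(1, 1, 2, 4, 'single') ;       % hypothetical labels for pdist

% copy the DagNN parameters into the AutoNN Net (block 6 has no Params in the
% AutoNN numbering, so conv6*/conv7* map to filters7/biases7 and filters8/biases8)
dagNames = {'conv1','conv2','conv3','conv4','conv5','conv6','conv7'} ;
autoIdx  = [1 2 3 4 5 7 8] ;
for k = 1:numel(dagNames)
    f = dag.params(dag.getParamIndex([dagNames{k} 'f'])).value ;
    b = dag.params(dag.getParamIndex([dagNames{k} 'b'])).value ;
    net.setValue(sprintf('filters%d', autoIdx(k)), f) ;
    net.setValue(sprintf('biases%d',  autoIdx(k)), b) ;
end

% forward both networks and compare the objectives
dag.eval({'inputimage', im, 'inputsensor', se, 'inputgyro', gy, 'label', y}) ;
objDag  = dag.vars(dag.getVarIndex('objective')).value ;
net.eval({'inputimage', im, 'inputsensor', se, 'inputgyro', gy, 'label', y}) ;
objAuto = net.getValue('objective') ;
fprintf('DagNN objective: %g   AutoNN objective: %g\n', objDag, objAuto) ;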
ghost (Author) commented Apr 11, 2018

@jotaf98
