
Different results of AutoNN and DagNN? #36

Open · ghost opened this issue Apr 11, 2018 · 1 comment

ghost commented Apr 11, 2018

Hi all.

I used DagNN to train my network for an image auto-white-balancing application and reached a final objective of approximately 0.1 (Euclidean distance, computed with the pdist loss).
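
For reference, a minimal plain-MATLAB sketch of what that per-sample p = 2 distance amounts to for a 1x1x2 prediction and its label; the numbers are made up for illustration:

% per-sample Euclidean (p = 2) distance between prediction and label
pred  = [0.45 0.52] ;   % hypothetical predicted gains
truth = [0.40 0.50] ;   % hypothetical ground-truth gains
dist  = sqrt(sum((pred - truth).^2))   % ~0.054, the scale of the ~0.1 objective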

Yesterday I found AutoNN, an impressive wrapper for MatConvNet, and rebuilt my architecture in it. I use the same layer structure and initialize with the same parameters, but the objective stays at 6~8 after the same number of epochs.

Any idea about what mistake I am making would be appreciated.

Here is the network-construction code in DagNN and in AutoNN:

% DagNN code
opts.batchSize = [];
opts.imageSize = [384 384];
opts.averageImage = zeros(3,1) ;
opts.colorDeviation = zeros(3) ;
opts.cudnnWorkspaceLimit = 4*1024*1024*1024 ; % 4GB
opts = vl_argparse(opts, varargin) ;

net = dagnn.DagNN() ;

% -------------------------------------------------------------------------
% Add input section
% -------------------------------------------------------------------------

% Block #1
net.addLayer('conv1',...
             dagnn.Conv('size', [1 1 3 8], 'hasBias', true, 'stride', [1 1], 'pad', [0 0 0 0]),...
             {'inputimage'},...
             {'conv1'},...
             {'conv1f'  'conv1b'}); 
net.addLayer('relu1',...
             dagnn.ReLU(),...
             {'conv1'},...
             {'relu1'},...
             {});
net.addLayer('pool1',...
             dagnn.Pooling('method', 'max', 'poolSize', [2 2], 'stride', [2 2], 'pad', [0 0 0 0]),...
             {'relu1'},...
             {'pool1'},...
             {});

% Block #2
net.addLayer('conv2',...
             dagnn.Conv('size', [5 5 8 32], 'hasBias', true, 'stride', [1 1], 'pad', [2 2 2 2]),...
             {'pool1'},...
             {'conv2'},...
             {'conv2f'  'conv2b'});
net.addLayer('relu2',...
             dagnn.ReLU(),...
             {'conv2'},...
             {'relu2'},...
             {});
net.addLayer('pool2',...
             dagnn.Pooling('method', 'max', 'poolSize', [2, 2], 'stride', [2 2], 'pad', [0 0 0 0]),...
             {'relu2'},...
             {'pool2'},...
             {});

% Block #3
net.addLayer('conv3',...
             dagnn.Conv('size', [3 3 32 128], 'hasBias', true, 'stride', [3 3], 'pad', [3 3 3 3]),...
             {'pool2'},...
             {'conv3'},...
             {'conv3f'  'conv3b'});
net.addLayer('relu3',...
             dagnn.ReLU(),...
             {'conv3'},...
             {'relu3'},...
             {});
net.addLayer('pool3',...
             dagnn.Pooling('method', 'max', 'poolSize', [2, 2], 'stride', [2 2], 'pad', [0 0 0 0]),...
             {'relu3'},...
             {'pool3'},...
             {});

% Block #4
net.addLayer('conv4',...
             dagnn.Conv('size', [1 1 128 256], 'hasBias', true, 'stride', [2 2], 'pad', [0 0 0 0]),...
             {'pool3'},...
             {'conv4'},...
             {'conv4f'  'conv4b'});
net.addLayer('relu4',...
             dagnn.ReLU(),...
             {'conv4'},...
             {'relu4'},...
             {});

% Block #5
net.addLayer('conv5',...
             dagnn.Conv('size', [9 9 256 64], 'hasBias', true, 'stride', [1 1], 'pad', [0 0 0 0]),...
             {'relu4'},...
             {'conv5'},...
             {'conv5f'  'conv5b'}); 
net.addLayer('relu5',...
             dagnn.ReLU(),...
             {'conv5'},...
             {'relu5'},...
             {});

% Block #6
net.addLayer('cat1',...
             dagnn.Concat('dim', 3),...
             {'relu5', 'inputsensor', 'inputgyro'},...
             {'cat1'});
         
% Block #7: Multi-Layer Perceptron
net.addLayer('fc1',...
             dagnn.Conv('size', [1 1 73 512], 'hasBias', true, 'stride', [1, 1], 'pad', [0 0 0 0]),...
             {'cat1'},...
             {'fc1'},...
             {'conv6f'  'conv6b'});
net.addLayer('relu6',...
             dagnn.ReLU(),...
             {'fc1'},...
             {'relu6'},...
             {});

% Block #8
net.addLayer('prediction',...
             dagnn.Conv('size', [1 1 512 2], 'hasBias', true, 'stride', [1, 1], 'pad', [0 0 0 0]),...
             {'relu6'},...
             {'prediction'},...
             {'conv7f'  'conv7b'});

% Block #9: pdist  
net.addLayer('objective',...
             dagnn.PDist('p', 2, 'aggregate', true),...
             {'prediction', 'label'},...
             {'objective'},...
             {}); 

% -------------------------------------------------------------------------
%                                                           Meta parameters
% -------------------------------------------------------------------------

net.meta.imageSize = opts.imageSize ;
net.meta.averageImage = opts.averageImage ;

lr = [0.001*ones(1,3), 0.0005*ones(1,3), 0.0001*ones(1,3), 0.00005*ones(1,3), 0.00001*ones(1,5)] ;
net.meta.trainOpts.learningRate = lr ;
net.meta.trainOpts.numEpochs = numel(lr) ;
net.meta.trainOpts.momentum = 0.9;
net.meta.trainOpts.batchSize = opts.batchSize ;
net.meta.trainOpts.numSubBatches = 1 ;
net.meta.trainOpts.weightDecay = 0.0001 ;

% params init
f = 1/100;
f_ind = net.layers(1).paramIndexes(1);                                             
b_ind = net.layers(1).paramIndexes(2);                                             
net.params(f_ind).value = 10*f*randn(size(net.params(f_ind).value), 'single');     
net.params(f_ind).learningRate = 1;                                                
net.params(f_ind).weightDecay = 1;                                                 
for l=2:length(net.layers)                                                         
	if(strcmp(class(net.layers(l).block), 'dagnn.Conv'))                           
		f_ind = net.layers(l).paramIndexes(1);                                     
		b_ind = net.layers(l).paramIndexes(2);
		[h,w,in,out] = size(net.params(f_ind).value);
		net.params(f_ind).value = f*randn(size(net.params(f_ind).value), 'single');
		net.params(f_ind).learningRate = 1;                                        
		net.params(f_ind).weightDecay = 1;                                         
		net.params(b_ind).value = f*randn(size(net.params(b_ind).value), 'single');
		net.params(b_ind).learningRate = 0.5;  
		net.params(b_ind).weightDecay = 1;
	end
end
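
For comparison with the AutoNN Params below, here is a minimal sketch (assuming the DagNN model and initialization above, with the model in net as shown) that prints the effective initialization statistics of every Conv parameter:

% print initialization statistics of every Conv parameter for comparison
% note: for layer 1 above, b_ind is read but its parameter is never
% re-initialized, so the conv1 bias keeps its default value and learning rate
for l = 1:numel(net.layers)
    if isa(net.layers(l).block, 'dagnn.Conv')
        for p = net.layers(l).paramIndexes
            v = net.params(p).value ;
            fprintf('%-12s %-10s std=%.4f lr=%.2f wd=%.2f\n', ...
                net.layers(l).name, net.params(p).name, ...
                std(double(v(:))), net.params(p).learningRate, ...
                net.params(p).weightDecay) ;
        end
    end
end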
% AutoNN code
opts.batchSize = 50;
opts.imageSize = [384 384];
opts.averageImage = zeros(3,1) ;
opts.colorDeviation = zeros(3) ;
opts.cudnnWorkspaceLimit = 4*1024*1024*1024 ; % 4GB
opts.learningRate = [0.001*ones(1,3), 0.0005*ones(1,3), 0.0001*ones(1,3), 0.00005*ones(1,3), 0.00001*ones(1,5)] ;
opts = vl_argparse(opts, varargin) ;

f = 1/100; % initialization parameter
% -------------------------------------------------------------------------
% Add input section
% -------------------------------------------------------------------------

inputimage = Input();
inputsensor = Input();
inputgyro = Input();
label = Input();

% Block #1
% create parameters explicitly
filterSize1 = [1 1 3 8];
filters1 = Param('value', 10*f*randn(filterSize1(1),filterSize1(2),filterSize1(3),filterSize1(4), 'single'), 'learningRate', 1, 'weightDecay', 1);
biases1 = Param('value', zeros(filterSize1(4), 1, 'single'), 'learningRate', 1, 'weightDecay', 1);
conv1 = vl_nnconv(inputimage, filters1, biases1, 'stride', [1 1], 'pad', [0 0 0 0]);
relu1 = vl_nnrelu(conv1);
pool1 = vl_nnpool(relu1, 2, 'stride', 2);

% Block #2
filterSize2 = [5 5 8 32];
filters2 = Param('value', f*randn(filterSize2(1),filterSize2(2),filterSize2(3),filterSize2(4), 'single'), 'learningRate', 1, 'weightDecay', 1);
biases2 = Param('value', f*randn(1,filterSize2(4), 'single'), 'learningRate', 0.5, 'weightDecay', 1);
conv2 = vl_nnconv(pool1, filters2, biases2, 'stride', [1 1], 'pad', [2 2 2 2]);
relu2 = vl_nnrelu(conv2);
pool2 = vl_nnpool(relu2, 2, 'stride', 2);

% Block #3
filterSize3 = [3 3 32 128];
filters3 = Param('value', f*randn(filterSize3(1),filterSize3(2),filterSize3(3),filterSize3(4), 'single'), 'learningRate', 1, 'weightDecay', 1);
biases3 = Param('value', f*randn(1, filterSize3(4), 'single'), 'learningRate', 0.5, 'weightDecay', 1);
conv3 = vl_nnconv(pool2, filters3, biases3, 'stride', [3 3], 'pad', [3 3 3 3]);
relu3 = vl_nnrelu(conv3);
pool3 = vl_nnpool(relu3, 2, 'stride', 2);

% Block #4
filterSize4 = [1 1 128 256];
filters4 = Param('value', f*randn(filterSize4(1),filterSize4(2),filterSize4(3),filterSize4(4), 'single'), 'learningRate', 1, 'weightDecay', 1);
biases4 = Param('value', f*randn(1, filterSize4(4), 'single'), 'learningRate', 0.5, 'weightDecay', 1);
conv4 = vl_nnconv(pool3, filters4, biases4, 'stride', [2 2], 'pad', [0 0 0 0]);
relu4 = vl_nnrelu(conv4);

% Block #5
filterSize5 = [9 9 256 64];
filters5 = Param('value', f*randn(filterSize5(1),filterSize5(2),filterSize5(3),filterSize5(4), 'single'), 'learningRate', 1, 'weightDecay', 1);
biases5 = Param('value', f*randn(1, filterSize5(4), 'single'), 'learningRate', 0.5, 'weightDecay', 1);
conv5 = vl_nnconv(relu4, filters5, biases5, 'stride', [1 1], 'pad', [0 0 0 0]);
relu5 = vl_nnrelu(conv5);

% Block #6: concat
cat6 = cat(3, relu5, inputsensor, inputgyro);

% Block #7: Multi-Layer Perceptron
filterSize7 = [1 1 73 512];
filters7 = Param('value', f*randn(filterSize7(1),filterSize7(2),filterSize7(3),filterSize7(4), 'single'), 'learningRate', 1, 'weightDecay', 1);
biases7 = Param('value', f*randn(1, filterSize7(4), 'single'), 'learningRate', 0.5, 'weightDecay', 1);
fc7 = vl_nnconv(cat6, filters7, biases7, 'stride', [1 1], 'pad', [0 0 0 0]);
relu7 = vl_nnrelu(fc7);

% Block #8: prediction
filterSize8 = [1 1 512 2];
filters8 = Param('value', f*randn(filterSize8(1),filterSize8(2),filterSize8(3),filterSize8(4), 'single'), 'learningRate', 1, 'weightDecay', 1);
biases8 = Param('value', f*randn(1, filterSize8(4), 'single'), 'learningRate', 0.5, 'weightDecay', 1);
prediction8 = vl_nnconv(relu7, filters8, biases8, 'stride', [1 1], 'pad', [0 0 0 0]);

% Block #9: pdist
objective = vl_nnpdist(prediction8, label, 2, 'aggregate', true);

% layers name assignment
Layer.workspaceNames();

% compile the network
inputimage.gpu = true;
net = Net(objective);

net.meta.imageSize = opts.imageSize ;
net.meta.averageImage = opts.averageImage ;

net.meta.trainOpts.learningRate = opts.learningRate ;
net.meta.trainOpts.numEpochs = numel(opts.learningRate) ;
net.meta.trainOpts.momentum = 0.85 ;
net.meta.trainOpts.batchSize = opts.batchSize ;
net.meta.trainOpts.numSubBatches = 1 ;
net.meta.trainOpts.weightDecay = 0.0001 ;

try
    layer = Layer.fromCompiledNet(net);
    layer{1}.sequentialNames;
    layer{1}.plotPDF();
catch
end
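
Finally, a minimal sketch of the sanity check I would run before training: copy the DagNN parameters into the compiled AutoNN Net and evaluate both models on one identical random batch. Everything below is an assumption to adapt: dag holding the DagNN model, net the compiled AutoNN Net, the sensor/gyro channel split (they only need to sum to 73 - 64 = 9), and that Net.setValue/getValue accept variable names as strings.

% --- comparison sketch: evaluate both nets on one identical random batch ---
rng(0) ;
im = randn(384, 384, 3, 4, 'single') ;   % hypothetical image batch of 4
se = randn(1, 1, 6, 4, 'single') ;       % hypothetical sensor input
gy = randn(1, 1, 3, 4, 'single') ;       % hypothetical gyro input
y  = randn(1, 1, 2, 4, 'single') ;       % hypothetical labels for pdist

% copy the DagNN parameters into the AutoNN Net (block 6 has no Params in the
% AutoNN numbering, so conv6*/conv7* map to filters7/biases7 and filters8/biases8)
dagNames = {'conv1','conv2','conv3','conv4','conv5','conv6','conv7'} ;
autoIdx  = [1 2 3 4 5 7 8] ;
for k = 1:numel(dagNames)
    f = dag.params(dag.getParamIndex([dagNames{k} 'f'])).value ;
    b = dag.params(dag.getParamIndex([dagNames{k} 'b'])).value ;
    net.setValue(sprintf('filters%d', autoIdx(k)), f) ;
    net.setValue(sprintf('biases%d',  autoIdx(k)), b) ;
end

% forward both networks and compare the objectives
dag.eval({'inputimage', im, 'inputsensor', se, 'inputgyro', gy, 'label', y}) ;
objDag  = dag.vars(dag.getVarIndex('objective')).value ;
net.eval({'inputimage', im, 'inputsensor', se, 'inputgyro', gy, 'label', y}) ;
objAuto = net.getValue('objective') ;
fprintf('DagNN objective: %g   AutoNN objective: %g\n', objDag, objAuto) ;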
ghost (Author) commented Apr 11, 2018

@jotaf98
