From 93e0bf9fc478d45c5300a66a2f6b0821c2c341ed Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 28 Sep 2017 17:48:56 +0900 Subject: [PATCH 01/57] add train_imagenet --- chainercv/links/__init__.py | 1 + chainercv/links/model/resnet/__init__.py | 1 + chainercv/links/model/resnet/resnet.py | 21 +++ examples/classification/train_imagenet.py | 168 ++++++++++++++++++++++ 4 files changed, 191 insertions(+) create mode 100644 examples/classification/train_imagenet.py diff --git a/chainercv/links/__init__.py b/chainercv/links/__init__.py index a7752c95e3..293621360a 100644 --- a/chainercv/links/__init__.py +++ b/chainercv/links/__init__.py @@ -8,6 +8,7 @@ from chainercv.links.model.faster_rcnn.faster_rcnn_vgg import FasterRCNNVGG16 # NOQA from chainercv.links.model.resnet import ResNet101 # NOQA from chainercv.links.model.resnet import ResNet152 # NOQA +from chainercv.links.model.resnet import ResNet18 # NOQA from chainercv.links.model.resnet import ResNet50 # NOQA from chainercv.links.model.segnet.segnet_basic import SegNetBasic # NOQA from chainercv.links.model.ssd import SSD300 # NOQA diff --git a/chainercv/links/model/resnet/__init__.py b/chainercv/links/model/resnet/__init__.py index c5f33cfdef..3156073602 100644 --- a/chainercv/links/model/resnet/__init__.py +++ b/chainercv/links/model/resnet/__init__.py @@ -4,4 +4,5 @@ from chainercv.links.model.resnet.resnet import ResNet # NOQA from chainercv.links.model.resnet.resnet import ResNet101 # NOQA from chainercv.links.model.resnet.resnet import ResNet152 # NOQA +from chainercv.links.model.resnet.resnet import ResNet18 # NOQA from chainercv.links.model.resnet.resnet import ResNet50 # NOQA diff --git a/chainercv/links/model/resnet/resnet.py b/chainercv/links/model/resnet/resnet.py index 969f1bf2af..1b1f00e0b4 100644 --- a/chainercv/links/model/resnet/resnet.py +++ b/chainercv/links/model/resnet/resnet.py @@ -91,12 +91,14 @@ class ResNet(PickableSequentialChain): """ _blocks = { + 'resnet18': [2, 2, 2, 2], 
'resnet50': [3, 4, 6, 3], 'resnet101': [3, 4, 23, 3], 'resnet152': [3, 8, 36, 3] } _he_models = { + 'resnet18': {}, 'resnet50': { 'imagenet': { 'n_class': 1000, @@ -124,6 +126,7 @@ class ResNet(PickableSequentialChain): } _fb_models = { + 'resnet18': dict(), 'resnet50': dict(), 'resnet101': dict(), 'resnet152': dict() @@ -193,6 +196,24 @@ def _global_average_pooling_2d(x): return h +class ResNet18(ResNet): + + """ResNet-18 Network. + + Please consult the documentation for :class:`ResNet`. + + .. seealso:: + :class:`chainercv.links.model.resnet.ResNet` + + """ + + def __init__(self, n_class=None, pretrained_model=None, + mean=None, initialW=None, fb_resnet=False): + super(ResNet18, self).__init__( + 'resnet18', n_class, pretrained_model, + mean, initialW, fb_resnet) + + class ResNet50(ResNet): """ResNet-50 Network. diff --git a/examples/classification/train_imagenet.py b/examples/classification/train_imagenet.py new file mode 100644 index 0000000000..9d15ac31e4 --- /dev/null +++ b/examples/classification/train_imagenet.py @@ -0,0 +1,168 @@ +from __future__ import division +import matplotlib +matplotlib.use('agg') +import argparse + +import chainer +from chainer.datasets import TransformDataset +from chainer import iterators +from chainer.links import Classifier +from chainer import training +from chainer.training import extensions + +from chainercv.datasets import DirectoryParsingLabelDataset + +from chainercv.transforms import center_crop +from chainercv.transforms import pca_lighting +from chainercv.transforms import random_flip +from chainercv.transforms import random_sized_crop +from chainercv.transforms import resize +from chainercv.transforms import scale + +from chainercv.datasets import directory_parsing_label_names + +from chainercv.links import ResNet101 +from chainercv.links import ResNet152 +from chainercv.links import ResNet18 +from chainercv.links import ResNet50 + + +class TrainTransform(object): + + def __init__(self, mean): + self.mean = mean + + 
def __call__(self, in_data): + # https://github.com/facebook/fb.resnet.torch/blob/master/datasets/imagenet.lua#L80 + img, label = in_data + _, H, W = img.shape + img = random_sized_crop(img) + img = resize(img, (224, 224)) + img = random_flip(img, x_random=True) + img = pca_lighting(img, 25) + img -= self.mean + return img, label + + +class ValTransform(object): + + def __init__(self, mean): + self.mean = mean + + def __call__(self, in_data): + img, label = in_data + img = scale(img, 256) + img = center_crop(img, (224, 224)) + img -= self.mean + return img, label + + +def main(): + archs = { + 'resnet18': {'class': ResNet18, 'score_layer_name': 'fc6', + 'kwargs': {'fb_resnet': True}}, + 'resnet50': {'class': ResNet50, 'score_layer_name': 'fc6', + 'kwargs': {'fb_resnet': True}}, + 'resnet101': {'class': ResNet101, 'score_layer_name': 'fc6', + 'kwargs': {'fb_resnet': True}}, + 'resnet152': {'class': ResNet152, 'score_layer_name': 'fc6', + 'kwargs': {'fb_resnet': True}} + } + parser = argparse.ArgumentParser( + description='Learning convnet from ILSVRC2012 dataset') + parser.add_argument('train', help='Path to root of the train dataset') + parser.add_argument('val', help='Path to root of the validation dataset') + parser.add_argument('--arch', + '-a', choices=archs.keys(), default='resnet18', + help='Convnet architecture') + parser.add_argument('--pretrained_model') + parser.add_argument('--gpu', type=int, default=-1) + parser.add_argument('--loaderjob', type=int, default=4) + parser.add_argument('--batchsize', type=int, default=64) + parser.add_argument('--lr', type=float, default=1e-1) + parser.add_argument('--momentum', type=float, default=0.9) + parser.add_argument('--weight_decay', type=float, default=0.0001) + parser.add_argument('--out', type=str, default='result') + parser.add_argument('--step_size', type=int, default=30) + parser.add_argument('--epoch', type=int, default=90) + args = parser.parse_args() + + train_data = 
DirectoryParsingLabelDataset(args.train) + val_data = DirectoryParsingLabelDataset(args.val) + label_names = directory_parsing_label_names(args.train) + print('finished loading dataset') + + arch = archs[args.arch] + extractor = arch['class'](n_class=len(label_names), **arch['kwargs']) + extractor.pick = arch['score_layer_name'] + model = Classifier(extractor) + + train_data = TransformDataset(train_data, TrainTransform(extractor.mean)) + val_data = TransformDataset(val_data, ValTransform(extractor.mean)) + train_iter = chainer.iterators.MultiprocessIterator( + train_data, args.batchsize, shared_mem=3 * 224 * 224 * 4) + val_iter = iterators.MultiprocessIterator( + val_data, args.batchsize, + repeat=False, shuffle=False, shared_mem=3 * 224 * 224 * 4) + + optimizer = chainer.optimizers.MomentumSGD( + lr=args.lr, momentum=args.momentum) + optimizer.setup(model) + optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args.weight_decay)) + + if args.gpu >= 0: + chainer.cuda.get_device(args.gpu).use() + model.to_gpu() + + updater = chainer.training.StandardUpdater( + train_iter, optimizer, device=args.gpu) + + trainer = training.Trainer( + updater, (args.epoch, 'epoch'), out=args.out) + trainer.extend(extensions.ExponentialShift('lr', 0.1), + trigger=(args.step_size, 'epoch')) + + log_interval = 0.1, 'epoch' + print_interval = 0.1, 'epoch' + plot_interval = 1, 'epoch' + + trainer.extend( + extensions.snapshot_object(extractor, 'snapshot_model.npz'), + trigger=(args.epoch, 'epoch')) + trainer.extend(extensions.LogReport(trigger=log_interval)) + + trainer.extend(extensions.PrintReport( + ['iteration', 'epoch', 'elapsed_time', 'lr', + 'main/loss', 'validation/main/loss', + 'main/accuracy', 'validation/main/accuracy'] + ), trigger=print_interval) + + trainer.extend(extensions.ProgressBar(update_interval=10)) + + if extensions.PlotReport.available(): + trainer.extend( + extensions.PlotReport( + ['main/loss', 'validation/main/loss'], + file_name='loss.png', 
trigger=plot_interval + ), + trigger=plot_interval + ) + trainer.extend( + extensions.PlotReport( + ['main/accuracy', 'validation/main/accuracy'], + file_name='accuracy.png', trigger=plot_interval + ), + trigger=plot_interval + ) + trainer.extend( + extensions.Evaluator(val_iter, model, device=args.gpu), + trigger=(1, 'epoch') + ) + + trainer.extend(extensions.dump_graph('main/loss')) + + trainer.run() + + +if __name__ == '__main__': + main() From 2808c16a265e9058c97cce7ce88e6c0c82f37a0e Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 29 Sep 2017 13:16:52 +0900 Subject: [PATCH 02/57] add train_imagenet_mn --- examples/classification/train_imagenet_mn.py | 181 +++++++++++++++++++ 1 file changed, 181 insertions(+) create mode 100644 examples/classification/train_imagenet_mn.py diff --git a/examples/classification/train_imagenet_mn.py b/examples/classification/train_imagenet_mn.py new file mode 100644 index 0000000000..e1eae337f9 --- /dev/null +++ b/examples/classification/train_imagenet_mn.py @@ -0,0 +1,181 @@ +from __future__ import division +import matplotlib +matplotlib.use('agg') +import argparse + +import chainer +from chainer.datasets import TransformDataset +from chainer import iterators +from chainer.links import Classifier +from chainer import training +from chainer.training import extensions + +from chainercv.datasets import DirectoryParsingLabelDataset + +from chainercv.transforms import center_crop +from chainercv.transforms import pca_lighting +from chainercv.transforms import random_flip +from chainercv.transforms import random_sized_crop +from chainercv.transforms import resize +from chainercv.transforms import scale + +from chainercv.datasets import directory_parsing_label_names + +from chainercv.links import ResNet101 +from chainercv.links import ResNet152 +from chainercv.links import ResNet18 +from chainercv.links import ResNet50 + +import chainermn + + +class TrainTransform(object): + + def __init__(self, mean): + self.mean = mean + + def 
__call__(self, in_data): + # https://github.com/facebook/fb.resnet.torch/blob/master/datasets/imagenet.lua#L80 + img, label = in_data + _, H, W = img.shape + img = random_sized_crop(img) + img = resize(img, (224, 224)) + img = random_flip(img, x_random=True) + img = pca_lighting(img, 25) + img -= self.mean + return img, label + + +class ValTransform(object): + + def __init__(self, mean): + self.mean = mean + + def __call__(self, in_data): + img, label = in_data + img = scale(img, 256) + img = center_crop(img, (224, 224)) + img -= self.mean + return img, label + + +def main(): + archs = { + 'resnet18': {'class': ResNet18, 'score_layer_name': 'fc6', + 'kwargs': {'fb_resnet': True}}, + 'resnet50': {'class': ResNet50, 'score_layer_name': 'fc6', + 'kwargs': {'fb_resnet': True}}, + 'resnet101': {'class': ResNet101, 'score_layer_name': 'fc6', + 'kwargs': {'fb_resnet': True}}, + 'resnet152': {'class': ResNet152, 'score_layer_name': 'fc6', + 'kwargs': {'fb_resnet': True}} + } + parser = argparse.ArgumentParser( + description='Learning convnet from ILSVRC2012 dataset') + parser.add_argument('train', help='Path to root of the train dataset') + parser.add_argument('val', help='Path to root of the validation dataset') + parser.add_argument('--arch', + '-a', choices=archs.keys(), default='resnet18', + help='Convnet architecture') + parser.add_argument('--communicator', type=str, + default='hierarchical', help='Type of communicator') + parser.add_argument('--pretrained_model') + # parser.add_argument('--gpu', type=int, default=-1) + parser.add_argument('--loaderjob', type=int, default=4) + parser.add_argument('--batchsize', type=int, default=64, + help='Batch size for each worker') + parser.add_argument('--lr', type=float, default=1e-1) + parser.add_argument('--momentum', type=float, default=0.9) + parser.add_argument('--weight_decay', type=float, default=0.0001) + parser.add_argument('--out', type=str, default='result') + parser.add_argument('--step_size', type=int, default=30) 
+ parser.add_argument('--epoch', type=int, default=90) + args = parser.parse_args() + + comm = chainermn.create_communicator(args.communicator) + device = comm.intra_rank + + label_names = directory_parsing_label_names(args.train) + + arch = archs[args.arch] + extractor = arch['class'](n_class=len(label_names), **arch['kwargs']) + extractor.pick = arch['score_layer_name'] + model = Classifier(extractor) + + if comm.rank == 0: + train_data = DirectoryParsingLabelDataset(args.train) + val_data = DirectoryParsingLabelDataset(args.val) + train_data = TransformDataset(train_data, TrainTransform(extractor.mean)) + val_data = TransformDataset(val_data, ValTransform(extractor.mean)) + print('finished loading dataset') + else: + train_data, val_data = None, None + train_data = chainermn.scatter_dataset(train_data, comm, shuffle=True) + val_data = chainermn.scatter_dataset(val_data, comm, shuffle=True) + train_iter = chainer.iterators.MultiprocessIterator( + train_data, args.batchsize, shared_mem=3 * 224 * 224 * 4) + val_iter = iterators.MultiprocessIterator( + val_data, args.batchsize, + repeat=False, shuffle=False, shared_mem=3 * 224 * 224 * 4) + + optimizer = chainermn.create_multi_node_optimizer( + chainer.optimizers.MomentumSGD( + lr=args.lr, momentum=args.momentum), comm) + optimizer.setup(model) + optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args.weight_decay)) + + if device >= 0: + chainer.cuda.get_device(device).use() + model.to_gpu() + + updater = chainer.training.StandardUpdater( + train_iter, optimizer, device=device) + + trainer = training.Trainer( + updater, (args.epoch, 'epoch'), out=args.out) + trainer.extend(extensions.ExponentialShift('lr', 0.1), + trigger=(args.step_size, 'epoch')) + evaluator = chainermn.create_multi_node_evaluator( + extensions.Evaluator(val_iter, model, device=device), comm) + trainer.extend(evaluator, trigger=(1, 'epoch')) + + log_interval = 0.1, 'epoch' + print_interval = 0.1, 'epoch' + plot_interval = 1, 'epoch' + + if 
comm.rank == 0: + trainer.extend( + extensions.snapshot_object(extractor, 'snapshot_model.npz'), + trigger=(args.epoch, 'epoch')) + trainer.extend(extensions.LogReport(trigger=log_interval)) + trainer.extend(extensions.PrintReport( + ['iteration', 'epoch', 'elapsed_time', 'lr', + 'main/loss', 'validation/main/loss', + 'main/accuracy', 'validation/main/accuracy'] + ), trigger=print_interval) + trainer.extend(extensions.ProgressBar(update_interval=10)) + + if extensions.PlotReport.available(): + trainer.extend( + extensions.PlotReport( + ['main/loss', 'validation/main/loss'], + file_name='loss.png', trigger=plot_interval + ), + trigger=plot_interval + ) + trainer.extend( + extensions.PlotReport( + ['main/accuracy', 'validation/main/accuracy'], + file_name='accuracy.png', trigger=plot_interval + ), + trigger=plot_interval + ) + + + trainer.extend(extensions.dump_graph('main/loss')) + + trainer.run() + + +if __name__ == '__main__': + main() From 2c3667a1ee5d7d93b2f81f7bd9f057d53ab67dbc Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 29 Sep 2017 13:27:24 +0900 Subject: [PATCH 03/57] fix learning rate --- examples/classification/train_imagenet.py | 2 +- examples/classification/train_imagenet_mn.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/classification/train_imagenet.py b/examples/classification/train_imagenet.py index 9d15ac31e4..65e9c93f01 100644 --- a/examples/classification/train_imagenet.py +++ b/examples/classification/train_imagenet.py @@ -79,7 +79,7 @@ def main(): parser.add_argument('--gpu', type=int, default=-1) parser.add_argument('--loaderjob', type=int, default=4) parser.add_argument('--batchsize', type=int, default=64) - parser.add_argument('--lr', type=float, default=1e-1) + parser.add_argument('--lr', type=float, default=1e-2) parser.add_argument('--momentum', type=float, default=0.9) parser.add_argument('--weight_decay', type=float, default=0.0001) parser.add_argument('--out', type=str, default='result') diff 
--git a/examples/classification/train_imagenet_mn.py b/examples/classification/train_imagenet_mn.py index e1eae337f9..3e3d5cd361 100644 --- a/examples/classification/train_imagenet_mn.py +++ b/examples/classification/train_imagenet_mn.py @@ -84,7 +84,7 @@ def main(): parser.add_argument('--loaderjob', type=int, default=4) parser.add_argument('--batchsize', type=int, default=64, help='Batch size for each worker') - parser.add_argument('--lr', type=float, default=1e-1) + parser.add_argument('--lr', type=float, default=1e-2) parser.add_argument('--momentum', type=float, default=0.9) parser.add_argument('--weight_decay', type=float, default=0.0001) parser.add_argument('--out', type=str, default='result') From ea80eee0e08f1ea659d943ae2f9ace25c5b8475f Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 29 Sep 2017 17:17:17 +0900 Subject: [PATCH 04/57] add color_jitter --- chainercv/transforms/__init__.py | 1 + chainercv/transforms/image/color_jitter.py | 94 +++++++++++++++++++ docs/source/reference/transforms.rst | 4 + .../image_tests/test_color_jitter.py | 36 +++++++ 4 files changed, 135 insertions(+) create mode 100644 chainercv/transforms/image/color_jitter.py create mode 100644 tests/transforms_tests/image_tests/test_color_jitter.py diff --git a/chainercv/transforms/__init__.py b/chainercv/transforms/__init__.py index 045b3be405..8b9b6b93dc 100644 --- a/chainercv/transforms/__init__.py +++ b/chainercv/transforms/__init__.py @@ -3,6 +3,7 @@ from chainercv.transforms.bbox.resize_bbox import resize_bbox # NOQA from chainercv.transforms.bbox.translate_bbox import translate_bbox # NOQA from chainercv.transforms.image.center_crop import center_crop # NOQA +from chainercv.transforms.image.color_jitter import color_jitter # NOQA from chainercv.transforms.image.flip import flip # NOQA from chainercv.transforms.image.pca_lighting import pca_lighting # NOQA from chainercv.transforms.image.random_crop import random_crop # NOQA diff --git 
a/chainercv/transforms/image/color_jitter.py b/chainercv/transforms/image/color_jitter.py new file mode 100644 index 0000000000..1f7c3c5331 --- /dev/null +++ b/chainercv/transforms/image/color_jitter.py @@ -0,0 +1,94 @@ +import numpy as np +import random + + +def _grayscale(img): + out = np.zeros_like(img) + out[:] = 0.299 * img[0] + 0.587 * img[1] + 0.114 * img[2] + return out + + +def _blend(img_a, img_b, alpha): + return alpha * img_a + (1 - alpha) * img_b + + +def _brightness(img, var): + alpha = 1 + np.random.uniform(-var, var) + return _blend(img, np.zeros_like(img), alpha), alpha + + +def _contrast(img, var): + gray = _grayscale(img) + gray.fill(gray[0].mean()) + + alpha = 1 + np.random.uniform(-var, var) + return _blend(img, gray, alpha), alpha + + +def _saturation(img, var): + gray = _grayscale(img) + + alpha = 1 + np.random.uniform(-var, var) + return _blend(img, gray, alpha), alpha + + +def color_jitter(img, brightness_var=0.4, contrast_var=0.4, + saturation_var=0.4, return_param=False): + """Data augmentation on brightness, contrast and saturation. + + Args: + img (~numpy.ndarray): An image array to be augmented. This is in + CHW and RGB format. + brightness_var (float): Alpha for brightness is sampled from + :obj:`unif(-brightness_var, brightness_var)`. + contrast_var (float): Alpha for contrast is sampled from + :obj:`unif(-contrast_var, contrast_var)`. + saturation_var (float): Alpha for contrast is sampled from + :obj:`unif(-saturation_var, saturation_var)`. + return_param (bool): Returns parameters if :obj:`True`. + + Returns: + ~numpy.ndarray or (~numpy.ndarray, dict): + + If :obj:`return_param = False`, + returns an color jittered image. + + If :obj:`return_param = True`, returns a tuple of an array and a + dictionary :obj:`param`. + :obj:`param` is a dictionary of intermediate parameters whose + contents are listed below with key, value-type and the description + of the value. 
+ + * **order** (*list of strings*): List containing three strings: \ + :obj:`'brightness'`, :obj:`'contrast'` and :obj:`'saturation'`. \ + They are ordered according to the order in which the data \ + augmentation functions are applied. + * **brightness_alpha** (*float*): Alpha used for brightness \ + data augmentation. + * **contrast_alpha** (*float*): Alpha used for contrast \ + data augmentation. + * **saturation_alpha** (*float*): Alpha used for saturation \ + data augmentation. + + """ + funcs = [] + if brightness_var > 0: + funcs.append(('brightness', lambda x: _brightness(x, brightness_var))) + if contrast_var > 0: + funcs.append(('contrast', lambda x: _contrast(x, contrast_var))) + if saturation_var > 0: + funcs.append(('saturation', lambda x: _saturation(x, saturation_var))) + random.shuffle(funcs) + + params = {'order': [key for key, val in funcs], + 'brightness_alpha': 1, + 'contrast_alpha': 1, + 'saturation_alpha': 1} + for key, func in funcs: + img, alpha = func(img) + params[key + '_alpha'] = alpha + img = np.minimum(np.maximum(img, 0), 255) + if return_param: + return img, params + else: + return img diff --git a/docs/source/reference/transforms.rst b/docs/source/reference/transforms.rst index 6e9c818b85..7b9bffeb92 100644 --- a/docs/source/reference/transforms.rst +++ b/docs/source/reference/transforms.rst @@ -11,6 +11,10 @@ center_crop ~~~~~~~~~~~ .. autofunction:: center_crop +color_jitter +~~~~~~~~~~~~ +.. autofunction:: color_jitter + flip ~~~~ .. 
autofunction:: flip diff --git a/tests/transforms_tests/image_tests/test_color_jitter.py b/tests/transforms_tests/image_tests/test_color_jitter.py new file mode 100644 index 0000000000..af72f9198a --- /dev/null +++ b/tests/transforms_tests/image_tests/test_color_jitter.py @@ -0,0 +1,36 @@ +import unittest + +import numpy as np + +from chainer import testing +from chainercv.transforms import color_jitter + + +class TestColorJitter(unittest.TestCase): + + def test_color_jitter_run_data_augmentation(self): + img = 255 * np.random.uniform(size=(3, 48, 32)).astype(np.float32) + + out, param = color_jitter(img, return_param=True) + self.assertEqual(out.shape, (3, 48, 32)) + self.assertEqual(out.dtype, img.dtype) + self.assertLessEqual(np.max(img), 255) + self.assertGreaterEqual(np.min(img), 0) + + self.assertEqual( + sorted(param['order']), ['brightness', 'contrast', 'saturation']) + self.assertIsInstance(param['brightness_alpha'], float) + self.assertIsInstance(param['contrast_alpha'], float) + self.assertIsInstance(param['saturation_alpha'], float) + + def test_color_jitter_no_data_augmentation(self): + img = 255 * np.random.uniform(size=(3, 48, 32)).astype(np.float32) + + out, param = color_jitter(img, 0, 0, 0, return_param=True) + np.testing.assert_equal(out, img) + self.assertEqual(param['brightness_alpha'], 1) + self.assertEqual(param['contrast_alpha'], 1) + self.assertEqual(param['saturation_alpha'], 1) + + +testing.run_module(__name__, __file__) From 63ae34656bb5b2516ab58d986ba95658f08f35d8 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 29 Sep 2017 17:36:27 +0900 Subject: [PATCH 05/57] use color_jitter --- examples/classification/train_imagenet.py | 4 +++- examples/classification/train_imagenet_mn.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/examples/classification/train_imagenet.py b/examples/classification/train_imagenet.py index 65e9c93f01..7a84c8791c 100644 --- a/examples/classification/train_imagenet.py +++ 
b/examples/classification/train_imagenet.py @@ -13,6 +13,7 @@ from chainercv.datasets import DirectoryParsingLabelDataset from chainercv.transforms import center_crop +from chainercv.transforms import color_jitter from chainercv.transforms import pca_lighting from chainercv.transforms import random_flip from chainercv.transforms import random_sized_crop @@ -38,8 +39,9 @@ def __call__(self, in_data): _, H, W = img.shape img = random_sized_crop(img) img = resize(img, (224, 224)) - img = random_flip(img, x_random=True) + img = color_jitter(img) img = pca_lighting(img, 25) + img = random_flip(img, x_random=True) img -= self.mean return img, label diff --git a/examples/classification/train_imagenet_mn.py b/examples/classification/train_imagenet_mn.py index 3e3d5cd361..82d6b1daae 100644 --- a/examples/classification/train_imagenet_mn.py +++ b/examples/classification/train_imagenet_mn.py @@ -13,6 +13,7 @@ from chainercv.datasets import DirectoryParsingLabelDataset from chainercv.transforms import center_crop +from chainercv.transforms import color_jitter from chainercv.transforms import pca_lighting from chainercv.transforms import random_flip from chainercv.transforms import random_sized_crop @@ -40,8 +41,9 @@ def __call__(self, in_data): _, H, W = img.shape img = random_sized_crop(img) img = resize(img, (224, 224)) - img = random_flip(img, x_random=True) + img = color_jitter(img) img = pca_lighting(img, 25) + img = random_flip(img, x_random=True) img -= self.mean return img, label From c0c536b7a0810a1e8b364b384e4da57d96efe139 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 29 Sep 2017 18:00:05 +0900 Subject: [PATCH 06/57] update readme --- examples/classification/README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/examples/classification/README.md b/examples/classification/README.md index b24f016dd3..e1aec1ce6e 100644 --- a/examples/classification/README.md +++ b/examples/classification/README.md @@ -28,6 +28,18 @@ The score is reported 
using a weight converted from a weight trained by Caffe. $ python eval_imagenet.py [--model vgg16|resnet50|resnet101|resnet152] [--pretrained_model ] [--batchsize ] [--gpu ] [--crop center|10] ``` +## Training Models + +Training with single GPU. +``` +$ python train_imagenet.py [--gpu ] +``` + +Training with multiple GPUs. Please install ChainerMN to use this feature. +``` +$ mpiexec -n N python train_imagenet_mn.py +``` + ## How to prepare ImageNet Dataset From 056ec6b546ba3f3120ff12398dec85ef69c4851a Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 4 Oct 2017 17:52:18 +0900 Subject: [PATCH 07/57] make training code follow Training ImageNet 1 hour paper --- examples/classification/train_imagenet.py | 170 ------------------- examples/classification/train_imagenet_mn.py | 39 +++-- 2 files changed, 23 insertions(+), 186 deletions(-) delete mode 100644 examples/classification/train_imagenet.py diff --git a/examples/classification/train_imagenet.py b/examples/classification/train_imagenet.py deleted file mode 100644 index 7a84c8791c..0000000000 --- a/examples/classification/train_imagenet.py +++ /dev/null @@ -1,170 +0,0 @@ -from __future__ import division -import matplotlib -matplotlib.use('agg') -import argparse - -import chainer -from chainer.datasets import TransformDataset -from chainer import iterators -from chainer.links import Classifier -from chainer import training -from chainer.training import extensions - -from chainercv.datasets import DirectoryParsingLabelDataset - -from chainercv.transforms import center_crop -from chainercv.transforms import color_jitter -from chainercv.transforms import pca_lighting -from chainercv.transforms import random_flip -from chainercv.transforms import random_sized_crop -from chainercv.transforms import resize -from chainercv.transforms import scale - -from chainercv.datasets import directory_parsing_label_names - -from chainercv.links import ResNet101 -from chainercv.links import ResNet152 -from chainercv.links import 
ResNet18 -from chainercv.links import ResNet50 - - -class TrainTransform(object): - - def __init__(self, mean): - self.mean = mean - - def __call__(self, in_data): - # https://github.com/facebook/fb.resnet.torch/blob/master/datasets/imagenet.lua#L80 - img, label = in_data - _, H, W = img.shape - img = random_sized_crop(img) - img = resize(img, (224, 224)) - img = color_jitter(img) - img = pca_lighting(img, 25) - img = random_flip(img, x_random=True) - img -= self.mean - return img, label - - -class ValTransform(object): - - def __init__(self, mean): - self.mean = mean - - def __call__(self, in_data): - img, label = in_data - img = scale(img, 256) - img = center_crop(img, (224, 224)) - img -= self.mean - return img, label - - -def main(): - archs = { - 'resnet18': {'class': ResNet18, 'score_layer_name': 'fc6', - 'kwargs': {'fb_resnet': True}}, - 'resnet50': {'class': ResNet50, 'score_layer_name': 'fc6', - 'kwargs': {'fb_resnet': True}}, - 'resnet101': {'class': ResNet101, 'score_layer_name': 'fc6', - 'kwargs': {'fb_resnet': True}}, - 'resnet152': {'class': ResNet152, 'score_layer_name': 'fc6', - 'kwargs': {'fb_resnet': True}} - } - parser = argparse.ArgumentParser( - description='Learning convnet from ILSVRC2012 dataset') - parser.add_argument('train', help='Path to root of the train dataset') - parser.add_argument('val', help='Path to root of the validation dataset') - parser.add_argument('--arch', - '-a', choices=archs.keys(), default='resnet18', - help='Convnet architecture') - parser.add_argument('--pretrained_model') - parser.add_argument('--gpu', type=int, default=-1) - parser.add_argument('--loaderjob', type=int, default=4) - parser.add_argument('--batchsize', type=int, default=64) - parser.add_argument('--lr', type=float, default=1e-2) - parser.add_argument('--momentum', type=float, default=0.9) - parser.add_argument('--weight_decay', type=float, default=0.0001) - parser.add_argument('--out', type=str, default='result') - parser.add_argument('--step_size', 
type=int, default=30) - parser.add_argument('--epoch', type=int, default=90) - args = parser.parse_args() - - train_data = DirectoryParsingLabelDataset(args.train) - val_data = DirectoryParsingLabelDataset(args.val) - label_names = directory_parsing_label_names(args.train) - print('finished loading dataset') - - arch = archs[args.arch] - extractor = arch['class'](n_class=len(label_names), **arch['kwargs']) - extractor.pick = arch['score_layer_name'] - model = Classifier(extractor) - - train_data = TransformDataset(train_data, TrainTransform(extractor.mean)) - val_data = TransformDataset(val_data, ValTransform(extractor.mean)) - train_iter = chainer.iterators.MultiprocessIterator( - train_data, args.batchsize, shared_mem=3 * 224 * 224 * 4) - val_iter = iterators.MultiprocessIterator( - val_data, args.batchsize, - repeat=False, shuffle=False, shared_mem=3 * 224 * 224 * 4) - - optimizer = chainer.optimizers.MomentumSGD( - lr=args.lr, momentum=args.momentum) - optimizer.setup(model) - optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args.weight_decay)) - - if args.gpu >= 0: - chainer.cuda.get_device(args.gpu).use() - model.to_gpu() - - updater = chainer.training.StandardUpdater( - train_iter, optimizer, device=args.gpu) - - trainer = training.Trainer( - updater, (args.epoch, 'epoch'), out=args.out) - trainer.extend(extensions.ExponentialShift('lr', 0.1), - trigger=(args.step_size, 'epoch')) - - log_interval = 0.1, 'epoch' - print_interval = 0.1, 'epoch' - plot_interval = 1, 'epoch' - - trainer.extend( - extensions.snapshot_object(extractor, 'snapshot_model.npz'), - trigger=(args.epoch, 'epoch')) - trainer.extend(extensions.LogReport(trigger=log_interval)) - - trainer.extend(extensions.PrintReport( - ['iteration', 'epoch', 'elapsed_time', 'lr', - 'main/loss', 'validation/main/loss', - 'main/accuracy', 'validation/main/accuracy'] - ), trigger=print_interval) - - trainer.extend(extensions.ProgressBar(update_interval=10)) - - if extensions.PlotReport.available(): - 
trainer.extend( - extensions.PlotReport( - ['main/loss', 'validation/main/loss'], - file_name='loss.png', trigger=plot_interval - ), - trigger=plot_interval - ) - trainer.extend( - extensions.PlotReport( - ['main/accuracy', 'validation/main/accuracy'], - file_name='accuracy.png', trigger=plot_interval - ), - trigger=plot_interval - ) - trainer.extend( - extensions.Evaluator(val_iter, model, device=args.gpu), - trigger=(1, 'epoch') - ) - - trainer.extend(extensions.dump_graph('main/loss')) - - trainer.run() - - -if __name__ == '__main__': - main() diff --git a/examples/classification/train_imagenet_mn.py b/examples/classification/train_imagenet_mn.py index 82d6b1daae..e203b99c01 100644 --- a/examples/classification/train_imagenet_mn.py +++ b/examples/classification/train_imagenet_mn.py @@ -7,14 +7,13 @@ from chainer.datasets import TransformDataset from chainer import iterators from chainer.links import Classifier +from chainer.optimizer import WeightDecay from chainer import training from chainer.training import extensions from chainercv.datasets import DirectoryParsingLabelDataset from chainercv.transforms import center_crop -from chainercv.transforms import color_jitter -from chainercv.transforms import pca_lighting from chainercv.transforms import random_flip from chainercv.transforms import random_sized_crop from chainercv.transforms import resize @@ -29,6 +28,8 @@ import chainermn +from corrected_momentum_sgd import CorrectedMomentumSGD + class TrainTransform(object): @@ -41,8 +42,6 @@ def __call__(self, in_data): _, H, W = img.shape img = random_sized_crop(img) img = resize(img, (224, 224)) - img = color_jitter(img) - img = pca_lighting(img, 25) img = random_flip(img, x_random=True) img -= self.mean return img, label @@ -84,19 +83,23 @@ def main(): parser.add_argument('--pretrained_model') # parser.add_argument('--gpu', type=int, default=-1) parser.add_argument('--loaderjob', type=int, default=4) - parser.add_argument('--batchsize', type=int, default=64, + 
parser.add_argument('--batchsize', type=int, default=32, help='Batch size for each worker') - parser.add_argument('--lr', type=float, default=1e-2) + parser.add_argument('--lr', type=float) parser.add_argument('--momentum', type=float, default=0.9) parser.add_argument('--weight_decay', type=float, default=0.0001) parser.add_argument('--out', type=str, default='result') - parser.add_argument('--step_size', type=int, default=30) parser.add_argument('--epoch', type=int, default=90) args = parser.parse_args() comm = chainermn.create_communicator(args.communicator) device = comm.intra_rank + if args.lr is not None: + lr = args.lr + else: + lr = 0.1 * (args.batchsize * comm.size) / 256 + label_names = directory_parsing_label_names(args.train) arch = archs[args.arch] @@ -107,7 +110,8 @@ def main(): if comm.rank == 0: train_data = DirectoryParsingLabelDataset(args.train) val_data = DirectoryParsingLabelDataset(args.val) - train_data = TransformDataset(train_data, TrainTransform(extractor.mean)) + train_data = TransformDataset( + train_data, TrainTransform(extractor.mean)) val_data = TransformDataset(val_data, ValTransform(extractor.mean)) print('finished loading dataset') else: @@ -121,14 +125,16 @@ def main(): repeat=False, shuffle=False, shared_mem=3 * 224 * 224 * 4) optimizer = chainermn.create_multi_node_optimizer( - chainer.optimizers.MomentumSGD( - lr=args.lr, momentum=args.momentum), comm) + CorrectedMomentumSGD(lr=lr, momentum=args.momentum), comm) optimizer.setup(model) - optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args.weight_decay)) + for param in model.params(): + if param.name != 'beta' and param.name != 'gamma': + param.update_rule.add_hook(WeightDecay(args.weight_decay)) if device >= 0: chainer.cuda.get_device(device).use() model.to_gpu() + chainer.cuda.set_max_workspace_size(1 * 1024 * 1024 * 1024) updater = chainer.training.StandardUpdater( train_iter, optimizer, device=device) @@ -136,7 +142,8 @@ def main(): trainer = training.Trainer( updater, 
(args.epoch, 'epoch'), out=args.out) trainer.extend(extensions.ExponentialShift('lr', 0.1), - trigger=(args.step_size, 'epoch')) + trigger=chainer.training.triggers.ManualScheduleTrigger( + [30, 60, 80], 'epoch')) evaluator = chainermn.create_multi_node_evaluator( extensions.Evaluator(val_iter, model, device=device), comm) trainer.extend(evaluator, trigger=(1, 'epoch')) @@ -147,13 +154,14 @@ def main(): if comm.rank == 0: trainer.extend( - extensions.snapshot_object(extractor, 'snapshot_model.npz'), + extensions.snapshot_object( + extractor, 'snapshot_model_{.updater.epoch}.npz'), trigger=(args.epoch, 'epoch')) trainer.extend(extensions.LogReport(trigger=log_interval)) trainer.extend(extensions.PrintReport( ['iteration', 'epoch', 'elapsed_time', 'lr', - 'main/loss', 'validation/main/loss', - 'main/accuracy', 'validation/main/accuracy'] + 'main/loss', 'validation/main/loss', + 'main/accuracy', 'validation/main/accuracy'] ), trigger=print_interval) trainer.extend(extensions.ProgressBar(update_interval=10)) @@ -173,7 +181,6 @@ def main(): trigger=plot_interval ) - trainer.extend(extensions.dump_graph('main/loss')) trainer.run() From 03d4ac17b46ebc42224ecff2aeeb88dbef4aba3b Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 4 Oct 2017 17:53:52 +0900 Subject: [PATCH 08/57] set initial gamma to zero for the last bn of block --- chainercv/links/model/resnet/building_block.py | 1 + 1 file changed, 1 insertion(+) diff --git a/chainercv/links/model/resnet/building_block.py b/chainercv/links/model/resnet/building_block.py index e4a4dbc948..9bd59eca2d 100644 --- a/chainercv/links/model/resnet/building_block.py +++ b/chainercv/links/model/resnet/building_block.py @@ -83,6 +83,7 @@ def __init__(self, in_channels, mid_channels, out_channels, nobias=True) self.conv3 = Conv2DBNActiv(mid_channels, out_channels, 1, 1, 0, initialW=initialW, nobias=True, + bn_kwargs={'initial_gamma': 0}, activ=lambda x: x) if shortcut is not None: self.shortcut = shortcut From 
193e47f845441fcdcb565f0b55bfb12c32bf276c Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 4 Oct 2017 17:55:47 +0900 Subject: [PATCH 09/57] add corrected_momentum_sgd --- .../classification/corrected_momentum_sgd.py | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 examples/classification/corrected_momentum_sgd.py diff --git a/examples/classification/corrected_momentum_sgd.py b/examples/classification/corrected_momentum_sgd.py new file mode 100644 index 0000000000..a2c1a66d2c --- /dev/null +++ b/examples/classification/corrected_momentum_sgd.py @@ -0,0 +1,62 @@ +from chainer import cuda +from chainer import optimizer + + +_default_hyperparam = optimizer.Hyperparameter() +_default_hyperparam.lr = 0.01 +_default_hyperparam.momentum = 0.9 + + +class CorrectedMomentumSGDRule(optimizer.UpdateRule): + + # use update rule used in frameworks like Torch. + + def __init__(self, parent_hyperparam=None, lr=None, momentum=None): + super(CorrectedMomentumSGDRule, self).__init__( + parent_hyperparam or _default_hyperparam) + if lr is not None: + self.hyperparam.lr = lr + if momentum is not None: + self.hyperparam.momentum = momentum + + def init_state(self, param): + xp = cuda.get_array_module(param.data) + with cuda.get_device_from_array(param.data): + self.state['v'] = xp.zeros_like(param.data) + + def update_core_cpu(self, param): + grad = param.grad + if grad is None: + return + v = self.state['v'] + v *= self.hyperparam.momentum + v -= self.hyperparam.lr * grad + param.data += v + + def update_core_gpu(self, param): + grad = param.grad + if grad is None: + return + cuda.elementwise( + 'T grad, T lr, T momentum', + 'T param, T v', + '''v = momentum * v - grad; + param += lr * v;''', + 'momentum_sgd')( + grad, self.hyperparam.lr, self.hyperparam.momentum, + param.data, self.state['v']) + + +class CorrectedMomentumSGD(optimizer.GradientMethod): + + def __init__(self, lr=_default_hyperparam.lr, + momentum=_default_hyperparam.momentum): + 
super(CorrectedMomentumSGD, self).__init__() + self.hyperparam.lr = lr + self.hyperparam.momentum = momentum + + lr = optimizer.HyperparameterProxy('lr') + momentum = optimizer.HyperparameterProxy('momentum') + + def create_update_rule(self): + return CorrectedMomentumSGDRule(self.hyperparam) From 1cb7d37348bee5b83648e4aeb3460fc2ea86a4ac Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 4 Oct 2017 18:03:09 +0900 Subject: [PATCH 10/57] add observe_lr extension --- examples/classification/train_imagenet_mn.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/classification/train_imagenet_mn.py b/examples/classification/train_imagenet_mn.py index e203b99c01..c542e031ab 100644 --- a/examples/classification/train_imagenet_mn.py +++ b/examples/classification/train_imagenet_mn.py @@ -153,6 +153,8 @@ def main(): plot_interval = 1, 'epoch' if comm.rank == 0: + trainer.extend(chainer.training.extensions.observe_lr(), + trigger=log_interval) trainer.extend( extensions.snapshot_object( extractor, 'snapshot_model_{.updater.epoch}.npz'), From d39ebf99faa45cc8a7d33ce75ba1f98b4d3ab0dc Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 7 Mar 2018 12:22:03 +0900 Subject: [PATCH 11/57] remove ResNet18 --- chainercv/links/__init__.py | 1 - chainercv/links/model/resnet/__init__.py | 1 - chainercv/links/model/resnet/resnet.py | 18 ------------------ examples/classification/train_imagenet_mn.py | 3 --- 4 files changed, 23 deletions(-) diff --git a/chainercv/links/__init__.py b/chainercv/links/__init__.py index 293621360a..a7752c95e3 100644 --- a/chainercv/links/__init__.py +++ b/chainercv/links/__init__.py @@ -8,7 +8,6 @@ from chainercv.links.model.faster_rcnn.faster_rcnn_vgg import FasterRCNNVGG16 # NOQA from chainercv.links.model.resnet import ResNet101 # NOQA from chainercv.links.model.resnet import ResNet152 # NOQA -from chainercv.links.model.resnet import ResNet18 # NOQA from chainercv.links.model.resnet import ResNet50 # NOQA from 
chainercv.links.model.segnet.segnet_basic import SegNetBasic # NOQA from chainercv.links.model.ssd import SSD300 # NOQA diff --git a/chainercv/links/model/resnet/__init__.py b/chainercv/links/model/resnet/__init__.py index 6154577145..7f687a2930 100644 --- a/chainercv/links/model/resnet/__init__.py +++ b/chainercv/links/model/resnet/__init__.py @@ -3,5 +3,4 @@ from chainercv.links.model.resnet.resnet import ResNet # NOQA from chainercv.links.model.resnet.resnet import ResNet101 # NOQA from chainercv.links.model.resnet.resnet import ResNet152 # NOQA -from chainercv.links.model.resnet.resnet import ResNet18 # NOQA from chainercv.links.model.resnet.resnet import ResNet50 # NOQA diff --git a/chainercv/links/model/resnet/resnet.py b/chainercv/links/model/resnet/resnet.py index 513cdc8864..7fa8374be6 100644 --- a/chainercv/links/model/resnet/resnet.py +++ b/chainercv/links/model/resnet/resnet.py @@ -204,24 +204,6 @@ def _global_average_pooling_2d(x): return h -class ResNet18(ResNet): - - """ResNet-18 Network. - - Please consult the documentation for :class:`ResNet`. - - .. seealso:: - :class:`chainercv.links.model.resnet.ResNet` - - """ - - def __init__(self, n_class=None, pretrained_model=None, - mean=None, initialW=None, fb_resnet=False): - super(ResNet18, self).__init__( - 'resnet18', n_class, pretrained_model, - mean, initialW, fb_resnet) - - class ResNet50(ResNet): """ResNet-50 Network. 
diff --git a/examples/classification/train_imagenet_mn.py b/examples/classification/train_imagenet_mn.py index c542e031ab..a8408d6f3d 100644 --- a/examples/classification/train_imagenet_mn.py +++ b/examples/classification/train_imagenet_mn.py @@ -23,7 +23,6 @@ from chainercv.links import ResNet101 from chainercv.links import ResNet152 -from chainercv.links import ResNet18 from chainercv.links import ResNet50 import chainermn @@ -62,8 +61,6 @@ def __call__(self, in_data): def main(): archs = { - 'resnet18': {'class': ResNet18, 'score_layer_name': 'fc6', - 'kwargs': {'fb_resnet': True}}, 'resnet50': {'class': ResNet50, 'score_layer_name': 'fc6', 'kwargs': {'fb_resnet': True}}, 'resnet101': {'class': ResNet101, 'score_layer_name': 'fc6', From d357af3e687d7420d3806a8d1f41d891fef532f9 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 24 May 2018 10:03:07 +0900 Subject: [PATCH 12/57] delete color_jitter --- chainercv/transforms/__init__.py | 1 - chainercv/transforms/image/color_jitter.py | 94 ------------------- .../image_tests/test_color_jitter.py | 36 ------- 3 files changed, 131 deletions(-) delete mode 100644 chainercv/transforms/image/color_jitter.py delete mode 100644 tests/transforms_tests/image_tests/test_color_jitter.py diff --git a/chainercv/transforms/__init__.py b/chainercv/transforms/__init__.py index 223b0ab1dd..c5b169a9fa 100644 --- a/chainercv/transforms/__init__.py +++ b/chainercv/transforms/__init__.py @@ -3,7 +3,6 @@ from chainercv.transforms.bbox.resize_bbox import resize_bbox # NOQA from chainercv.transforms.bbox.translate_bbox import translate_bbox # NOQA from chainercv.transforms.image.center_crop import center_crop # NOQA -from chainercv.transforms.image.color_jitter import color_jitter # NOQA from chainercv.transforms.image.flip import flip # NOQA from chainercv.transforms.image.pca_lighting import pca_lighting # NOQA from chainercv.transforms.image.random_crop import random_crop # NOQA diff --git 
a/chainercv/transforms/image/color_jitter.py b/chainercv/transforms/image/color_jitter.py deleted file mode 100644 index 1f7c3c5331..0000000000 --- a/chainercv/transforms/image/color_jitter.py +++ /dev/null @@ -1,94 +0,0 @@ -import numpy as np -import random - - -def _grayscale(img): - out = np.zeros_like(img) - out[:] = 0.299 * img[0] + 0.587 * img[1] + 0.114 * img[2] - return out - - -def _blend(img_a, img_b, alpha): - return alpha * img_a + (1 - alpha) * img_b - - -def _brightness(img, var): - alpha = 1 + np.random.uniform(-var, var) - return _blend(img, np.zeros_like(img), alpha), alpha - - -def _contrast(img, var): - gray = _grayscale(img) - gray.fill(gray[0].mean()) - - alpha = 1 + np.random.uniform(-var, var) - return _blend(img, gray, alpha), alpha - - -def _saturation(img, var): - gray = _grayscale(img) - - alpha = 1 + np.random.uniform(-var, var) - return _blend(img, gray, alpha), alpha - - -def color_jitter(img, brightness_var=0.4, contrast_var=0.4, - saturation_var=0.4, return_param=False): - """Data augmentation on brightness, contrast and saturation. - - Args: - img (~numpy.ndarray): An image array to be augmented. This is in - CHW and RGB format. - brightness_var (float): Alpha for brightness is sampled from - :obj:`unif(-brightness_var, brightness_var)`. - contrast_var (float): Alpha for contrast is sampled from - :obj:`unif(-contrast_var, contrast_var)`. - saturation_var (float): Alpha for contrast is sampled from - :obj:`unif(-saturation_var, saturation_var)`. - return_param (bool): Returns parameters if :obj:`True`. - - Returns: - ~numpy.ndarray or (~numpy.ndarray, dict): - - If :obj:`return_param = False`, - returns an color jittered image. - - If :obj:`return_param = True`, returns a tuple of an array and a - dictionary :obj:`param`. - :obj:`param` is a dictionary of intermediate parameters whose - contents are listed below with key, value-type and the description - of the value. 
- - * **order** (*list of strings*): List containing three strings: \ - :obj:`'brightness'`, :obj:`'contrast'` and :obj:`'saturation'`. \ - They are ordered according to the order in which the data \ - augmentation functions are applied. - * **brightness_alpha** (*float*): Alpha used for brightness \ - data augmentation. - * **contrast_alpha** (*float*): Alpha used for contrast \ - data augmentation. - * **saturation_alpha** (*float*): Alpha used for saturation \ - data augmentation. - - """ - funcs = [] - if brightness_var > 0: - funcs.append(('brightness', lambda x: _brightness(x, brightness_var))) - if contrast_var > 0: - funcs.append(('contrast', lambda x: _contrast(x, contrast_var))) - if saturation_var > 0: - funcs.append(('saturation', lambda x: _saturation(x, saturation_var))) - random.shuffle(funcs) - - params = {'order': [key for key, val in funcs], - 'brightness_alpha': 1, - 'contrast_alpha': 1, - 'saturation_alpha': 1} - for key, func in funcs: - img, alpha = func(img) - params[key + '_alpha'] = alpha - img = np.minimum(np.maximum(img, 0), 255) - if return_param: - return img, params - else: - return img diff --git a/tests/transforms_tests/image_tests/test_color_jitter.py b/tests/transforms_tests/image_tests/test_color_jitter.py deleted file mode 100644 index af72f9198a..0000000000 --- a/tests/transforms_tests/image_tests/test_color_jitter.py +++ /dev/null @@ -1,36 +0,0 @@ -import unittest - -import numpy as np - -from chainer import testing -from chainercv.transforms import color_jitter - - -class TestColorJitter(unittest.TestCase): - - def test_color_jitter_run_data_augmentation(self): - img = 255 * np.random.uniform(size=(3, 48, 32)).astype(np.float32) - - out, param = color_jitter(img, return_param=True) - self.assertEqual(out.shape, (3, 48, 32)) - self.assertEqual(out.dtype, img.dtype) - self.assertLessEqual(np.max(img), 255) - self.assertGreaterEqual(np.min(img), 0) - - self.assertEqual( - sorted(param['order']), ['brightness', 'contrast', 
'saturation']) - self.assertIsInstance(param['brightness_alpha'], float) - self.assertIsInstance(param['contrast_alpha'], float) - self.assertIsInstance(param['saturation_alpha'], float) - - def test_color_jitter_no_data_augmentation(self): - img = 255 * np.random.uniform(size=(3, 48, 32)).astype(np.float32) - - out, param = color_jitter(img, 0, 0, 0, return_param=True) - np.testing.assert_equal(out, img) - self.assertEqual(param['brightness_alpha'], 1) - self.assertEqual(param['contrast_alpha'], 1) - self.assertEqual(param['saturation_alpha'], 1) - - -testing.run_module(__name__, __file__) From c677fcb62f9768a90d44fe090615a3891ee4c0ae Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 24 May 2018 10:03:26 +0900 Subject: [PATCH 13/57] move corrected_momentum_sgd to chainer_experimental --- chainercv/chainer_experimental/__init__.py | 1 + chainercv/chainer_experimental/optimizers/__init__.py | 1 + .../optimizers}/corrected_momentum_sgd.py | 0 examples/classification/README.md | 5 ----- examples/classification/train_imagenet_mn.py | 4 +--- 5 files changed, 3 insertions(+), 8 deletions(-) create mode 100644 chainercv/chainer_experimental/optimizers/__init__.py rename {examples/classification => chainercv/chainer_experimental/optimizers}/corrected_momentum_sgd.py (100%) diff --git a/chainercv/chainer_experimental/__init__.py b/chainercv/chainer_experimental/__init__.py index b91a45fb64..f0227014bd 100644 --- a/chainercv/chainer_experimental/__init__.py +++ b/chainercv/chainer_experimental/__init__.py @@ -1 +1,2 @@ from chainercv.chainer_experimental import datasets # NOQA +from chainercv.chainer_experimental import optimizers # NOQA diff --git a/chainercv/chainer_experimental/optimizers/__init__.py b/chainercv/chainer_experimental/optimizers/__init__.py new file mode 100644 index 0000000000..0ce7d43a78 --- /dev/null +++ b/chainercv/chainer_experimental/optimizers/__init__.py @@ -0,0 +1 @@ +from chainercv.chainer_experimental.optimizers.corrected_momentum_sgd 
import CorrectedMomentumSGD # NOQA diff --git a/examples/classification/corrected_momentum_sgd.py b/chainercv/chainer_experimental/optimizers/corrected_momentum_sgd.py similarity index 100% rename from examples/classification/corrected_momentum_sgd.py rename to chainercv/chainer_experimental/optimizers/corrected_momentum_sgd.py diff --git a/examples/classification/README.md b/examples/classification/README.md index 67d6f6227b..fa0350dddd 100644 --- a/examples/classification/README.md +++ b/examples/classification/README.md @@ -31,11 +31,6 @@ $ python eval_imagenet.py [--model vgg16|resnet50|resnet10 ## Training Models -Training with single GPU. -``` -$ python train_imagenet.py [--gpu ] -``` - Training with multiple GPUs. Please install ChainerMN to use this feature. ``` $ mpiexec -n N python train_imagenet_mn.py diff --git a/examples/classification/train_imagenet_mn.py b/examples/classification/train_imagenet_mn.py index a8408d6f3d..6ae388bfdb 100644 --- a/examples/classification/train_imagenet_mn.py +++ b/examples/classification/train_imagenet_mn.py @@ -21,14 +21,13 @@ from chainercv.datasets import directory_parsing_label_names +from chainercv.chainer_experimental.optimizers import CorrectedMomentumSGD from chainercv.links import ResNet101 from chainercv.links import ResNet152 from chainercv.links import ResNet50 import chainermn -from corrected_momentum_sgd import CorrectedMomentumSGD - class TrainTransform(object): @@ -78,7 +77,6 @@ def main(): parser.add_argument('--communicator', type=str, default='hierarchical', help='Type of communicator') parser.add_argument('--pretrained_model') - # parser.add_argument('--gpu', type=int, default=-1) parser.add_argument('--loaderjob', type=int, default=4) parser.add_argument('--batchsize', type=int, default=32, help='Batch size for each worker') From d5c1cd309a4bff09dbb7ea7addb7e60c9db29b15 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 24 May 2018 11:01:07 +0900 Subject: [PATCH 14/57] delete color_jitter from 
reference --- .../optimizers/corrected_momentum_sgd.py | 25 ++++++++++++++++++- docs/source/reference/transforms.rst | 4 --- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/chainercv/chainer_experimental/optimizers/corrected_momentum_sgd.py b/chainercv/chainer_experimental/optimizers/corrected_momentum_sgd.py index a2c1a66d2c..fb9a09bf27 100644 --- a/chainercv/chainer_experimental/optimizers/corrected_momentum_sgd.py +++ b/chainercv/chainer_experimental/optimizers/corrected_momentum_sgd.py @@ -9,7 +9,22 @@ class CorrectedMomentumSGDRule(optimizer.UpdateRule): - # use update rule used in frameworks like Torch. + """Update rule for the corrected momentum SGD. + + See :class:`~chainer.optimizers.CorrectedMomentumSGD` for the default + values of the hyperparameters. + + This implements momentum correction discussed in the third section of + `Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour \ + `_. + + Args: + parent_hyperparam (~chainer.optimizer.Hyperparameter): Hyperparameter + that provides the default values. + lr (float): Learning rate. + momentum (float): Exponential decay rate of the first order moment. + + """ def __init__(self, parent_hyperparam=None, lr=None, momentum=None): super(CorrectedMomentumSGDRule, self).__init__( @@ -49,6 +64,14 @@ def update_core_gpu(self, param): class CorrectedMomentumSGD(optimizer.GradientMethod): + """Momentum SGD optimizer. + + Args: + lr (float): Learning rate. + momentum (float): Exponential decay rate of the first order moment. + + """ + def __init__(self, lr=_default_hyperparam.lr, momentum=_default_hyperparam.momentum): super(CorrectedMomentumSGD, self).__init__() diff --git a/docs/source/reference/transforms.rst b/docs/source/reference/transforms.rst index 871730cfa9..5bf5d2078a 100644 --- a/docs/source/reference/transforms.rst +++ b/docs/source/reference/transforms.rst @@ -11,10 +11,6 @@ center_crop ~~~~~~~~~~~ .. autofunction:: center_crop -color_jitter -~~~~~~~~~~~~ -.. 
autofunction:: color_jitter - flip ~~~~ .. autofunction:: flip From 7e53af344198d0584f595bb2a5cf125bd249b6e7 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 1 Jun 2018 17:28:00 +0900 Subject: [PATCH 15/57] doc --- .../optimizers/corrected_momentum_sgd.py | 27 ++++++++++++++++--- .../source/reference/chainer_experimental.rst | 1 + .../chainer_experimental/optimizers.rst | 10 +++++++ 3 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 docs/source/reference/chainer_experimental/optimizers.rst diff --git a/chainercv/chainer_experimental/optimizers/corrected_momentum_sgd.py b/chainercv/chainer_experimental/optimizers/corrected_momentum_sgd.py index fb9a09bf27..4d971bc3b7 100644 --- a/chainercv/chainer_experimental/optimizers/corrected_momentum_sgd.py +++ b/chainercv/chainer_experimental/optimizers/corrected_momentum_sgd.py @@ -14,10 +14,6 @@ class CorrectedMomentumSGDRule(optimizer.UpdateRule): See :class:`~chainer.optimizers.CorrectedMomentumSGD` for the default values of the hyperparameters. - This implements momentum correction discussed in the third section of - `Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour \ - `_. - Args: parent_hyperparam (~chainer.optimizer.Hyperparameter): Hyperparameter that provides the default values. @@ -66,6 +62,29 @@ class CorrectedMomentumSGD(optimizer.GradientMethod): """Momentum SGD optimizer. + This implements momentum correction discussed in the third section of + `Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour \ + `_. + + :class:`~chainer.optimizers.MomentumSGD` implements equation (10) of the + paper. This optimizer implements equation (9), which takes momentum + correction into account. + + First, we set :math:`v_{t} = \\eta_{t} u_t`. + We substitute this relation to the equation (10) with momentum correction. + + .. 
math:: + + v_{t+1} &= m\\frac{\\eta_{t+1}}{\\eta_{t}}v_t + \\eta_{t+1}g_t \\\\ + &= m\\frac{\\eta_{t+1}}{\\eta_{t}}\\eta_{t}u_t + + \\eta_{t+1}g_t \\\\ + &= \\eta_{t+1}(m u_t + g_t) \\\\ + + From this result, we derive :math:`u_{t+1} = m u_t + g_t`, which is how + update tensors are calculated by + :class:`~chainer.optimizers.CorrectedMomentumSGD`. Thus, the equivalence + is shown. + Args: lr (float): Learning rate. momentum (float): Exponential decay rate of the first order moment. diff --git a/docs/source/reference/chainer_experimental.rst b/docs/source/reference/chainer_experimental.rst index 978598b63d..8f2d7f2a7c 100644 --- a/docs/source/reference/chainer_experimental.rst +++ b/docs/source/reference/chainer_experimental.rst @@ -13,3 +13,4 @@ Sliceable .. toctree:: chainer_experimental/sliceable + chainer_experimental/optimizers diff --git a/docs/source/reference/chainer_experimental/optimizers.rst b/docs/source/reference/chainer_experimental/optimizers.rst new file mode 100644 index 0000000000..2c20345af4 --- /dev/null +++ b/docs/source/reference/chainer_experimental/optimizers.rst @@ -0,0 +1,10 @@ +Optimizers +========== + +.. module:: chainercv.chainer_experimental.optimizers + + +CorrectedMomentumSGD +-------------------- + +.. 
autoclass:: CorrectedMomentumSGD From 43b548b4d15763ce828b6f8ca6fde902978aac19 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 1 Jun 2018 17:47:32 +0900 Subject: [PATCH 16/57] fix cpu mode of CorrectedMomentumSGD --- .../chainer_experimental/optimizers/corrected_momentum_sgd.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chainercv/chainer_experimental/optimizers/corrected_momentum_sgd.py b/chainercv/chainer_experimental/optimizers/corrected_momentum_sgd.py index 4d971bc3b7..1029c8b1d0 100644 --- a/chainercv/chainer_experimental/optimizers/corrected_momentum_sgd.py +++ b/chainercv/chainer_experimental/optimizers/corrected_momentum_sgd.py @@ -41,8 +41,8 @@ def update_core_cpu(self, param): return v = self.state['v'] v *= self.hyperparam.momentum - v -= self.hyperparam.lr * grad - param.data += v + v -= grad + param.data += self.hyperparam.lr * v def update_core_gpu(self, param): grad = param.grad From a263992a8595227f4cb94a6cfe4c89c7d8854db9 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 20 Jun 2018 18:57:53 +0900 Subject: [PATCH 17/57] do not use lambdas --- .../ade20k_semantic_segmentation_dataset.py | 12 +++++++++--- .../ade20k/ade20k_test_image_dataset.py | 7 +++++-- ...cityscapes_semantic_segmentation_dataset.py | 5 ++++- .../cityscapes_test_image_dataset.py | 5 ++++- chainercv/datasets/cub/cub_label_dataset.py | 5 ++++- .../directory_parsing_label_dataset.py | 11 ++++++++--- .../online_products/online_products_dataset.py | 18 ++++++++++++------ 7 files changed, 46 insertions(+), 17 deletions(-) diff --git a/chainercv/datasets/ade20k/ade20k_semantic_segmentation_dataset.py b/chainercv/datasets/ade20k/ade20k_semantic_segmentation_dataset.py index c4e42537a4..5010f1c3f4 100644 --- a/chainercv/datasets/ade20k/ade20k_semantic_segmentation_dataset.py +++ b/chainercv/datasets/ade20k/ade20k_semantic_segmentation_dataset.py @@ -62,9 +62,15 @@ def __init__(self, data_dir='auto', split='train'): self.img_paths = 
sorted(glob.glob(os.path.join(img_dir, '*.jpg'))) self.label_paths = sorted(glob.glob(os.path.join(label_dir, '*.png'))) - self.add_getter('img', lambda i: read_image(self.img_paths[i])) - self.add_getter('iabel', lambda i: read_image( - self.label_paths[i], dtype=np.int32, color=False)[0]) + self.add_getter('img', self._get_image) + self.add_getter('label', self._get_label) def __len__(self): return len(self.img_paths) + + def _get_image(self, i): + return read_image(self.img_paths[i]) + + def _get_label(self, i): + return read_image( + self.label_paths[i], dtype=np.int32, color=False)[0] diff --git a/chainercv/datasets/ade20k/ade20k_test_image_dataset.py b/chainercv/datasets/ade20k/ade20k_test_image_dataset.py index 79f9cea43c..848e67cedf 100644 --- a/chainercv/datasets/ade20k/ade20k_test_image_dataset.py +++ b/chainercv/datasets/ade20k/ade20k_test_image_dataset.py @@ -1,4 +1,4 @@ -import glob +mport glob import os from chainercv.chainer_experimental.datasets.sliceable import GetterDataset @@ -41,8 +41,11 @@ def __init__(self, data_dir='auto'): img_dir = os.path.join(data_dir, 'release_test', 'testing') self.img_paths = sorted(glob.glob(os.path.join(img_dir, '*.jpg'))) - self.add_getter('img', lambda i: read_image(self.img_paths[i])) + self.add_getter('img', self._get_image) self.keys = 'img' # do not return tuple def __len__(self): return len(self.img_paths) + + def _get_image(self, i): + return read_image(self.img_paths[i]) diff --git a/chainercv/datasets/cityscapes/cityscapes_semantic_segmentation_dataset.py b/chainercv/datasets/cityscapes/cityscapes_semantic_segmentation_dataset.py index 047397ed58..9399981c08 100644 --- a/chainercv/datasets/cityscapes/cityscapes_semantic_segmentation_dataset.py +++ b/chainercv/datasets/cityscapes/cityscapes_semantic_segmentation_dataset.py @@ -88,12 +88,15 @@ def __init__(self, data_dir='auto', label_resolution=None, split='train', img_path = os.path.join(img_dir, city_dname, img_path) self.img_paths.append(img_path) - 
self.add_getter('img', lambda i: read_image(self.img_paths[i])) + self.add_getter('img', self._get_image) self.add_getter('label', self._get_label) def __len__(self): return len(self.img_paths) + def _get_image(self, i): + return read_image(self.img_paths[i]) + def _get_label(self, i): label_orig = read_image( self.label_paths[i], dtype=np.int32, color=False)[0] diff --git a/chainercv/datasets/cityscapes/cityscapes_test_image_dataset.py b/chainercv/datasets/cityscapes/cityscapes_test_image_dataset.py index d2eeb6a669..4069d908ae 100644 --- a/chainercv/datasets/cityscapes/cityscapes_test_image_dataset.py +++ b/chainercv/datasets/cityscapes/cityscapes_test_image_dataset.py @@ -54,8 +54,11 @@ def __init__(self, data_dir='auto'): os.path.join(city_dname, '*_leftImg8bit.png'))): self.img_paths.append(img_path) - self.add_getter('img', lambda i: read_image(self.img_paths[i])) + self.add_getter('img', self._get_image) self.keys = 'img' # do not return tuple def __len__(self): return len(self.img_paths) + + def _get_image(self, i): + return read_image(self.img_paths[i]) diff --git a/chainercv/datasets/cub/cub_label_dataset.py b/chainercv/datasets/cub/cub_label_dataset.py index da3ad54c5f..33c782a7ea 100644 --- a/chainercv/datasets/cub/cub_label_dataset.py +++ b/chainercv/datasets/cub/cub_label_dataset.py @@ -54,7 +54,7 @@ def __init__(self, data_dir='auto', return_bb=False, d_label in open(image_class_labels_file)] self._labels = np.array(labels, dtype=np.int32) - self.add_getter('label', lambda i: self._labels[i]) + self.add_getter('label', self._get_label) keys = ('img', 'label') if return_bb: @@ -62,3 +62,6 @@ def __init__(self, data_dir='auto', return_bb=False, if return_prob_map: keys += ('prob_map',) self.keys = keys + + def _get_label(self, i): + return self._labels[i] diff --git a/chainercv/datasets/directory_parsing_label_dataset.py b/chainercv/datasets/directory_parsing_label_dataset.py index 61346c1a97..c11cd9e597 100644 --- 
a/chainercv/datasets/directory_parsing_label_dataset.py +++ b/chainercv/datasets/directory_parsing_label_dataset.py @@ -138,9 +138,14 @@ def __init__(self, root, check_img_file=None, color=True, self.img_paths, self.labels = _parse_label_dataset( root, label_names, check_img_file) - self.add_getter('img', lambda i: - read_image(self.img_paths[i], color=self.color)) - self.add_getter('label', lambda i: self.labels[i]) + self.add_getter('img', self._get_image) + self.add_getter('label', self._get_label) def __len__(self): return len(self.img_paths) + + def _get_image(self, i): + return read_image(self.img_paths[i], color=self.color) + + def _get_label(self, i): + return self.labels[i] diff --git a/chainercv/datasets/online_products/online_products_dataset.py b/chainercv/datasets/online_products/online_products_dataset.py index 77429aaac8..feeca2979b 100644 --- a/chainercv/datasets/online_products/online_products_dataset.py +++ b/chainercv/datasets/online_products/online_products_dataset.py @@ -90,12 +90,18 @@ def __init__(self, data_dir='auto', split='train'): self.super_class_ids += [int(id_[2]) - 1 for id_ in ids_tmp] self.paths += [os.path.join(data_dir, id_[3]) for id_ in ids_tmp] - self.add_getter('img', lambda i: - utils.read_image(self.paths[i], color=True)) - self.add_getter('label', lambda i: - np.array(self.class_ids[i], np.int32)) - self.add_getter('super_label', lambda i: - np.array(self.super_class_ids[i], np.int32)) + self.add_getter('img', self._get_image) + self.add_getter('label', self._get_label) + self.add_getter('super_label', self._get_super_label) def __len__(self): return len(self.paths) + + def _get_image(self, i): + return utils.read_image(self.paths[i], color=True) + + def _get_label(self, i): + return np.array(self.class_ids[i], np.int32) + + def _get_super_label(self, i): + return np.array(self.super_class_ids[i], np.int32) From a4e14f039800137b8156c1513471ba52599c2ed3 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 20 Jun 2018 
19:01:26 +0900 Subject: [PATCH 18/57] fix --- chainercv/datasets/ade20k/ade20k_test_image_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chainercv/datasets/ade20k/ade20k_test_image_dataset.py b/chainercv/datasets/ade20k/ade20k_test_image_dataset.py index 848e67cedf..411c053100 100644 --- a/chainercv/datasets/ade20k/ade20k_test_image_dataset.py +++ b/chainercv/datasets/ade20k/ade20k_test_image_dataset.py @@ -1,4 +1,4 @@ -mport glob +import glob import os from chainercv.chainer_experimental.datasets.sliceable import GetterDataset From 898dbbbcba52556af878ae9ad7016ac6ed29b791 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 20 Jun 2018 19:30:06 +0900 Subject: [PATCH 19/57] fix --- examples/classification/train_imagenet_mn.py | 21 +++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/examples/classification/train_imagenet_mn.py b/examples/classification/train_imagenet_mn.py index 6ae388bfdb..a2e89167e0 100644 --- a/examples/classification/train_imagenet_mn.py +++ b/examples/classification/train_imagenet_mn.py @@ -35,7 +35,6 @@ def __init__(self, mean): self.mean = mean def __call__(self, in_data): - # https://github.com/facebook/fb.resnet.torch/blob/master/datasets/imagenet.lua#L80 img, label = in_data _, H, W = img.shape img = random_sized_crop(img) @@ -61,18 +60,18 @@ def __call__(self, in_data): def main(): archs = { 'resnet50': {'class': ResNet50, 'score_layer_name': 'fc6', - 'kwargs': {'fb_resnet': True}}, + 'kwargs': {'arch': 'fb'}}, 'resnet101': {'class': ResNet101, 'score_layer_name': 'fc6', - 'kwargs': {'fb_resnet': True}}, + 'kwargs': {'arch': 'fb'}}, 'resnet152': {'class': ResNet152, 'score_layer_name': 'fc6', - 'kwargs': {'fb_resnet': True}} + 'kwargs': {'arch': 'fb'}} } parser = argparse.ArgumentParser( description='Learning convnet from ILSVRC2012 dataset') parser.add_argument('train', help='Path to root of the train dataset') parser.add_argument('val', help='Path to root of the validation 
dataset') parser.add_argument('--arch', - '-a', choices=archs.keys(), default='resnet18', + '-a', choices=archs.keys(), default='resnet50', help='Convnet architecture') parser.add_argument('--communicator', type=str, default='hierarchical', help='Type of communicator') @@ -94,6 +93,9 @@ def main(): lr = args.lr else: lr = 0.1 * (args.batchsize * comm.size) / 256 + if comm.rank == 0: + print('lr={}: lr is selected based on linear scaling rule'.format( + lr)) label_names = directory_parsing_label_names(args.train) @@ -114,10 +116,12 @@ def main(): train_data = chainermn.scatter_dataset(train_data, comm, shuffle=True) val_data = chainermn.scatter_dataset(val_data, comm, shuffle=True) train_iter = chainer.iterators.MultiprocessIterator( - train_data, args.batchsize, shared_mem=3 * 224 * 224 * 4) + train_data, args.batchsize, shared_mem=3 * 224 * 224 * 4, + n_processes=args.loaderjob) val_iter = iterators.MultiprocessIterator( val_data, args.batchsize, - repeat=False, shuffle=False, shared_mem=3 * 224 * 224 * 4) + repeat=False, shuffle=False, shared_mem=3 * 224 * 224 * 4, + n_processes=args.loaderjob) optimizer = chainermn.create_multi_node_optimizer( CorrectedMomentumSGD(lr=lr, momentum=args.momentum), comm) @@ -129,7 +133,10 @@ def main(): if device >= 0: chainer.cuda.get_device(device).use() model.to_gpu() + + # Configure GPU setting chainer.cuda.set_max_workspace_size(1 * 1024 * 1024 * 1024) + chainer.using_config('autotune', True) updater = chainer.training.StandardUpdater( train_iter, optimizer, device=device) From 93c410f65687a0eabe8ac5eb480324a893db6be5 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 20 Jun 2018 19:32:30 +0900 Subject: [PATCH 20/57] style --- examples/classification/train_imagenet_mn.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/classification/train_imagenet_mn.py b/examples/classification/train_imagenet_mn.py index a2e89167e0..d375a11221 100644 --- a/examples/classification/train_imagenet_mn.py 
+++ b/examples/classification/train_imagenet_mn.py @@ -126,9 +126,10 @@ def main(): optimizer = chainermn.create_multi_node_optimizer( CorrectedMomentumSGD(lr=lr, momentum=args.momentum), comm) optimizer.setup(model) - for param in model.params(): - if param.name != 'beta' and param.name != 'gamma': - param.update_rule.add_hook(WeightDecay(args.weight_decay)) + for l in model.links(): + if not isinstance(l, chainer.links.BatchNormalization): + for param in l.params(): + param.update_rule.add_hook(WeightDecay(args.weight_decay)) if device >= 0: chainer.cuda.get_device(device).use() From 77cd4cb0056f236193decda5cbdcdab069b3736d Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 20 Jun 2018 19:32:41 +0900 Subject: [PATCH 21/57] remove redundant import --- examples/classification/train_imagenet_mn.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/classification/train_imagenet_mn.py b/examples/classification/train_imagenet_mn.py index d375a11221..2f7cd1bc1a 100644 --- a/examples/classification/train_imagenet_mn.py +++ b/examples/classification/train_imagenet_mn.py @@ -1,6 +1,4 @@ from __future__ import division -import matplotlib -matplotlib.use('agg') import argparse import chainer From aa8b71363dee5c6be7bd187431e828ff50215330 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 20 Jun 2018 19:33:50 +0900 Subject: [PATCH 22/57] grammar --- examples/classification/train_imagenet_mn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/classification/train_imagenet_mn.py b/examples/classification/train_imagenet_mn.py index 2f7cd1bc1a..de8905f3bf 100644 --- a/examples/classification/train_imagenet_mn.py +++ b/examples/classification/train_imagenet_mn.py @@ -92,8 +92,8 @@ def main(): else: lr = 0.1 * (args.batchsize * comm.size) / 256 if comm.rank == 0: - print('lr={}: lr is selected based on linear scaling rule'.format( - lr)) + print('lr={}: lr is selected based on the linear ' + 'scaling rule'.format(lr)) label_names = 
directory_parsing_label_names(args.train) From e556613ea761e7284d78144f7a2c3addd55dac80 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 20 Jun 2018 19:41:42 +0900 Subject: [PATCH 23/57] update README --- examples/classification/README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/examples/classification/README.md b/examples/classification/README.md index fda70b8ad2..e0be7e56e1 100644 --- a/examples/classification/README.md +++ b/examples/classification/README.md @@ -32,10 +32,15 @@ $ python eval_imagenet.py [--model vgg16|resnet50|resnet10 ## Training Models Training with multiple GPUs. Please install ChainerMN to use this feature. +Please consult the full list of arguments with `python train_imagenet_mn.py -h`. ``` $ mpiexec -n N python train_imagenet_mn.py ``` +##### Performance tip +When training over multiple nodes, set the communicator to `pure_nccl` (requires NCCL2). +The default communicator (`hierarchical`) uses MPI to communicate between nodes, which is slower than the pure NCCL communicator. 
+ ## How to prepare ImageNet Dataset From 8bb1135b54281f905fadb02fb9c375bda83beec7 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 20 Jun 2018 19:41:50 +0900 Subject: [PATCH 24/57] delete unnecessary cmd option --- examples/classification/train_imagenet_mn.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/classification/train_imagenet_mn.py b/examples/classification/train_imagenet_mn.py index de8905f3bf..a1a925c8c7 100644 --- a/examples/classification/train_imagenet_mn.py +++ b/examples/classification/train_imagenet_mn.py @@ -73,7 +73,6 @@ def main(): help='Convnet architecture') parser.add_argument('--communicator', type=str, default='hierarchical', help='Type of communicator') - parser.add_argument('--pretrained_model') parser.add_argument('--loaderjob', type=int, default=4) parser.add_argument('--batchsize', type=int, default=32, help='Batch size for each worker') From 5d40516e626b6f0b2e4cbf380f6e5f2d715b259f Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 20 Jun 2018 19:44:33 +0900 Subject: [PATCH 25/57] use original style --- examples/classification/train_imagenet_mn.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/examples/classification/train_imagenet_mn.py b/examples/classification/train_imagenet_mn.py index a1a925c8c7..345824cdaf 100644 --- a/examples/classification/train_imagenet_mn.py +++ b/examples/classification/train_imagenet_mn.py @@ -123,10 +123,9 @@ def main(): optimizer = chainermn.create_multi_node_optimizer( CorrectedMomentumSGD(lr=lr, momentum=args.momentum), comm) optimizer.setup(model) - for l in model.links(): - if not isinstance(l, chainer.links.BatchNormalization): - for param in l.params(): - param.update_rule.add_hook(WeightDecay(args.weight_decay)) + for param in model.params(): + if param.name != 'beta' and param.name != 'gamma': + param.update_rule.add_hook(WeightDecay(args.weight_decay)) if device >= 0: chainer.cuda.get_device(device).use() From 
6de2a176bfa57cb6172f4fb2cc9f5540b1467da8 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 20 Jun 2018 19:45:46 +0900 Subject: [PATCH 26/57] simplify --- examples/classification/train_imagenet_mn.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/classification/train_imagenet_mn.py b/examples/classification/train_imagenet_mn.py index 345824cdaf..ba87f969cb 100644 --- a/examples/classification/train_imagenet_mn.py +++ b/examples/classification/train_imagenet_mn.py @@ -182,8 +182,6 @@ def main(): trigger=plot_interval ) - trainer.extend(extensions.dump_graph('main/loss')) - trainer.run() From 42ae6f911653b96a79aa21eda9d0ec1ca299a397 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 28 Jun 2018 12:55:11 +0900 Subject: [PATCH 27/57] initialize the last BN of each BuildingBlock in 1 hour style --- examples/classification/train_imagenet_mn.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/examples/classification/train_imagenet_mn.py b/examples/classification/train_imagenet_mn.py index ba87f969cb..1483ad562e 100644 --- a/examples/classification/train_imagenet_mn.py +++ b/examples/classification/train_imagenet_mn.py @@ -4,6 +4,7 @@ import chainer from chainer.datasets import TransformDataset from chainer import iterators +from chainer.links import BatchNormalization from chainer.links import Classifier from chainer.optimizer import WeightDecay from chainer import training @@ -20,6 +21,7 @@ from chainercv.datasets import directory_parsing_label_names from chainercv.chainer_experimental.optimizers import CorrectedMomentumSGD +from chainercv.links.model.resnet import Bottleneck from chainercv.links import ResNet101 from chainercv.links import ResNet152 from chainercv.links import ResNet50 @@ -100,6 +102,11 @@ def main(): extractor = arch['class'](n_class=len(label_names), **arch['kwargs']) extractor.pick = arch['score_layer_name'] model = Classifier(extractor) + # Following https://arxiv.org/pdf/1706.02677.pdf, + # 
the gamma of the last BN of each resblock is initialized by zeros. + for l in model.links(): + if isinstance(l, Bottleneck): + l.conv3.bn.gamma.data[:] = 0 if comm.rank == 0: train_data = DirectoryParsingLabelDataset(args.train) @@ -123,9 +130,10 @@ def main(): optimizer = chainermn.create_multi_node_optimizer( CorrectedMomentumSGD(lr=lr, momentum=args.momentum), comm) optimizer.setup(model) - for param in model.params(): - if param.name != 'beta' and param.name != 'gamma': - param.update_rule.add_hook(WeightDecay(args.weight_decay)) + for l in model.links(): + if not isinstance(l, BatchNormalization): + for param in l.params(): + param.update_rule.add_hook(WeightDecay(args.weight_decay)) if device >= 0: chainer.cuda.get_device(device).use() From 399a140d8647f7322c233c782ca073ee5a8209c9 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 28 Jun 2018 13:08:13 +0900 Subject: [PATCH 28/57] fix script --- examples/classification/train_imagenet_mn.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/examples/classification/train_imagenet_mn.py b/examples/classification/train_imagenet_mn.py index 1483ad562e..283808540a 100644 --- a/examples/classification/train_imagenet_mn.py +++ b/examples/classification/train_imagenet_mn.py @@ -130,10 +130,9 @@ def main(): optimizer = chainermn.create_multi_node_optimizer( CorrectedMomentumSGD(lr=lr, momentum=args.momentum), comm) optimizer.setup(model) - for l in model.links(): - if not isinstance(l, BatchNormalization): - for param in l.params(): - param.update_rule.add_hook(WeightDecay(args.weight_decay)) + for param in model.params(): + if param.name not in ('beta', 'gamma'): + param.update_rule.add_hook(WeightDecay(args.weight_decay)) if device >= 0: chainer.cuda.get_device(device).use() From 02b4f307462c8e85a94c761f2518a3ffffa17dd2 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 28 Jun 2018 17:25:24 +0900 Subject: [PATCH 29/57] flake8 --- examples/classification/train_imagenet_mn.py | 1 - 1 
file changed, 1 deletion(-) diff --git a/examples/classification/train_imagenet_mn.py b/examples/classification/train_imagenet_mn.py index 283808540a..26134e2319 100644 --- a/examples/classification/train_imagenet_mn.py +++ b/examples/classification/train_imagenet_mn.py @@ -4,7 +4,6 @@ import chainer from chainer.datasets import TransformDataset from chainer import iterators -from chainer.links import BatchNormalization from chainer.links import Classifier from chainer.optimizer import WeightDecay from chainer import training From c292e091ed6dd708c009af0c3bd989cbc25a1868 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Tue, 16 Oct 2018 07:36:01 +0900 Subject: [PATCH 30/57] add warmup --- .../{train_imagenet_mn.py => train_imagenet_multi.py} | 6 ++++++ 1 file changed, 6 insertions(+) rename examples/classification/{train_imagenet_mn.py => train_imagenet_multi.py} (95%) diff --git a/examples/classification/train_imagenet_mn.py b/examples/classification/train_imagenet_multi.py similarity index 95% rename from examples/classification/train_imagenet_mn.py rename to examples/classification/train_imagenet_multi.py index 26134e2319..bc7a7156e5 100644 --- a/examples/classification/train_imagenet_mn.py +++ b/examples/classification/train_imagenet_multi.py @@ -146,6 +146,12 @@ def main(): trainer = training.Trainer( updater, (args.epoch, 'epoch'), out=args.out) + warmup_iter = 5 * len(train_data) // args.batchsize # 5 epochs + trainer.extend( + extensions.LinearShift( + 'lr', value_range=(0, lr), time_range=(0, warmup_iter)), + trigger=chainer.training.triggers.ManualScheduleTrigger( + list(range(warmup_iter + 1)), 'epoch')) trainer.extend(extensions.ExponentialShift('lr', 0.1), trigger=chainer.training.triggers.ManualScheduleTrigger( [30, 60, 80], 'epoch')) From 6c6042ef9f6103eaa5b65a7599510b97a4ad8d45 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Tue, 16 Oct 2018 08:16:03 +0900 Subject: [PATCH 31/57] warmup initial lr changed --- 
examples/classification/train_imagenet_multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/classification/train_imagenet_multi.py b/examples/classification/train_imagenet_multi.py index bc7a7156e5..df787d58c3 100644 --- a/examples/classification/train_imagenet_multi.py +++ b/examples/classification/train_imagenet_multi.py @@ -149,7 +149,7 @@ def main(): warmup_iter = 5 * len(train_data) // args.batchsize # 5 epochs trainer.extend( extensions.LinearShift( - 'lr', value_range=(0, lr), time_range=(0, warmup_iter)), + 'lr', value_range=(lr / comm.size, lr), time_range=(0, warmup_iter)), trigger=chainer.training.triggers.ManualScheduleTrigger( list(range(warmup_iter + 1)), 'epoch')) trainer.extend(extensions.ExponentialShift('lr', 0.1), From aacd11fd8d19aea54762cc750b17dd3e6459ffae Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 17 Oct 2018 08:40:50 +0900 Subject: [PATCH 32/57] update warmup --- examples/classification/train_imagenet_multi.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/classification/train_imagenet_multi.py b/examples/classification/train_imagenet_multi.py index df787d58c3..b7de7fba1c 100644 --- a/examples/classification/train_imagenet_multi.py +++ b/examples/classification/train_imagenet_multi.py @@ -147,11 +147,13 @@ def main(): trainer = training.Trainer( updater, (args.epoch, 'epoch'), out=args.out) warmup_iter = 5 * len(train_data) // args.batchsize # 5 epochs + warmup_mult = min((8 / comm.size, 1)) trainer.extend( extensions.LinearShift( - 'lr', value_range=(lr / comm.size, lr), time_range=(0, warmup_iter)), + 'lr', value_range=(lr * warmup_mult, lr), + time_range=(0, warmup_iter)), trigger=chainer.training.triggers.ManualScheduleTrigger( - list(range(warmup_iter + 1)), 'epoch')) + list(range(warmup_iter + 1)), 'iteration')) trainer.extend(extensions.ExponentialShift('lr', 0.1), trigger=chainer.training.triggers.ManualScheduleTrigger( [30, 60, 80], 'epoch')) From 
bc2b34c7cc0e592c7d6915c32f962bd690bd8da6 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 18 Oct 2018 18:27:29 +0900 Subject: [PATCH 33/57] try to fix segfault --- .../classification/train_imagenet_multi.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/examples/classification/train_imagenet_multi.py b/examples/classification/train_imagenet_multi.py index b7de7fba1c..730a7e673b 100644 --- a/examples/classification/train_imagenet_multi.py +++ b/examples/classification/train_imagenet_multi.py @@ -27,6 +27,9 @@ import chainermn +import cv2 +cv2.setNumThreads(2) + class TrainTransform(object): @@ -56,6 +59,9 @@ def __call__(self, in_data): return img, label +import multiprocessing + + def main(): archs = { 'resnet50': {'class': ResNet50, 'score_layer_name': 'fc6', @@ -84,6 +90,19 @@ def main(): parser.add_argument('--epoch', type=int, default=90) args = parser.parse_args() + # We need to change the start method of multiprocessing module if we are + # using InfiniBand and MultiprocessIterator. This is because processes + # often crash when calling fork if they are using Infiniband. + # (c.f., https://www.open-mpi.org/faq/?category=tuning#fork-warning ) + # Also, just setting the start method does not seem to be sufficient + # to actually launch the forkserver, so also start a dummy process. + # This must be done *before* calling `chainermn.create_communicator`!!! 
+ multiprocessing.set_start_method('forkserver') + # TODO make this silent + p = multiprocessing.Process(target=print, args=('Initialize forkserver',)) + p.start() + p.join() + comm = chainermn.create_communicator(args.communicator) device = comm.intra_rank From 93fbd3657e4f273853228d67630da3ab12905b00 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 19 Oct 2018 18:58:19 +0900 Subject: [PATCH 34/57] fix --- examples/classification/train_imagenet_multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/classification/train_imagenet_multi.py b/examples/classification/train_imagenet_multi.py index 730a7e673b..ec7b0ab3e8 100644 --- a/examples/classification/train_imagenet_multi.py +++ b/examples/classification/train_imagenet_multi.py @@ -173,7 +173,7 @@ def main(): time_range=(0, warmup_iter)), trigger=chainer.training.triggers.ManualScheduleTrigger( list(range(warmup_iter + 1)), 'iteration')) - trainer.extend(extensions.ExponentialShift('lr', 0.1), + trainer.extend(extensions.ExponentialShift('lr', 0.1, init=lr), trigger=chainer.training.triggers.ManualScheduleTrigger( [30, 60, 80], 'epoch')) evaluator = chainermn.create_multi_node_evaluator( From 86e9deddc9806b690337eca0e2a83c18eb219798 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Fri, 19 Oct 2018 19:02:32 +0900 Subject: [PATCH 35/57] delete cv2 setNumThreads --- examples/classification/train_imagenet_multi.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/examples/classification/train_imagenet_multi.py b/examples/classification/train_imagenet_multi.py index ec7b0ab3e8..5bb2e594ad 100644 --- a/examples/classification/train_imagenet_multi.py +++ b/examples/classification/train_imagenet_multi.py @@ -1,5 +1,6 @@ from __future__ import division import argparse +import multiprocessing import chainer from chainer.datasets import TransformDataset @@ -27,9 +28,6 @@ import chainermn -import cv2 -cv2.setNumThreads(2) - class TrainTransform(object): @@ -59,9 
+57,6 @@ def __call__(self, in_data): return img, label -import multiprocessing - - def main(): archs = { 'resnet50': {'class': ResNet50, 'score_layer_name': 'fc6', From 078bb4ce2e3be253063e87ebd48dd706c57bb386 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Sat, 20 Oct 2018 14:34:24 +0900 Subject: [PATCH 36/57] delete CorrectedMomentumSGD --- chainercv/chainer_experimental/__init__.py | 1 - .../optimizers/__init__.py | 1 - .../optimizers/corrected_momentum_sgd.py | 104 ------------------ .../source/reference/chainer_experimental.rst | 1 - .../chainer_experimental/optimizers.rst | 10 -- examples/classification/README.md | 4 +- .../classification/train_imagenet_multi.py | 14 +-- 7 files changed, 6 insertions(+), 129 deletions(-) delete mode 100644 chainercv/chainer_experimental/optimizers/__init__.py delete mode 100644 chainercv/chainer_experimental/optimizers/corrected_momentum_sgd.py delete mode 100644 docs/source/reference/chainer_experimental/optimizers.rst diff --git a/chainercv/chainer_experimental/__init__.py b/chainercv/chainer_experimental/__init__.py index f0227014bd..b91a45fb64 100644 --- a/chainercv/chainer_experimental/__init__.py +++ b/chainercv/chainer_experimental/__init__.py @@ -1,2 +1 @@ from chainercv.chainer_experimental import datasets # NOQA -from chainercv.chainer_experimental import optimizers # NOQA diff --git a/chainercv/chainer_experimental/optimizers/__init__.py b/chainercv/chainer_experimental/optimizers/__init__.py deleted file mode 100644 index 0ce7d43a78..0000000000 --- a/chainercv/chainer_experimental/optimizers/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from chainercv.chainer_experimental.optimizers.corrected_momentum_sgd import CorrectedMomentumSGD # NOQA diff --git a/chainercv/chainer_experimental/optimizers/corrected_momentum_sgd.py b/chainercv/chainer_experimental/optimizers/corrected_momentum_sgd.py deleted file mode 100644 index 1029c8b1d0..0000000000 --- a/chainercv/chainer_experimental/optimizers/corrected_momentum_sgd.py 
+++ /dev/null @@ -1,104 +0,0 @@ -from chainer import cuda -from chainer import optimizer - - -_default_hyperparam = optimizer.Hyperparameter() -_default_hyperparam.lr = 0.01 -_default_hyperparam.momentum = 0.9 - - -class CorrectedMomentumSGDRule(optimizer.UpdateRule): - - """Update rule for the corrected momentum SGD. - - See :class:`~chainer.optimizers.CorrectedMomentumSGD` for the default - values of the hyperparameters. - - Args: - parent_hyperparam (~chainer.optimizer.Hyperparameter): Hyperparameter - that provides the default values. - lr (float): Learning rate. - momentum (float): Exponential decay rate of the first order moment. - - """ - - def __init__(self, parent_hyperparam=None, lr=None, momentum=None): - super(CorrectedMomentumSGDRule, self).__init__( - parent_hyperparam or _default_hyperparam) - if lr is not None: - self.hyperparam.lr = lr - if momentum is not None: - self.hyperparam.momentum = momentum - - def init_state(self, param): - xp = cuda.get_array_module(param.data) - with cuda.get_device_from_array(param.data): - self.state['v'] = xp.zeros_like(param.data) - - def update_core_cpu(self, param): - grad = param.grad - if grad is None: - return - v = self.state['v'] - v *= self.hyperparam.momentum - v -= grad - param.data += self.hyperparam.lr * v - - def update_core_gpu(self, param): - grad = param.grad - if grad is None: - return - cuda.elementwise( - 'T grad, T lr, T momentum', - 'T param, T v', - '''v = momentum * v - grad; - param += lr * v;''', - 'momentum_sgd')( - grad, self.hyperparam.lr, self.hyperparam.momentum, - param.data, self.state['v']) - - -class CorrectedMomentumSGD(optimizer.GradientMethod): - - """Momentum SGD optimizer. - - This implements momentum correction discussed in the third section of - `Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour \ - `_. - - :class:`~chainer.optimizers.MomentumSGD` implements equation (10) of the - paper. 
This optimizer implements equation (9), which takes momentum - correction into account. - - First, we set :math:`v_{t} = \\eta_{t} u_t`. - We substitute this relation to the equation (10) with momentum correction. - - .. math:: - - v_{t+1} &= m\\frac{\\eta_{t+1}}{\\eta_{t}}v_t + \\eta_{t+1}g_t \\\\ - &= m\\frac{\\eta_{t+1}}{\\eta_{t}}\\eta_{t}u_t + - \\eta_{t+1}g_t \\\\ - &= \\eta_{t+1}(m u_t + g_t) \\\\ - - From this result, we derive :math:`u_{t+1} = m u_t + g_t`, which is how - update tensors are calculated by - :class:`~chainer.optimizers.CorrectedMomentumSGD`. Thus, the equivalence - is shown. - - Args: - lr (float): Learning rate. - momentum (float): Exponential decay rate of the first order moment. - - """ - - def __init__(self, lr=_default_hyperparam.lr, - momentum=_default_hyperparam.momentum): - super(CorrectedMomentumSGD, self).__init__() - self.hyperparam.lr = lr - self.hyperparam.momentum = momentum - - lr = optimizer.HyperparameterProxy('lr') - momentum = optimizer.HyperparameterProxy('momentum') - - def create_update_rule(self): - return CorrectedMomentumSGDRule(self.hyperparam) diff --git a/docs/source/reference/chainer_experimental.rst b/docs/source/reference/chainer_experimental.rst index 8f2d7f2a7c..978598b63d 100644 --- a/docs/source/reference/chainer_experimental.rst +++ b/docs/source/reference/chainer_experimental.rst @@ -13,4 +13,3 @@ Sliceable .. toctree:: chainer_experimental/sliceable - chainer_experimental/optimizers diff --git a/docs/source/reference/chainer_experimental/optimizers.rst b/docs/source/reference/chainer_experimental/optimizers.rst deleted file mode 100644 index 2c20345af4..0000000000 --- a/docs/source/reference/chainer_experimental/optimizers.rst +++ /dev/null @@ -1,10 +0,0 @@ -Optimizers -========== - -.. module:: chainercv.chainer_experimental.optimizers - - -CorrectedMomentumSGD --------------------- - -.. 
autoclass:: CorrectedMomentumSGD diff --git a/examples/classification/README.md b/examples/classification/README.md index 4b9b052487..87f7283568 100644 --- a/examples/classification/README.md +++ b/examples/classification/README.md @@ -42,9 +42,9 @@ $ python eval_imagenet.py [--model vgg16|resnet50|resnet10 ## Training Models Training with multiple GPUs. Please install ChainerMN to use this feature. -Please consult the full list of arguments with `python train_imagenet_mn.py -h`. +Please consult the full list of arguments with `python train_imagenet_multi.py -h`. ``` -$ mpiexec -n N python train_imagenet_mn.py +$ mpiexec -n N python train_imagenet_multi.py ``` ##### Performance tip diff --git a/examples/classification/train_imagenet_multi.py b/examples/classification/train_imagenet_multi.py index 5bb2e594ad..9e52cee483 100644 --- a/examples/classification/train_imagenet_multi.py +++ b/examples/classification/train_imagenet_multi.py @@ -7,6 +7,7 @@ from chainer import iterators from chainer.links import Classifier from chainer.optimizer import WeightDecay +from chainer.optimizers import CorrectedMomentumSGD from chainer import training from chainer.training import extensions @@ -20,7 +21,6 @@ from chainercv.datasets import directory_parsing_label_names -from chainercv.chainer_experimental.optimizers import CorrectedMomentumSGD from chainercv.links.model.resnet import Bottleneck from chainercv.links import ResNet101 from chainercv.links import ResNet152 @@ -85,16 +85,10 @@ def main(): parser.add_argument('--epoch', type=int, default=90) args = parser.parse_args() - # We need to change the start method of multiprocessing module if we are - # using InfiniBand and MultiprocessIterator. This is because processes - # often crash when calling fork if they are using Infiniband. 
- # (c.f., https://www.open-mpi.org/faq/?category=tuning#fork-warning ) - # Also, just setting the start method does not seem to be sufficient - # to actually launch the forkserver, so also start a dummy process. - # This must be done *before* calling `chainermn.create_communicator`!!! + # This fixes a crash caused by a bug with multiprocessing and MPI. multiprocessing.set_start_method('forkserver') - # TODO make this silent - p = multiprocessing.Process(target=print, args=('Initialize forkserver',)) + p = multiprocessing.Process( + target=print, args=('Initialize forkserver',)) # NOQA p.start() p.join() From f591eadda047088d735cced555707d1c4bb289bb Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Sat, 20 Oct 2018 14:43:29 +0900 Subject: [PATCH 37/57] simplify --- examples/classification/train_imagenet_multi.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/classification/train_imagenet_multi.py b/examples/classification/train_imagenet_multi.py index 9e52cee483..7284c9051e 100644 --- a/examples/classification/train_imagenet_multi.py +++ b/examples/classification/train_imagenet_multi.py @@ -155,10 +155,9 @@ def main(): trainer = training.Trainer( updater, (args.epoch, 'epoch'), out=args.out) warmup_iter = 5 * len(train_data) // args.batchsize # 5 epochs - warmup_mult = min((8 / comm.size, 1)) trainer.extend( extensions.LinearShift( - 'lr', value_range=(lr * warmup_mult, lr), + 'lr', value_range=(min((0.1, lr)), lr), time_range=(0, warmup_iter)), trigger=chainer.training.triggers.ManualScheduleTrigger( list(range(warmup_iter + 1)), 'iteration')) From 6a9f56bc9c5294c7ce5c6ab5ebddb469548f478b Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Sat, 20 Oct 2018 15:14:03 +0900 Subject: [PATCH 38/57] fix flake8 --- examples/classification/train_imagenet_multi.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/classification/train_imagenet_multi.py b/examples/classification/train_imagenet_multi.py index 
7284c9051e..dbd90de65e 100644 --- a/examples/classification/train_imagenet_multi.py +++ b/examples/classification/train_imagenet_multi.py @@ -87,8 +87,7 @@ def main(): # This fixes a crash caused by a bug with multiprocessing and MPI. multiprocessing.set_start_method('forkserver') - p = multiprocessing.Process( - target=print, args=('Initialize forkserver',)) # NOQA + p = multiprocessing.Process() p.start() p.join() From a3d2e3e3016a96cb17efa0e1314058872d65177b Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Sat, 20 Oct 2018 18:51:52 +0900 Subject: [PATCH 39/57] update README --- examples/classification/README.md | 33 ++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/examples/classification/README.md b/examples/classification/README.md index 87f7283568..4872d9dc50 100644 --- a/examples/classification/README.md +++ b/examples/classification/README.md @@ -4,17 +4,17 @@ Single crop error rate. -| Model | Top 1 | Reference Top 1 | -|:-:|:-:|:-:| -| VGG16 | 29.0 % | 28.5 % [1] | -| ResNet50 | 24.8 % | 24.7 % [2] | -| ResNet101 | 23.6 % | 23.6 % [2] | -| ResNet152 | 23.2 % | 23.0 % [2] | -| SE-ResNet50 | 22.7 % | 22.4 % [3,4] | -| SE-ResNet101 | 21.8 % | 21.8 % [3,4] | -| SE-ResNet152 | 21.4 % | 21.3 % [3,4] | -| SE-ResNeXt50 | 20.9 % | 21.0 % [3,4] | -| SE-ResNeXt101 | 19.7 % | 19.8 % [3,4] | +| Model | Top 1 | Reference Top 1 | Top 1 of model trained with ChainerCV | +|:-:|:-:|:-:|:-:| +| VGG16 | 29.0 % | 28.5 % [1] | | +| ResNet50 | 24.8 % | 24.7 % [2] | 23.53 % | +| ResNet101 | 23.6 % | 23.6 % [2] | | +| ResNet152 | 23.2 % | 23.0 % [2] | | +| SE-ResNet50 | 22.7 % | 22.4 % [3,4] | | +| SE-ResNet101 | 21.8 % | 21.8 % [3,4] | | +| SE-ResNet152 | 21.4 % | 21.3 % [3,4] | | +| SE-ResNeXt50 | 20.9 % | 21.0 % [3,4] | | +| SE-ResNeXt101 | 19.7 % | 19.8 % [3,4] | | Ten crop error rate. @@ -32,7 +32,7 @@ Ten crop error rate. The results can be reproduced by the following command. 
-The score is reported using a weight converted from a weight trained by Caffe. +The score is reported using a weight converted from a weight trained by Caffe or a weight trained with `train_imagenet_multi.py`. These scores are obtained using OpenCV backend. If Pillow is used, scores would differ. ``` @@ -51,6 +51,15 @@ $ mpiexec -n N python train_imagenet_multi.py Date: Mon, 19 Nov 2018 09:55:27 +0900 Subject: [PATCH 40/57] update README --- examples/classification/README.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/examples/classification/README.md b/examples/classification/README.md index 4872d9dc50..7a281c9aee 100644 --- a/examples/classification/README.md +++ b/examples/classification/README.md @@ -8,8 +8,8 @@ Single crop error rate. |:-:|:-:|:-:|:-:| | VGG16 | 29.0 % | 28.5 % [1] | | | ResNet50 | 24.8 % | 24.7 % [2] | 23.53 % | -| ResNet101 | 23.6 % | 23.6 % [2] | | -| ResNet152 | 23.2 % | 23.0 % [2] | | +| ResNet101 | 23.6 % | 23.6 % [2] | 22.22 % | +| ResNet152 | 23.2 % | 23.0 % [2] | 21.60 % | | SE-ResNet50 | 22.7 % | 22.4 % [3,4] | | | SE-ResNet101 | 21.8 % | 21.8 % [3,4] | | | SE-ResNet152 | 21.4 % | 21.3 % [3,4] | | @@ -53,12 +53,13 @@ The default communicator (`hierarchical`) uses MPI to communicate between nodes, ##### Detailed training results -Scores are calculated from the statistics collected from five experiments with different random seeds. +Here, we investigate the effect of the number of GPUs on the final performance. +For more statistically reliable results, we obtained results from five different random seeds. 
| Model | # GPUs | Top 1 | |:-:|:-:|:-:| | ResNet50 | 8 | 23.53 (std=0.06) | -| ResNet50 | 32 | XXX (std=YYY) | +| ResNet50 | 32 | 23.56 (std=0.11) | ## How to prepare ImageNet Dataset From 6d3cca9b23ec731a64cf979b71e8e5de730669b3 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Mon, 19 Nov 2018 10:17:04 +0900 Subject: [PATCH 41/57] use make_shift --- .../classification/train_imagenet_multi.py | 35 ++++++++++++------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/examples/classification/train_imagenet_multi.py b/examples/classification/train_imagenet_multi.py index dbd90de65e..cd976ba829 100644 --- a/examples/classification/train_imagenet_multi.py +++ b/examples/classification/train_imagenet_multi.py @@ -11,15 +11,15 @@ from chainer import training from chainer.training import extensions +from chainercv.datasets import directory_parsing_label_names from chainercv.datasets import DirectoryParsingLabelDataset - from chainercv.transforms import center_crop from chainercv.transforms import random_flip from chainercv.transforms import random_sized_crop from chainercv.transforms import resize from chainercv.transforms import scale -from chainercv.datasets import directory_parsing_label_names +from chainercv.chainer_experimental.training.extensions import make_shift from chainercv.links.model.resnet import Bottleneck from chainercv.links import ResNet101 @@ -153,16 +153,27 @@ def main(): trainer = training.Trainer( updater, (args.epoch, 'epoch'), out=args.out) - warmup_iter = 5 * len(train_data) // args.batchsize # 5 epochs - trainer.extend( - extensions.LinearShift( - 'lr', value_range=(min((0.1, lr)), lr), - time_range=(0, warmup_iter)), - trigger=chainer.training.triggers.ManualScheduleTrigger( - list(range(warmup_iter + 1)), 'iteration')) - trainer.extend(extensions.ExponentialShift('lr', 0.1, init=lr), - trigger=chainer.training.triggers.ManualScheduleTrigger( - [30, 60, 80], 'epoch')) + + @make_shift('lr') + def 
warmup_and_exponential_shift(trainer): + epoch = trainer.updater.epoch_detail + warmup_epoch = 5 + if epoch < warmup_epoch: + if lr > 0.1: + warmup_rate = 0.1 / lr + rate = warmup_rate \ + + (1 - warmup_rate) * epoch / warmup_rate + elif epoch < 30: + rate = 1 + elif epoch < 60: + rate = 0.1 + elif epoch < 80: + rate = 0.01 + else: + rate = 0.001 + return rate * lr + + trainer.extend(warmup_and_exponential_shift) evaluator = chainermn.create_multi_node_evaluator( extensions.Evaluator(val_iter, model, device=device), comm) trainer.extend(evaluator, trigger=(1, 'epoch')) From cc4c8751f8b4b278a2b269fa581a36744e98c1ea Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Mon, 19 Nov 2018 20:45:50 +0900 Subject: [PATCH 42/57] fix bug --- examples/classification/train_imagenet_multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/classification/train_imagenet_multi.py b/examples/classification/train_imagenet_multi.py index cd976ba829..2f41a656d1 100644 --- a/examples/classification/train_imagenet_multi.py +++ b/examples/classification/train_imagenet_multi.py @@ -162,7 +162,7 @@ def warmup_and_exponential_shift(trainer): if lr > 0.1: warmup_rate = 0.1 / lr rate = warmup_rate \ - + (1 - warmup_rate) * epoch / warmup_rate + + (1 - warmup_rate) * epoch / warmup_epoch elif epoch < 30: rate = 1 elif epoch < 60: From fbd4ae4fc348080291f9cacfe8ef33715d673c2d Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Tue, 20 Nov 2018 08:22:37 +0900 Subject: [PATCH 43/57] change commandline argument: arch -> model --- examples/classification/train_imagenet_multi.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/examples/classification/train_imagenet_multi.py b/examples/classification/train_imagenet_multi.py index 2f41a656d1..f32945d233 100644 --- a/examples/classification/train_imagenet_multi.py +++ b/examples/classification/train_imagenet_multi.py @@ -58,7 +58,7 @@ def __call__(self, in_data): def main(): - archs = { + 
model_cfgs = { 'resnet50': {'class': ResNet50, 'score_layer_name': 'fc6', 'kwargs': {'arch': 'fb'}}, 'resnet101': {'class': ResNet101, 'score_layer_name': 'fc6', @@ -70,9 +70,9 @@ def main(): description='Learning convnet from ILSVRC2012 dataset') parser.add_argument('train', help='Path to root of the train dataset') parser.add_argument('val', help='Path to root of the validation dataset') - parser.add_argument('--arch', - '-a', choices=archs.keys(), default='resnet50', - help='Convnet architecture') + parser.add_argument('--model', + '-m', choices=model_cfgs.keys(), default='resnet50', + help='Convnet models') parser.add_argument('--communicator', type=str, default='hierarchical', help='Type of communicator') parser.add_argument('--loaderjob', type=int, default=4) @@ -104,9 +104,10 @@ def main(): label_names = directory_parsing_label_names(args.train) - arch = archs[args.arch] - extractor = arch['class'](n_class=len(label_names), **arch['kwargs']) - extractor.pick = arch['score_layer_name'] + model_cfg = model_cfgs[args.model] + extractor = model_cfg['class']( + n_class=len(label_names), **model_cfg['kwargs']) + extractor.pick = model_cfg['score_layer_name'] model = Classifier(extractor) # Following https://arxiv.org/pdf/1706.02677.pdf, # the gamma of the last BN of each resblock is initialized by zeros. 
From a3d85d006472afdb36237a9f607ab9170a0aaaff Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Tue, 20 Nov 2018 08:35:54 +0900 Subject: [PATCH 44/57] fix an error that is raised when gpu <= 8 --- examples/classification/train_imagenet_multi.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/classification/train_imagenet_multi.py b/examples/classification/train_imagenet_multi.py index f32945d233..22b5fb020c 100644 --- a/examples/classification/train_imagenet_multi.py +++ b/examples/classification/train_imagenet_multi.py @@ -164,6 +164,8 @@ def warmup_and_exponential_shift(trainer): warmup_rate = 0.1 / lr rate = warmup_rate \ + (1 - warmup_rate) * epoch / warmup_epoch + else: + rate = 1 elif epoch < 30: rate = 1 elif epoch < 60: From 315a64578061881f9f75ec6f3af893ea76e0e513 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Mon, 26 Nov 2018 11:25:56 +0900 Subject: [PATCH 45/57] update README --- examples/classification/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/classification/README.md b/examples/classification/README.md index 7a281c9aee..fa8007951f 100644 --- a/examples/classification/README.md +++ b/examples/classification/README.md @@ -7,9 +7,9 @@ Single crop error rate. 
| Model | Top 1 | Reference Top 1 | Top 1 of model trained with ChainerCV | |:-:|:-:|:-:|:-:| | VGG16 | 29.0 % | 28.5 % [1] | | -| ResNet50 | 24.8 % | 24.7 % [2] | 23.53 % | -| ResNet101 | 23.6 % | 23.6 % [2] | 22.22 % | -| ResNet152 | 23.2 % | 23.0 % [2] | 21.60 % | +| ResNet50 | 24.8 % | 24.7 % [2] | 23.51 % | +| ResNet101 | 23.6 % | 23.6 % [2] | 22.07 % | +| ResNet152 | 23.2 % | 23.0 % [2] | 21.67 % | | SE-ResNet50 | 22.7 % | 22.4 % [3,4] | | | SE-ResNet101 | 21.8 % | 21.8 % [3,4] | | | SE-ResNet152 | 21.4 % | 21.3 % [3,4] | | From 7c73f05a8752a1b42e638f75b375c7928f7771ec Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Mon, 26 Nov 2018 11:39:09 +0900 Subject: [PATCH 46/57] add url link --- chainercv/links/model/resnet/resnet.py | 38 +++++++++++++++++++------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/chainercv/links/model/resnet/resnet.py b/chainercv/links/model/resnet/resnet.py index f96c992219..dc8afdd5fe 100644 --- a/chainercv/links/model/resnet/resnet.py +++ b/chainercv/links/model/resnet/resnet.py @@ -55,10 +55,11 @@ class ResNet(PickableSequentialChain): loaded from weights distributed on the Internet. The list of pretrained models supported are as follows: - * :obj:`imagenet`: Loads weights trained with ImageNet and distributed \ + * :obj:`imagenet`: Loads weights trained with ImageNet. \ + When :obj:`arch=='he'`, the weights distributed \ at `Model Zoo \ - `_. - This is only supported when :obj:`arch=='he'`. + `_ \ + are used. Args: n_layer (int): The number of layers. 
@@ -103,9 +104,30 @@ class ResNet(PickableSequentialChain): _models = { 'fb': { - 50: {}, - 101: {}, - 152: {} + 50: { + 'imagenet': { + 'param': {'n_class': 1000, 'mean': _imagenet_mean}, + 'overwritable': {'mean'}, + 'url': 'https://chainercv-models.preferred.jp/' + 'resnet152_imagenet_trained_2018_11_26.npz' + }, + }, + 101: { + 'imagenet': { + 'param': {'n_class': 1000, 'mean': _imagenet_mean}, + 'overwritable': {'mean'}, + 'url': 'https://chainercv-models.preferred.jp/' + 'resnet101_imagenet_trained_2018_11_26.npz' + }, + }, + 152: { + 'imagenet': { + 'param': {'n_class': 1000, 'mean': _imagenet_mean}, + 'overwritable': {'mean'}, + 'url': 'https://chainercv-models.preferred.jp/' + 'resnet152_imagenet_trained_2018_11_26.npz' + }, + }, }, 'he': { 50: { @@ -140,10 +162,6 @@ def __init__(self, n_layer, pretrained_model=None, mean=None, initialW=None, fc_kwargs={}, arch='fb'): if arch == 'fb': - if pretrained_model == 'imagenet': - raise ValueError( - 'Pretrained weights for Facebook ResNet models ' - 'are not supported. 
Please set arch to \'he\'.') stride_first = False conv1_no_bias = True elif arch == 'he': From 783ba369ea70423530db05a8e7f17d0d953ac270 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Mon, 26 Nov 2018 11:39:40 +0900 Subject: [PATCH 47/57] change default arch to fb for eval --- examples/classification/eval_imagenet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/classification/eval_imagenet.py b/examples/classification/eval_imagenet.py index fe6327f7d0..ccbb8ffaf2 100644 --- a/examples/classification/eval_imagenet.py +++ b/examples/classification/eval_imagenet.py @@ -37,7 +37,7 @@ def main(): parser.add_argument('--gpu', type=int, default=-1) parser.add_argument('--batchsize', type=int, default=32) parser.add_argument('--crop', choices=('center', '10'), default='center') - parser.add_argument('--resnet-arch', default='he') + parser.add_argument('--resnet-arch', default='fb') args = parser.parse_args() dataset = DirectoryParsingLabelDataset(args.val) From fbcbe561711e0230223a0b9d89112a1f7f96a682 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Mon, 26 Nov 2018 11:47:36 +0900 Subject: [PATCH 48/57] update README --- examples/classification/README.md | 42 ++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/examples/classification/README.md b/examples/classification/README.md index fa8007951f..a3c56c9cd8 100644 --- a/examples/classification/README.md +++ b/examples/classification/README.md @@ -2,19 +2,21 @@ ## Performance +### Weight conversion + Single crop error rate. 
-| Model | Top 1 | Reference Top 1 | Top 1 of model trained with ChainerCV | -|:-:|:-:|:-:|:-:| -| VGG16 | 29.0 % | 28.5 % [1] | | -| ResNet50 | 24.8 % | 24.7 % [2] | 23.51 % | -| ResNet101 | 23.6 % | 23.6 % [2] | 22.07 % | -| ResNet152 | 23.2 % | 23.0 % [2] | 21.67 % | -| SE-ResNet50 | 22.7 % | 22.4 % [3,4] | | -| SE-ResNet101 | 21.8 % | 21.8 % [3,4] | | -| SE-ResNet152 | 21.4 % | 21.3 % [3,4] | | -| SE-ResNeXt50 | 20.9 % | 21.0 % [3,4] | | -| SE-ResNeXt101 | 19.7 % | 19.8 % [3,4] | | +| Model | Top 1 | Reference Top 1 | +|:-:|:-:|:-:| +| VGG16 | 29.0 % | 28.5 % [1] | +| ResNet50 | 24.8 % | 24.7 % [2] | +| ResNet101 | 23.6 % | 23.6 % [2] | +| ResNet152 | 23.2 % | 23.0 % [2] | +| SE-ResNet50 | 22.7 % | 22.4 % [3,4] | +| SE-ResNet101 | 21.8 % | 21.8 % [3,4] | +| SE-ResNet152 | 21.4 % | 21.3 % [3,4] | +| SE-ResNeXt50 | 20.9 % | 21.0 % [3,4] | +| SE-ResNeXt101 | 19.7 % | 19.8 % [3,4] | Ten crop error rate. @@ -39,14 +41,25 @@ These scores are obtained using OpenCV backend. If Pillow is used, scores would $ python eval_imagenet.py [--model vgg16|resnet50|resnet101|resnet152|se-resnet50|se-resnet101|se-resnet152] [--pretrained-model ] [--batchsize ] [--gpu ] [--crop center|10] ``` -## Training Models +### Trained model -Training with multiple GPUs. Please install ChainerMN to use this feature. -Please consult the full list of arguments with `python train_imagenet_multi.py -h`. +Single crop error rates of the models trained with the ChainerCV's training script. + +| Model | Top 1 | Reference Top 1 | +|:-:|:-:|:-:| +| ResNet50 | 23.51 % | 23.60% [5] | +| ResNet101 | 22.07 % | 22.08% [5] | +| ResNet152 | 21.67 % | | + + +The scores of the models trained with `train_imagenet_multi.py`, which can be executed like below. +Please consult the full list of arguments for the training script with `python train_imagenet_multi.py -h`. ``` $ mpiexec -n N python train_imagenet_multi.py ``` +The training procedure carefully follows the "ResNet in 1 hour" paper [5]. 
+ ##### Performance tip When training over multiple nodes, set the communicator to `pure_nccl` (requires NCCL2). The default communicator (`hierarchical`) uses MPI to communicate between nodes, which is slower than the pure NCCL communicator. @@ -92,3 +105,4 @@ The ImageNet Large Scale Visual Recognition Challenge (ILSVRC) dataset has 1000 2. Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun. "Deep Residual Learning for Image Recognition" CVPR 2016 3. Jie Hu, Li Shen, Gang Sun. "Squeeze-and-Excitation Networks" CVPR 2018 4. https://github.com/hujie-frank/SENet +5. Priya Goyal, Piotr Dollár, Ross Girshick, Pieter Noordhuis, Lukasz Wesolowski, Aapo Kyrola, Andrew Tulloch, Yangqing Jia, Kaiming He. "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour" https://arxiv.org/abs/1706.02677 From 68479efffc1dc7d3c035e3d3979a1aa057577a85 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Mon, 26 Nov 2018 11:55:40 +0900 Subject: [PATCH 49/57] typo --- examples/classification/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/classification/README.md b/examples/classification/README.md index a3c56c9cd8..f0244d46f3 100644 --- a/examples/classification/README.md +++ b/examples/classification/README.md @@ -75,7 +75,7 @@ For more statistically reliable results, we obtained results from five different | ResNet50 | 32 | 23.56 (std=0.11) | -## How to prepare ImageNet Dataset +## How to prepare ImageNet dataset This instructions are based on the instruction found [here](https://github.com/facebook/fb.resnet.torch/blob/master/INSTALL.md#download-the-imagenet-dataset). 
From 175e0fbc025018171df9f6642964e94fef5ce13b Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Mon, 26 Nov 2018 11:56:16 +0900 Subject: [PATCH 50/57] update README --- examples/classification/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/classification/README.md b/examples/classification/README.md index f0244d46f3..513f55859b 100644 --- a/examples/classification/README.md +++ b/examples/classification/README.md @@ -1,6 +1,6 @@ # Classification -## Performance +## ImageNet ### Weight conversion From efdb5802bef6732879ccb586bfd42b8220805e34 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Mon, 26 Nov 2018 14:14:12 +0900 Subject: [PATCH 51/57] add cv2 option --- chainercv/links/model/resnet/resnet.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/chainercv/links/model/resnet/resnet.py b/chainercv/links/model/resnet/resnet.py index dc8afdd5fe..4b655d4438 100644 --- a/chainercv/links/model/resnet/resnet.py +++ b/chainercv/links/model/resnet/resnet.py @@ -109,7 +109,8 @@ class ResNet(PickableSequentialChain): 'param': {'n_class': 1000, 'mean': _imagenet_mean}, 'overwritable': {'mean'}, 'url': 'https://chainercv-models.preferred.jp/' - 'resnet152_imagenet_trained_2018_11_26.npz' + 'resnet152_imagenet_trained_2018_11_26.npz', + 'cv2': True, }, }, 101: { @@ -117,7 +118,8 @@ class ResNet(PickableSequentialChain): 'param': {'n_class': 1000, 'mean': _imagenet_mean}, 'overwritable': {'mean'}, 'url': 'https://chainercv-models.preferred.jp/' - 'resnet101_imagenet_trained_2018_11_26.npz' + 'resnet101_imagenet_trained_2018_11_26.npz', + 'cv2': True, }, }, 152: { @@ -125,7 +127,8 @@ class ResNet(PickableSequentialChain): 'param': {'n_class': 1000, 'mean': _imagenet_mean}, 'overwritable': {'mean'}, 'url': 'https://chainercv-models.preferred.jp/' - 'resnet152_imagenet_trained_2018_11_26.npz' + 'resnet152_imagenet_trained_2018_11_26.npz', + 'cv2': True, }, }, }, From 7ca130cd90e3ded34c4189af5b2c0c9ca5a19843 Mon 
Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Mon, 26 Nov 2018 14:16:03 +0900 Subject: [PATCH 52/57] fix doc --- examples/classification/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/classification/README.md b/examples/classification/README.md index 513f55859b..512b100b00 100644 --- a/examples/classification/README.md +++ b/examples/classification/README.md @@ -34,7 +34,7 @@ Ten crop error rate. The results can be reproduced by the following command. -The score is reported using a weight converted from a weight trained by Caffe or a weight trained with `train_imagenet_multi.py`. +The score is reported using weights converted from the weights trained by Caffe. These scores are obtained using OpenCV backend. If Pillow is used, scores would differ. ``` From 084fac9b1524e2a2a11d8ec30da27acab4d4e53b Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Mon, 26 Nov 2018 14:18:36 +0900 Subject: [PATCH 53/57] delete unnecessary options for iterators --- examples/classification/train_imagenet_multi.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/examples/classification/train_imagenet_multi.py b/examples/classification/train_imagenet_multi.py index 22b5fb020c..a3cb8b0c89 100644 --- a/examples/classification/train_imagenet_multi.py +++ b/examples/classification/train_imagenet_multi.py @@ -127,12 +127,10 @@ def main(): train_data = chainermn.scatter_dataset(train_data, comm, shuffle=True) val_data = chainermn.scatter_dataset(val_data, comm, shuffle=True) train_iter = chainer.iterators.MultiprocessIterator( - train_data, args.batchsize, shared_mem=3 * 224 * 224 * 4, - n_processes=args.loaderjob) + train_data, args.batchsize, n_processes=args.loaderjob) val_iter = iterators.MultiprocessIterator( val_data, args.batchsize, - repeat=False, shuffle=False, shared_mem=3 * 224 * 224 * 4, - n_processes=args.loaderjob) + repeat=False, shuffle=False, n_processes=args.loaderjob) optimizer = 
chainermn.create_multi_node_optimizer( CorrectedMomentumSGD(lr=lr, momentum=args.momentum), comm) From 1e19bdf9b90992dce34ede1cdd6091df787eff01 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Mon, 26 Nov 2018 16:34:33 +0900 Subject: [PATCH 54/57] delete performance related stuff --- examples/classification/README.md | 1 + examples/classification/train_imagenet_multi.py | 4 ---- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/examples/classification/README.md b/examples/classification/README.md index 512b100b00..0a7f080a18 100644 --- a/examples/classification/README.md +++ b/examples/classification/README.md @@ -63,6 +63,7 @@ The training procedure carefully follows the "ResNet in 1 hour" paper [5]. ##### Performance tip When training over multiple nodes, set the communicator to `pure_nccl` (requires NCCL2). The default communicator (`hierarchical`) uses MPI to communicate between nodes, which is slower than the pure NCCL communicator. +Also, cuDNN convolution functions can be optimized with extra commands (see https://docs.chainer.org/en/stable/performance.html#optimize-cudnn-convolution). 
##### Detailed training results diff --git a/examples/classification/train_imagenet_multi.py b/examples/classification/train_imagenet_multi.py index a3cb8b0c89..6b23f8d259 100644 --- a/examples/classification/train_imagenet_multi.py +++ b/examples/classification/train_imagenet_multi.py @@ -143,10 +143,6 @@ def main(): chainer.cuda.get_device(device).use() model.to_gpu() - # Configure GPU setting - chainer.cuda.set_max_workspace_size(1 * 1024 * 1024 * 1024) - chainer.using_config('autotune', True) - updater = chainer.training.StandardUpdater( train_iter, optimizer, device=device) From 2b0dc983262557c8369ea126f61dd68d79960edc Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Wed, 28 Nov 2018 14:05:02 +0900 Subject: [PATCH 55/57] delete PlotReport --- examples/classification/train_imagenet_multi.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/examples/classification/train_imagenet_multi.py b/examples/classification/train_imagenet_multi.py index 6b23f8d259..91d00c9ccf 100644 --- a/examples/classification/train_imagenet_multi.py +++ b/examples/classification/train_imagenet_multi.py @@ -177,7 +177,6 @@ def warmup_and_exponential_shift(trainer): log_interval = 0.1, 'epoch' print_interval = 0.1, 'epoch' - plot_interval = 1, 'epoch' if comm.rank == 0: trainer.extend(chainer.training.extensions.observe_lr(), @@ -194,22 +193,6 @@ def warmup_and_exponential_shift(trainer): ), trigger=print_interval) trainer.extend(extensions.ProgressBar(update_interval=10)) - if extensions.PlotReport.available(): - trainer.extend( - extensions.PlotReport( - ['main/loss', 'validation/main/loss'], - file_name='loss.png', trigger=plot_interval - ), - trigger=plot_interval - ) - trainer.extend( - extensions.PlotReport( - ['main/accuracy', 'validation/main/accuracy'], - file_name='accuracy.png', trigger=plot_interval - ), - trigger=plot_interval - ) - trainer.run() From dd814dca1ff1805688b197a68e1015d30c178483 Mon Sep 17 00:00:00 2001 From: Yusuke Niitani Date: Thu, 29 Nov 
2018 17:27:31 +0900 Subject: [PATCH 56/57] fix README --- examples/classification/README.md | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/examples/classification/README.md b/examples/classification/README.md index 0a7f080a18..4fcfd7ff0d 100644 --- a/examples/classification/README.md +++ b/examples/classification/README.md @@ -4,14 +4,14 @@ ### Weight conversion -Single crop error rate. +Single crop error rates of the models with the weights converted from Caffe weights. | Model | Top 1 | Reference Top 1 | |:-:|:-:|:-:| | VGG16 | 29.0 % | 28.5 % [1] | -| ResNet50 | 24.8 % | 24.7 % [2] | -| ResNet101 | 23.6 % | 23.6 % [2] | -| ResNet152 | 23.2 % | 23.0 % [2] | +| ResNet50 (`arch=he`) | 24.8 % | 24.7 % [2] | +| ResNet101 (`arch=he`) | 23.6 % | 23.6 % [2] | +| ResNet152 (`arch=he`) | 23.2 % | 23.0 % [2] | | SE-ResNet50 | 22.7 % | 22.4 % [3,4] | | SE-ResNet101 | 21.8 % | 21.8 % [3,4] | | SE-ResNet152 | 21.4 % | 21.3 % [3,4] | @@ -23,9 +23,9 @@ Ten crop error rate. | Model | Top 1 | Reference Top 1 | |:-:|:-:|:-:| | VGG16 | 27.1 % | | -| ResNet50 | 23.0 % | 22.9 % [2] | -| ResNet101 | 21.8 % | 21.8 % [2] | -| ResNet152 | 21.4 % | 21.4 % [2] | +| ResNet50 (`arch=he`) | 23.0 % | 22.9 % [2] | +| ResNet101 (`arch=he`) | 21.8 % | 21.8 % [2] | +| ResNet152 (`arch=he`) | 21.4 % | 21.4 % [2] | | SE-ResNet50 | 20.8 % | | | SE-ResNet101 | 20.1 % | | | SE-ResNet152 | 19.7 % | | @@ -34,7 +34,6 @@ Ten crop error rate. The results can be reproduced by the following command. -The score is reported using weights converted from the weights trained by Caffe. These scores are obtained using OpenCV backend. If Pillow is used, scores would differ. 
``` @@ -47,9 +46,9 @@ Single crop error rates of the models trained with the ChainerCV's training scri | Model | Top 1 | Reference Top 1 | |:-:|:-:|:-:| -| ResNet50 | 23.51 % | 23.60% [5] | -| ResNet101 | 22.07 % | 22.08% [5] | -| ResNet152 | 21.67 % | | +| ResNet50 (`arch=fb`) | 23.51 % | 23.60% [5] | +| ResNet101 (`arch=fb`) | 22.07 % | 22.08% [5] | +| ResNet152 (`arch=fb`) | 21.67 % | | The scores of the models trained with `train_imagenet_multi.py`, which can be executed like below. @@ -60,20 +59,20 @@ $ mpiexec -n N python train_imagenet_multi.py Date: Thu, 29 Nov 2018 17:27:59 +0900 Subject: [PATCH 57/57] remove unnecessary --- examples/classification/train_imagenet_multi.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/classification/train_imagenet_multi.py b/examples/classification/train_imagenet_multi.py index 91d00c9ccf..04cad88ca7 100644 --- a/examples/classification/train_imagenet_multi.py +++ b/examples/classification/train_imagenet_multi.py @@ -36,7 +36,6 @@ def __init__(self, mean): def __call__(self, in_data): img, label = in_data - _, H, W = img.shape img = random_sized_crop(img) img = resize(img, (224, 224)) img = random_flip(img, x_random=True)