get_training_utils.py
'''
Utilities to get (optimal) training parameters such as learning rate schedules,
learning rate multipliers, stopping criteria and optimizers for training the
five tasks.
'''
import numpy as np

from experiment_utils import lr_schedule
from layca_optimizers import SGD, Adam, RMSprop, Adagrad
from keras.callbacks import Callback, LearningRateScheduler

class StoppingCriteria(Callback):
    '''
    Callback that stops training before the announced number of epochs when some criteria are met.
    '''
    def __init__(self, not_working=(0., -1), finished=0., converged=np.inf):
        '''
        not_working is a tuple (acc, nbepochs): the accuracy that should be reached
            after nbepochs epochs for the training to be considered as working
        finished is a training loss value below which the training is considered finished
        converged is the number of consecutive epochs with unchanged training loss
            after which the network is considered to have stopped changing
        '''
        super().__init__()
        self.acc, self.nbepochs = not_working
        self.finished = finished
        self.converged = converged
        self.previous_loss = -1
        self.counter = 0

    def on_epoch_end(self, epoch, logs=None):
        # not working: accuracy is still below the threshold after nbepochs epochs
        if epoch == self.nbepochs and logs.get('acc') <= self.acc:
            self.model.stop_training = True
        # finished: training loss is low enough
        if logs.get('loss') <= self.finished:
            self.model.stop_training = True
        # converged: training loss unchanged for `converged` consecutive epochs
        if logs.get('loss') == self.previous_loss:
            self.counter += 1
            if self.counter >= self.converged:
                self.model.stop_training = True
        else:
            self.counter = 0
        self.previous_loss = logs.get('loss')
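
# Illustrative usage (a sketch, assuming a compiled Keras `model` and data
# `x_train`, `y_train` that are not part of this module): stop early if
# accuracy is still below 20% after 5 epochs, once the loss drops below 1e-4,
# or after 3 consecutive epochs with unchanged loss:
#   stopper = StoppingCriteria(not_working=(0.2, 5), finished=1e-4, converged=3)
#   model.fit(x_train, y_train, epochs=100, callbacks=[stopper])
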
def get_training_schedule(task, lr, add=0):
    '''
    get number of epochs and learning rate schedule for a given task and initial learning rate
    '''
    if task == 'C10-CNN1':
        return 100 + add, LearningRateScheduler(lr_schedule(lr, 0.2, [80 + add, 90 + add, 97 + add]))
    elif task == 'C100-resnet':
        return 100 + add, LearningRateScheduler(lr_schedule(lr, 0.1, [70 + add, 90 + add, 97 + add]))
    elif task == 'tiny-CNN':
        return 80 + add, LearningRateScheduler(lr_schedule(lr, 0.2, [70 + add]))
    elif task == 'C10-resnet':
        return 200, LearningRateScheduler(lr_schedule(lr, 0.2, [60, 120, 160]))
    else:  # C10-CNN2 and C100-WRN
        return 250, LearningRateScheduler(lr_schedule(lr, 0.2, [100, 170, 220]))
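
# Illustrative usage (assumes the same hypothetical `model` and data as above):
# the schedule is returned as a Keras callback, e.g.
#   epochs, scheduler = get_training_schedule('C10-CNN1', 3**-1)
#   model.fit(x_train, y_train, epochs=epochs, callbacks=[scheduler])
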
def get_optimized_training_schedule(task, optimizer):
    '''
    get optimal number of epochs, initial learning rate and learning rate schedule
    for a given (task, optimizer) pair
    '''
    if task in ['C10-CNN1', 'C100-resnet', 'tiny-CNN']:
        if 'layca' in optimizer:
            lr = 3**-5 if optimizer in ['Adam_layca', 'SGD_AMom_layca'] else 3**-3
        elif task in ['C10-CNN1', 'C100-resnet'] and optimizer == 'SGD_normalized':
            lr = 3**-2
        elif (task, optimizer) == ('C10-CNN1', 'RMSprop'):
            lr = 3**-6
        elif (task, optimizer) == ('C100-resnet', 'Adam'):
            lr = 3**-5
        elif (task, optimizer) == ('tiny-CNN', 'Adagrad'):
            lr = 3**-4
        else:
            lr = 3**-1
        if task == 'C10-CNN1':
            return 100, lr, LearningRateScheduler(lr_schedule(lr, 0.2, [80, 90, 97]))
        elif task == 'C100-resnet':
            return 100, lr, LearningRateScheduler(lr_schedule(lr, 0.1, [70, 90, 97]))
        elif task == 'tiny-CNN':
            if optimizer == 'SGD_weight_decay':
                # SGD+L2 needed more epochs to reach 100% training acc
                return 100, lr, LearningRateScheduler(lr_schedule(lr, 0.2, [70, 90, 97]))
            else:
                return 80, lr, LearningRateScheduler(lr_schedule(lr, 0.2, [70]))
    elif task == 'C10-CNN2':
        if optimizer in ['SGD_weight_decay', 'RMSprop_weight_decay']:
            lr = 0.0003 if optimizer == 'RMSprop_weight_decay' else 0.5
            # halve the learning rate every 25 epochs
            return 250, lr, LearningRateScheduler(lr_schedule(lr, 0.5, [i * 25 for i in range(1, 100)]))
        else:
            if 'layca' in optimizer:
                lr = 3**-5 if optimizer in ['Adam_layca', 'SGD_AMom_layca'] else 3**-3
            elif optimizer in ['SGD', 'SGD_normalized']:
                lr = 3**-1
            else:
                # fail explicitly instead of raising a NameError on an undefined lr
                raise ValueError('no tuned learning rate for ' + optimizer + ' on ' + task)
            return 250, lr, LearningRateScheduler(lr_schedule(lr, 0.2, [100, 170, 220]))
    elif task == 'C100-WRN':
        if optimizer in ['SGD_weight_decay', 'Adam_weight_decay']:
            lr = 0.0003 if optimizer == 'Adam_weight_decay' else 0.1
            return 200, lr, LearningRateScheduler(lr_schedule(lr, 0.2, [60, 120, 160]))
        elif 'layca' in optimizer:
            lr = 3**-5 if optimizer in ['Adam_layca', 'SGD_AMom_layca'] else 3**-3
            return 250, lr, LearningRateScheduler(lr_schedule(lr, 0.2, [100, 170, 220]))
        elif optimizer == 'SGD_normalized' or optimizer == 'SGD':
            lr = 3**-2
            return 250, lr, LearningRateScheduler(lr_schedule(lr, 0.2, [100, 170, 220]))
    elif task == 'C10-resnet':
        if optimizer in ['SGD', 'SGD_weight_decay']:
            lr = 3**-1
        elif optimizer == 'SGD_layca':
            lr = 3**-3
        else:
            # fail explicitly instead of raising a NameError on an undefined lr
            raise ValueError('no tuned learning rate for ' + optimizer + ' on ' + task)
        return 200, lr, LearningRateScheduler(lr_schedule(lr, 0.2, [60, 120, 160]))
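
# Illustrative usage: the tuned settings combine with get_optimizer (defined
# below); `model`, `x_train` and `y_train` are hypothetical, e.g.
#   epochs, lr, scheduler = get_optimized_training_schedule('C100-resnet', 'Adam')
#   model.compile(optimizer=get_optimizer('Adam', lr),
#                 loss='categorical_crossentropy', metrics=['accuracy'])
#   model.fit(x_train, y_train, epochs=epochs, callbacks=[scheduler])
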
def get_stopping_criteria(task):
    '''
    get the task-specific stopping criteria (empty list if early stopping is not used)
    '''
    if task == 'C10-CNN1':
        return [StoppingCriteria(not_working=(0.2, 7), finished=1e-4, converged=3)]
    elif task == 'C100-resnet':
        return [StoppingCriteria(not_working=(0.1, 7), finished=1e-3, converged=3)]
    elif task == 'tiny-CNN':
        return [StoppingCriteria(not_working=(0.2, 10), finished=1e-3, converged=3)]
    elif task == 'C10-resnet':
        return [StoppingCriteria(not_working=(0.2, 60))]
    else:  # C10-CNN2 and C100-WRN
        return []
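
# Illustrative usage: the criteria are plain Keras callbacks, so they compose
# with a learning rate scheduler obtained above, e.g.
#   callbacks = [scheduler] + get_stopping_criteria('C10-CNN1')
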
def get_kernel_layer_names(model):
    '''
    collects the names of all layers of a model that contain a kernel,
    in topological order (input layers first)
    '''
    layer_names = []
    for l in model.layers:
        if len(l.weights) > 0 and 'kernel' in l.weights[0].name:
            layer_names.append(l.name)
    return layer_names

def get_learning_rate_multipliers(model, alpha=0):
    '''
    provides a dictionary (layer name -> lr multiplier) as parametrized by alpha (cf. Section 4.1)
    '''
    # get layer names in forward pass ordering (layers that are close to the input go first)
    layer_names = get_kernel_layer_names(model)
    if alpha > 0.:
        # positive alpha: multipliers decay towards the input, the last layer keeps multiplier 1
        mult = (1 - alpha)**(5 / (len(layer_names) - 1))
        multipliers = dict(zip(layer_names, [mult**(len(layer_names) - 1 - i) for i in range(len(layer_names))]))
    else:
        # non-positive alpha: multipliers decay towards the output, the first layer keeps multiplier 1
        mult = (alpha + 1)**(5 / (len(layer_names) - 1))
        multipliers = dict(zip(layer_names, [mult**i for i in range(len(layer_names))]))
    return multipliers
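
# Illustrative example: for a hypothetical model with 6 kernel layers and
# alpha = 0.5, mult = (1 - 0.5)**(5/5) = 0.5, so the multipliers from input to
# output are [0.5**5, 0.5**4, ..., 0.5, 1.0], i.e. later layers learn faster.
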
def get_optimizer(optimizer, lr, multipliers={'sqfqzé': 1.}):
    '''
    helper function to get a certain optimizer from a string describing it
    multipliers have only been implemented for SGD
    '''
    # the default multipliers dict uses a dummy key so that no layer matches it
    if optimizer[:8] == 'SGD_AMom':
        return SGD(lr, layca='layca' in optimizer, momentum=0.9,
                   adam_like_momentum=True, multipliers=multipliers)
    elif optimizer[:3] == 'SGD':
        return SGD(lr,
                   layca='layca' in optimizer,
                   normalized='normalized' in optimizer,
                   effective_lr='effective' in optimizer,
                   multipliers=multipliers)
    elif optimizer[:7] == 'RMSprop':
        return RMSprop(lr, layca='layca' in optimizer)
    elif optimizer[:4] == 'Adam':
        return Adam(lr, layca='layca' in optimizer)
    elif optimizer[:7] == 'Adagrad':
        return Adagrad(lr, layca='layca' in optimizer)
    else:
        # fail explicitly instead of silently returning None
        raise ValueError('unknown optimizer: ' + optimizer)
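
# Illustrative usage: per-layer multipliers only take effect for SGD variants;
# `model` is again a hypothetical Keras model, e.g.
#   multipliers = get_learning_rate_multipliers(model, alpha=0.5)
#   opt = get_optimizer('SGD', 3**-1, multipliers=multipliers)
#   opt_layca = get_optimizer('SGD_layca', 3**-3)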