forked from ai-adv-lab/deepspeech.mxnet
-
Notifications
You must be signed in to change notification settings - Fork 0
/
deepspeech.cfg
102 lines (93 loc) · 2.84 KB
/
deepspeech.cfg
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
[common]
# mode can be one of the following: train, predict, load
mode = train
# example: gpu0,gpu1,gpu2,gpu3
context = gpu0,gpu1,gpu2
#context = gpu0
# checkpoint prefix; checkpoints will be saved under the checkpoints folder with this prefix
prefix = deep_bucket
# when mode is load or predict, the model will be loaded from the file named by model_file under the checkpoints folder
model_file = deep_bucketn_epoch0n_batch-0018
batch_size = 12
#batch_size=4
# the log will be saved to the file named by log_filename
log_filename = deep_bucket.log
# save a checkpoint after every n epochs
save_checkpoint_every_n_epoch = 1
# NOTE(review): presumably a checkpoint is also saved after every n batches -- confirm against the training script
save_checkpoint_every_n_batch = 3000
is_bi_graphemes = True
tensorboard_log_dir = tblog/deep_bucket
# if random_seed is -1, the random seed is taken from the timestamp
# NOTE(review): presumably the same applies to mx_random_seed -- confirm
mx_random_seed = -1
random_seed = -1
kvstore_option = device
[data]
# NOTE(review): max_duration is presumably in seconds -- confirm against the data loader
max_duration = 16.0
train_json = ./train_corpus_all.json
# NOTE(review): test_json and val_json point to the same file -- confirm this is intended
test_json = ./test_corpus.json
val_json = ./test_corpus.json
language = en
width = 161
height = 1
channel = 1
stride = 1
[arch]
channel_num = 32
conv_layer1_filter_dim = [11, 41]
conv_layer1_stride = [2, 2]
conv_layer2_filter_dim = [11, 21]
conv_layer2_stride = [1, 2]
num_rnn_layer = 5
# NOTE(review): this list has 5 entries, matching num_rnn_layer; presumably one hidden size per RNN layer -- confirm
num_hidden_rnn_list = [1760, 1760, 1760, 1760, 1760]
num_hidden_proj = 0
num_rear_fc_layers = 1
# NOTE(review): the two lists below each have 1 entry, matching num_rear_fc_layers -- presumably one entry per rear FC layer; confirm
num_hidden_rear_fc_list = [1760]
act_type_rear_fc_list = ["relu"]
# rnn_type can be one of: lstm, bilstm, gru, bigru
rnn_type = bigru
# lstm_type can be vanilla_lstm or fc_lstm (no effect when rnn_type is gru or bigru)
lstm_type = fc_lstm
is_batchnorm = True
is_bucketing = True
buckets = [200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600]
[train]
num_epoch = 70
learning_rate = 0.0003
# the learning rate is annealed by this constant factor
# NOTE(review): whether the rate is divided or multiplied by the factor is not visible here -- confirm against the training script
learning_rate_annealing = 1.1
initializer = Xavier
init_scale = 2
factor_type = in
# show progress every n batches
show_every = 100
save_optimizer_states = True
normalize_target_k = 100000
# overwrite meta files (feats_mean, feats_std, unicode_en_baidu_bi_graphemes.csv)
overwrite_meta_files = True
overwrite_bi_graphemes_dictionary = False
# save the features extracted from the sound files as CSV files; this can take a lot of disk space
save_feature_as_csvfile = False
enable_logging_train_metric = True
enable_logging_validation_metric = True
[load]
# NOTE(review): presumably these settings are only used when mode is load -- confirm against the training script
load_optimizer_states = True
is_start_from_batch = True
[optimizer]
optimizer = sgd
# parameters passed to the selected optimizer
# strings inside optimizer_params_dictionary should be wrapped in double quotes ("), not single quotes (')
# example for sgd/nag:
optimizer_params_dictionary={"momentum":0.9}
# example for dcasgd:
# optimizer_params_dictionary={"momentum":0.9, "lamda":1.0}
# example for adam:
# optimizer_params_dictionary={"beta1":0.9,"beta2":0.999}
# example for adagrad:
# optimizer_params_dictionary={"eps":1e-08}
# example for rmsprop:
# optimizer_params_dictionary={"gamma1":0.9, "gamma2":0.9,"epsilon":1e-08}
# example for adadelta:
# optimizer_params_dictionary={"rho":0.95, "epsilon":1e-08}
# set clip_gradient to 0 to disable gradient clipping
clip_gradient = 100
weight_decay = 0.