Skip to content
This repository has been archived by the owner on Aug 3, 2021. It is now read-only.

Commit

Permalink
Merge pull request #268 from RaymondNie/18.11-dev-cudnnlstm
Browse files Browse the repository at this point in the history
CudnnLSTM compatibility for LM
  • Loading branch information
okuchaiev authored Oct 31, 2018
2 parents f192807 + 0c2059b commit aad1ca1
Show file tree
Hide file tree
Showing 13 changed files with 413 additions and 50 deletions.
134 changes: 134 additions & 0 deletions example_configs/lm/lstm-test-small-cudnn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
# Config: small LSTM language model on WikiText-2 (test-sized), running the
# recurrent layers through the fused cuDNN kernel
# (tf.contrib.cudnn_rnn.CudnnLSTM) rather than a per-step RNN cell.
import tensorflow as tf

from open_seq2seq.models import LSTMLM
from open_seq2seq.encoders import LMEncoder
from open_seq2seq.decoders import FakeDecoder
from open_seq2seq.data import WKTDataLayer
# NOTE(review): imported but unused here — the cuDNN path sets core_cell=None.
# Kept for parity with the sibling (non-cuDNN) configs.
from open_seq2seq.parts.rnns.weight_drop import WeightDropLayerNormBasicLSTMCell
from open_seq2seq.losses import BasicSequenceLoss
from open_seq2seq.optimizers.lr_policies import fixed_lr

data_root = "[REPLACE THIS TO THE PATH WITH YOUR WikiText-2-raw DATA]"
processed_data_folder = "wkt2-processed-data"

base_model = LSTMLM
bptt = 12   # truncated back-propagation-through-time sequence length
steps = 10  # shared interval (in steps) for logging / checkpointing

base_params = {
    "restore_best_checkpoint": True,
    "use_horovod": False,
    "num_gpus": 2,

    "batch_size_per_gpu": 160,
    "num_epochs": 1500,
    "save_summaries_steps": steps,
    "print_loss_steps": steps,
    "print_samples_steps": steps,
    "save_checkpoint_steps": steps,
    "processed_data_folder": processed_data_folder,
    "logdir": "LSTM-FP32-2GPU-SMALL",
    "eval_steps": steps * 2,

    "optimizer": "Adam",
    "optimizer_params": {},

    "lr_policy": fixed_lr,
    "lr_policy_params": {
        "learning_rate": 9e-4
    },

    "summaries": ["learning_rate", "variables", "gradients",
                  "variable_norm", "gradient_norm", "global_gradient_norm"],

    "dtype": tf.float32,
    # Uncomment the two lines below (and drop the tf.float32 line) to train
    # in mixed precision with backoff loss scaling:
    # "dtype": "mixed",
    # "loss_scaling": "Backoff",

    "encoder": LMEncoder,
    "encoder_params": {
        "initializer": tf.random_uniform_initializer,
        "initializer_params": {
            "minval": -0.1,
            "maxval": 0.1,
        },
        # Fused cuDNN LSTM path; core_cell is None here, so core_cell_params
        # presumably feeds the cuDNN layer's size — confirm against LMEncoder.
        "use_cudnn_rnn": True,
        "cudnn_rnn_type": tf.contrib.cudnn_rnn.CudnnLSTM,
        "core_cell": None,
        "core_cell_params": {
            "num_units": 128,
            "forget_bias": 1.0,
        },
        "encoder_layers": 2,
        "encoder_dp_input_keep_prob": 1.0,
        "encoder_dp_output_keep_prob": 0.6,
        "encoder_last_input_keep_prob": 1.0,
        "encoder_last_output_keep_prob": 0.6,
        "recurrent_keep_prob": 0.7,
        "encoder_emb_keep_prob": 0.37,
        "encoder_use_skip_connections": False,
        "emb_size": 64,
        "sampling_prob": 0.0,  # 0 is always use the ground truth
        "fc_use_bias": True,
        "weight_tied": True,   # tie embedding and output-projection weights
        "awd_initializer": False,
    },

    # Language modeling needs no real decoder; the encoder produces logits.
    "decoder": FakeDecoder,

    "regularizer": tf.contrib.layers.l2_regularizer,
    "regularizer_params": {
        "scale": 2e-6,
    },

    "loss": BasicSequenceLoss,
    "loss_params": {
        "offset_target_by_one": False,
        "average_across_timestep": True,
        "do_mask": False,
    }
}

train_params = {
    "data_layer": WKTDataLayer,
    "data_layer_params": {
        "data_root": data_root,
        "processed_data_folder": processed_data_folder,
        "pad_vocab_to_eight": False,
        "rand_start": True,   # start each epoch at a random offset
        "shuffle": False,
        "shuffle_buffer_size": 25000,
        "repeat": True,
        "map_parallel_calls": 16,
        "prefetch_buffer_size": 8,
        "bptt": bptt,
        "small": True,        # use the small test slice of the corpus
    },
}
eval_params = {
    "data_layer": WKTDataLayer,
    "data_layer_params": {
        "processed_data_folder": processed_data_folder,
        "pad_vocab_to_eight": False,
        "shuffle": False,
        "repeat": False,
        "map_parallel_calls": 16,
        "prefetch_buffer_size": 1,
        "bptt": bptt,
        "small": True,
    },
}

infer_params = {
    "data_layer": WKTDataLayer,
    "data_layer_params": {
        "processed_data_folder": processed_data_folder,
        "pad_vocab_to_eight": False,
        "shuffle": False,
        "repeat": False,
        "rand_start": False,
        "map_parallel_calls": 16,
        "prefetch_buffer_size": 8,
        "bptt": bptt,
        # Space-separated seed prefixes used to prime text generation.
        "seed_tokens": "something The only game",
    },
}
2 changes: 2 additions & 0 deletions example_configs/lm/lstm-test-small-mixed.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@
"minval": -0.1,
"maxval": 0.1,
},
"use_cudnn_rnn": False,
"cudnn_rnn_type": None,
"core_cell": WeightDropLayerNormBasicLSTMCell,
"core_cell_params": {
"num_units": 128,
Expand Down
2 changes: 2 additions & 0 deletions example_configs/lm/lstm-test-small.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@
"minval": -0.1,
"maxval": 0.1,
},
"use_cudnn_rnn": False,
"cudnn_rnn_type": None,
"core_cell": WeightDropLayerNormBasicLSTMCell,
"core_cell_params": {
"num_units": 128,
Expand Down
4 changes: 3 additions & 1 deletion example_configs/lm/lstm-wkt103-mixed.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@
"minval": -0.1,
"maxval": 0.1,
},
"use_cudnn_rnn": False,
"cudnn_rnn_type": None,
"core_cell": WeightDropLayerNormBasicLSTMCell,
"core_cell_params": {
"num_units": 1024,
Expand Down Expand Up @@ -125,4 +127,4 @@
"bptt": bptt,
"seed_tokens": "something The only game",
},
}
}
4 changes: 3 additions & 1 deletion example_configs/lm/lstm-wkt2-fp32.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@
"minval": -0.1,
"maxval": 0.1,
},
"use_cudnn_rnn": False,
"cudnn_rnn_type": None,
"core_cell": WeightDropLayerNormBasicLSTMCell,
"core_cell_params": {
"num_units": 896,
Expand Down Expand Up @@ -124,4 +126,4 @@
"bptt": bptt,
"seed_tokens": "something The only game",
},
}
}
4 changes: 3 additions & 1 deletion example_configs/transfer/imdb-from-scratch.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@
"minval": -0.1,
"maxval": 0.1,
},
"use_cudnn_rnn": False,
"cudnn_rnn_type": None,
"core_cell": WeightDropLayerNormBasicLSTMCell,
"core_cell_params": {
"num_units": 896,
Expand Down Expand Up @@ -127,4 +129,4 @@
"binary": binary,
"max_length": max_length,
},
}
}
2 changes: 2 additions & 0 deletions example_configs/transfer/imdb-wkt103.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@
"minval": -0.1,
"maxval": 0.1,
},
"use_cudnn_rnn": False,
"cudnn_rnn_type": None,
"core_cell": WeightDropLayerNormBasicLSTMCell,
"core_cell_params": {
"num_units": 1024,
Expand Down
137 changes: 137 additions & 0 deletions example_configs/transfer/imdb-wkt2-cudnn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
# Config: transfer learning — fine-tune a cuDNN-LSTM language model
# (pretrained on WikiText-2, see "load_model" below) for IMDB sentiment
# classification, trained in mixed precision.
import tensorflow as tf

from open_seq2seq.models import LSTMLM
from open_seq2seq.encoders import LMEncoder
from open_seq2seq.decoders import FakeDecoder
from open_seq2seq.data import IMDBDataLayer
# NOTE(review): imported but unused here — the cuDNN path sets core_cell=None.
# Kept for parity with the sibling (non-cuDNN) configs.
from open_seq2seq.parts.rnns.weight_drop import WeightDropLayerNormBasicLSTMCell
from open_seq2seq.losses import CrossEntropyLoss
from open_seq2seq.optimizers.lr_policies import fixed_lr

data_root = "[REPLACE THIS TO THE PATH WITH YOUR IMDB DATA]"
processed_data_folder = "imdb-processed-data-wkt2"

base_model = LSTMLM
max_length = 256  # truncate/pad reviews to this many tokens
binary = True     # binary (positive/negative) sentiment labels
steps = 10        # shared interval (in steps) for logging / checkpointing

base_params = {
    "restore_best_checkpoint": True,
    "use_horovod": False,
    "num_gpus": 1,

    "batch_size_per_gpu": 16,
    "eval_batch_size_per_gpu": 64,
    "num_epochs": 100,
    "save_summaries_steps": steps,
    "print_loss_steps": steps,
    "print_samples_steps": steps,
    "save_checkpoint_steps": steps,
    # Checkpoint dir of the pretrained LM to warm-start from.
    "load_model": "LSTM-FP32-2GPU-SMALL",
    # NOTE(review): logdir says "WKT103" but this config is the wkt2 variant —
    # looks like a copy-paste leftover; value kept unchanged.
    "logdir": "IMDB-WKT103-CUDNN-MIXED",
    # Must be the vocab of the processed data the base LM was trained on.
    "lm_vocab_file": "wkt2-processed-data/vocab.txt",
    # "lm_vocab_file": '[LINK TO THE VOCAB FILE IN THE PROCESSED DATA USED TO TRAIN THE BASE LM]'
    "processed_data_folder": processed_data_folder,
    "eval_steps": steps,

    "optimizer": "Adam",
    "optimizer_params": {},

    # Constant learning rate (no decay schedule).
    "lr_policy": fixed_lr,
    "lr_policy_params": {
        "learning_rate": 1e-4
    },

    "summaries": ["learning_rate", "variables", "gradients",
                  "variable_norm", "gradient_norm", "global_gradient_norm"],
    # "max_grad_norm": 0.25,
    # "dtype": tf.float32,
    "dtype": "mixed",
    "loss_scaling": "Backoff",

    "encoder": LMEncoder,
    "encoder_params": {
        "initializer": tf.random_uniform_initializer,
        "initializer_params": {
            "minval": -0.1,
            "maxval": 0.1,
        },
        # Fused cuDNN LSTM path; core_cell is None here, so core_cell_params
        # presumably feeds the cuDNN layer's size — confirm against LMEncoder.
        "use_cudnn_rnn": True,
        "cudnn_rnn_type": tf.contrib.cudnn_rnn.CudnnLSTM,
        "core_cell": None,
        "core_cell_params": {
            "num_units": 1024,
            "forget_bias": 1.0,
        },
        "encoder_layers": 3,
        "encoder_dp_input_keep_prob": 1.0,
        "encoder_dp_output_keep_prob": 0.8,
        "encoder_last_input_keep_prob": 1.0,
        "encoder_last_output_keep_prob": 0.8,
        "recurrent_keep_prob": 1.0,
        "encoder_emb_keep_prob": 0.6,
        "encoder_use_skip_connections": False,
        "emb_size": 256,
        "num_tokens_gen": 10,
        "sampling_prob": 0.0,  # 0 is always use the ground truth
        "fc_use_bias": True,
        "weight_tied": True,   # tie embedding and output-projection weights
        "awd_initializer": False,
    },

    # Classification head comes from the encoder; no real decoder needed.
    "decoder": FakeDecoder,

    "regularizer": tf.contrib.layers.l2_regularizer,
    "regularizer_params": {
        "scale": 2e-6,
    },

    "loss": CrossEntropyLoss,
}

train_params = {
    "data_layer": IMDBDataLayer,
    "data_layer_params": {
        "data_root": data_root,
        "pad_vocab_to_eight": False,
        "shuffle": True,
        "shuffle_buffer_size": 25000,
        "repeat": True,
        "map_parallel_calls": 16,
        "prefetch_buffer_size": 8,
        "binary": binary,
        "max_length": max_length,
        "get_stats": True,
        # "small": True,
    },
}
eval_params = {
    "data_layer": IMDBDataLayer,
    "data_layer_params": {
        # "data_root": data_root,
        "pad_vocab_to_eight": False,
        "shuffle": False,
        "repeat": False,
        "map_parallel_calls": 16,
        "prefetch_buffer_size": 1,
        "binary": binary,
        "max_length": max_length,
        # "small": True,
    },
}

infer_params = {
    "data_layer": IMDBDataLayer,
    "data_layer_params": {
        # "data_root": data_root,
        "pad_vocab_to_eight": False,
        "shuffle": False,
        "repeat": False,
        "rand_start": False,
        "map_parallel_calls": 16,
        "prefetch_buffer_size": 8,
        "binary": binary,
        "max_length": max_length,
    },
}
2 changes: 2 additions & 0 deletions example_configs/transfer/imdb-wkt2.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@
"minval": -0.1,
"maxval": 0.1,
},
"use_cudnn_rnn": False,
"cudnn_rnn_type": None,
"core_cell": WeightDropLayerNormBasicLSTMCell,
"core_cell_params": {
"num_units": 896,
Expand Down
2 changes: 2 additions & 0 deletions example_configs/transfer/sst-wkt2-small.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@
"minval": -0.1,
"maxval": 0.1,
},
"use_cudnn_rnn": False,
"cudnn_rnn_type": None,
"core_cell": WeightDropLayerNormBasicLSTMCell,
"core_cell_params": {
"num_units": 128,
Expand Down
2 changes: 2 additions & 0 deletions example_configs/transfer/sst-wkt2.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@
"minval": -0.1,
"maxval": 0.1,
},
"use_cudnn_rnn": False,
"cudnn_rnn_type": None,
"core_cell": WeightDropLayerNormBasicLSTMCell,
"core_cell_params": {
"num_units": 896,
Expand Down
Loading

0 comments on commit aad1ca1

Please sign in to comment.