diff --git a/scripts/convert_pegasus_from_huggingface_to_uer.py b/scripts/convert_pegasus_from_huggingface_to_uer.py
index 32c8caae..e6a9b3d0 100644
--- a/scripts/convert_pegasus_from_huggingface_to_uer.py
+++ b/scripts/convert_pegasus_from_huggingface_to_uer.py
@@ -7,7 +7,7 @@
 uer_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
 sys.path.insert(0, uer_dir)
 
-from scripts.convert_bart_from_huggingface_to_uer import\
+from scripts.convert_bart_from_huggingface_to_uer import \
     convert_encoder_decoder_transformer_from_huggingface_to_uer
 
 parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
@@ -15,8 +15,8 @@
                     help=".")
 parser.add_argument("--output_model_path", type=str, default="models/output_model.bin",
                     help=".")
-parser.add_argument("--layers_num", type=int, default=6, help=".")
-parser.add_argument("--decoder_layers_num", type=int, default=6, help=".")
+parser.add_argument("--layers_num", type=int, default=12, help=".")
+parser.add_argument("--decoder_layers_num", type=int, default=12, help=".")
 
 args = parser.parse_args()
 
@@ -24,8 +24,6 @@
 output_model = collections.OrderedDict()
 
-output_model["embedding.sinusoidalpos.pe"] = input_model["model.encoder.embed_positions.weight"].unsqueeze(1)
-output_model["tgt_embedding.sinusoidalpos.pe"] = input_model["model.decoder.embed_positions.weight"].unsqueeze(1)
 output_model["embedding.word.embedding.weight"] = input_model["model.encoder.embed_tokens.weight"]
 output_model["tgt_embedding.word.embedding.weight"] = input_model["model.decoder.embed_tokens.weight"]
 output_model["target.lm.output_layer.weight"] = input_model["lm_head.weight"]
 
diff --git a/scripts/convert_pegasus_from_uer_to_huggingface.py b/scripts/convert_pegasus_from_uer_to_huggingface.py
index 64512deb..b9b6b575 100644
--- a/scripts/convert_pegasus_from_uer_to_huggingface.py
+++ b/scripts/convert_pegasus_from_uer_to_huggingface.py
@@ -3,11 +3,12 @@
 import argparse
 import collections
 import torch
+import math
 
 uer_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
 sys.path.insert(0, uer_dir)
 
-from scripts.convert_bart_from_uer_to_huggingface import\
+from scripts.convert_bart_from_uer_to_huggingface import \
     convert_encoder_decoder_transformer_from_uer_to_huggingface
 
 parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
@@ -17,6 +18,7 @@
                     help=".")
 parser.add_argument("--layers_num", type=int, default=12, help=".")
 parser.add_argument("--decoder_layers_num", type=int, default=12, help=".")
+parser.add_argument("--max_seq_length", type=int, default=1024, help=".")
 
 args = parser.parse_args()
 
@@ -25,8 +27,21 @@
 output_model = collections.OrderedDict()
 
 output_model["model.shared.weight"] = input_model["embedding.word.embedding.weight"]
-output_model["model.encoder.embed_positions.weight"] = input_model["embedding.sinusoidalpos.pe"].squeeze(1)
-output_model["model.decoder.embed_positions.weight"] = input_model["tgt_embedding.sinusoidalpos.pe"].squeeze(1)
+
+emb_size = input_model["embedding.word.embedding.weight"].shape[1]
+pe = torch.zeros(args.max_seq_length, emb_size)
+position = torch.arange(0, args.max_seq_length).unsqueeze(1)
+div_term = torch.exp(
+    (
+        torch.arange(0, emb_size, 2, dtype=torch.float)
+        * -(math.log(10000.0) / emb_size)
+    )
+)
+pe[:, 0::2] = torch.sin(position.float() * div_term)
+pe[:, 1::2] = torch.cos(position.float() * div_term)
+
+output_model["model.encoder.embed_positions.weight"] = pe
+output_model["model.decoder.embed_positions.weight"] = pe
 output_model["model.encoder.embed_tokens.weight"] = input_model["embedding.word.embedding.weight"]
 output_model["model.decoder.embed_tokens.weight"] = input_model["tgt_embedding.word.embedding.weight"]
 output_model["lm_head.weight"] = input_model["target.lm.output_layer.weight"]
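Note: the HF-to-UER script now drops the stored sinusoidalpos tables instead of copying them, and the UER-to-HF script regenerates the positional table from the sin/cos formula, so a round-trip conversion only reproduces the original embed_positions weight if the source checkpoint stores its table in the same interleaved layout (sine on even dimensions, cosine on odd). A minimal sanity-check sketch, not part of the patch; the checkpoint path is illustrative:

# Rebuild the sinusoidal table exactly as convert_pegasus_from_uer_to_huggingface.py
# does, then compare it with the table stored in the original HuggingFace checkpoint.
import math
import torch

hf_state = torch.load("models/input_model.bin", map_location="cpu")  # illustrative path
max_seq_length, emb_size = hf_state["model.encoder.embed_positions.weight"].shape

pe = torch.zeros(max_seq_length, emb_size)
position = torch.arange(0, max_seq_length).unsqueeze(1)
div_term = torch.exp(
    torch.arange(0, emb_size, 2, dtype=torch.float)
    * -(math.log(10000.0) / emb_size)
)
pe[:, 0::2] = torch.sin(position.float() * div_term)  # even dimensions: sine
pe[:, 1::2] = torch.cos(position.float() * div_term)  # odd dimensions: cosine

# False here would mean the checkpoint uses a different sin/cos layout
# (e.g. split halves) and cannot be regenerated by this formula.
print(torch.allclose(pe, hf_state["model.encoder.embed_positions.weight"], atol=1e-5))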