Modify convert (#395)
Eric8932 authored Oct 19, 2023
1 parent 3e82267 commit 4f9d551
Showing 7 changed files with 66 additions and 66 deletions.
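This commit makes two kinds of fixes. In the four BERT converters and the two XLM-RoBERTa converters, the MLM head's layer-norm parameters move from target.layer_norm.* to target.mlm.layer_norm.*, matching the other target.mlm.* keys. In convert_t5_from_uer_to_huggingface.py, the per-layer assignments are reversed so that output_model receives HuggingFace-style keys and input_model is read with UER-py-style keys, as a UER-to-HuggingFace converter should. A checkpoint converted to UER-py format with the old scripts can be migrated by renaming its two layer-norm keys; below is a minimal sketch, assuming a local PyTorch checkpoint (file paths are hypothetical):

    import torch

    # Rename map taken from this diff: the MLM layer norm moves under "target.mlm.".
    RENAMES = {
        "target.layer_norm.gamma": "target.mlm.layer_norm.gamma",
        "target.layer_norm.beta": "target.mlm.layer_norm.beta",
    }

    state_dict = torch.load("old_uer_model.bin", map_location="cpu")  # hypothetical path
    migrated = {RENAMES.get(k, k): v for k, v in state_dict.items()}
    torch.save(migrated, "migrated_uer_model.bin")  # hypothetical path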
4 changes: 2 additions & 2 deletions scripts/convert_bert_from_huggingface_to_uer.py
@@ -72,8 +72,8 @@ def main():
     output_model["target.sp.linear_2.bias"] = input_model["cls.seq_relationship.bias"]
     output_model["target.mlm.linear_1.weight"] = input_model["cls.predictions.transform.dense.weight"]
     output_model["target.mlm.linear_1.bias"] = input_model["cls.predictions.transform.dense.bias"]
-    output_model["target.layer_norm.gamma"] = input_model["cls.predictions.transform.LayerNorm.weight"]
-    output_model["target.layer_norm.beta"] = input_model["cls.predictions.transform.LayerNorm.bias"]
+    output_model["target.mlm.layer_norm.gamma"] = input_model["cls.predictions.transform.LayerNorm.weight"]
+    output_model["target.mlm.layer_norm.beta"] = input_model["cls.predictions.transform.LayerNorm.bias"]
     output_model["target.mlm.linear_2.weight"] = input_model["cls.predictions.decoder.weight"]
     output_model["target.mlm.linear_2.bias"] = input_model["cls.predictions.bias"]
4 changes: 2 additions & 2 deletions scripts/convert_bert_from_original_tf_to_uer.py
@@ -93,8 +93,8 @@ def main():
     output_model["target.sp.linear_2.bias"] = input_model["cls/seq_relationship/output_bias"]
     output_model["target.mlm.linear_1.weight"] = input_model["cls/predictions/transform/dense/kernel"]
     output_model["target.mlm.linear_1.bias"] = input_model["cls/predictions/transform/dense/bias"]
-    output_model["target.layer_norm.gamma"] = input_model["cls/predictions/transform/LayerNorm/gamma"]
-    output_model["target.layer_norm.beta"] = input_model["cls/predictions/transform/LayerNorm/beta"]
+    output_model["target.mlm.layer_norm.gamma"] = input_model["cls/predictions/transform/LayerNorm/gamma"]
+    output_model["target.mlm.layer_norm.beta"] = input_model["cls/predictions/transform/LayerNorm/beta"]
     output_model["target.mlm.linear_2.weight"] = input_model["bert/embeddings/word_embeddings"]
     output_model["target.mlm.linear_2.bias"] = input_model["cls/predictions/output_bias"]
4 changes: 2 additions & 2 deletions scripts/convert_bert_from_uer_to_huggingface.py
@@ -70,8 +70,8 @@ def main():
     output_model["cls.seq_relationship.bias"] = input_model["target.sp.linear_2.bias"]
     output_model["cls.predictions.transform.dense.weight"] = input_model["target.mlm.linear_1.weight"]
     output_model["cls.predictions.transform.dense.bias"] = input_model["target.mlm.linear_1.bias"]
-    output_model["cls.predictions.transform.LayerNorm.weight"] = input_model["target.layer_norm.gamma"]
-    output_model["cls.predictions.transform.LayerNorm.bias"] = input_model["target.layer_norm.beta"]
+    output_model["cls.predictions.transform.LayerNorm.weight"] = input_model["target.mlm.layer_norm.gamma"]
+    output_model["cls.predictions.transform.LayerNorm.bias"] = input_model["target.mlm.layer_norm.beta"]
     output_model["cls.predictions.decoder.weight"] = input_model["target.mlm.linear_2.weight"]
     output_model["cls.predictions.bias"] = input_model["target.mlm.linear_2.bias"]
4 changes: 2 additions & 2 deletions scripts/convert_bert_from_uer_to_original_tf.py
@@ -86,8 +86,8 @@ def main():
     output_model["cls/seq_relationship/output_bias"] = input_model["target.sp.linear_2.bias"]
     output_model["cls/predictions/transform/dense/kernel"] = input_model["target.mlm.linear_1.weight"]
     output_model["cls/predictions/transform/dense/bias"] = input_model["target.mlm.linear_1.bias"]
-    output_model["cls/predictions/transform/LayerNorm/gamma"] = input_model["target.layer_norm.gamma"]
-    output_model["cls/predictions/transform/LayerNorm/beta"] = input_model["target.layer_norm.beta"]
+    output_model["cls/predictions/transform/LayerNorm/gamma"] = input_model["target.mlm.layer_norm.gamma"]
+    output_model["cls/predictions/transform/LayerNorm/beta"] = input_model["target.mlm.layer_norm.beta"]
     output_model["cls/predictions/output_bias"] = input_model["target.mlm.linear_2.bias"]
 
     tf_vars = []
108 changes: 54 additions & 54 deletions scripts/convert_t5_from_uer_to_huggingface.py
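The hunk below fixes a swapped conversion direction: the old code wrote UER-py-style keys into output_model and read HuggingFace-style keys from input_model, the opposite of what this UER-to-HuggingFace script needs. To make the corrected mapping easier to follow, here is a sketch of the encoder self-attention portion expressed as a rename table; key names are taken verbatim from the diff, and the decoder and feed-forward blocks follow the same pattern:

    def encoder_self_attn_renames(layers_num):
        # UER-py key -> HuggingFace T5 key, matching the corrected assignments below.
        renames = {}
        for i in range(layers_num):
            uer = "encoder.transformer." + str(i)
            hf = "encoder.block." + str(i)
            renames[uer + ".self_attn.linear_layers.0.weight"] = hf + ".layer.0.SelfAttention.q.weight"
            renames[uer + ".self_attn.linear_layers.1.weight"] = hf + ".layer.0.SelfAttention.k.weight"
            renames[uer + ".self_attn.linear_layers.2.weight"] = hf + ".layer.0.SelfAttention.v.weight"
            renames[uer + ".self_attn.final_linear.weight"] = hf + ".layer.0.SelfAttention.o.weight"
            renames[uer + ".layer_norm_1.weight"] = hf + ".layer.0.layer_norm.weight"
        return renames

    # Applying the table reproduces the corrected direction:
    # for uer_key, hf_key in encoder_self_attn_renames(args.layers_num).items():
    #     output_model[hf_key] = input_model[uer_key]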
@@ -34,69 +34,69 @@
     input_model["target.lm.output_layer.weight"]
 
 for i in range(args.layers_num):
-    output_model["encoder.transformer." + str(i) + ".self_attn.linear_layers.0.weight"] = \
-        input_model["encoder.block." + str(i) + ".layer.0.SelfAttention.q.weight"]
-    output_model["encoder.transformer." + str(i) + ".self_attn.linear_layers.1.weight"] = \
-        input_model["encoder.block." + str(i) + ".layer.0.SelfAttention.k.weight"]
-    output_model["encoder.transformer." + str(i) + ".self_attn.linear_layers.2.weight"] = \
-        input_model["encoder.block." + str(i) + ".layer.0.SelfAttention.v.weight"]
-    output_model["encoder.transformer." + str(i) + ".self_attn.final_linear.weight"] = \
-        input_model["encoder.block." + str(i) + ".layer.0.SelfAttention.o.weight"]
-    output_model["encoder.transformer." + str(i) + ".layer_norm_1.weight"] = \
-        input_model["encoder.block." + str(i) + ".layer.0.layer_norm.weight"]
+    output_model["encoder.block." + str(i) + ".layer.0.SelfAttention.q.weight"] = \
+        input_model["encoder.transformer." + str(i) + ".self_attn.linear_layers.0.weight"]
+    output_model["encoder.block." + str(i) + ".layer.0.SelfAttention.k.weight"] = \
+        input_model["encoder.transformer." + str(i) + ".self_attn.linear_layers.1.weight"]
+    output_model["encoder.block." + str(i) + ".layer.0.SelfAttention.v.weight"] = \
+        input_model["encoder.transformer." + str(i) + ".self_attn.linear_layers.2.weight"]
+    output_model["encoder.block." + str(i) + ".layer.0.SelfAttention.o.weight"] = \
+        input_model["encoder.transformer." + str(i) + ".self_attn.final_linear.weight"]
+    output_model["encoder.block." + str(i) + ".layer.0.layer_norm.weight"] = \
+        input_model["encoder.transformer." + str(i) + ".layer_norm_1.weight"]
 
     if args.type == "t5-v1_1":
-        output_model["encoder.transformer." + str(i) + ".feed_forward.linear_gate.weight"] = \
-            input_model["encoder.block." + str(i) + ".layer.1.DenseReluDense.wi_0.weight"]
-        output_model["encoder.transformer." + str(i) + ".feed_forward.linear_1.weight"] = \
-            input_model["encoder.block." + str(i) + ".layer.1.DenseReluDense.wi_1.weight"]
-        output_model["encoder.transformer." + str(i) + ".feed_forward.linear_2.weight"] = \
-            input_model["encoder.block." + str(i) + ".layer.1.DenseReluDense.wo.weight"]
+        output_model["encoder.block." + str(i) + ".layer.1.DenseReluDense.wi_0.weight"] = \
+            input_model["encoder.transformer." + str(i) + ".feed_forward.linear_gate.weight"]
+        output_model["encoder.block." + str(i) + ".layer.1.DenseReluDense.wi_1.weight"] = \
+            input_model["encoder.transformer." + str(i) + ".feed_forward.linear_1.weight"]
+        output_model["encoder.block." + str(i) + ".layer.1.DenseReluDense.wo.weight"] = \
+            input_model["encoder.transformer." + str(i) + ".feed_forward.linear_2.weight"]
     else:
-        output_model["encoder.transformer." + str(i) + ".feed_forward.linear_1.weight"] = \
-            input_model["encoder.block." + str(i) + ".layer.1.DenseReluDense.wi.weight"]
-        output_model["encoder.transformer." + str(i) + ".feed_forward.linear_2.weight"] = \
-            input_model["encoder.block." + str(i) + ".layer.1.DenseReluDense.wo.weight"]
-    output_model["encoder.transformer." + str(i) + ".layer_norm_2.weight"] = \
-        input_model["encoder.block." + str(i) + ".layer.1.layer_norm.weight"]
+        output_model["encoder.block." + str(i) + ".layer.1.DenseReluDense.wi.weight"] = \
+            input_model["encoder.transformer." + str(i) + ".feed_forward.linear_1.weight"]
+        output_model["encoder.block." + str(i) + ".layer.1.DenseReluDense.wo.weight"] = \
+            input_model["encoder.transformer." + str(i) + ".feed_forward.linear_2.weight"]
+    output_model["encoder.block." + str(i) + ".layer.1.layer_norm.weight"] = \
+        input_model["encoder.transformer." + str(i) + ".layer_norm_2.weight"]
 
 for i in range(args.decoder_layers_num):
-    output_model["decoder.transformer_decoder." + str(i) + ".self_attn.linear_layers.0.weight"] = \
-        input_model["decoder.block." + str(i) + ".layer.0.SelfAttention.q.weight"]
-    output_model["decoder.transformer_decoder." + str(i) + ".self_attn.linear_layers.1.weight"] = \
-        input_model["decoder.block." + str(i) + ".layer.0.SelfAttention.k.weight"]
-    output_model["decoder.transformer_decoder." + str(i) + ".self_attn.linear_layers.2.weight"] = \
-        input_model["decoder.block." + str(i) + ".layer.0.SelfAttention.v.weight"]
-    output_model["decoder.transformer_decoder." + str(i) + ".self_attn.final_linear.weight"] = \
-        input_model["decoder.block." + str(i) + ".layer.0.SelfAttention.o.weight"]
-    output_model["decoder.transformer_decoder." + str(i) + ".layer_norm_1.weight"] = \
-        input_model["decoder.block." + str(i) + ".layer.0.layer_norm.weight"]
+    output_model["decoder.block." + str(i) + ".layer.0.SelfAttention.q.weight"] = \
+        input_model["decoder.transformer_decoder." + str(i) + ".self_attn.linear_layers.0.weight"]
+    output_model["decoder.block." + str(i) + ".layer.0.SelfAttention.k.weight"] = \
+        input_model["decoder.transformer_decoder." + str(i) + ".self_attn.linear_layers.1.weight"]
+    output_model["decoder.block." + str(i) + ".layer.0.SelfAttention.v.weight"] = \
+        input_model["decoder.transformer_decoder." + str(i) + ".self_attn.linear_layers.2.weight"]
+    output_model["decoder.block." + str(i) + ".layer.0.SelfAttention.o.weight"] = \
+        input_model["decoder.transformer_decoder." + str(i) + ".self_attn.final_linear.weight"]
+    output_model["decoder.block." + str(i) + ".layer.0.layer_norm.weight"] = \
+        input_model["decoder.transformer_decoder." + str(i) + ".layer_norm_1.weight"]
 
-    output_model["decoder.transformer_decoder." + str(i) + ".context_attn.linear_layers.0.weight"] = \
-        input_model["decoder.block." + str(i) + ".layer.1.EncDecAttention.q.weight"]
-    output_model["decoder.transformer_decoder." + str(i) + ".context_attn.linear_layers.1.weight"] = \
-        input_model["decoder.block." + str(i) + ".layer.1.EncDecAttention.k.weight"]
-    output_model["decoder.transformer_decoder." + str(i) + ".context_attn.linear_layers.2.weight"] = \
-        input_model["decoder.block." + str(i) + ".layer.1.EncDecAttention.v.weight"]
-    output_model["decoder.transformer_decoder." + str(i) + ".context_attn.final_linear.weight"] = \
-        input_model["decoder.block." + str(i) + ".layer.1.EncDecAttention.o.weight"]
-    output_model["decoder.transformer_decoder." + str(i) + ".layer_norm_2.weight"] = \
-        input_model["decoder.block." + str(i) + ".layer.1.layer_norm.weight"]
+    output_model["decoder.block." + str(i) + ".layer.1.EncDecAttention.q.weight"] = \
+        input_model["decoder.transformer_decoder." + str(i) + ".context_attn.linear_layers.0.weight"]
+    output_model["decoder.block." + str(i) + ".layer.1.EncDecAttention.k.weight"] = \
+        input_model["decoder.transformer_decoder." + str(i) + ".context_attn.linear_layers.1.weight"]
+    output_model["decoder.block." + str(i) + ".layer.1.EncDecAttention.v.weight"] = \
+        input_model["decoder.transformer_decoder." + str(i) + ".context_attn.linear_layers.2.weight"]
+    output_model["decoder.block." + str(i) + ".layer.1.EncDecAttention.o.weight"] = \
+        input_model["decoder.transformer_decoder." + str(i) + ".context_attn.final_linear.weight"]
+    output_model["decoder.block." + str(i) + ".layer.1.layer_norm.weight"] = \
+        input_model["decoder.transformer_decoder." + str(i) + ".layer_norm_2.weight"]
 
     if args.type == "t5-v1_1":
-        output_model["decoder.transformer_decoder." + str(i) + ".feed_forward.linear_gate.weight"] = \
-            input_model["decoder.block." + str(i) + ".layer.2.DenseReluDense.wi_0.weight"]
-        output_model["decoder.transformer_decoder." + str(i) + ".feed_forward.linear_1.weight"] = \
-            input_model["decoder.block." + str(i) + ".layer.2.DenseReluDense.wi_1.weight"]
-        output_model["decoder.transformer_decoder." + str(i) + ".feed_forward.linear_2.weight"] = \
-            input_model["decoder.block." + str(i) + ".layer.2.DenseReluDense.wo.weight"]
+        output_model["decoder.block." + str(i) + ".layer.2.DenseReluDense.wi_0.weight"] = \
+            input_model["decoder.transformer_decoder." + str(i) + ".feed_forward.linear_gate.weight"]
+        output_model["decoder.block." + str(i) + ".layer.2.DenseReluDense.wi_1.weight"] = \
+            input_model["decoder.transformer_decoder." + str(i) + ".feed_forward.linear_1.weight"]
+        output_model["decoder.block." + str(i) + ".layer.2.DenseReluDense.wo.weight"] = \
+            input_model["decoder.transformer_decoder." + str(i) + ".feed_forward.linear_2.weight"]
     else:
-        output_model["decoder.transformer_decoder." + str(i) + ".feed_forward.linear_1.weight"] = \
-            input_model["decoder.block." + str(i) + ".layer.2.DenseReluDense.wi.weight"]
-        output_model["decoder.transformer_decoder." + str(i) + ".feed_forward.linear_2.weight"] = \
-            input_model["decoder.block." + str(i) + ".layer.2.DenseReluDense.wo.weight"]
-    output_model["decoder.transformer_decoder." + str(i) + ".layer_norm_3.weight"] = \
-        input_model["decoder.block." + str(i) + ".layer.2.layer_norm.weight"]
+        output_model["decoder.block." + str(i) + ".layer.2.DenseReluDense.wi.weight"] = \
+            input_model["decoder.transformer_decoder." + str(i) + ".feed_forward.linear_1.weight"]
+        output_model["decoder.block." + str(i) + ".layer.2.DenseReluDense.wo.weight"] = \
+            input_model["decoder.transformer_decoder." + str(i) + ".feed_forward.linear_2.weight"]
+    output_model["decoder.block." + str(i) + ".layer.2.layer_norm.weight"] = \
+        input_model["decoder.transformer_decoder." + str(i) + ".layer_norm_3.weight"]
 
 output_model["encoder.final_layer_norm.weight"] = \
     input_model["encoder.layer_norm.weight"]
4 changes: 2 additions & 2 deletions scripts/convert_xlmroberta_from_huggingface_to_uer.py
@@ -67,9 +67,9 @@
     input_model["lm_head.dense.weight"]
 output_model["target.mlm.linear_1.bias"] = \
     input_model["lm_head.dense.bias"]
-output_model["target.layer_norm.gamma"] = \
+output_model["target.mlm.layer_norm.gamma"] = \
     input_model["lm_head.layer_norm.weight"]
-output_model["target.layer_norm.beta"] = \
+output_model["target.mlm.layer_norm.beta"] = \
     input_model["lm_head.layer_norm.bias"]
 output_model["target.mlm.linear_2.weight"] = \
     input_model["lm_head.decoder.weight"]
4 changes: 2 additions & 2 deletions scripts/convert_xlmroberta_from_uer_to_huggingface.py
@@ -68,9 +68,9 @@
 output_model["lm_head.dense.bias"] = \
     input_model["target.mlm.linear_1.bias"]
 output_model["lm_head.layer_norm.weight"] = \
-    input_model["target.layer_norm.gamma"]
+    input_model["target.mlm.layer_norm.gamma"]
 output_model["lm_head.layer_norm.bias"] = \
-    input_model["target.layer_norm.beta"]
+    input_model["target.mlm.layer_norm.beta"]
 output_model["lm_head.decoder.weight"] = \
     input_model["target.mlm.linear_2.weight"]
 output_model["lm_head.decoder.bias"] = \

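A quick way to confirm a checkpoint was produced by the updated converters is to check for the renamed keys; a minimal sketch, assuming a freshly converted UER-py checkpoint at a hypothetical path:

    import torch

    sd = torch.load("converted_uer_model.bin", map_location="cpu")  # hypothetical path

    # The updated scripts emit the MLM layer norm under the "target.mlm." prefix
    # and no longer write the old top-level keys.
    assert "target.mlm.layer_norm.gamma" in sd
    assert "target.mlm.layer_norm.beta" in sd
    assert "target.layer_norm.gamma" not in sd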