Skip to content

Commit

Permalink
update before internship
Browse files (browse the repository at this point in the history)
  • Loading branch information
AylaRT committed Feb 12, 2023
1 parent 3f09e45 commit 619ceca
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 2 deletions.
4 changes: 4 additions & 0 deletions multilingual.py
Original file line number Diff line number Diff line change
Expand Up @@ -752,3 +752,7 @@ def multilingual_ate_sbm_complete(dp, out_fn, l1, l2, domains, iob_or_io, optimi
specific=specific, common=common, ood=ood, ne=ne,
partial=partial)
multilingual_ate_sbm(dp, l1, l2, output_dp_l1, output_dp_l2, out_fn)


# multilingual_ate_sbm_complete("unseen_corpora/batteries/", "test_batteries", "en", "fr",
# ["corp", "equi", "htfl", "wind"], "io")
5 changes: 3 additions & 2 deletions seq_bert_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,7 @@ def extract_terms_sbm(dp, domains, iob_or_io, optimiser="AdamW", nr_hidden=1, si
tokenised_texts += " ".join(sentence_list) + "*_*"

# use pretrained model to tag sentence
print(sentence)
model.predict(sentence)
sentence_pred_string = sentence.to_tagged_string() # "token1 token2 <I> token3 token4 <B>"
split_sentence_pred_string = sentence_pred_string.split() # [token1, token2, <I>, token3, token4, <B>]
Expand Down Expand Up @@ -517,6 +518,6 @@ def extract_terms_sbm(dp, domains, iob_or_io, optimiser="AdamW", nr_hidden=1, si
return output_dp


-# prep_corpus_sbm("unseen_corpora/mono_test/", "en", tok_nesting="eos")
-# extract_terms_sbm("unseen_corpora/mono_test/", ["corp", "equi", "htfl", "wind"], "iob", optimiser="AdamW",
+# prep_corpus_sbm("unseen_corpora/taalkunde/", "nl", tok_nesting="eos")
+# extract_terms_sbm("unseen_corpora/taalkunde/", ["corp", "equi", "htfl", "wind"], "iob", optimiser="AdamW",
 # nr_hidden=1, size=512, incl_incorr_tok=True, specific=1, common=1, ood=1, ne=1, partial=1)

0 comments on commit 619ceca

Please sign in to comment.