-
Notifications
You must be signed in to change notification settings - Fork 2
/
run_extract_bli_data.py
57 lines (50 loc) · 2.16 KB
/
run_extract_bli_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import os
import sys
# Each entry maps a source language code to the target language codes it is
# paired with.  A tuple of pairs (rather than a dict) keeps the processing
# order explicit.
_TARGETS_BY_SOURCE = (
    ('de', ('fi', 'fr', 'hr', 'it', 'ru', 'tr')),
    ('en', ('de', 'fi', 'fr', 'hr', 'it', 'ru', 'tr')),
    ('fi', ('fr', 'hr', 'it', 'ru')),
    ('hr', ('fr', 'it', 'ru')),
    ('it', ('fr',)),
    ('ru', ('fr', 'it')),
    ('tr', ('fi', 'fr', 'hr', 'it', 'ru')),
)

# Flat list of (source, target) language-code pairs, in processing order.
lang_pairs = [
    (source, target)
    for source, targets in _TARGETS_BY_SOURCE
    for target in targets
]
# Run ./src/extract_bli_data.py once per language pair, using the "5k"
# training dictionaries.
for lang1, lang2 in lang_pairs:
    # Echo the pair being processed and flush right away so the progress line
    # is not held back in the stdout buffer while the child process runs.
    print(lang1, lang2)
    sys.stdout.flush()

    size_train = "5k"

    # Source and target embeddings share one path pattern; only the language
    # code differs.
    emb_template = "/media/data/WES/fasttext.wiki.{}.300.vocab_200K.vec"
    ROOT_EMB_SRC = emb_template.format(lang1)
    ROOT_EMB_TRG = emb_template.format(lang2)

    # Test and training dictionaries live in a per-pair "<l1>-<l2>" directory.
    ROOT_TEST_DICT = "/media/data/xling-eval/bli_datasets/{}-{}/yacle.test.freq.2k.{}-{}.tsv".format(
        lang1, lang2, lang1, lang2)
    ROOT_TRAIN_DICT = "/media/data/xling-eval/bli_datasets/{}-{}/yacle.train.freq.{}.{}-{}.tsv".format(
        lang1, lang2, size_train, lang1, lang2)

    SAVE_ROOT = "/media/data/T2TData/"  # save dir

    # Assemble the shell command and hand it to the shell; the exit status
    # returned by os.system is not inspected.
    command = (
        'python ./src/extract_bli_data.py --l1 {} --l2 {} --train_size {} '
        '--emb_src_dir {} --emb_tgt_dir {} --train_dict_dir {} '
        '--test_dict_dir {} --save_dir {}'
    ).format(
        lang1, lang2, size_train,
        ROOT_EMB_SRC, ROOT_EMB_TRG,
        ROOT_TRAIN_DICT, ROOT_TEST_DICT,
        SAVE_ROOT,
    )
    os.system(command)
# Run ./src/extract_bli_data.py once per language pair, using the "1k"
# training dictionaries.
for lang1, lang2 in lang_pairs:
    # Echo the pair being processed and flush right away so the progress line
    # is not held back in the stdout buffer while the child process runs.
    print(lang1, lang2)
    sys.stdout.flush()

    size_train = "1k"

    # Source and target embeddings share one path pattern; only the language
    # code differs.
    emb_template = "/media/data/WES/fasttext.wiki.{}.300.vocab_200K.vec"
    ROOT_EMB_SRC = emb_template.format(lang1)
    ROOT_EMB_TRG = emb_template.format(lang2)

    # Test and training dictionaries live in a per-pair "<l1>-<l2>" directory.
    ROOT_TEST_DICT = "/media/data/xling-eval/bli_datasets/{}-{}/yacle.test.freq.2k.{}-{}.tsv".format(
        lang1, lang2, lang1, lang2)
    ROOT_TRAIN_DICT = "/media/data/xling-eval/bli_datasets/{}-{}/yacle.train.freq.{}.{}-{}.tsv".format(
        lang1, lang2, size_train, lang1, lang2)

    SAVE_ROOT = "/media/data/T2TData/"  # save dir

    # The target embeddings are passed with --emb_tgt_dir, the counterpart of
    # --emb_src_dir and the same flag the 5k run uses.
    # NOTE(review): assumes extract_bli_data.py defines --emb_tgt_dir; confirm
    # against that script's argument parser.
    command = (
        'python ./src/extract_bli_data.py --l1 {} --l2 {} --train_size {} '
        '--emb_src_dir {} --emb_tgt_dir {} --train_dict_dir {} '
        '--test_dict_dir {} --save_dir {}'
    ).format(
        lang1, lang2, size_train,
        ROOT_EMB_SRC, ROOT_EMB_TRG,
        ROOT_TRAIN_DICT, ROOT_TEST_DICT,
        SAVE_ROOT,
    )
    # The exit status returned by os.system is not inspected.
    os.system(command)