-
Notifications
You must be signed in to change notification settings - Fork 0
/
command_parser.py
106 lines (86 loc) · 3.6 KB
/
command_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import os
import re
import sys
from preferences import Paths
""" BabyLemmatizer 2 utils """
## TODO clean
def split_train_filename(orig_fn):
    """ Split train filename into prefix and data_type

    The name is split at the LAST ``-dev``, ``-test`` or ``-train``
    marker that is followed by at least one more character (normally
    the file extension). A prefix that itself contains such a marker
    is therefore kept intact: ``akk-test-train.conllu`` gives
    ``('akk-test', 'train')``.

    :param orig_fn        file name such as ``PREFIX-train.conllu``
    :type orig_fn         str

    :return               ``(prefix, data_type)``; when the name has no
                          usable marker both items are ``orig_fn``
    :rtype                tuple """
    # The greedy (.*) backtracks from the right, so the last marker wins
    # and prefix and data_type always come from the same split point.
    match = re.fullmatch(r'(.*)-(dev|test|train).+', orig_fn)
    if match is None:
        return orig_fn, orig_fn
    return match.group(1), match.group(2)
def overwrite_prompt(prefix, files):
    """ Confirm with the user that existing models may be overwritten

    Nothing is asked when ``files`` is empty. Otherwise the user has
    to answer exactly ``Y``; any other answer prints a cancellation
    notice and ends the program with exit status 1.

    :param prefix         input prefix notation
    :param files          names of the models that already exist """
    # Plural noun takes the 's' ("Models exist"), singular verb takes
    # it instead ("Model exists").
    plural = len(files) > 1
    noun_s, verb_s = ('s', '') if plural else ('', 's')

    if not files:
        return

    question = ''.join([
        f'\n> Model{noun_s} with following name{noun_s} exist{verb_s}:\n',
        f' + {", ".join(files)}\n',
        '\n> Rebuilding data or retraining the model will overwrite\n',
        f'> and delete the old model{noun_s}.\n\n',
        ' Y to continue\n',
        ' N or anything else to cancel\n\n',
        ' Answer: ',
    ])

    if input(question) != 'Y':
        print(f'\n> Model{noun_s} {prefix} exist{verb_s}. Canceled by user.')
        sys.exit(1)
def _count_pt_files(folder):
    """ Count ``.pt`` files in ``folder``; a missing folder counts as 0 """
    if not os.path.isdir(folder):
        return 0
    return sum(1 for x in os.listdir(folder) if x.endswith('.pt'))


def parse_prefix(prefix, evaluate=False, build=False, train=False):
    """ Parse star expressions for file prefixes

    A trailing ``*`` makes ``prefix`` match every name that starts
    with the text before the star; without it the name has to be
    equal to ``prefix``.

    :param prefix         model name prefix
    :param evaluate       only look up existing models in
                          ``Paths.models``; never prompt
    :param build          take the names from ``*-train.conllu``
                          files in ``Paths.conllu``
    :param train          not used by this function
    :type prefix          str
    :type evaluate        bool
    :type build           bool
    :type train           bool

    :return               names matching ``prefix``
    :rtype                list

    Ends the program with status 0 when nothing matches. When a
    matching model folder already holds ``.pt`` files, the user is
    asked for confirmation through overwrite_prompt(). """
    wildcard = prefix.endswith('*')
    stem = prefix[:-1]

    def is_match(name):
        return name.startswith(stem) if wildcard else name == prefix

    # Check if models already exist
    models = [f for f in os.listdir(Paths.models) if is_match(f)]

    # Do not prompt if used for evaluation
    if evaluate:
        if not models:
            print(f'> Model "{prefix}" does not exist in /{Paths.models}')
            print('> Use --model-path=PATH to give the correct path\n\n')
            # NOTE(review): status 0 on an error path; kept as-is because
            # calling scripts may rely on it.
            sys.exit(0)
        return models

    # Prompt only if some matching model folder really holds .pt files.
    # Each sub folder is checked on its own, so a model that has only a
    # tagger or only a lemmatizer folder no longer raises an error.
    ask_prompt = False
    for model in models:
        tagger_path = os.path.join(Paths.models, model, 'tagger')
        lemmatizer_path = os.path.join(Paths.models, model, 'lemmatizer')
        if _count_pt_files(tagger_path) + _count_pt_files(lemmatizer_path):
            ask_prompt = True
            break

    if ask_prompt:
        overwrite_prompt(prefix, models)

    # When building, the model names come from the train data instead.
    # The conllu folder is listed only here so that other modes do not
    # depend on it being present.
    searched_folder = Paths.models
    if build:
        searched_folder = Paths.conllu
        prefixes = (split_train_filename(x)[0]
                    for x in os.listdir(Paths.conllu)
                    if x.endswith('-train.conllu'))
        models = [f for f in prefixes if is_match(f)]

    if not models:
        print(f'> No training data for "{prefix}" in folder "{searched_folder}"')
        sys.exit(0)

    return models