-
Notifications
You must be signed in to change notification settings - Fork 0
/
txtbuilder.py
309 lines (244 loc) · 9.98 KB
/
txtbuilder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
# -*- coding: utf-8 -*-
import itertools
import random
import re
# --- Generic values ---------------------------------------------------------
EMPTY_STRING = ''
UTF8SIG = 'utf-8-sig'  # Encoding name passed to open() for input and output

# --- Input handling ---------------------------------------------------------
# Upper bound handed to the input reader in TXTB.__init__.
MAX_LINES = 1_000_000
# When True, every data item is stripped of surrounding whitespace after load.
REMOVE_TRAILING_NEWLINES = False

# --- Interpretation defaults ------------------------------------------------
# Default for the `call_functions` parameter of TXTB.interpret.
DEFAULT_CALL_FUNCTIONS = True  # TODO review upon release of RR feature
# Ordering of parsed set items: 0 -> sorted, 1 -> shuffled,
# any other value -> left in whatever order the de-duplicating set produced.
FLAG_SET_SORTED_RANDOM_DEFAULT = 0

# --- Syntax markers ---------------------------------------------------------
SYMBL_USR_PRMPT = '~'  # First character of tokens that open/close a user prompt
SYMBL_SET_PRMPT = '@'  # First character of tokens that open/close a set prompt
# TXTB FUNCTIONS USED IN STATIC MEMBERS
def txtb_strip(data=EMPTY_STRING):
    """Return ``data`` without its leading and trailing whitespace."""
    stripped = data.strip()
    return stripped
def txtb_to_caps(data=EMPTY_STRING):
    """Return ``data`` converted to upper case."""
    upper_cased = data.upper()
    return upper_cased
def txtb_to_lower(data=EMPTY_STRING):
    """Return ``data`` converted to lower case."""
    lower_cased = data.lower()
    return lower_cased
def txtb_precede_minus(data=EMPTY_STRING):
    """Title-case the first line of ``data`` and prefix it with ``'- '``.

    Lines after the first are returned untouched. Empty input yields an
    empty string (no ``'- '`` prefix is produced).
    """
    if not data:
        return EMPTY_STRING
    # partition keeps the newline (if any) and the remainder exactly as given,
    # so only the first line is rewritten.
    first_line, newline, remainder = data.partition('\n')
    return '- ' + first_line.title() + newline + remainder
def txtb_audiosurf_gamemode(data=EMPTY_STRING):
    """Turn whitespace-separated words into concatenated ``[as-<word>]`` tags.

    Args:
        data: Text holding zero or more words separated by whitespace.

    Returns:
        ``'None'`` when ``data`` is empty, whitespace-only, or the single
        word ``'none'``; otherwise one ``[as-<word>]`` tag per word, joined
        without any separator (e.g. ``'mono pro'`` -> ``'[as-mono][as-pro]'``).
    """
    words = data.split()
    # Decide on the split words rather than the raw text so that stray
    # surrounding whitespace (e.g. ' mono ') never ends up inside a tag and
    # whitespace-only input is treated like empty input.
    if not words or words == ['none']:
        return 'None'
    return EMPTY_STRING.join(f'[as-{word}]' for word in words)
def txtb_rpl_newline(data=EMPTY_STRING):
    """Drop the single space that directly follows each newline in ``data``."""
    # Splitting on '\n ' and re-joining with '\n' is the same substitution as
    # replacing every non-overlapping '\n ' with '\n'.
    pieces = data.split('\n ')
    return '\n'.join(pieces)
def txtb_str_title(data=EMPTY_STRING):
    """Return ``data`` in title case (as produced by ``str.title``)."""
    title_cased = data.title()
    return title_cased
class TXTB:
    """Build a text file from an input file made of data items and syntax.

    The input file is processed line by line:

    * A line starting with ``data_separator`` followed by a space or a
      newline starts a new data item; the separator line itself is dropped.
    * A line starting with ``section_separator`` switches from the data
      section to the syntax section; that line is dropped as well.
    * Data items whose text starts with ``set_separator`` are parsed as
      named sets of comma/newline separated items.
    * Syntax lines are split on single spaces into tokens, which
      ``generate_txt`` interprets one by one.
    """

    # Single-letter tokens that expand to a whitespace character.
    charmap = {
        's': ' ',   # Space
        't': '\t',  # Tab
        'n': '\n',  # Newline
    }
    # Function name (written after a '.' in a token) -> implementation.
    function_map = {
        'strip': txtb_strip,
        'rpl_newline': txtb_rpl_newline,
        'caps': txtb_to_caps,
        'lower': txtb_to_lower,
        'linktitle': txtb_precede_minus,
        'title': txtb_str_title,
        'gametag': txtb_audiosurf_gamemode,
    }
    section_separator = '%%'
    data_separator = '%'
    set_separator = '#'

    def __init__(self, input_name):
        """Read ``input_name`` and split it into data items, sets and tokens.

        Args:
            input_name: Path of the input file, opened with encoding
                ``UTF8SIG``.
        """
        self.data = []                # Text used as building blocks
        self.tokens = []              # Syntax as a list of tokens to interpret
        self.set_index = {}           # index_in_data -> list_of_set_items
        self.set_index_by_name = {}   # set_name -> index_in_data
        ds = TXTB.data_separator
        ss = TXTB.set_separator
        data_separator_prefixes = (ds + ' ', ds + '\n')

        with open(input_name, 'r', encoding=UTF8SIG) as fin:
            # readlines(hint) limits the total SIZE read, not the number of
            # lines, so it would truncate large inputs early. islice enforces
            # the cap as an actual line count.
            all_lines = list(itertools.islice(fin, MAX_LINES))

        # Preprocess input text. Each data item is collected as a list of
        # lines and joined once, instead of growing a string line by line.
        found_section_separator = False
        data_chunks = [[]]  # Item 0 is whatever precedes the first separator
        syntax_lines = []
        for line in all_lines:
            if line.startswith(data_separator_prefixes):
                data_chunks.append([])
                continue
            if line.startswith(TXTB.section_separator):
                found_section_separator = True
                continue
            if found_section_separator:
                syntax_lines.append(line)
            else:
                data_chunks[-1].append(line)

        for line in syntax_lines:
            # Blank lines and doubled spaces would produce empty tokens, and
            # every consumer looks at token[0]; drop them here.
            self.tokens.extend(
                token for token in line.strip().split(' ') if token
            )

        self.data = [EMPTY_STRING.join(chunk) for chunk in data_chunks]
        if REMOVE_TRAILING_NEWLINES:
            self.data = [item.strip() for item in self.data]

        # startswith (rather than item[0]) tolerates empty data items, e.g.
        # when the file begins with a separator line.
        for i, data_item in enumerate(self.data):
            if data_item.startswith(ss):
                set_name, set_items = TXTB.get_set_data(data_item)
                self.set_index[i] = set_items
                self.set_index_by_name[set_name] = i

    @staticmethod
    def get_set_data(data_item):
        """Return the set name and its items as a list.

        The first line of ``data_item`` holds the name after the first
        ``set_separator``; all following lines hold items separated by
        commas and/or newlines.

        Args:
            data_item: Text of a data item that describes a set.

        Returns:
            ``(set_name, set_items)``. Items are stripped, de-duplicated and
            never empty. They are sorted when
            ``FLAG_SET_SORTED_RANDOM_DEFAULT`` is 0, shuffled when it is 1,
            and otherwise left in the order the de-duplicating set yields.

        Raises:
            IndexError: If the first line contains no ``set_separator``.
        """
        # partition copes with a header-only item (no newline at all).
        header, _, body = data_item.partition('\n')
        # Surrounding whitespace in the name is dropped: it is invisible in
        # the file and would make the set impossible to reference by name.
        set_name = header.split(TXTB.set_separator)[1].strip()

        unique_items = set()
        for line in body.split('\n'):
            for piece in line.split(','):
                item = piece.strip()
                # Test AFTER stripping so whitespace-only pieces do not end
                # up in the set as an empty item.
                if item:
                    unique_items.add(item)

        set_items = list(unique_items)  # Back to a list, no duplicates
        if FLAG_SET_SORTED_RANDOM_DEFAULT == 0:
            set_items.sort()
        elif FLAG_SET_SORTED_RANDOM_DEFAULT == 1:
            random.shuffle(set_items)
        return set_name, set_items

    def generate_txt(self, output_name):
        """Interpret ``self.tokens`` and write the result to ``output_name``.

        Tokens whose first character is ``SYMBL_USR_PRMPT`` or
        ``SYMBL_SET_PRMPT`` open a sequence; the next token starting with the
        same symbol closes it and the whole sequence is passed to
        ``handle_sequence``. Every token outside a sequence is written via
        ``interpret``. A sequence still open when the tokens run out is not
        written.

        Args:
            output_name: Path of the output file, opened for writing with
                encoding ``UTF8SIG``.
        """
        prompt_symbols = (SYMBL_USR_PRMPT, SYMBL_SET_PRMPT)
        seq_type = EMPTY_STRING  # Symbol of the open sequence; also a flag
        seq_tokens = []          # Tokens of the sequence being collected
        # The context manager closes the file even if interpretation raises.
        with open(output_name, 'w', encoding=UTF8SIG) as fout:
            for t in self.tokens:
                if not t:
                    # self.tokens may have been assigned from outside.
                    continue
                if t[0] in prompt_symbols:
                    if not seq_type:
                        seq_type = t[0]           # Start
                    elif seq_type == t[0]:
                        seq_type = EMPTY_STRING   # End
                    # A different prompt symbol inside an open sequence is
                    # collected like any other token.
                    seq_tokens.append(t)
                    if len(seq_tokens) > 1 and seq_type == EMPTY_STRING:
                        self.handle_sequence(seq_tokens, fout)
                        seq_tokens.clear()
                    continue
                if seq_type:
                    seq_tokens.append(t)
                    continue
                # In any other case, just interpret each token
                fout.write(TXTB.interpret(t, self.data))

    def handle_sequence(self, seq_tokens, file_out):
        """Handle one complete user-prompt or set-prompt sequence.

        Args:
            seq_tokens: Opening token, the tokens in between, and the closing
                token, in that order.
            file_out: Writable text file object receiving the output.

        For a user prompt the middle tokens, joined by spaces, are shown via
        ``input`` (transformed by the opening token's functions when it
        contains a ``'.'``); the answer is transformed by the closing token's
        functions and written. For a set prompt every line produced by
        ``combinations`` for the middle tokens is written, followed by a
        newline.
        """
        t_first = seq_tokens[0]
        t_middle = seq_tokens[1:-1]
        t_last = seq_tokens[-1]
        if t_first[0] == SYMBL_USR_PRMPT:
            user_prompt = ' '.join(t_middle)
            if '.' in t_first:
                user_prompt = TXTB.transform(user_prompt, t_first)
            user_input = input(f'{user_prompt}: ')
            file_out.write(TXTB.transform(user_input, t_last))
        elif t_first[0] == SYMBL_SET_PRMPT:
            lines = TXTB.combinations(
                t_middle, self.set_index, self.set_index_by_name
            )
            for line in lines:
                file_out.write(line + '\n')

    @staticmethod
    def transform(target_text, token_with_functions):
        """Apply the functions named in a token to ``target_text``.

        Args:
            target_text: Text to transform.
            token_with_functions: Token of the form ``atom.func1.func2``;
                everything after the first ``'.'`` is read as function names.

        Returns:
            The transformed text, or ``target_text`` unchanged when the token
            names no functions.
        """
        function_names = token_with_functions.split('.')[1:]
        if not function_names:
            return target_text
        return TXTB.call_functions_on(target_text, function_names)

    @staticmethod
    def call_functions_on(target_text, functions):
        """Run ``target_text`` through each named function, in order.

        Names missing from ``function_map`` are reported on stdout and
        skipped; the remaining functions are still applied.

        Args:
            target_text: Text to transform.
            functions: Iterable of function names (keys of ``function_map``).

        Returns:
            The text after all known functions were applied.
        """
        for f in functions:
            if f in TXTB.function_map:
                target_text = TXTB.function_map[f](target_text)
            else:
                print('Call not found: ' + f)  # TODO THROW EXCEPTION
        return target_text

    @staticmethod
    def interpret(token, data_section, call_functions=DEFAULT_CALL_FUNCTIONS):
        """Interpret a syntax token such as 'arbitraryText.caps' or '3.strip'.

        The part before the first ``'.'`` is the atom: a key of ``charmap``
        expands to that character, a decimal number selects the data item at
        that index, anything else is inserted as-is.
        Tokens SYMBL_USR_PRMPT and SYMBL_SET_PRMPT never make it here.

        Args:
            token: The syntax token.
            data_section: List of data items addressed by index tokens.
            call_functions: When true (``DEFAULT_CALL_FUNCTIONS`` is True),
                the functions named after the atom are applied to the text.

        Returns:
            The resulting text. An index past the end of ``data_section``
            is reported on stdout and yields an empty string.
        """
        atom = token.split('.')[0]
        if atom in TXTB.charmap:        # Character Sheet token?
            text = TXTB.charmap[atom]
        # isdecimal (not isnumeric): characters such as '½' or '²' are
        # "numeric" but make int() raise ValueError.
        elif atom.isdecimal():          # Index token?
            index = int(atom)
            if index < len(data_section):
                text = data_section[index]
            else:
                print('OUT OF BOUNDS IN DATA LIST')
                text = EMPTY_STRING     # TODO THROW EXCEPTION
        else:
            text = atom                 # Raw insertion
        if call_functions:
            return TXTB.transform(text, token)
        return text

    @staticmethod
    def count_iterations(limits):
        """Yield every index vector below ``limits``, first index fastest.

        For ``limits == [2, 2]`` this yields ``[0, 0]``, ``[1, 0]``,
        ``[0, 1]``, ``[1, 1]``. Nothing is yielded when ``limits`` is empty
        or contains a limit of zero or less. Each yielded list is a fresh
        object, so callers may keep it.

        Args:
            limits: Sequence of exclusive upper bounds, one per position.
        """
        if not limits:
            return
        # product varies its LAST argument fastest; feeding the ranges in
        # reverse and flipping each result makes position 0 the fastest one.
        reversed_ranges = [range(limit) for limit in reversed(limits)]
        for reversed_counter in itertools.product(*reversed_ranges):
            yield list(reversed(reversed_counter))

    @staticmethod
    def combinations(tokens, set_index, set_index_by_name):
        """Treat ``tokens`` as a template and yield every set combination.

        Tokens starting with ``set_separator`` have the form
        ``#<set>#[.<count>][.<function>...]`` where ``<set>`` is either the
        data index or the name of a set. ``<count>`` limits how many items of
        the set are used (values above the set size use the whole set); the
        functions are applied to each inserted item. All other tokens are
        kept verbatim. Each yielded line is the tokens joined by single
        spaces with every set token replaced, by position, by one of its
        items. Nothing is yielded if there is no set token.

        Args:
            tokens: Sequence of template tokens.
            set_index: Mapping of data index -> list of set items.
            set_index_by_name: Mapping of set name -> data index.

        Raises:
            ValueError: If a set token does not contain exactly two
                ``set_separator`` characters.
            KeyError: If a set token references an unknown set.
        """
        ss = TXTB.set_separator
        tokens = list(tokens)

        # Parallel lists, one entry per set token, in template order.
        positions = []            # Index of the set token within `tokens`
        item_lists = []           # Items of the referenced set
        nr_of_replacements = []   # How many of those items to iterate over
        token_mods = []           # Function names applied to each item
        for position, token in enumerate(tokens):
            if not token.startswith(ss):
                continue
            _, set_reference, modifiers_maybe = token.split(ss)
            if set_reference.isdecimal():
                index = int(set_reference)
            else:
                index = set_index_by_name[set_reference]
            set_items = set_index[index]
            set_len = len(set_items)

            modifiers = []
            if len(modifiers_maybe) > 1:
                modifiers = modifiers_maybe.split('.')[1:]
            if modifiers and modifiers[0].isdecimal():
                # A leading number is an item count, never a function name;
                # clamp it so it cannot index past the end of the set.
                set_len = min(int(modifiers[0]), set_len)
                modifiers = modifiers[1:]  # Only function names

            positions.append(position)
            item_lists.append(set_items)
            nr_of_replacements.append(set_len)
            token_mods.append(modifiers)

        for indices in TXTB.count_iterations(nr_of_replacements):
            # Substitute by position instead of str.replace on the joined
            # template: replace would also hit a token that merely starts
            # with another one ('#a#' inside '#a#.caps') and would fill
            # repeated tokens with the same item, producing duplicate lines.
            parts = list(tokens)
            substitutions = zip(positions, item_lists, indices, token_mods)
            for position, set_items, item_index, modifiers in substitutions:
                parts[position] = TXTB.call_functions_on(
                    set_items[item_index], modifiers
                )
            yield ' '.join(parts)