-
Notifications
You must be signed in to change notification settings - Fork 1
/
helpers.py
34 lines (25 loc) · 1.02 KB
/
helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import re
import string
from typing import Iterator, Sequence
import numpy as np
def split_str_idx(s: str, idx: Sequence[int]) -> Sequence[str]:
    """Split *s* at the given positions, dropping the character at each one.

    Each entry of *idx* is treated as the position of a separator character:
    the text before it becomes one piece, and the next piece starts right
    after it.  The final piece runs to the end of *s*, so the result always
    contains ``len(idx) + 1`` strings.

    >>> split_str_idx("a,b,c", [1, 3])
    ['a', 'b', 'c']
    """
    pieces = []
    start = 0
    for cut in idx:
        pieces.append(s[start:cut])
        # Resume one past the cut so the separator itself is skipped.
        start = cut + 1
    pieces.append(s[start:len(s)])
    return pieces
def extract_keywords(message: str, mask: str, junk=None):
    """Return the set of whitespace-separated tokens left after masking.

    *message* and *mask* are walked in lockstep (stopping at the shorter of
    the two).  A character is kept only when its mask character is ``'0'``
    and it is not in *junk*; every other character is replaced by a space.
    The resulting text is split on whitespace and returned as a frozenset.

    *junk* defaults to all ASCII whitespace and punctuation characters.
    """
    if junk is None:
        junk = frozenset(string.whitespace + string.punctuation)

    filtered = []
    for char, flag in zip(message, mask):
        if flag != '0' or char in junk:
            # Masked-out or junk characters act as token separators.
            filtered.append(' ')
        else:
            filtered.append(char)

    tokens = ''.join(filtered).split()
    return frozenset(tokens)
def fix_whitespace_problem(x, mask):
    """Re-expand *mask* so it lines up with *x* across whitespace runs.

    Every run of two or more whitespace characters in *x* consumes exactly
    one character of *mask* and is replaced in the output by a run of
    ``'0'`` characters of the same length as the whitespace run.  All other
    mask characters are copied through unchanged.

    NOTE(review): this presumes *mask* was produced for a version of *x* in
    which each such whitespace run had been collapsed to a single character,
    so that the result has ``len(x)`` characters -- confirm against the
    caller.

    Positions in *x* and positions in *mask* drift apart after the first
    expanded run, so the two are tracked with separate cursors.  (Indexing
    *mask* with offsets taken from *x* is only correct up to the first run.)

    Args:
        x: The text containing the original (uncollapsed) whitespace.
        mask: The mask string to expand.

    Returns:
        The expanded mask string.
    """
    pieces = []
    mask_pos = 0  # next unread index in ``mask``
    x_pos = 0     # index in ``x`` just past the previous whitespace run
    for m in re.finditer(r'\s{2,}', x):
        # Characters between two runs map one-to-one onto mask characters.
        gap = m.start() - x_pos
        pieces.append(mask[mask_pos: mask_pos + gap])
        pieces.append('0' * (m.end() - m.start()))
        # The whole run is represented by a single mask character; skip it.
        mask_pos += gap + 1
        x_pos = m.end()
    pieces.append(mask[mask_pos:])
    return ''.join(pieces)
def sensitivity_gen(step) -> Iterator[float]:
    """Yield values from 0 towards 1 in increments of *step*, then 1.0.

    The leading values are those produced by ``np.arange(0, 1.0, step)``,
    each converted to a built-in ``float``; a final ``1.0`` is always
    yielded afterwards.
    """
    for value in np.arange(0, 1.0, step):
        yield float(value)
    # The half-open arange range stops before 1.0, so emit it explicitly.
    yield 1.0