-
Notifications
You must be signed in to change notification settings - Fork 2
/
regexfinder.py
44 lines (39 loc) · 1.65 KB
/
regexfinder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import os
import re
# Processing steps:
# 0 find_empty: mark entries as "empty" where 北京 appears on consecutive lines
# 1 find double lines after a 北京 line (sandwich) and merge them with \N, excluding "empty" lines
# 2 find double lines where the first one starts with 北京 and merge them with \N
# 3 remove every 北京 that stands by itself on a line
# 4 remove all blank lines
def find_empty(txtpath, beijing, empty):
    """Read a text file and flag marker lines that carry no content.

    A "marker line" is a line whose stripped text equals ``beijing``.
    Runs of consecutive marker lines are rewritten as follows:

    * a run of ``n`` marker lines followed by some other line becomes
      ``n - 1`` lines of ``empty`` and then one ``beijing`` line;
    * a run of ``n`` marker lines that reaches the end of the file becomes
      ``n`` lines of ``empty``.

    Every non-marker line is copied through unchanged.

    Args:
        txtpath: Path of the UTF-8 encoded text file to read.
        beijing: Marker text to look for.
        empty: Placeholder text written for markers without content.

    Returns:
        The rewritten file contents as one string.

    Raises:
        OSError: If the file cannot be opened.
    """
    pieces = []
    run_length = 0  # number of marker lines seen since the last other line
    with open(txtpath, 'r', encoding='utf-8') as f:
        for line in f:
            if line.strip() == beijing:
                run_length += 1
                continue
            # A non-marker line ends the current run: only the final marker
            # of the run keeps its meaning, the earlier ones were empty.
            if run_length >= 2:
                pieces.append(f'{empty}\n' * (run_length - 1))
            if run_length >= 1:
                pieces.append(f'{beijing}\n')
            pieces.append(line)
            run_length = 0
    # A run still open here reached the end of the file, so none of its
    # markers is followed by content. Handling it after the loop avoids
    # detecting the last line by object identity.
    if run_length:
        pieces.append(f'{empty}\n' * run_length)
    return "".join(pieces)
# Marker text that introduces each entry in result.txt, and the placeholder
# that find_empty() writes for markers without any content. The first
# character of each is used in the character classes of the regexes below.
beijing = "北京"
bei = beijing[0]
empty = "empty"
e = empty[0]

print('Enter path of result.txt excluding filename')
# os.path.join instead of a hard-coded '\\' so the script also works on
# systems whose path separator is not a backslash.
txtpath = os.path.join(input(), 'result.txt')

# Step 0: flag marker lines that have no content.
contents = find_empty(txtpath, beijing, empty)

# Step 1: a marker line followed by two lines (the second one starting with
# neither the marker nor the placeholder) -> join those two lines with a
# literal "\N". count/flags are passed by keyword: passing them positionally
# is deprecated since Python 3.13.
contents = re.sub(
    rf'{beijing}\n([^{bei}].*)\n([^{bei}{e}].*$)',
    rf'{beijing}\n\1\\N\2\n',
    contents,
    count=0,
    flags=re.M,
)

# Step 2: a line that starts with the marker and has text after it, plus the
# following line -> put the marker on its own line and join the rest with "\N".
contents = re.sub(
    rf'^{beijing}([^\n]+)\n(.*$)',
    rf'{beijing}\n\1\\N\2\n',
    contents,
    count=0,
    flags=re.M,
)

# Step 3: drop every marker that stands alone on its line.
contents = re.sub(rf'^{beijing}$', '', contents, count=0, flags=re.M)

# Step 4: drop blank (whitespace-only) lines.
contents = "\n".join([line for line in contents.split('\n') if line.strip()])

# The result is written next to the input file.
with open(os.path.join(os.path.dirname(txtpath), 'result_2.txt'), 'w', encoding='utf-8') as g:
    g.write(contents)