-
Notifications
You must be signed in to change notification settings - Fork 1
/
nlp.py
105 lines (85 loc) · 2.62 KB
/
nlp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# -*- coding: utf-8 -*-
#coding: utf-8
import sys
import MeCab
import string
from manager import db_manager
# The script expects exactly one command-line argument: the string to analyze.
# sys.argv[0] is the script name, so a valid invocation has len(sys.argv) == 2.
argument_count = len(sys.argv)
if argument_count < 2:
    # No string was supplied at all.
    print("arguments length is 0. input string wanted analyzed.")
    sys.exit()
elif argument_count > 2:
    # More than one string was supplied.
    print("arguments length is over 2. input one string wanted analyzed.")
    sys.exit()
def extractTime(sentence):
    """Analyze *sentence* with MeCab and dump the result to stdout.

    Despite its name, this function does not extract time expressions yet:
    it prints encoding diagnostics for the input, every node of the
    morphological analysis (surface and feature string), and the metadata
    of each dictionary loaded by the tagger.

    Args:
        sentence: The text to analyze. When it is empty, a built-in Korean
            sample sentence is analyzed instead, so that calling
            ``extractTime('')`` still produces a demonstration analysis.

    Returns:
        None. All results are printed. A ``RuntimeError`` raised while
        working with MeCab is caught and printed rather than propagated.
    """
    # Demonstration input, used only when the caller passes an empty string.
    sample_sentence = u'하 하 성민이랑 평강이랑 11시 바다 고고싱'
    try:
        t = MeCab.Tagger('-d /usr/local/lib/mecab/dic/mecab-ko-dic')

        # Encoding diagnostics for the raw input.
        print("origin : " + sentence)
        print("utf-8 : ")
        print(sentence.encode('utf-8'))
        print("utf-8(2) : " + str(sentence.encode('utf-8').decode('utf-8')))

        # Analyze the caller's text; previously the sample sentence was
        # always parsed and the argument was ignored.
        # NOTE(review): the text is kept bound to a local name for the whole
        # traversal in case the binding references the input buffer - confirm.
        text = sentence or sample_sentence
        print(type(text))

        # Walk the linked list of nodes produced by the analysis.
        m = t.parseToNode(text)
        while m:
            print(m.surface, "\t", m.feature)
            m = m.next
        print("EOS")
        print()

        # Walk the linked list of dictionaries loaded by the tagger.
        d = t.dictionary_info()
        while d:
            print("filename: %s" % d.filename)
            print("charset: %s" % d.charset)
            print("size: %d" % d.size)
            print("type: %d" % d.type)
            print("lsize: %d" % d.lsize)
            print("rsize: %d" % d.rsize)
            print("version: %d" % d.version)
            d = d.next
    except RuntimeError as e:
        print("RuntimeError:", e)
def extractLocation(sentence):
    """Print the location-related morphemes MeCab finds in *sentence*.

    Each node of the analysis is inspected through its feature string:

    * if the feature mentions "동이름" or "대학교", the node's surface and
      full feature string are printed;
    * otherwise, if the feature mentions "지하철", the first comma-separated
      feature field containing "역" is printed.

    Args:
        sentence: The text to analyze.

    Returns:
        None. Matches are printed to stdout. A ``RuntimeError`` raised while
        working with MeCab is caught and printed rather than propagated.
    """
    try:
        t = MeCab.Tagger('-d /usr/local/lib/mecab/dic/mecab-ko-dic')

        # Kept from the original: one plain parse before parseToNode.
        # NOTE(review): presumably the usual warm-up workaround for bindings
        # that return garbled surfaces on the first parseToNode - confirm.
        t.parse(sentence)

        m = t.parseToNode(sentence)
        while m:
            feature = m.feature
            # Use `in` for containment: `str.find(...) > 0` wrongly treats a
            # match at index 0 as "not found".
            if "동이름" in feature or "대학교" in feature:
                print(m.surface, "\t", feature)
            elif "지하철" in feature:
                # Report only the first field that names a station; a field
                # such as "역삼역" starts with "역" and must not be skipped.
                station = next(
                    (part for part in feature.split(',') if '역' in part),
                    None,
                )
                if station is not None:
                    print(station)
            m = m.next
    except RuntimeError as e:
        print("RuntimeError:", e)
# Confirm that MeCab was imported and echo the command-line argument.
received_argument = str(sys.argv[1])
print("Import Mecab success with " + received_argument)

# Run the analysis dump with an empty input string.
extractTime('')