test.py · 69 lines (39 loc) · 1.72 KB

from lib import *
from features import *

# Explicit imports for modules used directly in this script
# (they may also be re-exported by the wildcard imports above).
import os
import pickle
import pandas as pd

os.system('clear')

# In[2]:

print('Fetching News:')

# News pages to scrape for headlines.
url = [
    "https://www.bbc.co.uk/",
    "https://in.yahoo.com/?p=us",
    "https://gadgets.ndtv.com/news",
    "https://timesofindia.indiatimes.com/business/india-business/met-finance-minister-before-leaving-the-country-vijay-mallya/articleshow/65785080.cms",
    "https://news.google.com/?hl=en-IN&gl=IN&ceid=IN:en",
]

# Class labels in the order the trained model predicts them.
categories = ['Medical', 'Entertainment', 'Business', 'Tech']

# Scrape the headlines from each page, then normalise their text.
headlines = []
for page in url:
    headlines.append(extract_hedlines(page))

for i in range(len(headlines)):
    for j, line in enumerate(headlines[i]):
        headlines[i][j] = normalise_text(line)
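
# extract_hedlines and normalise_text are pulled in by the wildcard imports
# above (presumably from features): one scrapes headline strings from a page,
# the other cleans the raw text before vectorisation.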

# # Converting lists of headlines to one DataFrame per source
testing_headlines = {}
for i in range(len(headlines)):
    testing_headlines[url[i]] = pd.DataFrame({"TITLE": headlines[i]})

# # Load trained model, count_vectorizer, tf-idf
# In[9]:

print('Loading Stored Models...')
model = pickle.load(open('model.pkl', 'rb'))  # trained classifier
cv = pickle.load(open('cv.pkl', 'rb'))        # fitted count vectorizer
tv = pickle.load(open('tv.pkl', 'rb'))        # fitted tf-idf transformer

print('Processing Headlines...')

# Turn each source's raw titles into the feature vectors the model expects.
processed_data = {}
transformer = Transformer(cv, tv)
for link in testing_headlines:
    processed_data[link] = transformer.transform(testing_headlines[link]['TITLE'])

# In[11]:
prediction = {}
for link in testing_headlines:
    prediction[link] = model.predict(processed_data[link])
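
# model.predict returns one integer class index per headline; each index maps
# into the categories list defined above.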
# In[14]:
# Print each source followed by its classified headlines, keeping only titles
# of a plausible headline length (between 40 and 100 characters).
for link in testing_headlines:
    print(link)
    for i in range(len(testing_headlines[link])):
        title = testing_headlines[link]['TITLE'][i]
        if 40 < len(title) < 100:
            print(title, " : ", categories[prediction[link][i]])
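

# For reference: a minimal sketch of what the Transformer helper imported from
# features might look like, assuming cv is a fitted scikit-learn CountVectorizer
# and tv is a fitted scikit-learn TfidfTransformer. This is an assumption for
# illustration; the real implementation lives in features.py and is not shown here.
class _TransformerSketch:
    def __init__(self, cv, tv):
        self.cv = cv  # fitted CountVectorizer
        self.tv = tv  # fitted TfidfTransformer

    def transform(self, titles):
        # Bag-of-words counts first, then tf-idf weighting, matching the
        # cv -> tv order the pickled objects were trained with.
        counts = self.cv.transform(titles)
        return self.tv.transform(counts)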