-
Notifications
You must be signed in to change notification settings - Fork 128
/
seq2seq_glove_train.py
51 lines (36 loc) · 1.8 KB
/
seq2seq_glove_train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
from __future__ import print_function
import pandas as pd
from sklearn.model_selection import train_test_split
from keras_text_summarization.library.utility.plot_utils import plot_and_save_history
from keras_text_summarization.library.seq2seq import Seq2SeqGloVeSummarizer
from keras_text_summarization.library.applications.fake_news_loader import fit_text
import numpy as np
# Toggle read by main(): when True, previously saved weights are loaded into
# the summarizer before fitting, and the history plot file name gets a
# '-history-v<version>' suffix instead of plain '-history'.
LOAD_EXISTING_WEIGHTS = False
def main():
    """Train the GloVe seq2seq summarizer on the fake-or-real-news CSV.

    Steps, in order:

    1. Seed NumPy's global random generator with 42.
    2. Read ``./data/fake_or_real_news.csv`` and take the ``text`` column as
       model input and the ``title`` column as target.
    3. Build a configuration with ``fit_text`` and construct a
       ``Seq2SeqGloVeSummarizer`` from it, then call ``load_glove`` with
       ``./very_large_data``.
    4. If ``LOAD_EXISTING_WEIGHTS`` is set, load weights from the path
       reported by ``Seq2SeqGloVeSummarizer.get_weight_file_path`` for
       ``./models``.
    5. Split the data 80/20 (``random_state=42``) and fit for 20 epochs with
       a batch size of 16.
    6. Hand the returned history to ``plot_and_save_history`` with a target
       path under ``./reports``.
    """
    seed = 42
    np.random.seed(seed)

    data_dir = './data'
    glove_dir = './very_large_data'
    report_dir = './reports'
    model_dir = './models'

    print('loading csv file ...')
    news = pd.read_csv('/'.join((data_dir, 'fake_or_real_news.csv')))

    print('extract configuration from input texts ...')
    headlines = news.title
    articles = news['text']
    config = fit_text(articles, headlines)

    print('configuration extracted from input texts ...')

    summarizer = Seq2SeqGloVeSummarizer(config)
    summarizer.load_glove(glove_dir)

    if LOAD_EXISTING_WEIGHTS:
        # Resume from the weight file associated with the model directory.
        weight_path = Seq2SeqGloVeSummarizer.get_weight_file_path(model_dir_path=model_dir)
        summarizer.load_weights(weight_file_path=weight_path)

    split = train_test_split(articles, headlines, test_size=0.2, random_state=seed)
    train_texts, test_texts, train_titles, test_titles = split

    print('training size: ', len(train_texts))
    print('testing size: ', len(test_texts))

    print('start fitting ...')
    history = summarizer.fit(train_texts, train_titles, test_texts, test_titles,
                             epochs=20, batch_size=16)

    # Runs that resumed from existing weights get a version-tagged plot name.
    plot_suffix = '-history.png'
    if LOAD_EXISTING_WEIGHTS:
        plot_suffix = '-history-v' + str(summarizer.version) + '.png'
    history_plot_file_path = report_dir + '/' + Seq2SeqGloVeSummarizer.model_name + plot_suffix

    plot_and_save_history(history, summarizer.model_name, history_plot_file_path,
                          metrics={'loss', 'acc'})
# Run the training pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()