-
Notifications
You must be signed in to change notification settings - Fork 293
/
demo_insignificant_greyed_out.py
30 lines (28 loc) · 1.52 KB
/
demo_insignificant_greyed_out.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import spacy
from scattertext import SampleCorpora, produce_scattertext_explorer
from scattertext.CorpusFromPandas import CorpusFromPandas
from scattertext.termscoring.LogOddsUniformativePriorScore import LogOddsUninformativePriorScore
# Demo script: build a scattertext explorer for the 2012 convention sample
# data, colouring terms by a thresholded log-odds score and passing
# gray_zero_scores=True to the explorer.

# Load the small English spaCy pipeline used to parse the speeches.
nlp = spacy.load('en_core_web_sm')

# Sample data frame; the columns read below are 'party', 'text' and 'speaker'.
convention_df = SampleCorpora.ConventionData2012.get_data()

# Build a corpus whose categories come from 'party' and documents from 'text'.
corpus = CorpusFromPandas(
    convention_df,
    category_col='party',
    text_col='text',
    nlp=nlp,
).build()

# Per-term frequency table with one '<category> freq' column per category.
term_freq_df = corpus.get_term_freq_df()

# Thresholded log-odds scores, negated before being handed to the explorer.
# NOTE(review): the sign flip presumably orients the scores so that
# Democratic-associated terms are positive -- confirm against
# LogOddsUninformativePriorScore.get_thresholded_score.
scores = -(
    LogOddsUninformativePriorScore.get_thresholded_score(
        term_freq_df['democrat freq'],
        term_freq_df['republican freq'],
        alpha_w=2.,
        threshold=0.1,
    )
)

# Render the interactive explorer as a single HTML string.
html = produce_scattertext_explorer(
    corpus,
    category='democrat',
    category_name='Democratic',
    not_category_name='Republican',
    scores=scores,
    sort_by_dist=False,
    gray_zero_scores=True,
    minimum_term_frequency=5,
    width_in_pixels=1000,
    metadata=convention_df['speaker'],
)

# Write the page as UTF-8 bytes.  The context manager guarantees the handle
# is flushed and closed before the user is told to open the file, rather than
# relying on garbage collection of an anonymous file object.
output_path = './demo_insignificant_greyed_out.html'
with open(output_path, 'wb') as out_file:
    out_file.write(html.encode('utf-8'))
print(f'Open {output_path} in Chrome or Firefox.')