Skip to content

Commit

Permalink
implement sentiment analysis into visualization
Browse files Browse the repository at this point in the history
  • Loading branch information
sharkey300 committed Oct 1, 2024
1 parent 0c38c24 commit 5804651
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 18 deletions.
14 changes: 13 additions & 1 deletion app.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from util.constants import PARENT_DIR
from add_show import add_show
from visualize import generate_heatmap, generate_line_plot, generate_wordcloud
from visualize import generate_heatmap, generate_line_plot, generate_wordcloud, generate_sentiment

matplotlib.use('Agg')
app = Flask(__name__)
Expand Down Expand Up @@ -107,6 +107,18 @@ def wordcloud():
except Exception as e:
print(e)
return jsonify({'error': str(e)}), 400

@app.route('/api/sentiment')
@cache.cached(timeout=300, query_string=True)
def sentiment():
    """Serve the sentiment visualization for a show.

    Query parameters:
        show:   identifier of the show, forwarded to ``generate_sentiment``.
        season: optional season filter; a missing or empty value is
                forwarded as ``None``.

    Returns:
        Whatever ``generate_sentiment`` produces (presumably base64-encoded
        PNG data -- the client prefixes the body with a
        ``data:image/png;base64,`` URI). If generation raises, the exception
        is printed and a JSON ``{'error': ...}`` body is returned with
        status 400.
    """
    args = request.args
    show_name = args.get('show')
    # `or None` collapses an empty `season=` value into "no filter".
    season_filter = args.get('season') or None
    try:
        return generate_sentiment(show_name, filterSeason=season_filter)
    except Exception as err:
        print(err)
        return jsonify({'error': str(err)}), 400

@app.route('/api/add_show')
def import_show():
Expand Down
51 changes: 42 additions & 9 deletions static/script.js
Original file line number Diff line number Diff line change
Expand Up @@ -668,7 +668,7 @@ function createVisualizationButton(type, name, onClick) {
visualizationBar.appendChild(button)
}

async function frequencyGraph(button, line = false) {
async function frequencyGraph(button, type) {
const visualizeMultipleCheckbox = document.getElementById('visualize-multiple')
const smooth = document.getElementById('smooth').checked
const marked = document.querySelector('.marked-words').childNodes
Expand All @@ -684,13 +684,13 @@ async function frequencyGraph(button, line = false) {
}
let shows = Object.keys(showMap)
for (const show of shows) {
await visualizeWords(words, line, show, null, smooth, true)
await visualizeWords(words, type, show, null, smooth, true)
}
return
}
button.disabled = true
if (path.length > 3) visualizeWords(words, line, path[2].title, path[4].title, smooth, line).then(() => button.disabled = false)
else visualizeWords(words, line, path[2].title, null, smooth).then(() => button.disabled = false)
if (path.length > 3) visualizeWords(words, type, path[2].title, path[4].title, smooth).then(() => button.disabled = false)
else visualizeWords(words, type, path[2].title, null, smooth).then(() => button.disabled = false)
}

function setupVisualization() {
Expand All @@ -699,7 +699,7 @@ function setupVisualization() {
const switcher = document.createElement('select')
switcher.name = 'switcher'
switcher.id = 'switcher'
const types = ['Heat Map', 'Line Plot', 'Word Cloud']
const types = ['Heat Map', 'Line Plot', 'Word Cloud', 'Sentiment']
types.forEach(el => {
let option = document.createElement('option')
option.value = el.replace(' ', '').toLowerCase()
Expand Down Expand Up @@ -730,10 +730,10 @@ function setupVisualization() {
createVisualizationCheckbox('always-show', 'visualize-multiple', 'Visualize Multiple', 'If checked, previous visualizations will not be cleared when displaying new ones.', false)
createVisualizationCheckbox('heatmap lineplot', 'smooth', 'Smooth Data', 'Smoothing reduces the influence of outliers, allowing you to more easily view overall trends. Turn this off if you want to view the true counts for each episode.', true)
createVisualizationButton('heatmap right', 'Generate Heat Map', async(button) => {
frequencyGraph(button)
frequencyGraph(button, 'heatmap')
})
createVisualizationButton('lineplot right', 'Generate Line Plot', async(button) => {
frequencyGraph(button, true)
frequencyGraph(button, 'lineplot')
})
createVisualizationButton('wordcloud right', 'Generate Word Cloud', async(button) => {
const path = document.querySelector('.path-bar').childNodes
Expand All @@ -744,12 +744,23 @@ function setupVisualization() {
else await wordCloud(path[2].title, path[4].title, path[6].title)
button.disabled = false
})
// Registers the "Generate Sentiment" button. The handler reads the current
// selection from the `.path-bar` child nodes: node 2's title is passed as the
// show and, when more nodes are present, node 4's title as the season.
createVisualizationButton('sentiment right', 'Generate Sentiment', async(button) => {
    const path = document.querySelector('.path-bar').childNodes
    // A single node means nothing is selected yet, so there is nothing to plot.
    if (path.length === 1) return
    button.disabled = true
    try {
        if (path.length === 3) await sentiment(path[2].title)
        else await sentiment(path[2].title, path[4].title)
    } finally {
        // Always re-enable the button: without this, a rejected request (or an
        // unexpected path-bar shape) would leave it disabled for good.
        button.disabled = false
    }
})
switchType()
}

async function visualizeWords(words, line, show, season = null, smooth = true, visualizingMultiple = false) {
async function visualizeWords(words, type, show, season = null, smooth = true, visualizingMultiple = false) {
try {
const type = line ? 'lineplot' : 'heatmap'
if (type !== 'heatmap' && type !== 'lineplot') {
console.error(`Invalid type "${type}" passed to visualizeWords`)
return
}
let path = `/api/${type}?words=${words.join(',')}&show=${show}&smooth=${smooth}`
if (season) path += '&season=' + season
const request = await fetch(path)
Expand Down Expand Up @@ -801,6 +812,28 @@ async function wordCloud(show, season=null, episode=null) {
}
}

/**
 * Fetch the sentiment visualization for a show (optionally limited to one
 * season) from `/api/sentiment` and display it as an image inside the
 * `.visualization-content` container.
 *
 * Unless the `#visualize-multiple` checkbox is checked, previously displayed
 * visualizations are removed before the new image is appended.
 *
 * @param {string} show - Show identifier sent as the `show` query parameter.
 * @param {?string} [season=null] - Season sent as the `season` query
 *     parameter; omitted from the request when falsy.
 * @returns {Promise<void>} Resolves once the image has been appended, or
 *     after logging when the server answered with a non-2xx status.
 * @throws Re-throws any error raised while fetching or updating the DOM
 *     (after logging it).
 */
async function sentiment(show, season = null) {
    try {
        // Encode the values so titles containing `&`, `#`, `+`, `%`, ... do not
        // corrupt the query string.
        let path = `/api/sentiment?show=${encodeURIComponent(show)}`
        if (season) path += '&season=' + encodeURIComponent(season)
        const request = await fetch(path)
        const response = await request.text()
        if (!request.ok) {
            // A failed request carries an error payload, not base64 image
            // data; rendering it would show a broken image and wipe the
            // existing visualizations for nothing.
            console.error('Error fetching visualization:', request.status, response)
            return
        }
        const image = document.createElement('img')
        image.src = 'data:image/png;base64,' + response
        image.alt = 'Sentiment'
        const visualization = document.querySelector('.visualization-content')
        if (!document.querySelector('#visualize-multiple').checked) {
            while (visualization.firstChild) {
                visualization.removeChild(visualization.firstChild)
            }
        }
        visualization.appendChild(image)
    } catch (err) {
        console.error('Error fetching visualization:', err)
        throw err
    }
}

function createSearchBar() {
const frequencyBar = document.querySelector('.frequency-bar')
Expand Down
46 changes: 38 additions & 8 deletions visualize.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import base64
import io
import os
import re
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
import numpy as np
from pandas import DataFrame as df
from wordcloud import WordCloud
Expand Down Expand Up @@ -33,10 +35,7 @@ def get_name_of_show(show):

def frequency_plot(df, starts=None, show=None, season=None, transform=None, color_map='Blues', norm=None, stretch_factor=1.5, plot_type='heatmap'):
xy = np.array(df)
if transform:
xy[:, 1:] = transform(xy[:, 1:])

if plot_type == 'heatmap':
if plot_type == 'heatmap' or plot_type == 'sentiment':
row_count = xy.shape[1] - 1
plt.rcParams["figure.figsize"] = (8, row_count * stretch_factor)
height_ratios = [stretch_factor] * row_count
Expand All @@ -50,12 +49,13 @@ def frequency_plot(df, starts=None, show=None, season=None, transform=None, colo
x = xy[:, 0]
extent = [x[0]-(x[1]-x[0])/2., x[-1]+(x[1]-x[0])/2.,0,1]
for ax_i, ax in enumerate(axs):
if plot_type == 'heatmap':
if plot_type == 'heatmap' or plot_type == 'sentiment':
i = ax_i + 1
y = xy[:, i]
ax.imshow(y[np.newaxis,:], cmap=color_map, aspect="auto", extent=extent, norm=norm)
ax.set_yticks([])
ax.set_ylabel(format_word(df.columns[i]))
if plot_type == 'heatmap':
ax.set_ylabel(format_word(df.columns[i]))
ax.set_xlim(extent[0], extent[1])
elif plot_type == 'line':
for i in range(1, xy.shape[1]):
Expand All @@ -69,7 +69,10 @@ def frequency_plot(df, starts=None, show=None, season=None, transform=None, colo
ax.set_xticks(starts)
ax.set_xticklabels(generate_season_labels(starts))
if ax_i == 0:
ax.set_title('Frequency of words in ' + (get_name_of_show(show) if show else 'show') + (' Season ' + str(int(season)) if season and int(season) > -1 else '') + ' by episode')
if plot_type == 'sentiment':
ax.set_title('Polarity of words in ' + (get_name_of_show(show) if show else 'show') + (' Season ' + str(int(season)) if season and int(season) > -1 else '') + ' by episode')
else:
ax.set_title('Frequency of words in ' + (get_name_of_show(show) if show else 'show') + (' Season ' + str(int(season)) if season and int(season) > -1 else '') + ' by episode')
if ax_i == len(axs) - 1:
ax.set_xlabel('Season' if starts else 'Episode')
buf = io.BytesIO()
Expand Down Expand Up @@ -176,4 +179,31 @@ def generate_wordcloud(width, height, show, season=None, episode=None, part=None
buf = io.BytesIO()
wordcloud.to_image().save(buf, format='PNG')
buf.seek(0)
return base64.b64encode(buf.read())
return base64.b64encode(buf.read())

def generate_sentiment(show, filterSeason=None):
    """Plot per-episode sentiment polarity for a show.

    Reads ``{PARENT_DIR}/{show}/analysis/sentiment.txt``, whose relevant lines
    look like ``<season>x<episode>.txt: <polarity> <subjectivity>``; any line
    that does not start with that pattern is ignored. Episodes are numbered
    consecutively (starting at 1) in file order, and only the polarity value
    is plotted.

    Args:
        show: Name of the show's directory under ``PARENT_DIR``.
        filterSeason: Optional season number (int or numeric string). When
            given, only episodes of that season are included.

    Returns:
        The result of ``frequency_plot`` called with ``plot_type='sentiment'``.

    Raises:
        ValueError: If ``filterSeason`` is not numeric, or if no episode data
            matches the request.
        OSError: If the sentiment file cannot be opened.
    """
    # NOTE(review): `show` is interpolated into the path unsanitized; if it can
    # originate from user input, validate it against the known show folders.
    line_pattern = re.compile(r'(\d+)x(\d+)\.txt: ([\d.-]+) ([\d.-]+)')
    episodes = []
    polarities = []
    starts = []
    current_season = None
    with open(f'{PARENT_DIR}/{show}/analysis/sentiment.txt', 'r', encoding='utf-8') as f:
        # Parse the filter once instead of once per line.
        wanted_season = int(filterSeason) if filterSeason else None
        for line in f:
            match = line_pattern.match(line)
            if match is None:
                continue
            # Capture groups tolerate trailing content (including the newline)
            # that would break tuple-unpacking of `str.split` results.
            season = int(match.group(1))
            polarity = float(match.group(3))
            # Skip episodes outside the requested season, and always skip
            # seasons numbered below 1.
            if (wanted_season is not None and season != wanted_season) or season < 1:
                continue
            index = len(episodes) + 1
            if season != current_season:
                # Remember the episode index at which each season begins.
                starts.append(index)
                current_season = season
            episodes.append(index)
            polarities.append(polarity)

    if not episodes:
        # Fail with a clear message instead of an obscure plotting error.
        raise ValueError(
            f'No sentiment data found for {show}'
            + (f' season {filterSeason}' if filterSeason else '')
        )

    data_frame = df({'episode': episodes, 'polarity': polarities})
    if len(starts) == 1 and filterSeason is None:
        # Only one season in the data: '-1' makes the plot use episode labels
        # without adding a " Season N" suffix to the title.
        filterSeason = '-1'
    # Season tick marks only make sense when several seasons are shown.
    starts = None if filterSeason else starts
    # The color scale spans polarity -0.25 .. 0.25.
    return frequency_plot(
        data_frame,
        starts=starts,
        season=filterSeason,
        show=show,
        color_map='RdYlGn',
        plot_type='sentiment',
        norm=Normalize(vmin=-0.25, vmax=0.25),
    )

0 comments on commit 5804651

Please sign in to comment.