@app.route('/api/sentiment')
@cache.cached(timeout=300, query_string=True)
def sentiment():
    """Serve the sentiment (polarity) plot for a show.

    Query parameters:
        show: required; forwarded to ``generate_sentiment`` as the show.
        season: optional; an empty or missing value means "no season filter".

    Returns:
        Whatever ``generate_sentiment`` returns on success, otherwise a JSON
        body of the form ``{'error': <message>}`` with HTTP status 400.
    """
    show = request.args.get('show')
    # An empty ``season=`` query value is normalised to None (no filter).
    season = request.args.get('season') or None
    if not show:
        # generate_sentiment builds a file path from ``show``; fail fast with
        # a clear message instead of a file-not-found error mentioning "None".
        return jsonify({'error': 'Missing required "show" parameter'}), 400
    try:
        return generate_sentiment(show, filterSeason=season)
    except Exception as e:
        # Top-level request boundary: report the failure to the client as a
        # 400 with the error text, mirroring the other visualization routes.
        print(e)
        return jsonify({'error': str(e)}), 400
visualizeWords(words, type, path[2].title, path[4].title, smooth).then(() => button.disabled = false) + else visualizeWords(words, type, path[2].title, null, smooth).then(() => button.disabled = false) } function setupVisualization() { @@ -699,7 +699,7 @@ function setupVisualization() { const switcher = document.createElement('select') switcher.name = 'switcher' switcher.id = 'switcher' - const types = ['Heat Map', 'Line Plot', 'Word Cloud'] + const types = ['Heat Map', 'Line Plot', 'Word Cloud', 'Sentiment'] types.forEach(el => { let option = document.createElement('option') option.value = el.replace(' ', '').toLowerCase() @@ -730,10 +730,10 @@ function setupVisualization() { createVisualizationCheckbox('always-show', 'visualize-multiple', 'Visualize Multiple', 'If checked, previous visualizations will not be cleared when displaying new ones.', false) createVisualizationCheckbox('heatmap lineplot', 'smooth', 'Smooth Data', 'Smoothing reduces the influence of outliers, allowing you to more easily view overall trends. 
Turn this off if you want to view the true counts for each episode.', true) createVisualizationButton('heatmap right', 'Generate Heat Map', async(button) => { - frequencyGraph(button) + frequencyGraph(button, 'heatmap') }) createVisualizationButton('lineplot right', 'Generate Line Plot', async(button) => { - frequencyGraph(button, true) + frequencyGraph(button, 'lineplot') }) createVisualizationButton('wordcloud right', 'Generate Word Cloud', async(button) => { const path = document.querySelector('.path-bar').childNodes @@ -744,12 +744,23 @@ function setupVisualization() { else await wordCloud(path[2].title, path[4].title, path[6].title) button.disabled = false }) + createVisualizationButton('sentiment right', 'Generate Sentiment', async(button) => { + const path = document.querySelector('.path-bar').childNodes + if (path.length === 1) return + button.disabled = true + if (path.length === 3) await sentiment(path[2].title) + else await sentiment(path[2].title, path[4].title) + button.disabled = false + }) switchType() } -async function visualizeWords(words, line, show, season = null, smooth = true, visualizingMultiple = false) { +async function visualizeWords(words, type, show, season = null, smooth = true, visualizingMultiple = false) { try { - const type = line ? 
/**
 * Fetch the sentiment plot for a show (optionally a single season) from
 * `/api/sentiment` and append it to `.visualization-content` as an <img>.
 *
 * The endpoint's response body is used as base64-encoded PNG data. Existing
 * visualizations are cleared first unless the `#visualize-multiple` checkbox
 * is checked.
 *
 * @param {string} show - Show identifier sent as the `show` query parameter.
 * @param {?string} [season=null] - Season sent as `season`; omitted if falsy.
 * @returns {Promise<void>}
 * @throws Re-throws any error raised while fetching or updating the DOM.
 */
async function sentiment(show, season = null) {
    try {
        // URLSearchParams percent-encodes the values, so titles containing
        // characters such as '&', '#' or '+' no longer corrupt the query.
        const params = new URLSearchParams({ show })
        if (season) params.set('season', season)
        const response = await fetch(`/api/sentiment?${params}`)
        const body = await response.text()
        if (!response.ok) {
            // On failure the body is an error payload, not image data. Leave
            // the current visualizations untouched and resolve normally so a
            // caller awaiting this function can continue (e.g. re-enable its
            // button).
            console.error('Error fetching visualization:', body)
            return
        }
        const image = document.createElement('img')
        image.src = 'data:image/png;base64,' + body
        image.alt = 'Sentiment'
        const visualization = document.querySelector('.visualization-content')
        if (!document.querySelector('#visualize-multiple').checked) {
            while (visualization.firstChild) {
                visualization.removeChild(visualization.firstChild)
            }
        }
        visualization.appendChild(image)
    } catch (err) {
        console.error('Error fetching visualization:', err)
        throw err
    }
}
# Expected line shape: "<season>x<episode>.txt: <polarity> <subjectivity>".
# The numeric character classes are intentionally the same loose ones the
# file format check has always used; float() does the real validation.
_SENTIMENT_LINE = re.compile(r'(\d+)x\d+\.txt: ([\d.-]+) [\d.-]+')


def _parse_sentiment_lines(lines, season_filter=None):
    """Extract per-episode polarity values from sentiment file lines.

    Args:
        lines: iterable of text lines (e.g. an open file).
        season_filter: if not None, only episodes of this season number
            (an int) are kept.

    Returns:
        A ``(polarities, starts)`` tuple. ``polarities`` holds one float per
        kept episode in file order. ``starts`` holds the 1-based position in
        ``polarities`` at which each new season begins.
    """
    polarities = []
    starts = []
    current_season = None
    for line in lines:
        match = _SENTIMENT_LINE.match(line)
        if match is None:
            continue
        # Compare seasons as ints so "01" and "1" are the same season.
        season = int(match.group(1))
        # Seasons below 1 are always skipped, regardless of the filter.
        if season < 1 or (season_filter is not None and season != season_filter):
            continue
        try:
            polarity = float(match.group(2))
        except ValueError:
            # Text such as "1.2.3" or "-" passes the loose pattern but is not
            # a number; treat it like any other non-matching line.
            continue
        polarities.append(polarity)
        if season != current_season:
            starts.append(len(polarities))
            current_season = season
    return polarities, starts


def generate_sentiment(show, filterSeason=None):
    """Plot per-episode sentiment polarity for a show.

    Reads ``{PARENT_DIR}/{show}/analysis/sentiment.txt``, keeps the polarity
    of every episode (optionally restricted to one season) and renders it
    through ``frequency_plot`` with ``plot_type='sentiment'``.

    Args:
        show: name of the show's directory under ``PARENT_DIR``. Must be a
            single path component.
        filterSeason: season number (str or int) to restrict the plot to;
            a falsy value means all seasons.

    Returns:
        The value returned by ``frequency_plot``.

    Raises:
        ValueError: if ``show`` is empty or not a plain directory name, if
            ``filterSeason`` is not an integer, or if no episode matches.
        OSError: if the sentiment file cannot be opened.
    """
    # ``show`` is interpolated into a filesystem path; refuse anything that
    # could escape the show's own directory.
    if not show or show in ('.', '..') or '/' in show or '\\' in show:
        raise ValueError(f'Invalid show name: {show!r}')

    # Parse the filter once instead of on every line of the file.
    season_filter = int(filterSeason) if filterSeason else None

    with open(f'{PARENT_DIR}/{show}/analysis/sentiment.txt', 'r', encoding='utf-8') as f:
        polarities, starts = _parse_sentiment_lines(f, season_filter)

    if not polarities:
        # Fail with a clear message rather than an opaque error from plotting
        # an empty frame.
        raise ValueError(f'No sentiment data found for {show!r}')

    # Same frame layout as before: a 1-based 'episode' column followed by
    # the 'polarity' column.
    data_frame = df({
        'episode': list(range(1, len(polarities) + 1)),
        'polarity': polarities,
    })

    # A show with a single season is plotted per episode; '-1' keeps the
    # season number out of the plot title.
    if len(starts) == 1 and filterSeason is None:
        filterSeason = '-1'
    # Season tick marks are only passed on when several seasons are shown.
    starts = None if filterSeason else starts
    return frequency_plot(
        data_frame,
        starts=starts,
        season=filterSeason,
        show=show,
        color_map='RdYlGn',
        plot_type='sentiment',
        norm=Normalize(vmin=-0.25, vmax=0.25),
    )