Skip to content

Commit

Permalink
Filter maintenance.timeline to return data from expected date range b…
Browse files Browse the repository at this point in the history
…ased on limit (#960)

* added a method to filter commit activity data based on start date and end date for maintenance timeline to align parity with usage timelime

* fixed test error

* implemented _process_maintenance_timeline to return data that is consistent with how the usage timeline is implemented, refactored test_activity_dashboard to reflect the code changes, and cleaned up code to improve readability in model.py and test_activity_dashboard.py in general

* updated test_fixtures.py and resolved merge conflicts

* updated test_activity_dashboard.py

* refactored variable name and some more code cleanup

* refactored methods in test_activity_dashoboard.py

* code cleanup

* refactored name for generate_expected_maintenance_timeline

* minor spacing fix

* addressed code review feedback and created _process_for _dates to be used by both _process_usage_timeline and _process_maintenance_timeline to avoid code duplication

* minor code cleanup

* refactored model.py such that latest_commits.json and commit_activity.json contain data in which plugin is lower case to maintain parity with the implementation of install activity

* refactored _process_maintenance_timline method to be more readable

* refactored code to improve readability in model.py
  • Loading branch information
klai95 committed Apr 14, 2023
1 parent 66fbf2a commit 129aee8
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 50 deletions.
82 changes: 50 additions & 32 deletions backend/api/_tests/test_activity_dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from datetime import datetime
from dateutil.relativedelta import relativedelta

from api._tests.test_fixtures import generate_expected_timeline
from api._tests.test_fixtures import generate_expected_usage_timeline, generate_expected_maintenance_timeline

BASE = datetime.today().date().replace(day=1)
DATE_LIST = [BASE - relativedelta(months=x) for x in range(12) if x % 2 == 0]
Expand All @@ -18,10 +18,19 @@
PLUGIN_NAME_CLEAN = 'string-1'
MOCK_PLUGIN_RECENT_INSTALLS = {PLUGIN_NAME_CLEAN: 25, 'foo': 10, 'bar': 30}
MOCK_PLUGIN_LATEST_COMMIT = 1672531200000
MOCK_PLUGIN_TOTAL_COMMIT = 200
MOCK_PLUGIN_TOTAL_COMMIT_EMPTY = 0
MOCK_PLUGIN_COMMIT_ACTIVITY = [{'timestamp': 1643673600000, 'commits': 200}]
MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY = []
MOCK_PLUGIN_TOTAL_COMMIT_NONEMPTY = 5
MOCK_PLUGIN_COMMIT_ACTIVITY_INVALID_LIMIT = []
MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY = [
{'timestamp': 1672531200000, 'commits': 0},
{'timestamp': 1675209600000, 'commits': 0},
{'timestamp': 1677628800000, 'commits': 0}
]
MOCK_PLUGIN_COMMIT_ACTIVITY_NONEMPTY = [
{'timestamp': 1672531200000, 'commits': 1},
{'timestamp': 1675209600000, 'commits': 3},
{'timestamp': 1677628800000, 'commits': 1}
]


def generate_expected_metrics(timeline=None, total_installs=0, installs_in_last_30_days=0, latest_commit=None,
Expand All @@ -35,14 +44,15 @@ def generate_expected_metrics(timeline=None, total_installs=0, installs_in_last_
}
},
'maintenance': {
'timeline': commit_activity if commit_activity else MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY,
'timeline': commit_activity,
'stats': {
'latest_commit_timestamp': latest_commit,
'total_commits': total_commit
}
}
}


class TestActivityDashboard(unittest.TestCase):

@patch.object(model, 'get_latest_commit', return_value=None)
Expand All @@ -52,29 +62,32 @@ class TestActivityDashboard(unittest.TestCase):
def test_get_metrics_empty(self, mock_get_install_timeline_data, mock_get_recent_activity_data,
mock_get_commit_activity, mock_get_latest_commit):
expected = generate_expected_metrics(
timeline=generate_expected_timeline(-3, to_installs=lambda i: 0),
timeline=generate_expected_usage_timeline(-3, to_installs=lambda i: 0),
total_commit=MOCK_PLUGIN_TOTAL_COMMIT_EMPTY,
commit_activity=MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY
)
self._verify_results('3', expected, mock_get_commit_activity, mock_get_latest_commit, mock_get_install_timeline_data, mock_get_recent_activity_data)
self._verify_results('3', expected, mock_get_commit_activity, mock_get_latest_commit,
mock_get_install_timeline_data, mock_get_recent_activity_data)

@patch.object(model, 'get_latest_commit', return_value=MOCK_PLUGIN_LATEST_COMMIT)
@patch.object(model, 'get_commit_activity', return_value=MOCK_PLUGIN_COMMIT_ACTIVITY)
@patch.object(model, 'get_commit_activity', return_value=MOCK_PLUGIN_COMMIT_ACTIVITY_NONEMPTY)
@patch.object(model, 'get_recent_activity_data', return_value=MOCK_PLUGIN_RECENT_INSTALLS)
@patch.object(model, 'get_install_timeline_data', return_value=MOCK_DF.copy())
def test_get_metrics_nonempty(self, mock_get_install_timeline_data, mock_get_recent_activity_data,
mock_get_commit_activity, mock_get_latest_commit):
expected = generate_expected_metrics(
timeline=generate_expected_timeline(-3),
timeline=generate_expected_usage_timeline(-3),
total_installs=sum(MOCK_INSTALLS),
installs_in_last_30_days=25,
latest_commit=MOCK_PLUGIN_LATEST_COMMIT,
total_commit=MOCK_PLUGIN_TOTAL_COMMIT,
commit_activity=MOCK_PLUGIN_COMMIT_ACTIVITY
total_commit=MOCK_PLUGIN_TOTAL_COMMIT_NONEMPTY,
commit_activity=generate_expected_maintenance_timeline(-3)
)
self._verify_results('3', expected, mock_get_commit_activity, mock_get_latest_commit, mock_get_install_timeline_data, mock_get_recent_activity_data)
self._verify_results('3', expected, mock_get_commit_activity, mock_get_latest_commit,
mock_get_install_timeline_data, mock_get_recent_activity_data)

@patch.object(model, 'get_latest_commit', return_value=MOCK_PLUGIN_LATEST_COMMIT)
@patch.object(model, 'get_commit_activity', return_value=MOCK_PLUGIN_COMMIT_ACTIVITY)
@patch.object(model, 'get_commit_activity', return_value=MOCK_PLUGIN_COMMIT_ACTIVITY_INVALID_LIMIT)
@patch.object(model, 'get_recent_activity_data', return_value=MOCK_PLUGIN_RECENT_INSTALLS)
@patch.object(model, 'get_install_timeline_data', return_value=MOCK_DF.copy())
def test_get_metrics_nonempty_zero_limit(self, mock_get_install_timeline_data, mock_get_recent_activity_data,
Expand All @@ -83,13 +96,14 @@ def test_get_metrics_nonempty_zero_limit(self, mock_get_install_timeline_data, m
total_installs=sum(MOCK_INSTALLS),
installs_in_last_30_days=25,
latest_commit=MOCK_PLUGIN_LATEST_COMMIT,
total_commit=MOCK_PLUGIN_TOTAL_COMMIT,
commit_activity=MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY
total_commit=MOCK_PLUGIN_TOTAL_COMMIT_EMPTY,
commit_activity=MOCK_PLUGIN_COMMIT_ACTIVITY_INVALID_LIMIT
)
self._verify_results('0', expected, mock_get_commit_activity, mock_get_latest_commit, mock_get_install_timeline_data, mock_get_recent_activity_data)
self._verify_results('0', expected, mock_get_commit_activity, mock_get_latest_commit,
mock_get_install_timeline_data, mock_get_recent_activity_data)

@patch.object(model, 'get_latest_commit', return_value=MOCK_PLUGIN_LATEST_COMMIT)
@patch.object(model, 'get_commit_activity', return_value=MOCK_PLUGIN_COMMIT_ACTIVITY)
@patch.object(model, 'get_commit_activity', return_value=MOCK_PLUGIN_COMMIT_ACTIVITY_INVALID_LIMIT)
@patch.object(model, 'get_recent_activity_data', return_value=MOCK_PLUGIN_RECENT_INSTALLS)
@patch.object(model, 'get_install_timeline_data', return_value=MOCK_DF.copy())
def test_get_metrics_nonempty_invalid_limit(self, mock_get_install_timeline_data, mock_get_recent_activity_data,
Expand All @@ -98,27 +112,30 @@ def test_get_metrics_nonempty_invalid_limit(self, mock_get_install_timeline_data
total_installs=sum(MOCK_INSTALLS),
installs_in_last_30_days=25,
latest_commit=MOCK_PLUGIN_LATEST_COMMIT,
total_commit=MOCK_PLUGIN_TOTAL_COMMIT,
commit_activity=MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY
total_commit=MOCK_PLUGIN_TOTAL_COMMIT_EMPTY,
commit_activity=MOCK_PLUGIN_COMMIT_ACTIVITY_INVALID_LIMIT
)
self._verify_results('foo', expected, mock_get_commit_activity, mock_get_latest_commit, mock_get_install_timeline_data, mock_get_recent_activity_data)
self._verify_results('foo', expected, mock_get_commit_activity, mock_get_latest_commit,
mock_get_install_timeline_data, mock_get_recent_activity_data)

@patch.object(model, 'get_latest_commit', return_value=MOCK_PLUGIN_LATEST_COMMIT)
@patch.object(model, 'get_commit_activity', return_value=MOCK_PLUGIN_COMMIT_ACTIVITY)
@patch.object(model, 'get_commit_activity', return_value=MOCK_PLUGIN_COMMIT_ACTIVITY_INVALID_LIMIT)
@patch.object(model, 'get_recent_activity_data', return_value=MOCK_PLUGIN_RECENT_INSTALLS)
@patch.object(model, 'get_install_timeline_data', return_value=MOCK_DF.copy())
def test_get_metrics_nonempty_negative_limit(self, mock_get_install_timeline_data, mock_get_recent_activity_data,
mock_get_commit_activity, mock_get_latest_commit):
mock_get_commit_activity, mock_get_latest_commit):
expected = generate_expected_metrics(
total_installs=sum(MOCK_INSTALLS),
installs_in_last_30_days=25,
latest_commit=MOCK_PLUGIN_LATEST_COMMIT,
total_commit=MOCK_PLUGIN_TOTAL_COMMIT,
commit_activity=MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY
total_commit=MOCK_PLUGIN_TOTAL_COMMIT_EMPTY,
commit_activity=MOCK_PLUGIN_COMMIT_ACTIVITY_INVALID_LIMIT
)
self._verify_results('-5', expected, mock_get_commit_activity, mock_get_latest_commit, mock_get_install_timeline_data, mock_get_recent_activity_data)
self._verify_results('-5', expected, mock_get_commit_activity, mock_get_latest_commit,
mock_get_install_timeline_data, mock_get_recent_activity_data)

def _verify_results(self, limit, expected, mock_get_commit_activity, mock_get_latest_commit, mock_get_install_timeline_data, mock_get_recent_activity_data):
def _verify_results(self, limit, expected, mock_get_commit_activity, mock_get_latest_commit,
mock_get_install_timeline_data, mock_get_recent_activity_data):
from api.model import get_metrics_for_plugin
result = get_metrics_for_plugin(PLUGIN_NAME, limit, False)
self.assertEqual(expected, result)
Expand All @@ -131,13 +148,14 @@ def _verify_results(self, limit, expected, mock_get_commit_activity, mock_get_la
class TestMetricModel:
@pytest.mark.parametrize('commit_activity_input, commit_activity_result, latest_commit, total_commit, total_installs,'
'recent_installs, timeline, limit', [
(MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY, MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY, None, MOCK_PLUGIN_TOTAL_COMMIT_EMPTY, 0, 0, generate_expected_timeline(-3, to_installs=lambda i: 0), '3'),
(MOCK_PLUGIN_COMMIT_ACTIVITY, MOCK_PLUGIN_COMMIT_ACTIVITY, MOCK_PLUGIN_LATEST_COMMIT, MOCK_PLUGIN_TOTAL_COMMIT, 25, 21, generate_expected_timeline(-3), '3'),
(MOCK_PLUGIN_COMMIT_ACTIVITY, MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY, MOCK_PLUGIN_LATEST_COMMIT, MOCK_PLUGIN_TOTAL_COMMIT, 25, 21, [], '0'),
(MOCK_PLUGIN_COMMIT_ACTIVITY, MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY, MOCK_PLUGIN_LATEST_COMMIT, MOCK_PLUGIN_TOTAL_COMMIT, 25, 21, [], 'foo'),
(MOCK_PLUGIN_COMMIT_ACTIVITY, MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY, MOCK_PLUGIN_LATEST_COMMIT, MOCK_PLUGIN_TOTAL_COMMIT, 25, 21, [], '-5'),
(MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY, MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY, None, MOCK_PLUGIN_TOTAL_COMMIT_EMPTY, 0, 0, generate_expected_usage_timeline(-3, to_installs=lambda i: 0), '3'),
(MOCK_PLUGIN_COMMIT_ACTIVITY_NONEMPTY, MOCK_PLUGIN_COMMIT_ACTIVITY_NONEMPTY, MOCK_PLUGIN_LATEST_COMMIT, MOCK_PLUGIN_TOTAL_COMMIT_NONEMPTY, 25, 21, generate_expected_usage_timeline(-3), '3'),
(MOCK_PLUGIN_COMMIT_ACTIVITY_INVALID_LIMIT, MOCK_PLUGIN_COMMIT_ACTIVITY_INVALID_LIMIT, MOCK_PLUGIN_LATEST_COMMIT, MOCK_PLUGIN_TOTAL_COMMIT_EMPTY, 25, 21, [], '0'),
(MOCK_PLUGIN_COMMIT_ACTIVITY_INVALID_LIMIT, MOCK_PLUGIN_COMMIT_ACTIVITY_INVALID_LIMIT, MOCK_PLUGIN_LATEST_COMMIT, MOCK_PLUGIN_TOTAL_COMMIT_EMPTY, 25, 21, [], 'foo'),
(MOCK_PLUGIN_COMMIT_ACTIVITY_INVALID_LIMIT, MOCK_PLUGIN_COMMIT_ACTIVITY_INVALID_LIMIT, MOCK_PLUGIN_LATEST_COMMIT, MOCK_PLUGIN_TOTAL_COMMIT_EMPTY, 25, 21, [], '-5'),
])
def test_metrics_api_using_dynamo(self, monkeypatch, commit_activity_input, commit_activity_result, latest_commit, total_commit, total_installs, recent_installs, timeline, limit):
def test_metrics_api_using_dynamo(self, monkeypatch, commit_activity_input, commit_activity_result, latest_commit,
total_commit, total_installs, recent_installs, timeline, limit):
monkeypatch.setattr(model, 'get_commit_activity', self._validate_args_return_value(commit_activity_input))
monkeypatch.setattr(model, 'get_latest_commit', self._validate_args_return_value(latest_commit))
monkeypatch.setattr(model.InstallActivity, 'get_total_installs', self._validate_args_return_value(total_installs))
Expand Down
17 changes: 13 additions & 4 deletions backend/api/_tests/test_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,18 @@
BASE = datetime.today().date().replace(day=1)


def generate_expected_timeline(start_range,
timestamp_key='timestamp',
installs_key='installs',
to_installs=lambda i: 2 if i % 2 == 0 else 0):
def generate_expected_usage_timeline(start_range,
timestamp_key='timestamp',
installs_key='installs',
to_installs=lambda i: 2 if i % 2 == 0 else 0):
to_timestamp = lambda i: int(pd.Timestamp(BASE + relativedelta(months=i)).timestamp()) * 1000
return [{timestamp_key: to_timestamp(i), installs_key: to_installs(i)} for i in range(start_range, 0)]


def generate_expected_maintenance_timeline(start_range,
timestamp_key='timestamp',
commits_key='commits',
to_commits=lambda i: 3 if i % 2 == 0 else 1):
to_timestamp = lambda i: int(pd.Timestamp(BASE + relativedelta(months=i)).timestamp()) * 1000
return [{timestamp_key: to_timestamp(i), commits_key: to_commits(i)} for i in range(start_range, 0)]

44 changes: 33 additions & 11 deletions backend/api/model.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from concurrent import futures
from datetime import datetime
from datetime import date, datetime
import json
import os
from typing import Tuple, Dict, List, Callable, Any
Expand All @@ -18,7 +18,6 @@
from api.zulip import notify_new_packages
import boto3
import snowflake.connector as sc
from datetime import date
from dateutil.relativedelta import relativedelta

index_subset = {'name', 'summary', 'description_text', 'description_content_type',
Expand Down Expand Up @@ -430,11 +429,16 @@ def _update_activity_timeline_data():
write_data(csv_string, "activity_dashboard_data/plugin_installs.csv")


def _process_for_timeline(plugin_df, limit):
date_format = '%Y-%m-%d'
def _process_for_dates(limit):
end_date = date.today().replace(day=1) + relativedelta(months=-1)
start_date = end_date + relativedelta(months=-limit + 1)
dates = pd.date_range(start=start_date, periods=limit, freq='MS')
return start_date, end_date, dates


def _process_usage_timeline(plugin_df, limit):
date_format = '%Y-%m-%d'
start_date, end_date, dates = _process_for_dates(limit)
plugin_df = plugin_df[(plugin_df['MONTH'] >= start_date.strftime(date_format)) & (
plugin_df['MONTH'] <= end_date.strftime(date_format))]
result = []
Expand All @@ -448,6 +452,22 @@ def _process_for_timeline(plugin_df, limit):
return result


def _process_maintenance_timeline(commit_activity, limit):
start_date, end_date, dates = _process_for_dates(limit)
maintenance_dict = {}
for commit_obj in commit_activity:
commit_obj_datetime = datetime.utcfromtimestamp(commit_obj['timestamp'] / 1000)
if start_date <= commit_obj_datetime.date() <= end_date:
maintenance_dict[commit_obj_datetime] = commit_obj
result = []
for cur_date in dates:
if cur_date in maintenance_dict:
result.append(maintenance_dict[cur_date])
else:
result.append({'timestamp': int(cur_date.timestamp()) * 1000, 'commits': 0})
return result


def _process_for_stats(plugin_df):
if len(plugin_df) == 0:
return {}
Expand Down Expand Up @@ -483,7 +503,7 @@ def _update_recent_activity_data(number_of_time_periods=30, time_granularity='DA
def _update_repo_to_plugin_dict(repo_to_plugin_dict: dict, plugin_obj: dict):
code_repository = plugin_obj.get('code_repository')
if code_repository:
repo_to_plugin_dict[code_repository.replace('https://github.com/', '')] = plugin_obj['name']
repo_to_plugin_dict[code_repository.replace('https://github.com/', '')] = plugin_obj['name'].lower()
return repo_to_plugin_dict


Expand Down Expand Up @@ -524,7 +544,8 @@ def _update_latest_commits(repo_to_plugin_dict):
repo = row[0]
if repo in repo_to_plugin_dict:
plugin = repo_to_plugin_dict[repo]
data[plugin] = int(pd.to_datetime(row[1]).strftime("%s")) * 1000
timestamp = int(pd.to_datetime(row[1]).strftime("%s")) * 1000
data[plugin] = timestamp
write_data(json.dumps(data), "activity_dashboard_data/latest_commits.json")


Expand All @@ -547,9 +568,10 @@ def _update_commit_activity(repo_to_plugin_dict):
for cursor in cursor_list:
for repo, month, commit_count in cursor:
if repo in repo_to_plugin_dict:
plugin = repo_to_plugin_dict[repo]
timestamp = int(pd.to_datetime(month).strftime("%s")) * 1000
commits = int(commit_count)
data.setdefault(repo_to_plugin_dict[repo], []).append({'timestamp': timestamp, 'commits': commits})
data.setdefault(plugin, []).append({'timestamp': timestamp, 'commits': commits})
for plugin in data:
data[plugin] = sorted(data[plugin], key=lambda x: (x['timestamp']))
write_data(json.dumps(data), "activity_dashboard_data/commit_activity.json")
Expand All @@ -565,7 +587,7 @@ def _get_usage_data(plugin: str, limit: int, use_dynamo: bool) -> Dict[str, Any]
:params bool use_dynamo: Fetch data from dynamo if True, else fetch from s3.
"""
if use_dynamo:
timeline = InstallActivity.get_timeline(plugin, limit) if limit else []
usage_timeline = InstallActivity.get_timeline(plugin, limit) if limit else []
usage_stats = {
'total_installs': InstallActivity.get_total_installs(plugin),
'installs_in_last_30_days': InstallActivity.get_recent_installs(plugin, 30)
Expand All @@ -576,9 +598,9 @@ def _get_usage_data(plugin: str, limit: int, use_dynamo: bool) -> Dict[str, Any]
'total_installs': _process_for_stats(data).get('totalInstalls', 0),
'installs_in_last_30_days': get_recent_activity_data().get(plugin, 0),
}
timeline = _process_for_timeline(data, limit) if limit else []
usage_timeline = _process_usage_timeline(data, limit) if limit else []

return {'timeline': timeline, 'stats': usage_stats, }
return {'timeline': usage_timeline, 'stats': usage_stats, }


def get_metrics_for_plugin(plugin: str, limit: str, use_dynamo_for_usage: bool) -> Dict[str, Any]:
Expand All @@ -598,7 +620,7 @@ def get_metrics_for_plugin(plugin: str, limit: str, use_dynamo_for_usage: bool)

if limit.isdigit() and limit != '0':
month_delta = max(int(limit), 0)
maintenance_timeline = commit_activity[-month_delta:]
maintenance_timeline = _process_maintenance_timeline(commit_activity, int(limit))

maintenance_stats = {
'latest_commit_timestamp': get_latest_commit(plugin),
Expand Down
6 changes: 3 additions & 3 deletions backend/api/models/_tests/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pytest
from dateutil.relativedelta import relativedelta

from api._tests.test_fixtures import generate_expected_timeline
from api._tests.test_fixtures import generate_expected_usage_timeline
from api.models.metrics import InstallActivity

PLUGIN_NAME = 'foo'
Expand Down Expand Up @@ -80,8 +80,8 @@ def test_get_recent_installs(self, monkeypatch, results, expected):

@pytest.mark.parametrize('results, month_delta, expected', [
([], 0, []),
([], 1, generate_expected_timeline(-1, to_installs=lambda i: 0)),
(generate_expected_results(-4), 4, generate_expected_timeline(-4)),
([], 1, generate_expected_usage_timeline(-1, to_installs=lambda i: 0)),
(generate_expected_results(-4), 4, generate_expected_usage_timeline(-4)),
])
def test_get_timeline(self, monkeypatch, results, month_delta, expected):
mock_install_activity = Mock(return_value=results)
Expand Down

0 comments on commit 129aee8

Please sign in to comment.