Skip to content

Commit

Permalink
Merge branch 'summary-stats' into 'main'
Browse files Browse the repository at this point in the history
Summary statistics

Closes #14

See merge request water/computational-tools/surface-water-work/hyswap!23
  • Loading branch information
elbeejay committed Jul 10, 2023
2 parents 9e2a63e + e85d8a6 commit 0d33964
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 3 deletions.
4 changes: 1 addition & 3 deletions docs/source/meta/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,9 @@ Below are two recommended installation methods for users of the ``hyswap`` packa
Installation via ``pip``
^^^^^^^^^^^^^^^^^^^^^^^^

``hyswap`` is hosted on `pypi <pypiurl>`_ and can therefore be installed using ``pip``.
``hyswap`` is hosted on `pypi <https://pypi.org/project/hyswap/>`_ and can therefore be installed using ``pip``.
This can be done with the following command:

.. _pypiurl: https://pypi.org/project/hyswap/

.. code-block:: bash
pip install hyswap
Expand Down
73 changes: 73 additions & 0 deletions hyswap/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,79 @@ def munge_nwis_stats(df, source_pct_col=None, target_pct_col=None,
return df_slim


def calculate_summary_statistics(df, data_col="00060_Mean"):
    """
    Calculate summary statistics for a site.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame containing daily values for the site. Expected to be from
        `dataretrieval.nwis.get_dv()`, or similar. It must have a datetime
        index (the begin/end dates are formatted from the index minimum and
        maximum) and a ``site_no`` column whose first entry can be converted
        to an integer.
    data_col : str, optional
        Name of the column in `df` that contains the data of interest.
        Default is "00060_Mean" which is the mean daily discharge column.

    Returns
    -------
    summary_df : pandas.DataFrame
        DataFrame containing summary statistics for the site. It has a
        single column named "Summary Statistics" and one row per statistic:
        site number, begin date, end date, count, minimum, mean (rounded to
        two decimals), median, and maximum.

    Examples
    --------
    Get some NWIS data and apply the function to get the summary statistics.

    .. doctest::

        >>> df, _ = dataretrieval.nwis.get_dv(
        ...     "03586500", parameterCd="00060",
        ...     startDT="2010-01-01", endDT="2010-12-31")
        >>> summary_df = utils.calculate_summary_statistics(df)
        >>> summary_df.shape
        (8, 1)
        >>> print(summary_df)
                    Summary Statistics
        Site number           03586500
        Begin date          2010-01-01
        End date            2010-12-31
        Count                      365
        Minimum                   2.48
        Mean                    207.43
        Median                    82.5
        Maximum                 3710.0
    """
    values = df[data_col]

    # Take the first site number by position: the frame is indexed by date,
    # so an integer key such as ``[0]`` is not a valid label and relying on
    # the positional fallback of ``Series.__getitem__`` is deprecated.
    first_site = df['site_no'].iloc[0]

    summary_dict = {
        # going through int() and zfill(8) pads the number back out to at
        # least eight characters when leading zeros were lost
        'Site number': str(int(first_site)).zfill(8),
        'Begin date': df.index.min().strftime('%Y-%m-%d'),
        'End date': df.index.max().strftime('%Y-%m-%d'),
        # count() only counts non-missing observations
        'Count': values.count(),
        'Minimum': values.min(),
        'Mean': values.mean().round(2),
        'Median': values.median(),
        'Maximum': values.max(),
    }

    # build a one-row frame, then transpose so each statistic is a row
    summary_df = pd.DataFrame(summary_dict, index=[0]).T
    summary_df.columns = ['Summary Statistics']

    return summary_df


def set_data_type(data_type):
"""Function to set the data type for rolling averages.
Expand Down
25 changes: 25 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,31 @@ def test_munge_nwis_stats():
assert df_slim.columns.tolist() == [0, 5, 10, 25, 75, 90, 95, 100]


def test_calculate_summary_statistics():
    """Check the table produced by calculate_summary_statistics."""
    # ten consecutive daily values for one site, indexed by date
    df = pd.DataFrame({
        'datetime': pd.date_range('2000-01-01', '2000-01-10'),
        '00060_Mean': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        'site_no': np.ones(10) * 12345678
    }).set_index('datetime')

    df_stats = utils.calculate_summary_statistics(df, '00060_Mean')

    # layout: eight statistics in a single named column
    assert df_stats.shape == (8, 1)
    assert df_stats.columns[0] == 'Summary Statistics'

    # values, in row order: site number, begin date, end date, count,
    # minimum, mean, median, maximum
    expected = [
        '12345678',
        '2000-01-01',
        '2000-01-10',
        10,
        1,
        5.5,
        5.5,
        10,
    ]
    for row, want in enumerate(expected):
        assert df_stats.iloc[row, 0] == want


def test_set_data_type():
"""Test the function set_data_type."""
assert utils.set_data_type('daily') == 'D'
Expand Down

0 comments on commit 0d33964

Please sign in to comment.