Skip to content

Commit

Permalink
return dataframe for fixed percentile thresholds
Browse files Browse the repository at this point in the history
  • Loading branch information
elbeejay committed Aug 18, 2023
1 parent a05a339 commit 0a2502e
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 13 deletions.
20 changes: 13 additions & 7 deletions hyswap/percentiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@ def calculate_fixed_percentile_thresholds(
Returns
-------
percentiles : array_like
Percentiles of the data.
percentiles : pandas.DataFrame
Percentiles of the data in a DataFrame so the thresholds and
percentile values are tied together.
Examples
--------
Expand All @@ -45,7 +46,8 @@ def calculate_fixed_percentile_thresholds(
>>> results = percentiles.calculate_fixed_percentile_thresholds(
... data, method='linear')
>>> results
array([ 0., 5., 10., 25., 75., 90., 95., 100.])
thresholds 0 5 10 25 75 90 95 100
values 0.0 5.0 10.0 25.0 75.0 90.0 95.0 100.0
Calculate a different set of thresholds from some synthetic data.
Expand All @@ -55,9 +57,13 @@ def calculate_fixed_percentile_thresholds(
>>> results = percentiles.calculate_fixed_percentile_thresholds(
... data, percentiles=np.array((0, 10, 50, 90, 100)))
>>> results
array([ 0. , 9.2, 50. , 90.8, 100. ])
thresholds 0 10 50 90 100
values 0.0 9.2 50.0 90.8 100.0
"""
return np.percentile(data, percentiles, method=method, **kwargs)
pct = np.percentile(data, percentiles, method=method, **kwargs)
df = pd.DataFrame(data={"values": pct}, index=percentiles).T
df = df.rename_axis("thresholds", axis="columns")
return df


def calculate_variable_percentile_thresholds_by_day(
Expand Down Expand Up @@ -163,9 +169,9 @@ def calculate_variable_percentile_thresholds_by_day(
# only calculate data if there are at least min_years of data
if meta['n_years'] >= min_years:
# calculate percentiles for the day of year and add to DataFrame
percentiles_by_day.loc[t_idx == doy, :] = \
calculate_fixed_percentile_thresholds(
_pct = calculate_fixed_percentile_thresholds(
data, percentiles=percentiles, method=method, **kwargs)
percentiles_by_day.loc[t_idx == doy, :] = _pct.values.tolist()[0]
else:
# if there are fewer than min_years of data,
# set percentiles to NaN
Expand Down
18 changes: 12 additions & 6 deletions tests/test_percentiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,28 @@ def test_calculate_fixed_percentile_thresholds_defaults(self):
# test the function
percentiles_ = percentiles.calculate_fixed_percentile_thresholds(
self.data, method='linear')
assert percentiles_.shape == (8,)
assert percentiles_ == pytest.approx((0, 5, 10, 25, 75, 90, 95, 100))
assert percentiles_.shape == (1, 8)
assert percentiles_.columns.tolist() == [0, 5, 10, 25, 75, 90, 95, 100]
assert percentiles_.values.tolist()[0] == [
0.0, 5.0, 10.0, 25.0, 75.0, 90.0, 95.0, 100.0]

def test_custom_percentiles(self):
# set some percentile values as opposed to the defaults
percentiles_ = percentiles.calculate_fixed_percentile_thresholds(
self.data, percentiles=np.array((0, 10, 50, 90, 100)))
assert percentiles_.shape == (5,)
assert percentiles_ == pytest.approx((0, 9.2, 50, 90.8, 100))
assert percentiles_.shape == (1, 5)
assert percentiles_.columns.tolist() == [0, 10, 50, 90, 100]
assert percentiles_.values.tolist()[0] == [
0.0, 9.200000000000001, 50.0, 90.80000000000001, 100.0]

def test_kwargs_to_percentile(self):
# pass kwarg through to np.percentile
percentiles_ = percentiles.calculate_fixed_percentile_thresholds(
self.data, method='lower')
assert percentiles_.shape == (8,)
assert percentiles_ == pytest.approx((0, 5, 10, 25, 75, 90, 95, 100))
assert percentiles_.shape == (1, 8)
assert percentiles_.columns.tolist() == [0, 5, 10, 25, 75, 90, 95, 100]
assert percentiles_.values.tolist()[0] == [
0, 5, 10, 25, 75, 90, 95, 100]


class TestCalculateVariablePercentileThresholdsByDay:
Expand Down

0 comments on commit 0a2502e

Please sign in to comment.