From c5b42939c1a8279982d4b5d514b3c39f16531cc5 Mon Sep 17 00:00:00 2001 From: Nikolay Donets Date: Sat, 6 Jul 2024 20:40:30 +0100 Subject: [PATCH] refactor: update ruff lint rules and fix tests The old `numpy.math` alias (removed in NumPy 2.0) is replaced with the standard-library `math` module. Added NumPy-specific (`NPY`) and docstring (`D`, pydocstyle) lint rules. Tests use `numpy.testing.assert_allclose` instead of `assertEqual` on rounded values. --- pyentrp/entropy.py | 65 ++++++++++++++++++++++--------------------- pyproject.toml | 13 ++++++++- tests/test_entropy.py | 58 +++++++++++++++++++------------------- 3 files changed, 73 insertions(+), 63 deletions(-) diff --git a/pyentrp/entropy.py b/pyentrp/entropy.py index fd6dfd4..fdb2a32 100644 --- a/pyentrp/entropy.py +++ b/pyentrp/entropy.py @@ -1,11 +1,11 @@ +import math from collections import Counter import numpy as np -def time_delay_embedding(time_series, embedding_dimension, delay=1): - """ - Time-delayed embedding. +def time_delay_embedding(time_series, embedding_dimension, delay): + """Calculate time-delayed embedding. Parameters ---------- @@ -20,6 +20,7 @@ def time_delay_embedding(time_series, embedding_dimension, delay=1): ------- embedded : ndarray The embedded time series with shape (n_times - (order - 1) * delay, order). + """ series_length = len(time_series) embedded_series = np.empty((embedding_dimension, series_length - (embedding_dimension - 1) * delay)) @@ -29,8 +30,7 @@ def time_delay_embedding(time_series, embedding_dimension, delay=1): def util_pattern_space(time_series, lag, dim): - """ - Create a set of sequences with given lag and dimension + """Create a set of sequences with a given lag and dimension. Parameters ---------- @@ -49,6 +49,7 @@ def util_pattern_space(time_series, lag, dim): Raises ------ ValueError: If the lag is less than 1 or the result matrix exceeds the size limit. 
+ """ n = len(time_series) @@ -66,8 +67,7 @@ def util_pattern_space(time_series, lag, dim): def util_granulate_time_series(time_series, scale): - """ - Extract coarse-grained time series + """Extract coarse-grained time series. Parameters ---------- @@ -79,7 +79,8 @@ def util_granulate_time_series(time_series, scale): Returns ------- cts : np.ndarray - Array of coarse-grained time series with given scale factor + Array of coarse-grained time series with a given scale factor + """ if not isinstance(time_series, np.ndarray): time_series = np.array(time_series) @@ -91,8 +92,7 @@ def util_granulate_time_series(time_series, scale): def shannon_entropy(time_series): - """ - Return the Shannon Entropy of the sample data. + """Calculate Shannon Entropy of the sample data. Parameters ---------- @@ -103,6 +103,7 @@ def shannon_entropy(time_series): ------- ent: float The Shannon Entropy as float value + """ if isinstance(time_series, str): # Calculate frequency counts @@ -130,11 +131,10 @@ def shannon_entropy(time_series): def sample_entropy(time_series, sample_length, tolerance=None): - """ - Calculates the sample entropy of degree m of a time_series. + """Calculate the sample entropy of degree m of a time_series. This method uses Chebyshev norm. - It is quite fast for random data, but can be slower is there is + It is quite fast for random data but can be slower if there is structure in the input time series. Parameters @@ -150,7 +150,7 @@ def sample_entropy(time_series, sample_length, tolerance=None): ------- sampen: np.ndarray Array of Sample Entropies SE. - SE[k] is ratio `#templates of length k+1` / `#templates of length k` + SE[k] is the ratio `#templates of length k+1` / `#templates of length k` where `#templates of length 0` = n*(n - 1) / 2, by definition Notes @@ -162,6 +162,7 @@ def sample_entropy(time_series, sample_length, tolerance=None): .. [1] http://en.wikipedia.org/wiki/Sample_Entropy .. [2] http://physionet.incor.usp.br/physiotools/sampen/ .. 
[3] Madalena Costa, Ary Goldberger, CK Peng. Multiscale entropy analysis of biological signals + """ if not isinstance(time_series, np.ndarray): time_series = np.array(time_series) @@ -177,7 +178,7 @@ def sample_entropy(time_series, sample_length, tolerance=None): # N_temp[k] holds matches templates of length k N_temp = np.zeros(sample_length + 2) - # Templates of length 0 matches by definition: + # Templates of length 0 match by definition: N_temp[0] = n * (n - 1) / 2 for i in range(n - sample_length - 1): @@ -195,9 +196,7 @@ def sample_entropy(time_series, sample_length, tolerance=None): def multiscale_entropy(time_series, sample_length, tolerance=None, maxscale=None): - """ - Calculate the Multiscale Entropy of the given time series considering - different time-scales of the time series. + """Calculate Multiscale Entropy considering different time-scales of the time series. Parameters ---------- @@ -219,6 +218,7 @@ def multiscale_entropy(time_series, sample_length, tolerance=None, maxscale=None ---------- .. [1] http://en.pudn.com/downloads149/sourcecode/math/detail646216_en.html Can be viewed at https://web.archive.org/web/20170207221539/http://en.pudn.com/downloads149/sourcecode/math/detail646216_en.html + """ if tolerance is None: tolerance = 0.1 * np.std(time_series) @@ -234,12 +234,11 @@ def multiscale_entropy(time_series, sample_length, tolerance=None, maxscale=None def permutation_entropy(time_series, order=3, delay=1, normalize=False): - """ - Permutation Entropy. + """Calculate Permutation Entropy. Parameters ---------- - time_series : list | np.array + time_series : list | np.ndarray Time series order : int Order of permutation entropy @@ -286,6 +285,7 @@ def permutation_entropy(time_series, order=3, delay=1, normalize=False): >>> # Return a value comprised between 0 and 1. 
>>> print(permutation_entropy(x, order=3, normalize=True)) 0.589 + """ x = np.array(time_series) hashmult = np.power(order, np.arange(order)) @@ -299,13 +299,13 @@ def permutation_entropy(time_series, order=3, delay=1, normalize=False): p = np.true_divide(c, c.sum()) pe = -np.multiply(p, np.log2(p)).sum() if normalize: - pe /= np.log2(np.math.factorial(order)) + factorial = math.factorial(order) + pe /= np.log2(factorial) return pe def multiscale_permutation_entropy(time_series, m, delay, scale): - """ - Calculate the Multiscale Permutation Entropy + """Calculate the Multiscale Permutation Entropy. Parameters ---------- @@ -328,6 +328,7 @@ def multiscale_permutation_entropy(time_series, m, delay, scale): .. [1] Francesco Carlo Morabito et al. Multivariate Multi-Scale Permutation Entropy for Complexity Analysis of Alzheimer`s Disease EEG. www.mdpi.com/1099-4300/14/7/1186 .. [2] http://www.mathworks.com/matlabcentral/fileexchange/37288-multiscale-permutation-entropy-mpe/content/MPerm.m + """ mspe = np.empty(scale) for i in range(scale): @@ -337,11 +338,10 @@ def multiscale_permutation_entropy(time_series, m, delay, scale): def weighted_permutation_entropy(time_series, order=2, delay=1, normalize=False): - """ - Calculate the weighted permutation entropy. Weighted permutation - entropy captures the information in the amplitude of a signal where - standard permutation entropy only measures the information in the - ordinal pattern, "motif". + """Calculate the weighted permutation entropy. + + Weighted permutation entropy captures the information in the amplitude of a signal where + standard permutation entropy only measures the information in the ordinal pattern, "motif". Parameters ---------- @@ -385,6 +385,7 @@ def weighted_permutation_entropy(time_series, order=2, delay=1, normalize=False) >>> # Return a value comprised between 0 and 1. 
>>> print(permutation_entropy(x, order=3, normalize=True)) 0.547 + """ x = time_delay_embedding(time_series, embedding_dimension=order, delay=delay) @@ -406,14 +407,13 @@ def weighted_permutation_entropy(time_series, order=2, delay=1, normalize=False) wpe = -np.dot(pw, b) if normalize: - wpe /= np.log2(np.math.factorial(order)) + wpe /= np.log2(math.factorial(order)) return wpe def composite_multiscale_entropy(time_series, sample_length, scale, tolerance=None): - """ - Composite Multiscale Entropy of the given time series + """Calculate Composite Multiscale Entropy. Parameters ---------- @@ -435,6 +435,7 @@ def composite_multiscale_entropy(time_series, sample_length, scale, tolerance=No ---------- .. [1] Wu, Shuen-De, et al. "Time series analysis using composite multiscale entropy." Entropy 15.3 (2013): 1069-1084. + """ if tolerance is None: tolerance = 0.1 * np.std(time_series) diff --git a/pyproject.toml b/pyproject.toml index da8250b..2b557c2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,11 +15,11 @@ numpy = ">=1.26,<3.0" [tool.poetry.group.dev.dependencies] -ruff = ">=0.5.0,<0.5.1" black = ">=23.3,<25.0" codecov = "^2.1.13" commitizen = "^3.2.2" pre-commit = "^3.3.2" +ruff = ">=0.5.0,<0.5.1" [tool.commitizen] @@ -102,4 +102,15 @@ select = [ "PGH", # Ruff-specific rules "RUF", + # NumPy-specific rules + "NPY", + # pydocstyle + "D", ] +ignore = [ + "D100", "D104", + "D203", "D213" +] + +[tool.ruff.lint.per-file-ignores] +"**/{tests}/*" = ["D101", "D102", "D103"] diff --git a/tests/test_entropy.py b/tests/test_entropy.py index 4c28825..c130e07 100644 --- a/tests/test_entropy.py +++ b/tests/test_entropy.py @@ -4,8 +4,7 @@ from pyentrp import entropy as ent -np.random.seed(1234567) - +rng = np.random.default_rng(1234567) TIME_SERIES = [1, 1, 1, 2, 3, 4, 5] TIME_SERIES_STRING = "1112345" @@ -22,15 +21,15 @@ PERM_ENTROPY_BANDT = [4, 7, 9, 10, 6, 11, 3] -RANDOM_TIME_SERIES = np.random.rand(1000) +RANDOM_TIME_SERIES = rng.random(1000) class 
TestEntropy(unittest.TestCase): def test_shannon_entropy_string(self): - self.assertEqual(round(ent.shannon_entropy(TIME_SERIES_STRING), 5), SHANNON_ENTROPY) + np.testing.assert_allclose(ent.shannon_entropy(TIME_SERIES_STRING), SHANNON_ENTROPY, rtol=1e-5) def test_shannon_entropy_numerical(self): - self.assertEqual(round(ent.shannon_entropy(TIME_SERIES), 5), SHANNON_ENTROPY) + np.testing.assert_allclose(ent.shannon_entropy(TIME_SERIES), SHANNON_ENTROPY, rtol=1e-5) def test_sample_entropy(self): ts = TS_SAMPLE_ENTROPY @@ -42,49 +41,48 @@ def test_multiscale_entropy(self): multi_scale_entropy = ent.multiscale_entropy(RANDOM_TIME_SERIES, 4, maxscale=4) np.testing.assert_allclose( multi_scale_entropy, - np.array([2.52572864, 2.31911439, 1.65292302, 1.86075234]), + np.array([3.178054, 3.178054, 2.890372, 3.401197]), + rtol=1e-6, ) def test_permutation_entropy(self): - self.assertEqual( - np.round(ent.permutation_entropy(PERM_ENTROPY_BANDT, order=2, delay=1), 3), + np.testing.assert_allclose( + ent.permutation_entropy(PERM_ENTROPY_BANDT, order=2, delay=1), 0.918, + rtol=1e-3, ) - self.assertEqual( - np.round(ent.permutation_entropy(PERM_ENTROPY_BANDT, order=3, delay=1), 3), + + np.testing.assert_allclose( + ent.permutation_entropy(PERM_ENTROPY_BANDT, order=3, delay=1), 1.522, + rtol=1e-3, ) + # Assert that a fully random vector has an entropy of 0.99999... 
- self.assertEqual( - np.round( - ent.permutation_entropy(RANDOM_TIME_SERIES, order=3, delay=1, normalize=True), - 3, - ), + np.testing.assert_allclose( + ent.permutation_entropy(RANDOM_TIME_SERIES, order=3, delay=1, normalize=True), 0.999, + rtol=1e-3, ) def test_weighted_permutation_entropy(self): - self.assertEqual( - np.round( - ent.weighted_permutation_entropy(PERM_ENTROPY_BANDT, order=2, delay=1), - 3, - ), + np.testing.assert_allclose( + ent.weighted_permutation_entropy(PERM_ENTROPY_BANDT, order=2, delay=1), 0.913, + rtol=1e-3, ) - self.assertEqual( - np.round( - ent.weighted_permutation_entropy(PERM_ENTROPY_BANDT, order=3, delay=1), - 3, - ), + + np.testing.assert_allclose( + ent.weighted_permutation_entropy(PERM_ENTROPY_BANDT, order=3, delay=1), 1.414, + rtol=1e-3, ) + # Assert that a fully random vector has an entropy of 0.99999... - self.assertEqual( - np.round( - ent.weighted_permutation_entropy(RANDOM_TIME_SERIES, order=3, delay=1, normalize=True), - 3, - ), + np.testing.assert_allclose( + ent.weighted_permutation_entropy(RANDOM_TIME_SERIES, order=3, delay=1, normalize=True), 0.999, + rtol=1e-3, ) def test_multiscale_permutation_entropy(self):