Merge branch 'main' into feat/nan-support

predict-idlab · Jan 23, 2024 · 45fc0d3 · 45fc0d3
2 parents e285c1d + e4d4a66
commit 45fc0d3
Show file tree

Hide file tree

Showing 21 changed files with 345 additions and 645 deletions.
diff --git a/.github/workflows/ci-tsdownsample.yml b/.github/workflows/ci-tsdownsample.yml
@@ -46,7 +46,7 @@ jobs:
       matrix:
         os: ['windows-latest', 'macOS-latest', 'ubuntu-latest']
         rust: ['nightly']  # ['stable', 'beta']
-        python-version: ['3.7', '3.8', '3.9', '3.10', '3.11']
+        python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12']
 
     env:
       PYTHON: ${{ matrix.python-version }}
@@ -155,7 +155,7 @@ jobs:
         target: ${{ matrix.target }}
         manylinux: ${{ matrix.manylinux || 'auto' }}
         container: ${{ matrix.container }}
-        args: --release --out dist --interpreter ${{ matrix.interpreter || '3.7 3.8 3.9 3.10 3.11' }}
+        args: --release --out dist --interpreter ${{ matrix.interpreter || '3.7 3.8 3.9 3.10 3.11 3.12' }}
 
     - run: ${{ matrix.ls || 'ls -lh' }} dist/
 

diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml
@@ -43,6 +43,6 @@ jobs:
       # - run: rm tests/__init__.py
 
       - name: Run CodSpeed benchmarks
-        uses: CodSpeedHQ/action@v1
+        uses: CodSpeedHQ/action@v2
         with:
           run: pytest tests/benchmarks/ --codspeed
diff --git a/Cargo.toml b/Cargo.toml
@@ -9,10 +9,10 @@ license = "MIT"
 
 [dependencies]
 downsample_rs = { path = "downsample_rs", features = ["half"]}
-pyo3 = { version = "0.19", features = ["extension-module"] }
-numpy = { version = "0.19", features = ["half"] }
-half = { version = "2.1", default-features = false }
-paste = { version = "1.0.9", default-features = false }
+pyo3 = { version = "0.20", features = ["extension-module"] }
+numpy = { version = "0.20", features = ["half"] }
+half = { version = "2.3.1", default-features = false }
+paste = { version = "1.0.14", default-features = false }
 
 [lib]
 name = "tsdownsample"

diff --git a/README.md b/README.md
@@ -89,9 +89,10 @@ downsample([x], y, n_out, **kwargs) -> ndarray[uint64]
 
 - `x` is optional
 - `x` and `y` are both positional arguments
-- `n_out` is a mandatory keyword argument that defines the number of output values<sup>\*</sup>
-- `**kwargs` are optional keyword arguments _(see [table below](#downsampling-algorithms-📈))_:
-  - `n_threads`: how many threads to use for multi-threading (default `1`, so no multi-threading)
+- `n_out` is a mandatory keyword argument that defines the number of output values<sup>*</sup>
+- `**kwargs` are optional keyword arguments *(see [table below](#downsampling-algorithms-📈))*:
+  - `parallel`: whether to use multi-threading (default: `False`)  
+     ❗ The max number of threads can be configured with the `TSDOWNSAMPLE_MAX_THREADS` ENV var (e.g. `os.environ["TSDOWNSAMPLE_MAX_THREADS"] = "4"`)
   - ...
 
 **Returns**: a `ndarray[uint64]` of indices that can be used to index the original data.
@@ -102,13 +103,12 @@ downsample([x], y, n_out, **kwargs) -> ndarray[uint64]
 
 The following downsampling algorithms (classes) are implemented:
 
-|             Downsampler | Description                                                                                                                                                                                       | `**kwargs`                               |
-| ----------------------: | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------- |
-|     `MinMaxDownsampler` | selects the **min and max** value in each bin                                                                                                                                                     | `n_threads`                              |
-|         `M4Downsampler` | selects the [**min, max, first and last**](https://dl.acm.org/doi/pdf/10.14778/2732951.2732953) value in each bin                                                                                 | `n_threads`                              |
-|       `LTTBDownsampler` | performs the [**Largest Triangle Three Buckets**](https://skemman.is/bitstream/1946/15343/3/SS_MSthesis.pdf) algorithm                                                                            | `n_threads`                              |
-| `MinMaxLTTBDownsampler` | (_new two-step algorithm 🎉_) first selects `n_out` \* `minmax_ratio` **min and max** values, then further reduces these to `n_out` values using the **Largest Triangle Three Buckets** algorithm | `n_threads`, `minmax_ratio`<sup>\*</sup> |
-
+| Downsampler | Description | `**kwargs` |
+| ---:| --- |--- |
+| `MinMaxDownsampler` | selects the **min and max** value in each bin | `parallel` |
+| `M4Downsampler` | selects the [**min, max, first and last**](https://dl.acm.org/doi/pdf/10.14778/2732951.2732953) value in each bin | `parallel` |
+| `LTTBDownsampler` | performs the [**Largest Triangle Three Buckets**](https://skemman.is/bitstream/1946/15343/3/SS_MSthesis.pdf) algorithm | `parallel` |
+| `MinMaxLTTBDownsampler` | (*new two-step algorithm 🎉*) first selects `n_out` * `minmax_ratio` **min and max** values, then further reduces these to `n_out` values using the **Largest Triangle Three Buckets** algorithm | `parallel`, `minmax_ratio`<sup>*</sup> |
 
 <sup>*</sup><i>Default value for `minmax_ratio` is 4, which is empirically proven to be a good default. More details here: https://arxiv.org/abs/2305.00332</i>
 

diff --git a/downsample_rs/Cargo.toml b/downsample_rs/Cargo.toml
@@ -9,15 +9,15 @@ license = "MIT"
 [dependencies]
 # TODO: perhaps use polars?
 argminmax = { version = "0.6.1", features = ["half"] }
-# argminmax = { path = "../../argminmax" , features = ["half", "ndarray"] }
-half = { version = "2.1", default-features = false , features=["num-traits"], optional = true}
-num-traits = { version = "0.2.15", default-features = false }
-rayon = { version = "1.6.0", default-features = false }
+half = { version = "2.3.1", default-features = false , features=["num-traits"], optional = true}
+num-traits = { version = "0.2.17", default-features = false }
+once_cell = "1"
+rayon = { version = "1.8.0", default-features = false }
 
 [dev-dependencies]
-rstest = { version = "0.18.1", default-features = false }
+rstest = { version = "0.18.2", default-features = false }
 rstest_reuse = { version = "0.6", default-features = false }
-criterion = "0.4.0"
+criterion = "0.5.1"
 dev_utils = { path = "dev_utils" }
 
 [[bench]]

diff --git a/downsample_rs/benches/bench_m4.rs b/downsample_rs/benches/bench_m4.rs
@@ -14,15 +14,8 @@ fn m4_f32_random_array_long_single_core(c: &mut Criterion) {
 fn m4_f32_random_array_long_multi_core(c: &mut Criterion) {
     let n = config::ARRAY_LENGTH_LONG;
     let data = utils::get_random_array::<f32>(n, f32::MIN, f32::MAX);
-    let all_threads: usize = utils::get_all_threads();
     c.bench_function("m4_p_f32", |b| {
-        b.iter(|| {
-            m4_mod::m4_without_x_parallel(
-                black_box(data.as_slice()),
-                black_box(2_000),
-                black_box(all_threads),
-            )
-        })
+        b.iter(|| m4_mod::m4_without_x_parallel(black_box(data.as_slice()), black_box(2_000)))
     });
 }
 
@@ -48,23 +41,15 @@ fn m4_f32_random_array_50M_multi_core(c: &mut Criterion) {
     let n = 50_000_000;
     let data = utils::get_random_array::<f32>(n, f32::MIN, f32::MAX);
     let x = (0..n).map(|i| i as i32).collect::<Vec<i32>>();
-    let all_threads: usize = utils::get_all_threads();
     c.bench_function("m4_p_50M_f32", |b| {
-        b.iter(|| {
-            m4_mod::m4_without_x_parallel(
-                black_box(data.as_slice()),
-                black_box(2_000),
-                black_box(all_threads),
-            )
-        })
+        b.iter(|| m4_mod::m4_without_x_parallel(black_box(data.as_slice()), black_box(2_000)))
     });
     c.bench_function("m4_x_p_50M_f32", |b| {
         b.iter(|| {
             m4_mod::m4_with_x_parallel(
                 black_box(x.as_slice()),
                 black_box(data.as_slice()),
                 black_box(2_000),
-                black_box(all_threads),
             )
         })
     });

diff --git a/downsample_rs/benches/bench_minmax.rs b/downsample_rs/benches/bench_minmax.rs
@@ -14,14 +14,9 @@ fn minmax_f32_random_array_long_single_core(c: &mut Criterion) {
 fn minmax_f32_random_array_long_multi_core(c: &mut Criterion) {
     let n = config::ARRAY_LENGTH_LONG;
     let data = utils::get_random_array::<f32>(n, f32::MIN, f32::MAX);
-    let all_threads: usize = utils::get_all_threads();
     c.bench_function("minmax_p_f32", |b| {
         b.iter(|| {
-            minmax_mod::min_max_without_x_parallel(
-                black_box(data.as_slice()),
-                black_box(2_000),
-                black_box(all_threads),
-            )
+            minmax_mod::min_max_without_x_parallel(black_box(data.as_slice()), black_box(2_000))
         })
     });
 }
@@ -55,14 +50,9 @@ fn minmax_f32_random_array_50M_long_multi_core(c: &mut Criterion) {
     let n = 50_000_000;
     let data = utils::get_random_array::<f32>(n, f32::MIN, f32::MAX);
     let x = (0..n).map(|i| i as i32).collect::<Vec<i32>>();
-    let all_threads: usize = utils::get_all_threads();
     c.bench_function("minmax_p_50M_f32", |b| {
         b.iter(|| {
-            minmax_mod::min_max_without_x_parallel(
-                black_box(data.as_slice()),
-                black_box(2_000),
-                black_box(all_threads),
-            )
+            minmax_mod::min_max_without_x_parallel(black_box(data.as_slice()), black_box(2_000))
         })
     });
     c.bench_function("minmax_x_p_50M_f32", |b| {
@@ -71,7 +61,6 @@ fn minmax_f32_random_array_50M_long_multi_core(c: &mut Criterion) {
                 black_box(x.as_slice()),
                 black_box(data.as_slice()),
                 black_box(2_000),
-                black_box(all_threads),
             )
         })
     });

diff --git a/downsample_rs/benches/bench_minmaxlttb.rs b/downsample_rs/benches/bench_minmaxlttb.rs
@@ -25,15 +25,13 @@ fn minmaxlttb_f32_random_array_long_multi_core(c: &mut Criterion) {
     let n = config::ARRAY_LENGTH_LONG;
     let x = (0..n).map(|i| i as i32).collect::<Vec<i32>>();
     let y = utils::get_random_array::<f32>(n, f32::MIN, f32::MAX);
-    let all_threads: usize = utils::get_all_threads();
     c.bench_function("mlttb_x_p_f32", |b| {
         b.iter(|| {
             minmaxlttb_mod::minmaxlttb_with_x_parallel(
                 black_box(x.as_slice()),
                 black_box(y.as_slice()),
                 black_box(2_000),
                 black_box(MINMAX_RATIO),
-                black_box(all_threads),
             )
         })
     });
@@ -59,15 +57,13 @@ fn minmaxlttb_f32_random_array_50M_multi_core(c: &mut Criterion) {
     let n = 50_000_000;
     let x = (0..n).map(|i| i as i32).collect::<Vec<i32>>();
     let y = utils::get_random_array::<f32>(n, f32::MIN, f32::MAX);
-    let all_threads: usize = utils::get_all_threads();
     c.bench_function("mlttb_x_p_50M_f32", |b| {
         b.iter(|| {
             minmaxlttb_mod::minmaxlttb_with_x_parallel(
                 black_box(x.as_slice()),
                 black_box(y.as_slice()),
                 black_box(2_000),
                 black_box(MINMAX_RATIO),
-                black_box(all_threads),
             )
         })
     });
@@ -90,14 +86,12 @@ fn minmaxlttb_without_x_f32_random_array_50M_single_core(c: &mut Criterion) {
 fn minmaxlttb_without_x_f32_random_array_50M_multi_core(c: &mut Criterion) {
     let n = 50_000_000;
     let y = utils::get_random_array::<f32>(n, f32::MIN, f32::MAX);
-    let all_threads: usize = utils::get_all_threads();
     c.bench_function("mlttb_p_50M_f32", |b| {
         b.iter(|| {
             minmaxlttb_mod::minmaxlttb_without_x_parallel(
                 black_box(y.as_slice()),
                 black_box(2_000),
                 black_box(MINMAX_RATIO),
-                black_box(all_threads),
             )
         })
     });

diff --git a/downsample_rs/dev_utils/src/utils.rs b/downsample_rs/dev_utils/src/utils.rs
@@ -33,11 +33,3 @@ where
     }
     arr
 }
-
-// ------------- Multi-threading -------------
-
-use std::thread::available_parallelism;
-
-pub fn get_all_threads() -> usize {
-    available_parallelism().map(|x| x.get()).unwrap_or(1)
-}
diff --git a/downsample_rs/src/lib.rs b/downsample_rs/src/lib.rs
@@ -14,3 +14,22 @@ pub use m4::*;
 pub(crate) mod helpers;
 pub(crate) mod searchsorted;
 pub(crate) mod types;
+
+use once_cell::sync::Lazy;
+use rayon::{ThreadPool, ThreadPoolBuilder};
+
+// Inspired by: https://github.com/pola-rs/polars/blob/9a69062aa0beb2a1bc5d57294cac49961fc91058/crates/polars-core/src/lib.rs#L49
+pub static POOL: Lazy<ThreadPool> = Lazy::new(|| {
+    ThreadPoolBuilder::new()
+        .num_threads(
+            std::env::var("TSDOWNSAMPLE_MAX_THREADS")
+                .map(|s| s.parse::<usize>().expect("integer"))
+                .unwrap_or_else(|_| {
+                    std::thread::available_parallelism()
+                        .unwrap_or(std::num::NonZeroUsize::new(1).unwrap())
+                        .get()
+                }),
+        )
+        .build()
+        .expect("could not spawn threads")
+});