From 0359eba4179226ab97b1ef97b344d5fa4993567d Mon Sep 17 00:00:00 2001 From: Niels Praet <61625814+NielsPraet@users.noreply.github.com> Date: Wed, 2 Aug 2023 14:08:17 +0200 Subject: [PATCH] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20deps:=20update=20argminmax?= =?UTF-8?q?=20to=200.6.1=20(#57)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ⬆️ deps: update argminmax to 0.6.1 * ♻️ refactor: use rust slices in searchsorted.rs * ♻️ refactor: use slices in m4 generic * ♻️ tests: fix m4 refactor * ♻️ tests: refactor lttb to slices * ♻️ tests: refactor minmax to slices * ♻️ tests: refactor minmaxlttb to slices * 🎨 chore: cleanup code * 🐛 fix: fix rust to python bindings * 🎨 chore: remove unused variable * 🐛 fix: fix benchmark code * 🔥 chore: remove Average implementation for ndarray structs * ⏪️ chore: revert changes * ♻️ refactor: remove last dependencies of ndarray * ➖ deps: remove ndarray dependencies * :broom: remove unnecessary f16 import * 🎨 chore: remove useless trait bounds * ♻️ refactor: clean up m4 code * 🏗️ fix: update lib to reflect m4 file changes * ♻️ refactor: clean up minmax code * 🏗️ fix: update lib to reflect minmax file changes * ♻️ refactor: add new minmax file * ✅ tests: fix benchmarks * ✅ tests: fix benchmarks for minmax * ♻️ refactor: refactor minmaxlttb files * 🏗️ refactor: update lib to reflect changes to minmaxlttb files * ✅ tests: fix benchmarks for minmaxlttb * 🏗️ refactor: refactor lttb files * 🏗 refactor: update lib to reflect changes to lttb files * 🐛 fix: rename minmaxlttb mod name * ✅ tests: fix test_rust_mods * 🐛 fix: use new rust modules in python code * :pen: code review --------- Co-authored-by: Jeroen Van Der Donckt --- downsample_rs/Cargo.toml | 3 +- downsample_rs/benches/bench_lttb.rs | 25 +- downsample_rs/benches/bench_m4.rs | 59 ++- downsample_rs/benches/bench_minmax.rs | 75 ++- downsample_rs/benches/bench_minmaxlttb.rs | 73 ++- downsample_rs/dev_utils/Cargo.toml | 1 - downsample_rs/dev_utils/src/utils.rs | 10 +- downsample_rs/src/helpers.rs | 44 +- downsample_rs/src/{lttb/scalar.rs => lttb.rs} | 88 ++-- downsample_rs/src/lttb/mod.rs | 2 - downsample_rs/src/m4.rs | 452 ++++++++++++++++++ downsample_rs/src/m4/generic.rs | 227 --------- downsample_rs/src/m4/mod.rs | 5 - downsample_rs/src/m4/scalar.rs | 263 ---------- downsample_rs/src/m4/simd.rs | 261 ---------- downsample_rs/src/minmax.rs | 431 +++++++++++++++++ downsample_rs/src/minmax/generic.rs | 209 -------- downsample_rs/src/minmax/mod.rs | 5 - downsample_rs/src/minmax/scalar.rs | 263 ---------- downsample_rs/src/minmax/simd.rs | 265 ---------- downsample_rs/src/minmaxlttb.rs | 290 +++++++++++ downsample_rs/src/minmaxlttb/generic.rs | 108 ----- downsample_rs/src/minmaxlttb/mod.rs | 5 - downsample_rs/src/minmaxlttb/scalar.rs | 179 ------- downsample_rs/src/minmaxlttb/simd.rs | 178 ------- downsample_rs/src/searchsorted.rs | 121 ++--- src/lib.rs | 204 ++------ tests/test_rust_mods.py | 8 +- tsdownsample/downsampling_interface.py | 12 +- 29 files changed, 1437 insertions(+), 2429 deletions(-) rename downsample_rs/src/{lttb/scalar.rs => lttb.rs} (69%) delete mode 100644 downsample_rs/src/lttb/mod.rs create mode 100644 downsample_rs/src/m4.rs delete mode 100644 downsample_rs/src/m4/generic.rs delete mode 100644 downsample_rs/src/m4/mod.rs delete mode 100644 downsample_rs/src/m4/scalar.rs delete mode 100644 downsample_rs/src/m4/simd.rs create mode 100644 downsample_rs/src/minmax.rs delete mode 100644 downsample_rs/src/minmax/generic.rs delete mode 100644 downsample_rs/src/minmax/mod.rs delete mode 100644 downsample_rs/src/minmax/scalar.rs delete mode 100644 downsample_rs/src/minmax/simd.rs create mode 100644 downsample_rs/src/minmaxlttb.rs delete mode 100644 downsample_rs/src/minmaxlttb/generic.rs delete mode 100644 downsample_rs/src/minmaxlttb/mod.rs delete mode 100644 downsample_rs/src/minmaxlttb/scalar.rs delete mode 100644 downsample_rs/src/minmaxlttb/simd.rs diff --git a/downsample_rs/Cargo.toml b/downsample_rs/Cargo.toml index afdb78f..9c52eb8 100644 --- a/downsample_rs/Cargo.toml +++ b/downsample_rs/Cargo.toml @@ -8,8 +8,7 @@ license = "MIT" [dependencies] # TODO: perhaps use polars? -ndarray = {version = "0.15.6", default-features = false, features = ["rayon"] } -argminmax = { version = "0.3.1", features = ["half"] } +argminmax = { version = "0.6.1", features = ["half"] } # argminmax = { path = "../../argminmax" , features = ["half", "ndarray"] } half = { version = "2.1", default-features = false , features=["num-traits"], optional = true} num-traits = { version = "0.2.15", default-features = false } diff --git a/downsample_rs/benches/bench_lttb.rs b/downsample_rs/benches/bench_lttb.rs index 1576ea4..9e4eb8c 100644 --- a/downsample_rs/benches/bench_lttb.rs +++ b/downsample_rs/benches/bench_lttb.rs @@ -2,22 +2,33 @@ use downsample_rs::lttb as lttb_mod; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use dev_utils::{config, utils}; -use ndarray::Array1; fn lttb_f32_random_array_long(c: &mut Criterion) { let n = config::ARRAY_LENGTH_LONG; - let x = Array1::from((0..n).map(|i| i as i32).collect::>()); + let x = (0..n).map(|i| i as i32).collect::>(); let y = utils::get_random_array::(n, f32::MIN, f32::MAX); c.bench_function("lttb_scalx_f32", |b| { - b.iter(|| lttb_mod::lttb_with_x(black_box(x.view()), black_box(y.view()), black_box(2_000))) + b.iter(|| { + lttb_mod::lttb_with_x( + black_box(x.as_slice()), + black_box(y.as_slice()), + black_box(2_000), + ) + }) }); } fn lttb_f32_random_array_50m(c: &mut Criterion) { let n = 50_000_000; - let x = Array1::from((0..n).map(|i| i as i32).collect::>()); + let x = (0..n).map(|i| i as i32).collect::>(); let y = utils::get_random_array::(n, f32::MIN, f32::MAX); c.bench_function("lttb_scalx_50M_f32", |b| { - b.iter(|| lttb_mod::lttb_with_x(black_box(x.view()), black_box(y.view()), black_box(2_000))) + b.iter(|| { + lttb_mod::lttb_with_x( + black_box(x.as_slice()), + black_box(y.as_slice()), + black_box(2_000), + ) + }) }); } @@ -25,14 +36,14 @@ fn lttb_without_x_f32_random_array_long(c: &mut Criterion) { let n = config::ARRAY_LENGTH_LONG; let y = utils::get_random_array::(n, f32::MIN, f32::MAX); c.bench_function("lttb_scal_f32", |b| { - b.iter(|| lttb_mod::lttb_without_x(black_box(y.view()), black_box(2_000))) + b.iter(|| lttb_mod::lttb_without_x(black_box(y.as_slice()), black_box(2_000))) }); } fn lttb_without_x_f32_random_array_50m(c: &mut Criterion) { let n = 50_000_000; let y = utils::get_random_array::(n, f32::MIN, f32::MAX); c.bench_function("lttb_scal_50M_f32", |b| { - b.iter(|| lttb_mod::lttb_without_x(black_box(y.view()), black_box(2_000))) + b.iter(|| lttb_mod::lttb_without_x(black_box(y.as_slice()), black_box(2_000))) }); } diff --git a/downsample_rs/benches/bench_m4.rs b/downsample_rs/benches/bench_m4.rs index 3d2b091..11b28ed 100644 --- a/downsample_rs/benches/bench_m4.rs +++ b/downsample_rs/benches/bench_m4.rs @@ -2,16 +2,15 @@ use downsample_rs::m4 as m4_mod; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use dev_utils::{config, utils}; -use ndarray::Array1; fn m4_f32_random_array_long_single_core(c: &mut Criterion) { let n = config::ARRAY_LENGTH_LONG; let data = utils::get_random_array::(n, f32::MIN, f32::MAX); c.bench_function("m4_scal_f32", |b| { - b.iter(|| m4_mod::m4_scalar_without_x(black_box(data.view()), black_box(2_000))) + b.iter(|| m4_mod::m4_without_x(black_box(data.as_slice()), black_box(2_000))) }); c.bench_function("m4_simd_f32", |b| { - b.iter(|| m4_mod::m4_simd_without_x(black_box(data.view()), black_box(2_000))) + b.iter(|| m4_mod::m4_without_x(black_box(data.as_slice()), black_box(2_000))) }); } @@ -21,8 +20,8 @@ fn m4_f32_random_array_long_multi_core(c: &mut Criterion) { let all_threads: usize = utils::get_all_threads(); c.bench_function("m4_scal_p_f32", |b| { b.iter(|| { - m4_mod::m4_scalar_without_x_parallel( - black_box(data.view()), + m4_mod::m4_without_x_parallel( + black_box(data.as_slice()), black_box(2_000), black_box(all_threads), ) @@ -30,8 +29,8 @@ fn m4_f32_random_array_long_multi_core(c: &mut Criterion) { }); c.bench_function("m4_simd_p_f32", |b| { b.iter(|| { - m4_mod::m4_simd_without_x_parallel( - black_box(data.view()), + m4_mod::m4_without_x_parallel( + black_box(data.as_slice()), black_box(2_000), black_box(all_threads), ) @@ -42,27 +41,27 @@ fn m4_f32_random_array_long_multi_core(c: &mut Criterion) { fn m4_f32_random_array_50M_single_core(c: &mut Criterion) { let n = 50_000_000; let data = utils::get_random_array::(n, f32::MIN, f32::MAX); - let x = Array1::from((0..n).map(|i| i as i32).collect::>()); + let x = (0..n).map(|i| i as i32).collect::>(); c.bench_function("m4_scal_50M_f32", |b| { - b.iter(|| m4_mod::m4_scalar_without_x(black_box(data.view()), black_box(2_000))) + b.iter(|| m4_mod::m4_without_x(black_box(data.as_slice()), black_box(2_000))) }); c.bench_function("m4_simd_50M_f32", |b| { - b.iter(|| m4_mod::m4_simd_without_x(black_box(data.view()), black_box(2_000))) + b.iter(|| m4_mod::m4_without_x(black_box(data.as_slice()), black_box(2_000))) }); c.bench_function("m4_scalx_50M_f32", |b| { b.iter(|| { - m4_mod::m4_scalar_with_x( - black_box(x.view()), - black_box(data.view()), + m4_mod::m4_with_x( + black_box(x.as_slice()), + black_box(data.as_slice()), black_box(2_000), ) }) }); c.bench_function("m4_simdx_50M_f32", |b| { b.iter(|| { - m4_mod::m4_simd_with_x( - black_box(x.view()), - black_box(data.view()), + m4_mod::m4_with_x( + black_box(x.as_slice()), + black_box(data.as_slice()), black_box(2_000), ) }) @@ -72,12 +71,12 @@ fn m4_f32_random_array_50M_single_core(c: &mut Criterion) { fn m4_f32_random_array_50M_multi_core(c: &mut Criterion) { let n = 50_000_000; let data = utils::get_random_array::(n, f32::MIN, f32::MAX); - let x = Array1::from((0..n).map(|i| i as i32).collect::>()); + let x = (0..n).map(|i| i as i32).collect::>(); let all_threads: usize = utils::get_all_threads(); c.bench_function("m4_scal_p_50M_f32", |b| { b.iter(|| { - m4_mod::m4_scalar_without_x_parallel( - black_box(data.view()), + m4_mod::m4_without_x_parallel( + black_box(data.as_slice()), black_box(2_000), black_box(all_threads), ) @@ -85,8 +84,8 @@ fn m4_f32_random_array_50M_multi_core(c: &mut Criterion) { }); c.bench_function("m4_simd_p_50M_f32", |b| { b.iter(|| { - m4_mod::m4_simd_without_x_parallel( - black_box(data.view()), + m4_mod::m4_without_x_parallel( + black_box(data.as_slice()), black_box(2_000), black_box(all_threads), ) @@ -94,9 +93,9 @@ fn m4_f32_random_array_50M_multi_core(c: &mut Criterion) { }); c.bench_function("m4_scalx_p_50M_f32", |b| { b.iter(|| { - m4_mod::m4_scalar_with_x_parallel( - black_box(x.view()), - black_box(data.view()), + m4_mod::m4_with_x_parallel( + black_box(x.as_slice()), + black_box(data.as_slice()), black_box(2_000), black_box(all_threads), ) @@ -104,9 +103,9 @@ fn m4_f32_random_array_50M_multi_core(c: &mut Criterion) { }); c.bench_function("m4_simdx_p_50M_f32", |b| { b.iter(|| { - m4_mod::m4_simd_with_x_parallel( - black_box(x.view()), - black_box(data.view()), + m4_mod::m4_with_x_parallel( + black_box(x.as_slice()), + black_box(data.as_slice()), black_box(2_000), black_box(all_threads), ) @@ -118,13 +117,13 @@ fn m4_f32_random_array_50M_multi_core(c: &mut Criterion) { // let n = config::ARRAY_LENGTH_LONG; // let data = utils::get_worst_case_array::(n, 1.0); // c.bench_function("overlap_worst_long_f32", |b| { -// b.iter(|| minmax_mod::min_max_overlap(black_box(data.view()), black_box(2_000))) +// b.iter(|| minmax_mod::min_max_overlap(black_box(data.as_slice()), black_box(2_000))) // }); // c.bench_function("simple_worst_long_f32", |b| { -// b.iter(|| minmax_mod::min_max(black_box(data.view()), black_box(2_000))) +// b.iter(|| minmax_mod::min_max(black_box(data.as_slice()), black_box(2_000))) // }); // c.bench_function("simd_worst_long_f32", |b| { -// b.iter(|| minmax_mod::min_max_simd_f32(black_box(data.view()), black_box(2_000))) +// b.iter(|| minmax_mod::min_max_simd_f32(black_box(data.as_slice()), black_box(2_000))) // }); // } diff --git a/downsample_rs/benches/bench_minmax.rs b/downsample_rs/benches/bench_minmax.rs index 9133385..7311c3c 100644 --- a/downsample_rs/benches/bench_minmax.rs +++ b/downsample_rs/benches/bench_minmax.rs @@ -2,16 +2,15 @@ use downsample_rs::minmax as minmax_mod; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use dev_utils::{config, utils}; -use ndarray::Array1; fn minmax_f32_random_array_long_single_core(c: &mut Criterion) { let n = config::ARRAY_LENGTH_LONG; let data = utils::get_random_array::(n, f32::MIN, f32::MAX); c.bench_function("minmax_scal_f32", |b| { - b.iter(|| minmax_mod::min_max_scalar_without_x(black_box(data.view()), black_box(2_000))) + b.iter(|| minmax_mod::min_max_without_x(black_box(data.as_slice()), black_box(2_000))) }); c.bench_function("minmax_simd_f32", |b| { - b.iter(|| minmax_mod::min_max_simd_without_x(black_box(data.view()), black_box(2_000))) + b.iter(|| minmax_mod::min_max_without_x(black_box(data.as_slice()), black_box(2_000))) }); } @@ -21,8 +20,8 @@ fn minmax_f32_random_array_long_multi_core(c: &mut Criterion) { let all_threads: usize = utils::get_all_threads(); c.bench_function("minmax_scal_p_f32", |b| { b.iter(|| { - minmax_mod::min_max_scalar_without_x_parallel( - black_box(data.view()), + minmax_mod::min_max_without_x_parallel( + black_box(data.as_slice()), black_box(2_000), black_box(all_threads), ) @@ -30,8 +29,8 @@ fn minmax_f32_random_array_long_multi_core(c: &mut Criterion) { }); c.bench_function("minmax_simd_p_f32", |b| { b.iter(|| { - minmax_mod::min_max_simd_without_x_parallel( - black_box(data.view()), + minmax_mod::min_max_without_x_parallel( + black_box(data.as_slice()), black_box(2_000), black_box(all_threads), ) @@ -42,55 +41,55 @@ fn minmax_f32_random_array_long_multi_core(c: &mut Criterion) { fn minmax_f32_random_array_50M_single_core(c: &mut Criterion) { let n = 50_000_000; let data = utils::get_random_array::(n, f32::MIN, f32::MAX); - let x = Array1::from((0..n).map(|i| i as i32).collect::>()); + let x = (0..n).map(|i| i as i32).collect::>(); c.bench_function("minmax_scal_50M_f32", |b| { - b.iter(|| minmax_mod::min_max_scalar_without_x(black_box(data.view()), black_box(2_000))) + b.iter(|| minmax_mod::min_max_without_x(black_box(data.as_slice()), black_box(2_000))) }); c.bench_function("minmax_simd_50M_f32", |b| { - b.iter(|| minmax_mod::min_max_simd_without_x(black_box(data.view()), black_box(2_000))) + b.iter(|| minmax_mod::min_max_without_x(black_box(data.as_slice()), black_box(2_000))) }); c.bench_function("minmax_scalx_50M_f32", |b| { b.iter(|| { - minmax_mod::min_max_scalar_with_x( - black_box(x.view()), - black_box(data.view()), + minmax_mod::min_max_with_x( + black_box(x.as_slice()), + black_box(data.as_slice()), black_box(2_000), ) }) }); c.bench_function("minmax_simdx_50M_f32", |b| { b.iter(|| { - minmax_mod::min_max_simd_with_x( - black_box(x.view()), - black_box(data.view()), + minmax_mod::min_max_with_x( + black_box(x.as_slice()), + black_box(data.as_slice()), black_box(2_000), ) }) }); // c.bench_function("minmax_scal_50M_f32", |b| { - // b.iter(|| minmax_mod::min_max_scalar_without_x(black_box(data.view()), black_box(60_000))) + // b.iter(|| minmax_mod::min_max_without_x(black_box(data.as_slice()), black_box(60_000))) // }); // c.bench_function("minmax_simd_50M_f32", |b| { - // b.iter(|| minmax_mod::min_max_simd_without_x(black_box(data.view()), black_box(60_000))) + // b.iter(|| minmax_mod::min_max_without_x(black_box(data.as_slice()), black_box(60_000))) // }); // c.bench_function("minmax_scalx_50M_f32", |b| { - // b.iter(|| minmax_mod::min_max_scalar_with_x(black_box(x.view()), black_box(data.view()), black_box(60_000))) + // b.iter(|| minmax_mod::min_max_with_x(black_box(x.as_slice()), black_box(data.as_slice()), black_box(60_000))) // }); // c.bench_function("minmax_simdx_50M_f32", |b| { - // b.iter(|| minmax_mod::min_max_simd_with_x(black_box(x.view()), black_box(data.view()), black_box(60_000))) + // b.iter(|| minmax_mod::min_max_with_x(black_box(x.as_slice()), black_box(data.as_slice()), black_box(60_000))) // }); } fn minmax_f32_random_array_50M_long_multi_core(c: &mut Criterion) { let n = 50_000_000; let data = utils::get_random_array::(n, f32::MIN, f32::MAX); - let x = Array1::from((0..n).map(|i| i as i32).collect::>()); + let x = (0..n).map(|i| i as i32).collect::>(); let all_threads: usize = utils::get_all_threads(); c.bench_function("minmax_scal_p_50M_f32", |b| { b.iter(|| { - minmax_mod::min_max_scalar_without_x_parallel( - black_box(data.view()), + minmax_mod::min_max_without_x_parallel( + black_box(data.as_slice()), black_box(2_000), black_box(all_threads), ) @@ -98,8 +97,8 @@ fn minmax_f32_random_array_50M_long_multi_core(c: &mut Criterion) { }); c.bench_function("minmax_simd_p_50M_f32", |b| { b.iter(|| { - minmax_mod::min_max_simd_without_x_parallel( - black_box(data.view()), + minmax_mod::min_max_without_x_parallel( + black_box(data.as_slice()), black_box(2_000), black_box(all_threads), ) @@ -107,9 +106,9 @@ fn minmax_f32_random_array_50M_long_multi_core(c: &mut Criterion) { }); c.bench_function("minmax_scalx_p_50M_f32", |b| { b.iter(|| { - minmax_mod::min_max_scalar_with_x_parallel( - black_box(x.view()), - black_box(data.view()), + minmax_mod::min_max_with_x_parallel( + black_box(x.as_slice()), + black_box(data.as_slice()), black_box(2_000), black_box(all_threads), ) @@ -117,9 +116,9 @@ fn minmax_f32_random_array_50M_long_multi_core(c: &mut Criterion) { }); c.bench_function("minmax_simdx_p_50M_f32", |b| { b.iter(|| { - minmax_mod::min_max_simd_with_x_parallel( - black_box(x.view()), - black_box(data.view()), + minmax_mod::min_max_with_x_parallel( + black_box(x.as_slice()), + black_box(data.as_slice()), black_box(2_000), black_box(all_threads), ) @@ -127,16 +126,16 @@ fn minmax_f32_random_array_50M_long_multi_core(c: &mut Criterion) { }); // c.bench_function("minmax_scal_p_50M_f32", |b| { - // b.iter(|| minmax_mod::min_max_scalar_without_x_parallel(black_box(data.view()), black_box(60_000))) + // b.iter(|| minmax_mod::min_max_without_x_parallel(black_box(data.as_slice()), black_box(60_000))) // }); // c.bench_function("minmax_simd_p_50M_f32", |b| { - // b.iter(|| minmax_mod::min_max_simd_without_x_parallel(black_box(data.view()), black_box(60_000))) + // b.iter(|| minmax_mod::min_max_without_x_parallel(black_box(data.as_slice()), black_box(60_000))) // }); // c.bench_function("minmax_scalx_p_50M_f32", |b| { - // b.iter(|| minmax_mod::min_max_scalar_with_x_parallel(black_box(x.view()), black_box(data.view()), black_box(60_000))) + // b.iter(|| minmax_mod::min_max_with_x_parallel(black_box(x.as_slice()), black_box(data.as_slice()), black_box(60_000))) // }); // c.bench_function("minmax_simdx_p_50M_f32", |b| { - // b.iter(|| minmax_mod::min_max_simd_with_x_parallel(black_box(x.view()), black_box(data.view()), black_box(60_000))) + // b.iter(|| minmax_mod::min_max_with_x_parallel(black_box(x.as_slice()), black_box(data.as_slice()), black_box(60_000))) // }); } @@ -144,13 +143,13 @@ fn minmax_f32_random_array_50M_long_multi_core(c: &mut Criterion) { // let n = config::ARRAY_LENGTH_LONG; // let data = utils::get_worst_case_array::(n, 1.0); // c.bench_function("overlap_worst_long_f32", |b| { -// b.iter(|| minmax_mod::min_max_overlap(black_box(data.view()), black_box(2_000))) +// b.iter(|| minmax_mod::min_max_overlap(black_box(data.as_slice()), black_box(2_000))) // }); // c.bench_function("simple_worst_long_f32", |b| { -// b.iter(|| minmax_mod::min_max(black_box(data.view()), black_box(2_000))) +// b.iter(|| minmax_mod::min_max(black_box(data.as_slice()), black_box(2_000))) // }); // c.bench_function("simd_worst_long_f32", |b| { -// b.iter(|| minmax_mod::min_max_simd_f32(black_box(data.view()), black_box(2_000))) +// b.iter(|| minmax_mod::min_max_simd_f32(black_box(data.as_slice()), black_box(2_000))) // }); // } diff --git a/downsample_rs/benches/bench_minmaxlttb.rs b/downsample_rs/benches/bench_minmaxlttb.rs index 447dda0..f8a2610 100644 --- a/downsample_rs/benches/bench_minmaxlttb.rs +++ b/downsample_rs/benches/bench_minmaxlttb.rs @@ -2,19 +2,18 @@ use downsample_rs::minmaxlttb as minmaxlttb_mod; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use dev_utils::{config, utils}; -use ndarray::Array1; const MINMAX_RATIO: usize = 30; fn minmaxlttb_f32_random_array_long_single_core(c: &mut Criterion) { let n = config::ARRAY_LENGTH_LONG; - let x = Array1::from((0..n).map(|i| i as i32).collect::>()); + let x = (0..n).map(|i| i as i32).collect::>(); let y = utils::get_random_array::(n, f32::MIN, f32::MAX); c.bench_function("mmlttb_scalx_f32", |b| { b.iter(|| { - minmaxlttb_mod::minmaxlttb_scalar_with_x( - black_box(x.view()), - black_box(y.view()), + minmaxlttb_mod::minmaxlttb_with_x( + black_box(x.as_slice()), + black_box(y.as_slice()), black_box(2_000), black_box(MINMAX_RATIO), ) @@ -22,9 +21,9 @@ fn minmaxlttb_f32_random_array_long_single_core(c: &mut Criterion) { }); c.bench_function("mlttb_simdx_f32", |b| { b.iter(|| { - minmaxlttb_mod::minmaxlttb_simd_with_x( - black_box(x.view()), - black_box(y.view()), + minmaxlttb_mod::minmaxlttb_with_x( + black_box(x.as_slice()), + black_box(y.as_slice()), black_box(2_000), black_box(MINMAX_RATIO), ) @@ -34,14 +33,14 @@ fn minmaxlttb_f32_random_array_long_single_core(c: &mut Criterion) { fn minmaxlttb_f32_random_array_long_multi_core(c: &mut Criterion) { let n = config::ARRAY_LENGTH_LONG; - let x = Array1::from((0..n).map(|i| i as i32).collect::>()); + let x = (0..n).map(|i| i as i32).collect::>(); let y = utils::get_random_array::(n, f32::MIN, f32::MAX); let all_threads: usize = utils::get_all_threads(); c.bench_function("mmlttb_scalx_p_f32", |b| { b.iter(|| { - minmaxlttb_mod::minmaxlttb_scalar_with_x_parallel( - black_box(x.view()), - black_box(y.view()), + minmaxlttb_mod::minmaxlttb_with_x_parallel( + black_box(x.as_slice()), + black_box(y.as_slice()), black_box(2_000), black_box(MINMAX_RATIO), black_box(all_threads), @@ -50,9 +49,9 @@ fn minmaxlttb_f32_random_array_long_multi_core(c: &mut Criterion) { }); c.bench_function("mlttb_simdx_p_f32", |b| { b.iter(|| { - minmaxlttb_mod::minmaxlttb_simd_with_x_parallel( - black_box(x.view()), - black_box(y.view()), + minmaxlttb_mod::minmaxlttb_with_x_parallel( + black_box(x.as_slice()), + black_box(y.as_slice()), black_box(2_000), black_box(MINMAX_RATIO), black_box(all_threads), @@ -63,13 +62,13 @@ fn minmaxlttb_f32_random_array_long_multi_core(c: &mut Criterion) { fn minmaxlttb_f32_random_array_50M_single_core(c: &mut Criterion) { let n = 50_000_000; - let x = Array1::from((0..n).map(|i| i as i32).collect::>()); + let x = (0..n).map(|i| i as i32).collect::>(); let y = utils::get_random_array::(n, f32::MIN, f32::MAX); c.bench_function("mlttb_scalx_50M_f32", |b| { b.iter(|| { - minmaxlttb_mod::minmaxlttb_scalar_with_x( - black_box(x.view()), - black_box(y.view()), + minmaxlttb_mod::minmaxlttb_with_x( + black_box(x.as_slice()), + black_box(y.as_slice()), black_box(2_000), black_box(MINMAX_RATIO), ) @@ -77,9 +76,9 @@ fn minmaxlttb_f32_random_array_50M_single_core(c: &mut Criterion) { }); c.bench_function("mlttb_simdx_50M_f32", |b| { b.iter(|| { - minmaxlttb_mod::minmaxlttb_simd_with_x( - black_box(x.view()), - black_box(y.view()), + minmaxlttb_mod::minmaxlttb_with_x( + black_box(x.as_slice()), + black_box(y.as_slice()), black_box(2_000), black_box(MINMAX_RATIO), ) @@ -89,14 +88,14 @@ fn minmaxlttb_f32_random_array_50M_single_core(c: &mut Criterion) { fn minmaxlttb_f32_random_array_50M_multi_core(c: &mut Criterion) { let n = 50_000_000; - let x = Array1::from((0..n).map(|i| i as i32).collect::>()); + let x = (0..n).map(|i| i as i32).collect::>(); let y = utils::get_random_array::(n, f32::MIN, f32::MAX); let all_threads: usize = utils::get_all_threads(); c.bench_function("mlttb_scalx_p_50M_f32", |b| { b.iter(|| { - minmaxlttb_mod::minmaxlttb_scalar_with_x_parallel( - black_box(x.view()), - black_box(y.view()), + minmaxlttb_mod::minmaxlttb_with_x_parallel( + black_box(x.as_slice()), + black_box(y.as_slice()), black_box(2_000), black_box(MINMAX_RATIO), black_box(all_threads), @@ -105,9 +104,9 @@ fn minmaxlttb_f32_random_array_50M_multi_core(c: &mut Criterion) { }); c.bench_function("mlttb_simdx_p_50M_f32", |b| { b.iter(|| { - minmaxlttb_mod::minmaxlttb_simd_with_x_parallel( - black_box(x.view()), - black_box(y.view()), + minmaxlttb_mod::minmaxlttb_with_x_parallel( + black_box(x.as_slice()), + black_box(y.as_slice()), black_box(2_000), black_box(MINMAX_RATIO), black_box(all_threads), @@ -121,8 +120,8 @@ fn minmaxlttb_without_x_f32_random_array_50M_single_core(c: &mut Criterion) { let y = utils::get_random_array::(n, f32::MIN, f32::MAX); c.bench_function("mlttb_scal_50M_f32", |b| { b.iter(|| { - minmaxlttb_mod::minmaxlttb_scalar_without_x( - black_box(y.view()), + minmaxlttb_mod::minmaxlttb_without_x( + black_box(y.as_slice()), black_box(2_000), black_box(MINMAX_RATIO), ) @@ -130,8 +129,8 @@ fn minmaxlttb_without_x_f32_random_array_50M_single_core(c: &mut Criterion) { }); c.bench_function("mlttb_simd_50M_f32", |b| { b.iter(|| { - minmaxlttb_mod::minmaxlttb_simd_without_x( - black_box(y.view()), + minmaxlttb_mod::minmaxlttb_without_x( + black_box(y.as_slice()), black_box(2_000), black_box(MINMAX_RATIO), ) @@ -145,8 +144,8 @@ fn minmaxlttb_without_x_f32_random_array_50M_multi_core(c: &mut Criterion) { let all_threads: usize = utils::get_all_threads(); c.bench_function("mlttb_scal_p_50M_f32", |b| { b.iter(|| { - minmaxlttb_mod::minmaxlttb_scalar_without_x_parallel( - black_box(y.view()), + minmaxlttb_mod::minmaxlttb_without_x_parallel( + black_box(y.as_slice()), black_box(2_000), black_box(MINMAX_RATIO), black_box(all_threads), @@ -155,8 +154,8 @@ fn minmaxlttb_without_x_f32_random_array_50M_multi_core(c: &mut Criterion) { }); c.bench_function("mlttb_simd_p_50M_f32", |b| { b.iter(|| { - minmaxlttb_mod::minmaxlttb_simd_without_x_parallel( - black_box(y.view()), + minmaxlttb_mod::minmaxlttb_without_x_parallel( + black_box(y.as_slice()), black_box(2_000), black_box(MINMAX_RATIO), black_box(all_threads), diff --git a/downsample_rs/dev_utils/Cargo.toml b/downsample_rs/dev_utils/Cargo.toml index 458e27c..2ab2e6b 100644 --- a/downsample_rs/dev_utils/Cargo.toml +++ b/downsample_rs/dev_utils/Cargo.toml @@ -6,6 +6,5 @@ edition = "2021" description = "Shared utilities for development (tests & benchmarks)" [dependencies] -ndarray = { version = "0.15.6", default-features = false } rand = { version = "0.7.2", default-features = false } rand_distr = { version = "0.2.2", default-features = false } diff --git a/downsample_rs/dev_utils/src/utils.rs b/downsample_rs/dev_utils/src/utils.rs index 77033f2..f3413ae 100644 --- a/downsample_rs/dev_utils/src/utils.rs +++ b/downsample_rs/dev_utils/src/utils.rs @@ -1,23 +1,21 @@ -use ndarray::Array1; - use std::ops::{Add, Sub}; use rand::{thread_rng, Rng}; use rand_distr::Uniform; // random array that samples between min and max of T -pub fn get_random_array(n: usize, min_value: T, max_value: T) -> Array1 +pub fn get_random_array(n: usize, min_value: T, max_value: T) -> Vec where T: Copy + rand::distributions::uniform::SampleUniform, { let rng = thread_rng(); let uni = Uniform::new_inclusive(min_value, max_value); let arr: Vec = rng.sample_iter(uni).take(n).collect(); - Array1::from(arr) + arr } // worst case array that alternates between increasing max and decreasing min values -pub fn get_worst_case_array(n: usize, step: T) -> Array1 +pub fn get_worst_case_array(n: usize, step: T) -> Vec where T: Copy + Default + Sub + Add, { @@ -33,7 +31,7 @@ where max_value = max_value + step; } } - Array1::from(arr) + arr } // ------------- Multi-threading ------------- diff --git a/downsample_rs/src/helpers.rs b/downsample_rs/src/helpers.rs index fb8b436..f2961ab 100644 --- a/downsample_rs/src/helpers.rs +++ b/downsample_rs/src/helpers.rs @@ -1,6 +1,6 @@ -#[cfg(feature = "half")] -use half::f16; -use ndarray::ArrayView1; +use num_traits::AsPrimitive; + +use crate::types::Num; // ------------ AVERAGE @@ -20,38 +20,14 @@ use ndarray::ArrayView1; // See more details: https://github.com/numpy/numpy/blob/8cec82012694571156e8d7696307c848a7603b4e/numpy/core/_methods.py#L164 pub trait Average { - fn average(self) -> f64; -} - -impl Average for ArrayView1<'_, f64> { - fn average(self) -> f64 { - self.mean().unwrap() - } -} - -impl Average for ArrayView1<'_, f32> { - fn average(self) -> f64 { - self.mean().unwrap() as f64 - } + fn average(&self) -> f64; } -#[cfg(feature = "half")] -impl Average for ArrayView1<'_, f16> { - fn average(self) -> f64 { - self.fold(0f32, |acc, &x| acc + x.to_f32()) as f64 / self.len() as f64 +impl Average for [T] +where + T: Num + AsPrimitive, +{ + fn average(&self) -> f64 { + self.iter().fold(0f64, |acc, &x| acc + x.as_()) as f64 / self.len() as f64 } } - -macro_rules! impl_average { - ($($t:ty)*) => ($( - impl Average for ArrayView1<'_, $t> { - #[inline(always)] - fn average(self) -> f64 { - self.fold(0f64, |acc, &x| acc + x as f64) / self.len() as f64 - } - } - )*) -} - -// Implement for all signed and unsigned integers -impl_average!(i8 i16 i32 i64 u8 u16 u32 u64); diff --git a/downsample_rs/src/lttb/scalar.rs b/downsample_rs/src/lttb.rs similarity index 69% rename from downsample_rs/src/lttb/scalar.rs rename to downsample_rs/src/lttb.rs index dd2542a..20d28d4 100644 --- a/downsample_rs/src/lttb/scalar.rs +++ b/downsample_rs/src/lttb.rs @@ -1,6 +1,5 @@ -use super::super::helpers::Average; -use super::super::types::Num; -use ndarray::{Array1, ArrayView1}; +use super::helpers::Average; +use super::types::Num; use num_traits::AsPrimitive; use std::cmp; @@ -16,16 +15,13 @@ fn f64_to_i64unsigned(v: f64) -> i64 { // ----------- WITH X pub fn lttb_with_x, Ty: Num + AsPrimitive>( - x: ArrayView1, - y: ArrayView1, + x: &[Tx], + y: &[Ty], n_out: usize, -) -> Array1 -where - for<'a> ArrayView1<'a, Ty>: Average, -{ +) -> Vec { assert_eq!(x.len(), y.len()); if n_out >= x.len() { - return Array1::from((0..x.len()).collect::>()); + return (0..x.len()).collect::>(); } assert!(n_out >= 3); // avoid division by 0 @@ -34,10 +30,7 @@ where // Initially a is the first point in the triangle. let mut a: usize = 0; - let mut sampled_indices: Array1 = Array1::::default(n_out); - - let x_ptr = x.as_ptr(); - let y_ptr = y.as_ptr(); + let mut sampled_indices: Vec = vec![usize::default(); n_out]; // Always add the first point sampled_indices[0] = 0; @@ -47,33 +40,30 @@ where let avg_range_start = (every * (i + 1) as f64) as usize + 1; let avg_range_end = cmp::min((every * (i + 2) as f64) as usize + 1, x.len()); - // ArrayBase::slice is rather expensive.. - let y_slice = unsafe { - ArrayView1::from_shape_ptr(avg_range_end - avg_range_start, y_ptr.add(avg_range_start)) - }; + let y_slice = &y[avg_range_start..avg_range_end]; let avg_y: f64 = y_slice.average(); // TODO: avg_y could be approximated argminmax instead of mean? // TODO: below is faster than above, but not as accurate // let avg_x: f64 = (x_slice[avg_range_end - 1].as_() + x_slice[avg_range_start].as_()) / 2.0; - let avg_x: f64 = - unsafe { (x.uget(avg_range_end - 1).as_() + x.uget(avg_range_start).as_()) / 2.0 }; + let avg_x: f64 = unsafe { + (x.get_unchecked(avg_range_end - 1).as_() + x.get_unchecked(avg_range_start).as_()) + / 2.0 + }; // Get the range for this bucket let range_offs = (every * i as f64) as usize + 1; let range_to = avg_range_start; // = start of the next bucket // Point a - let point_ax = unsafe { x.uget(a).as_() }; - let point_ay = unsafe { y.uget(a).as_() }; + let point_ax = unsafe { x.get_unchecked(a).as_() }; + let point_ay = unsafe { y.get_unchecked(a).as_() }; let d1 = point_ax - avg_x; let d2 = avg_y - point_ay; let offset: f64 = d1 * point_ay + d2 * point_ax; - let x_slice = - unsafe { std::slice::from_raw_parts(x_ptr.add(range_offs), range_to - range_offs) }; - let y_slice = - unsafe { std::slice::from_raw_parts(y_ptr.add(range_offs), range_to - range_offs) }; + let x_slice = &x[range_offs..range_to]; + let y_slice = &y[range_offs..range_to]; (_, a) = y_slice.iter().zip(x_slice.iter()).enumerate().fold( (-1i64, a), |(max_area, a), (i, (y_, x_))| { @@ -103,12 +93,9 @@ where // ----------- WITHOUT X -pub fn lttb_without_x>(y: ArrayView1, n_out: usize) -> Array1 -where - for<'a> ArrayView1<'a, Ty>: Average, -{ +pub fn lttb_without_x>(y: &[Ty], n_out: usize) -> Vec { if n_out >= y.len() { - return Array1::from((0..y.len()).collect::>()); + return (0..y.len()).collect::>(); } assert!(n_out >= 3); // avoid division by 0 @@ -117,9 +104,7 @@ where // Initially a is the first point in the triangle. let mut a: usize = 0; - let mut sampled_indices: Array1 = Array1::::default(n_out); - - let y_ptr = y.as_ptr(); + let mut sampled_indices: Vec = vec![usize::default(); n_out]; // Always add the first point sampled_indices[0] = 0; @@ -129,10 +114,7 @@ where let avg_range_start = (every * (i + 1) as f64) as usize + 1; let avg_range_end = cmp::min((every * (i + 2) as f64) as usize + 1, y.len()); - // ArrayBase::slice is rather expensive.. - let y_slice = unsafe { - ArrayView1::from_shape_ptr(avg_range_end - avg_range_start, y_ptr.add(avg_range_start)) - }; + let y_slice = &y[avg_range_start..avg_range_end]; let avg_y: f64 = y_slice.average(); let avg_x: f64 = (avg_range_start + avg_range_end - 1) as f64 / 2.0; @@ -141,7 +123,7 @@ where let range_to = avg_range_start; // = start of the next bucket // Point a - let point_ay = unsafe { y.uget(a).as_() }; + let point_ay = unsafe { y.get_unchecked(a).as_() }; let point_ax = a as f64; let d1 = point_ax - avg_x; @@ -153,8 +135,7 @@ where let offset: f64 = d1 * point_ay; // TODO: for some reason is this faster than the loop below -> check if this is true for other devices - let y_slice = - unsafe { ArrayView1::from_shape_ptr(range_to - range_offs, y_ptr.add(range_offs)) }; + let y_slice = &y[range_offs..range_to]; (_, a) = y_slice .iter() .enumerate() @@ -205,31 +186,30 @@ mod tests { use dev_utils::utils; use super::{lttb_with_x, lttb_without_x}; - use ndarray::{array, Array1}; #[test] fn test_lttb_with_x() { - let x = array![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; - let y = array![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; - let sampled_indices = lttb_with_x(x.view(), y.view(), 4); - assert_eq!(sampled_indices, array![0, 1, 5, 9]); + let x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; + let y = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; + let sampled_indices = lttb_with_x(&x, &y, 4); + assert_eq!(sampled_indices, vec![0, 1, 5, 9]); } #[test] fn test_lttb_without_x() { - let y = array![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; - let sampled_indices = lttb_without_x(y.view(), 4); - assert_eq!(sampled_indices, array![0, 1, 5, 9]); + let y = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; + let sampled_indices = lttb_without_x(&y, 4); + assert_eq!(sampled_indices, vec![0, 1, 5, 9]); } #[test] fn test_random_same_output() { for _ in 0..100 { - let n = 5_000; - let x: Array1 = Array1::from((0..n).map(|i| i as i32).collect::>()); - let y = utils::get_random_array(n, f32::MIN, f32::MAX); - let sampled_indices1 = lttb_with_x(x.view(), y.view(), 200); - let sampled_indices2 = lttb_without_x(y.view(), 200); + const N: usize = 5_000; + let x: [i32; N] = core::array::from_fn(|i| i as i32); + let y = utils::get_random_array(N, f32::MIN, f32::MAX); + let sampled_indices1 = lttb_with_x(&x, y.as_slice(), 200); + let sampled_indices2 = lttb_without_x(y.as_slice(), 200); assert_eq!(sampled_indices1, sampled_indices2); } } diff --git a/downsample_rs/src/lttb/mod.rs b/downsample_rs/src/lttb/mod.rs deleted file mode 100644 index dd215d2..0000000 --- a/downsample_rs/src/lttb/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -mod scalar; -pub use scalar::*; diff --git a/downsample_rs/src/m4.rs b/downsample_rs/src/m4.rs new file mode 100644 index 0000000..fa3dd50 --- /dev/null +++ b/downsample_rs/src/m4.rs @@ -0,0 +1,452 @@ +use argminmax::ArgMinMax; +use num_traits::{AsPrimitive, FromPrimitive}; +use rayon::iter::IndexedParallelIterator; +use rayon::prelude::*; + +use crate::searchsorted::{ + get_equidistant_bin_idx_iterator, get_equidistant_bin_idx_iterator_parallel, +}; +use crate::types::Num; + +// ----------------------------------- NON-PARALLEL ------------------------------------ + +// ----------- WITH X + +pub fn m4_with_x(x: &[Tx], arr: &[Ty], n_out: usize) -> Vec +where + for<'a> &'a [Ty]: ArgMinMax, + Tx: Num + FromPrimitive + AsPrimitive, + Ty: Copy + PartialOrd, +{ + assert_eq!(n_out % 4, 0); + let bin_idx_iterator = get_equidistant_bin_idx_iterator(x, n_out / 4); + m4_generic_with_x(arr, bin_idx_iterator, n_out, |arr| arr.argminmax()) +} + +// ----------- WITHOUT X + +pub fn m4_without_x(arr: &[T], n_out: usize) -> Vec +where + for<'a> &'a [T]: ArgMinMax, +{ + assert_eq!(n_out % 4, 0); + m4_generic(arr, n_out, |arr| arr.argminmax()) +} + +// ------------------------------------- PARALLEL -------------------------------------- + +// ----------- WITH X + +pub fn m4_with_x_parallel( + x: &[Tx], + arr: &[Ty], + n_out: usize, + n_threads: usize, +) -> Vec +where + for<'a> &'a [Ty]: ArgMinMax, + Tx: Num + FromPrimitive + AsPrimitive + Send + Sync, + Ty: Copy + PartialOrd + Send + Sync, +{ + assert_eq!(n_out % 4, 0); + let bin_idx_iterator = get_equidistant_bin_idx_iterator_parallel(x, n_out / 4, n_threads); + m4_generic_with_x_parallel(arr, bin_idx_iterator, n_out, n_threads, |arr| { + arr.argminmax() + }) +} + +// ----------- WITHOUT X + +pub fn m4_without_x_parallel( + arr: &[T], + n_out: usize, + n_threads: usize, +) -> Vec +where + for<'a> &'a [T]: ArgMinMax, +{ + assert_eq!(n_out % 4, 0); + m4_generic_parallel(arr, n_out, n_threads, |arr| arr.argminmax()) +} + +// TODO: check for duplicate data in the output array +// -> In the current implementation we always add 4 datapoints per bin (if of +// course the bin has >= 4 datapoints). However, the argmin and argmax might +// be the start and end of the bin, which would result in duplicate data in +// the output array. (this is for example the case for monotonic data). + +// ----------------- GENERICS + +// --------------------- WITHOUT X + +#[inline(always)] +pub(crate) fn m4_generic( + arr: &[T], + n_out: usize, + f_argminmax: fn(&[T]) -> (usize, usize), +) -> Vec { + // Assumes n_out is a multiple of 4 + if n_out >= arr.len() { + return (0..arr.len()).collect(); + } + + // arr.len() - 1 is used to match the delta of a range-index (0..arr.len()-1) + let block_size: f64 = (arr.len() - 1) as f64 / (n_out / 4) as f64; + + let mut sampled_indices: Vec = vec![usize::default(); n_out]; + + let mut start_idx: usize = 0; + for i in 0..n_out / 4 { + // Decided to use multiplication instead of adding to the accumulator (end) + // as multiplication seems to be less prone to rounding errors. + let end: f64 = block_size * (i + 1) as f64; + let end_idx: usize = end as usize + 1; + + let (min_index, max_index) = f_argminmax(&arr[start_idx..end_idx]); + + // Add the indexes in sorted order + sampled_indices[4 * i] = start_idx; + if min_index < max_index { + sampled_indices[4 * i + 1] = min_index + start_idx; + sampled_indices[4 * i + 2] = max_index + start_idx; + } else { + sampled_indices[4 * i + 1] = max_index + start_idx; + sampled_indices[4 * i + 2] = min_index + start_idx; + } + sampled_indices[4 * i + 3] = end_idx - 1; + + start_idx = end_idx; + } + + sampled_indices +} + +#[inline(always)] +pub(crate) fn m4_generic_parallel( + arr: &[T], + n_out: usize, + n_threads: usize, + f_argminmax: fn(&[T]) -> (usize, usize), +) -> Vec { + // Assumes n_out is a multiple of 4 + if n_out >= arr.len() { + return (0..arr.len()).collect::>(); + } + + // arr.len() - 1 is used to match the delta of a range-index (0..arr.len()-1) + let block_size: f64 = (arr.len() - 1) as f64 / (n_out / 4) as f64; + + // Store the enumerated indexes in the output array + let mut sampled_indices: Vec = (0..n_out).collect::>(); + + // to limit the amounts of threads Rayon uses, an explicit threadpool needs to be created + // in which the required code is "installed". This limits the amount of used threads. + // https://docs.rs/rayon/latest/rayon/struct.ThreadPool.html#method.install + let pool = rayon::ThreadPoolBuilder::new() + .num_threads(n_threads) + .build(); + + let func = || { + for chunk in sampled_indices.chunks_exact_mut(4) { + let i: f64 = unsafe { *chunk.get_unchecked(0) >> 2 } as f64; + let start_idx: usize = (block_size * i) as usize + (i != 0.0) as usize; + let end_idx: usize = (block_size * (i + 1.0)) as usize + 1; + + let (min_index, max_index) = f_argminmax(&arr[start_idx..end_idx]); + + chunk[0] = start_idx; + // Add the indexes in sorted order + if min_index < max_index { + chunk[1] = min_index + start_idx; + chunk[2] = max_index + start_idx; + } else { + chunk[1] = max_index + start_idx; + chunk[2] = min_index + start_idx; + } + chunk[3] = end_idx - 1; + } + }; + + pool.unwrap().install(func); // allow panic if pool could not be created + + sampled_indices.to_vec() +} + +// --------------------- WITH X + +#[inline(always)] +pub(crate) fn m4_generic_with_x( + arr: &[T], + bin_idx_iterator: impl Iterator>, + n_out: usize, + f_argminmax: fn(&[T]) -> (usize, usize), +) -> Vec { + // Assumes n_out is a multiple of 4 + if n_out >= arr.len() { + return (0..arr.len()).collect::>(); + } + + let mut sampled_indices: Vec = Vec::with_capacity(n_out); + + bin_idx_iterator.for_each(|bin| { + if let Some((start, end)) = bin { + if end <= start + 4 { + // If the bin has <= 4 elements, just add them all + for i in start..end { + sampled_indices.push(i); + } + } else { + // If the bin has > 4 elements, add the first and last + argmin and argmax + let step = &arr[start..end]; + let (min_index, max_index) = f_argminmax(step); + + sampled_indices.push(start); + + // Add the indexes in sorted order + if min_index < max_index { + sampled_indices.push(min_index + start); + sampled_indices.push(max_index + start); + } else { + sampled_indices.push(max_index + start); + sampled_indices.push(min_index + start); + } + + sampled_indices.push(end - 1); + } + } + }); + + sampled_indices +} + +#[inline(always)] +pub(crate) fn m4_generic_with_x_parallel( + arr: &[T], + bin_idx_iterator: impl IndexedParallelIterator>>, + n_out: usize, + n_threads: usize, + f_argminmax: fn(&[T]) -> (usize, usize), +) -> Vec { + // Assumes n_out is a multiple of 4 + if n_out >= arr.len() { + return (0..arr.len()).collect::>(); + } + + // to limit the amounts of threads Rayon uses, an explicit threadpool needs to be created + // in which the required code is "installed". This limits the amount of used threads. + // https://docs.rs/rayon/latest/rayon/struct.ThreadPool.html#method.install + let pool = rayon::ThreadPoolBuilder::new() + .num_threads(n_threads) + .build(); + + let iter_func = || { + bin_idx_iterator + .flat_map(|bin_idx_iterator| { + bin_idx_iterator + .map(|bin| { + match bin { + Some((start, end)) => { + if end <= start + 4 { + // If the bin has <= 4 elements, just return them all + return (start..end).collect::>(); + } + + // If the bin has > 4 elements, return the first and last + argmin and argmax + let step = &arr[start..end]; + let (min_index, max_index) = f_argminmax(step); + + // Return the indexes in sorted order + let mut sampled_index = vec![start, 0, 0, end - 1]; + if min_index < max_index { + sampled_index[1] = min_index + start; + sampled_index[2] = max_index + start; + } else { + sampled_index[1] = max_index + start; + sampled_index[2] = min_index + start; + } + sampled_index + } // If the bin is empty, return empty Vec + None => { + vec![] + } + } + }) + .collect::>>() + }) + .flatten() + .collect::>() + }; + + let result = pool.unwrap().install(iter_func); // allow panic if pool could not be created + result +} + +#[cfg(test)] +mod tests { + use num_traits::AsPrimitive; + use rstest::rstest; + use rstest_reuse::{self, *}; + + use super::{m4_with_x, m4_without_x}; + use super::{m4_with_x_parallel, m4_without_x_parallel}; + + use dev_utils::utils; + + fn get_array_f32(n: usize) -> Vec { + utils::get_random_array(n, f32::MIN, f32::MAX) + } + + // Template for the n_threads matrix + #[template] + #[rstest] + #[case(1)] + #[case(utils::get_all_threads() / 2)] + #[case(utils::get_all_threads())] + #[case(utils::get_all_threads() * 2)] + fn threads(#[case] n_threads: usize) {} + + #[test] + fn test_m4_scalar_without_x_correct() { + let arr: [f32; 100] = core::array::from_fn(|i| i.as_()); + + let sampled_indices = m4_without_x(&arr, 12); + let sampled_values = sampled_indices + .iter() + .map(|x| arr[*x]) + .collect::>(); + + let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99]; + let expected_values = expected_indices + .iter() + .map(|x| *x as f32) + .collect::>(); + + assert_eq!(sampled_indices, expected_indices); + assert_eq!(sampled_values, expected_values); + } + + #[apply(threads)] + fn test_m4_scalar_without_x_parallel_correct(n_threads: usize) { + let arr: [f32; 100] = core::array::from_fn(|i| i.as_()); + + let sampled_indices = m4_without_x_parallel(&arr, 12, n_threads); + let sampled_values = sampled_indices + .iter() + .map(|x| arr[*x]) + .collect::>(); + + let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99]; + let expected_values = expected_indices + .iter() + .map(|x| *x as f32) + .collect::>(); + + assert_eq!(sampled_indices, expected_indices); + assert_eq!(sampled_values, expected_values); + } + + #[test] + fn test_m4_scalar_with_x_correct() { + let x: [i32; 100] = core::array::from_fn(|i| i.as_()); + let arr: [f32; 100] = core::array::from_fn(|i| i.as_()); + + let sampled_indices = m4_with_x(&x, &arr, 12); + let sampled_values = sampled_indices + .iter() + .map(|x| arr[*x]) + .collect::>(); + + let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99]; + let expected_values = expected_indices + .iter() + .map(|x| *x as f32) + .collect::>(); + + assert_eq!(sampled_indices, expected_indices); + assert_eq!(sampled_values, expected_values); + } + + #[apply(threads)] + fn test_m4_scalar_with_x_parallel_correct(n_threads: usize) { + let x: [i32; 100] = core::array::from_fn(|i| i.as_()); + let arr: [f32; 100] = core::array::from_fn(|i| i.as_()); + + let sampled_indices = m4_with_x_parallel(&x, &arr, 12, n_threads); + let sampled_values = sampled_indices + .iter() + .map(|x| arr[*x]) + .collect::>(); + + let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99]; + let expected_values = expected_indices + .iter() + .map(|x| *x as f32) + .collect::>(); + + assert_eq!(sampled_indices, expected_indices); + assert_eq!(sampled_values, expected_values); + } + + #[test] + fn test_m4_scalar_with_x_gap() { + // We will create a gap in the middle of the array + // Increment the second half of the array by 50 + let x: [i32; 100] = core::array::from_fn(|i| if i > 50 { (i + 50).as_() } else { i.as_() }); + let arr: [f32; 100] = core::array::from_fn(|i| i.as_()); + + let sampled_indices = m4_with_x(&x, &arr, 20); + assert_eq!(sampled_indices.len(), 16); // One full gap + let expected_indices = vec![0, 0, 29, 29, 30, 30, 50, 50, 51, 51, 69, 69, 70, 70, 99, 99]; + assert_eq!(sampled_indices, expected_indices); + + // Increment the second half of the array by 50 again + let x = x.map(|x| if x > 101 { x + 50 } else { x }); + + let sampled_indices = m4_with_x(&x, &arr, 20); + assert_eq!(sampled_indices.len(), 17); // Gap with 1 value + let expected_indices = vec![ + 0, 0, 39, 39, 40, 40, 50, 50, 51, 52, 52, 59, 59, 60, 60, 99, 99, + ]; + assert_eq!(sampled_indices, expected_indices); + } + + #[apply(threads)] + fn test_m4_scalar_with_x_gap_parallel(n_threads: usize) { + // We will create a gap in the middle of the array + // Increment the second half of the array by 50 + let x: [i32; 100] = core::array::from_fn(|i| if i > 50 { (i + 50).as_() } else { i.as_() }); + let arr: [f32; 100] = core::array::from_fn(|i| i.as_()); + + let sampled_indices = m4_with_x_parallel(&x, &arr, 20, n_threads); + assert_eq!(sampled_indices.len(), 16); // One full gap + let expected_indices = vec![0, 0, 29, 29, 30, 30, 50, 50, 51, 51, 69, 69, 70, 70, 99, 99]; + assert_eq!(sampled_indices, expected_indices); + + // Increment the second half of the array by 50 again + let x = x.map(|x| if x > 101 { x + 50 } else { x }); + + let sampled_indices = m4_with_x_parallel(&x, &arr, 20, n_threads); + assert_eq!(sampled_indices.len(), 17); // Gap with 1 value + let expected_indices = vec![ + 0, 0, 39, 39, 40, 40, 50, 50, 51, 52, 52, 59, 59, 60, 60, 99, 99, + ]; + assert_eq!(sampled_indices, expected_indices); + } + + #[apply(threads)] + fn test_many_random_runs_correct(n_threads: usize) { + const N: usize = 20_003; + const N_OUT: usize = 204; + let x: [i32; N] = core::array::from_fn(|i| i.as_()); + for _ in 0..100 { + let arr = get_array_f32(N); + let idxs1 = m4_without_x(arr.as_slice(), N_OUT); + let idxs2 = m4_with_x(&x, arr.as_slice(), N_OUT); + assert_eq!(idxs1, idxs2); + let idxs3 = m4_without_x_parallel(arr.as_slice(), N_OUT, n_threads); + let idxs4 = m4_with_x_parallel(&x, arr.as_slice(), N_OUT, n_threads); + assert_eq!(idxs1, idxs3); + assert_eq!(idxs1, idxs4); // TODO: this should not fail when n_threads = 16 + } + } +} diff --git a/downsample_rs/src/m4/generic.rs b/downsample_rs/src/m4/generic.rs deleted file mode 100644 index e384660..0000000 --- a/downsample_rs/src/m4/generic.rs +++ /dev/null @@ -1,227 +0,0 @@ -use ndarray::Zip; -use ndarray::{Array1, ArrayView1}; - -use rayon::iter::IndexedParallelIterator; -use rayon::prelude::*; - -// TODO: check for duplicate data in the output array -// -> In the current implementation we always add 4 datapoints per bin (if of -// course the bin has >= 4 datapoints). However, the argmin and argmax might -// be the start and end of the bin, which would result in duplicate data in -// the output array. (this is for example the case for monotonic data). - -// --------------------- WITHOUT X - -#[inline(always)] -pub(crate) fn m4_generic( - arr: ArrayView1, - n_out: usize, - f_argminmax: fn(ArrayView1) -> (usize, usize), -) -> Array1 { - // Assumes n_out is a multiple of 4 - if n_out >= arr.len() { - return Array1::from((0..arr.len()).collect::>()); - } - - // arr.len() - 1 is used to match the delta of a range-index (0..arr.len()-1) - let block_size: f64 = (arr.len() - 1) as f64 / (n_out / 4) as f64; - let arr_ptr = arr.as_ptr(); - - let mut sampled_indices: Array1 = Array1::::default(n_out); - - let mut start_idx: usize = 0; - for i in 0..n_out / 4 { - // Decided to use multiplication instead of adding to the accumulator (end) - // as multiplication seems to be less prone to rounding errors. - let end: f64 = block_size * (i + 1) as f64; - let end_idx: usize = end as usize + 1; - - let (min_index, max_index) = f_argminmax(unsafe { - ArrayView1::from_shape_ptr((end_idx - start_idx,), arr_ptr.add(start_idx)) - }); - - // Add the indexes in sorted order - sampled_indices[4 * i] = start_idx; - if min_index < max_index { - sampled_indices[4 * i + 1] = min_index + start_idx; - sampled_indices[4 * i + 2] = max_index + start_idx; - } else { - sampled_indices[4 * i + 1] = max_index + start_idx; - sampled_indices[4 * i + 2] = min_index + start_idx; - } - sampled_indices[4 * i + 3] = end_idx - 1; - - start_idx = end_idx; - } - - sampled_indices -} - -#[inline(always)] -pub(crate) fn m4_generic_parallel( - arr: ArrayView1, - n_out: usize, - n_threads: usize, - f_argminmax: fn(ArrayView1) -> (usize, usize), -) -> Array1 { - // Assumes n_out is a multiple of 4 - if n_out >= arr.len() { - return Array1::from((0..arr.len()).collect::>()); - } - - // arr.len() - 1 is used to match the delta of a range-index (0..arr.len()-1) - let block_size: f64 = (arr.len() - 1) as f64 / (n_out / 4) as f64; - - // Store the enumerated indexes in the output array - let mut sampled_indices: Array1 = Array1::from_vec((0..n_out).collect::>()); - - // to limit the amounts of threads Rayon uses, an explicit threadpool needs to be created - // in which the required code is "installed". This limits the amount of used threads. - // https://docs.rs/rayon/latest/rayon/struct.ThreadPool.html#method.install - let pool = rayon::ThreadPoolBuilder::new() - .num_threads(n_threads) - .build(); - - let zip_func = || { - Zip::from(sampled_indices.exact_chunks_mut(4)).par_for_each(|mut sampled_index| { - let i: f64 = unsafe { *sampled_index.uget(0) >> 2 } as f64; - let start_idx: usize = (block_size * i) as usize + (i != 0.0) as usize; - let end_idx: usize = (block_size * (i + 1.0)) as usize + 1; - - let (min_index, max_index) = f_argminmax(unsafe { - ArrayView1::from_shape_ptr((end_idx - start_idx,), arr.as_ptr().add(start_idx)) - }); - - sampled_index[0] = start_idx; - // Add the indexes in sorted order - if min_index < max_index { - sampled_index[1] = min_index + start_idx; - sampled_index[2] = max_index + start_idx; - } else { - sampled_index[1] = max_index + start_idx; - sampled_index[2] = min_index + start_idx; - } - sampled_index[3] = end_idx - 1; - }) - }; - - pool.unwrap().install(zip_func); // allow panic if pool could not be created - - sampled_indices -} - -// --------------------- WITH X - -#[inline(always)] -pub(crate) fn m4_generic_with_x( - arr: ArrayView1, - bin_idx_iterator: impl Iterator>, - n_out: usize, - f_argminmax: fn(ArrayView1) -> (usize, usize), -) -> Array1 { - // Assumes n_out is a multiple of 4 - if n_out >= arr.len() { - return Array1::from((0..arr.len()).collect::>()); - } - - let arr_ptr = arr.as_ptr(); - let mut sampled_indices: Vec = Vec::with_capacity(n_out); - - bin_idx_iterator.for_each(|bin| { - if let Some((start, end)) = bin { - if end <= start + 4 { - // If the bin has <= 4 elements, just add them all - for i in start..end { - sampled_indices.push(i); - } - } else { - // If the bin has > 4 elements, add the first and last + argmin and argmax - let step = unsafe { ArrayView1::from_shape_ptr(end - start, arr_ptr.add(start)) }; - let (min_index, max_index) = f_argminmax(step); - - sampled_indices.push(start); - - // Add the indexes in sorted order - if min_index < max_index { - sampled_indices.push(min_index + start); - sampled_indices.push(max_index + start); - } else { - sampled_indices.push(max_index + start); - sampled_indices.push(min_index + start); - } - - sampled_indices.push(end - 1); - } - } - }); - - Array1::from_vec(sampled_indices) -} - -#[inline(always)] -pub(crate) fn m4_generic_with_x_parallel( - arr: ArrayView1, - bin_idx_iterator: impl IndexedParallelIterator>>, - n_out: usize, - n_threads: usize, - f_argminmax: fn(ArrayView1) -> (usize, usize), -) -> Array1 { - // Assumes n_out is a multiple of 4 - if n_out >= arr.len() { - return Array1::from((0..arr.len()).collect::>()); - } - - // to limit the amounts of threads Rayon uses, an explicit threadpool needs to be created - // in which the required code is "installed". This limits the amount of used threads. - // https://docs.rs/rayon/latest/rayon/struct.ThreadPool.html#method.install - let pool = rayon::ThreadPoolBuilder::new() - .num_threads(n_threads) - .build(); - - let iter_func = || { - Array1::from_vec( - bin_idx_iterator - .flat_map(|bin_idx_iterator| { - bin_idx_iterator - .map(|bin| { - match bin { - Some((start, end)) => { - if end <= start + 4 { - // If the bin has <= 4 elements, just return them all - return (start..end).collect::>(); - } - - // If the bin has > 4 elements, return the first and last + argmin and argmax - let step = unsafe { - ArrayView1::from_shape_ptr( - end - start, - arr.as_ptr().add(start), - ) - }; - let (min_index, max_index) = f_argminmax(step); - - // Return the indexes in sorted order - let mut sampled_index = vec![start, 0, 0, end - 1]; - if min_index < max_index { - sampled_index[1] = min_index + start; - sampled_index[2] = max_index + start; - } else { - sampled_index[1] = max_index + start; - sampled_index[2] = min_index + start; - } - sampled_index - } // If the bin is empty, return empty Vec - None => { - vec![] - } - } - }) - .collect::>>() - }) - .flatten() - .collect::>(), - ) - }; - - pool.unwrap().install(iter_func) // allow panic if pool could not be created -} diff --git a/downsample_rs/src/m4/mod.rs b/downsample_rs/src/m4/mod.rs deleted file mode 100644 index 3628374..0000000 --- a/downsample_rs/src/m4/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -pub mod scalar; -pub use scalar::*; -pub mod simd; -pub use simd::*; -mod generic; // private module diff --git a/downsample_rs/src/m4/scalar.rs b/downsample_rs/src/m4/scalar.rs deleted file mode 100644 index faebc08..0000000 --- a/downsample_rs/src/m4/scalar.rs +++ /dev/null @@ -1,263 +0,0 @@ -use argminmax::{ScalarArgMinMax, SCALAR}; - -use ndarray::{Array1, ArrayView1}; -use num_traits::{AsPrimitive, FromPrimitive}; - -use super::super::searchsorted::{ - get_equidistant_bin_idx_iterator, get_equidistant_bin_idx_iterator_parallel, -}; -use super::super::types::Num; -use super::generic::{m4_generic, m4_generic_parallel}; -use super::generic::{m4_generic_with_x, m4_generic_with_x_parallel}; - -// ----------------------------------- NON-PARALLEL ------------------------------------ - -// ----------- WITH X - -pub fn m4_scalar_with_x( - x: ArrayView1, - arr: ArrayView1, - n_out: usize, -) -> Array1 -where - SCALAR: ScalarArgMinMax, - Tx: Num + FromPrimitive + AsPrimitive, - Ty: Copy + PartialOrd, -{ - assert_eq!(n_out % 4, 0); - let bin_idx_iterator = get_equidistant_bin_idx_iterator(x, n_out / 4); - m4_generic_with_x(arr, bin_idx_iterator, n_out, SCALAR::argminmax) -} - -// ----------- WITHOUT X - -pub fn m4_scalar_without_x(arr: ArrayView1, n_out: usize) -> Array1 -where - SCALAR: ScalarArgMinMax, -{ - assert_eq!(n_out % 4, 0); - m4_generic(arr, n_out, SCALAR::argminmax) -} - -// ------------------------------------- PARALLEL -------------------------------------- - -// ----------- WITH X - -pub fn m4_scalar_with_x_parallel( - x: ArrayView1, - arr: ArrayView1, - n_out: usize, - n_threads: usize, -) -> Array1 -where - SCALAR: ScalarArgMinMax, - Tx: Num + FromPrimitive + AsPrimitive + Send + Sync, - Ty: Copy + PartialOrd + Send + Sync, -{ - assert_eq!(n_out % 4, 0); - let bin_idx_iterator = get_equidistant_bin_idx_iterator_parallel(x, n_out / 4, n_threads); - m4_generic_with_x_parallel(arr, bin_idx_iterator, n_out, n_threads, SCALAR::argminmax) -} - -// ----------- WITHOUT X - -pub fn m4_scalar_without_x_parallel( - arr: ArrayView1, - n_out: usize, - n_threads: usize, -) -> Array1 -where - SCALAR: ScalarArgMinMax, -{ - assert_eq!(n_out % 4, 0); - m4_generic_parallel(arr, n_out, n_threads, SCALAR::argminmax) -} - -// --------------------------------------- TESTS --------------------------------------- - -#[cfg(test)] -mod tests { - use rstest::rstest; - use rstest_reuse::{self, *}; - - use super::{m4_scalar_with_x, m4_scalar_without_x}; - use super::{m4_scalar_with_x_parallel, m4_scalar_without_x_parallel}; - use ndarray::Array1; - - use dev_utils::utils; - - fn get_array_f32(n: usize) -> Array1 { - utils::get_random_array(n, f32::MIN, f32::MAX) - } - - // Template for the n_threads matrix - #[template] - #[rstest] - #[case(1)] - #[case(utils::get_all_threads() / 2)] - #[case(utils::get_all_threads())] - #[case(utils::get_all_threads() * 2)] - fn threads(#[case] n_threads: usize) {} - - #[test] - fn test_m4_scalar_without_x_correct() { - let arr = (0..100).map(|x| x as f32).collect::>(); - let arr = Array1::from(arr); - - let sampled_indices = m4_scalar_without_x(arr.view(), 12); - let sampled_values = sampled_indices.mapv(|x| arr[x]); - - let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99]; - let expected_values = expected_indices - .iter() - .map(|x| *x as f32) - .collect::>(); - - assert_eq!(sampled_indices, Array1::from(expected_indices)); - assert_eq!(sampled_values, Array1::from(expected_values)); - } - - #[apply(threads)] - fn test_m4_scalar_without_x_parallel_correct(n_threads: usize) { - let arr = (0..100).map(|x| x as f32).collect::>(); - let arr = Array1::from(arr); - - let sampled_indices = m4_scalar_without_x_parallel(arr.view(), 12, n_threads); - let sampled_values = sampled_indices.mapv(|x| arr[x]); - - let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99]; - let expected_values = expected_indices - .iter() - .map(|x| *x as f32) - .collect::>(); - - assert_eq!(sampled_indices, Array1::from(expected_indices)); - assert_eq!(sampled_values, Array1::from(expected_values)); - } - - #[test] - fn test_m4_scalar_with_x_correct() { - let x = (0..100).collect::>(); - let x = Array1::from(x); - let arr = (0..100).map(|x| x as f32).collect::>(); - let arr = Array1::from(arr); - - let sampled_indices = m4_scalar_with_x(x.view(), arr.view(), 12); - let sampled_values = sampled_indices.mapv(|x| arr[x]); - - let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99]; - let expected_values = expected_indices - .iter() - .map(|x| *x as f32) - .collect::>(); - - assert_eq!(sampled_indices, Array1::from(expected_indices)); - assert_eq!(sampled_values, Array1::from(expected_values)); - } - - #[apply(threads)] - fn test_m4_scalar_with_x_parallel_correct(n_threads: usize) { - let x = (0..100).collect::>(); - let x = Array1::from(x); - let arr = (0..100).map(|x| x as f32).collect::>(); - let arr = Array1::from(arr); - - let sampled_indices = m4_scalar_with_x_parallel(x.view(), arr.view(), 12, n_threads); - let sampled_values = sampled_indices.mapv(|x| arr[x]); - - let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99]; - let expected_values = expected_indices - .iter() - .map(|x| *x as f32) - .collect::>(); - - assert_eq!(sampled_indices, Array1::from(expected_indices)); - assert_eq!(sampled_values, Array1::from(expected_values)); - } - - #[test] - fn test_m4_scalar_with_x_gap() { - // We will create a gap in the middle of the array - let x = (0..100).collect::>(); - - // Increment the second half of the array by 50 - let x = x - .iter() - .map(|x| if *x > 50 { *x + 50 } else { *x }) - .collect::>(); - let x = Array1::from(x); - let arr = (0..100).map(|x| x as f32).collect::>(); - let arr = Array1::from(arr); - - let sampled_indices = m4_scalar_with_x(x.view(), arr.view(), 20); - assert_eq!(sampled_indices.len(), 16); // One full gap - let expected_indices = vec![0, 0, 29, 29, 30, 30, 50, 50, 51, 51, 69, 69, 70, 70, 99, 99]; - assert_eq!(sampled_indices, Array1::from(expected_indices)); - - // Increment the second half of the array by 50 again - let x = x - .iter() - .map(|x| if *x > 101 { *x + 50 } else { *x }) - .collect::>(); - let x = Array1::from(x); - - let sampled_indices = m4_scalar_with_x(x.view(), arr.view(), 20); - assert_eq!(sampled_indices.len(), 17); // Gap with 1 value - let expected_indices = vec![ - 0, 0, 39, 39, 40, 40, 50, 50, 51, 52, 52, 59, 59, 60, 60, 99, 99, - ]; - assert_eq!(sampled_indices, Array1::from(expected_indices)); - } - - #[apply(threads)] - fn test_m4_scalar_with_x_gap_parallel(n_threads: usize) { - // We will create a gap in the middle of the array - let x = (0..100).collect::>(); - - // Increment the second half of the array by 50 - let x = x - .iter() - .map(|x| if *x > 50 { *x + 50 } else { *x }) - .collect::>(); - let x = Array1::from(x); - let arr = (0..100).map(|x| x as f32).collect::>(); - let arr = Array1::from(arr); - - let sampled_indices = m4_scalar_with_x_parallel(x.view(), arr.view(), 20, n_threads); - assert_eq!(sampled_indices.len(), 16); // One full gap - let expected_indices = vec![0, 0, 29, 29, 30, 30, 50, 50, 51, 51, 69, 69, 70, 70, 99, 99]; - assert_eq!(sampled_indices, Array1::from(expected_indices)); - - // Increment the second half of the array by 50 again - let x = x - .iter() - .map(|x| if *x > 101 { *x + 50 } else { *x }) - .collect::>(); - let x = Array1::from(x); - - let sampled_indices = m4_scalar_with_x_parallel(x.view(), arr.view(), 20, n_threads); - assert_eq!(sampled_indices.len(), 17); // Gap with 1 value - let expected_indices = vec![ - 0, 0, 39, 39, 40, 40, 50, 50, 51, 52, 52, 59, 59, 60, 60, 99, 99, - ]; - assert_eq!(sampled_indices, Array1::from(expected_indices)); - } - - #[apply(threads)] - fn test_many_random_runs_correct(n_threads: usize) { - let n: usize = 20_003; - let n_out: usize = 204; - let x = (0..n as i32).collect::>(); - let x = Array1::from(x); - for _ in 0..100 { - let arr = get_array_f32(n); - let idxs1 = m4_scalar_without_x(arr.view(), n_out); - let idxs2 = m4_scalar_with_x(x.view(), arr.view(), n_out); - assert_eq!(idxs1, idxs2); - let idxs3 = m4_scalar_without_x_parallel(arr.view(), n_out, n_threads); - let idxs4 = m4_scalar_with_x_parallel(x.view(), arr.view(), n_out, n_threads); - assert_eq!(idxs1, idxs3); - assert_eq!(idxs1, idxs4); // TODO: this should not fail when n_threads = 16 - } - } -} diff --git a/downsample_rs/src/m4/simd.rs b/downsample_rs/src/m4/simd.rs deleted file mode 100644 index 9b11ad1..0000000 --- a/downsample_rs/src/m4/simd.rs +++ /dev/null @@ -1,261 +0,0 @@ -use argminmax::ArgMinMax; - -use ndarray::{Array1, ArrayView1}; -use num_traits::{AsPrimitive, FromPrimitive}; - -use super::super::searchsorted::{ - get_equidistant_bin_idx_iterator, get_equidistant_bin_idx_iterator_parallel, -}; -use super::super::types::Num; -use super::generic::{m4_generic, m4_generic_parallel}; -use super::generic::{m4_generic_with_x, m4_generic_with_x_parallel}; - -// ----------------------------------- NON-PARALLEL ------------------------------------ - -// ----------- WITH X - -pub fn m4_simd_with_x(x: ArrayView1, arr: ArrayView1, n_out: usize) -> Array1 -where - for<'a> ArrayView1<'a, Ty>: ArgMinMax, - Tx: Num + FromPrimitive + AsPrimitive, - Ty: Copy + PartialOrd, -{ - assert_eq!(n_out % 4, 0); - let bin_idx_iterator = get_equidistant_bin_idx_iterator(x, n_out / 4); - m4_generic_with_x(arr, bin_idx_iterator, n_out, |arr| arr.argminmax()) -} - -// ----------- WITHOUT X - -pub fn m4_simd_without_x(arr: ArrayView1, n_out: usize) -> Array1 -where - for<'a> ArrayView1<'a, T>: ArgMinMax, -{ - assert_eq!(n_out % 4, 0); - m4_generic(arr, n_out, |arr| arr.argminmax()) -} - -// ------------------------------------- PARALLEL -------------------------------------- - -// ----------- WITH X - -pub fn m4_simd_with_x_parallel( - x: ArrayView1, - arr: ArrayView1, - n_out: usize, - n_threads: usize, -) -> Array1 -where - for<'a> ArrayView1<'a, Ty>: ArgMinMax, - Tx: Num + FromPrimitive + AsPrimitive + Send + Sync, - Ty: Copy + PartialOrd + Send + Sync, -{ - assert_eq!(n_out % 4, 0); - let bin_idx_iterator = get_equidistant_bin_idx_iterator_parallel(x, n_out / 4, n_threads); - m4_generic_with_x_parallel(arr, bin_idx_iterator, n_out, n_threads, |arr| { - arr.argminmax() - }) -} - -// ----------- WITHOUT X - -pub fn m4_simd_without_x_parallel( - arr: ArrayView1, - n_out: usize, - n_threads: usize, -) -> Array1 -where - for<'a> ArrayView1<'a, T>: ArgMinMax, -{ - assert_eq!(n_out % 4, 0); - m4_generic_parallel(arr, n_out, n_threads, |arr| arr.argminmax()) -} - -// --------------------------------------- TESTS --------------------------------------- - -#[cfg(test)] -mod tests { - use rstest::rstest; - use rstest_reuse::{self, *}; - - use super::{m4_simd_with_x, m4_simd_without_x}; - use super::{m4_simd_with_x_parallel, m4_simd_without_x_parallel}; - use ndarray::Array1; - - use dev_utils::utils; - - fn get_array_f32(n: usize) -> Array1 { - utils::get_random_array(n, f32::MIN, f32::MAX) - } - - // Template for the n_threads matrix - #[template] - #[rstest] - #[case(1)] - #[case(utils::get_all_threads() / 2)] - #[case(utils::get_all_threads())] - #[case(utils::get_all_threads() * 2)] - fn threads(#[case] n_threads: usize) {} - - #[test] - fn test_m4_simd_without_x_correct() { - let arr = (0..100).map(|x| x as f32).collect::>(); - let arr = Array1::from(arr); - - let sampled_indices = m4_simd_without_x(arr.view(), 12); - let sampled_values = sampled_indices.mapv(|x| arr[x]); - - let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99]; - let expected_values = expected_indices - .iter() - .map(|x| *x as f32) - .collect::>(); - - assert_eq!(sampled_indices, Array1::from(expected_indices)); - assert_eq!(sampled_values, Array1::from(expected_values)); - } - - #[apply(threads)] - fn test_m4_simd_without_x_parallel_correct(n_threads: usize) { - let arr = (0..100).map(|x| x as f32).collect::>(); - let arr = Array1::from(arr); - - let sampled_indices = m4_simd_without_x_parallel(arr.view(), 12, n_threads); - let sampled_values = sampled_indices.mapv(|x| arr[x]); - - let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99]; - let expected_values = expected_indices - .iter() - .map(|x| *x as f32) - .collect::>(); - - assert_eq!(sampled_indices, Array1::from(expected_indices)); - assert_eq!(sampled_values, Array1::from(expected_values)); - } - - #[test] - fn test_m4_simd_with_x_correct() { - let x = (0..100).collect::>(); - let x = Array1::from(x); - let arr = (0..100).map(|x| x as f32).collect::>(); - let arr = Array1::from(arr); - - let sampled_indices = m4_simd_with_x(x.view(), arr.view(), 12); - let sampled_values = sampled_indices.mapv(|x| arr[x]); - - let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99]; - let expected_values = expected_indices - .iter() - .map(|x| *x as f32) - .collect::>(); - - assert_eq!(sampled_indices, Array1::from(expected_indices)); - assert_eq!(sampled_values, Array1::from(expected_values)); - } - - #[apply(threads)] - fn test_m4_simd_with_x_parallel_correct(n_threads: usize) { - let x = (0..100).collect::>(); - let x = Array1::from(x); - let arr = (0..100).map(|x| x as f32).collect::>(); - let arr = Array1::from(arr); - - let sampled_indices = m4_simd_with_x_parallel(x.view(), arr.view(), 12, n_threads); - let sampled_values = sampled_indices.mapv(|x| arr[x]); - - let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99]; - let expected_values = expected_indices - .iter() - .map(|x| *x as f32) - .collect::>(); - - assert_eq!(sampled_indices, Array1::from(expected_indices)); - assert_eq!(sampled_values, Array1::from(expected_values)); - } - - #[test] - fn test_m4_simd_with_x_gap() { - // We will create a gap in the middle of the array - let x = (0..100).collect::>(); - - // Increment the second half of the array by 50 - let x = x - .iter() - .map(|x| if *x > 50 { *x + 50 } else { *x }) - .collect::>(); - let x = Array1::from(x); - let arr = (0..100).map(|x| x as f32).collect::>(); - let arr = Array1::from(arr); - - let sampled_indices = m4_simd_with_x(x.view(), arr.view(), 20); - assert_eq!(sampled_indices.len(), 16); // One full gap - let expected_indices = vec![0, 0, 29, 29, 30, 30, 50, 50, 51, 51, 69, 69, 70, 70, 99, 99]; - assert_eq!(sampled_indices, Array1::from(expected_indices)); - - // Increment the second half of the array by 50 again - let x = x - .iter() - .map(|x| if *x > 101 { *x + 50 } else { *x }) - .collect::>(); - let x = Array1::from(x); - - let sampled_indices = m4_simd_with_x(x.view(), arr.view(), 20); - assert_eq!(sampled_indices.len(), 17); // Gap with 1 value - let expected_indices = vec![ - 0, 0, 39, 39, 40, 40, 50, 50, 51, 52, 52, 59, 59, 60, 60, 99, 99, - ]; - assert_eq!(sampled_indices, Array1::from(expected_indices)); - } - - #[apply(threads)] - fn test_m4_simd_with_x_gap_parallel(n_threads: usize) { - // We will create a gap in the middle of the array - let x = (0..100).collect::>(); - - // Increment the second half of the array by 50 - let x = x - .iter() - .map(|x| if *x > 50 { *x + 50 } else { *x }) - .collect::>(); - let x = Array1::from(x); - let arr = (0..100).map(|x| x as f32).collect::>(); - let arr = Array1::from(arr); - - let sampled_indices = m4_simd_with_x_parallel(x.view(), arr.view(), 20, n_threads); - assert_eq!(sampled_indices.len(), 16); // One full gap - let expected_indices = vec![0, 0, 29, 29, 30, 30, 50, 50, 51, 51, 69, 69, 70, 70, 99, 99]; - assert_eq!(sampled_indices, Array1::from(expected_indices)); - - // Increment the second half of the array by 50 again - let x = x - .iter() - .map(|x| if *x > 101 { *x + 50 } else { *x }) - .collect::>(); - let x = Array1::from(x); - - let sampled_indices = m4_simd_with_x_parallel(x.view(), arr.view(), 20, n_threads); - assert_eq!(sampled_indices.len(), 17); // Gap with 1 value - let expected_indices = vec![ - 0, 0, 39, 39, 40, 40, 50, 50, 51, 52, 52, 59, 59, 60, 60, 99, 99, - ]; - assert_eq!(sampled_indices, Array1::from(expected_indices)); - } - - #[apply(threads)] - fn test_many_random_runs_correct(n_threads: usize) { - let n = 20_003; - let n_out = 204; - let x = (0..n).map(|x| x as i32).collect::>(); - let x = Array1::from(x); - for _ in 0..100 { - let arr = get_array_f32(n); - let idxs1 = m4_simd_without_x(arr.view(), n_out); - let idxs2 = m4_simd_without_x_parallel(arr.view(), n_out, n_threads); - let idxs3 = m4_simd_with_x(x.view(), arr.view(), n_out); - let idxs4 = m4_simd_with_x_parallel(x.view(), arr.view(), n_out, n_threads); - assert_eq!(idxs1, idxs2); - assert_eq!(idxs1, idxs3); - assert_eq!(idxs1, idxs4); // TODO: this should not fail when n_threads = 16 - } - } -} diff --git a/downsample_rs/src/minmax.rs b/downsample_rs/src/minmax.rs new file mode 100644 index 0000000..6858164 --- /dev/null +++ b/downsample_rs/src/minmax.rs @@ -0,0 +1,431 @@ +use rayon::iter::IndexedParallelIterator; +use rayon::prelude::*; + +use argminmax::ArgMinMax; +use num_traits::{AsPrimitive, FromPrimitive}; + +use super::searchsorted::{ + get_equidistant_bin_idx_iterator, get_equidistant_bin_idx_iterator_parallel, +}; +use super::types::Num; + +// ----------------------------------- NON-PARALLEL ------------------------------------ + +// ----------- WITH X + +pub fn min_max_with_x(x: &[Tx], arr: &[Ty], n_out: usize) -> Vec +where + for<'a> &'a [Ty]: ArgMinMax, + Tx: Num + FromPrimitive + AsPrimitive, + Ty: Copy + PartialOrd, +{ + assert_eq!(n_out % 2, 0); + let bin_idx_iterator = get_equidistant_bin_idx_iterator(x, n_out / 2); + min_max_generic_with_x(arr, bin_idx_iterator, n_out, |arr| arr.argminmax()) +} + +// ----------- WITHOUT X + +pub fn min_max_without_x(arr: &[T], n_out: usize) -> Vec +where + for<'a> &'a [T]: ArgMinMax, +{ + assert_eq!(n_out % 2, 0); + min_max_generic(arr, n_out, |arr| arr.argminmax()) +} + +// ------------------------------------- PARALLEL -------------------------------------- + +// ----------- WITH X + +pub fn min_max_with_x_parallel( + x: &[Tx], + arr: &[Ty], + n_out: usize, + n_threads: usize, +) -> Vec +where + for<'a> &'a [Ty]: ArgMinMax, + Tx: Num + FromPrimitive + AsPrimitive + Send + Sync, + Ty: Copy + PartialOrd + Send + Sync, +{ + assert_eq!(n_out % 2, 0); + let bin_idx_iterator = get_equidistant_bin_idx_iterator_parallel(x, n_out / 2, n_threads); + min_max_generic_with_x_parallel(arr, bin_idx_iterator, n_out, n_threads, |arr| { + arr.argminmax() + }) +} + +// ----------- WITHOUT X + +pub fn min_max_without_x_parallel( + arr: &[T], + n_out: usize, + n_threads: usize, +) -> Vec +where + for<'a> &'a [T]: ArgMinMax, +{ + assert_eq!(n_out % 2, 0); + min_max_generic_parallel(arr, n_out, n_threads, |arr| arr.argminmax()) +} +// ----------------- GENERICS +// +// --------------------- WITHOUT X + +#[inline(always)] +pub(crate) fn min_max_generic( + arr: &[T], + n_out: usize, + f_argminmax: fn(&[T]) -> (usize, usize), +) -> Vec { + // Assumes n_out is a multiple of 2 + if n_out >= arr.len() { + return (0..arr.len()).collect::>(); + } + + // arr.len() - 1 is used to match the delta of a range-index (0..arr.len()-1) + let block_size: f64 = (arr.len() - 1) as f64 / (n_out / 2) as f64; + + let mut sampled_indices = vec![usize::default(); n_out]; + + let mut start_idx: usize = 0; + for i in 0..n_out / 2 { + // Decided to use multiplication instead of adding to the accumulator (end) + // as multiplication seems to be less prone to rounding errors. + let end: f64 = block_size * (i + 1) as f64; + let end_idx: usize = end as usize + 1; + + let (min_index, max_index) = f_argminmax(&arr[start_idx..end_idx]); + + // Add the indexes in sorted order + if min_index < max_index { + sampled_indices[2 * i] = min_index + start_idx; + sampled_indices[2 * i + 1] = max_index + start_idx; + } else { + sampled_indices[2 * i] = max_index + start_idx; + sampled_indices[2 * i + 1] = min_index + start_idx; + } + + start_idx = end_idx; + } + + sampled_indices +} + +#[inline(always)] +pub(crate) fn min_max_generic_parallel( + arr: &[T], + n_out: usize, + n_threads: usize, + f_argminmax: fn(&[T]) -> (usize, usize), +) -> Vec { + // Assumes n_out is a multiple of 2 + if n_out >= arr.len() { + return (0..arr.len()).collect::>(); + } + + // arr.len() - 1 is used to match the delta of a range-index (0..arr.len()-1) + let block_size: f64 = (arr.len() - 1) as f64 / (n_out / 2) as f64; + + // Store the enumerated indexes in the output array + // let mut sampled_indices: Array1 = Array1::from_vec((0..n_out).collect::>()); + let mut sampled_indices: Vec = (0..n_out).collect::>(); + + // to limit the amounts of threads Rayon uses, an explicit threadpool needs to be created + // in which the required code is "installed". This limits the amount of used threads. + // https://docs.rs/rayon/latest/rayon/struct.ThreadPool.html#method.install + let pool = rayon::ThreadPoolBuilder::new() + .num_threads(n_threads) + .build(); + + let func = || { + for chunk in sampled_indices.chunks_exact_mut(2) { + let i: f64 = unsafe { *chunk.get_unchecked(0) >> 1 } as f64; + let start_idx: usize = (block_size * i) as usize + (i != 0.0) as usize; + let end_idx: usize = (block_size * (i + 1.0)) as usize + 1; + + let (min_index, max_index) = f_argminmax(&arr[start_idx..end_idx]); + + // Add the indexes in sorted order + if min_index < max_index { + chunk[0] = min_index + start_idx; + chunk[1] = max_index + start_idx; + } else { + chunk[0] = max_index + start_idx; + chunk[1] = min_index + start_idx; + } + } + }; + + pool.unwrap().install(func); // allow panic if pool could not be created + + sampled_indices.to_vec() +} + +// --------------------- WITH X + +#[inline(always)] +pub(crate) fn min_max_generic_with_x( + arr: &[T], + bin_idx_iterator: impl Iterator>, + n_out: usize, + f_argminmax: fn(&[T]) -> (usize, usize), +) -> Vec { + // Assumes n_out is a multiple of 2 + if n_out >= arr.len() { + return (0..arr.len()).collect::>(); + } + + let mut sampled_indices: Vec = Vec::with_capacity(n_out); + + bin_idx_iterator.for_each(|bin| { + if let Some((start, end)) = bin { + if end <= start + 2 { + // If the bin has <= 2 elements, just add them all + for i in start..end { + sampled_indices.push(i); + } + } else { + // If the bin has at least two elements, add the argmin and argmax + let step = &arr[start..end]; + let (min_index, max_index) = f_argminmax(step); + + // Add the indexes in sorted order + if min_index < max_index { + sampled_indices.push(min_index + start); + sampled_indices.push(max_index + start); + } else { + sampled_indices.push(max_index + start); + sampled_indices.push(min_index + start); + } + } + } + }); + + sampled_indices +} + +#[inline(always)] +pub(crate) fn min_max_generic_with_x_parallel( + arr: &[T], + bin_idx_iterator: impl IndexedParallelIterator>>, + n_out: usize, + n_threads: usize, + f_argminmax: fn(&[T]) -> (usize, usize), +) -> Vec { + // Assumes n_out is a multiple of 2 + if n_out >= arr.len() { + return (0..arr.len()).collect::>(); + } + + // to limit the amounts of threads Rayon uses, an explicit threadpool needs to be created + // in which the required code is "installed". This limits the amount of used threads. + // https://docs.rs/rayon/latest/rayon/struct.ThreadPool.html#method.install + let pool = rayon::ThreadPoolBuilder::new() + .num_threads(n_threads) + .build(); + + let iter_func = || { + bin_idx_iterator + .flat_map(|bin_idx_iterator| { + bin_idx_iterator + .map(|bin| { + match bin { + Some((start, end)) => { + if end <= start + 2 { + // If the bin has <= 2 elements, just return them all + return (start..end).collect::>(); + } + + // If the bin has at least two elements, return the argmin and argmax + let step = &arr[start..end]; + let (min_index, max_index) = f_argminmax(step); + + // Return the indexes in sorted order + if min_index < max_index { + vec![min_index + start, max_index + start] + } else { + vec![max_index + start, min_index + start] + } + } // If the bin is empty, return empty Vec + None => { + vec![] + } + } + }) + .collect::>>() + }) + .flatten() + .collect::>() + }; + + pool.unwrap().install(iter_func) // allow panic if pool could not be created +} + +#[cfg(test)] +mod tests { + use num_traits::AsPrimitive; + use rstest::rstest; + use rstest_reuse::{self, *}; + + use super::{min_max_with_x, min_max_without_x}; + use super::{min_max_with_x_parallel, min_max_without_x_parallel}; + + use dev_utils::utils; + + fn get_array_f32(n: usize) -> Vec { + utils::get_random_array(n, f32::MIN, f32::MAX) + } + + // Template for the n_threads matrix + #[template] + #[rstest] + #[case(1)] + #[case(utils::get_all_threads() / 2)] + #[case(utils::get_all_threads())] + #[case(utils::get_all_threads() * 2)] + fn threads(#[case] n_threads: usize) {} + + #[test] + fn test_min_max_scalar_without_x_correct() { + let arr: [f32; 100] = core::array::from_fn(|i| i.as_()); + + let sampled_indices = min_max_without_x(&arr, 10); + let sampled_values = sampled_indices + .iter() + .map(|x| arr[*x]) + .collect::>(); + + let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99]; + let expected_values = expected_indices + .iter() + .map(|x| *x as f32) + .collect::>(); + + assert_eq!(sampled_indices, expected_indices); + assert_eq!(sampled_values, expected_values); + } + + #[apply(threads)] + fn test_min_max_scalar_without_x_parallel_correct(n_threads: usize) { + let arr: [f32; 100] = core::array::from_fn(|i| i.as_()); + + let sampled_indices = min_max_without_x_parallel(&arr, 10, n_threads); + let sampled_values = sampled_indices + .iter() + .map(|x| arr[*x]) + .collect::>(); + + let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99]; + let expected_values = expected_indices + .iter() + .map(|x| *x as f32) + .collect::>(); + + assert_eq!(sampled_indices, expected_indices); + assert_eq!(sampled_values, expected_values); + } + + #[test] + fn test_min_max_scalar_with_x_correct() { + let x: [i32; 100] = core::array::from_fn(|i| i.as_()); + let arr: [f32; 100] = core::array::from_fn(|i| i.as_()); + + let sampled_indices = min_max_with_x(&x, &arr, 10); + let sampled_values = sampled_indices + .iter() + .map(|x| arr[*x]) + .collect::>(); + + let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99]; + let expected_values = expected_indices + .iter() + .map(|x| *x as f32) + .collect::>(); + + assert_eq!(sampled_indices, expected_indices); + assert_eq!(sampled_values, expected_values); + } + + #[apply(threads)] + fn test_min_max_scalar_with_x_parallel_correct(n_threads: usize) { + let x: [i32; 100] = core::array::from_fn(|i| i.as_()); + let arr: [f32; 100] = core::array::from_fn(|i| i.as_()); + + let sampled_indices = min_max_with_x_parallel(&x, &arr, 10, n_threads); + let sampled_values = sampled_indices + .iter() + .map(|x| arr[*x]) + .collect::>(); + + let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99]; + let expected_values = expected_indices + .iter() + .map(|x| *x as f32) + .collect::>(); + + assert_eq!(sampled_indices, expected_indices); + assert_eq!(sampled_values, expected_values); + } + + #[test] + fn test_min_max_scalar_with_x_gap() { + // We will create a gap in the middle of the array + // Increment the second half of the array by 50 + let x: [i32; 100] = core::array::from_fn(|i| if i > 50 { (i + 50).as_() } else { i.as_() }); + let arr: [f32; 100] = core::array::from_fn(|i| i.as_()); + + let sampled_indices = min_max_with_x(&x, &arr, 10); + assert_eq!(sampled_indices.len(), 8); // One full gap + let expected_indices = vec![0, 29, 30, 50, 51, 69, 70, 99]; + assert_eq!(sampled_indices, expected_indices); + + // Increment the second half of the array by 50 again + let x = x.map(|i| if i > 101 { i + 50 } else { i }); + + let sampled_indices = min_max_with_x(&x, &arr, 10); + assert_eq!(sampled_indices.len(), 9); // Gap with 1 value + let expected_indices = vec![0, 39, 40, 50, 51, 52, 59, 60, 99]; + assert_eq!(sampled_indices, expected_indices); + } + + #[apply(threads)] + fn test_min_max_scalar_with_x_parallel_gap(n_threads: usize) { + // Create a gap in the middle of the array + // Increment the second half of the array by 50 + let x: [i32; 100] = core::array::from_fn(|i| if i > 50 { (i + 50).as_() } else { i.as_() }); + let arr: [f32; 100] = core::array::from_fn(|i| i.as_()); + + let sampled_indices = min_max_with_x_parallel(&x, &arr, 10, n_threads); + assert_eq!(sampled_indices.len(), 8); // One full gap + let expected_indices = vec![0, 29, 30, 50, 51, 69, 70, 99]; + assert_eq!(sampled_indices, expected_indices); + + // Increment the second half of the array by 50 again + let x = x.map(|i| if i > 101 { i + 50 } else { i }); + + let sampled_indices = min_max_with_x_parallel(&x, &arr, 10, n_threads); + assert_eq!(sampled_indices.len(), 9); // Gap with 1 value + let expected_indices = vec![0, 39, 40, 50, 51, 52, 59, 60, 99]; + assert_eq!(sampled_indices, expected_indices); + } + + #[apply(threads)] + fn test_many_random_runs_same_output(n_threads: usize) { + const N: usize = 20_003; + const N_OUT: usize = 202; + let x: [i32; N] = core::array::from_fn(|i| i.as_()); + for _ in 0..100 { + let mut arr = get_array_f32(N); + arr[N - 1] = f32::INFINITY; // Make sure the last value is always the max + let idxs1 = min_max_without_x(arr.as_slice(), N_OUT); + let idxs2 = min_max_without_x_parallel(arr.as_slice(), N_OUT, n_threads); + let idxs3 = min_max_with_x(&x, arr.as_slice(), N_OUT); + let idxs4 = min_max_with_x_parallel(&x, arr.as_slice(), N_OUT, n_threads); + assert_eq!(idxs1, idxs2); + assert_eq!(idxs1, idxs3); + assert_eq!(idxs1, idxs4); + } + } +} diff --git a/downsample_rs/src/minmax/generic.rs b/downsample_rs/src/minmax/generic.rs deleted file mode 100644 index 4416fe1..0000000 --- a/downsample_rs/src/minmax/generic.rs +++ /dev/null @@ -1,209 +0,0 @@ -use ndarray::Zip; -use ndarray::{Array1, ArrayView1}; - -use rayon::iter::IndexedParallelIterator; -use rayon::prelude::*; - -// --------------------- WITHOUT X - -#[inline(always)] -pub(crate) fn min_max_generic( - arr: ArrayView1, - n_out: usize, - f_argminmax: fn(ArrayView1) -> (usize, usize), -) -> Array1 { - // Assumes n_out is a multiple of 2 - if n_out >= arr.len() { - return Array1::from((0..arr.len()).collect::>()); - } - - // arr.len() - 1 is used to match the delta of a range-index (0..arr.len()-1) - let block_size: f64 = (arr.len() - 1) as f64 / (n_out / 2) as f64; - let arr_ptr = arr.as_ptr(); - - let mut sampled_indices: Array1 = Array1::::default(n_out); - - let mut start_idx: usize = 0; - for i in 0..n_out / 2 { - // Decided to use multiplication instead of adding to the accumulator (end) - // as multiplication seems to be less prone to rounding errors. - let end: f64 = block_size * (i + 1) as f64; - let end_idx: usize = end as usize + 1; - - let (min_index, max_index) = f_argminmax(unsafe { - ArrayView1::from_shape_ptr((end_idx - start_idx,), arr_ptr.add(start_idx)) - }); - - // Add the indexes in sorted order - if min_index < max_index { - sampled_indices[2 * i] = min_index + start_idx; - sampled_indices[2 * i + 1] = max_index + start_idx; - } else { - sampled_indices[2 * i] = max_index + start_idx; - sampled_indices[2 * i + 1] = min_index + start_idx; - } - - start_idx = end_idx; - } - - sampled_indices -} - -#[inline(always)] -pub(crate) fn min_max_generic_parallel( - arr: ArrayView1, - n_out: usize, - n_threads: usize, - f_argminmax: fn(ArrayView1) -> (usize, usize), -) -> Array1 { - // Assumes n_out is a multiple of 2 - if n_out >= arr.len() { - return Array1::from((0..arr.len()).collect::>()); - } - - // arr.len() - 1 is used to match the delta of a range-index (0..arr.len()-1) - let block_size: f64 = (arr.len() - 1) as f64 / (n_out / 2) as f64; - - // Store the enumerated indexes in the output array - let mut sampled_indices: Array1 = Array1::from_vec((0..n_out).collect::>()); - - // to limit the amounts of threads Rayon uses, an explicit threadpool needs to be created - // in which the required code is "installed". This limits the amount of used threads. - // https://docs.rs/rayon/latest/rayon/struct.ThreadPool.html#method.install - let pool = rayon::ThreadPoolBuilder::new() - .num_threads(n_threads) - .build(); - - let zip_func = || { - Zip::from(sampled_indices.exact_chunks_mut(2)).par_for_each(|mut sampled_index| { - let i: f64 = unsafe { *sampled_index.uget(0) >> 1 } as f64; - let start_idx: usize = (block_size * i) as usize + (i != 0.0) as usize; - let end_idx: usize = (block_size * (i + 1.0)) as usize + 1; - - let (min_index, max_index) = f_argminmax(unsafe { - ArrayView1::from_shape_ptr((end_idx - start_idx,), arr.as_ptr().add(start_idx)) - }); - - // Add the indexes in sorted order - if min_index < max_index { - sampled_index[0] = min_index + start_idx; - sampled_index[1] = max_index + start_idx; - } else { - sampled_index[0] = max_index + start_idx; - sampled_index[1] = min_index + start_idx; - } - }); - }; - - pool.unwrap().install(zip_func); // allow panic if pool could not be created - - sampled_indices -} - -// --------------------- WITH X - -#[inline(always)] -pub(crate) fn min_max_generic_with_x( - arr: ArrayView1, - bin_idx_iterator: impl Iterator>, - n_out: usize, - f_argminmax: fn(ArrayView1) -> (usize, usize), -) -> Array1 { - // Assumes n_out is a multiple of 2 - if n_out >= arr.len() { - return Array1::from((0..arr.len()).collect::>()); - } - - let ptr = arr.as_ptr(); - let mut sampled_indices: Vec = Vec::with_capacity(n_out); - - bin_idx_iterator.for_each(|bin| { - if let Some((start, end)) = bin { - if end <= start + 2 { - // If the bin has <= 2 elements, just add them all - for i in start..end { - sampled_indices.push(i); - } - } else { - // If the bin has at least two elements, add the argmin and argmax - let step = unsafe { ArrayView1::from_shape_ptr(end - start, ptr.add(start)) }; - let (min_index, max_index) = f_argminmax(step); - - // Add the indexes in sorted order - if min_index < max_index { - sampled_indices.push(min_index + start); - sampled_indices.push(max_index + start); - } else { - sampled_indices.push(max_index + start); - sampled_indices.push(min_index + start); - } - } - } - }); - - Array1::from_vec(sampled_indices) -} - -#[inline(always)] -pub(crate) fn min_max_generic_with_x_parallel( - arr: ArrayView1, - bin_idx_iterator: impl IndexedParallelIterator>>, - n_out: usize, - n_threads: usize, - f_argminmax: fn(ArrayView1) -> (usize, usize), -) -> Array1 { - // Assumes n_out is a multiple of 2 - if n_out >= arr.len() { - return Array1::from((0..arr.len()).collect::>()); - } - - // to limit the amounts of threads Rayon uses, an explicit threadpool needs to be created - // in which the required code is "installed". This limits the amount of used threads. - // https://docs.rs/rayon/latest/rayon/struct.ThreadPool.html#method.install - let pool = rayon::ThreadPoolBuilder::new() - .num_threads(n_threads) - .build(); - - let iter_func = || { - Array1::from_vec( - bin_idx_iterator - .flat_map(|bin_idx_iterator| { - bin_idx_iterator - .map(|bin| { - match bin { - Some((start, end)) => { - if end <= start + 2 { - // If the bin has <= 2 elements, just return them all - return (start..end).collect::>(); - } - - // If the bin has at least two elements, return the argmin and argmax - let step = unsafe { - ArrayView1::from_shape_ptr( - end - start, - arr.as_ptr().add(start), - ) - }; - let (min_index, max_index) = f_argminmax(step); - - // Return the indexes in sorted order - if min_index < max_index { - vec![min_index + start, max_index + start] - } else { - vec![max_index + start, min_index + start] - } - } // If the bin is empty, return empty Vec - None => { - vec![] - } - } - }) - .collect::>>() - }) - .flatten() - .collect::>(), - ) - }; - - pool.unwrap().install(iter_func) // allow panic if pool could not be created -} diff --git a/downsample_rs/src/minmax/mod.rs b/downsample_rs/src/minmax/mod.rs deleted file mode 100644 index 3628374..0000000 --- a/downsample_rs/src/minmax/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -pub mod scalar; -pub use scalar::*; -pub mod simd; -pub use simd::*; -mod generic; // private module diff --git a/downsample_rs/src/minmax/scalar.rs b/downsample_rs/src/minmax/scalar.rs deleted file mode 100644 index 9d0c114..0000000 --- a/downsample_rs/src/minmax/scalar.rs +++ /dev/null @@ -1,263 +0,0 @@ -use argminmax::{ScalarArgMinMax, SCALAR}; -use num_traits::{AsPrimitive, FromPrimitive}; - -use ndarray::{Array1, ArrayView1}; - -use super::super::searchsorted::{ - get_equidistant_bin_idx_iterator, get_equidistant_bin_idx_iterator_parallel, -}; -use super::super::types::Num; -use super::generic::{min_max_generic, min_max_generic_parallel}; -use super::generic::{min_max_generic_with_x, min_max_generic_with_x_parallel}; - -// ----------------------------------- NON-PARALLEL ------------------------------------ - -// ----------- WITH X - -pub fn min_max_scalar_with_x( - x: ArrayView1, - arr: ArrayView1, - n_out: usize, -) -> Array1 -where - SCALAR: ScalarArgMinMax, - Tx: Num + FromPrimitive + AsPrimitive, - Ty: Copy + PartialOrd, -{ - assert_eq!(n_out % 2, 0); - let bin_idx_iterator = get_equidistant_bin_idx_iterator(x, n_out / 2); - min_max_generic_with_x(arr, bin_idx_iterator, n_out, SCALAR::argminmax) -} - -// ----------- WITHOUT X - -pub fn min_max_scalar_without_x( - arr: ArrayView1, - n_out: usize, -) -> Array1 -where - SCALAR: ScalarArgMinMax, -{ - assert_eq!(n_out % 2, 0); - min_max_generic(arr, n_out, SCALAR::argminmax) -} - -// ------------------------------------- PARALLEL -------------------------------------- - -// ----------- WITH X - -pub fn min_max_scalar_with_x_parallel( - x: ArrayView1, - arr: ArrayView1, - n_out: usize, - n_threads: usize, -) -> Array1 -where - SCALAR: ScalarArgMinMax, - Tx: Num + FromPrimitive + AsPrimitive + Send + Sync, - Ty: Copy + PartialOrd + Send + Sync, -{ - assert_eq!(n_out % 2, 0); - let bin_idx_iterator = get_equidistant_bin_idx_iterator_parallel(x, n_out / 2, n_threads); - min_max_generic_with_x_parallel(arr, bin_idx_iterator, n_out, n_threads, SCALAR::argminmax) -} - -// ----------- WITHOUT X - -pub fn min_max_scalar_without_x_parallel( - arr: ArrayView1, - n_out: usize, - n_threads: usize, -) -> Array1 -where - SCALAR: ScalarArgMinMax, -{ - assert_eq!(n_out % 2, 0); - min_max_generic_parallel(arr, n_out, n_threads, SCALAR::argminmax) -} - -// --------------------------------------- TESTS --------------------------------------- - -#[cfg(test)] -mod tests { - use rstest::rstest; - use rstest_reuse::{self, *}; - - use super::{min_max_scalar_with_x, min_max_scalar_without_x}; - use super::{min_max_scalar_with_x_parallel, min_max_scalar_without_x_parallel}; - use ndarray::Array1; - - use dev_utils::utils; - - fn get_array_f32(n: usize) -> Array1 { - utils::get_random_array(n, f32::MIN, f32::MAX) - } - - // Template for the n_threads matrix - #[template] - #[rstest] - #[case(1)] - #[case(utils::get_all_threads() / 2)] - #[case(utils::get_all_threads())] - #[case(utils::get_all_threads() * 2)] - fn threads(#[case] n_threads: usize) {} - - #[test] - fn test_min_max_scalar_without_x_correct() { - let arr = (0..100).map(|x| x as f32).collect::>(); - let arr = Array1::from(arr); - - let sampled_indices = min_max_scalar_without_x(arr.view(), 10); - let sampled_values = sampled_indices.mapv(|x| arr[x]); - - let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99]; - let expected_values = expected_indices - .iter() - .map(|x| *x as f32) - .collect::>(); - - assert_eq!(sampled_indices, Array1::from(expected_indices)); - assert_eq!(sampled_values, Array1::from(expected_values)); - } - - #[apply(threads)] - fn test_min_max_scalar_without_x_parallel_correct(n_threads: usize) { - let arr = (0..100).map(|x| x as f32).collect::>(); - let arr = Array1::from(arr); - - let sampled_indices = min_max_scalar_without_x_parallel(arr.view(), 10, n_threads); - let sampled_values = sampled_indices.mapv(|x| arr[x]); - - let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99]; - let expected_values = expected_indices - .iter() - .map(|x| *x as f32) - .collect::>(); - - assert_eq!(sampled_indices, Array1::from(expected_indices)); - assert_eq!(sampled_values, Array1::from(expected_values)); - } - - #[test] - fn test_min_max_scalar_with_x_correct() { - let x = (0..100).collect::>(); - let x = Array1::from(x); - let arr = (0..100).map(|x| x as f32).collect::>(); - let arr = Array1::from(arr); - - let sampled_indices = min_max_scalar_with_x(x.view(), arr.view(), 10); - let sampled_values = sampled_indices.mapv(|x| arr[x]); - - let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99]; - let expected_values = expected_indices - .iter() - .map(|x| *x as f32) - .collect::>(); - - assert_eq!(sampled_indices, Array1::from(expected_indices)); - assert_eq!(sampled_values, Array1::from(expected_values)); - } - - #[apply(threads)] - fn test_min_max_scalar_with_x_parallel_correct(n_threads: usize) { - let x = (0..100).collect::>(); - let x = Array1::from(x); - let arr = (0..100).map(|x| x as f32).collect::>(); - let arr = Array1::from(arr); - - let sampled_indices = min_max_scalar_with_x_parallel(x.view(), arr.view(), 10, n_threads); - let sampled_values = sampled_indices.mapv(|x| arr[x]); - - let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99]; - let expected_values = expected_indices - .iter() - .map(|x| *x as f32) - .collect::>(); - - assert_eq!(sampled_indices, Array1::from(expected_indices)); - assert_eq!(sampled_values, Array1::from(expected_values)); - } - - #[test] - fn test_min_max_scalar_with_x_gap() { - // We will create a gap in the middle of the array - let x = (0..100).collect::>(); - - // Increment the second half of the array by 50 - let x = x - .iter() - .map(|x| if *x > 50 { *x + 50 } else { *x }) - .collect::>(); - let x = Array1::from(x); - let arr = (0..100).map(|x| x as f32).collect::>(); - let arr = Array1::from(arr); - - let sampled_indices = min_max_scalar_with_x(x.view(), arr.view(), 10); - assert_eq!(sampled_indices.len(), 8); // One full gap - let expected_indices = vec![0, 29, 30, 50, 51, 69, 70, 99]; - assert_eq!(sampled_indices, Array1::from(expected_indices)); - - // Increment the second half of the array by 50 again - let x = x - .iter() - .map(|x| if *x > 101 { *x + 50 } else { *x }) - .collect::>(); - let x = Array1::from(x); - - let sampled_indices = min_max_scalar_with_x(x.view(), arr.view(), 10); - assert_eq!(sampled_indices.len(), 9); // Gap with 1 value - let expected_indices = vec![0, 39, 40, 50, 51, 52, 59, 60, 99]; - assert_eq!(sampled_indices, Array1::from(expected_indices)); - } - - #[apply(threads)] - fn test_min_max_scalar_with_x_parallel_gap(n_threads: usize) { - // Create a gap in the middle of the array - let x = (0..100).collect::>(); - - // Increment the second half of the array by 50 - let x = x - .iter() - .map(|x| if *x > 50 { *x + 50 } else { *x }) - .collect::>(); - let x = Array1::from(x); - let arr = (0..100).map(|x| x as f32).collect::>(); - let arr = Array1::from(arr); - - let sampled_indices = min_max_scalar_with_x_parallel(x.view(), arr.view(), 10, n_threads); - assert_eq!(sampled_indices.len(), 8); // One full gap - let expected_indices = vec![0, 29, 30, 50, 51, 69, 70, 99]; - assert_eq!(sampled_indices, Array1::from(expected_indices)); - - // Increment the second half of the array by 50 again - let x = x - .iter() - .map(|x| if *x > 101 { *x + 50 } else { *x }) - .collect::>(); - let x = Array1::from(x); - - let sampled_indices = min_max_scalar_with_x_parallel(x.view(), arr.view(), 10, n_threads); - assert_eq!(sampled_indices.len(), 9); // Gap with 1 value - let expected_indices = vec![0, 39, 40, 50, 51, 52, 59, 60, 99]; - assert_eq!(sampled_indices, Array1::from(expected_indices)); - } - - #[apply(threads)] - fn test_many_random_runs_same_output(n_threads: usize) { - let n: usize = 20_003; - let n_out = 202; - let x = (0..n as i32).collect::>(); - let x = Array1::from(x); - for _ in 0..100 { - let mut arr = get_array_f32(n); - arr[n - 1] = f32::INFINITY; // Make sure the last value is always the max - let idxs1 = min_max_scalar_without_x(arr.view(), n_out); - let idxs2 = min_max_scalar_without_x_parallel(arr.view(), n_out, n_threads); - let idxs3 = min_max_scalar_with_x(x.view(), arr.view(), n_out); - let idxs4 = min_max_scalar_with_x_parallel(x.view(), arr.view(), n_out, n_threads); - assert_eq!(idxs1, idxs2); - assert_eq!(idxs1, idxs3); - assert_eq!(idxs1, idxs4); - } - } -} diff --git a/downsample_rs/src/minmax/simd.rs b/downsample_rs/src/minmax/simd.rs deleted file mode 100644 index 545d092..0000000 --- a/downsample_rs/src/minmax/simd.rs +++ /dev/null @@ -1,265 +0,0 @@ -use argminmax::ArgMinMax; - -use ndarray::{Array1, ArrayView1}; -use num_traits::{AsPrimitive, FromPrimitive}; - -use super::super::searchsorted::{ - get_equidistant_bin_idx_iterator, get_equidistant_bin_idx_iterator_parallel, -}; -use super::super::types::Num; -use super::generic::{min_max_generic, min_max_generic_parallel}; -use super::generic::{min_max_generic_with_x, min_max_generic_with_x_parallel}; - -// ----------------------------------- NON-PARALLEL ------------------------------------ - -// ----------- WITH X - -pub fn min_max_simd_with_x( - x: ArrayView1, - arr: ArrayView1, - n_out: usize, -) -> Array1 -where - for<'a> ArrayView1<'a, Ty>: ArgMinMax, - Tx: Num + FromPrimitive + AsPrimitive, - Ty: Copy + PartialOrd, -{ - assert_eq!(n_out % 2, 0); - let bin_idx_iterator = get_equidistant_bin_idx_iterator(x, n_out / 2); - min_max_generic_with_x(arr, bin_idx_iterator, n_out, |arr| arr.argminmax()) -} - -// ----------- WITHOUT X - -pub fn min_max_simd_without_x( - arr: ArrayView1, - n_out: usize, -) -> Array1 -where - for<'a> ArrayView1<'a, T>: ArgMinMax, -{ - assert_eq!(n_out % 2, 0); - min_max_generic(arr, n_out, |arr| arr.argminmax()) -} - -// ------------------------------------- PARALLEL -------------------------------------- - -// ----------- WITH X - -pub fn min_max_simd_with_x_parallel( - x: ArrayView1, - arr: ArrayView1, - n_out: usize, - n_threads: usize, -) -> Array1 -where - for<'a> ArrayView1<'a, Ty>: ArgMinMax, - Tx: Num + FromPrimitive + AsPrimitive + Send + Sync, - Ty: Copy + PartialOrd + Send + Sync, -{ - assert_eq!(n_out % 2, 0); - let bin_idx_iterator = get_equidistant_bin_idx_iterator_parallel(x, n_out / 2, n_threads); - min_max_generic_with_x_parallel(arr, bin_idx_iterator, n_out, n_threads, |arr| { - arr.argminmax() - }) -} - -// ----------- WITHOUT X - -pub fn min_max_simd_without_x_parallel( - arr: ArrayView1, - n_out: usize, - n_threads: usize, -) -> Array1 -where - for<'a> ArrayView1<'a, T>: ArgMinMax, -{ - assert_eq!(n_out % 2, 0); - min_max_generic_parallel(arr, n_out, n_threads, |arr| arr.argminmax()) -} - -// --------------------------------------- TESTS --------------------------------------- - -#[cfg(test)] -mod tests { - use rstest::rstest; - use rstest_reuse::{self, *}; - - use super::{min_max_simd_with_x, min_max_simd_without_x}; - use super::{min_max_simd_with_x_parallel, min_max_simd_without_x_parallel}; - use ndarray::Array1; - - use dev_utils::utils; - - fn get_array_f32(n: usize) -> Array1 { - utils::get_random_array(n, f32::MIN, f32::MAX) - } - - // Template for the n_threads matrix - #[template] - #[rstest] - #[case(1)] - #[case(utils::get_all_threads() / 2)] - #[case(utils::get_all_threads())] - #[case(utils::get_all_threads() * 2)] - fn threads(#[case] n_threads: usize) {} - - #[test] - fn test_min_max_simd_without_x_correct() { - let arr = (0..100).map(|x| x as f32).collect::>(); - let arr = Array1::from(arr); - - let sampled_indices = min_max_simd_without_x(arr.view(), 10); - let sampled_values = sampled_indices.mapv(|x| arr[x]); - - let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99]; - let expected_values = expected_indices - .iter() - .map(|x| *x as f32) - .collect::>(); - - assert_eq!(sampled_indices, Array1::from(expected_indices)); - assert_eq!(sampled_values, Array1::from(expected_values)); - } - - #[apply(threads)] - fn test_min_max_simd_without_x_parallel_correct(n_threads: usize) { - let arr = (0..100).map(|x| x as f32).collect::>(); - let arr = Array1::from(arr); - - let sampled_indices = min_max_simd_without_x_parallel(arr.view(), 10, n_threads); - let sampled_values = sampled_indices.mapv(|x| arr[x]); - - let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99]; - let expected_values = expected_indices - .iter() - .map(|x| *x as f32) - .collect::>(); - - assert_eq!(sampled_indices, Array1::from(expected_indices)); - assert_eq!(sampled_values, Array1::from(expected_values)); - } - - #[test] - fn test_min_max_simd_with_x_correct() { - let x = (0..100).map(|x| x as f32).collect::>(); - let x = Array1::from(x); - let arr = (0..100).map(|x| x as f32).collect::>(); - let arr = Array1::from(arr); - - let sampled_indices = min_max_simd_with_x(x.view(), arr.view(), 10); - let sampled_values = sampled_indices.mapv(|x| arr[x]); - - let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99]; - let expected_values = expected_indices - .iter() - .map(|x| *x as f32) - .collect::>(); - - assert_eq!(sampled_indices, Array1::from(expected_indices)); - assert_eq!(sampled_values, Array1::from(expected_values)); - } - - #[apply(threads)] - fn test_min_max_simd_with_x_parallel_correct(n_threads: usize) { - let x = (0..100).map(|x| x as f32).collect::>(); - let x = Array1::from(x); - let arr = (0..100).map(|x| x as f32).collect::>(); - let arr = Array1::from(arr); - - let sampled_indices = min_max_simd_with_x_parallel(x.view(), arr.view(), 10, n_threads); - let sampled_values = sampled_indices.mapv(|x| arr[x]); - - let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99]; - let expected_values = expected_indices - .iter() - .map(|x| *x as f32) - .collect::>(); - - assert_eq!(sampled_indices, Array1::from(expected_indices)); - assert_eq!(sampled_values, Array1::from(expected_values)); - } - - #[test] - fn test_min_max_simd_with_x_gap() { - // We will create a gap in the middle of the array - let x = (0..100).collect::>(); - - // Increment the second half of the array by 50 - let x = x - .iter() - .map(|x| if *x > 50 { *x + 50 } else { *x }) - .collect::>(); - let x = Array1::from(x); - let arr = (0..100).map(|x| x as f32).collect::>(); - let arr = Array1::from(arr); - - let sampled_indices = min_max_simd_with_x(x.view(), arr.view(), 10); - assert_eq!(sampled_indices.len(), 8); // One full gap - let expected_indices = vec![0, 29, 30, 50, 51, 69, 70, 99]; - assert_eq!(sampled_indices, Array1::from(expected_indices)); - - // Increment the second half of the array by 50 again - let x = x - .iter() - .map(|x| if *x > 101 { *x + 50 } else { *x }) - .collect::>(); - let x = Array1::from(x); - - let sampled_indices = min_max_simd_with_x(x.view(), arr.view(), 10); - assert_eq!(sampled_indices.len(), 9); // Gap with 1 value - let expected_indices = vec![0, 39, 40, 50, 51, 52, 59, 60, 99]; - assert_eq!(sampled_indices, Array1::from(expected_indices)); - } - - #[apply(threads)] - fn test_min_max_simd_with_x_parallel_gap(n_threads: usize) { - // Create a gap in the middle of the array - let x = (0..100).collect::>(); - - // Increment the second half of the array by 50 - let x = x - .iter() - .map(|x| if *x > 50 { *x + 50 } else { *x }) - .collect::>(); - let x = Array1::from(x); - let arr = (0..100).map(|x| x as f32).collect::>(); - let arr = Array1::from(arr); - - let sampled_indices = min_max_simd_with_x_parallel(x.view(), arr.view(), 10, n_threads); - assert_eq!(sampled_indices.len(), 8); // One full gap - let expected_indices = vec![0, 29, 30, 50, 51, 69, 70, 99]; - assert_eq!(sampled_indices, Array1::from(expected_indices)); - - // Increment the second half of the array by 50 again - let x = x - .iter() - .map(|x| if *x > 101 { *x + 50 } else { *x }) - .collect::>(); - let x = Array1::from(x); - - let sampled_indices = min_max_simd_with_x_parallel(x.view(), arr.view(), 10, n_threads); - assert_eq!(sampled_indices.len(), 9); // Gap with 1 value - let expected_indices = vec![0, 39, 40, 50, 51, 52, 59, 60, 99]; - assert_eq!(sampled_indices, Array1::from(expected_indices)); - } - - #[apply(threads)] - fn test_many_random_runs_same_output(n_threads: usize) { - let n = 20_003; - let n_out = 202; - let x = (0..n).map(|x| x as i32).collect::>(); - let x = Array1::from(x); - for _ in 0..100 { - let mut arr = get_array_f32(n); - arr[n - 1] = f32::INFINITY; // Make sure the last value is always the max - let idxs1 = min_max_simd_without_x(arr.view(), n_out); - let idxs2 = min_max_simd_without_x_parallel(arr.view(), n_out, n_threads); - let idxs3 = min_max_simd_with_x(x.view(), arr.view(), n_out); - let idxs4 = min_max_simd_with_x_parallel(x.view(), arr.view(), n_out, n_threads); - assert_eq!(idxs1, idxs2); - assert_eq!(idxs1, idxs3); - assert_eq!(idxs1, idxs4); - } - } -} diff --git a/downsample_rs/src/minmaxlttb.rs b/downsample_rs/src/minmaxlttb.rs new file mode 100644 index 0000000..9a0b708 --- /dev/null +++ b/downsample_rs/src/minmaxlttb.rs @@ -0,0 +1,290 @@ +use argminmax::ArgMinMax; + +use super::lttb::{lttb_with_x, lttb_without_x}; +use super::types::Num; + +use super::minmax; +use num_traits::{AsPrimitive, FromPrimitive}; + +// ----------------------------------- NON-PARALLEL ------------------------------------ + +// ----------- WITH X + +pub fn minmaxlttb_with_x + FromPrimitive, Ty: Num + AsPrimitive>( + x: &[Tx], + y: &[Ty], + n_out: usize, + minmax_ratio: usize, +) -> Vec +where + for<'a> &'a [Ty]: ArgMinMax, +{ + minmaxlttb_generic( + x, + y, + n_out, + minmax_ratio, + None, + MinMaxFunctionWithX::Serial(minmax::min_max_with_x), + ) +} + +// ----------- WITHOUT X + +pub fn minmaxlttb_without_x>( + y: &[Ty], + n_out: usize, + minmax_ratio: usize, +) -> Vec +where + for<'a> &'a [Ty]: ArgMinMax, +{ + minmaxlttb_generic_without_x( + y, + n_out, + minmax_ratio, + None, + MinMaxFunctionWithoutX::Serial(minmax::min_max_without_x), + ) +} + +// ------------------------------------- PARALLEL -------------------------------------- + +// ----------- WITH X + +pub fn minmaxlttb_with_x_parallel< + Tx: Num + AsPrimitive + FromPrimitive + Send + Sync, + Ty: Num + AsPrimitive + Send + Sync, +>( + x: &[Tx], + y: &[Ty], + n_out: usize, + minmax_ratio: usize, + n_threads: usize, +) -> Vec +where + for<'a> &'a [Ty]: ArgMinMax, +{ + minmaxlttb_generic( + x, + y, + n_out, + minmax_ratio, + Some(n_threads), + MinMaxFunctionWithX::Parallel(minmax::min_max_with_x_parallel), + ) +} + +// ----------- WITHOUT X + +pub fn minmaxlttb_without_x_parallel + Send + Sync>( + y: &[Ty], + n_out: usize, + minmax_ratio: usize, + n_threads: usize, +) -> Vec +where + for<'a> &'a [Ty]: ArgMinMax, +{ + minmaxlttb_generic_without_x( + y, + n_out, + minmax_ratio, + Some(n_threads), + MinMaxFunctionWithoutX::Parallel(minmax::min_max_without_x_parallel), + ) +} + +// ----------------------------------- GENERICS ------------------------------------ + +// types to make function signatures easier to read +type ThreadCount = usize; +type OutputCount = usize; + +pub enum MinMaxFunctionWithX, Ty: Num + AsPrimitive> { + Serial(fn(&[Tx], &[Ty], OutputCount) -> Vec), + Parallel(fn(&[Tx], &[Ty], OutputCount, ThreadCount) -> Vec), +} + +pub enum MinMaxFunctionWithoutX> { + Serial(fn(&[Ty], OutputCount) -> Vec), + Parallel(fn(&[Ty], OutputCount, ThreadCount) -> Vec), +} + +#[inline(always)] +pub(crate) fn minmaxlttb_generic, Ty: Num + AsPrimitive>( + x: &[Tx], + y: &[Ty], + n_out: usize, + minmax_ratio: usize, + n_threads: Option, + f_minmax: MinMaxFunctionWithX, +) -> Vec +where + for<'a> &'a [Ty]: ArgMinMax, +{ + assert_eq!(x.len(), y.len()); + assert!(minmax_ratio > 1); + // Apply first min max aggregation (if above ratio) + if x.len() / n_out > minmax_ratio { + // Get index of min max points + let mut index = match f_minmax { + MinMaxFunctionWithX::Serial(func) => func( + &x[1..(x.len() - 1)], + &y[1..(x.len() - 1)], + n_out * minmax_ratio, + ), + MinMaxFunctionWithX::Parallel(func) => func( + &x[1..(x.len() - 1)], + &y[1..(x.len() - 1)], + n_out * minmax_ratio, + n_threads.unwrap(), // n_threads cannot be None + ), + }; + // inplace + 1 + index.iter_mut().for_each(|elem| *elem += 1); + // Prepend first and last point + index.insert(0, 0); + index.push(x.len() - 1); + // Get x and y values at index + let x = unsafe { + index + .iter() + .map(|i| *x.get_unchecked(*i)) + .collect::>() + }; + let y = unsafe { + index + .iter() + .map(|i| *y.get_unchecked(*i)) + .collect::>() + }; + // Apply lttb on the reduced data + let index_points_selected = lttb_with_x(x.as_slice(), y.as_slice(), n_out); + // Return the original index + return index_points_selected + .iter() + .map(|i| index[*i]) + .collect::>(); + } + // Apply lttb on all data when requirement is not met + lttb_with_x(x, y, n_out) +} + +#[inline(always)] +pub(crate) fn minmaxlttb_generic_without_x>( + y: &[Ty], + n_out: usize, + minmax_ratio: usize, + n_threads: Option, + f_minmax: MinMaxFunctionWithoutX, +) -> Vec +where + for<'a> &'a [Ty]: ArgMinMax, +{ + assert!(minmax_ratio > 1); + // Apply first min max aggregation (if above ratio) + if y.len() / n_out > minmax_ratio { + // Get index of min max points + let mut index = match f_minmax { + MinMaxFunctionWithoutX::Serial(func) => { + func(&y[1..(y.len() - 1)], n_out * minmax_ratio) + } + MinMaxFunctionWithoutX::Parallel(func) => func( + &y[1..(y.len() - 1)], + n_out * minmax_ratio, + n_threads.unwrap(), // n_threads cannot be None + ), + }; + // inplace + 1 + index.iter_mut().for_each(|elem| *elem += 1); + // Prepend first and last point + index.insert(0, 0); + index.push(y.len() - 1); + // Get y values at index + let y = unsafe { + index + .iter() + .map(|i| *y.get_unchecked(*i)) + .collect::>() + }; + // Apply lttb on the reduced data + let index_points_selected = lttb_without_x(y.as_slice(), n_out); + // Return the original index + return index_points_selected + .iter() + .map(|i| index[*i]) + .collect::>(); + } + // Apply lttb on all data when requirement is not met + lttb_without_x(y, n_out).to_vec() +} + +#[cfg(test)] +mod tests { + use rstest::rstest; + use rstest_reuse::{self, *}; + + use super::{minmaxlttb_with_x, minmaxlttb_without_x}; + use super::{minmaxlttb_with_x_parallel, minmaxlttb_without_x_parallel}; + + use dev_utils::utils; + + fn get_array_f32(n: usize) -> Vec { + utils::get_random_array(n, f32::MIN, f32::MAX) + } + + // Template for the n_threads matrix + #[template] + #[rstest] + #[case(1)] + #[case(utils::get_all_threads() / 2)] + #[case(utils::get_all_threads())] + #[case(utils::get_all_threads() * 2)] + fn threads(#[case] n_threads: usize) {} + + #[test] + fn test_minmaxlttb_with_x() { + let x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; + let y = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; + let sampled_indices = minmaxlttb_with_x(&x, &y, 4, 2); + assert_eq!(sampled_indices, vec![0, 1, 5, 9]); + } + + #[test] + fn test_minmaxlttb_without_x() { + let y = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; + let sampled_indices = minmaxlttb_without_x(&y, 4, 2); + assert_eq!(sampled_indices, vec![0, 1, 5, 9]); + } + + #[apply(threads)] + fn test_minmaxlttb_with_x_parallel(n_threads: usize) { + let x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; + let y = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; + let sampled_indices = minmaxlttb_with_x_parallel(&x, &y, 4, 2, n_threads); + assert_eq!(sampled_indices, vec![0, 1, 5, 9]); + } + + #[apply(threads)] + fn test_minmaxlttb_without_x_parallel(n_threads: usize) { + let y = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; + let sampled_indices = minmaxlttb_without_x_parallel(&y, 4, 2, n_threads); + assert_eq!(sampled_indices, vec![0, 1, 5, 9]); + } + + #[apply(threads)] + fn test_many_random_runs_same_output(n_threads: usize) { + const N: usize = 20_000; + const N_OUT: usize = 100; + const MINMAX_RATIO: usize = 5; + for _ in 0..100 { + // TODO: test with x + let arr = get_array_f32(N); + let idxs1 = minmaxlttb_without_x(arr.as_slice(), N_OUT, MINMAX_RATIO); + let idxs2 = + minmaxlttb_without_x_parallel(arr.as_slice(), N_OUT, MINMAX_RATIO, n_threads); + assert_eq!(idxs1, idxs2); + } + } +} diff --git a/downsample_rs/src/minmaxlttb/generic.rs b/downsample_rs/src/minmaxlttb/generic.rs deleted file mode 100644 index 09b0007..0000000 --- a/downsample_rs/src/minmaxlttb/generic.rs +++ /dev/null @@ -1,108 +0,0 @@ -use ndarray::{s, Array1, ArrayView1}; - -use super::super::helpers::Average; -use super::super::lttb::{lttb_with_x, lttb_without_x}; -use super::super::types::Num; -use num_traits::AsPrimitive; - -// types to make function signatures easier to read -type ThreadCount = usize; -type OutputCount = usize; - -pub enum MinMaxFunctionWithX, Ty: Num + AsPrimitive> { - Serial(fn(ArrayView1, ArrayView1, OutputCount) -> Array1), - Parallel(fn(ArrayView1, ArrayView1, OutputCount, ThreadCount) -> Array1), -} - -pub enum MinMaxFunctionWithoutX> { - Serial(fn(ArrayView1, OutputCount) -> Array1), - Parallel(fn(ArrayView1, OutputCount, ThreadCount) -> Array1), -} - -#[inline(always)] -pub(crate) fn minmaxlttb_generic, Ty: Num + AsPrimitive>( - x: ArrayView1, - y: ArrayView1, - n_out: usize, - minmax_ratio: usize, - n_threads: Option, - f_minmax: MinMaxFunctionWithX, -) -> Array1 -where - for<'a> ArrayView1<'a, Ty>: Average, -{ - assert_eq!(x.len(), y.len()); - assert!(minmax_ratio > 1); - // Apply first min max aggregation (if above ratio) - if x.len() / n_out > minmax_ratio { - // Get index of min max points - let mut index = match f_minmax { - MinMaxFunctionWithX::Serial(func) => { - func(x.slice(s![1..-1]), y.slice(s![1..-1]), n_out * minmax_ratio) - } - MinMaxFunctionWithX::Parallel(func) => func( - x.slice(s![1..-1]), - y.slice(s![1..-1]), - n_out * minmax_ratio, - n_threads.unwrap(), // n_threads cannot be None - ), - }; - // inplace + 1 - index.mapv_inplace(|i| i + 1); - let mut index: Vec = index.into_raw_vec(); - // Prepend first and last point - index.insert(0, 0); - index.push(x.len() - 1); - let index = Array1::from_vec(index); - // Get x and y values at index - let x = unsafe { index.mapv(|i| *x.uget(i)) }; - let y = unsafe { index.mapv(|i| *y.uget(i)) }; - // Apply lttb on the reduced data - let index_points_selected = lttb_with_x(x.view(), y.view(), n_out); - // Return the original index - return index_points_selected.mapv(|i| index[i]); - } - // Apply lttb on all data when requirement is not met - lttb_with_x(x, y, n_out) -} - -#[inline(always)] -pub(crate) fn minmaxlttb_generic_without_x>( - y: ArrayView1, - n_out: usize, - minmax_ratio: usize, - n_threads: Option, - f_minmax: MinMaxFunctionWithoutX, -) -> Array1 -where - for<'a> ArrayView1<'a, Ty>: Average, -{ - assert!(minmax_ratio > 1); - // Apply first min max aggregation (if above ratio) - if y.len() / n_out > minmax_ratio { - // Get index of min max points - let mut index = match f_minmax { - MinMaxFunctionWithoutX::Serial(func) => func(y.slice(s![1..-1]), n_out * minmax_ratio), - MinMaxFunctionWithoutX::Parallel(func) => func( - y.slice(s![1..-1]), - n_out * minmax_ratio, - n_threads.unwrap(), // n_threads cannot be None - ), - }; - // inplace + 1 - index.mapv_inplace(|i| i + 1); - let mut index: Vec = index.into_raw_vec(); - // Prepend first and last point - index.insert(0, 0); - index.push(y.len() - 1); - let index = Array1::from_vec(index); - // Get y values at index - let y = unsafe { index.mapv(|i| *y.uget(i)) }; - // Apply lttb on the reduced data - let index_points_selected = lttb_without_x(y.view(), n_out); - // Return the original index - return index_points_selected.mapv(|i| index[i]); - } - // Apply lttb on all data when requirement is not met - lttb_without_x(y.view(), n_out) -} diff --git a/downsample_rs/src/minmaxlttb/mod.rs b/downsample_rs/src/minmaxlttb/mod.rs deleted file mode 100644 index 3893d2b..0000000 --- a/downsample_rs/src/minmaxlttb/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -pub mod scalar; -pub use scalar::*; -pub mod simd; -pub use simd::*; -mod generic; // Private module diff --git a/downsample_rs/src/minmaxlttb/scalar.rs b/downsample_rs/src/minmaxlttb/scalar.rs deleted file mode 100644 index b0539f4..0000000 --- a/downsample_rs/src/minmaxlttb/scalar.rs +++ /dev/null @@ -1,179 +0,0 @@ -use super::super::helpers::Average; -use super::super::minmax; -use super::super::types::Num; -use super::generic::{ - minmaxlttb_generic, minmaxlttb_generic_without_x, MinMaxFunctionWithX, MinMaxFunctionWithoutX, -}; -use ndarray::{Array1, ArrayView1}; -use num_traits::{AsPrimitive, FromPrimitive}; - -use argminmax::{ScalarArgMinMax, SCALAR}; - -// ----------------------------------- NON-PARALLEL ------------------------------------ - -// ----------- WITH X - -pub fn minmaxlttb_scalar_with_x< - Tx: Num + AsPrimitive + FromPrimitive, - Ty: Num + AsPrimitive, ->( - x: ArrayView1, - y: ArrayView1, - n_out: usize, - minmax_ratio: usize, -) -> Array1 -where - SCALAR: ScalarArgMinMax, - for<'a> ArrayView1<'a, Ty>: Average, -{ - minmaxlttb_generic( - x, - y, - n_out, - minmax_ratio, - None, - MinMaxFunctionWithX::Serial(minmax::min_max_scalar_with_x), - ) -} - -// ----------- WITHOUT X - -pub fn minmaxlttb_scalar_without_x>( - y: ArrayView1, - n_out: usize, - minmax_ratio: usize, -) -> Array1 -where - SCALAR: ScalarArgMinMax, - for<'a> ArrayView1<'a, Ty>: Average, -{ - minmaxlttb_generic_without_x( - y, - n_out, - minmax_ratio, - None, - MinMaxFunctionWithoutX::Serial(minmax::min_max_scalar_without_x), - ) -} - -// ------------------------------------- PARALLEL -------------------------------------- - -// ----------- WITH X - -pub fn minmaxlttb_scalar_with_x_parallel< - Tx: Num + AsPrimitive + FromPrimitive + Send + Sync, - Ty: Num + AsPrimitive + Send + Sync, ->( - x: ArrayView1, - y: ArrayView1, - n_out: usize, - minmax_ratio: usize, - n_threads: usize, -) -> Array1 -where - SCALAR: ScalarArgMinMax, - for<'a> ArrayView1<'a, Ty>: Average, -{ - minmaxlttb_generic( - x, - y, - n_out, - minmax_ratio, - Some(n_threads), - MinMaxFunctionWithX::Parallel(minmax::min_max_scalar_with_x_parallel), - ) -} - -// ----------- WITHOUT X - -pub fn minmaxlttb_scalar_without_x_parallel + Send + Sync>( - y: ArrayView1, - n_out: usize, - minmax_ratio: usize, - n_threads: usize, -) -> Array1 -where - SCALAR: ScalarArgMinMax, - for<'a> ArrayView1<'a, Ty>: Average, -{ - minmaxlttb_generic_without_x( - y, - n_out, - minmax_ratio, - Some(n_threads), - MinMaxFunctionWithoutX::Parallel(minmax::min_max_scalar_without_x_parallel), - ) -} - -// --------------------------------------- TESTS --------------------------------------- - -#[cfg(test)] -mod tests { - use rstest::rstest; - use rstest_reuse::{self, *}; - - use super::{minmaxlttb_scalar_with_x, minmaxlttb_scalar_without_x}; - use super::{minmaxlttb_scalar_with_x_parallel, minmaxlttb_scalar_without_x_parallel}; - use ndarray::{array, Array1}; - - use dev_utils::utils; - - fn get_array_f32(n: usize) -> Array1 { - utils::get_random_array(n, f32::MIN, f32::MAX) - } - - // Template for the n_threads matrix - #[template] - #[rstest] - #[case(1)] - #[case(utils::get_all_threads() / 2)] - #[case(utils::get_all_threads())] - #[case(utils::get_all_threads() * 2)] - fn threads(#[case] n_threads: usize) {} - - #[test] - fn test_minmaxlttb_with_x() { - let x = array![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; - let y = array![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; - let sampled_indices = minmaxlttb_scalar_with_x(x.view(), y.view(), 4, 2); - assert_eq!(sampled_indices, array![0, 1, 5, 9]); - } - - #[test] - fn test_minmaxlttb_without_x() { - let y = array![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; - let sampled_indices = minmaxlttb_scalar_without_x(y.view(), 4, 2); - assert_eq!(sampled_indices, array![0, 1, 5, 9]); - } - - #[apply(threads)] - fn test_minmaxlttb_with_x_parallel(n_threads: usize) { - let x = array![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; - let y = array![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; - let sampled_indices = - minmaxlttb_scalar_with_x_parallel(x.view(), y.view(), 4, 2, n_threads); - assert_eq!(sampled_indices, array![0, 1, 5, 9]); - } - - #[apply(threads)] - fn test_minmaxlttb_without_x_parallel(n_threads: usize) { - let y = array![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; - let sampled_indices = minmaxlttb_scalar_without_x_parallel(y.view(), 4, 2, n_threads); - assert_eq!(sampled_indices, array![0, 1, 5, 9]); - } - - #[apply(threads)] - fn test_many_random_runs_same_output(n_threads: usize) { - let n: usize = 20_000; - let n_out: usize = 100; - let minmax_ratio: usize = 5; - for _ in 0..100 { - // TODO: test with x - let arr = get_array_f32(n); - let idxs1 = minmaxlttb_scalar_without_x(arr.view(), n_out, minmax_ratio); - let idxs2 = - minmaxlttb_scalar_without_x_parallel(arr.view(), n_out, minmax_ratio, n_threads); - assert_eq!(idxs1, idxs2); - } - } -} diff --git a/downsample_rs/src/minmaxlttb/simd.rs b/downsample_rs/src/minmaxlttb/simd.rs deleted file mode 100644 index cf06644..0000000 --- a/downsample_rs/src/minmaxlttb/simd.rs +++ /dev/null @@ -1,178 +0,0 @@ -use super::super::minmax; -use super::generic::{ - minmaxlttb_generic, minmaxlttb_generic_without_x, MinMaxFunctionWithX, MinMaxFunctionWithoutX, -}; - -use super::super::helpers::Average; -use super::super::types::Num; -use ndarray::{Array1, ArrayView1}; -use num_traits::{AsPrimitive, FromPrimitive}; - -use argminmax::ArgMinMax; - -// ----------------------------------- NON-PARALLEL ------------------------------------ - -// ----------- WITH X - -pub fn minmaxlttb_simd_with_x< - Tx: Num + FromPrimitive + AsPrimitive, - Ty: Num + AsPrimitive, ->( - x: ArrayView1, - y: ArrayView1, - n_out: usize, - minmax_ratio: usize, -) -> Array1 -where - for<'a> ArrayView1<'a, Ty>: ArgMinMax, - for<'a> ArrayView1<'a, Ty>: Average, -{ - minmaxlttb_generic( - x, - y, - n_out, - minmax_ratio, - None, - MinMaxFunctionWithX::Serial(minmax::min_max_simd_with_x), - ) -} - -// ----------- WITHOUT X - -pub fn minmaxlttb_simd_without_x>( - y: ArrayView1, - n_out: usize, - minmax_ratio: usize, -) -> Array1 -where - for<'a> ArrayView1<'a, Ty>: ArgMinMax, - for<'a> ArrayView1<'a, Ty>: Average, -{ - minmaxlttb_generic_without_x( - y, - n_out, - minmax_ratio, - None, - MinMaxFunctionWithoutX::Serial(minmax::min_max_simd_without_x), - ) -} - -// ------------------------------------- PARALLEL -------------------------------------- - -// ----------- WITH X - -pub fn minmaxlttb_simd_with_x_parallel( - x: ArrayView1, - y: ArrayView1, - n_out: usize, - minmax_ratio: usize, - n_threads: usize, -) -> Array1 -where - for<'a> ArrayView1<'a, Ty>: ArgMinMax, - for<'a> ArrayView1<'a, Ty>: Average, - Tx: Num + FromPrimitive + AsPrimitive + Send + Sync, - Ty: Num + AsPrimitive + Send + Sync, -{ - minmaxlttb_generic( - x, - y, - n_out, - minmax_ratio, - Some(n_threads), - MinMaxFunctionWithX::Parallel(minmax::min_max_simd_with_x_parallel), - ) -} - -// ----------- WITHOUT X - -pub fn minmaxlttb_simd_without_x_parallel + Send + Sync>( - y: ArrayView1, - n_out: usize, - minmax_ratio: usize, - n_threads: usize, -) -> Array1 -where - for<'a> ArrayView1<'a, Ty>: ArgMinMax, - for<'a> ArrayView1<'a, Ty>: Average, -{ - minmaxlttb_generic_without_x( - y, - n_out, - minmax_ratio, - Some(n_threads), - MinMaxFunctionWithoutX::Parallel(minmax::min_max_simd_without_x_parallel), - ) -} - -// --------------------------------------- TESTS --------------------------------------- - -#[cfg(test)] -mod tests { - use rstest::rstest; - use rstest_reuse::{self, *}; - - use super::{minmaxlttb_simd_with_x, minmaxlttb_simd_without_x}; - use super::{minmaxlttb_simd_with_x_parallel, minmaxlttb_simd_without_x_parallel}; - use ndarray::{array, Array1}; - - use dev_utils::utils; - - fn get_array_f32(n: usize) -> Array1 { - utils::get_random_array(n, f32::MIN, f32::MAX) - } - - // Template for the n_threads matrix - #[template] - #[rstest] - #[case(1)] - #[case(utils::get_all_threads() / 2)] - #[case(utils::get_all_threads())] - #[case(utils::get_all_threads() * 2)] - fn threads(#[case] n_threads: usize) {} - - #[test] - fn test_minmaxlttb_with_x() { - let x = array![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; - let y = array![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; - let sampled_indices = minmaxlttb_simd_with_x(x.view(), y.view(), 4, 2); - assert_eq!(sampled_indices, array![0, 1, 5, 9]); - } - - #[test] - fn test_minmaxlttb_without_x() { - let y = array![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; - let sampled_indices = minmaxlttb_simd_without_x(y.view(), 4, 2); - assert_eq!(sampled_indices, array![0, 1, 5, 9]); - } - - #[apply(threads)] - fn test_minmaxlttb_with_x_parallel(n_threads: usize) { - let x = array![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; - let y = array![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; - let sampled_indices = minmaxlttb_simd_with_x_parallel(x.view(), y.view(), 4, 2, n_threads); - assert_eq!(sampled_indices, array![0, 1, 5, 9]); - } - - #[apply(threads)] - fn test_minmaxlttb_without_x_parallel(n_threads: usize) { - let y = array![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]; - let sampled_indices = minmaxlttb_simd_without_x_parallel(y.view(), 4, 2, n_threads); - assert_eq!(sampled_indices, array![0, 1, 5, 9]); - } - - #[apply(threads)] - fn test_many_random_runs_same_output(n_threads: usize) { - let n: usize = 20_000; - let n_out: usize = 100; - let minmax_ratio: usize = 5; - for _ in 0..100 { - // TODO: test with x - let arr = get_array_f32(n); - let idxs1 = minmaxlttb_simd_without_x(arr.view(), n_out, minmax_ratio); - let idxs2 = - minmaxlttb_simd_without_x_parallel(arr.view(), n_out, minmax_ratio, n_threads); - assert_eq!(idxs1, idxs2); - } - } -} diff --git a/downsample_rs/src/searchsorted.rs b/downsample_rs/src/searchsorted.rs index d60d261..af53a9c 100644 --- a/downsample_rs/src/searchsorted.rs +++ b/downsample_rs/src/searchsorted.rs @@ -1,5 +1,3 @@ -use ndarray::ArrayView1; - use rayon::iter::IndexedParallelIterator; use rayon::prelude::*; @@ -15,12 +13,7 @@ use num_traits::{AsPrimitive, FromPrimitive}; /// https://docs.python.org/3/library/bisect.html#bisect.bisect /// // #[inline(always)] -fn binary_search( - arr: ArrayView1, - value: T, - left: usize, - right: usize, -) -> usize { +fn binary_search(arr: &[T], value: T, left: usize, right: usize) -> usize { let mut size: usize = right - left; let mut left: usize = left; let mut right: usize = right; @@ -51,7 +44,7 @@ fn binary_search( /// // #[inline(always)] fn binary_search_with_mid( - arr: ArrayView1, + arr: &[T], value: T, left: usize, right: usize, @@ -83,7 +76,7 @@ fn binary_search_with_mid( // --- Sequential version pub(crate) fn get_equidistant_bin_idx_iterator( - arr: ArrayView1, + arr: &[T], nb_bins: usize, ) -> impl Iterator> + '_ where @@ -133,7 +126,7 @@ fn sequential_add_mul(start_val: f64, add_val: f64, mul: usize) -> f64 { } pub(crate) fn get_equidistant_bin_idx_iterator_parallel( - arr: ArrayView1, + arr: &[T], nb_bins: usize, n_threads: usize, ) -> impl IndexedParallelIterator> + '_> + '_ @@ -194,7 +187,6 @@ mod tests { use rstest_reuse::{self, *}; use super::*; - use ndarray::Array1; use dev_utils::utils::{get_all_threads, get_random_array}; @@ -210,9 +202,9 @@ mod tests { #[test] fn test_search_sorted_identicial_to_np_linspace_searchsorted() { // Create a 0..9999 array - let arr = Array1::from((0..10_000).collect::>()); + let arr: [u32; 10_000] = core::array::from_fn(|i| i.as_()); assert!(arr.len() == 10_000); - let iterator = get_equidistant_bin_idx_iterator(arr.view(), 4); + let iterator = get_equidistant_bin_idx_iterator(&arr, 4); // Check the iterator let mut idx: usize = 0; for bin in iterator { @@ -225,82 +217,49 @@ mod tests { #[test] fn test_binary_search() { - let arr = Array1::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); - assert_eq!(binary_search(arr.view(), 0, 0, arr.len() - 1), 0); - assert_eq!(binary_search(arr.view(), 1, 0, arr.len() - 1), 1); - assert_eq!(binary_search(arr.view(), 2, 0, arr.len() - 1), 2); - assert_eq!(binary_search(arr.view(), 3, 0, arr.len() - 1), 3); - assert_eq!(binary_search(arr.view(), 4, 0, arr.len() - 1), 4); - assert_eq!(binary_search(arr.view(), 5, 0, arr.len() - 1), 5); - assert_eq!(binary_search(arr.view(), 6, 0, arr.len() - 1), 6); - assert_eq!(binary_search(arr.view(), 7, 0, arr.len() - 1), 7); - assert_eq!(binary_search(arr.view(), 8, 0, arr.len() - 1), 8); - assert_eq!(binary_search(arr.view(), 9, 0, arr.len() - 1), 9); - assert_eq!(binary_search(arr.view(), 10, 0, arr.len() - 1), 10); - assert_eq!(binary_search(arr.view(), 11, 0, arr.len() - 1), 10); + let arr = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + assert_eq!(binary_search(&arr, 0, 0, arr.len() - 1), 0); + assert_eq!(binary_search(&arr, 1, 0, arr.len() - 1), 1); + assert_eq!(binary_search(&arr, 2, 0, arr.len() - 1), 2); + assert_eq!(binary_search(&arr, 3, 0, arr.len() - 1), 3); + assert_eq!(binary_search(&arr, 4, 0, arr.len() - 1), 4); + assert_eq!(binary_search(&arr, 5, 0, arr.len() - 1), 5); + assert_eq!(binary_search(&arr, 6, 0, arr.len() - 1), 6); + assert_eq!(binary_search(&arr, 7, 0, arr.len() - 1), 7); + assert_eq!(binary_search(&arr, 8, 0, arr.len() - 1), 8); + assert_eq!(binary_search(&arr, 9, 0, arr.len() - 1), 9); + assert_eq!(binary_search(&arr, 10, 0, arr.len() - 1), 10); + assert_eq!(binary_search(&arr, 11, 0, arr.len() - 1), 10); } #[test] fn test_binary_search_with_mid() { - let arr = Array1::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); - assert_eq!( - binary_search_with_mid(arr.view(), 0, 0, arr.len() - 1, 0), - 0 - ); - assert_eq!( - binary_search_with_mid(arr.view(), 1, 0, arr.len() - 1, 0), - 1 - ); - assert_eq!( - binary_search_with_mid(arr.view(), 2, 0, arr.len() - 1, 1), - 2 - ); - assert_eq!( - binary_search_with_mid(arr.view(), 3, 0, arr.len() - 1, 2), - 3 - ); - assert_eq!( - binary_search_with_mid(arr.view(), 4, 0, arr.len() - 1, 3), - 4 - ); - assert_eq!( - binary_search_with_mid(arr.view(), 5, 0, arr.len() - 1, 4), - 5 - ); - assert_eq!( - binary_search_with_mid(arr.view(), 6, 0, arr.len() - 1, 5), - 6 - ); - assert_eq!( - binary_search_with_mid(arr.view(), 7, 0, arr.len() - 1, 6), - 7 - ); - assert_eq!( - binary_search_with_mid(arr.view(), 8, 0, arr.len() - 1, 7), - 8 - ); - assert_eq!( - binary_search_with_mid(arr.view(), 9, 0, arr.len() - 1, 8), - 9 - ); - assert_eq!( - binary_search_with_mid(arr.view(), 10, 0, arr.len() - 1, 9), - 10 - ); - // This line causes the code to crash -> because value higher than arr[mid] - // assert_eq!(binary_search_with_mid(arr.view(), 11, 0, arr.len() - 1, 9), 10); + let arr = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + assert_eq!(binary_search_with_mid(&arr, 0, 0, arr.len() - 1, 0), 0); + assert_eq!(binary_search_with_mid(&arr, 1, 0, arr.len() - 1, 0), 1); + assert_eq!(binary_search_with_mid(&arr, 2, 0, arr.len() - 1, 1), 2); + assert_eq!(binary_search_with_mid(&arr, 3, 0, arr.len() - 1, 2), 3); + assert_eq!(binary_search_with_mid(&arr, 4, 0, arr.len() - 1, 3), 4); + assert_eq!(binary_search_with_mid(&arr, 5, 0, arr.len() - 1, 4), 5); + assert_eq!(binary_search_with_mid(&arr, 6, 0, arr.len() - 1, 5), 6); + assert_eq!(binary_search_with_mid(&arr, 7, 0, arr.len() - 1, 6), 7); + assert_eq!(binary_search_with_mid(&arr, 8, 0, arr.len() - 1, 7), 8); + assert_eq!(binary_search_with_mid(&arr, 9, 0, arr.len() - 1, 8), 9); + assert_eq!(binary_search_with_mid(&arr, 10, 0, arr.len() - 1, 9), 10); + // this line causes the code to crash -> because value higher than arr[mid] + // assert_eq!(binary_search_with_mid(&arr, 11, 0, arr.len() - 1, 9), 10); } #[apply(threads)] fn test_get_equidistant_bin_idxs(n_threads: usize) { let expected_indices = vec![0, 4, 7]; - let arr = Array1::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); - let bin_idxs_iter = get_equidistant_bin_idx_iterator(arr.view(), 3); + let arr = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + let bin_idxs_iter = get_equidistant_bin_idx_iterator(&arr, 3); let bin_idxs = bin_idxs_iter.map(|x| x.unwrap().0).collect::>(); assert_eq!(bin_idxs, expected_indices); - let bin_idxs_iter = get_equidistant_bin_idx_iterator_parallel(arr.view(), 3, n_threads); + let bin_idxs_iter = get_equidistant_bin_idx_iterator_parallel(&arr, 3, n_threads); let bin_idxs = bin_idxs_iter .map(|x| x.map(|x| x.unwrap().0).collect::>()) .flatten() @@ -314,19 +273,17 @@ mod tests { let nb_bins = 100; for _ in 0..100 { - let arr = get_random_array::(n, i32::MIN, i32::MAX); + let mut arr = get_random_array::(n, i32::MIN, i32::MAX); // Sort the array - let mut arr = arr.to_vec(); arr.sort_by(|a, b| a.partial_cmp(b).unwrap()); - let arr = Array1::from(arr); // Calculate the bin indexes - let bin_idxs_iter = get_equidistant_bin_idx_iterator(arr.view(), nb_bins); + let bin_idxs_iter = get_equidistant_bin_idx_iterator(&arr[..], nb_bins); let bin_idxs = bin_idxs_iter.map(|x| x.unwrap().0).collect::>(); // Calculate the bin indexes in parallel let bin_idxs_iter = - get_equidistant_bin_idx_iterator_parallel(arr.view(), nb_bins, n_threads); + get_equidistant_bin_idx_iterator_parallel(&arr[..], nb_bins, n_threads); let bin_idxs_parallel = bin_idxs_iter .map(|x| x.map(|x| x.unwrap().0).collect::>()) .flatten() diff --git a/src/lib.rs b/src/lib.rs index 4b25be8..5a93afc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,7 +23,7 @@ macro_rules! _create_pyfunc_without_x { y: PyReadonlyArray1<$type>, n_out: usize, ) -> &'py PyArray1 { - let y = y.as_array(); + let y = y.as_slice().unwrap(); let sampled_indices = $resample_mod::$resample_fn(y, n_out); sampled_indices.into_pyarray(py) } @@ -42,7 +42,7 @@ macro_rules! _create_pyfunc_without_x_multithreaded { n_out: usize, n_threads: usize, ) -> &'py PyArray1 { - let y = y.as_array(); + let y = y.as_slice().unwrap(); let sampled_indices = $resample_mod::$resample_fn(y, n_out, n_threads); sampled_indices.into_pyarray(py) } @@ -61,7 +61,7 @@ macro_rules! _create_pyfunc_without_x_with_ratio { n_out: usize, ratio: usize, ) -> &'py PyArray1 { - let y = y.as_array(); + let y = y.as_slice().unwrap(); let sampled_indices = $resample_mod::$resample_fn(y, n_out, ratio); sampled_indices.into_pyarray(py) } @@ -81,7 +81,7 @@ macro_rules! _create_pyfunc_without_x_with_ratio_multithreaded { ratio: usize, n_threads: usize, ) -> &'py PyArray1 { - let y = y.as_array(); + let y = y.as_slice().unwrap(); let sampled_indices = $resample_mod::$resample_fn(y, n_out, ratio, n_threads); sampled_indices.into_pyarray(py) } @@ -112,8 +112,8 @@ macro_rules! _create_pyfunc_with_x { y: PyReadonlyArray1<$type_y>, n_out: usize, ) -> &'py PyArray1 { - let x = x.as_array(); - let y = y.as_array(); + let x = x.as_slice().unwrap(); + let y = y.as_slice().unwrap(); let sampled_indices = $resample_mod::$resample_fn(x, y, n_out); sampled_indices.into_pyarray(py) } @@ -133,8 +133,8 @@ macro_rules! _create_pyfunc_with_x_multithreaded { n_out: usize, n_threads: usize, ) -> &'py PyArray1 { - let x = x.as_array(); - let y = y.as_array(); + let x = x.as_slice().unwrap(); + let y = y.as_slice().unwrap(); let sampled_indices = $resample_mod::$resample_fn(x, y, n_out, n_threads); sampled_indices.into_pyarray(py) } @@ -154,8 +154,8 @@ macro_rules! _create_pyfunc_with_x_with_ratio { n_out: usize, ratio: usize, ) -> &'py PyArray1 { - let x = x.as_array(); - let y = y.as_array(); + let x = x.as_slice().unwrap(); + let y = y.as_slice().unwrap(); let sampled_indices = $resample_mod::$resample_fn(x, y, n_out, ratio); sampled_indices.into_pyarray(py) } @@ -176,8 +176,8 @@ macro_rules! _create_pyfunc_with_x_with_ratio_multithreaded { ratio: usize, n_threads: usize, ) -> &'py PyArray1 { - let x = x.as_array(); - let y = y.as_array(); + let x = x.as_slice().unwrap(); + let y = y.as_slice().unwrap(); let sampled_indices = $resample_mod::$resample_fn(x, y, n_out, ratio, n_threads); sampled_indices.into_pyarray(py) } @@ -312,71 +312,37 @@ use downsample_rs::minmax as minmax_mod; // Create a sub module for the minmax algorithm #[pymodule] fn minmax(_py: Python<'_>, m: &PyModule) -> PyResult<()> { - // ----------------- SCALAR + // ----------------- SEQUENTIAL - let scalar_mod = PyModule::new(_py, "scalar")?; + let sequential_mod = PyModule::new(_py, "sequential")?; // ----- WITHOUT X { - create_pyfuncs_without_x!(minmax_mod, min_max_scalar_without_x, scalar_mod); + create_pyfuncs_without_x!(minmax_mod, min_max_without_x, sequential_mod); } // ----- WITH X { - create_pyfuncs_with_x!(minmax_mod, min_max_scalar_with_x, scalar_mod); + create_pyfuncs_with_x!(minmax_mod, min_max_with_x, sequential_mod); } - // ----------------- SCALAR PARALLEL + // ----------------- PARALLEL - let scalar_parallel_mod = PyModule::new(_py, "scalar_parallel")?; + let parallel_mod = PyModule::new(_py, "parallel")?; // ----- WITHOUT X { - create_pyfuncs_without_x!(@threaded minmax_mod, min_max_scalar_without_x_parallel, scalar_parallel_mod); + create_pyfuncs_without_x!(@threaded minmax_mod, min_max_without_x_parallel, parallel_mod); } // ----- WITH X { - create_pyfuncs_with_x!(@threaded minmax_mod, min_max_scalar_with_x_parallel, scalar_parallel_mod); - } - - // ----------------- SIMD - - let simd_mod = PyModule::new(_py, "simd")?; - - // ----- WITHOUT X - { - create_pyfuncs_without_x!(minmax_mod, min_max_simd_without_x, simd_mod); - } - - // ----- WITH X - { - create_pyfuncs_with_x!(minmax_mod, min_max_simd_with_x, simd_mod); - } - - // ----------------- SIMD PARALLEL - - let simd_parallel_mod = PyModule::new(_py, "simd_parallel")?; - - // ----- WITHOUT X - { - create_pyfuncs_without_x!(@threaded - minmax_mod, - min_max_simd_without_x_parallel, - simd_parallel_mod - ); - } - - // ----- WITH X - { - create_pyfuncs_with_x!(@threaded minmax_mod, min_max_simd_with_x_parallel, simd_parallel_mod); + create_pyfuncs_with_x!(@threaded minmax_mod, min_max_with_x_parallel, parallel_mod); } // Add the sub modules to the module - m.add_submodule(scalar_mod)?; - m.add_submodule(scalar_parallel_mod)?; - m.add_submodule(simd_mod)?; - m.add_submodule(simd_parallel_mod)?; + m.add_submodule(sequential_mod)?; + m.add_submodule(parallel_mod)?; Ok(()) } @@ -388,67 +354,37 @@ use downsample_rs::m4 as m4_mod; // Create a sub module for the M4 algorithm #[pymodule] fn m4(_py: Python, m: &PyModule) -> PyResult<()> { - // ----------------- SCALAR + // ----------------- SEQUENTIAL - let scalar_mod = PyModule::new(_py, "scalar")?; + let sequential_mod = PyModule::new(_py, "sequential")?; // ----- WITHOUT X { - create_pyfuncs_without_x!(m4_mod, m4_scalar_without_x, scalar_mod); + create_pyfuncs_without_x!(m4_mod, m4_without_x, sequential_mod); } // ----- WITH X { - create_pyfuncs_with_x!(m4_mod, m4_scalar_with_x, scalar_mod); + create_pyfuncs_with_x!(m4_mod, m4_with_x, sequential_mod); } - // ----------------- SCALAR PARALLEL + // ----------------- PARALLEL - let scalar_parallel_mod = PyModule::new(_py, "scalar_parallel")?; + let parallel_mod = PyModule::new(_py, "parallel")?; // ----- WITHOUT X { - create_pyfuncs_without_x!(@threaded m4_mod, m4_scalar_without_x_parallel, scalar_parallel_mod); + create_pyfuncs_without_x!(@threaded m4_mod, m4_without_x_parallel, parallel_mod); } // ----- WITH X { - create_pyfuncs_with_x!(@threaded m4_mod, m4_scalar_with_x_parallel, scalar_parallel_mod); - } - - // ----------------- SIMD - - let simd_mod = PyModule::new(_py, "simd")?; - - // ----- WITHOUT X - { - create_pyfuncs_without_x!(m4_mod, m4_simd_without_x, simd_mod); - } - - // ----- WITH X - { - create_pyfuncs_with_x!(m4_mod, m4_simd_with_x, simd_mod); - } - - // ----------------- SIMD PARALLEL - - let simd_parallel_mod = PyModule::new(_py, "simd_parallel")?; - - // ----- WITHOUT X - { - create_pyfuncs_without_x!(@threaded m4_mod, m4_simd_without_x_parallel, simd_parallel_mod); - } - - // ----- WITH X - { - create_pyfuncs_with_x!(@threaded m4_mod, m4_simd_with_x_parallel, simd_parallel_mod); + create_pyfuncs_with_x!(@threaded m4_mod, m4_with_x_parallel, parallel_mod); } // Add the sub modules to the module - m.add_submodule(scalar_mod)?; - m.add_submodule(scalar_parallel_mod)?; - m.add_submodule(simd_mod)?; - m.add_submodule(simd_parallel_mod)?; + m.add_submodule(sequential_mod)?; + m.add_submodule(parallel_mod)?; Ok(()) } @@ -460,23 +396,23 @@ use downsample_rs::lttb as lttb_mod; // Create a sub module for the LTTB algorithm #[pymodule] fn lttb(_py: Python, m: &PyModule) -> PyResult<()> { - // ----------------- SCALAR + // ----------------- SEQUENTIAL - let scalar_mod = PyModule::new(_py, "scalar")?; + let sequential_mod = PyModule::new(_py, "sequential")?; // Create the Python functions for the module // ----- WITHOUT X { - create_pyfuncs_without_x!(lttb_mod, lttb_without_x, scalar_mod); + create_pyfuncs_without_x!(lttb_mod, lttb_without_x, sequential_mod); } // ----- WITH X { - create_pyfuncs_with_x!(lttb_mod, lttb_with_x, scalar_mod); + create_pyfuncs_with_x!(lttb_mod, lttb_with_x, sequential_mod); } // Add the sub modules to the module - m.add_submodule(scalar_mod)?; + m.add_submodule(sequential_mod)?; Ok(()) } @@ -488,70 +424,30 @@ use downsample_rs::minmaxlttb as minmaxlttb_mod; // Create a sub module for the MINMAXLTTB algorithm #[pymodule] fn minmaxlttb(_py: Python, m: &PyModule) -> PyResult<()> { - // ----------------- SCALAR - - let scalar_mod = PyModule::new(_py, "scalar")?; - - // ----- WITHOUT X - { - create_pyfuncs_without_x_with_ratio!( - minmaxlttb_mod, - minmaxlttb_scalar_without_x, - scalar_mod - ); - } - - // ----- WITH X - { - create_pyfuncs_with_x_with_ratio!(minmaxlttb_mod, minmaxlttb_scalar_with_x, scalar_mod); - } - - // ----------------- SCALAR PARALLEL - - let scalar_parallel_mod = PyModule::new(_py, "scalar_parallel")?; - - // ----- WITHOUT X - { - create_pyfuncs_without_x_with_ratio!(@threaded - minmaxlttb_mod, - minmaxlttb_scalar_without_x_parallel, - scalar_parallel_mod - ); - } - - // ----- WITH X - { - create_pyfuncs_with_x_with_ratio!(@threaded - minmaxlttb_mod, - minmaxlttb_scalar_with_x_parallel, - scalar_parallel_mod - ); - } - - // ----------------- SIMD + // ----------------- SEQUENTIAL - let simd_mod = PyModule::new(_py, "simd")?; + let sequential_mod = PyModule::new(_py, "sequential")?; // ----- WITHOUT X { - create_pyfuncs_without_x_with_ratio!(minmaxlttb_mod, minmaxlttb_simd_without_x, simd_mod); + create_pyfuncs_without_x_with_ratio!(minmaxlttb_mod, minmaxlttb_without_x, sequential_mod); } // ----- WITH X { - create_pyfuncs_with_x_with_ratio!(minmaxlttb_mod, minmaxlttb_simd_with_x, simd_mod); + create_pyfuncs_with_x_with_ratio!(minmaxlttb_mod, minmaxlttb_with_x, sequential_mod); } - // ----------------- SIMD PARALLEL + // ----------------- PARALLEL - let simd_parallel_mod = PyModule::new(_py, "simd_parallel")?; + let parallel_mod = PyModule::new(_py, "parallel")?; // ----- WITHOUT X { create_pyfuncs_without_x_with_ratio!(@threaded minmaxlttb_mod, - minmaxlttb_simd_without_x_parallel, - simd_parallel_mod + minmaxlttb_without_x_parallel, + parallel_mod ); } @@ -559,16 +455,14 @@ fn minmaxlttb(_py: Python, m: &PyModule) -> PyResult<()> { { create_pyfuncs_with_x_with_ratio!(@threaded minmaxlttb_mod, - minmaxlttb_simd_with_x_parallel, - simd_parallel_mod + minmaxlttb_with_x_parallel, + parallel_mod ); } // Add the submodules to the module - m.add_submodule(scalar_mod)?; - m.add_submodule(scalar_parallel_mod)?; - m.add_submodule(simd_mod)?; - m.add_submodule(simd_parallel_mod)?; + m.add_submodule(sequential_mod)?; + m.add_submodule(parallel_mod)?; Ok(()) } diff --git a/tests/test_rust_mods.py b/tests/test_rust_mods.py index f592823..a6532d3 100644 --- a/tests/test_rust_mods.py +++ b/tests/test_rust_mods.py @@ -23,23 +23,23 @@ def _test_rust_mod_correctly_build(mod, sub_mods, has_x_impl: bool): def test_minmax_rust_mod_correctly_build(): mod = tsds_rs.minmax - sub_mods = ["simd", "simd_parallel", "scalar", "scalar_parallel"] + sub_mods = ["sequential", "parallel"] _test_rust_mod_correctly_build(mod, sub_mods, has_x_impl=False) def test_m4_rust_mod_correctly_build(): mod = tsds_rs.m4 - sub_mods = ["simd", "simd_parallel", "scalar", "scalar_parallel"] + sub_mods = ["sequential", "parallel"] _test_rust_mod_correctly_build(mod, sub_mods, has_x_impl=False) def test_lttb_rust_mod_correctly_build(): mod = tsds_rs.lttb - sub_mods = ["scalar"] + sub_mods = ["sequential"] _test_rust_mod_correctly_build(mod, sub_mods, has_x_impl=True) def test_minmaxlttb_rust_mod_correctly_build(): mod = tsds_rs.minmaxlttb - sub_mods = ["simd", "simd_parallel", "scalar", "scalar_parallel"] + sub_mods = ["sequential", "parallel"] _test_rust_mod_correctly_build(mod, sub_mods, has_x_impl=True) diff --git a/tsdownsample/downsampling_interface.py b/tsdownsample/downsampling_interface.py index c177ca6..de20d46 100644 --- a/tsdownsample/downsampling_interface.py +++ b/tsdownsample/downsampling_interface.py @@ -161,10 +161,7 @@ def mod_single_core(self) -> ModuleType: If SIMD compiled module is available, that one is returned. Otherwise, the scalar compiled module is returned. """ - if hasattr(self.rust_mod, "simd"): - # use SIMD implementation if available - return self.rust_mod.simd - return self.rust_mod.scalar + return self.rust_mod.sequential @property def mod_multi_core(self) -> Union[ModuleType, None]: @@ -177,12 +174,9 @@ def mod_multi_core(self) -> Union[ModuleType, None]: Otherwise, the scalar parallel compiled module is returned. If no parallel compiled module is available, None is returned. """ - if hasattr(self.rust_mod, "simd_parallel"): + if hasattr(self.rust_mod, "parallel"): # use SIMD implementation if available - return self.rust_mod.simd_parallel - elif hasattr(self.rust_mod, "scalar_parallel"): - # use scalar implementation if available (when no SIMD available) - return self.rust_mod.scalar_parallel + return self.rust_mod.parallel return None # no parallel compiled module available @staticmethod