From 0359eba4179226ab97b1ef97b344d5fa4993567d Mon Sep 17 00:00:00 2001
From: Niels Praet <61625814+NielsPraet@users.noreply.github.com>
Date: Wed, 2 Aug 2023 14:08:17 +0200
Subject: [PATCH] =?UTF-8?q?=E2=AC=86=EF=B8=8F=20deps:=20update=20argminmax?=
 =?UTF-8?q?=20to=200.6.1=20(#57)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* ⬆️ deps: update argminmax to 0.6.1

* ♻️ refactor: use rust slices in searchsorted.rs

* ♻️ refactor: use slices in m4 generic

* ♻️ tests: fix m4 refactor

* ♻️ tests: refactor lttb to slices

* ♻️ tests: refactor minmax to slices

* ♻️ tests: refactor minmaxlttb to slices

* 🎨 chore: cleanup code

* 🐛 fix: fix rust to python bindings

* 🎨 chore: remove unused variable

* 🐛 fix: fix benchmark code

* 🔥 chore: remove Average implementation for ndarray structs

* ⏪️ chore: revert changes

* ♻️ refactor: remove last dependencies of ndarray

* ➖ deps: remove ndarray dependencies

* :broom: remove unnecessary f16 import

* 🎨 chore: remove useless trait bounds

* ♻️ refactor: clean up m4 code

* 🏗️ fix: update lib to reflect m4 file changes

* ♻️ refactor: clean up minmax code

* 🏗️ fix: update lib to reflect minmax file changes

* ♻️ refactor: add new minmax file

* ✅ tests: fix benchmarks

* ✅ tests: fix benchmarks for minmax

* ♻️ refactor: refactor minmaxlttb files

* 🏗️ refactor: update lib to reflect changes to minmaxlttb files

* ✅ tests: fix benchmarks for minmaxlttb

* 🏗️ refactor: refactor lttb files

* 🏗<fe0f> refactor: update lib to reflect changes to lttb files

* 🐛 fix: rename minmaxlttb mod name

* ✅ tests: fix test_rust_mods

* 🐛 fix: use new rust modules in python code

* :pen: code review

---------

Co-authored-by: Jeroen Van Der Donckt <boebievdd@gmail.com>
---
 downsample_rs/Cargo.toml                      |   3 +-
 downsample_rs/benches/bench_lttb.rs           |  25 +-
 downsample_rs/benches/bench_m4.rs             |  59 ++-
 downsample_rs/benches/bench_minmax.rs         |  75 ++-
 downsample_rs/benches/bench_minmaxlttb.rs     |  73 ++-
 downsample_rs/dev_utils/Cargo.toml            |   1 -
 downsample_rs/dev_utils/src/utils.rs          |  10 +-
 downsample_rs/src/helpers.rs                  |  44 +-
 downsample_rs/src/{lttb/scalar.rs => lttb.rs} |  88 ++--
 downsample_rs/src/lttb/mod.rs                 |   2 -
 downsample_rs/src/m4.rs                       | 452 ++++++++++++++++++
 downsample_rs/src/m4/generic.rs               | 227 ---------
 downsample_rs/src/m4/mod.rs                   |   5 -
 downsample_rs/src/m4/scalar.rs                | 263 ----------
 downsample_rs/src/m4/simd.rs                  | 261 ----------
 downsample_rs/src/minmax.rs                   | 431 +++++++++++++++++
 downsample_rs/src/minmax/generic.rs           | 209 --------
 downsample_rs/src/minmax/mod.rs               |   5 -
 downsample_rs/src/minmax/scalar.rs            | 263 ----------
 downsample_rs/src/minmax/simd.rs              | 265 ----------
 downsample_rs/src/minmaxlttb.rs               | 290 +++++++++++
 downsample_rs/src/minmaxlttb/generic.rs       | 108 -----
 downsample_rs/src/minmaxlttb/mod.rs           |   5 -
 downsample_rs/src/minmaxlttb/scalar.rs        | 179 -------
 downsample_rs/src/minmaxlttb/simd.rs          | 178 -------
 downsample_rs/src/searchsorted.rs             | 121 ++---
 src/lib.rs                                    | 204 ++------
 tests/test_rust_mods.py                       |   8 +-
 tsdownsample/downsampling_interface.py        |  12 +-
 29 files changed, 1437 insertions(+), 2429 deletions(-)
 rename downsample_rs/src/{lttb/scalar.rs => lttb.rs} (69%)
 delete mode 100644 downsample_rs/src/lttb/mod.rs
 create mode 100644 downsample_rs/src/m4.rs
 delete mode 100644 downsample_rs/src/m4/generic.rs
 delete mode 100644 downsample_rs/src/m4/mod.rs
 delete mode 100644 downsample_rs/src/m4/scalar.rs
 delete mode 100644 downsample_rs/src/m4/simd.rs
 create mode 100644 downsample_rs/src/minmax.rs
 delete mode 100644 downsample_rs/src/minmax/generic.rs
 delete mode 100644 downsample_rs/src/minmax/mod.rs
 delete mode 100644 downsample_rs/src/minmax/scalar.rs
 delete mode 100644 downsample_rs/src/minmax/simd.rs
 create mode 100644 downsample_rs/src/minmaxlttb.rs
 delete mode 100644 downsample_rs/src/minmaxlttb/generic.rs
 delete mode 100644 downsample_rs/src/minmaxlttb/mod.rs
 delete mode 100644 downsample_rs/src/minmaxlttb/scalar.rs
 delete mode 100644 downsample_rs/src/minmaxlttb/simd.rs
diff --git a/downsample_rs/Cargo.toml b/downsample_rs/Cargo.toml
index afdb78f..9c52eb8 100644
--- a/downsample_rs/Cargo.toml
+++ b/downsample_rs/Cargo.toml
@@ -8,8 +8,7 @@ license = "MIT"
 
 [dependencies]
 # TODO: perhaps use polars?
-ndarray = {version = "0.15.6", default-features = false, features = ["rayon"] }
-argminmax = { version = "0.3.1", features = ["half"] }
+argminmax = { version = "0.6.1", features = ["half"] }
 # argminmax = { path = "../../argminmax" , features = ["half", "ndarray"] }
 half = { version = "2.1", default-features = false , features=["num-traits"], optional = true}
 num-traits = { version = "0.2.15", default-features = false }
diff --git a/downsample_rs/benches/bench_lttb.rs b/downsample_rs/benches/bench_lttb.rs
index 1576ea4..9e4eb8c 100644
--- a/downsample_rs/benches/bench_lttb.rs
+++ b/downsample_rs/benches/bench_lttb.rs
@@ -2,22 +2,33 @@ use downsample_rs::lttb as lttb_mod;
 
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use dev_utils::{config, utils};
-use ndarray::Array1;
 
 fn lttb_f32_random_array_long(c: &mut Criterion) {
     let n = config::ARRAY_LENGTH_LONG;
-    let x = Array1::from((0..n).map(|i| i as i32).collect::<Vec<i32>>());
+    let x = (0..n).map(|i| i as i32).collect::<Vec<i32>>();
     let y = utils::get_random_array::<f32>(n, f32::MIN, f32::MAX);
     c.bench_function("lttb_scalx_f32", |b| {
-        b.iter(|| lttb_mod::lttb_with_x(black_box(x.view()), black_box(y.view()), black_box(2_000)))
+        b.iter(|| {
+            lttb_mod::lttb_with_x(
+                black_box(x.as_slice()),
+                black_box(y.as_slice()),
+                black_box(2_000),
+            )
+        })
     });
 }
 fn lttb_f32_random_array_50m(c: &mut Criterion) {
     let n = 50_000_000;
-    let x = Array1::from((0..n).map(|i| i as i32).collect::<Vec<i32>>());
+    let x = (0..n).map(|i| i as i32).collect::<Vec<i32>>();
     let y = utils::get_random_array::<f32>(n, f32::MIN, f32::MAX);
     c.bench_function("lttb_scalx_50M_f32", |b| {
-        b.iter(|| lttb_mod::lttb_with_x(black_box(x.view()), black_box(y.view()), black_box(2_000)))
+        b.iter(|| {
+            lttb_mod::lttb_with_x(
+                black_box(x.as_slice()),
+                black_box(y.as_slice()),
+                black_box(2_000),
+            )
+        })
     });
 }
 
@@ -25,14 +36,14 @@ fn lttb_without_x_f32_random_array_long(c: &mut Criterion) {
     let n = config::ARRAY_LENGTH_LONG;
     let y = utils::get_random_array::<f32>(n, f32::MIN, f32::MAX);
     c.bench_function("lttb_scal_f32", |b| {
-        b.iter(|| lttb_mod::lttb_without_x(black_box(y.view()), black_box(2_000)))
+        b.iter(|| lttb_mod::lttb_without_x(black_box(y.as_slice()), black_box(2_000)))
     });
 }
 fn lttb_without_x_f32_random_array_50m(c: &mut Criterion) {
     let n = 50_000_000;
     let y = utils::get_random_array::<f32>(n, f32::MIN, f32::MAX);
     c.bench_function("lttb_scal_50M_f32", |b| {
-        b.iter(|| lttb_mod::lttb_without_x(black_box(y.view()), black_box(2_000)))
+        b.iter(|| lttb_mod::lttb_without_x(black_box(y.as_slice()), black_box(2_000)))
     });
 }
 
diff --git a/downsample_rs/benches/bench_m4.rs b/downsample_rs/benches/bench_m4.rs
index 3d2b091..11b28ed 100644
--- a/downsample_rs/benches/bench_m4.rs
+++ b/downsample_rs/benches/bench_m4.rs
@@ -2,16 +2,15 @@ use downsample_rs::m4 as m4_mod;
 
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use dev_utils::{config, utils};
-use ndarray::Array1;
 
 fn m4_f32_random_array_long_single_core(c: &mut Criterion) {
     let n = config::ARRAY_LENGTH_LONG;
     let data = utils::get_random_array::<f32>(n, f32::MIN, f32::MAX);
     c.bench_function("m4_scal_f32", |b| {
-        b.iter(|| m4_mod::m4_scalar_without_x(black_box(data.view()), black_box(2_000)))
+        b.iter(|| m4_mod::m4_without_x(black_box(data.as_slice()), black_box(2_000)))
     });
     c.bench_function("m4_simd_f32", |b| {
-        b.iter(|| m4_mod::m4_simd_without_x(black_box(data.view()), black_box(2_000)))
+        b.iter(|| m4_mod::m4_without_x(black_box(data.as_slice()), black_box(2_000)))
     });
 }
 
@@ -21,8 +20,8 @@ fn m4_f32_random_array_long_multi_core(c: &mut Criterion) {
     let all_threads: usize = utils::get_all_threads();
     c.bench_function("m4_scal_p_f32", |b| {
         b.iter(|| {
-            m4_mod::m4_scalar_without_x_parallel(
-                black_box(data.view()),
+            m4_mod::m4_without_x_parallel(
+                black_box(data.as_slice()),
                 black_box(2_000),
                 black_box(all_threads),
             )
@@ -30,8 +29,8 @@ fn m4_f32_random_array_long_multi_core(c: &mut Criterion) {
     });
     c.bench_function("m4_simd_p_f32", |b| {
         b.iter(|| {
-            m4_mod::m4_simd_without_x_parallel(
-                black_box(data.view()),
+            m4_mod::m4_without_x_parallel(
+                black_box(data.as_slice()),
                 black_box(2_000),
                 black_box(all_threads),
             )
@@ -42,27 +41,27 @@ fn m4_f32_random_array_long_multi_core(c: &mut Criterion) {
 fn m4_f32_random_array_50M_single_core(c: &mut Criterion) {
     let n = 50_000_000;
     let data = utils::get_random_array::<f32>(n, f32::MIN, f32::MAX);
-    let x = Array1::from((0..n).map(|i| i as i32).collect::<Vec<i32>>());
+    let x = (0..n).map(|i| i as i32).collect::<Vec<i32>>();
     c.bench_function("m4_scal_50M_f32", |b| {
-        b.iter(|| m4_mod::m4_scalar_without_x(black_box(data.view()), black_box(2_000)))
+        b.iter(|| m4_mod::m4_without_x(black_box(data.as_slice()), black_box(2_000)))
     });
     c.bench_function("m4_simd_50M_f32", |b| {
-        b.iter(|| m4_mod::m4_simd_without_x(black_box(data.view()), black_box(2_000)))
+        b.iter(|| m4_mod::m4_without_x(black_box(data.as_slice()), black_box(2_000)))
     });
     c.bench_function("m4_scalx_50M_f32", |b| {
         b.iter(|| {
-            m4_mod::m4_scalar_with_x(
-                black_box(x.view()),
-                black_box(data.view()),
+            m4_mod::m4_with_x(
+                black_box(x.as_slice()),
+                black_box(data.as_slice()),
                 black_box(2_000),
             )
         })
     });
     c.bench_function("m4_simdx_50M_f32", |b| {
         b.iter(|| {
-            m4_mod::m4_simd_with_x(
-                black_box(x.view()),
-                black_box(data.view()),
+            m4_mod::m4_with_x(
+                black_box(x.as_slice()),
+                black_box(data.as_slice()),
                 black_box(2_000),
             )
         })
@@ -72,12 +71,12 @@ fn m4_f32_random_array_50M_single_core(c: &mut Criterion) {
 fn m4_f32_random_array_50M_multi_core(c: &mut Criterion) {
     let n = 50_000_000;
     let data = utils::get_random_array::<f32>(n, f32::MIN, f32::MAX);
-    let x = Array1::from((0..n).map(|i| i as i32).collect::<Vec<i32>>());
+    let x = (0..n).map(|i| i as i32).collect::<Vec<i32>>();
     let all_threads: usize = utils::get_all_threads();
     c.bench_function("m4_scal_p_50M_f32", |b| {
         b.iter(|| {
-            m4_mod::m4_scalar_without_x_parallel(
-                black_box(data.view()),
+            m4_mod::m4_without_x_parallel(
+                black_box(data.as_slice()),
                 black_box(2_000),
                 black_box(all_threads),
             )
@@ -85,8 +84,8 @@ fn m4_f32_random_array_50M_multi_core(c: &mut Criterion) {
     });
     c.bench_function("m4_simd_p_50M_f32", |b| {
         b.iter(|| {
-            m4_mod::m4_simd_without_x_parallel(
-                black_box(data.view()),
+            m4_mod::m4_without_x_parallel(
+                black_box(data.as_slice()),
                 black_box(2_000),
                 black_box(all_threads),
             )
@@ -94,9 +93,9 @@ fn m4_f32_random_array_50M_multi_core(c: &mut Criterion) {
     });
     c.bench_function("m4_scalx_p_50M_f32", |b| {
         b.iter(|| {
-            m4_mod::m4_scalar_with_x_parallel(
-                black_box(x.view()),
-                black_box(data.view()),
+            m4_mod::m4_with_x_parallel(
+                black_box(x.as_slice()),
+                black_box(data.as_slice()),
                 black_box(2_000),
                 black_box(all_threads),
             )
@@ -104,9 +103,9 @@ fn m4_f32_random_array_50M_multi_core(c: &mut Criterion) {
     });
     c.bench_function("m4_simdx_p_50M_f32", |b| {
         b.iter(|| {
-            m4_mod::m4_simd_with_x_parallel(
-                black_box(x.view()),
-                black_box(data.view()),
+            m4_mod::m4_with_x_parallel(
+                black_box(x.as_slice()),
+                black_box(data.as_slice()),
                 black_box(2_000),
                 black_box(all_threads),
             )
@@ -118,13 +117,13 @@ fn m4_f32_random_array_50M_multi_core(c: &mut Criterion) {
 //     let n = config::ARRAY_LENGTH_LONG;
 //     let data = utils::get_worst_case_array::<f32>(n, 1.0);
 //     c.bench_function("overlap_worst_long_f32", |b| {
-//         b.iter(|| minmax_mod::min_max_overlap(black_box(data.view()), black_box(2_000)))
+//         b.iter(|| minmax_mod::min_max_overlap(black_box(data.as_slice()), black_box(2_000)))
 //     });
 //     c.bench_function("simple_worst_long_f32", |b| {
-//         b.iter(|| minmax_mod::min_max(black_box(data.view()), black_box(2_000)))
+//         b.iter(|| minmax_mod::min_max(black_box(data.as_slice()), black_box(2_000)))
 //     });
 //     c.bench_function("simd_worst_long_f32", |b| {
-//         b.iter(|| minmax_mod::min_max_simd_f32(black_box(data.view()), black_box(2_000)))
+//         b.iter(|| minmax_mod::min_max_simd_f32(black_box(data.as_slice()), black_box(2_000)))
 //     });
 // }
 
diff --git a/downsample_rs/benches/bench_minmax.rs b/downsample_rs/benches/bench_minmax.rs
index 9133385..7311c3c 100644
--- a/downsample_rs/benches/bench_minmax.rs
+++ b/downsample_rs/benches/bench_minmax.rs
@@ -2,16 +2,15 @@ use downsample_rs::minmax as minmax_mod;
 
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use dev_utils::{config, utils};
-use ndarray::Array1;
 
 fn minmax_f32_random_array_long_single_core(c: &mut Criterion) {
     let n = config::ARRAY_LENGTH_LONG;
     let data = utils::get_random_array::<f32>(n, f32::MIN, f32::MAX);
     c.bench_function("minmax_scal_f32", |b| {
-        b.iter(|| minmax_mod::min_max_scalar_without_x(black_box(data.view()), black_box(2_000)))
+        b.iter(|| minmax_mod::min_max_without_x(black_box(data.as_slice()), black_box(2_000)))
     });
     c.bench_function("minmax_simd_f32", |b| {
-        b.iter(|| minmax_mod::min_max_simd_without_x(black_box(data.view()), black_box(2_000)))
+        b.iter(|| minmax_mod::min_max_without_x(black_box(data.as_slice()), black_box(2_000)))
     });
 }
 
@@ -21,8 +20,8 @@ fn minmax_f32_random_array_long_multi_core(c: &mut Criterion) {
     let all_threads: usize = utils::get_all_threads();
     c.bench_function("minmax_scal_p_f32", |b| {
         b.iter(|| {
-            minmax_mod::min_max_scalar_without_x_parallel(
-                black_box(data.view()),
+            minmax_mod::min_max_without_x_parallel(
+                black_box(data.as_slice()),
                 black_box(2_000),
                 black_box(all_threads),
             )
@@ -30,8 +29,8 @@ fn minmax_f32_random_array_long_multi_core(c: &mut Criterion) {
     });
     c.bench_function("minmax_simd_p_f32", |b| {
         b.iter(|| {
-            minmax_mod::min_max_simd_without_x_parallel(
-                black_box(data.view()),
+            minmax_mod::min_max_without_x_parallel(
+                black_box(data.as_slice()),
                 black_box(2_000),
                 black_box(all_threads),
             )
@@ -42,55 +41,55 @@ fn minmax_f32_random_array_long_multi_core(c: &mut Criterion) {
 fn minmax_f32_random_array_50M_single_core(c: &mut Criterion) {
     let n = 50_000_000;
     let data = utils::get_random_array::<f32>(n, f32::MIN, f32::MAX);
-    let x = Array1::from((0..n).map(|i| i as i32).collect::<Vec<i32>>());
+    let x = (0..n).map(|i| i as i32).collect::<Vec<i32>>();
     c.bench_function("minmax_scal_50M_f32", |b| {
-        b.iter(|| minmax_mod::min_max_scalar_without_x(black_box(data.view()), black_box(2_000)))
+        b.iter(|| minmax_mod::min_max_without_x(black_box(data.as_slice()), black_box(2_000)))
     });
     c.bench_function("minmax_simd_50M_f32", |b| {
-        b.iter(|| minmax_mod::min_max_simd_without_x(black_box(data.view()), black_box(2_000)))
+        b.iter(|| minmax_mod::min_max_without_x(black_box(data.as_slice()), black_box(2_000)))
     });
     c.bench_function("minmax_scalx_50M_f32", |b| {
         b.iter(|| {
-            minmax_mod::min_max_scalar_with_x(
-                black_box(x.view()),
-                black_box(data.view()),
+            minmax_mod::min_max_with_x(
+                black_box(x.as_slice()),
+                black_box(data.as_slice()),
                 black_box(2_000),
             )
         })
     });
     c.bench_function("minmax_simdx_50M_f32", |b| {
         b.iter(|| {
-            minmax_mod::min_max_simd_with_x(
-                black_box(x.view()),
-                black_box(data.view()),
+            minmax_mod::min_max_with_x(
+                black_box(x.as_slice()),
+                black_box(data.as_slice()),
                 black_box(2_000),
             )
         })
     });
 
     // c.bench_function("minmax_scal_50M_f32", |b| {
-    //     b.iter(|| minmax_mod::min_max_scalar_without_x(black_box(data.view()), black_box(60_000)))
+    //     b.iter(|| minmax_mod::min_max_without_x(black_box(data.as_slice()), black_box(60_000)))
     // });
     // c.bench_function("minmax_simd_50M_f32", |b| {
-    //     b.iter(|| minmax_mod::min_max_simd_without_x(black_box(data.view()), black_box(60_000)))
+    //     b.iter(|| minmax_mod::min_max_without_x(black_box(data.as_slice()), black_box(60_000)))
     // });
     // c.bench_function("minmax_scalx_50M_f32", |b| {
-    //     b.iter(|| minmax_mod::min_max_scalar_with_x(black_box(x.view()), black_box(data.view()), black_box(60_000)))
+    //     b.iter(|| minmax_mod::min_max_with_x(black_box(x.as_slice()), black_box(data.as_slice()), black_box(60_000)))
     // });
     // c.bench_function("minmax_simdx_50M_f32", |b| {
-    //     b.iter(|| minmax_mod::min_max_simd_with_x(black_box(x.view()), black_box(data.view()), black_box(60_000)))
+    //     b.iter(|| minmax_mod::min_max_with_x(black_box(x.as_slice()), black_box(data.as_slice()), black_box(60_000)))
     // });
 }
 
 fn minmax_f32_random_array_50M_long_multi_core(c: &mut Criterion) {
     let n = 50_000_000;
     let data = utils::get_random_array::<f32>(n, f32::MIN, f32::MAX);
-    let x = Array1::from((0..n).map(|i| i as i32).collect::<Vec<i32>>());
+    let x = (0..n).map(|i| i as i32).collect::<Vec<i32>>();
     let all_threads: usize = utils::get_all_threads();
     c.bench_function("minmax_scal_p_50M_f32", |b| {
         b.iter(|| {
-            minmax_mod::min_max_scalar_without_x_parallel(
-                black_box(data.view()),
+            minmax_mod::min_max_without_x_parallel(
+                black_box(data.as_slice()),
                 black_box(2_000),
                 black_box(all_threads),
             )
@@ -98,8 +97,8 @@ fn minmax_f32_random_array_50M_long_multi_core(c: &mut Criterion) {
     });
     c.bench_function("minmax_simd_p_50M_f32", |b| {
         b.iter(|| {
-            minmax_mod::min_max_simd_without_x_parallel(
-                black_box(data.view()),
+            minmax_mod::min_max_without_x_parallel(
+                black_box(data.as_slice()),
                 black_box(2_000),
                 black_box(all_threads),
             )
@@ -107,9 +106,9 @@ fn minmax_f32_random_array_50M_long_multi_core(c: &mut Criterion) {
     });
     c.bench_function("minmax_scalx_p_50M_f32", |b| {
         b.iter(|| {
-            minmax_mod::min_max_scalar_with_x_parallel(
-                black_box(x.view()),
-                black_box(data.view()),
+            minmax_mod::min_max_with_x_parallel(
+                black_box(x.as_slice()),
+                black_box(data.as_slice()),
                 black_box(2_000),
                 black_box(all_threads),
             )
@@ -117,9 +116,9 @@ fn minmax_f32_random_array_50M_long_multi_core(c: &mut Criterion) {
     });
     c.bench_function("minmax_simdx_p_50M_f32", |b| {
         b.iter(|| {
-            minmax_mod::min_max_simd_with_x_parallel(
-                black_box(x.view()),
-                black_box(data.view()),
+            minmax_mod::min_max_with_x_parallel(
+                black_box(x.as_slice()),
+                black_box(data.as_slice()),
                 black_box(2_000),
                 black_box(all_threads),
             )
@@ -127,16 +126,16 @@ fn minmax_f32_random_array_50M_long_multi_core(c: &mut Criterion) {
     });
 
     // c.bench_function("minmax_scal_p_50M_f32", |b| {
-    //     b.iter(|| minmax_mod::min_max_scalar_without_x_parallel(black_box(data.view()), black_box(60_000)))
+    //     b.iter(|| minmax_mod::min_max_without_x_parallel(black_box(data.as_slice()), black_box(60_000)))
     // });
     // c.bench_function("minmax_simd_p_50M_f32", |b| {
-    //     b.iter(|| minmax_mod::min_max_simd_without_x_parallel(black_box(data.view()), black_box(60_000)))
+    //     b.iter(|| minmax_mod::min_max_without_x_parallel(black_box(data.as_slice()), black_box(60_000)))
     // });
     // c.bench_function("minmax_scalx_p_50M_f32", |b| {
-    //     b.iter(|| minmax_mod::min_max_scalar_with_x_parallel(black_box(x.view()), black_box(data.view()), black_box(60_000)))
+    //     b.iter(|| minmax_mod::min_max_with_x_parallel(black_box(x.as_slice()), black_box(data.as_slice()), black_box(60_000)))
     // });
     // c.bench_function("minmax_simdx_p_50M_f32", |b| {
-    //     b.iter(|| minmax_mod::min_max_simd_with_x_parallel(black_box(x.view()), black_box(data.view()), black_box(60_000)))
+    //     b.iter(|| minmax_mod::min_max_with_x_parallel(black_box(x.as_slice()), black_box(data.as_slice()), black_box(60_000)))
     // });
 }
 
@@ -144,13 +143,13 @@ fn minmax_f32_random_array_50M_long_multi_core(c: &mut Criterion) {
 //     let n = config::ARRAY_LENGTH_LONG;
 //     let data = utils::get_worst_case_array::<f32>(n, 1.0);
 //     c.bench_function("overlap_worst_long_f32", |b| {
-//         b.iter(|| minmax_mod::min_max_overlap(black_box(data.view()), black_box(2_000)))
+//         b.iter(|| minmax_mod::min_max_overlap(black_box(data.as_slice()), black_box(2_000)))
 //     });
 //     c.bench_function("simple_worst_long_f32", |b| {
-//         b.iter(|| minmax_mod::min_max(black_box(data.view()), black_box(2_000)))
+//         b.iter(|| minmax_mod::min_max(black_box(data.as_slice()), black_box(2_000)))
 //     });
 //     c.bench_function("simd_worst_long_f32", |b| {
-//         b.iter(|| minmax_mod::min_max_simd_f32(black_box(data.view()), black_box(2_000)))
+//         b.iter(|| minmax_mod::min_max_simd_f32(black_box(data.as_slice()), black_box(2_000)))
 //     });
 // }
 
diff --git a/downsample_rs/benches/bench_minmaxlttb.rs b/downsample_rs/benches/bench_minmaxlttb.rs
index 447dda0..f8a2610 100644
--- a/downsample_rs/benches/bench_minmaxlttb.rs
+++ b/downsample_rs/benches/bench_minmaxlttb.rs
@@ -2,19 +2,18 @@ use downsample_rs::minmaxlttb as minmaxlttb_mod;
 
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use dev_utils::{config, utils};
-use ndarray::Array1;
 
 const MINMAX_RATIO: usize = 30;
 
 fn minmaxlttb_f32_random_array_long_single_core(c: &mut Criterion) {
     let n = config::ARRAY_LENGTH_LONG;
-    let x = Array1::from((0..n).map(|i| i as i32).collect::<Vec<i32>>());
+    let x = (0..n).map(|i| i as i32).collect::<Vec<i32>>();
     let y = utils::get_random_array::<f32>(n, f32::MIN, f32::MAX);
     c.bench_function("mmlttb_scalx_f32", |b| {
         b.iter(|| {
-            minmaxlttb_mod::minmaxlttb_scalar_with_x(
-                black_box(x.view()),
-                black_box(y.view()),
+            minmaxlttb_mod::minmaxlttb_with_x(
+                black_box(x.as_slice()),
+                black_box(y.as_slice()),
                 black_box(2_000),
                 black_box(MINMAX_RATIO),
             )
@@ -22,9 +21,9 @@ fn minmaxlttb_f32_random_array_long_single_core(c: &mut Criterion) {
     });
     c.bench_function("mlttb_simdx_f32", |b| {
         b.iter(|| {
-            minmaxlttb_mod::minmaxlttb_simd_with_x(
-                black_box(x.view()),
-                black_box(y.view()),
+            minmaxlttb_mod::minmaxlttb_with_x(
+                black_box(x.as_slice()),
+                black_box(y.as_slice()),
                 black_box(2_000),
                 black_box(MINMAX_RATIO),
             )
@@ -34,14 +33,14 @@ fn minmaxlttb_f32_random_array_long_single_core(c: &mut Criterion) {
 
 fn minmaxlttb_f32_random_array_long_multi_core(c: &mut Criterion) {
     let n = config::ARRAY_LENGTH_LONG;
-    let x = Array1::from((0..n).map(|i| i as i32).collect::<Vec<i32>>());
+    let x = (0..n).map(|i| i as i32).collect::<Vec<i32>>();
     let y = utils::get_random_array::<f32>(n, f32::MIN, f32::MAX);
     let all_threads: usize = utils::get_all_threads();
     c.bench_function("mmlttb_scalx_p_f32", |b| {
         b.iter(|| {
-            minmaxlttb_mod::minmaxlttb_scalar_with_x_parallel(
-                black_box(x.view()),
-                black_box(y.view()),
+            minmaxlttb_mod::minmaxlttb_with_x_parallel(
+                black_box(x.as_slice()),
+                black_box(y.as_slice()),
                 black_box(2_000),
                 black_box(MINMAX_RATIO),
                 black_box(all_threads),
@@ -50,9 +49,9 @@ fn minmaxlttb_f32_random_array_long_multi_core(c: &mut Criterion) {
     });
     c.bench_function("mlttb_simdx_p_f32", |b| {
         b.iter(|| {
-            minmaxlttb_mod::minmaxlttb_simd_with_x_parallel(
-                black_box(x.view()),
-                black_box(y.view()),
+            minmaxlttb_mod::minmaxlttb_with_x_parallel(
+                black_box(x.as_slice()),
+                black_box(y.as_slice()),
                 black_box(2_000),
                 black_box(MINMAX_RATIO),
                 black_box(all_threads),
@@ -63,13 +62,13 @@ fn minmaxlttb_f32_random_array_long_multi_core(c: &mut Criterion) {
 
 fn minmaxlttb_f32_random_array_50M_single_core(c: &mut Criterion) {
     let n = 50_000_000;
-    let x = Array1::from((0..n).map(|i| i as i32).collect::<Vec<i32>>());
+    let x = (0..n).map(|i| i as i32).collect::<Vec<i32>>();
     let y = utils::get_random_array::<f32>(n, f32::MIN, f32::MAX);
     c.bench_function("mlttb_scalx_50M_f32", |b| {
         b.iter(|| {
-            minmaxlttb_mod::minmaxlttb_scalar_with_x(
-                black_box(x.view()),
-                black_box(y.view()),
+            minmaxlttb_mod::minmaxlttb_with_x(
+                black_box(x.as_slice()),
+                black_box(y.as_slice()),
                 black_box(2_000),
                 black_box(MINMAX_RATIO),
             )
@@ -77,9 +76,9 @@ fn minmaxlttb_f32_random_array_50M_single_core(c: &mut Criterion) {
     });
     c.bench_function("mlttb_simdx_50M_f32", |b| {
         b.iter(|| {
-            minmaxlttb_mod::minmaxlttb_simd_with_x(
-                black_box(x.view()),
-                black_box(y.view()),
+            minmaxlttb_mod::minmaxlttb_with_x(
+                black_box(x.as_slice()),
+                black_box(y.as_slice()),
                 black_box(2_000),
                 black_box(MINMAX_RATIO),
             )
@@ -89,14 +88,14 @@ fn minmaxlttb_f32_random_array_50M_single_core(c: &mut Criterion) {
 
 fn minmaxlttb_f32_random_array_50M_multi_core(c: &mut Criterion) {
     let n = 50_000_000;
-    let x = Array1::from((0..n).map(|i| i as i32).collect::<Vec<i32>>());
+    let x = (0..n).map(|i| i as i32).collect::<Vec<i32>>();
     let y = utils::get_random_array::<f32>(n, f32::MIN, f32::MAX);
     let all_threads: usize = utils::get_all_threads();
     c.bench_function("mlttb_scalx_p_50M_f32", |b| {
         b.iter(|| {
-            minmaxlttb_mod::minmaxlttb_scalar_with_x_parallel(
-                black_box(x.view()),
-                black_box(y.view()),
+            minmaxlttb_mod::minmaxlttb_with_x_parallel(
+                black_box(x.as_slice()),
+                black_box(y.as_slice()),
                 black_box(2_000),
                 black_box(MINMAX_RATIO),
                 black_box(all_threads),
@@ -105,9 +104,9 @@ fn minmaxlttb_f32_random_array_50M_multi_core(c: &mut Criterion) {
     });
     c.bench_function("mlttb_simdx_p_50M_f32", |b| {
         b.iter(|| {
-            minmaxlttb_mod::minmaxlttb_simd_with_x_parallel(
-                black_box(x.view()),
-                black_box(y.view()),
+            minmaxlttb_mod::minmaxlttb_with_x_parallel(
+                black_box(x.as_slice()),
+                black_box(y.as_slice()),
                 black_box(2_000),
                 black_box(MINMAX_RATIO),
                 black_box(all_threads),
@@ -121,8 +120,8 @@ fn minmaxlttb_without_x_f32_random_array_50M_single_core(c: &mut Criterion) {
     let y = utils::get_random_array::<f32>(n, f32::MIN, f32::MAX);
     c.bench_function("mlttb_scal_50M_f32", |b| {
         b.iter(|| {
-            minmaxlttb_mod::minmaxlttb_scalar_without_x(
-                black_box(y.view()),
+            minmaxlttb_mod::minmaxlttb_without_x(
+                black_box(y.as_slice()),
                 black_box(2_000),
                 black_box(MINMAX_RATIO),
             )
@@ -130,8 +129,8 @@ fn minmaxlttb_without_x_f32_random_array_50M_single_core(c: &mut Criterion) {
     });
     c.bench_function("mlttb_simd_50M_f32", |b| {
         b.iter(|| {
-            minmaxlttb_mod::minmaxlttb_simd_without_x(
-                black_box(y.view()),
+            minmaxlttb_mod::minmaxlttb_without_x(
+                black_box(y.as_slice()),
                 black_box(2_000),
                 black_box(MINMAX_RATIO),
             )
@@ -145,8 +144,8 @@ fn minmaxlttb_without_x_f32_random_array_50M_multi_core(c: &mut Criterion) {
     let all_threads: usize = utils::get_all_threads();
     c.bench_function("mlttb_scal_p_50M_f32", |b| {
         b.iter(|| {
-            minmaxlttb_mod::minmaxlttb_scalar_without_x_parallel(
-                black_box(y.view()),
+            minmaxlttb_mod::minmaxlttb_without_x_parallel(
+                black_box(y.as_slice()),
                 black_box(2_000),
                 black_box(MINMAX_RATIO),
                 black_box(all_threads),
@@ -155,8 +154,8 @@ fn minmaxlttb_without_x_f32_random_array_50M_multi_core(c: &mut Criterion) {
     });
     c.bench_function("mlttb_simd_p_50M_f32", |b| {
         b.iter(|| {
-            minmaxlttb_mod::minmaxlttb_simd_without_x_parallel(
-                black_box(y.view()),
+            minmaxlttb_mod::minmaxlttb_without_x_parallel(
+                black_box(y.as_slice()),
                 black_box(2_000),
                 black_box(MINMAX_RATIO),
                 black_box(all_threads),
diff --git a/downsample_rs/dev_utils/Cargo.toml b/downsample_rs/dev_utils/Cargo.toml
index 458e27c..2ab2e6b 100644
--- a/downsample_rs/dev_utils/Cargo.toml
+++ b/downsample_rs/dev_utils/Cargo.toml
@@ -6,6 +6,5 @@ edition = "2021"
 description = "Shared utilities for development (tests & benchmarks)"
 
 [dependencies]
-ndarray = { version = "0.15.6", default-features = false }
 rand = { version = "0.7.2", default-features = false }
 rand_distr = { version = "0.2.2", default-features = false }
diff --git a/downsample_rs/dev_utils/src/utils.rs b/downsample_rs/dev_utils/src/utils.rs
index 77033f2..f3413ae 100644
--- a/downsample_rs/dev_utils/src/utils.rs
+++ b/downsample_rs/dev_utils/src/utils.rs
@@ -1,23 +1,21 @@
-use ndarray::Array1;
-
 use std::ops::{Add, Sub};
 
 use rand::{thread_rng, Rng};
 use rand_distr::Uniform;
 
 // random array that samples between min and max of T
-pub fn get_random_array<T>(n: usize, min_value: T, max_value: T) -> Array1<T>
+pub fn get_random_array<T>(n: usize, min_value: T, max_value: T) -> Vec<T>
 where
     T: Copy + rand::distributions::uniform::SampleUniform,
 {
     let rng = thread_rng();
     let uni = Uniform::new_inclusive(min_value, max_value);
     let arr: Vec<T> = rng.sample_iter(uni).take(n).collect();
-    Array1::from(arr)
+    arr
 }
 
 // worst case array that alternates between increasing max and decreasing min values
-pub fn get_worst_case_array<T>(n: usize, step: T) -> Array1<T>
+pub fn get_worst_case_array<T>(n: usize, step: T) -> Vec<T>
 where
     T: Copy + Default + Sub<Output = T> + Add<Output = T>,
 {
@@ -33,7 +31,7 @@ where
             max_value = max_value + step;
         }
     }
-    Array1::from(arr)
+    arr
 }
 
 // ------------- Multi-threading -------------
diff --git a/downsample_rs/src/helpers.rs b/downsample_rs/src/helpers.rs
index fb8b436..f2961ab 100644
--- a/downsample_rs/src/helpers.rs
+++ b/downsample_rs/src/helpers.rs
@@ -1,6 +1,6 @@
-#[cfg(feature = "half")]
-use half::f16;
-use ndarray::ArrayView1;
+use num_traits::AsPrimitive;
+
+use crate::types::Num;
 
 // ------------ AVERAGE
 
@@ -20,38 +20,14 @@ use ndarray::ArrayView1;
 // See more details: https://github.com/numpy/numpy/blob/8cec82012694571156e8d7696307c848a7603b4e/numpy/core/_methods.py#L164
 
 pub trait Average {
-    fn average(self) -> f64;
-}
-
-impl Average for ArrayView1<'_, f64> {
-    fn average(self) -> f64 {
-        self.mean().unwrap()
-    }
-}
-
-impl Average for ArrayView1<'_, f32> {
-    fn average(self) -> f64 {
-        self.mean().unwrap() as f64
-    }
+    fn average(&self) -> f64;
 }
 
-#[cfg(feature = "half")]
-impl Average for ArrayView1<'_, f16> {
-    fn average(self) -> f64 {
-        self.fold(0f32, |acc, &x| acc + x.to_f32()) as f64 / self.len() as f64
+impl<T> Average for [T]
+where
+    T: Num + AsPrimitive<f64>,
+{
+    fn average(&self) -> f64 {
+        self.iter().fold(0f64, |acc, &x| acc + x.as_()) as f64 / self.len() as f64
     }
 }
-
-macro_rules! impl_average {
-    ($($t:ty)*) => ($(
-        impl Average for ArrayView1<'_, $t> {
-            #[inline(always)]
-            fn average(self) -> f64 {
-                self.fold(0f64, |acc, &x| acc + x as f64) / self.len() as f64
-            }
-        }
-    )*)
-}
-
-// Implement for all signed and unsigned integers
-impl_average!(i8 i16 i32 i64 u8 u16 u32 u64);
diff --git a/downsample_rs/src/lttb/scalar.rs b/downsample_rs/src/lttb.rs
similarity index 69%
rename from downsample_rs/src/lttb/scalar.rs
rename to downsample_rs/src/lttb.rs
index dd2542a..20d28d4 100644
--- a/downsample_rs/src/lttb/scalar.rs
+++ b/downsample_rs/src/lttb.rs
@@ -1,6 +1,5 @@
-use super::super::helpers::Average;
-use super::super::types::Num;
-use ndarray::{Array1, ArrayView1};
+use super::helpers::Average;
+use super::types::Num;
 use num_traits::AsPrimitive;
 use std::cmp;
 
@@ -16,16 +15,13 @@ fn f64_to_i64unsigned(v: f64) -> i64 {
 // ----------- WITH X
 
 pub fn lttb_with_x<Tx: Num + AsPrimitive<f64>, Ty: Num + AsPrimitive<f64>>(
-    x: ArrayView1<Tx>,
-    y: ArrayView1<Ty>,
+    x: &[Tx],
+    y: &[Ty],
     n_out: usize,
-) -> Array1<usize>
-where
-    for<'a> ArrayView1<'a, Ty>: Average,
-{
+) -> Vec<usize> {
     assert_eq!(x.len(), y.len());
     if n_out >= x.len() {
-        return Array1::from((0..x.len()).collect::<Vec<usize>>());
+        return (0..x.len()).collect::<Vec<usize>>();
     }
     assert!(n_out >= 3); // avoid division by 0
 
@@ -34,10 +30,7 @@ where
     // Initially a is the first point in the triangle.
     let mut a: usize = 0;
 
-    let mut sampled_indices: Array1<usize> = Array1::<usize>::default(n_out);
-
-    let x_ptr = x.as_ptr();
-    let y_ptr = y.as_ptr();
+    let mut sampled_indices: Vec<usize> = vec![usize::default(); n_out];
 
     // Always add the first point
     sampled_indices[0] = 0;
@@ -47,33 +40,30 @@ where
         let avg_range_start = (every * (i + 1) as f64) as usize + 1;
         let avg_range_end = cmp::min((every * (i + 2) as f64) as usize + 1, x.len());
 
-        // ArrayBase::slice is rather expensive..
-        let y_slice = unsafe {
-            ArrayView1::from_shape_ptr(avg_range_end - avg_range_start, y_ptr.add(avg_range_start))
-        };
+        let y_slice = &y[avg_range_start..avg_range_end];
         let avg_y: f64 = y_slice.average();
         // TODO: avg_y could be approximated argminmax instead of mean?
         // TODO: below is faster than above, but not as accurate
         // let avg_x: f64 = (x_slice[avg_range_end - 1].as_() + x_slice[avg_range_start].as_()) / 2.0;
-        let avg_x: f64 =
-            unsafe { (x.uget(avg_range_end - 1).as_() + x.uget(avg_range_start).as_()) / 2.0 };
+        let avg_x: f64 = unsafe {
+            (x.get_unchecked(avg_range_end - 1).as_() + x.get_unchecked(avg_range_start).as_())
+                / 2.0
+        };
 
         // Get the range for this bucket
         let range_offs = (every * i as f64) as usize + 1;
         let range_to = avg_range_start; // = start of the next bucket
 
         // Point a
-        let point_ax = unsafe { x.uget(a).as_() };
-        let point_ay = unsafe { y.uget(a).as_() };
+        let point_ax = unsafe { x.get_unchecked(a).as_() };
+        let point_ay = unsafe { y.get_unchecked(a).as_() };
 
         let d1 = point_ax - avg_x;
         let d2 = avg_y - point_ay;
         let offset: f64 = d1 * point_ay + d2 * point_ax;
 
-        let x_slice =
-            unsafe { std::slice::from_raw_parts(x_ptr.add(range_offs), range_to - range_offs) };
-        let y_slice =
-            unsafe { std::slice::from_raw_parts(y_ptr.add(range_offs), range_to - range_offs) };
+        let x_slice = &x[range_offs..range_to];
+        let y_slice = &y[range_offs..range_to];
         (_, a) = y_slice.iter().zip(x_slice.iter()).enumerate().fold(
             (-1i64, a),
             |(max_area, a), (i, (y_, x_))| {
@@ -103,12 +93,9 @@ where
 
 // ----------- WITHOUT X
 
-pub fn lttb_without_x<Ty: Num + AsPrimitive<f64>>(y: ArrayView1<Ty>, n_out: usize) -> Array1<usize>
-where
-    for<'a> ArrayView1<'a, Ty>: Average,
-{
+pub fn lttb_without_x<Ty: Num + AsPrimitive<f64>>(y: &[Ty], n_out: usize) -> Vec<usize> {
     if n_out >= y.len() {
-        return Array1::from((0..y.len()).collect::<Vec<usize>>());
+        return (0..y.len()).collect::<Vec<usize>>();
     }
     assert!(n_out >= 3); // avoid division by 0
 
@@ -117,9 +104,7 @@ where
     // Initially a is the first point in the triangle.
     let mut a: usize = 0;
 
-    let mut sampled_indices: Array1<usize> = Array1::<usize>::default(n_out);
-
-    let y_ptr = y.as_ptr();
+    let mut sampled_indices: Vec<usize> = vec![usize::default(); n_out];
 
     // Always add the first point
     sampled_indices[0] = 0;
@@ -129,10 +114,7 @@ where
         let avg_range_start = (every * (i + 1) as f64) as usize + 1;
         let avg_range_end = cmp::min((every * (i + 2) as f64) as usize + 1, y.len());
 
-        // ArrayBase::slice is rather expensive..
-        let y_slice = unsafe {
-            ArrayView1::from_shape_ptr(avg_range_end - avg_range_start, y_ptr.add(avg_range_start))
-        };
+        let y_slice = &y[avg_range_start..avg_range_end];
         let avg_y: f64 = y_slice.average();
         let avg_x: f64 = (avg_range_start + avg_range_end - 1) as f64 / 2.0;
 
@@ -141,7 +123,7 @@ where
         let range_to = avg_range_start; // = start of the next bucket
 
         // Point a
-        let point_ay = unsafe { y.uget(a).as_() };
+        let point_ay = unsafe { y.get_unchecked(a).as_() };
         let point_ax = a as f64;
 
         let d1 = point_ax - avg_x;
@@ -153,8 +135,7 @@ where
         let offset: f64 = d1 * point_ay;
 
         // TODO: for some reason is this faster than the loop below -> check if this is true for other devices
-        let y_slice =
-            unsafe { ArrayView1::from_shape_ptr(range_to - range_offs, y_ptr.add(range_offs)) };
+        let y_slice = &y[range_offs..range_to];
         (_, a) = y_slice
             .iter()
             .enumerate()
@@ -205,31 +186,30 @@ mod tests {
     use dev_utils::utils;
 
     use super::{lttb_with_x, lttb_without_x};
-    use ndarray::{array, Array1};
 
     #[test]
     fn test_lttb_with_x() {
-        let x = array![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
-        let y = array![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
-        let sampled_indices = lttb_with_x(x.view(), y.view(), 4);
-        assert_eq!(sampled_indices, array![0, 1, 5, 9]);
+        let x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
+        let y = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
+        let sampled_indices = lttb_with_x(&x, &y, 4);
+        assert_eq!(sampled_indices, vec![0, 1, 5, 9]);
     }
 
     #[test]
     fn test_lttb_without_x() {
-        let y = array![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
-        let sampled_indices = lttb_without_x(y.view(), 4);
-        assert_eq!(sampled_indices, array![0, 1, 5, 9]);
+        let y = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
+        let sampled_indices = lttb_without_x(&y, 4);
+        assert_eq!(sampled_indices, vec![0, 1, 5, 9]);
     }
 
     #[test]
     fn test_random_same_output() {
         for _ in 0..100 {
-            let n = 5_000;
-            let x: Array1<i32> = Array1::from((0..n).map(|i| i as i32).collect::<Vec<i32>>());
-            let y = utils::get_random_array(n, f32::MIN, f32::MAX);
-            let sampled_indices1 = lttb_with_x(x.view(), y.view(), 200);
-            let sampled_indices2 = lttb_without_x(y.view(), 200);
+            const N: usize = 5_000;
+            let x: [i32; N] = core::array::from_fn(|i| i as i32);
+            let y = utils::get_random_array(N, f32::MIN, f32::MAX);
+            let sampled_indices1 = lttb_with_x(&x, y.as_slice(), 200);
+            let sampled_indices2 = lttb_without_x(y.as_slice(), 200);
             assert_eq!(sampled_indices1, sampled_indices2);
         }
     }
diff --git a/downsample_rs/src/lttb/mod.rs b/downsample_rs/src/lttb/mod.rs
deleted file mode 100644
index dd215d2..0000000
--- a/downsample_rs/src/lttb/mod.rs
+++ /dev/null
@@ -1,2 +0,0 @@
-mod scalar;
-pub use scalar::*;
diff --git a/downsample_rs/src/m4.rs b/downsample_rs/src/m4.rs
new file mode 100644
index 0000000..fa3dd50
--- /dev/null
+++ b/downsample_rs/src/m4.rs
@@ -0,0 +1,452 @@
+use argminmax::ArgMinMax;
+use num_traits::{AsPrimitive, FromPrimitive};
+use rayon::iter::IndexedParallelIterator;
+use rayon::prelude::*;
+
+use crate::searchsorted::{
+    get_equidistant_bin_idx_iterator, get_equidistant_bin_idx_iterator_parallel,
+};
+use crate::types::Num;
+
+// ----------------------------------- NON-PARALLEL ------------------------------------
+
+// ----------- WITH X
+
+pub fn m4_with_x<Tx, Ty>(x: &[Tx], arr: &[Ty], n_out: usize) -> Vec<usize>
+where
+    for<'a> &'a [Ty]: ArgMinMax,
+    Tx: Num + FromPrimitive + AsPrimitive<f64>,
+    Ty: Copy + PartialOrd,
+{
+    assert_eq!(n_out % 4, 0);
+    let bin_idx_iterator = get_equidistant_bin_idx_iterator(x, n_out / 4);
+    m4_generic_with_x(arr, bin_idx_iterator, n_out, |arr| arr.argminmax())
+}
+
+// ----------- WITHOUT X
+
+pub fn m4_without_x<T: Copy + PartialOrd>(arr: &[T], n_out: usize) -> Vec<usize>
+where
+    for<'a> &'a [T]: ArgMinMax,
+{
+    assert_eq!(n_out % 4, 0);
+    m4_generic(arr, n_out, |arr| arr.argminmax())
+}
+
+// ------------------------------------- PARALLEL --------------------------------------
+
+// ----------- WITH X
+
+pub fn m4_with_x_parallel<Tx, Ty>(
+    x: &[Tx],
+    arr: &[Ty],
+    n_out: usize,
+    n_threads: usize,
+) -> Vec<usize>
+where
+    for<'a> &'a [Ty]: ArgMinMax,
+    Tx: Num + FromPrimitive + AsPrimitive<f64> + Send + Sync,
+    Ty: Copy + PartialOrd + Send + Sync,
+{
+    assert_eq!(n_out % 4, 0);
+    let bin_idx_iterator = get_equidistant_bin_idx_iterator_parallel(x, n_out / 4, n_threads);
+    m4_generic_with_x_parallel(arr, bin_idx_iterator, n_out, n_threads, |arr| {
+        arr.argminmax()
+    })
+}
+
+// ----------- WITHOUT X
+
+pub fn m4_without_x_parallel<T: Copy + PartialOrd + Send + Sync>(
+    arr: &[T],
+    n_out: usize,
+    n_threads: usize,
+) -> Vec<usize>
+where
+    for<'a> &'a [T]: ArgMinMax,
+{
+    assert_eq!(n_out % 4, 0);
+    m4_generic_parallel(arr, n_out, n_threads, |arr| arr.argminmax())
+}
+
+// TODO: check for duplicate data in the output array
+// -> In the current implementation we always add 4 datapoints per bin (if of
+//    course the bin has >= 4 datapoints). However, the argmin and argmax might
+//    be the start and end of the bin, which would result in duplicate data in
+//    the output array. (this is for example the case for monotonic data).
+
+// ----------------- GENERICS
+
+// --------------------- WITHOUT X
+
+#[inline(always)]
+pub(crate) fn m4_generic<T: Copy + PartialOrd>(
+    arr: &[T],
+    n_out: usize,
+    f_argminmax: fn(&[T]) -> (usize, usize),
+) -> Vec<usize> {
+    // Assumes n_out is a multiple of 4
+    if n_out >= arr.len() {
+        return (0..arr.len()).collect();
+    }
+
+    // arr.len() - 1 is used to match the delta of a range-index (0..arr.len()-1)
+    let block_size: f64 = (arr.len() - 1) as f64 / (n_out / 4) as f64;
+
+    let mut sampled_indices: Vec<usize> = vec![usize::default(); n_out];
+
+    let mut start_idx: usize = 0;
+    for i in 0..n_out / 4 {
+        // Decided to use multiplication instead of adding to the accumulator (end)
+        // as multiplication seems to be less prone to rounding errors.
+        let end: f64 = block_size * (i + 1) as f64;
+        let end_idx: usize = end as usize + 1;
+
+        let (min_index, max_index) = f_argminmax(&arr[start_idx..end_idx]);
+
+        // Add the indexes in sorted order
+        sampled_indices[4 * i] = start_idx;
+        if min_index < max_index {
+            sampled_indices[4 * i + 1] = min_index + start_idx;
+            sampled_indices[4 * i + 2] = max_index + start_idx;
+        } else {
+            sampled_indices[4 * i + 1] = max_index + start_idx;
+            sampled_indices[4 * i + 2] = min_index + start_idx;
+        }
+        sampled_indices[4 * i + 3] = end_idx - 1;
+
+        start_idx = end_idx;
+    }
+
+    sampled_indices
+}
+
+#[inline(always)]
+pub(crate) fn m4_generic_parallel<T: Copy + PartialOrd + Send + Sync>(
+    arr: &[T],
+    n_out: usize,
+    n_threads: usize,
+    f_argminmax: fn(&[T]) -> (usize, usize),
+) -> Vec<usize> {
+    // Assumes n_out is a multiple of 4
+    if n_out >= arr.len() {
+        return (0..arr.len()).collect::<Vec<usize>>();
+    }
+
+    // arr.len() - 1 is used to match the delta of a range-index (0..arr.len()-1)
+    let block_size: f64 = (arr.len() - 1) as f64 / (n_out / 4) as f64;
+
+    // Store the enumerated indexes in the output array
+    let mut sampled_indices: Vec<usize> = (0..n_out).collect::<Vec<usize>>();
+
+    // to limit the amounts of threads Rayon uses, an explicit threadpool needs to be created
+    // in which the required code is "installed". This limits the amount of used threads.
+    // https://docs.rs/rayon/latest/rayon/struct.ThreadPool.html#method.install
+    let pool = rayon::ThreadPoolBuilder::new()
+        .num_threads(n_threads)
+        .build();
+
+    let func = || {
+        for chunk in sampled_indices.chunks_exact_mut(4) {
+            let i: f64 = unsafe { *chunk.get_unchecked(0) >> 2 } as f64;
+            let start_idx: usize = (block_size * i) as usize + (i != 0.0) as usize;
+            let end_idx: usize = (block_size * (i + 1.0)) as usize + 1;
+
+            let (min_index, max_index) = f_argminmax(&arr[start_idx..end_idx]);
+
+            chunk[0] = start_idx;
+            // Add the indexes in sorted order
+            if min_index < max_index {
+                chunk[1] = min_index + start_idx;
+                chunk[2] = max_index + start_idx;
+            } else {
+                chunk[1] = max_index + start_idx;
+                chunk[2] = min_index + start_idx;
+            }
+            chunk[3] = end_idx - 1;
+        }
+    };
+
+    pool.unwrap().install(func); // allow panic if pool could not be created
+
+    sampled_indices.to_vec()
+}
+
+// --------------------- WITH X
+
+#[inline(always)]
+pub(crate) fn m4_generic_with_x<T: Copy>(
+    arr: &[T],
+    bin_idx_iterator: impl Iterator<Item = Option<(usize, usize)>>,
+    n_out: usize,
+    f_argminmax: fn(&[T]) -> (usize, usize),
+) -> Vec<usize> {
+    // Assumes n_out is a multiple of 4
+    if n_out >= arr.len() {
+        return (0..arr.len()).collect::<Vec<usize>>();
+    }
+
+    let mut sampled_indices: Vec<usize> = Vec::with_capacity(n_out);
+
+    bin_idx_iterator.for_each(|bin| {
+        if let Some((start, end)) = bin {
+            if end <= start + 4 {
+                // If the bin has <= 4 elements, just add them all
+                for i in start..end {
+                    sampled_indices.push(i);
+                }
+            } else {
+                // If the bin has > 4 elements, add the first and last + argmin and argmax
+                let step = &arr[start..end];
+                let (min_index, max_index) = f_argminmax(step);
+
+                sampled_indices.push(start);
+
+                // Add the indexes in sorted order
+                if min_index < max_index {
+                    sampled_indices.push(min_index + start);
+                    sampled_indices.push(max_index + start);
+                } else {
+                    sampled_indices.push(max_index + start);
+                    sampled_indices.push(min_index + start);
+                }
+
+                sampled_indices.push(end - 1);
+            }
+        }
+    });
+
+    sampled_indices
+}
+
+#[inline(always)]
+pub(crate) fn m4_generic_with_x_parallel<T: Copy + PartialOrd + Send + Sync>(
+    arr: &[T],
+    bin_idx_iterator: impl IndexedParallelIterator<Item = impl Iterator<Item = Option<(usize, usize)>>>,
+    n_out: usize,
+    n_threads: usize,
+    f_argminmax: fn(&[T]) -> (usize, usize),
+) -> Vec<usize> {
+    // Assumes n_out is a multiple of 4
+    if n_out >= arr.len() {
+        return (0..arr.len()).collect::<Vec<usize>>();
+    }
+
+    // to limit the amounts of threads Rayon uses, an explicit threadpool needs to be created
+    // in which the required code is "installed". This limits the amount of used threads.
+    // https://docs.rs/rayon/latest/rayon/struct.ThreadPool.html#method.install
+    let pool = rayon::ThreadPoolBuilder::new()
+        .num_threads(n_threads)
+        .build();
+
+    let iter_func = || {
+        bin_idx_iterator
+            .flat_map(|bin_idx_iterator| {
+                bin_idx_iterator
+                    .map(|bin| {
+                        match bin {
+                            Some((start, end)) => {
+                                if end <= start + 4 {
+                                    // If the bin has <= 4 elements, just return them all
+                                    return (start..end).collect::<Vec<usize>>();
+                                }
+
+                                // If the bin has > 4 elements, return the first and last + argmin and argmax
+                                let step = &arr[start..end];
+                                let (min_index, max_index) = f_argminmax(step);
+
+                                // Return the indexes in sorted order
+                                let mut sampled_index = vec![start, 0, 0, end - 1];
+                                if min_index < max_index {
+                                    sampled_index[1] = min_index + start;
+                                    sampled_index[2] = max_index + start;
+                                } else {
+                                    sampled_index[1] = max_index + start;
+                                    sampled_index[2] = min_index + start;
+                                }
+                                sampled_index
+                            } // If the bin is empty, return empty Vec
+                            None => {
+                                vec![]
+                            }
+                        }
+                    })
+                    .collect::<Vec<Vec<usize>>>()
+            })
+            .flatten()
+            .collect::<Vec<usize>>()
+    };
+
+    let result = pool.unwrap().install(iter_func); // allow panic if pool could not be created
+    result
+}
+
+#[cfg(test)]
+mod tests {
+    use num_traits::AsPrimitive;
+    use rstest::rstest;
+    use rstest_reuse::{self, *};
+
+    use super::{m4_with_x, m4_without_x};
+    use super::{m4_with_x_parallel, m4_without_x_parallel};
+
+    use dev_utils::utils;
+
+    fn get_array_f32(n: usize) -> Vec<f32> {
+        utils::get_random_array(n, f32::MIN, f32::MAX)
+    }
+
+    // Template for the n_threads matrix
+    #[template]
+    #[rstest]
+    #[case(1)]
+    #[case(utils::get_all_threads() / 2)]
+    #[case(utils::get_all_threads())]
+    #[case(utils::get_all_threads() * 2)]
+    fn threads(#[case] n_threads: usize) {}
+
+    #[test]
+    fn test_m4_scalar_without_x_correct() {
+        let arr: [f32; 100] = core::array::from_fn(|i| i.as_());
+
+        let sampled_indices = m4_without_x(&arr, 12);
+        let sampled_values = sampled_indices
+            .iter()
+            .map(|x| arr[*x])
+            .collect::<Vec<f32>>();
+
+        let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99];
+        let expected_values = expected_indices
+            .iter()
+            .map(|x| *x as f32)
+            .collect::<Vec<f32>>();
+
+        assert_eq!(sampled_indices, expected_indices);
+        assert_eq!(sampled_values, expected_values);
+    }
+
+    #[apply(threads)]
+    fn test_m4_scalar_without_x_parallel_correct(n_threads: usize) {
+        let arr: [f32; 100] = core::array::from_fn(|i| i.as_());
+
+        let sampled_indices = m4_without_x_parallel(&arr, 12, n_threads);
+        let sampled_values = sampled_indices
+            .iter()
+            .map(|x| arr[*x])
+            .collect::<Vec<f32>>();
+
+        let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99];
+        let expected_values = expected_indices
+            .iter()
+            .map(|x| *x as f32)
+            .collect::<Vec<f32>>();
+
+        assert_eq!(sampled_indices, expected_indices);
+        assert_eq!(sampled_values, expected_values);
+    }
+
+    #[test]
+    fn test_m4_scalar_with_x_correct() {
+        let x: [i32; 100] = core::array::from_fn(|i| i.as_());
+        let arr: [f32; 100] = core::array::from_fn(|i| i.as_());
+
+        let sampled_indices = m4_with_x(&x, &arr, 12);
+        let sampled_values = sampled_indices
+            .iter()
+            .map(|x| arr[*x])
+            .collect::<Vec<f32>>();
+
+        let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99];
+        let expected_values = expected_indices
+            .iter()
+            .map(|x| *x as f32)
+            .collect::<Vec<f32>>();
+
+        assert_eq!(sampled_indices, expected_indices);
+        assert_eq!(sampled_values, expected_values);
+    }
+
+    #[apply(threads)]
+    fn test_m4_scalar_with_x_parallel_correct(n_threads: usize) {
+        let x: [i32; 100] = core::array::from_fn(|i| i.as_());
+        let arr: [f32; 100] = core::array::from_fn(|i| i.as_());
+
+        let sampled_indices = m4_with_x_parallel(&x, &arr, 12, n_threads);
+        let sampled_values = sampled_indices
+            .iter()
+            .map(|x| arr[*x])
+            .collect::<Vec<f32>>();
+
+        let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99];
+        let expected_values = expected_indices
+            .iter()
+            .map(|x| *x as f32)
+            .collect::<Vec<f32>>();
+
+        assert_eq!(sampled_indices, expected_indices);
+        assert_eq!(sampled_values, expected_values);
+    }
+
+    #[test]
+    fn test_m4_scalar_with_x_gap() {
+        // We will create a gap in the middle of the array
+        // Increment the second half of the array by 50
+        let x: [i32; 100] = core::array::from_fn(|i| if i > 50 { (i + 50).as_() } else { i.as_() });
+        let arr: [f32; 100] = core::array::from_fn(|i| i.as_());
+
+        let sampled_indices = m4_with_x(&x, &arr, 20);
+        assert_eq!(sampled_indices.len(), 16); // One full gap
+        let expected_indices = vec![0, 0, 29, 29, 30, 30, 50, 50, 51, 51, 69, 69, 70, 70, 99, 99];
+        assert_eq!(sampled_indices, expected_indices);
+
+        // Increment the second half of the array by 50 again
+        let x = x.map(|x| if x > 101 { x + 50 } else { x });
+
+        let sampled_indices = m4_with_x(&x, &arr, 20);
+        assert_eq!(sampled_indices.len(), 17); // Gap with 1 value
+        let expected_indices = vec![
+            0, 0, 39, 39, 40, 40, 50, 50, 51, 52, 52, 59, 59, 60, 60, 99, 99,
+        ];
+        assert_eq!(sampled_indices, expected_indices);
+    }
+
+    #[apply(threads)]
+    fn test_m4_scalar_with_x_gap_parallel(n_threads: usize) {
+        // We will create a gap in the middle of the array
+        // Increment the second half of the array by 50
+        let x: [i32; 100] = core::array::from_fn(|i| if i > 50 { (i + 50).as_() } else { i.as_() });
+        let arr: [f32; 100] = core::array::from_fn(|i| i.as_());
+
+        let sampled_indices = m4_with_x_parallel(&x, &arr, 20, n_threads);
+        assert_eq!(sampled_indices.len(), 16); // One full gap
+        let expected_indices = vec![0, 0, 29, 29, 30, 30, 50, 50, 51, 51, 69, 69, 70, 70, 99, 99];
+        assert_eq!(sampled_indices, expected_indices);
+
+        // Increment the second half of the array by 50 again
+        let x = x.map(|x| if x > 101 { x + 50 } else { x });
+
+        let sampled_indices = m4_with_x_parallel(&x, &arr, 20, n_threads);
+        assert_eq!(sampled_indices.len(), 17); // Gap with 1 value
+        let expected_indices = vec![
+            0, 0, 39, 39, 40, 40, 50, 50, 51, 52, 52, 59, 59, 60, 60, 99, 99,
+        ];
+        assert_eq!(sampled_indices, expected_indices);
+    }
+
+    #[apply(threads)]
+    fn test_many_random_runs_correct(n_threads: usize) {
+        const N: usize = 20_003;
+        const N_OUT: usize = 204;
+        let x: [i32; N] = core::array::from_fn(|i| i.as_());
+        for _ in 0..100 {
+            let arr = get_array_f32(N);
+            let idxs1 = m4_without_x(arr.as_slice(), N_OUT);
+            let idxs2 = m4_with_x(&x, arr.as_slice(), N_OUT);
+            assert_eq!(idxs1, idxs2);
+            let idxs3 = m4_without_x_parallel(arr.as_slice(), N_OUT, n_threads);
+            let idxs4 = m4_with_x_parallel(&x, arr.as_slice(), N_OUT, n_threads);
+            assert_eq!(idxs1, idxs3);
+            assert_eq!(idxs1, idxs4); // TODO: this should not fail when n_threads = 16
+        }
+    }
+}
diff --git a/downsample_rs/src/m4/generic.rs b/downsample_rs/src/m4/generic.rs
deleted file mode 100644
index e384660..0000000
--- a/downsample_rs/src/m4/generic.rs
+++ /dev/null
@@ -1,227 +0,0 @@
-use ndarray::Zip;
-use ndarray::{Array1, ArrayView1};
-
-use rayon::iter::IndexedParallelIterator;
-use rayon::prelude::*;
-
-// TODO: check for duplicate data in the output array
-// -> In the current implementation we always add 4 datapoints per bin (if of
-//    course the bin has >= 4 datapoints). However, the argmin and argmax might
-//    be the start and end of the bin, which would result in duplicate data in
-//    the output array. (this is for example the case for monotonic data).
-
-// --------------------- WITHOUT X
-
-#[inline(always)]
-pub(crate) fn m4_generic<T: Copy + PartialOrd>(
-    arr: ArrayView1<T>,
-    n_out: usize,
-    f_argminmax: fn(ArrayView1<T>) -> (usize, usize),
-) -> Array1<usize> {
-    // Assumes n_out is a multiple of 4
-    if n_out >= arr.len() {
-        return Array1::from((0..arr.len()).collect::<Vec<usize>>());
-    }
-
-    // arr.len() - 1 is used to match the delta of a range-index (0..arr.len()-1)
-    let block_size: f64 = (arr.len() - 1) as f64 / (n_out / 4) as f64;
-    let arr_ptr = arr.as_ptr();
-
-    let mut sampled_indices: Array1<usize> = Array1::<usize>::default(n_out);
-
-    let mut start_idx: usize = 0;
-    for i in 0..n_out / 4 {
-        // Decided to use multiplication instead of adding to the accumulator (end)
-        // as multiplication seems to be less prone to rounding errors.
-        let end: f64 = block_size * (i + 1) as f64;
-        let end_idx: usize = end as usize + 1;
-
-        let (min_index, max_index) = f_argminmax(unsafe {
-            ArrayView1::from_shape_ptr((end_idx - start_idx,), arr_ptr.add(start_idx))
-        });
-
-        // Add the indexes in sorted order
-        sampled_indices[4 * i] = start_idx;
-        if min_index < max_index {
-            sampled_indices[4 * i + 1] = min_index + start_idx;
-            sampled_indices[4 * i + 2] = max_index + start_idx;
-        } else {
-            sampled_indices[4 * i + 1] = max_index + start_idx;
-            sampled_indices[4 * i + 2] = min_index + start_idx;
-        }
-        sampled_indices[4 * i + 3] = end_idx - 1;
-
-        start_idx = end_idx;
-    }
-
-    sampled_indices
-}
-
-#[inline(always)]
-pub(crate) fn m4_generic_parallel<T: Copy + PartialOrd + Send + Sync>(
-    arr: ArrayView1<T>,
-    n_out: usize,
-    n_threads: usize,
-    f_argminmax: fn(ArrayView1<T>) -> (usize, usize),
-) -> Array1<usize> {
-    // Assumes n_out is a multiple of 4
-    if n_out >= arr.len() {
-        return Array1::from((0..arr.len()).collect::<Vec<usize>>());
-    }
-
-    // arr.len() - 1 is used to match the delta of a range-index (0..arr.len()-1)
-    let block_size: f64 = (arr.len() - 1) as f64 / (n_out / 4) as f64;
-
-    // Store the enumerated indexes in the output array
-    let mut sampled_indices: Array1<usize> = Array1::from_vec((0..n_out).collect::<Vec<usize>>());
-
-    // to limit the amounts of threads Rayon uses, an explicit threadpool needs to be created
-    // in which the required code is "installed". This limits the amount of used threads.
-    // https://docs.rs/rayon/latest/rayon/struct.ThreadPool.html#method.install
-    let pool = rayon::ThreadPoolBuilder::new()
-        .num_threads(n_threads)
-        .build();
-
-    let zip_func = || {
-        Zip::from(sampled_indices.exact_chunks_mut(4)).par_for_each(|mut sampled_index| {
-            let i: f64 = unsafe { *sampled_index.uget(0) >> 2 } as f64;
-            let start_idx: usize = (block_size * i) as usize + (i != 0.0) as usize;
-            let end_idx: usize = (block_size * (i + 1.0)) as usize + 1;
-
-            let (min_index, max_index) = f_argminmax(unsafe {
-                ArrayView1::from_shape_ptr((end_idx - start_idx,), arr.as_ptr().add(start_idx))
-            });
-
-            sampled_index[0] = start_idx;
-            // Add the indexes in sorted order
-            if min_index < max_index {
-                sampled_index[1] = min_index + start_idx;
-                sampled_index[2] = max_index + start_idx;
-            } else {
-                sampled_index[1] = max_index + start_idx;
-                sampled_index[2] = min_index + start_idx;
-            }
-            sampled_index[3] = end_idx - 1;
-        })
-    };
-
-    pool.unwrap().install(zip_func); // allow panic if pool could not be created
-
-    sampled_indices
-}
-
-// --------------------- WITH X
-
-#[inline(always)]
-pub(crate) fn m4_generic_with_x<T: Copy>(
-    arr: ArrayView1<T>,
-    bin_idx_iterator: impl Iterator<Item = Option<(usize, usize)>>,
-    n_out: usize,
-    f_argminmax: fn(ArrayView1<T>) -> (usize, usize),
-) -> Array1<usize> {
-    // Assumes n_out is a multiple of 4
-    if n_out >= arr.len() {
-        return Array1::from((0..arr.len()).collect::<Vec<usize>>());
-    }
-
-    let arr_ptr = arr.as_ptr();
-    let mut sampled_indices: Vec<usize> = Vec::with_capacity(n_out);
-
-    bin_idx_iterator.for_each(|bin| {
-        if let Some((start, end)) = bin {
-            if end <= start + 4 {
-                // If the bin has <= 4 elements, just add them all
-                for i in start..end {
-                    sampled_indices.push(i);
-                }
-            } else {
-                // If the bin has > 4 elements, add the first and last + argmin and argmax
-                let step = unsafe { ArrayView1::from_shape_ptr(end - start, arr_ptr.add(start)) };
-                let (min_index, max_index) = f_argminmax(step);
-
-                sampled_indices.push(start);
-
-                // Add the indexes in sorted order
-                if min_index < max_index {
-                    sampled_indices.push(min_index + start);
-                    sampled_indices.push(max_index + start);
-                } else {
-                    sampled_indices.push(max_index + start);
-                    sampled_indices.push(min_index + start);
-                }
-
-                sampled_indices.push(end - 1);
-            }
-        }
-    });
-
-    Array1::from_vec(sampled_indices)
-}
-
-#[inline(always)]
-pub(crate) fn m4_generic_with_x_parallel<T: Copy + PartialOrd + Send + Sync>(
-    arr: ArrayView1<T>,
-    bin_idx_iterator: impl IndexedParallelIterator<Item = impl Iterator<Item = Option<(usize, usize)>>>,
-    n_out: usize,
-    n_threads: usize,
-    f_argminmax: fn(ArrayView1<T>) -> (usize, usize),
-) -> Array1<usize> {
-    // Assumes n_out is a multiple of 4
-    if n_out >= arr.len() {
-        return Array1::from((0..arr.len()).collect::<Vec<usize>>());
-    }
-
-    // to limit the amounts of threads Rayon uses, an explicit threadpool needs to be created
-    // in which the required code is "installed". This limits the amount of used threads.
-    // https://docs.rs/rayon/latest/rayon/struct.ThreadPool.html#method.install
-    let pool = rayon::ThreadPoolBuilder::new()
-        .num_threads(n_threads)
-        .build();
-
-    let iter_func = || {
-        Array1::from_vec(
-            bin_idx_iterator
-                .flat_map(|bin_idx_iterator| {
-                    bin_idx_iterator
-                        .map(|bin| {
-                            match bin {
-                                Some((start, end)) => {
-                                    if end <= start + 4 {
-                                        // If the bin has <= 4 elements, just return them all
-                                        return (start..end).collect::<Vec<usize>>();
-                                    }
-
-                                    // If the bin has > 4 elements, return the first and last + argmin and argmax
-                                    let step = unsafe {
-                                        ArrayView1::from_shape_ptr(
-                                            end - start,
-                                            arr.as_ptr().add(start),
-                                        )
-                                    };
-                                    let (min_index, max_index) = f_argminmax(step);
-
-                                    // Return the indexes in sorted order
-                                    let mut sampled_index = vec![start, 0, 0, end - 1];
-                                    if min_index < max_index {
-                                        sampled_index[1] = min_index + start;
-                                        sampled_index[2] = max_index + start;
-                                    } else {
-                                        sampled_index[1] = max_index + start;
-                                        sampled_index[2] = min_index + start;
-                                    }
-                                    sampled_index
-                                } // If the bin is empty, return empty Vec
-                                None => {
-                                    vec![]
-                                }
-                            }
-                        })
-                        .collect::<Vec<Vec<usize>>>()
-                })
-                .flatten()
-                .collect::<Vec<usize>>(),
-        )
-    };
-
-    pool.unwrap().install(iter_func) // allow panic if pool could not be created
-}
diff --git a/downsample_rs/src/m4/mod.rs b/downsample_rs/src/m4/mod.rs
deleted file mode 100644
index 3628374..0000000
--- a/downsample_rs/src/m4/mod.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-pub mod scalar;
-pub use scalar::*;
-pub mod simd;
-pub use simd::*;
-mod generic; // private module
diff --git a/downsample_rs/src/m4/scalar.rs b/downsample_rs/src/m4/scalar.rs
deleted file mode 100644
index faebc08..0000000
--- a/downsample_rs/src/m4/scalar.rs
+++ /dev/null
@@ -1,263 +0,0 @@
-use argminmax::{ScalarArgMinMax, SCALAR};
-
-use ndarray::{Array1, ArrayView1};
-use num_traits::{AsPrimitive, FromPrimitive};
-
-use super::super::searchsorted::{
-    get_equidistant_bin_idx_iterator, get_equidistant_bin_idx_iterator_parallel,
-};
-use super::super::types::Num;
-use super::generic::{m4_generic, m4_generic_parallel};
-use super::generic::{m4_generic_with_x, m4_generic_with_x_parallel};
-
-// ----------------------------------- NON-PARALLEL ------------------------------------
-
-// ----------- WITH X
-
-pub fn m4_scalar_with_x<Tx, Ty>(
-    x: ArrayView1<Tx>,
-    arr: ArrayView1<Ty>,
-    n_out: usize,
-) -> Array1<usize>
-where
-    SCALAR: ScalarArgMinMax<Ty>,
-    Tx: Num + FromPrimitive + AsPrimitive<f64>,
-    Ty: Copy + PartialOrd,
-{
-    assert_eq!(n_out % 4, 0);
-    let bin_idx_iterator = get_equidistant_bin_idx_iterator(x, n_out / 4);
-    m4_generic_with_x(arr, bin_idx_iterator, n_out, SCALAR::argminmax)
-}
-
-// ----------- WITHOUT X
-
-pub fn m4_scalar_without_x<T: Copy + PartialOrd>(arr: ArrayView1<T>, n_out: usize) -> Array1<usize>
-where
-    SCALAR: ScalarArgMinMax<T>,
-{
-    assert_eq!(n_out % 4, 0);
-    m4_generic(arr, n_out, SCALAR::argminmax)
-}
-
-// ------------------------------------- PARALLEL --------------------------------------
-
-// ----------- WITH X
-
-pub fn m4_scalar_with_x_parallel<Tx, Ty>(
-    x: ArrayView1<Tx>,
-    arr: ArrayView1<Ty>,
-    n_out: usize,
-    n_threads: usize,
-) -> Array1<usize>
-where
-    SCALAR: ScalarArgMinMax<Ty>,
-    Tx: Num + FromPrimitive + AsPrimitive<f64> + Send + Sync,
-    Ty: Copy + PartialOrd + Send + Sync,
-{
-    assert_eq!(n_out % 4, 0);
-    let bin_idx_iterator = get_equidistant_bin_idx_iterator_parallel(x, n_out / 4, n_threads);
-    m4_generic_with_x_parallel(arr, bin_idx_iterator, n_out, n_threads, SCALAR::argminmax)
-}
-
-// ----------- WITHOUT X
-
-pub fn m4_scalar_without_x_parallel<T: Copy + PartialOrd + Send + Sync>(
-    arr: ArrayView1<T>,
-    n_out: usize,
-    n_threads: usize,
-) -> Array1<usize>
-where
-    SCALAR: ScalarArgMinMax<T>,
-{
-    assert_eq!(n_out % 4, 0);
-    m4_generic_parallel(arr, n_out, n_threads, SCALAR::argminmax)
-}
-
-// --------------------------------------- TESTS ---------------------------------------
-
-#[cfg(test)]
-mod tests {
-    use rstest::rstest;
-    use rstest_reuse::{self, *};
-
-    use super::{m4_scalar_with_x, m4_scalar_without_x};
-    use super::{m4_scalar_with_x_parallel, m4_scalar_without_x_parallel};
-    use ndarray::Array1;
-
-    use dev_utils::utils;
-
-    fn get_array_f32(n: usize) -> Array1<f32> {
-        utils::get_random_array(n, f32::MIN, f32::MAX)
-    }
-
-    // Template for the n_threads matrix
-    #[template]
-    #[rstest]
-    #[case(1)]
-    #[case(utils::get_all_threads() / 2)]
-    #[case(utils::get_all_threads())]
-    #[case(utils::get_all_threads() * 2)]
-    fn threads(#[case] n_threads: usize) {}
-
-    #[test]
-    fn test_m4_scalar_without_x_correct() {
-        let arr = (0..100).map(|x| x as f32).collect::<Vec<f32>>();
-        let arr = Array1::from(arr);
-
-        let sampled_indices = m4_scalar_without_x(arr.view(), 12);
-        let sampled_values = sampled_indices.mapv(|x| arr[x]);
-
-        let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99];
-        let expected_values = expected_indices
-            .iter()
-            .map(|x| *x as f32)
-            .collect::<Vec<f32>>();
-
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-        assert_eq!(sampled_values, Array1::from(expected_values));
-    }
-
-    #[apply(threads)]
-    fn test_m4_scalar_without_x_parallel_correct(n_threads: usize) {
-        let arr = (0..100).map(|x| x as f32).collect::<Vec<f32>>();
-        let arr = Array1::from(arr);
-
-        let sampled_indices = m4_scalar_without_x_parallel(arr.view(), 12, n_threads);
-        let sampled_values = sampled_indices.mapv(|x| arr[x]);
-
-        let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99];
-        let expected_values = expected_indices
-            .iter()
-            .map(|x| *x as f32)
-            .collect::<Vec<f32>>();
-
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-        assert_eq!(sampled_values, Array1::from(expected_values));
-    }
-
-    #[test]
-    fn test_m4_scalar_with_x_correct() {
-        let x = (0..100).collect::<Vec<i32>>();
-        let x = Array1::from(x);
-        let arr = (0..100).map(|x| x as f32).collect::<Vec<f32>>();
-        let arr = Array1::from(arr);
-
-        let sampled_indices = m4_scalar_with_x(x.view(), arr.view(), 12);
-        let sampled_values = sampled_indices.mapv(|x| arr[x]);
-
-        let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99];
-        let expected_values = expected_indices
-            .iter()
-            .map(|x| *x as f32)
-            .collect::<Vec<f32>>();
-
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-        assert_eq!(sampled_values, Array1::from(expected_values));
-    }
-
-    #[apply(threads)]
-    fn test_m4_scalar_with_x_parallel_correct(n_threads: usize) {
-        let x = (0..100).collect::<Vec<i32>>();
-        let x = Array1::from(x);
-        let arr = (0..100).map(|x| x as f32).collect::<Vec<f32>>();
-        let arr = Array1::from(arr);
-
-        let sampled_indices = m4_scalar_with_x_parallel(x.view(), arr.view(), 12, n_threads);
-        let sampled_values = sampled_indices.mapv(|x| arr[x]);
-
-        let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99];
-        let expected_values = expected_indices
-            .iter()
-            .map(|x| *x as f32)
-            .collect::<Vec<f32>>();
-
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-        assert_eq!(sampled_values, Array1::from(expected_values));
-    }
-
-    #[test]
-    fn test_m4_scalar_with_x_gap() {
-        // We will create a gap in the middle of the array
-        let x = (0..100).collect::<Vec<i32>>();
-
-        // Increment the second half of the array by 50
-        let x = x
-            .iter()
-            .map(|x| if *x > 50 { *x + 50 } else { *x })
-            .collect::<Vec<i32>>();
-        let x = Array1::from(x);
-        let arr = (0..100).map(|x| x as f32).collect::<Vec<f32>>();
-        let arr = Array1::from(arr);
-
-        let sampled_indices = m4_scalar_with_x(x.view(), arr.view(), 20);
-        assert_eq!(sampled_indices.len(), 16); // One full gap
-        let expected_indices = vec![0, 0, 29, 29, 30, 30, 50, 50, 51, 51, 69, 69, 70, 70, 99, 99];
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-
-        // Increment the second half of the array by 50 again
-        let x = x
-            .iter()
-            .map(|x| if *x > 101 { *x + 50 } else { *x })
-            .collect::<Vec<i32>>();
-        let x = Array1::from(x);
-
-        let sampled_indices = m4_scalar_with_x(x.view(), arr.view(), 20);
-        assert_eq!(sampled_indices.len(), 17); // Gap with 1 value
-        let expected_indices = vec![
-            0, 0, 39, 39, 40, 40, 50, 50, 51, 52, 52, 59, 59, 60, 60, 99, 99,
-        ];
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-    }
-
-    #[apply(threads)]
-    fn test_m4_scalar_with_x_gap_parallel(n_threads: usize) {
-        // We will create a gap in the middle of the array
-        let x = (0..100).collect::<Vec<i32>>();
-
-        // Increment the second half of the array by 50
-        let x = x
-            .iter()
-            .map(|x| if *x > 50 { *x + 50 } else { *x })
-            .collect::<Vec<i32>>();
-        let x = Array1::from(x);
-        let arr = (0..100).map(|x| x as f32).collect::<Vec<f32>>();
-        let arr = Array1::from(arr);
-
-        let sampled_indices = m4_scalar_with_x_parallel(x.view(), arr.view(), 20, n_threads);
-        assert_eq!(sampled_indices.len(), 16); // One full gap
-        let expected_indices = vec![0, 0, 29, 29, 30, 30, 50, 50, 51, 51, 69, 69, 70, 70, 99, 99];
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-
-        // Increment the second half of the array by 50 again
-        let x = x
-            .iter()
-            .map(|x| if *x > 101 { *x + 50 } else { *x })
-            .collect::<Vec<i32>>();
-        let x = Array1::from(x);
-
-        let sampled_indices = m4_scalar_with_x_parallel(x.view(), arr.view(), 20, n_threads);
-        assert_eq!(sampled_indices.len(), 17); // Gap with 1 value
-        let expected_indices = vec![
-            0, 0, 39, 39, 40, 40, 50, 50, 51, 52, 52, 59, 59, 60, 60, 99, 99,
-        ];
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-    }
-
-    #[apply(threads)]
-    fn test_many_random_runs_correct(n_threads: usize) {
-        let n: usize = 20_003;
-        let n_out: usize = 204;
-        let x = (0..n as i32).collect::<Vec<i32>>();
-        let x = Array1::from(x);
-        for _ in 0..100 {
-            let arr = get_array_f32(n);
-            let idxs1 = m4_scalar_without_x(arr.view(), n_out);
-            let idxs2 = m4_scalar_with_x(x.view(), arr.view(), n_out);
-            assert_eq!(idxs1, idxs2);
-            let idxs3 = m4_scalar_without_x_parallel(arr.view(), n_out, n_threads);
-            let idxs4 = m4_scalar_with_x_parallel(x.view(), arr.view(), n_out, n_threads);
-            assert_eq!(idxs1, idxs3);
-            assert_eq!(idxs1, idxs4); // TODO: this should not fail when n_threads = 16
-        }
-    }
-}
diff --git a/downsample_rs/src/m4/simd.rs b/downsample_rs/src/m4/simd.rs
deleted file mode 100644
index 9b11ad1..0000000
--- a/downsample_rs/src/m4/simd.rs
+++ /dev/null
@@ -1,261 +0,0 @@
-use argminmax::ArgMinMax;
-
-use ndarray::{Array1, ArrayView1};
-use num_traits::{AsPrimitive, FromPrimitive};
-
-use super::super::searchsorted::{
-    get_equidistant_bin_idx_iterator, get_equidistant_bin_idx_iterator_parallel,
-};
-use super::super::types::Num;
-use super::generic::{m4_generic, m4_generic_parallel};
-use super::generic::{m4_generic_with_x, m4_generic_with_x_parallel};
-
-// ----------------------------------- NON-PARALLEL ------------------------------------
-
-// ----------- WITH X
-
-pub fn m4_simd_with_x<Tx, Ty>(x: ArrayView1<Tx>, arr: ArrayView1<Ty>, n_out: usize) -> Array1<usize>
-where
-    for<'a> ArrayView1<'a, Ty>: ArgMinMax,
-    Tx: Num + FromPrimitive + AsPrimitive<f64>,
-    Ty: Copy + PartialOrd,
-{
-    assert_eq!(n_out % 4, 0);
-    let bin_idx_iterator = get_equidistant_bin_idx_iterator(x, n_out / 4);
-    m4_generic_with_x(arr, bin_idx_iterator, n_out, |arr| arr.argminmax())
-}
-
-// ----------- WITHOUT X
-
-pub fn m4_simd_without_x<T: Copy + PartialOrd>(arr: ArrayView1<T>, n_out: usize) -> Array1<usize>
-where
-    for<'a> ArrayView1<'a, T>: ArgMinMax,
-{
-    assert_eq!(n_out % 4, 0);
-    m4_generic(arr, n_out, |arr| arr.argminmax())
-}
-
-// ------------------------------------- PARALLEL --------------------------------------
-
-// ----------- WITH X
-
-pub fn m4_simd_with_x_parallel<Tx, Ty>(
-    x: ArrayView1<Tx>,
-    arr: ArrayView1<Ty>,
-    n_out: usize,
-    n_threads: usize,
-) -> Array1<usize>
-where
-    for<'a> ArrayView1<'a, Ty>: ArgMinMax,
-    Tx: Num + FromPrimitive + AsPrimitive<f64> + Send + Sync,
-    Ty: Copy + PartialOrd + Send + Sync,
-{
-    assert_eq!(n_out % 4, 0);
-    let bin_idx_iterator = get_equidistant_bin_idx_iterator_parallel(x, n_out / 4, n_threads);
-    m4_generic_with_x_parallel(arr, bin_idx_iterator, n_out, n_threads, |arr| {
-        arr.argminmax()
-    })
-}
-
-// ----------- WITHOUT X
-
-pub fn m4_simd_without_x_parallel<T: Copy + PartialOrd + Send + Sync>(
-    arr: ArrayView1<T>,
-    n_out: usize,
-    n_threads: usize,
-) -> Array1<usize>
-where
-    for<'a> ArrayView1<'a, T>: ArgMinMax,
-{
-    assert_eq!(n_out % 4, 0);
-    m4_generic_parallel(arr, n_out, n_threads, |arr| arr.argminmax())
-}
-
-// --------------------------------------- TESTS ---------------------------------------
-
-#[cfg(test)]
-mod tests {
-    use rstest::rstest;
-    use rstest_reuse::{self, *};
-
-    use super::{m4_simd_with_x, m4_simd_without_x};
-    use super::{m4_simd_with_x_parallel, m4_simd_without_x_parallel};
-    use ndarray::Array1;
-
-    use dev_utils::utils;
-
-    fn get_array_f32(n: usize) -> Array1<f32> {
-        utils::get_random_array(n, f32::MIN, f32::MAX)
-    }
-
-    // Template for the n_threads matrix
-    #[template]
-    #[rstest]
-    #[case(1)]
-    #[case(utils::get_all_threads() / 2)]
-    #[case(utils::get_all_threads())]
-    #[case(utils::get_all_threads() * 2)]
-    fn threads(#[case] n_threads: usize) {}
-
-    #[test]
-    fn test_m4_simd_without_x_correct() {
-        let arr = (0..100).map(|x| x as f32).collect::<Vec<f32>>();
-        let arr = Array1::from(arr);
-
-        let sampled_indices = m4_simd_without_x(arr.view(), 12);
-        let sampled_values = sampled_indices.mapv(|x| arr[x]);
-
-        let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99];
-        let expected_values = expected_indices
-            .iter()
-            .map(|x| *x as f32)
-            .collect::<Vec<f32>>();
-
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-        assert_eq!(sampled_values, Array1::from(expected_values));
-    }
-
-    #[apply(threads)]
-    fn test_m4_simd_without_x_parallel_correct(n_threads: usize) {
-        let arr = (0..100).map(|x| x as f32).collect::<Vec<f32>>();
-        let arr = Array1::from(arr);
-
-        let sampled_indices = m4_simd_without_x_parallel(arr.view(), 12, n_threads);
-        let sampled_values = sampled_indices.mapv(|x| arr[x]);
-
-        let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99];
-        let expected_values = expected_indices
-            .iter()
-            .map(|x| *x as f32)
-            .collect::<Vec<f32>>();
-
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-        assert_eq!(sampled_values, Array1::from(expected_values));
-    }
-
-    #[test]
-    fn test_m4_simd_with_x_correct() {
-        let x = (0..100).collect::<Vec<i32>>();
-        let x = Array1::from(x);
-        let arr = (0..100).map(|x| x as f32).collect::<Vec<f32>>();
-        let arr = Array1::from(arr);
-
-        let sampled_indices = m4_simd_with_x(x.view(), arr.view(), 12);
-        let sampled_values = sampled_indices.mapv(|x| arr[x]);
-
-        let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99];
-        let expected_values = expected_indices
-            .iter()
-            .map(|x| *x as f32)
-            .collect::<Vec<f32>>();
-
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-        assert_eq!(sampled_values, Array1::from(expected_values));
-    }
-
-    #[apply(threads)]
-    fn test_m4_simd_with_x_parallel_correct(n_threads: usize) {
-        let x = (0..100).collect::<Vec<i32>>();
-        let x = Array1::from(x);
-        let arr = (0..100).map(|x| x as f32).collect::<Vec<f32>>();
-        let arr = Array1::from(arr);
-
-        let sampled_indices = m4_simd_with_x_parallel(x.view(), arr.view(), 12, n_threads);
-        let sampled_values = sampled_indices.mapv(|x| arr[x]);
-
-        let expected_indices = vec![0, 0, 33, 33, 34, 34, 66, 66, 67, 67, 99, 99];
-        let expected_values = expected_indices
-            .iter()
-            .map(|x| *x as f32)
-            .collect::<Vec<f32>>();
-
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-        assert_eq!(sampled_values, Array1::from(expected_values));
-    }
-
-    #[test]
-    fn test_m4_simd_with_x_gap() {
-        // We will create a gap in the middle of the array
-        let x = (0..100).collect::<Vec<i32>>();
-
-        // Increment the second half of the array by 50
-        let x = x
-            .iter()
-            .map(|x| if *x > 50 { *x + 50 } else { *x })
-            .collect::<Vec<i32>>();
-        let x = Array1::from(x);
-        let arr = (0..100).map(|x| x as f32).collect::<Vec<f32>>();
-        let arr = Array1::from(arr);
-
-        let sampled_indices = m4_simd_with_x(x.view(), arr.view(), 20);
-        assert_eq!(sampled_indices.len(), 16); // One full gap
-        let expected_indices = vec![0, 0, 29, 29, 30, 30, 50, 50, 51, 51, 69, 69, 70, 70, 99, 99];
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-
-        // Increment the second half of the array by 50 again
-        let x = x
-            .iter()
-            .map(|x| if *x > 101 { *x + 50 } else { *x })
-            .collect::<Vec<i32>>();
-        let x = Array1::from(x);
-
-        let sampled_indices = m4_simd_with_x(x.view(), arr.view(), 20);
-        assert_eq!(sampled_indices.len(), 17); // Gap with 1 value
-        let expected_indices = vec![
-            0, 0, 39, 39, 40, 40, 50, 50, 51, 52, 52, 59, 59, 60, 60, 99, 99,
-        ];
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-    }
-
-    #[apply(threads)]
-    fn test_m4_simd_with_x_gap_parallel(n_threads: usize) {
-        // We will create a gap in the middle of the array
-        let x = (0..100).collect::<Vec<i32>>();
-
-        // Increment the second half of the array by 50
-        let x = x
-            .iter()
-            .map(|x| if *x > 50 { *x + 50 } else { *x })
-            .collect::<Vec<i32>>();
-        let x = Array1::from(x);
-        let arr = (0..100).map(|x| x as f32).collect::<Vec<f32>>();
-        let arr = Array1::from(arr);
-
-        let sampled_indices = m4_simd_with_x_parallel(x.view(), arr.view(), 20, n_threads);
-        assert_eq!(sampled_indices.len(), 16); // One full gap
-        let expected_indices = vec![0, 0, 29, 29, 30, 30, 50, 50, 51, 51, 69, 69, 70, 70, 99, 99];
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-
-        // Increment the second half of the array by 50 again
-        let x = x
-            .iter()
-            .map(|x| if *x > 101 { *x + 50 } else { *x })
-            .collect::<Vec<i32>>();
-        let x = Array1::from(x);
-
-        let sampled_indices = m4_simd_with_x_parallel(x.view(), arr.view(), 20, n_threads);
-        assert_eq!(sampled_indices.len(), 17); // Gap with 1 value
-        let expected_indices = vec![
-            0, 0, 39, 39, 40, 40, 50, 50, 51, 52, 52, 59, 59, 60, 60, 99, 99,
-        ];
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-    }
-
-    #[apply(threads)]
-    fn test_many_random_runs_correct(n_threads: usize) {
-        let n = 20_003;
-        let n_out = 204;
-        let x = (0..n).map(|x| x as i32).collect::<Vec<i32>>();
-        let x = Array1::from(x);
-        for _ in 0..100 {
-            let arr = get_array_f32(n);
-            let idxs1 = m4_simd_without_x(arr.view(), n_out);
-            let idxs2 = m4_simd_without_x_parallel(arr.view(), n_out, n_threads);
-            let idxs3 = m4_simd_with_x(x.view(), arr.view(), n_out);
-            let idxs4 = m4_simd_with_x_parallel(x.view(), arr.view(), n_out, n_threads);
-            assert_eq!(idxs1, idxs2);
-            assert_eq!(idxs1, idxs3);
-            assert_eq!(idxs1, idxs4); // TODO: this should not fail when n_threads = 16
-        }
-    }
-}
diff --git a/downsample_rs/src/minmax.rs b/downsample_rs/src/minmax.rs
new file mode 100644
index 0000000..6858164
--- /dev/null
+++ b/downsample_rs/src/minmax.rs
@@ -0,0 +1,431 @@
+use rayon::iter::IndexedParallelIterator;
+use rayon::prelude::*;
+
+use argminmax::ArgMinMax;
+use num_traits::{AsPrimitive, FromPrimitive};
+
+use super::searchsorted::{
+    get_equidistant_bin_idx_iterator, get_equidistant_bin_idx_iterator_parallel,
+};
+use super::types::Num;
+
+// ----------------------------------- NON-PARALLEL ------------------------------------
+
+// ----------- WITH X
+
+pub fn min_max_with_x<Tx, Ty>(x: &[Tx], arr: &[Ty], n_out: usize) -> Vec<usize>
+where
+    for<'a> &'a [Ty]: ArgMinMax,
+    Tx: Num + FromPrimitive + AsPrimitive<f64>,
+    Ty: Copy + PartialOrd,
+{
+    assert_eq!(n_out % 2, 0);
+    let bin_idx_iterator = get_equidistant_bin_idx_iterator(x, n_out / 2);
+    min_max_generic_with_x(arr, bin_idx_iterator, n_out, |arr| arr.argminmax())
+}
+
+// ----------- WITHOUT X
+
+pub fn min_max_without_x<T: Copy + PartialOrd>(arr: &[T], n_out: usize) -> Vec<usize>
+where
+    for<'a> &'a [T]: ArgMinMax,
+{
+    assert_eq!(n_out % 2, 0);
+    min_max_generic(arr, n_out, |arr| arr.argminmax())
+}
+
+// ------------------------------------- PARALLEL --------------------------------------
+
+// ----------- WITH X
+
+pub fn min_max_with_x_parallel<Tx, Ty>(
+    x: &[Tx],
+    arr: &[Ty],
+    n_out: usize,
+    n_threads: usize,
+) -> Vec<usize>
+where
+    for<'a> &'a [Ty]: ArgMinMax,
+    Tx: Num + FromPrimitive + AsPrimitive<f64> + Send + Sync,
+    Ty: Copy + PartialOrd + Send + Sync,
+{
+    assert_eq!(n_out % 2, 0);
+    let bin_idx_iterator = get_equidistant_bin_idx_iterator_parallel(x, n_out / 2, n_threads);
+    min_max_generic_with_x_parallel(arr, bin_idx_iterator, n_out, n_threads, |arr| {
+        arr.argminmax()
+    })
+}
+
+// ----------- WITHOUT X
+
+pub fn min_max_without_x_parallel<T: Copy + PartialOrd + Send + Sync>(
+    arr: &[T],
+    n_out: usize,
+    n_threads: usize,
+) -> Vec<usize>
+where
+    for<'a> &'a [T]: ArgMinMax,
+{
+    assert_eq!(n_out % 2, 0);
+    min_max_generic_parallel(arr, n_out, n_threads, |arr| arr.argminmax())
+}
+// ----------------- GENERICS
+//
+// --------------------- WITHOUT X
+
+#[inline(always)]
+pub(crate) fn min_max_generic<T: Copy>(
+    arr: &[T],
+    n_out: usize,
+    f_argminmax: fn(&[T]) -> (usize, usize),
+) -> Vec<usize> {
+    // Assumes n_out is a multiple of 2
+    if n_out >= arr.len() {
+        return (0..arr.len()).collect::<Vec<usize>>();
+    }
+
+    // arr.len() - 1 is used to match the delta of a range-index (0..arr.len()-1)
+    let block_size: f64 = (arr.len() - 1) as f64 / (n_out / 2) as f64;
+
+    let mut sampled_indices = vec![usize::default(); n_out];
+
+    let mut start_idx: usize = 0;
+    for i in 0..n_out / 2 {
+        // Decided to use multiplication instead of adding to the accumulator (end)
+        // as multiplication seems to be less prone to rounding errors.
+        let end: f64 = block_size * (i + 1) as f64;
+        let end_idx: usize = end as usize + 1;
+
+        let (min_index, max_index) = f_argminmax(&arr[start_idx..end_idx]);
+
+        // Add the indexes in sorted order
+        if min_index < max_index {
+            sampled_indices[2 * i] = min_index + start_idx;
+            sampled_indices[2 * i + 1] = max_index + start_idx;
+        } else {
+            sampled_indices[2 * i] = max_index + start_idx;
+            sampled_indices[2 * i + 1] = min_index + start_idx;
+        }
+
+        start_idx = end_idx;
+    }
+
+    sampled_indices
+}
+
+#[inline(always)]
+pub(crate) fn min_max_generic_parallel<T: Copy + PartialOrd + Send + Sync>(
+    arr: &[T],
+    n_out: usize,
+    n_threads: usize,
+    f_argminmax: fn(&[T]) -> (usize, usize),
+) -> Vec<usize> {
+    // Assumes n_out is a multiple of 2
+    if n_out >= arr.len() {
+        return (0..arr.len()).collect::<Vec<usize>>();
+    }
+
+    // arr.len() - 1 is used to match the delta of a range-index (0..arr.len()-1)
+    let block_size: f64 = (arr.len() - 1) as f64 / (n_out / 2) as f64;
+
+    // Store the enumerated indexes in the output array
+    // let mut sampled_indices: Array1<usize> = Array1::from_vec((0..n_out).collect::<Vec<usize>>());
+    let mut sampled_indices: Vec<usize> = (0..n_out).collect::<Vec<usize>>();
+
+    // to limit the amounts of threads Rayon uses, an explicit threadpool needs to be created
+    // in which the required code is "installed". This limits the amount of used threads.
+    // https://docs.rs/rayon/latest/rayon/struct.ThreadPool.html#method.install
+    let pool = rayon::ThreadPoolBuilder::new()
+        .num_threads(n_threads)
+        .build();
+
+    let func = || {
+        for chunk in sampled_indices.chunks_exact_mut(2) {
+            let i: f64 = unsafe { *chunk.get_unchecked(0) >> 1 } as f64;
+            let start_idx: usize = (block_size * i) as usize + (i != 0.0) as usize;
+            let end_idx: usize = (block_size * (i + 1.0)) as usize + 1;
+
+            let (min_index, max_index) = f_argminmax(&arr[start_idx..end_idx]);
+
+            // Add the indexes in sorted order
+            if min_index < max_index {
+                chunk[0] = min_index + start_idx;
+                chunk[1] = max_index + start_idx;
+            } else {
+                chunk[0] = max_index + start_idx;
+                chunk[1] = min_index + start_idx;
+            }
+        }
+    };
+
+    pool.unwrap().install(func); // allow panic if pool could not be created
+
+    sampled_indices.to_vec()
+}
+
+// --------------------- WITH X
+
+#[inline(always)]
+pub(crate) fn min_max_generic_with_x<T: Copy>(
+    arr: &[T],
+    bin_idx_iterator: impl Iterator<Item = Option<(usize, usize)>>,
+    n_out: usize,
+    f_argminmax: fn(&[T]) -> (usize, usize),
+) -> Vec<usize> {
+    // Assumes n_out is a multiple of 2
+    if n_out >= arr.len() {
+        return (0..arr.len()).collect::<Vec<usize>>();
+    }
+
+    let mut sampled_indices: Vec<usize> = Vec::with_capacity(n_out);
+
+    bin_idx_iterator.for_each(|bin| {
+        if let Some((start, end)) = bin {
+            if end <= start + 2 {
+                // If the bin has <= 2 elements, just add them all
+                for i in start..end {
+                    sampled_indices.push(i);
+                }
+            } else {
+                // If the bin has at least two elements, add the argmin and argmax
+                let step = &arr[start..end];
+                let (min_index, max_index) = f_argminmax(step);
+
+                // Add the indexes in sorted order
+                if min_index < max_index {
+                    sampled_indices.push(min_index + start);
+                    sampled_indices.push(max_index + start);
+                } else {
+                    sampled_indices.push(max_index + start);
+                    sampled_indices.push(min_index + start);
+                }
+            }
+        }
+    });
+
+    sampled_indices
+}
+
+#[inline(always)]
+pub(crate) fn min_max_generic_with_x_parallel<T: Copy + Send + Sync>(
+    arr: &[T],
+    bin_idx_iterator: impl IndexedParallelIterator<Item = impl Iterator<Item = Option<(usize, usize)>>>,
+    n_out: usize,
+    n_threads: usize,
+    f_argminmax: fn(&[T]) -> (usize, usize),
+) -> Vec<usize> {
+    // Assumes n_out is a multiple of 2
+    if n_out >= arr.len() {
+        return (0..arr.len()).collect::<Vec<usize>>();
+    }
+
+    // to limit the amounts of threads Rayon uses, an explicit threadpool needs to be created
+    // in which the required code is "installed". This limits the amount of used threads.
+    // https://docs.rs/rayon/latest/rayon/struct.ThreadPool.html#method.install
+    let pool = rayon::ThreadPoolBuilder::new()
+        .num_threads(n_threads)
+        .build();
+
+    let iter_func = || {
+        bin_idx_iterator
+            .flat_map(|bin_idx_iterator| {
+                bin_idx_iterator
+                    .map(|bin| {
+                        match bin {
+                            Some((start, end)) => {
+                                if end <= start + 2 {
+                                    // If the bin has <= 2 elements, just return them all
+                                    return (start..end).collect::<Vec<usize>>();
+                                }
+
+                                // If the bin has at least two elements, return the argmin and argmax
+                                let step = &arr[start..end];
+                                let (min_index, max_index) = f_argminmax(step);
+
+                                // Return the indexes in sorted order
+                                if min_index < max_index {
+                                    vec![min_index + start, max_index + start]
+                                } else {
+                                    vec![max_index + start, min_index + start]
+                                }
+                            } // If the bin is empty, return empty Vec
+                            None => {
+                                vec![]
+                            }
+                        }
+                    })
+                    .collect::<Vec<Vec<usize>>>()
+            })
+            .flatten()
+            .collect::<Vec<usize>>()
+    };
+
+    pool.unwrap().install(iter_func) // allow panic if pool could not be created
+}
+
+#[cfg(test)]
+mod tests {
+    use num_traits::AsPrimitive;
+    use rstest::rstest;
+    use rstest_reuse::{self, *};
+
+    use super::{min_max_with_x, min_max_without_x};
+    use super::{min_max_with_x_parallel, min_max_without_x_parallel};
+
+    use dev_utils::utils;
+
+    fn get_array_f32(n: usize) -> Vec<f32> {
+        utils::get_random_array(n, f32::MIN, f32::MAX)
+    }
+
+    // Template for the n_threads matrix
+    #[template]
+    #[rstest]
+    #[case(1)]
+    #[case(utils::get_all_threads() / 2)]
+    #[case(utils::get_all_threads())]
+    #[case(utils::get_all_threads() * 2)]
+    fn threads(#[case] n_threads: usize) {}
+
+    #[test]
+    fn test_min_max_scalar_without_x_correct() {
+        let arr: [f32; 100] = core::array::from_fn(|i| i.as_());
+
+        let sampled_indices = min_max_without_x(&arr, 10);
+        let sampled_values = sampled_indices
+            .iter()
+            .map(|x| arr[*x])
+            .collect::<Vec<f32>>();
+
+        let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99];
+        let expected_values = expected_indices
+            .iter()
+            .map(|x| *x as f32)
+            .collect::<Vec<f32>>();
+
+        assert_eq!(sampled_indices, expected_indices);
+        assert_eq!(sampled_values, expected_values);
+    }
+
+    #[apply(threads)]
+    fn test_min_max_scalar_without_x_parallel_correct(n_threads: usize) {
+        let arr: [f32; 100] = core::array::from_fn(|i| i.as_());
+
+        let sampled_indices = min_max_without_x_parallel(&arr, 10, n_threads);
+        let sampled_values = sampled_indices
+            .iter()
+            .map(|x| arr[*x])
+            .collect::<Vec<f32>>();
+
+        let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99];
+        let expected_values = expected_indices
+            .iter()
+            .map(|x| *x as f32)
+            .collect::<Vec<f32>>();
+
+        assert_eq!(sampled_indices, expected_indices);
+        assert_eq!(sampled_values, expected_values);
+    }
+
+    #[test]
+    fn test_min_max_scalar_with_x_correct() {
+        let x: [i32; 100] = core::array::from_fn(|i| i.as_());
+        let arr: [f32; 100] = core::array::from_fn(|i| i.as_());
+
+        let sampled_indices = min_max_with_x(&x, &arr, 10);
+        let sampled_values = sampled_indices
+            .iter()
+            .map(|x| arr[*x])
+            .collect::<Vec<f32>>();
+
+        let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99];
+        let expected_values = expected_indices
+            .iter()
+            .map(|x| *x as f32)
+            .collect::<Vec<f32>>();
+
+        assert_eq!(sampled_indices, expected_indices);
+        assert_eq!(sampled_values, expected_values);
+    }
+
+    #[apply(threads)]
+    fn test_min_max_scalar_with_x_parallel_correct(n_threads: usize) {
+        let x: [i32; 100] = core::array::from_fn(|i| i.as_());
+        let arr: [f32; 100] = core::array::from_fn(|i| i.as_());
+
+        let sampled_indices = min_max_with_x_parallel(&x, &arr, 10, n_threads);
+        let sampled_values = sampled_indices
+            .iter()
+            .map(|x| arr[*x])
+            .collect::<Vec<f32>>();
+
+        let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99];
+        let expected_values = expected_indices
+            .iter()
+            .map(|x| *x as f32)
+            .collect::<Vec<f32>>();
+
+        assert_eq!(sampled_indices, expected_indices);
+        assert_eq!(sampled_values, expected_values);
+    }
+
+    #[test]
+    fn test_min_max_scalar_with_x_gap() {
+        // We will create a gap in the middle of the array
+        // Increment the second half of the array by 50
+        let x: [i32; 100] = core::array::from_fn(|i| if i > 50 { (i + 50).as_() } else { i.as_() });
+        let arr: [f32; 100] = core::array::from_fn(|i| i.as_());
+
+        let sampled_indices = min_max_with_x(&x, &arr, 10);
+        assert_eq!(sampled_indices.len(), 8); // One full gap
+        let expected_indices = vec![0, 29, 30, 50, 51, 69, 70, 99];
+        assert_eq!(sampled_indices, expected_indices);
+
+        // Increment the second half of the array by 50 again
+        let x = x.map(|i| if i > 101 { i + 50 } else { i });
+
+        let sampled_indices = min_max_with_x(&x, &arr, 10);
+        assert_eq!(sampled_indices.len(), 9); // Gap with 1 value
+        let expected_indices = vec![0, 39, 40, 50, 51, 52, 59, 60, 99];
+        assert_eq!(sampled_indices, expected_indices);
+    }
+
+    #[apply(threads)]
+    fn test_min_max_scalar_with_x_parallel_gap(n_threads: usize) {
+        // Create a gap in the middle of the array
+        // Increment the second half of the array by 50
+        let x: [i32; 100] = core::array::from_fn(|i| if i > 50 { (i + 50).as_() } else { i.as_() });
+        let arr: [f32; 100] = core::array::from_fn(|i| i.as_());
+
+        let sampled_indices = min_max_with_x_parallel(&x, &arr, 10, n_threads);
+        assert_eq!(sampled_indices.len(), 8); // One full gap
+        let expected_indices = vec![0, 29, 30, 50, 51, 69, 70, 99];
+        assert_eq!(sampled_indices, expected_indices);
+
+        // Increment the second half of the array by 50 again
+        let x = x.map(|i| if i > 101 { i + 50 } else { i });
+
+        let sampled_indices = min_max_with_x_parallel(&x, &arr, 10, n_threads);
+        assert_eq!(sampled_indices.len(), 9); // Gap with 1 value
+        let expected_indices = vec![0, 39, 40, 50, 51, 52, 59, 60, 99];
+        assert_eq!(sampled_indices, expected_indices);
+    }
+
+    #[apply(threads)]
+    fn test_many_random_runs_same_output(n_threads: usize) {
+        const N: usize = 20_003;
+        const N_OUT: usize = 202;
+        let x: [i32; N] = core::array::from_fn(|i| i.as_());
+        for _ in 0..100 {
+            let mut arr = get_array_f32(N);
+            arr[N - 1] = f32::INFINITY; // Make sure the last value is always the max
+            let idxs1 = min_max_without_x(arr.as_slice(), N_OUT);
+            let idxs2 = min_max_without_x_parallel(arr.as_slice(), N_OUT, n_threads);
+            let idxs3 = min_max_with_x(&x, arr.as_slice(), N_OUT);
+            let idxs4 = min_max_with_x_parallel(&x, arr.as_slice(), N_OUT, n_threads);
+            assert_eq!(idxs1, idxs2);
+            assert_eq!(idxs1, idxs3);
+            assert_eq!(idxs1, idxs4);
+        }
+    }
+}
diff --git a/downsample_rs/src/minmax/generic.rs b/downsample_rs/src/minmax/generic.rs
deleted file mode 100644
index 4416fe1..0000000
--- a/downsample_rs/src/minmax/generic.rs
+++ /dev/null
@@ -1,209 +0,0 @@
-use ndarray::Zip;
-use ndarray::{Array1, ArrayView1};
-
-use rayon::iter::IndexedParallelIterator;
-use rayon::prelude::*;
-
-// --------------------- WITHOUT X
-
-#[inline(always)]
-pub(crate) fn min_max_generic<T: Copy>(
-    arr: ArrayView1<T>,
-    n_out: usize,
-    f_argminmax: fn(ArrayView1<T>) -> (usize, usize),
-) -> Array1<usize> {
-    // Assumes n_out is a multiple of 2
-    if n_out >= arr.len() {
-        return Array1::from((0..arr.len()).collect::<Vec<usize>>());
-    }
-
-    // arr.len() - 1 is used to match the delta of a range-index (0..arr.len()-1)
-    let block_size: f64 = (arr.len() - 1) as f64 / (n_out / 2) as f64;
-    let arr_ptr = arr.as_ptr();
-
-    let mut sampled_indices: Array1<usize> = Array1::<usize>::default(n_out);
-
-    let mut start_idx: usize = 0;
-    for i in 0..n_out / 2 {
-        // Decided to use multiplication instead of adding to the accumulator (end)
-        // as multiplication seems to be less prone to rounding errors.
-        let end: f64 = block_size * (i + 1) as f64;
-        let end_idx: usize = end as usize + 1;
-
-        let (min_index, max_index) = f_argminmax(unsafe {
-            ArrayView1::from_shape_ptr((end_idx - start_idx,), arr_ptr.add(start_idx))
-        });
-
-        // Add the indexes in sorted order
-        if min_index < max_index {
-            sampled_indices[2 * i] = min_index + start_idx;
-            sampled_indices[2 * i + 1] = max_index + start_idx;
-        } else {
-            sampled_indices[2 * i] = max_index + start_idx;
-            sampled_indices[2 * i + 1] = min_index + start_idx;
-        }
-
-        start_idx = end_idx;
-    }
-
-    sampled_indices
-}
-
-#[inline(always)]
-pub(crate) fn min_max_generic_parallel<T: Copy + PartialOrd + Send + Sync>(
-    arr: ArrayView1<T>,
-    n_out: usize,
-    n_threads: usize,
-    f_argminmax: fn(ArrayView1<T>) -> (usize, usize),
-) -> Array1<usize> {
-    // Assumes n_out is a multiple of 2
-    if n_out >= arr.len() {
-        return Array1::from((0..arr.len()).collect::<Vec<usize>>());
-    }
-
-    // arr.len() - 1 is used to match the delta of a range-index (0..arr.len()-1)
-    let block_size: f64 = (arr.len() - 1) as f64 / (n_out / 2) as f64;
-
-    // Store the enumerated indexes in the output array
-    let mut sampled_indices: Array1<usize> = Array1::from_vec((0..n_out).collect::<Vec<usize>>());
-
-    // to limit the amounts of threads Rayon uses, an explicit threadpool needs to be created
-    // in which the required code is "installed". This limits the amount of used threads.
-    // https://docs.rs/rayon/latest/rayon/struct.ThreadPool.html#method.install
-    let pool = rayon::ThreadPoolBuilder::new()
-        .num_threads(n_threads)
-        .build();
-
-    let zip_func = || {
-        Zip::from(sampled_indices.exact_chunks_mut(2)).par_for_each(|mut sampled_index| {
-            let i: f64 = unsafe { *sampled_index.uget(0) >> 1 } as f64;
-            let start_idx: usize = (block_size * i) as usize + (i != 0.0) as usize;
-            let end_idx: usize = (block_size * (i + 1.0)) as usize + 1;
-
-            let (min_index, max_index) = f_argminmax(unsafe {
-                ArrayView1::from_shape_ptr((end_idx - start_idx,), arr.as_ptr().add(start_idx))
-            });
-
-            // Add the indexes in sorted order
-            if min_index < max_index {
-                sampled_index[0] = min_index + start_idx;
-                sampled_index[1] = max_index + start_idx;
-            } else {
-                sampled_index[0] = max_index + start_idx;
-                sampled_index[1] = min_index + start_idx;
-            }
-        });
-    };
-
-    pool.unwrap().install(zip_func); // allow panic if pool could not be created
-
-    sampled_indices
-}
-
-// --------------------- WITH X
-
-#[inline(always)]
-pub(crate) fn min_max_generic_with_x<T: Copy>(
-    arr: ArrayView1<T>,
-    bin_idx_iterator: impl Iterator<Item = Option<(usize, usize)>>,
-    n_out: usize,
-    f_argminmax: fn(ArrayView1<T>) -> (usize, usize),
-) -> Array1<usize> {
-    // Assumes n_out is a multiple of 2
-    if n_out >= arr.len() {
-        return Array1::from((0..arr.len()).collect::<Vec<usize>>());
-    }
-
-    let ptr = arr.as_ptr();
-    let mut sampled_indices: Vec<usize> = Vec::with_capacity(n_out);
-
-    bin_idx_iterator.for_each(|bin| {
-        if let Some((start, end)) = bin {
-            if end <= start + 2 {
-                // If the bin has <= 2 elements, just add them all
-                for i in start..end {
-                    sampled_indices.push(i);
-                }
-            } else {
-                // If the bin has at least two elements, add the argmin and argmax
-                let step = unsafe { ArrayView1::from_shape_ptr(end - start, ptr.add(start)) };
-                let (min_index, max_index) = f_argminmax(step);
-
-                // Add the indexes in sorted order
-                if min_index < max_index {
-                    sampled_indices.push(min_index + start);
-                    sampled_indices.push(max_index + start);
-                } else {
-                    sampled_indices.push(max_index + start);
-                    sampled_indices.push(min_index + start);
-                }
-            }
-        }
-    });
-
-    Array1::from_vec(sampled_indices)
-}
-
-#[inline(always)]
-pub(crate) fn min_max_generic_with_x_parallel<T: Copy + Send + Sync>(
-    arr: ArrayView1<T>,
-    bin_idx_iterator: impl IndexedParallelIterator<Item = impl Iterator<Item = Option<(usize, usize)>>>,
-    n_out: usize,
-    n_threads: usize,
-    f_argminmax: fn(ArrayView1<T>) -> (usize, usize),
-) -> Array1<usize> {
-    // Assumes n_out is a multiple of 2
-    if n_out >= arr.len() {
-        return Array1::from((0..arr.len()).collect::<Vec<usize>>());
-    }
-
-    // to limit the amounts of threads Rayon uses, an explicit threadpool needs to be created
-    // in which the required code is "installed". This limits the amount of used threads.
-    // https://docs.rs/rayon/latest/rayon/struct.ThreadPool.html#method.install
-    let pool = rayon::ThreadPoolBuilder::new()
-        .num_threads(n_threads)
-        .build();
-
-    let iter_func = || {
-        Array1::from_vec(
-            bin_idx_iterator
-                .flat_map(|bin_idx_iterator| {
-                    bin_idx_iterator
-                        .map(|bin| {
-                            match bin {
-                                Some((start, end)) => {
-                                    if end <= start + 2 {
-                                        // If the bin has <= 2 elements, just return them all
-                                        return (start..end).collect::<Vec<usize>>();
-                                    }
-
-                                    // If the bin has at least two elements, return the argmin and argmax
-                                    let step = unsafe {
-                                        ArrayView1::from_shape_ptr(
-                                            end - start,
-                                            arr.as_ptr().add(start),
-                                        )
-                                    };
-                                    let (min_index, max_index) = f_argminmax(step);
-
-                                    // Return the indexes in sorted order
-                                    if min_index < max_index {
-                                        vec![min_index + start, max_index + start]
-                                    } else {
-                                        vec![max_index + start, min_index + start]
-                                    }
-                                } // If the bin is empty, return empty Vec
-                                None => {
-                                    vec![]
-                                }
-                            }
-                        })
-                        .collect::<Vec<Vec<usize>>>()
-                })
-                .flatten()
-                .collect::<Vec<usize>>(),
-        )
-    };
-
-    pool.unwrap().install(iter_func) // allow panic if pool could not be created
-}
diff --git a/downsample_rs/src/minmax/mod.rs b/downsample_rs/src/minmax/mod.rs
deleted file mode 100644
index 3628374..0000000
--- a/downsample_rs/src/minmax/mod.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-pub mod scalar;
-pub use scalar::*;
-pub mod simd;
-pub use simd::*;
-mod generic; // private module
diff --git a/downsample_rs/src/minmax/scalar.rs b/downsample_rs/src/minmax/scalar.rs
deleted file mode 100644
index 9d0c114..0000000
--- a/downsample_rs/src/minmax/scalar.rs
+++ /dev/null
@@ -1,263 +0,0 @@
-use argminmax::{ScalarArgMinMax, SCALAR};
-use num_traits::{AsPrimitive, FromPrimitive};
-
-use ndarray::{Array1, ArrayView1};
-
-use super::super::searchsorted::{
-    get_equidistant_bin_idx_iterator, get_equidistant_bin_idx_iterator_parallel,
-};
-use super::super::types::Num;
-use super::generic::{min_max_generic, min_max_generic_parallel};
-use super::generic::{min_max_generic_with_x, min_max_generic_with_x_parallel};
-
-// ----------------------------------- NON-PARALLEL ------------------------------------
-
-// ----------- WITH X
-
-pub fn min_max_scalar_with_x<Tx, Ty>(
-    x: ArrayView1<Tx>,
-    arr: ArrayView1<Ty>,
-    n_out: usize,
-) -> Array1<usize>
-where
-    SCALAR: ScalarArgMinMax<Ty>,
-    Tx: Num + FromPrimitive + AsPrimitive<f64>,
-    Ty: Copy + PartialOrd,
-{
-    assert_eq!(n_out % 2, 0);
-    let bin_idx_iterator = get_equidistant_bin_idx_iterator(x, n_out / 2);
-    min_max_generic_with_x(arr, bin_idx_iterator, n_out, SCALAR::argminmax)
-}
-
-// ----------- WITHOUT X
-
-pub fn min_max_scalar_without_x<T: Copy + PartialOrd>(
-    arr: ArrayView1<T>,
-    n_out: usize,
-) -> Array1<usize>
-where
-    SCALAR: ScalarArgMinMax<T>,
-{
-    assert_eq!(n_out % 2, 0);
-    min_max_generic(arr, n_out, SCALAR::argminmax)
-}
-
-// ------------------------------------- PARALLEL --------------------------------------
-
-// ----------- WITH X
-
-pub fn min_max_scalar_with_x_parallel<Tx, Ty>(
-    x: ArrayView1<Tx>,
-    arr: ArrayView1<Ty>,
-    n_out: usize,
-    n_threads: usize,
-) -> Array1<usize>
-where
-    SCALAR: ScalarArgMinMax<Ty>,
-    Tx: Num + FromPrimitive + AsPrimitive<f64> + Send + Sync,
-    Ty: Copy + PartialOrd + Send + Sync,
-{
-    assert_eq!(n_out % 2, 0);
-    let bin_idx_iterator = get_equidistant_bin_idx_iterator_parallel(x, n_out / 2, n_threads);
-    min_max_generic_with_x_parallel(arr, bin_idx_iterator, n_out, n_threads, SCALAR::argminmax)
-}
-
-// ----------- WITHOUT X
-
-pub fn min_max_scalar_without_x_parallel<T: Copy + PartialOrd + Send + Sync>(
-    arr: ArrayView1<T>,
-    n_out: usize,
-    n_threads: usize,
-) -> Array1<usize>
-where
-    SCALAR: ScalarArgMinMax<T>,
-{
-    assert_eq!(n_out % 2, 0);
-    min_max_generic_parallel(arr, n_out, n_threads, SCALAR::argminmax)
-}
-
-// --------------------------------------- TESTS ---------------------------------------
-
-#[cfg(test)]
-mod tests {
-    use rstest::rstest;
-    use rstest_reuse::{self, *};
-
-    use super::{min_max_scalar_with_x, min_max_scalar_without_x};
-    use super::{min_max_scalar_with_x_parallel, min_max_scalar_without_x_parallel};
-    use ndarray::Array1;
-
-    use dev_utils::utils;
-
-    fn get_array_f32(n: usize) -> Array1<f32> {
-        utils::get_random_array(n, f32::MIN, f32::MAX)
-    }
-
-    // Template for the n_threads matrix
-    #[template]
-    #[rstest]
-    #[case(1)]
-    #[case(utils::get_all_threads() / 2)]
-    #[case(utils::get_all_threads())]
-    #[case(utils::get_all_threads() * 2)]
-    fn threads(#[case] n_threads: usize) {}
-
-    #[test]
-    fn test_min_max_scalar_without_x_correct() {
-        let arr = (0..100).map(|x| x as f32).collect::<Vec<f32>>();
-        let arr = Array1::from(arr);
-
-        let sampled_indices = min_max_scalar_without_x(arr.view(), 10);
-        let sampled_values = sampled_indices.mapv(|x| arr[x]);
-
-        let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99];
-        let expected_values = expected_indices
-            .iter()
-            .map(|x| *x as f32)
-            .collect::<Vec<f32>>();
-
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-        assert_eq!(sampled_values, Array1::from(expected_values));
-    }
-
-    #[apply(threads)]
-    fn test_min_max_scalar_without_x_parallel_correct(n_threads: usize) {
-        let arr = (0..100).map(|x| x as f32).collect::<Vec<f32>>();
-        let arr = Array1::from(arr);
-
-        let sampled_indices = min_max_scalar_without_x_parallel(arr.view(), 10, n_threads);
-        let sampled_values = sampled_indices.mapv(|x| arr[x]);
-
-        let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99];
-        let expected_values = expected_indices
-            .iter()
-            .map(|x| *x as f32)
-            .collect::<Vec<f32>>();
-
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-        assert_eq!(sampled_values, Array1::from(expected_values));
-    }
-
-    #[test]
-    fn test_min_max_scalar_with_x_correct() {
-        let x = (0..100).collect::<Vec<i32>>();
-        let x = Array1::from(x);
-        let arr = (0..100).map(|x| x as f32).collect::<Vec<f32>>();
-        let arr = Array1::from(arr);
-
-        let sampled_indices = min_max_scalar_with_x(x.view(), arr.view(), 10);
-        let sampled_values = sampled_indices.mapv(|x| arr[x]);
-
-        let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99];
-        let expected_values = expected_indices
-            .iter()
-            .map(|x| *x as f32)
-            .collect::<Vec<f32>>();
-
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-        assert_eq!(sampled_values, Array1::from(expected_values));
-    }
-
-    #[apply(threads)]
-    fn test_min_max_scalar_with_x_parallel_correct(n_threads: usize) {
-        let x = (0..100).collect::<Vec<i32>>();
-        let x = Array1::from(x);
-        let arr = (0..100).map(|x| x as f32).collect::<Vec<f32>>();
-        let arr = Array1::from(arr);
-
-        let sampled_indices = min_max_scalar_with_x_parallel(x.view(), arr.view(), 10, n_threads);
-        let sampled_values = sampled_indices.mapv(|x| arr[x]);
-
-        let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99];
-        let expected_values = expected_indices
-            .iter()
-            .map(|x| *x as f32)
-            .collect::<Vec<f32>>();
-
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-        assert_eq!(sampled_values, Array1::from(expected_values));
-    }
-
-    #[test]
-    fn test_min_max_scalar_with_x_gap() {
-        // We will create a gap in the middle of the array
-        let x = (0..100).collect::<Vec<i32>>();
-
-        // Increment the second half of the array by 50
-        let x = x
-            .iter()
-            .map(|x| if *x > 50 { *x + 50 } else { *x })
-            .collect::<Vec<i32>>();
-        let x = Array1::from(x);
-        let arr = (0..100).map(|x| x as f32).collect::<Vec<f32>>();
-        let arr = Array1::from(arr);
-
-        let sampled_indices = min_max_scalar_with_x(x.view(), arr.view(), 10);
-        assert_eq!(sampled_indices.len(), 8); // One full gap
-        let expected_indices = vec![0, 29, 30, 50, 51, 69, 70, 99];
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-
-        // Increment the second half of the array by 50 again
-        let x = x
-            .iter()
-            .map(|x| if *x > 101 { *x + 50 } else { *x })
-            .collect::<Vec<i32>>();
-        let x = Array1::from(x);
-
-        let sampled_indices = min_max_scalar_with_x(x.view(), arr.view(), 10);
-        assert_eq!(sampled_indices.len(), 9); // Gap with 1 value
-        let expected_indices = vec![0, 39, 40, 50, 51, 52, 59, 60, 99];
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-    }
-
-    #[apply(threads)]
-    fn test_min_max_scalar_with_x_parallel_gap(n_threads: usize) {
-        // Create a gap in the middle of the array
-        let x = (0..100).collect::<Vec<i32>>();
-
-        // Increment the second half of the array by 50
-        let x = x
-            .iter()
-            .map(|x| if *x > 50 { *x + 50 } else { *x })
-            .collect::<Vec<i32>>();
-        let x = Array1::from(x);
-        let arr = (0..100).map(|x| x as f32).collect::<Vec<f32>>();
-        let arr = Array1::from(arr);
-
-        let sampled_indices = min_max_scalar_with_x_parallel(x.view(), arr.view(), 10, n_threads);
-        assert_eq!(sampled_indices.len(), 8); // One full gap
-        let expected_indices = vec![0, 29, 30, 50, 51, 69, 70, 99];
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-
-        // Increment the second half of the array by 50 again
-        let x = x
-            .iter()
-            .map(|x| if *x > 101 { *x + 50 } else { *x })
-            .collect::<Vec<i32>>();
-        let x = Array1::from(x);
-
-        let sampled_indices = min_max_scalar_with_x_parallel(x.view(), arr.view(), 10, n_threads);
-        assert_eq!(sampled_indices.len(), 9); // Gap with 1 value
-        let expected_indices = vec![0, 39, 40, 50, 51, 52, 59, 60, 99];
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-    }
-
-    #[apply(threads)]
-    fn test_many_random_runs_same_output(n_threads: usize) {
-        let n: usize = 20_003;
-        let n_out = 202;
-        let x = (0..n as i32).collect::<Vec<i32>>();
-        let x = Array1::from(x);
-        for _ in 0..100 {
-            let mut arr = get_array_f32(n);
-            arr[n - 1] = f32::INFINITY; // Make sure the last value is always the max
-            let idxs1 = min_max_scalar_without_x(arr.view(), n_out);
-            let idxs2 = min_max_scalar_without_x_parallel(arr.view(), n_out, n_threads);
-            let idxs3 = min_max_scalar_with_x(x.view(), arr.view(), n_out);
-            let idxs4 = min_max_scalar_with_x_parallel(x.view(), arr.view(), n_out, n_threads);
-            assert_eq!(idxs1, idxs2);
-            assert_eq!(idxs1, idxs3);
-            assert_eq!(idxs1, idxs4);
-        }
-    }
-}
diff --git a/downsample_rs/src/minmax/simd.rs b/downsample_rs/src/minmax/simd.rs
deleted file mode 100644
index 545d092..0000000
--- a/downsample_rs/src/minmax/simd.rs
+++ /dev/null
@@ -1,265 +0,0 @@
-use argminmax::ArgMinMax;
-
-use ndarray::{Array1, ArrayView1};
-use num_traits::{AsPrimitive, FromPrimitive};
-
-use super::super::searchsorted::{
-    get_equidistant_bin_idx_iterator, get_equidistant_bin_idx_iterator_parallel,
-};
-use super::super::types::Num;
-use super::generic::{min_max_generic, min_max_generic_parallel};
-use super::generic::{min_max_generic_with_x, min_max_generic_with_x_parallel};
-
-// ----------------------------------- NON-PARALLEL ------------------------------------
-
-// ----------- WITH X
-
-pub fn min_max_simd_with_x<Tx, Ty>(
-    x: ArrayView1<Tx>,
-    arr: ArrayView1<Ty>,
-    n_out: usize,
-) -> Array1<usize>
-where
-    for<'a> ArrayView1<'a, Ty>: ArgMinMax,
-    Tx: Num + FromPrimitive + AsPrimitive<f64>,
-    Ty: Copy + PartialOrd,
-{
-    assert_eq!(n_out % 2, 0);
-    let bin_idx_iterator = get_equidistant_bin_idx_iterator(x, n_out / 2);
-    min_max_generic_with_x(arr, bin_idx_iterator, n_out, |arr| arr.argminmax())
-}
-
-// ----------- WITHOUT X
-
-pub fn min_max_simd_without_x<T: Copy + PartialOrd>(
-    arr: ArrayView1<T>,
-    n_out: usize,
-) -> Array1<usize>
-where
-    for<'a> ArrayView1<'a, T>: ArgMinMax,
-{
-    assert_eq!(n_out % 2, 0);
-    min_max_generic(arr, n_out, |arr| arr.argminmax())
-}
-
-// ------------------------------------- PARALLEL --------------------------------------
-
-// ----------- WITH X
-
-pub fn min_max_simd_with_x_parallel<Tx, Ty>(
-    x: ArrayView1<Tx>,
-    arr: ArrayView1<Ty>,
-    n_out: usize,
-    n_threads: usize,
-) -> Array1<usize>
-where
-    for<'a> ArrayView1<'a, Ty>: ArgMinMax,
-    Tx: Num + FromPrimitive + AsPrimitive<f64> + Send + Sync,
-    Ty: Copy + PartialOrd + Send + Sync,
-{
-    assert_eq!(n_out % 2, 0);
-    let bin_idx_iterator = get_equidistant_bin_idx_iterator_parallel(x, n_out / 2, n_threads);
-    min_max_generic_with_x_parallel(arr, bin_idx_iterator, n_out, n_threads, |arr| {
-        arr.argminmax()
-    })
-}
-
-// ----------- WITHOUT X
-
-pub fn min_max_simd_without_x_parallel<T: Copy + PartialOrd + Send + Sync>(
-    arr: ArrayView1<T>,
-    n_out: usize,
-    n_threads: usize,
-) -> Array1<usize>
-where
-    for<'a> ArrayView1<'a, T>: ArgMinMax,
-{
-    assert_eq!(n_out % 2, 0);
-    min_max_generic_parallel(arr, n_out, n_threads, |arr| arr.argminmax())
-}
-
-// --------------------------------------- TESTS ---------------------------------------
-
-#[cfg(test)]
-mod tests {
-    use rstest::rstest;
-    use rstest_reuse::{self, *};
-
-    use super::{min_max_simd_with_x, min_max_simd_without_x};
-    use super::{min_max_simd_with_x_parallel, min_max_simd_without_x_parallel};
-    use ndarray::Array1;
-
-    use dev_utils::utils;
-
-    fn get_array_f32(n: usize) -> Array1<f32> {
-        utils::get_random_array(n, f32::MIN, f32::MAX)
-    }
-
-    // Template for the n_threads matrix
-    #[template]
-    #[rstest]
-    #[case(1)]
-    #[case(utils::get_all_threads() / 2)]
-    #[case(utils::get_all_threads())]
-    #[case(utils::get_all_threads() * 2)]
-    fn threads(#[case] n_threads: usize) {}
-
-    #[test]
-    fn test_min_max_simd_without_x_correct() {
-        let arr = (0..100).map(|x| x as f32).collect::<Vec<f32>>();
-        let arr = Array1::from(arr);
-
-        let sampled_indices = min_max_simd_without_x(arr.view(), 10);
-        let sampled_values = sampled_indices.mapv(|x| arr[x]);
-
-        let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99];
-        let expected_values = expected_indices
-            .iter()
-            .map(|x| *x as f32)
-            .collect::<Vec<f32>>();
-
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-        assert_eq!(sampled_values, Array1::from(expected_values));
-    }
-
-    #[apply(threads)]
-    fn test_min_max_simd_without_x_parallel_correct(n_threads: usize) {
-        let arr = (0..100).map(|x| x as f32).collect::<Vec<f32>>();
-        let arr = Array1::from(arr);
-
-        let sampled_indices = min_max_simd_without_x_parallel(arr.view(), 10, n_threads);
-        let sampled_values = sampled_indices.mapv(|x| arr[x]);
-
-        let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99];
-        let expected_values = expected_indices
-            .iter()
-            .map(|x| *x as f32)
-            .collect::<Vec<f32>>();
-
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-        assert_eq!(sampled_values, Array1::from(expected_values));
-    }
-
-    #[test]
-    fn test_min_max_simd_with_x_correct() {
-        let x = (0..100).map(|x| x as f32).collect::<Vec<f32>>();
-        let x = Array1::from(x);
-        let arr = (0..100).map(|x| x as f32).collect::<Vec<f32>>();
-        let arr = Array1::from(arr);
-
-        let sampled_indices = min_max_simd_with_x(x.view(), arr.view(), 10);
-        let sampled_values = sampled_indices.mapv(|x| arr[x]);
-
-        let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99];
-        let expected_values = expected_indices
-            .iter()
-            .map(|x| *x as f32)
-            .collect::<Vec<f32>>();
-
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-        assert_eq!(sampled_values, Array1::from(expected_values));
-    }
-
-    #[apply(threads)]
-    fn test_min_max_simd_with_x_parallel_correct(n_threads: usize) {
-        let x = (0..100).map(|x| x as f32).collect::<Vec<f32>>();
-        let x = Array1::from(x);
-        let arr = (0..100).map(|x| x as f32).collect::<Vec<f32>>();
-        let arr = Array1::from(arr);
-
-        let sampled_indices = min_max_simd_with_x_parallel(x.view(), arr.view(), 10, n_threads);
-        let sampled_values = sampled_indices.mapv(|x| arr[x]);
-
-        let expected_indices = vec![0, 19, 20, 39, 40, 59, 60, 79, 80, 99];
-        let expected_values = expected_indices
-            .iter()
-            .map(|x| *x as f32)
-            .collect::<Vec<f32>>();
-
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-        assert_eq!(sampled_values, Array1::from(expected_values));
-    }
-
-    #[test]
-    fn test_min_max_simd_with_x_gap() {
-        // We will create a gap in the middle of the array
-        let x = (0..100).collect::<Vec<i32>>();
-
-        // Increment the second half of the array by 50
-        let x = x
-            .iter()
-            .map(|x| if *x > 50 { *x + 50 } else { *x })
-            .collect::<Vec<i32>>();
-        let x = Array1::from(x);
-        let arr = (0..100).map(|x| x as f32).collect::<Vec<f32>>();
-        let arr = Array1::from(arr);
-
-        let sampled_indices = min_max_simd_with_x(x.view(), arr.view(), 10);
-        assert_eq!(sampled_indices.len(), 8); // One full gap
-        let expected_indices = vec![0, 29, 30, 50, 51, 69, 70, 99];
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-
-        // Increment the second half of the array by 50 again
-        let x = x
-            .iter()
-            .map(|x| if *x > 101 { *x + 50 } else { *x })
-            .collect::<Vec<i32>>();
-        let x = Array1::from(x);
-
-        let sampled_indices = min_max_simd_with_x(x.view(), arr.view(), 10);
-        assert_eq!(sampled_indices.len(), 9); // Gap with 1 value
-        let expected_indices = vec![0, 39, 40, 50, 51, 52, 59, 60, 99];
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-    }
-
-    #[apply(threads)]
-    fn test_min_max_simd_with_x_parallel_gap(n_threads: usize) {
-        // Create a gap in the middle of the array
-        let x = (0..100).collect::<Vec<i32>>();
-
-        // Increment the second half of the array by 50
-        let x = x
-            .iter()
-            .map(|x| if *x > 50 { *x + 50 } else { *x })
-            .collect::<Vec<i32>>();
-        let x = Array1::from(x);
-        let arr = (0..100).map(|x| x as f32).collect::<Vec<f32>>();
-        let arr = Array1::from(arr);
-
-        let sampled_indices = min_max_simd_with_x_parallel(x.view(), arr.view(), 10, n_threads);
-        assert_eq!(sampled_indices.len(), 8); // One full gap
-        let expected_indices = vec![0, 29, 30, 50, 51, 69, 70, 99];
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-
-        // Increment the second half of the array by 50 again
-        let x = x
-            .iter()
-            .map(|x| if *x > 101 { *x + 50 } else { *x })
-            .collect::<Vec<i32>>();
-        let x = Array1::from(x);
-
-        let sampled_indices = min_max_simd_with_x_parallel(x.view(), arr.view(), 10, n_threads);
-        assert_eq!(sampled_indices.len(), 9); // Gap with 1 value
-        let expected_indices = vec![0, 39, 40, 50, 51, 52, 59, 60, 99];
-        assert_eq!(sampled_indices, Array1::from(expected_indices));
-    }
-
-    #[apply(threads)]
-    fn test_many_random_runs_same_output(n_threads: usize) {
-        let n = 20_003;
-        let n_out = 202;
-        let x = (0..n).map(|x| x as i32).collect::<Vec<i32>>();
-        let x = Array1::from(x);
-        for _ in 0..100 {
-            let mut arr = get_array_f32(n);
-            arr[n - 1] = f32::INFINITY; // Make sure the last value is always the max
-            let idxs1 = min_max_simd_without_x(arr.view(), n_out);
-            let idxs2 = min_max_simd_without_x_parallel(arr.view(), n_out, n_threads);
-            let idxs3 = min_max_simd_with_x(x.view(), arr.view(), n_out);
-            let idxs4 = min_max_simd_with_x_parallel(x.view(), arr.view(), n_out, n_threads);
-            assert_eq!(idxs1, idxs2);
-            assert_eq!(idxs1, idxs3);
-            assert_eq!(idxs1, idxs4);
-        }
-    }
-}
diff --git a/downsample_rs/src/minmaxlttb.rs b/downsample_rs/src/minmaxlttb.rs
new file mode 100644
index 0000000..9a0b708
--- /dev/null
+++ b/downsample_rs/src/minmaxlttb.rs
@@ -0,0 +1,290 @@
+use argminmax::ArgMinMax;
+
+use super::lttb::{lttb_with_x, lttb_without_x};
+use super::types::Num;
+
+use super::minmax;
+use num_traits::{AsPrimitive, FromPrimitive};
+
+// ----------------------------------- NON-PARALLEL ------------------------------------
+
+// ----------- WITH X
+
+pub fn minmaxlttb_with_x<Tx: Num + AsPrimitive<f64> + FromPrimitive, Ty: Num + AsPrimitive<f64>>(
+    x: &[Tx],
+    y: &[Ty],
+    n_out: usize,
+    minmax_ratio: usize,
+) -> Vec<usize>
+where
+    for<'a> &'a [Ty]: ArgMinMax,
+{
+    minmaxlttb_generic(
+        x,
+        y,
+        n_out,
+        minmax_ratio,
+        None,
+        MinMaxFunctionWithX::Serial(minmax::min_max_with_x),
+    )
+}
+
+// ----------- WITHOUT X
+
+pub fn minmaxlttb_without_x<Ty: Num + AsPrimitive<f64>>(
+    y: &[Ty],
+    n_out: usize,
+    minmax_ratio: usize,
+) -> Vec<usize>
+where
+    for<'a> &'a [Ty]: ArgMinMax,
+{
+    minmaxlttb_generic_without_x(
+        y,
+        n_out,
+        minmax_ratio,
+        None,
+        MinMaxFunctionWithoutX::Serial(minmax::min_max_without_x),
+    )
+}
+
+// ------------------------------------- PARALLEL --------------------------------------
+
+// ----------- WITH X
+
+pub fn minmaxlttb_with_x_parallel<
+    Tx: Num + AsPrimitive<f64> + FromPrimitive + Send + Sync,
+    Ty: Num + AsPrimitive<f64> + Send + Sync,
+>(
+    x: &[Tx],
+    y: &[Ty],
+    n_out: usize,
+    minmax_ratio: usize,
+    n_threads: usize,
+) -> Vec<usize>
+where
+    for<'a> &'a [Ty]: ArgMinMax,
+{
+    minmaxlttb_generic(
+        x,
+        y,
+        n_out,
+        minmax_ratio,
+        Some(n_threads),
+        MinMaxFunctionWithX::Parallel(minmax::min_max_with_x_parallel),
+    )
+}
+
+// ----------- WITHOUT X
+
+pub fn minmaxlttb_without_x_parallel<Ty: Num + AsPrimitive<f64> + Send + Sync>(
+    y: &[Ty],
+    n_out: usize,
+    minmax_ratio: usize,
+    n_threads: usize,
+) -> Vec<usize>
+where
+    for<'a> &'a [Ty]: ArgMinMax,
+{
+    minmaxlttb_generic_without_x(
+        y,
+        n_out,
+        minmax_ratio,
+        Some(n_threads),
+        MinMaxFunctionWithoutX::Parallel(minmax::min_max_without_x_parallel),
+    )
+}
+
+// ----------------------------------- GENERICS ------------------------------------
+
+// types to make function signatures easier to read
+type ThreadCount = usize;
+type OutputCount = usize;
+
+pub enum MinMaxFunctionWithX<Tx: Num + AsPrimitive<f64>, Ty: Num + AsPrimitive<f64>> {
+    Serial(fn(&[Tx], &[Ty], OutputCount) -> Vec<usize>),
+    Parallel(fn(&[Tx], &[Ty], OutputCount, ThreadCount) -> Vec<usize>),
+}
+
+pub enum MinMaxFunctionWithoutX<Ty: Num + AsPrimitive<f64>> {
+    Serial(fn(&[Ty], OutputCount) -> Vec<usize>),
+    Parallel(fn(&[Ty], OutputCount, ThreadCount) -> Vec<usize>),
+}
+
+#[inline(always)]
+pub(crate) fn minmaxlttb_generic<Tx: Num + AsPrimitive<f64>, Ty: Num + AsPrimitive<f64>>(
+    x: &[Tx],
+    y: &[Ty],
+    n_out: usize,
+    minmax_ratio: usize,
+    n_threads: Option<usize>,
+    f_minmax: MinMaxFunctionWithX<Tx, Ty>,
+) -> Vec<usize>
+where
+    for<'a> &'a [Ty]: ArgMinMax,
+{
+    assert_eq!(x.len(), y.len());
+    assert!(minmax_ratio > 1);
+    // Apply first min max aggregation (if above ratio)
+    if x.len() / n_out > minmax_ratio {
+        // Get index of min max points
+        let mut index = match f_minmax {
+            MinMaxFunctionWithX::Serial(func) => func(
+                &x[1..(x.len() - 1)],
+                &y[1..(x.len() - 1)],
+                n_out * minmax_ratio,
+            ),
+            MinMaxFunctionWithX::Parallel(func) => func(
+                &x[1..(x.len() - 1)],
+                &y[1..(x.len() - 1)],
+                n_out * minmax_ratio,
+                n_threads.unwrap(), // n_threads cannot be None
+            ),
+        };
+        // inplace + 1
+        index.iter_mut().for_each(|elem| *elem += 1);
+        // Prepend first and last point
+        index.insert(0, 0);
+        index.push(x.len() - 1);
+        // Get x and y values at index
+        let x = unsafe {
+            index
+                .iter()
+                .map(|i| *x.get_unchecked(*i))
+                .collect::<Vec<Tx>>()
+        };
+        let y = unsafe {
+            index
+                .iter()
+                .map(|i| *y.get_unchecked(*i))
+                .collect::<Vec<Ty>>()
+        };
+        // Apply lttb on the reduced data
+        let index_points_selected = lttb_with_x(x.as_slice(), y.as_slice(), n_out);
+        // Return the original index
+        return index_points_selected
+            .iter()
+            .map(|i| index[*i])
+            .collect::<Vec<usize>>();
+    }
+    // Apply lttb on all data when requirement is not met
+    lttb_with_x(x, y, n_out)
+}
+
+#[inline(always)]
+pub(crate) fn minmaxlttb_generic_without_x<Ty: Num + AsPrimitive<f64>>(
+    y: &[Ty],
+    n_out: usize,
+    minmax_ratio: usize,
+    n_threads: Option<usize>,
+    f_minmax: MinMaxFunctionWithoutX<Ty>,
+) -> Vec<usize>
+where
+    for<'a> &'a [Ty]: ArgMinMax,
+{
+    assert!(minmax_ratio > 1);
+    // Apply first min max aggregation (if above ratio)
+    if y.len() / n_out > minmax_ratio {
+        // Get index of min max points
+        let mut index = match f_minmax {
+            MinMaxFunctionWithoutX::Serial(func) => {
+                func(&y[1..(y.len() - 1)], n_out * minmax_ratio)
+            }
+            MinMaxFunctionWithoutX::Parallel(func) => func(
+                &y[1..(y.len() - 1)],
+                n_out * minmax_ratio,
+                n_threads.unwrap(), // n_threads cannot be None
+            ),
+        };
+        // inplace + 1
+        index.iter_mut().for_each(|elem| *elem += 1);
+        // Prepend first and last point
+        index.insert(0, 0);
+        index.push(y.len() - 1);
+        // Get y values at index
+        let y = unsafe {
+            index
+                .iter()
+                .map(|i| *y.get_unchecked(*i))
+                .collect::<Vec<Ty>>()
+        };
+        // Apply lttb on the reduced data
+        let index_points_selected = lttb_without_x(y.as_slice(), n_out);
+        // Return the original index
+        return index_points_selected
+            .iter()
+            .map(|i| index[*i])
+            .collect::<Vec<usize>>();
+    }
+    // Apply lttb on all data when requirement is not met
+    lttb_without_x(y, n_out).to_vec()
+}
+
+#[cfg(test)]
+mod tests {
+    use rstest::rstest;
+    use rstest_reuse::{self, *};
+
+    use super::{minmaxlttb_with_x, minmaxlttb_without_x};
+    use super::{minmaxlttb_with_x_parallel, minmaxlttb_without_x_parallel};
+
+    use dev_utils::utils;
+
+    fn get_array_f32(n: usize) -> Vec<f32> {
+        utils::get_random_array(n, f32::MIN, f32::MAX)
+    }
+
+    // Template for the n_threads matrix
+    #[template]
+    #[rstest]
+    #[case(1)]
+    #[case(utils::get_all_threads() / 2)]
+    #[case(utils::get_all_threads())]
+    #[case(utils::get_all_threads() * 2)]
+    fn threads(#[case] n_threads: usize) {}
+
+    #[test]
+    fn test_minmaxlttb_with_x() {
+        let x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
+        let y = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
+        let sampled_indices = minmaxlttb_with_x(&x, &y, 4, 2);
+        assert_eq!(sampled_indices, vec![0, 1, 5, 9]);
+    }
+
+    #[test]
+    fn test_minmaxlttb_without_x() {
+        let y = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
+        let sampled_indices = minmaxlttb_without_x(&y, 4, 2);
+        assert_eq!(sampled_indices, vec![0, 1, 5, 9]);
+    }
+
+    #[apply(threads)]
+    fn test_minmaxlttb_with_x_parallel(n_threads: usize) {
+        let x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
+        let y = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
+        let sampled_indices = minmaxlttb_with_x_parallel(&x, &y, 4, 2, n_threads);
+        assert_eq!(sampled_indices, vec![0, 1, 5, 9]);
+    }
+
+    #[apply(threads)]
+    fn test_minmaxlttb_without_x_parallel(n_threads: usize) {
+        let y = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
+        let sampled_indices = minmaxlttb_without_x_parallel(&y, 4, 2, n_threads);
+        assert_eq!(sampled_indices, vec![0, 1, 5, 9]);
+    }
+
+    #[apply(threads)]
+    fn test_many_random_runs_same_output(n_threads: usize) {
+        const N: usize = 20_000;
+        const N_OUT: usize = 100;
+        const MINMAX_RATIO: usize = 5;
+        for _ in 0..100 {
+            // TODO: test with x
+            let arr = get_array_f32(N);
+            let idxs1 = minmaxlttb_without_x(arr.as_slice(), N_OUT, MINMAX_RATIO);
+            let idxs2 =
+                minmaxlttb_without_x_parallel(arr.as_slice(), N_OUT, MINMAX_RATIO, n_threads);
+            assert_eq!(idxs1, idxs2);
+        }
+    }
+}
diff --git a/downsample_rs/src/minmaxlttb/generic.rs b/downsample_rs/src/minmaxlttb/generic.rs
deleted file mode 100644
index 09b0007..0000000
--- a/downsample_rs/src/minmaxlttb/generic.rs
+++ /dev/null
@@ -1,108 +0,0 @@
-use ndarray::{s, Array1, ArrayView1};
-
-use super::super::helpers::Average;
-use super::super::lttb::{lttb_with_x, lttb_without_x};
-use super::super::types::Num;
-use num_traits::AsPrimitive;
-
-// types to make function signatures easier to read
-type ThreadCount = usize;
-type OutputCount = usize;
-
-pub enum MinMaxFunctionWithX<Tx: Num + AsPrimitive<f64>, Ty: Num + AsPrimitive<f64>> {
-    Serial(fn(ArrayView1<Tx>, ArrayView1<Ty>, OutputCount) -> Array1<usize>),
-    Parallel(fn(ArrayView1<Tx>, ArrayView1<Ty>, OutputCount, ThreadCount) -> Array1<usize>),
-}
-
-pub enum MinMaxFunctionWithoutX<Ty: Num + AsPrimitive<f64>> {
-    Serial(fn(ArrayView1<Ty>, OutputCount) -> Array1<usize>),
-    Parallel(fn(ArrayView1<Ty>, OutputCount, ThreadCount) -> Array1<usize>),
-}
-
-#[inline(always)]
-pub(crate) fn minmaxlttb_generic<Tx: Num + AsPrimitive<f64>, Ty: Num + AsPrimitive<f64>>(
-    x: ArrayView1<Tx>,
-    y: ArrayView1<Ty>,
-    n_out: usize,
-    minmax_ratio: usize,
-    n_threads: Option<usize>,
-    f_minmax: MinMaxFunctionWithX<Tx, Ty>,
-) -> Array1<usize>
-where
-    for<'a> ArrayView1<'a, Ty>: Average,
-{
-    assert_eq!(x.len(), y.len());
-    assert!(minmax_ratio > 1);
-    // Apply first min max aggregation (if above ratio)
-    if x.len() / n_out > minmax_ratio {
-        // Get index of min max points
-        let mut index = match f_minmax {
-            MinMaxFunctionWithX::Serial(func) => {
-                func(x.slice(s![1..-1]), y.slice(s![1..-1]), n_out * minmax_ratio)
-            }
-            MinMaxFunctionWithX::Parallel(func) => func(
-                x.slice(s![1..-1]),
-                y.slice(s![1..-1]),
-                n_out * minmax_ratio,
-                n_threads.unwrap(), // n_threads cannot be None
-            ),
-        };
-        // inplace + 1
-        index.mapv_inplace(|i| i + 1);
-        let mut index: Vec<usize> = index.into_raw_vec();
-        // Prepend first and last point
-        index.insert(0, 0);
-        index.push(x.len() - 1);
-        let index = Array1::from_vec(index);
-        // Get x and y values at index
-        let x = unsafe { index.mapv(|i| *x.uget(i)) };
-        let y = unsafe { index.mapv(|i| *y.uget(i)) };
-        // Apply lttb on the reduced data
-        let index_points_selected = lttb_with_x(x.view(), y.view(), n_out);
-        // Return the original index
-        return index_points_selected.mapv(|i| index[i]);
-    }
-    // Apply lttb on all data when requirement is not met
-    lttb_with_x(x, y, n_out)
-}
-
-#[inline(always)]
-pub(crate) fn minmaxlttb_generic_without_x<Ty: Num + AsPrimitive<f64>>(
-    y: ArrayView1<Ty>,
-    n_out: usize,
-    minmax_ratio: usize,
-    n_threads: Option<usize>,
-    f_minmax: MinMaxFunctionWithoutX<Ty>,
-) -> Array1<usize>
-where
-    for<'a> ArrayView1<'a, Ty>: Average,
-{
-    assert!(minmax_ratio > 1);
-    // Apply first min max aggregation (if above ratio)
-    if y.len() / n_out > minmax_ratio {
-        // Get index of min max points
-        let mut index = match f_minmax {
-            MinMaxFunctionWithoutX::Serial(func) => func(y.slice(s![1..-1]), n_out * minmax_ratio),
-            MinMaxFunctionWithoutX::Parallel(func) => func(
-                y.slice(s![1..-1]),
-                n_out * minmax_ratio,
-                n_threads.unwrap(), // n_threads cannot be None
-            ),
-        };
-        // inplace + 1
-        index.mapv_inplace(|i| i + 1);
-        let mut index: Vec<usize> = index.into_raw_vec();
-        // Prepend first and last point
-        index.insert(0, 0);
-        index.push(y.len() - 1);
-        let index = Array1::from_vec(index);
-        // Get y values at index
-        let y = unsafe { index.mapv(|i| *y.uget(i)) };
-        // Apply lttb on the reduced data
-        let index_points_selected = lttb_without_x(y.view(), n_out);
-        // Return the original index
-        return index_points_selected.mapv(|i| index[i]);
-    }
-    // Apply lttb on all data when requirement is not met
-    lttb_without_x(y.view(), n_out)
-}
diff --git a/downsample_rs/src/minmaxlttb/mod.rs b/downsample_rs/src/minmaxlttb/mod.rs
deleted file mode 100644
index 3893d2b..0000000
--- a/downsample_rs/src/minmaxlttb/mod.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-pub mod scalar;
-pub use scalar::*;
-pub mod simd;
-pub use simd::*;
-mod generic; // Private module
diff --git a/downsample_rs/src/minmaxlttb/scalar.rs b/downsample_rs/src/minmaxlttb/scalar.rs
deleted file mode 100644
index b0539f4..0000000
--- a/downsample_rs/src/minmaxlttb/scalar.rs
+++ /dev/null
@@ -1,179 +0,0 @@
-use super::super::helpers::Average;
-use super::super::minmax;
-use super::super::types::Num;
-use super::generic::{
-    minmaxlttb_generic, minmaxlttb_generic_without_x, MinMaxFunctionWithX, MinMaxFunctionWithoutX,
-};
-use ndarray::{Array1, ArrayView1};
-use num_traits::{AsPrimitive, FromPrimitive};
-
-use argminmax::{ScalarArgMinMax, SCALAR};
-
-// ----------------------------------- NON-PARALLEL ------------------------------------
-
-// ----------- WITH X
-
-pub fn minmaxlttb_scalar_with_x<
-    Tx: Num + AsPrimitive<f64> + FromPrimitive,
-    Ty: Num + AsPrimitive<f64>,
->(
-    x: ArrayView1<Tx>,
-    y: ArrayView1<Ty>,
-    n_out: usize,
-    minmax_ratio: usize,
-) -> Array1<usize>
-where
-    SCALAR: ScalarArgMinMax<Ty>,
-    for<'a> ArrayView1<'a, Ty>: Average,
-{
-    minmaxlttb_generic(
-        x,
-        y,
-        n_out,
-        minmax_ratio,
-        None,
-        MinMaxFunctionWithX::Serial(minmax::min_max_scalar_with_x),
-    )
-}
-
-// ----------- WITHOUT X
-
-pub fn minmaxlttb_scalar_without_x<Ty: Num + AsPrimitive<f64>>(
-    y: ArrayView1<Ty>,
-    n_out: usize,
-    minmax_ratio: usize,
-) -> Array1<usize>
-where
-    SCALAR: ScalarArgMinMax<Ty>,
-    for<'a> ArrayView1<'a, Ty>: Average,
-{
-    minmaxlttb_generic_without_x(
-        y,
-        n_out,
-        minmax_ratio,
-        None,
-        MinMaxFunctionWithoutX::Serial(minmax::min_max_scalar_without_x),
-    )
-}
-
-// ------------------------------------- PARALLEL --------------------------------------
-
-// ----------- WITH X
-
-pub fn minmaxlttb_scalar_with_x_parallel<
-    Tx: Num + AsPrimitive<f64> + FromPrimitive + Send + Sync,
-    Ty: Num + AsPrimitive<f64> + Send + Sync,
->(
-    x: ArrayView1<Tx>,
-    y: ArrayView1<Ty>,
-    n_out: usize,
-    minmax_ratio: usize,
-    n_threads: usize,
-) -> Array1<usize>
-where
-    SCALAR: ScalarArgMinMax<Ty>,
-    for<'a> ArrayView1<'a, Ty>: Average,
-{
-    minmaxlttb_generic(
-        x,
-        y,
-        n_out,
-        minmax_ratio,
-        Some(n_threads),
-        MinMaxFunctionWithX::Parallel(minmax::min_max_scalar_with_x_parallel),
-    )
-}
-
-// ----------- WITHOUT X
-
-pub fn minmaxlttb_scalar_without_x_parallel<Ty: Num + AsPrimitive<f64> + Send + Sync>(
-    y: ArrayView1<Ty>,
-    n_out: usize,
-    minmax_ratio: usize,
-    n_threads: usize,
-) -> Array1<usize>
-where
-    SCALAR: ScalarArgMinMax<Ty>,
-    for<'a> ArrayView1<'a, Ty>: Average,
-{
-    minmaxlttb_generic_without_x(
-        y,
-        n_out,
-        minmax_ratio,
-        Some(n_threads),
-        MinMaxFunctionWithoutX::Parallel(minmax::min_max_scalar_without_x_parallel),
-    )
-}
-
-// --------------------------------------- TESTS ---------------------------------------
-
-#[cfg(test)]
-mod tests {
-    use rstest::rstest;
-    use rstest_reuse::{self, *};
-
-    use super::{minmaxlttb_scalar_with_x, minmaxlttb_scalar_without_x};
-    use super::{minmaxlttb_scalar_with_x_parallel, minmaxlttb_scalar_without_x_parallel};
-    use ndarray::{array, Array1};
-
-    use dev_utils::utils;
-
-    fn get_array_f32(n: usize) -> Array1<f32> {
-        utils::get_random_array(n, f32::MIN, f32::MAX)
-    }
-
-    // Template for the n_threads matrix
-    #[template]
-    #[rstest]
-    #[case(1)]
-    #[case(utils::get_all_threads() / 2)]
-    #[case(utils::get_all_threads())]
-    #[case(utils::get_all_threads() * 2)]
-    fn threads(#[case] n_threads: usize) {}
-
-    #[test]
-    fn test_minmaxlttb_with_x() {
-        let x = array![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
-        let y = array![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
-        let sampled_indices = minmaxlttb_scalar_with_x(x.view(), y.view(), 4, 2);
-        assert_eq!(sampled_indices, array![0, 1, 5, 9]);
-    }
-
-    #[test]
-    fn test_minmaxlttb_without_x() {
-        let y = array![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
-        let sampled_indices = minmaxlttb_scalar_without_x(y.view(), 4, 2);
-        assert_eq!(sampled_indices, array![0, 1, 5, 9]);
-    }
-
-    #[apply(threads)]
-    fn test_minmaxlttb_with_x_parallel(n_threads: usize) {
-        let x = array![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
-        let y = array![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
-        let sampled_indices =
-            minmaxlttb_scalar_with_x_parallel(x.view(), y.view(), 4, 2, n_threads);
-        assert_eq!(sampled_indices, array![0, 1, 5, 9]);
-    }
-
-    #[apply(threads)]
-    fn test_minmaxlttb_without_x_parallel(n_threads: usize) {
-        let y = array![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
-        let sampled_indices = minmaxlttb_scalar_without_x_parallel(y.view(), 4, 2, n_threads);
-        assert_eq!(sampled_indices, array![0, 1, 5, 9]);
-    }
-
-    #[apply(threads)]
-    fn test_many_random_runs_same_output(n_threads: usize) {
-        let n: usize = 20_000;
-        let n_out: usize = 100;
-        let minmax_ratio: usize = 5;
-        for _ in 0..100 {
-            // TODO: test with x
-            let arr = get_array_f32(n);
-            let idxs1 = minmaxlttb_scalar_without_x(arr.view(), n_out, minmax_ratio);
-            let idxs2 =
-                minmaxlttb_scalar_without_x_parallel(arr.view(), n_out, minmax_ratio, n_threads);
-            assert_eq!(idxs1, idxs2);
-        }
-    }
-}
diff --git a/downsample_rs/src/minmaxlttb/simd.rs b/downsample_rs/src/minmaxlttb/simd.rs
deleted file mode 100644
index cf06644..0000000
--- a/downsample_rs/src/minmaxlttb/simd.rs
+++ /dev/null
@@ -1,178 +0,0 @@
-use super::super::minmax;
-use super::generic::{
-    minmaxlttb_generic, minmaxlttb_generic_without_x, MinMaxFunctionWithX, MinMaxFunctionWithoutX,
-};
-
-use super::super::helpers::Average;
-use super::super::types::Num;
-use ndarray::{Array1, ArrayView1};
-use num_traits::{AsPrimitive, FromPrimitive};
-
-use argminmax::ArgMinMax;
-
-// ----------------------------------- NON-PARALLEL ------------------------------------
-
-// ----------- WITH X
-
-pub fn minmaxlttb_simd_with_x<
-    Tx: Num + FromPrimitive + AsPrimitive<f64>,
-    Ty: Num + AsPrimitive<f64>,
->(
-    x: ArrayView1<Tx>,
-    y: ArrayView1<Ty>,
-    n_out: usize,
-    minmax_ratio: usize,
-) -> Array1<usize>
-where
-    for<'a> ArrayView1<'a, Ty>: ArgMinMax,
-    for<'a> ArrayView1<'a, Ty>: Average,
-{
-    minmaxlttb_generic(
-        x,
-        y,
-        n_out,
-        minmax_ratio,
-        None,
-        MinMaxFunctionWithX::Serial(minmax::min_max_simd_with_x),
-    )
-}
-
-// ----------- WITHOUT X
-
-pub fn minmaxlttb_simd_without_x<Ty: Num + AsPrimitive<f64>>(
-    y: ArrayView1<Ty>,
-    n_out: usize,
-    minmax_ratio: usize,
-) -> Array1<usize>
-where
-    for<'a> ArrayView1<'a, Ty>: ArgMinMax,
-    for<'a> ArrayView1<'a, Ty>: Average,
-{
-    minmaxlttb_generic_without_x(
-        y,
-        n_out,
-        minmax_ratio,
-        None,
-        MinMaxFunctionWithoutX::Serial(minmax::min_max_simd_without_x),
-    )
-}
-
-// ------------------------------------- PARALLEL --------------------------------------
-
-// ----------- WITH X
-
-pub fn minmaxlttb_simd_with_x_parallel<Tx, Ty>(
-    x: ArrayView1<Tx>,
-    y: ArrayView1<Ty>,
-    n_out: usize,
-    minmax_ratio: usize,
-    n_threads: usize,
-) -> Array1<usize>
-where
-    for<'a> ArrayView1<'a, Ty>: ArgMinMax,
-    for<'a> ArrayView1<'a, Ty>: Average,
-    Tx: Num + FromPrimitive + AsPrimitive<f64> + Send + Sync,
-    Ty: Num + AsPrimitive<f64> + Send + Sync,
-{
-    minmaxlttb_generic(
-        x,
-        y,
-        n_out,
-        minmax_ratio,
-        Some(n_threads),
-        MinMaxFunctionWithX::Parallel(minmax::min_max_simd_with_x_parallel),
-    )
-}
-
-// ----------- WITHOUT X
-
-pub fn minmaxlttb_simd_without_x_parallel<Ty: Num + AsPrimitive<f64> + Send + Sync>(
-    y: ArrayView1<Ty>,
-    n_out: usize,
-    minmax_ratio: usize,
-    n_threads: usize,
-) -> Array1<usize>
-where
-    for<'a> ArrayView1<'a, Ty>: ArgMinMax,
-    for<'a> ArrayView1<'a, Ty>: Average,
-{
-    minmaxlttb_generic_without_x(
-        y,
-        n_out,
-        minmax_ratio,
-        Some(n_threads),
-        MinMaxFunctionWithoutX::Parallel(minmax::min_max_simd_without_x_parallel),
-    )
-}
-
-// --------------------------------------- TESTS ---------------------------------------
-
-#[cfg(test)]
-mod tests {
-    use rstest::rstest;
-    use rstest_reuse::{self, *};
-
-    use super::{minmaxlttb_simd_with_x, minmaxlttb_simd_without_x};
-    use super::{minmaxlttb_simd_with_x_parallel, minmaxlttb_simd_without_x_parallel};
-    use ndarray::{array, Array1};
-
-    use dev_utils::utils;
-
-    fn get_array_f32(n: usize) -> Array1<f32> {
-        utils::get_random_array(n, f32::MIN, f32::MAX)
-    }
-
-    // Template for the n_threads matrix
-    #[template]
-    #[rstest]
-    #[case(1)]
-    #[case(utils::get_all_threads() / 2)]
-    #[case(utils::get_all_threads())]
-    #[case(utils::get_all_threads() * 2)]
-    fn threads(#[case] n_threads: usize) {}
-
-    #[test]
-    fn test_minmaxlttb_with_x() {
-        let x = array![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
-        let y = array![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
-        let sampled_indices = minmaxlttb_simd_with_x(x.view(), y.view(), 4, 2);
-        assert_eq!(sampled_indices, array![0, 1, 5, 9]);
-    }
-
-    #[test]
-    fn test_minmaxlttb_without_x() {
-        let y = array![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
-        let sampled_indices = minmaxlttb_simd_without_x(y.view(), 4, 2);
-        assert_eq!(sampled_indices, array![0, 1, 5, 9]);
-    }
-
-    #[apply(threads)]
-    fn test_minmaxlttb_with_x_parallel(n_threads: usize) {
-        let x = array![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
-        let y = array![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
-        let sampled_indices = minmaxlttb_simd_with_x_parallel(x.view(), y.view(), 4, 2, n_threads);
-        assert_eq!(sampled_indices, array![0, 1, 5, 9]);
-    }
-
-    #[apply(threads)]
-    fn test_minmaxlttb_without_x_parallel(n_threads: usize) {
-        let y = array![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
-        let sampled_indices = minmaxlttb_simd_without_x_parallel(y.view(), 4, 2, n_threads);
-        assert_eq!(sampled_indices, array![0, 1, 5, 9]);
-    }
-
-    #[apply(threads)]
-    fn test_many_random_runs_same_output(n_threads: usize) {
-        let n: usize = 20_000;
-        let n_out: usize = 100;
-        let minmax_ratio: usize = 5;
-        for _ in 0..100 {
-            // TODO: test with x
-            let arr = get_array_f32(n);
-            let idxs1 = minmaxlttb_simd_without_x(arr.view(), n_out, minmax_ratio);
-            let idxs2 =
-                minmaxlttb_simd_without_x_parallel(arr.view(), n_out, minmax_ratio, n_threads);
-            assert_eq!(idxs1, idxs2);
-        }
-    }
-}
diff --git a/downsample_rs/src/searchsorted.rs b/downsample_rs/src/searchsorted.rs
index d60d261..af53a9c 100644
--- a/downsample_rs/src/searchsorted.rs
+++ b/downsample_rs/src/searchsorted.rs
@@ -1,5 +1,3 @@
-use ndarray::ArrayView1;
-
 use rayon::iter::IndexedParallelIterator;
 use rayon::prelude::*;
 
@@ -15,12 +13,7 @@ use num_traits::{AsPrimitive, FromPrimitive};
 /// https://docs.python.org/3/library/bisect.html#bisect.bisect
 ///
 // #[inline(always)]
-fn binary_search<T: Copy + PartialOrd>(
-    arr: ArrayView1<T>,
-    value: T,
-    left: usize,
-    right: usize,
-) -> usize {
+fn binary_search<T: Copy + PartialOrd>(arr: &[T], value: T, left: usize, right: usize) -> usize {
     let mut size: usize = right - left;
     let mut left: usize = left;
     let mut right: usize = right;
@@ -51,7 +44,7 @@ fn binary_search<T: Copy + PartialOrd>(
 ///
 // #[inline(always)]
 fn binary_search_with_mid<T: Copy + PartialOrd>(
-    arr: ArrayView1<T>,
+    arr: &[T],
     value: T,
     left: usize,
     right: usize,
@@ -83,7 +76,7 @@ fn binary_search_with_mid<T: Copy + PartialOrd>(
 // --- Sequential version
 
 pub(crate) fn get_equidistant_bin_idx_iterator<T>(
-    arr: ArrayView1<T>,
+    arr: &[T],
     nb_bins: usize,
 ) -> impl Iterator<Item = Option<(usize, usize)>> + '_
 where
@@ -133,7 +126,7 @@ fn sequential_add_mul(start_val: f64, add_val: f64, mul: usize) -> f64 {
 }
 
 pub(crate) fn get_equidistant_bin_idx_iterator_parallel<T>(
-    arr: ArrayView1<T>,
+    arr: &[T],
     nb_bins: usize,
     n_threads: usize,
 ) -> impl IndexedParallelIterator<Item = impl Iterator<Item = Option<(usize, usize)>> + '_> + '_
@@ -194,7 +187,6 @@ mod tests {
     use rstest_reuse::{self, *};
 
     use super::*;
-    use ndarray::Array1;
 
     use dev_utils::utils::{get_all_threads, get_random_array};
 
@@ -210,9 +202,9 @@ mod tests {
     #[test]
     fn test_search_sorted_identicial_to_np_linspace_searchsorted() {
         // Create a 0..9999 array
-        let arr = Array1::from((0..10_000).collect::<Vec<usize>>());
+        let arr: [u32; 10_000] = core::array::from_fn(|i| i.as_());
         assert!(arr.len() == 10_000);
-        let iterator = get_equidistant_bin_idx_iterator(arr.view(), 4);
+        let iterator = get_equidistant_bin_idx_iterator(&arr, 4);
         // Check the iterator
         let mut idx: usize = 0;
         for bin in iterator {
@@ -225,82 +217,49 @@ mod tests {
 
     #[test]
     fn test_binary_search() {
-        let arr = Array1::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
-        assert_eq!(binary_search(arr.view(), 0, 0, arr.len() - 1), 0);
-        assert_eq!(binary_search(arr.view(), 1, 0, arr.len() - 1), 1);
-        assert_eq!(binary_search(arr.view(), 2, 0, arr.len() - 1), 2);
-        assert_eq!(binary_search(arr.view(), 3, 0, arr.len() - 1), 3);
-        assert_eq!(binary_search(arr.view(), 4, 0, arr.len() - 1), 4);
-        assert_eq!(binary_search(arr.view(), 5, 0, arr.len() - 1), 5);
-        assert_eq!(binary_search(arr.view(), 6, 0, arr.len() - 1), 6);
-        assert_eq!(binary_search(arr.view(), 7, 0, arr.len() - 1), 7);
-        assert_eq!(binary_search(arr.view(), 8, 0, arr.len() - 1), 8);
-        assert_eq!(binary_search(arr.view(), 9, 0, arr.len() - 1), 9);
-        assert_eq!(binary_search(arr.view(), 10, 0, arr.len() - 1), 10);
-        assert_eq!(binary_search(arr.view(), 11, 0, arr.len() - 1), 10);
+        let arr = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+        assert_eq!(binary_search(&arr, 0, 0, arr.len() - 1), 0);
+        assert_eq!(binary_search(&arr, 1, 0, arr.len() - 1), 1);
+        assert_eq!(binary_search(&arr, 2, 0, arr.len() - 1), 2);
+        assert_eq!(binary_search(&arr, 3, 0, arr.len() - 1), 3);
+        assert_eq!(binary_search(&arr, 4, 0, arr.len() - 1), 4);
+        assert_eq!(binary_search(&arr, 5, 0, arr.len() - 1), 5);
+        assert_eq!(binary_search(&arr, 6, 0, arr.len() - 1), 6);
+        assert_eq!(binary_search(&arr, 7, 0, arr.len() - 1), 7);
+        assert_eq!(binary_search(&arr, 8, 0, arr.len() - 1), 8);
+        assert_eq!(binary_search(&arr, 9, 0, arr.len() - 1), 9);
+        assert_eq!(binary_search(&arr, 10, 0, arr.len() - 1), 10);
+        assert_eq!(binary_search(&arr, 11, 0, arr.len() - 1), 10);
     }
 
     #[test]
     fn test_binary_search_with_mid() {
-        let arr = Array1::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
-        assert_eq!(
-            binary_search_with_mid(arr.view(), 0, 0, arr.len() - 1, 0),
-            0
-        );
-        assert_eq!(
-            binary_search_with_mid(arr.view(), 1, 0, arr.len() - 1, 0),
-            1
-        );
-        assert_eq!(
-            binary_search_with_mid(arr.view(), 2, 0, arr.len() - 1, 1),
-            2
-        );
-        assert_eq!(
-            binary_search_with_mid(arr.view(), 3, 0, arr.len() - 1, 2),
-            3
-        );
-        assert_eq!(
-            binary_search_with_mid(arr.view(), 4, 0, arr.len() - 1, 3),
-            4
-        );
-        assert_eq!(
-            binary_search_with_mid(arr.view(), 5, 0, arr.len() - 1, 4),
-            5
-        );
-        assert_eq!(
-            binary_search_with_mid(arr.view(), 6, 0, arr.len() - 1, 5),
-            6
-        );
-        assert_eq!(
-            binary_search_with_mid(arr.view(), 7, 0, arr.len() - 1, 6),
-            7
-        );
-        assert_eq!(
-            binary_search_with_mid(arr.view(), 8, 0, arr.len() - 1, 7),
-            8
-        );
-        assert_eq!(
-            binary_search_with_mid(arr.view(), 9, 0, arr.len() - 1, 8),
-            9
-        );
-        assert_eq!(
-            binary_search_with_mid(arr.view(), 10, 0, arr.len() - 1, 9),
-            10
-        );
-        // This line causes the code to crash -> because value higher than arr[mid]
-        // assert_eq!(binary_search_with_mid(arr.view(), 11, 0, arr.len() - 1, 9), 10);
+        let arr = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+        assert_eq!(binary_search_with_mid(&arr, 0, 0, arr.len() - 1, 0), 0);
+        assert_eq!(binary_search_with_mid(&arr, 1, 0, arr.len() - 1, 0), 1);
+        assert_eq!(binary_search_with_mid(&arr, 2, 0, arr.len() - 1, 1), 2);
+        assert_eq!(binary_search_with_mid(&arr, 3, 0, arr.len() - 1, 2), 3);
+        assert_eq!(binary_search_with_mid(&arr, 4, 0, arr.len() - 1, 3), 4);
+        assert_eq!(binary_search_with_mid(&arr, 5, 0, arr.len() - 1, 4), 5);
+        assert_eq!(binary_search_with_mid(&arr, 6, 0, arr.len() - 1, 5), 6);
+        assert_eq!(binary_search_with_mid(&arr, 7, 0, arr.len() - 1, 6), 7);
+        assert_eq!(binary_search_with_mid(&arr, 8, 0, arr.len() - 1, 7), 8);
+        assert_eq!(binary_search_with_mid(&arr, 9, 0, arr.len() - 1, 8), 9);
+        assert_eq!(binary_search_with_mid(&arr, 10, 0, arr.len() - 1, 9), 10);
+        // this line causes the code to crash -> because value higher than arr[mid]
+        // assert_eq!(binary_search_with_mid(&arr, 11, 0, arr.len() - 1, 9), 10);
     }
 
     #[apply(threads)]
     fn test_get_equidistant_bin_idxs(n_threads: usize) {
         let expected_indices = vec![0, 4, 7];
 
-        let arr = Array1::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
-        let bin_idxs_iter = get_equidistant_bin_idx_iterator(arr.view(), 3);
+        let arr = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+        let bin_idxs_iter = get_equidistant_bin_idx_iterator(&arr, 3);
         let bin_idxs = bin_idxs_iter.map(|x| x.unwrap().0).collect::<Vec<usize>>();
         assert_eq!(bin_idxs, expected_indices);
 
-        let bin_idxs_iter = get_equidistant_bin_idx_iterator_parallel(arr.view(), 3, n_threads);
+        let bin_idxs_iter = get_equidistant_bin_idx_iterator_parallel(&arr, 3, n_threads);
         let bin_idxs = bin_idxs_iter
             .map(|x| x.map(|x| x.unwrap().0).collect::<Vec<usize>>())
             .flatten()
@@ -314,19 +273,17 @@ mod tests {
         let nb_bins = 100;
 
         for _ in 0..100 {
-            let arr = get_random_array::<i32>(n, i32::MIN, i32::MAX);
+            let mut arr = get_random_array::<i32>(n, i32::MIN, i32::MAX);
             // Sort the array
-            let mut arr = arr.to_vec();
             arr.sort_by(|a, b| a.partial_cmp(b).unwrap());
-            let arr = Array1::from(arr);
 
             // Calculate the bin indexes
-            let bin_idxs_iter = get_equidistant_bin_idx_iterator(arr.view(), nb_bins);
+            let bin_idxs_iter = get_equidistant_bin_idx_iterator(&arr[..], nb_bins);
             let bin_idxs = bin_idxs_iter.map(|x| x.unwrap().0).collect::<Vec<usize>>();
 
             // Calculate the bin indexes in parallel
             let bin_idxs_iter =
-                get_equidistant_bin_idx_iterator_parallel(arr.view(), nb_bins, n_threads);
+                get_equidistant_bin_idx_iterator_parallel(&arr[..], nb_bins, n_threads);
             let bin_idxs_parallel = bin_idxs_iter
                 .map(|x| x.map(|x| x.unwrap().0).collect::<Vec<usize>>())
                 .flatten()
diff --git a/src/lib.rs b/src/lib.rs
index 4b25be8..5a93afc 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -23,7 +23,7 @@ macro_rules! _create_pyfunc_without_x {
             y: PyReadonlyArray1<$type>,
             n_out: usize,
         ) -> &'py PyArray1<usize> {
-            let y = y.as_array();
+            let y = y.as_slice().unwrap();
             let sampled_indices = $resample_mod::$resample_fn(y, n_out);
             sampled_indices.into_pyarray(py)
         }
@@ -42,7 +42,7 @@ macro_rules! _create_pyfunc_without_x_multithreaded {
             n_out: usize,
             n_threads: usize,
         ) -> &'py PyArray1<usize> {
-            let y = y.as_array();
+            let y = y.as_slice().unwrap();
             let sampled_indices = $resample_mod::$resample_fn(y, n_out, n_threads);
             sampled_indices.into_pyarray(py)
         }
@@ -61,7 +61,7 @@ macro_rules! _create_pyfunc_without_x_with_ratio {
             n_out: usize,
             ratio: usize,
         ) -> &'py PyArray1<usize> {
-            let y = y.as_array();
+            let y = y.as_slice().unwrap();
             let sampled_indices = $resample_mod::$resample_fn(y, n_out, ratio);
             sampled_indices.into_pyarray(py)
         }
@@ -81,7 +81,7 @@ macro_rules! _create_pyfunc_without_x_with_ratio_multithreaded {
             ratio: usize,
             n_threads: usize,
         ) -> &'py PyArray1<usize> {
-            let y = y.as_array();
+            let y = y.as_slice().unwrap();
             let sampled_indices = $resample_mod::$resample_fn(y, n_out, ratio, n_threads);
             sampled_indices.into_pyarray(py)
         }
@@ -112,8 +112,8 @@ macro_rules! _create_pyfunc_with_x {
             y: PyReadonlyArray1<$type_y>,
             n_out: usize,
         ) -> &'py PyArray1<usize> {
-            let x = x.as_array();
-            let y = y.as_array();
+            let x = x.as_slice().unwrap();
+            let y = y.as_slice().unwrap();
             let sampled_indices = $resample_mod::$resample_fn(x, y, n_out);
             sampled_indices.into_pyarray(py)
         }
@@ -133,8 +133,8 @@ macro_rules! _create_pyfunc_with_x_multithreaded {
             n_out: usize,
             n_threads: usize,
         ) -> &'py PyArray1<usize> {
-            let x = x.as_array();
-            let y = y.as_array();
+            let x = x.as_slice().unwrap();
+            let y = y.as_slice().unwrap();
             let sampled_indices = $resample_mod::$resample_fn(x, y, n_out, n_threads);
             sampled_indices.into_pyarray(py)
         }
@@ -154,8 +154,8 @@ macro_rules! _create_pyfunc_with_x_with_ratio {
             n_out: usize,
             ratio: usize,
         ) -> &'py PyArray1<usize> {
-            let x = x.as_array();
-            let y = y.as_array();
+            let x = x.as_slice().unwrap();
+            let y = y.as_slice().unwrap();
             let sampled_indices = $resample_mod::$resample_fn(x, y, n_out, ratio);
             sampled_indices.into_pyarray(py)
         }
@@ -176,8 +176,8 @@ macro_rules! _create_pyfunc_with_x_with_ratio_multithreaded {
             ratio: usize,
             n_threads: usize,
         ) -> &'py PyArray1<usize> {
-            let x = x.as_array();
-            let y = y.as_array();
+            let x = x.as_slice().unwrap();
+            let y = y.as_slice().unwrap();
             let sampled_indices = $resample_mod::$resample_fn(x, y, n_out, ratio, n_threads);
             sampled_indices.into_pyarray(py)
         }
@@ -312,71 +312,37 @@ use downsample_rs::minmax as minmax_mod;
 // Create a sub module for the minmax algorithm
 #[pymodule]
 fn minmax(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
-    // ----------------- SCALAR
+    // ----------------- SEQUENTIAL
 
-    let scalar_mod = PyModule::new(_py, "scalar")?;
+    let sequential_mod = PyModule::new(_py, "sequential")?;
 
     // ----- WITHOUT X
     {
-        create_pyfuncs_without_x!(minmax_mod, min_max_scalar_without_x, scalar_mod);
+        create_pyfuncs_without_x!(minmax_mod, min_max_without_x, sequential_mod);
     }
 
     // ----- WITH X
     {
-        create_pyfuncs_with_x!(minmax_mod, min_max_scalar_with_x, scalar_mod);
+        create_pyfuncs_with_x!(minmax_mod, min_max_with_x, sequential_mod);
     }
 
-    // ----------------- SCALAR PARALLEL
+    // ----------------- PARALLEL
 
-    let scalar_parallel_mod = PyModule::new(_py, "scalar_parallel")?;
+    let parallel_mod = PyModule::new(_py, "parallel")?;
 
     // ----- WITHOUT X
     {
-        create_pyfuncs_without_x!(@threaded minmax_mod, min_max_scalar_without_x_parallel, scalar_parallel_mod);
+        create_pyfuncs_without_x!(@threaded minmax_mod, min_max_without_x_parallel, parallel_mod);
     }
 
     // ----- WITH X
     {
-        create_pyfuncs_with_x!(@threaded minmax_mod, min_max_scalar_with_x_parallel, scalar_parallel_mod);
-    }
-
-    // ----------------- SIMD
-
-    let simd_mod = PyModule::new(_py, "simd")?;
-
-    // ----- WITHOUT X
-    {
-        create_pyfuncs_without_x!(minmax_mod, min_max_simd_without_x, simd_mod);
-    }
-
-    // ----- WITH X
-    {
-        create_pyfuncs_with_x!(minmax_mod, min_max_simd_with_x, simd_mod);
-    }
-
-    // ----------------- SIMD PARALLEL
-
-    let simd_parallel_mod = PyModule::new(_py, "simd_parallel")?;
-
-    // ----- WITHOUT X
-    {
-        create_pyfuncs_without_x!(@threaded
-            minmax_mod,
-            min_max_simd_without_x_parallel,
-            simd_parallel_mod
-        );
-    }
-
-    // ----- WITH X
-    {
-        create_pyfuncs_with_x!(@threaded minmax_mod, min_max_simd_with_x_parallel, simd_parallel_mod);
+        create_pyfuncs_with_x!(@threaded minmax_mod, min_max_with_x_parallel, parallel_mod);
     }
 
     // Add the sub modules to the module
-    m.add_submodule(scalar_mod)?;
-    m.add_submodule(scalar_parallel_mod)?;
-    m.add_submodule(simd_mod)?;
-    m.add_submodule(simd_parallel_mod)?;
+    m.add_submodule(sequential_mod)?;
+    m.add_submodule(parallel_mod)?;
 
     Ok(())
 }
@@ -388,67 +354,37 @@ use downsample_rs::m4 as m4_mod;
 // Create a sub module for the M4 algorithm
 #[pymodule]
 fn m4(_py: Python, m: &PyModule) -> PyResult<()> {
-    // ----------------- SCALAR
+    // ----------------- SEQUENTIAL
 
-    let scalar_mod = PyModule::new(_py, "scalar")?;
+    let sequential_mod = PyModule::new(_py, "sequential")?;
 
     // ----- WITHOUT X
     {
-        create_pyfuncs_without_x!(m4_mod, m4_scalar_without_x, scalar_mod);
+        create_pyfuncs_without_x!(m4_mod, m4_without_x, sequential_mod);
     }
 
     // ----- WITH X
     {
-        create_pyfuncs_with_x!(m4_mod, m4_scalar_with_x, scalar_mod);
+        create_pyfuncs_with_x!(m4_mod, m4_with_x, sequential_mod);
     }
 
-    // ----------------- SCALAR PARALLEL
+    // ----------------- PARALLEL
 
-    let scalar_parallel_mod = PyModule::new(_py, "scalar_parallel")?;
+    let parallel_mod = PyModule::new(_py, "parallel")?;
 
     // ----- WITHOUT X
     {
-        create_pyfuncs_without_x!(@threaded m4_mod, m4_scalar_without_x_parallel, scalar_parallel_mod);
+        create_pyfuncs_without_x!(@threaded m4_mod, m4_without_x_parallel, parallel_mod);
     }
 
     // ----- WITH X
     {
-        create_pyfuncs_with_x!(@threaded m4_mod, m4_scalar_with_x_parallel, scalar_parallel_mod);
-    }
-
-    // ----------------- SIMD
-
-    let simd_mod = PyModule::new(_py, "simd")?;
-
-    // ----- WITHOUT X
-    {
-        create_pyfuncs_without_x!(m4_mod, m4_simd_without_x, simd_mod);
-    }
-
-    // ----- WITH X
-    {
-        create_pyfuncs_with_x!(m4_mod, m4_simd_with_x, simd_mod);
-    }
-
-    // ----------------- SIMD PARALLEL
-
-    let simd_parallel_mod = PyModule::new(_py, "simd_parallel")?;
-
-    // ----- WITHOUT X
-    {
-        create_pyfuncs_without_x!(@threaded m4_mod, m4_simd_without_x_parallel, simd_parallel_mod);
-    }
-
-    // ----- WITH X
-    {
-        create_pyfuncs_with_x!(@threaded m4_mod, m4_simd_with_x_parallel, simd_parallel_mod);
+        create_pyfuncs_with_x!(@threaded m4_mod, m4_with_x_parallel, parallel_mod);
     }
 
     // Add the sub modules to the module
-    m.add_submodule(scalar_mod)?;
-    m.add_submodule(scalar_parallel_mod)?;
-    m.add_submodule(simd_mod)?;
-    m.add_submodule(simd_parallel_mod)?;
+    m.add_submodule(sequential_mod)?;
+    m.add_submodule(parallel_mod)?;
 
     Ok(())
 }
@@ -460,23 +396,23 @@ use downsample_rs::lttb as lttb_mod;
 // Create a sub module for the LTTB algorithm
 #[pymodule]
 fn lttb(_py: Python, m: &PyModule) -> PyResult<()> {
-    // ----------------- SCALAR
+    // ----------------- SEQUENTIAL
 
-    let scalar_mod = PyModule::new(_py, "scalar")?;
+    let sequential_mod = PyModule::new(_py, "sequential")?;
 
     // Create the Python functions for the module
     // ----- WITHOUT X
     {
-        create_pyfuncs_without_x!(lttb_mod, lttb_without_x, scalar_mod);
+        create_pyfuncs_without_x!(lttb_mod, lttb_without_x, sequential_mod);
     }
 
     // ----- WITH X
     {
-        create_pyfuncs_with_x!(lttb_mod, lttb_with_x, scalar_mod);
+        create_pyfuncs_with_x!(lttb_mod, lttb_with_x, sequential_mod);
     }
 
     // Add the sub modules to the module
-    m.add_submodule(scalar_mod)?;
+    m.add_submodule(sequential_mod)?;
 
     Ok(())
 }
@@ -488,70 +424,30 @@ use downsample_rs::minmaxlttb as minmaxlttb_mod;
 // Create a sub module for the MINMAXLTTB algorithm
 #[pymodule]
 fn minmaxlttb(_py: Python, m: &PyModule) -> PyResult<()> {
-    // ----------------- SCALAR
-
-    let scalar_mod = PyModule::new(_py, "scalar")?;
-
-    // ----- WITHOUT X
-    {
-        create_pyfuncs_without_x_with_ratio!(
-            minmaxlttb_mod,
-            minmaxlttb_scalar_without_x,
-            scalar_mod
-        );
-    }
-
-    // ----- WITH X
-    {
-        create_pyfuncs_with_x_with_ratio!(minmaxlttb_mod, minmaxlttb_scalar_with_x, scalar_mod);
-    }
-
-    // ----------------- SCALAR PARALLEL
-
-    let scalar_parallel_mod = PyModule::new(_py, "scalar_parallel")?;
-
-    // ----- WITHOUT X
-    {
-        create_pyfuncs_without_x_with_ratio!(@threaded
-            minmaxlttb_mod,
-            minmaxlttb_scalar_without_x_parallel,
-            scalar_parallel_mod
-        );
-    }
-
-    // ----- WITH X
-    {
-        create_pyfuncs_with_x_with_ratio!(@threaded
-            minmaxlttb_mod,
-            minmaxlttb_scalar_with_x_parallel,
-            scalar_parallel_mod
-        );
-    }
-
-    // ----------------- SIMD
+    // ----------------- SEQUENTIAL
 
-    let simd_mod = PyModule::new(_py, "simd")?;
+    let sequential_mod = PyModule::new(_py, "sequential")?;
 
     // ----- WITHOUT X
     {
-        create_pyfuncs_without_x_with_ratio!(minmaxlttb_mod, minmaxlttb_simd_without_x, simd_mod);
+        create_pyfuncs_without_x_with_ratio!(minmaxlttb_mod, minmaxlttb_without_x, sequential_mod);
     }
 
     // ----- WITH X
     {
-        create_pyfuncs_with_x_with_ratio!(minmaxlttb_mod, minmaxlttb_simd_with_x, simd_mod);
+        create_pyfuncs_with_x_with_ratio!(minmaxlttb_mod, minmaxlttb_with_x, sequential_mod);
     }
 
-    // ----------------- SIMD PARALLEL
+    // ----------------- PARALLEL
 
-    let simd_parallel_mod = PyModule::new(_py, "simd_parallel")?;
+    let parallel_mod = PyModule::new(_py, "parallel")?;
 
     // ----- WITHOUT X
     {
         create_pyfuncs_without_x_with_ratio!(@threaded
             minmaxlttb_mod,
-            minmaxlttb_simd_without_x_parallel,
-            simd_parallel_mod
+            minmaxlttb_without_x_parallel,
+            parallel_mod
         );
     }
 
@@ -559,16 +455,14 @@ fn minmaxlttb(_py: Python, m: &PyModule) -> PyResult<()> {
     {
         create_pyfuncs_with_x_with_ratio!(@threaded
             minmaxlttb_mod,
-            minmaxlttb_simd_with_x_parallel,
-            simd_parallel_mod
+            minmaxlttb_with_x_parallel,
+            parallel_mod
         );
     }
 
     // Add the submodules to the module
-    m.add_submodule(scalar_mod)?;
-    m.add_submodule(scalar_parallel_mod)?;
-    m.add_submodule(simd_mod)?;
-    m.add_submodule(simd_parallel_mod)?;
+    m.add_submodule(sequential_mod)?;
+    m.add_submodule(parallel_mod)?;
 
     Ok(())
 }
diff --git a/tests/test_rust_mods.py b/tests/test_rust_mods.py
index f592823..a6532d3 100644
--- a/tests/test_rust_mods.py
+++ b/tests/test_rust_mods.py
@@ -23,23 +23,23 @@ def _test_rust_mod_correctly_build(mod, sub_mods, has_x_impl: bool):
 
 def test_minmax_rust_mod_correctly_build():
     mod = tsds_rs.minmax
-    sub_mods = ["simd", "simd_parallel", "scalar", "scalar_parallel"]
+    sub_mods = ["sequential", "parallel"]
     _test_rust_mod_correctly_build(mod, sub_mods, has_x_impl=False)
 
 
 def test_m4_rust_mod_correctly_build():
     mod = tsds_rs.m4
-    sub_mods = ["simd", "simd_parallel", "scalar", "scalar_parallel"]
+    sub_mods = ["sequential", "parallel"]
     _test_rust_mod_correctly_build(mod, sub_mods, has_x_impl=False)
 
 
 def test_lttb_rust_mod_correctly_build():
     mod = tsds_rs.lttb
-    sub_mods = ["scalar"]
+    sub_mods = ["sequential"]
     _test_rust_mod_correctly_build(mod, sub_mods, has_x_impl=True)
 
 
 def test_minmaxlttb_rust_mod_correctly_build():
     mod = tsds_rs.minmaxlttb
-    sub_mods = ["simd", "simd_parallel", "scalar", "scalar_parallel"]
+    sub_mods = ["sequential", "parallel"]
     _test_rust_mod_correctly_build(mod, sub_mods, has_x_impl=True)
diff --git a/tsdownsample/downsampling_interface.py b/tsdownsample/downsampling_interface.py
index c177ca6..de20d46 100644
--- a/tsdownsample/downsampling_interface.py
+++ b/tsdownsample/downsampling_interface.py
@@ -161,10 +161,7 @@ def mod_single_core(self) -> ModuleType:
             If SIMD compiled module is available, that one is returned. Otherwise, the
             scalar compiled module is returned.
         """
-        if hasattr(self.rust_mod, "simd"):
-            # use SIMD implementation if available
-            return self.rust_mod.simd
-        return self.rust_mod.scalar
+        return self.rust_mod.sequential
 
     @property
     def mod_multi_core(self) -> Union[ModuleType, None]:
@@ -177,12 +174,9 @@ def mod_multi_core(self) -> Union[ModuleType, None]:
             Otherwise, the scalar parallel compiled module is returned.
             If no parallel compiled module is available, None is returned.
         """
-        if hasattr(self.rust_mod, "simd_parallel"):
+        if hasattr(self.rust_mod, "parallel"):
             # use SIMD implementation if available
-            return self.rust_mod.simd_parallel
-        elif hasattr(self.rust_mod, "scalar_parallel"):
-            # use scalar implementation if available (when no SIMD available)
-            return self.rust_mod.scalar_parallel
+            return self.rust_mod.parallel
         return None  # no parallel compiled module available
 
     @staticmethod